Blame c++/contourgl/cuda/contour.cu

6fa009
/*
6fa009
    ......... 2018 Ivan Mahonin
6fa009
6fa009
    This program is free software: you can redistribute it and/or modify
6fa009
    it under the terms of the GNU General Public License as published by
6fa009
    the Free Software Foundation, either version 3 of the License, or
6fa009
    (at your option) any later version.
6fa009
6fa009
    This program is distributed in the hope that it will be useful,
6fa009
    but WITHOUT ANY WARRANTY; without even the implied warranty of
6fa009
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
6fa009
    GNU General Public License for more details.
6fa009
6fa009
    You should have received a copy of the GNU General Public License
6fa009
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
6fa009
*/
6fa009
a7622f
//extern "C" {
6fa009
6fa009
// Fixed-point scale shared by the kernels below: coverage values are stored
// as integers in units of 1/ONE (ONE == 2^16), i.e. with 16 fractional bits.
#define ONE       65536               // 1.0 in fixed point
#define TWO      131072               // (ONE)*2
#define HALF      32768               // (ONE)/2
#define ONE_F     65536.f             // (float)(ONE), float -> fixed-point factor
#define DIV_ONE_F 0.0000152587890625f // 1.f/(ONE_F), fixed-point -> float factor
6fa009
6fa009
6fa009
// Accumulates the coverage of one polyline segment into the marks buffer.
//
// Launch layout: one-dimensional grid. Thread t handles the segment
// points[begin + t] -> points[begin + t + 1]; threads with begin + t >= end
// exit immediately, so the grid only has to provide at least (end - begin)
// threads.
//
// Parameters:
//   width, height - size of the marks buffer in cells
//   marks         - two ints per cell, row-major with a row stride of `width`
//                   cells. Every visited cell is updated with one 64-bit
//                   atomicAdd, so the buffer must be 8-byte aligned. The low
//                   32 bits receive (int)(area*cover), the high 32 bits
//                   receive (int)(cover); cover is scaled by ONE_F.
//   points        - polyline vertices; index `end` is read too, so the array
//                   needs at least end + 1 entries
//   begin, end    - half-open range of segment indices to process
//   minx          - not used by this kernel (kept for interface compatibility)
//
// NOTE: both words are added as a single 64-bit integer, so a carry out of
// the low word propagates into the high word when several contributions are
// summed in the same cell.
__global__ void path(
	int width,
	int height,
	int *marks,
	const float2 *points,
	int begin,
	int end,
	int minx )
{
	int id = blockIdx.x*blockDim.x + threadIdx.x + begin;
	if (id >= end) return;

	// An empty buffer has no valid cell; without this check a non-positive
	// height would produce a negative row index below.
	if (width <= 0 || height <= 0) return;

	float2 p0 = points[id];
	float2 p1 = points[id + 1];

	// NaN compares unequal to everything, so the loop condition below could
	// never become false for such a vertex. Skip non-finite segments.
	if ( !isfinite(p0.x) || !isfinite(p0.y)
	  || !isfinite(p1.x) || !isfinite(p1.y) ) return;

	// Mirror the segment so that it always runs towards +x and +y. The flips
	// are undone when the cell index and the sign of the result are computed.
	bool flipx = p1.x < p0.x;
	bool flipy = p1.y < p0.y;
	if (flipx) { p0.x = (float)width  - p0.x; p1.x = (float)width  - p1.x; }
	if (flipy) { p0.y = (float)height - p0.y; p1.y = (float)height - p1.y; }

	float2 d;
	d.x = p1.x - p0.x;
	d.y = p1.y - p0.y;
	int w1 = width - 1;
	int h1 = height - 1;
	// Slopes of the segment; one of them is +inf for an axis-aligned segment,
	// in which case the corresponding crossing point is never selected.
	float kx = d.x/d.y;
	float ky = d.y/d.x;

	// Walk the segment cell by cell until the running start point reaches p1.
	while(p0.x != p1.x || p0.y != p1.y) {
		int ix = max((int)p0.x, 0);
		int iy = (int)p0.y;
		// Everything past the last column (in mirrored space) is dropped.
		if (ix > w1) return;

		// px: crossing with the next vertical cell border,
		// py: crossing with the next horizontal cell border.
		float2 px, py;
		px.x = (float)(ix + 1);
		py.y = (float)(iy + 1);
		// Rows outside the buffer are accumulated into the nearest valid row.
		iy = max(0, min(h1, iy));

		px.y = p0.y + ky*(px.x - p0.x);
		py.x = p0.x + kx*(py.y - p0.y);

		// End of the piece lying inside the current cell: the segment end or
		// the nearest border crossing, whichever comes first.
		float2 pp1 = p1;
		if (pp1.x > px.x) pp1 = px;
		if (pp1.y > py.y) pp1 = py;

		// For very large coordinates (float)(i + 1) rounds back to the current
		// position and the walk stops advancing; bail out instead of spinning.
		if (pp1.x == p0.x && pp1.y == p0.y) return;

		float cover = (pp1.x - p0.x)*ONE_F;
		float area = py.y - 0.5f*(p0.y + pp1.y);
		if (flipx) { ix = w1 - ix; cover = -cover; }
		if (flipy) { iy = h1 - iy; area = 1.f - area; }
		p0 = pp1;

		// ix and iy are within [0, w1] x [0, h1] here; index in size_t so the
		// offset cannot overflow 32-bit arithmetic on very large buffers.
		size_t cell = (size_t)iy*(size_t)width + (size_t)ix;
		atomicAdd(
			(unsigned long long*)(marks + 2*cell),
			((unsigned long long)(unsigned int)(int)(cover) << 32)
			| (unsigned long long)(unsigned int)((int)(area*cover)) );
		// Equivalent pair of 32-bit updates, kept for reference:
		//int *mark = marks + ((iy*width + ix) << 1);
		//atomicAdd(mark, (int)(area*cover));
		//atomicAdd(mark + 1, (int)(cover));
	}
}
6fa009
6fa009
// Converts the accumulated marks into alpha and blends `color` over `image`.
//
// Launch layout: one-dimensional grid. Thread t handles the column
// x = bounds.x + t and walks it row by row from bounds.y up to (but not
// including) bounds.w; threads with x >= bounds.z exit immediately.
//
// Parameters:
//   width  - row stride, in elements, of both `marks` and `image`
//   marks  - one int2 per pixel: x is the in-cell term, y is the cover that
//            is carried over to all following rows of the column. Every
//            visited cell is reset to (0, 0) after it has been read.
//   image  - one float4 per pixel, blended in place
//   color  - fill color; color.w additionally scales the coverage
//   bounds - x/y: first column/row, z/w: one past the last column/row
__global__ void fill(
	int width,
	int2 *marks,
	float4 *image,
	float4 color,
	int4 bounds )
{
	int id = blockIdx.x*blockDim.x + threadIdx.x + bounds.x;
	if (id >= bounds.z) return;
	// An empty row range must not touch any pixel.
	if (bounds.y >= bounds.w) return;

	// 64-bit offset so that row*width cannot overflow on very large images.
	long long offset = (long long)bounds.y*width + id;
	marks += offset;
	image += offset;

	int icover = 0; // running sum of the cover of all rows visited so far
	for(int row = bounds.y; row < bounds.w; ++row) {
		int2 m = *marks;
		*marks = make_int2(0, 0);

		// Coverage in units of 1/ONE. Overlapping contours can accumulate
		// more than one full pixel; clamp so that alpha stays within
		// [0, color.w] and the blend below never extrapolates.
		int coverage = min(abs(m.x + icover), ONE);
		float alpha = (float)coverage*color.w*DIV_ONE_F;
		icover += m.y;
		float one_alpha = 1.f - alpha;

		float4 p = *image;
		p.x = p.x*one_alpha + color.x*alpha;
		p.y = p.y*one_alpha + color.y*alpha;
		p.z = p.z*one_alpha + color.z*alpha;
		p.w = p.w*one_alpha + color.w*alpha;
		*image = p;

		marks += width;
		image += width;
	}
}
6fa009
a7622f
//}