Blame c++/contourgl/cl/contour-base.cl

105dfe
/*
20cefb
    ......... 2015-2018 Ivan Mahonin
105dfe
105dfe
    This program is free software: you can redistribute it and/or modify
105dfe
    it under the terms of the GNU General Public License as published by
105dfe
    the Free Software Foundation, either version 3 of the License, or
105dfe
    (at your option) any later version.
105dfe
105dfe
    This program is distributed in the hope that it will be useful,
105dfe
    but WITHOUT ANY WARRANTY; without even the implied warranty of
105dfe
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
105dfe
    GNU General Public License for more details.
105dfe
105dfe
    You should have received a copy of the GNU General Public License
105dfe
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
105dfe
*/
105dfe
feaa05
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics: enable
feaa05
5a7afd
// Scale factors for coverage values that the kernels store as integers:
// path() multiplies a float cover by ONE_F before truncating it to int,
// fill() multiplies the accumulated integer by DIV_ONE_F to get a float
// alpha back, and compares the running cover against HALF.
// TWO is referenced only from commented-out code in fill().
#define ONE       65536
5a7afd
#define TWO      131072               // (ONE)*2
5a7afd
#define HALF      32768               // (ONE)/2
5a7afd
#define ONE_F     65536.f             // (float)(ONE)
5a7afd
#define DIV_ONE_F 0.0000152587890625f // 1.f/(ONE_F)
5a7afd
5a7afd
105dfe
/*
 * Resets one element of the marks buffer per work-item.
 *
 * width  - number of marks in a row; used to derive the column of this
 *          work-item from its global id
 * height - not used by this kernel
 * marks  - buffer to reset; the element at the work-item's global id is
 *          overwritten
 *
 * The x, y and w components are zeroed. The z component is set to
 * column | (column + 1), i.e. the column index with its lowest zero bit
 * set, which is the same value fill() writes back after consuming a mark.
 */
kernel void clear(
	int width,
	int height,
	global int4 *marks )
{
	const int index = get_global_id(0);
	const int column = index % width;
	const int link = column | (column + 1);
	marks[index] = (int4)(0, 0, link, 0);
}
105dfe
105dfe
/*
 * Accumulates one contour segment into the marks buffer.
 *
 * Each work-item takes the segment points[id] -> points[id + 1] and walks
 * it piece by piece: every iteration stops at the nearest of the next
 * integer x line, the next integer y line, or the segment end, and adds
 * the piece's contribution to the mark of the cell it started in.
 *
 * width, height - dimensions of the marks grid (4 ints per cell)
 * marks         - marks buffer viewed as plain ints; per cell, ints 0..1
 *                 receive a packed 64-bit atomic add and int 2 receives
 *                 atomic_min updates
 * points        - segment end points; element id + 1 must be readable
 * bounds        - only bounds.s0 is read: the lowest column index that
 *                 the link update loop is allowed to touch
 */
kernel void path(
	int width,
	int height,
	global int *marks,
	global float2 *points,
	int4 bounds )
{
	const int segment = get_global_id(0);
	float2 cur = points[segment];
	float2 end = points[segment + 1];

	// Mirror the segment so that it always runs towards increasing x and y;
	// cell indices and the accumulated values are mirrored back before
	// they are written.
	const bool flipx = end.x < cur.x;
	const bool flipy = end.y < cur.y;
	if (flipx) {
		cur.x = (float)width - cur.x;
		end.x = (float)width - end.x;
	}
	if (flipy) {
		cur.y = (float)height - cur.y;
		end.y = (float)height - end.y;
	}

	// NOTE(review): delta.x or delta.y can be zero, in which case these
	// slopes are not finite; the stepping below appears to rely on float
	// comparison semantics to cope with that - confirm.
	const float2 delta = end - cur;
	const float dx_per_dy = delta.x/delta.y;
	const float dy_per_dx = delta.y/delta.x;
	const int last_col = width - 1;
	const int last_row = height - 1;

	while (!(cur.x == end.x && cur.y == end.y)) {
		int col = (int)cur.x;
		int line = max((int)cur.y, 0);
		if (line > last_row)
			return;

		// where the segment reaches the next integer x line
		float2 at_next_col;
		at_next_col.x = (float)(col + 1);
		at_next_col.y = cur.y + dy_per_dx*(at_next_col.x - cur.x);

		// where the segment reaches the next integer y line
		float2 at_next_row;
		at_next_row.y = (float)(line + 1);
		at_next_row.x = cur.x + dx_per_dy*(at_next_row.y - cur.y);

		// end of this piece: whichever comes first
		float2 stop = end;
		if (stop.x > at_next_col.x) stop = at_next_col;
		if (stop.y > at_next_row.y) stop = at_next_row;

		float cover = (stop.y - cur.y)*ONE_F;
		float area = at_next_col.x - 0.5f*(cur.x + stop.x);
		if (flipx) {
			col = last_col - col;
			area = 1.f - area;
		}
		if (flipy) {
			line = last_row - line;
			cover = -cover;
		}
		col = clamp(col, 0, last_col);

		// One 64-bit atomic add updates two adjacent ints of the cell:
		// (int)cover goes into the high 32 bits of the addend and
		// (int)(area*cover) into the low 32 bits.
		global int *row_base = marks + 4*line*width;
		atomic_add(
			(global long*)(row_base + 4*col),
			upsample((int)cover, (int)(area*cover)) );

		// Lower int 2 of every linked column at or above bounds.s0 so that
		// it does not point past this column.
		global int *links = row_base + 2;
		for (int prev = (col & (col + 1)) - 1;
		     prev >= bounds.s0;
		     prev = (prev & (prev + 1)) - 1)
		{
			atomic_min(links + 4*prev, col);
		}

		cur = stop;
	}
}
105dfe
82f284
// TODO:
82f284
// different implementations for:
82f284
//   antialiased, transparent, inverted, evenodd contours and combinations (total 16 implementations)
105dfe
/*
 * Resolves the accumulated marks of one row into the image.
 *
 * Each work-item handles row get_global_id(0). It follows the z links
 * stored in the marks from boundsx.s0 until it reaches boundsx.s1, and
 * for every visited mark:
 *   - fills the columns skipped since the previous mark with `color`
 *     when the running cover magnitude exceeds HALF,
 *   - reads the mark and resets it to the same state clear() produces,
 *   - blends `color` over the mark's own pixel with
 *     alpha = |m.x + running cover| / ONE, limited to 1,
 *   - adds m.y to the running cover and jumps to column m.z.
 *
 * width   - row stride, in elements, of both marks and image
 * marks   - marks buffer; visited elements are reset
 * image   - destination pixels
 * color   - colour to draw; skipped spans are overwritten with it as is
 * boundsx - s0: first column to visit, s1: end column (exclusive)
 *
 * Only this single variant is implemented; even-odd and inverted fills
 * (see the TODO before this kernel) are not handled here.
 */
kernel void fill(
	int width,
	global int4 *marks,
	global float4 *image,
	float4 color,
	int2 boundsx )
{
	int id = width*(int)get_global_id(0);
	marks += id;
	image += id;
	global int4 *mark;
	global float4 *pixel;

	int4 m;
	float alpha;
	int icover = 0, c0 = boundsx.s0, c1 = boundsx.s0;
	while(c1 < boundsx.s1) {
		// solid span between the previous mark and this one
		if (abs(icover) > HALF) {
			while(c0 < c1)
				image[c0++] = color;
		}

		// consume the mark and leave it cleared for the next contour
		mark = &marks[c1];
		m = *mark;
		*mark = (int4)(0, 0, c1 | (c1 + 1), 0);

		// Limit alpha to 1: when the accumulated value exceeds ONE the
		// unclamped blend would use a negative (1 - alpha) weight and
		// push the pixel past `color`.
		alpha = min((float)abs(m.x + icover)*DIV_ONE_F, 1.f);
		pixel = &image[c1];
		*pixel = *pixel*(1.f - alpha) + color*alpha;

		icover += m.y;
		c0 = c1 + 1;
		c1 = m.z;
	}
}