Blame c++/contourgl/cl/contour-sort.cl

b09c5d
/*
b09c5d
    ......... 2018 Ivan Mahonin
b09c5d
b09c5d
    This program is free software: you can redistribute it and/or modify
b09c5d
    it under the terms of the GNU General Public License as published by
b09c5d
    the Free Software Foundation, either version 3 of the License, or
b09c5d
    (at your option) any later version.
b09c5d
b09c5d
    This program is distributed in the hope that it will be useful,
b09c5d
    but WITHOUT ANY WARRANTY; without even the implied warranty of
b09c5d
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
b09c5d
    GNU General Public License for more details.
b09c5d
b09c5d
    You should have received a copy of the GNU General Public License
b09c5d
    along with this program.  If not, see <http: licenses="" www.gnu.org="">.</http:>
b09c5d
*/
b09c5d
b09c5d
/*
b09c5d
  samples_buffer format:
b09c5d
    Sample count,         // only field 'next_index' is in use and store index of next sample to allocate
b09c5d
    Sample rows[height],  // only fields 'next_index' are in use and store index first sample in the row 
b09c5d
    Sample real_samples[]
b09c5d
*/
b09c5d
b09c5d
b09c5d
typedef struct {
b09c5d
	float4 color;
b09c5d
	int invert;
b09c5d
	int evenodd;
b09c5d
	int align0;
b09c5d
	int align1;
b09c5d
} Path __attribute__((aligned (32)));
b09c5d
b09c5d
typedef struct {
b09c5d
	float2 coord;
b09c5d
	int path_index;
b09c5d
	int align0;
b09c5d
} Point __attribute__((aligned (16)));
b09c5d
b09c5d
typedef struct {
b09c5d
	int path_index;
b09c5d
	int x;
b09c5d
	float area;
b09c5d
	float cover;
b09c5d
	int next_index;
b09c5d
	int align0;
b09c5d
	int align1;
b09c5d
	int align2;
b09c5d
} Sample __attribute__((aligned (32))); 
b09c5d
b09c5d
b09c5d
kernel void reset(global Sample *samples)
b09c5d
{
b09c5d
	int id = get_global_id(0);
b09c5d
	samples[1+id].path_index = -1;
b09c5d
	samples[1+id].next_index = 0;
b09c5d
	if (id == 0) {
b09c5d
		samples->path_index = -1;
b09c5d
		samples->next_index = get_global_size(0) + 1;
b09c5d
	}
b09c5d
}
b09c5d
b09c5d
b09c5d
kernel void paths(
b09c5d
	int width,
b09c5d
	int height,
b09c5d
	global Sample *samples,
b09c5d
	global const Point *points )
b09c5d
{
b09c5d
	const float e = 1e-6f;
b09c5d
	
b09c5d
	// flip order, because we will insert samples into front of linked list 
b09c5d
	int id = get_global_size(0) - get_global_id(0) - 1;
b09c5d
	
b09c5d
	float2 size = (float2)((float)width, (float)height); 
b09c5d
	int w1 = width - 1;
b09c5d
	int h1 = height - 1;
b09c5d
b09c5d
	global int *next_sample = &samples->next_index;
b09c5d
	global Sample *rows = &samples[1];
b09c5d
b09c5d
	Point point0 = points[id];
b09c5d
	Point point1 = points[id+1];
b09c5d
	if (point0.path_index != point1.path_index) return;
b09c5d
	
b09c5d
	int path_index = point0.path_index;
b09c5d
	float2 p0 = point0.coord;
b09c5d
	float2 p1 = point1.coord;
b09c5d
	
b09c5d
	bool flipx = p1.x < p0.x;
b09c5d
	bool flipy = p1.y < p0.y;
b09c5d
	if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; }
b09c5d
	if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; }
b09c5d
	float2 d = p1 - p0;
b09c5d
	float kx = fabs(d.y) < e ? 1e10 : d.x/d.y;
b09c5d
	float ky = fabs(d.x) < e ? 1e10 : d.y/d.x;
b09c5d
	
b09c5d
	while(p0.x != p1.x || p0.y != p1.y) {
b09c5d
		int ix = (int)floor(p0.x + e);
b09c5d
		int iy = (int)floor(p0.y + e);
b09c5d
		if (iy > h1) break;
b09c5d
b09c5d
		float px = (float)(ix + 1);
b09c5d
		float py = (float)(iy + 1);
b09c5d
		float2 pp1 = p1;
b09c5d
		if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); }
b09c5d
		if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); }
b09c5d
		
b09c5d
		if (iy >= 0) {
b09c5d
			// calc values
b09c5d
			Sample sample;
b09c5d
			sample.path_index = path_index;
b09c5d
			sample.cover = pp1.y - p0.y;
b09c5d
			sample.area = px - 0.5f*(p0.x + pp1.x);
b09c5d
			if (flipx) { ix = w1 - ix; sample.area = 1.f - sample.area; }
b09c5d
			if (flipy) { iy = h1 - iy; sample.cover = -sample.cover; }
b09c5d
			sample.area *= sample.cover;
b09c5d
			sample.x = clamp(ix, 0, w1);
b09c5d
			
b09c5d
			// store in buffer
b09c5d
			int sample_index = atomic_inc(next_sample);
b09c5d
			sample.next_index = atomic_xchg(&rows[iy].next_index, sample_index);
b09c5d
			samples[sample_index] = sample;
b09c5d
		}
b09c5d
		
b09c5d
		p0 = pp1;
b09c5d
	}
b09c5d
}
b09c5d
b09c5d
b09c5d
kernel void draw(
b09c5d
	const int width,
b09c5d
	global float4 *image,
b09c5d
	global Sample *samples,
b09c5d
	global Path *paths )
b09c5d
{
b09c5d
	int id = get_global_id(0);
b09c5d
b09c5d
	global float4 *image_row = image + id*width;
b09c5d
	global Sample *first = &samples[1+id];
b09c5d
b09c5d
	int current_index;
b09c5d
	global Sample *prev, *current, *next;
b09c5d
	
b09c5d
	// sort
b09c5d
	bool repeat = true;
b09c5d
	while(repeat) {
b09c5d
		repeat        = false;
b09c5d
		prev          = first;
b09c5d
		current       = &samples[ prev->next_index ];
b09c5d
		while(current->next_index) {
b09c5d
			next = &samples[ current->next_index ];
b09c5d
			if ( current->path_index > next->path_index
b09c5d
			  || (current->path_index == next->path_index && current->x > next->x) )
b09c5d
			{
b09c5d
				// swap
b09c5d
				current_index = prev->next_index;
b09c5d
				prev->next_index = current->next_index;
b09c5d
				current->next_index = next->next_index;
b09c5d
				next->next_index = current_index;
b09c5d
				prev = next;
b09c5d
				repeat = true;
b09c5d
			} else {
b09c5d
				prev = current;
b09c5d
				current = next;
b09c5d
			}
b09c5d
		}
b09c5d
	}
b09c5d
	
b09c5d
	// merge
b09c5d
	current = &samples[ first->next_index ];
b09c5d
	float c = 0.f;
b09c5d
	while(current->next_index) {
b09c5d
		c += current->cover;
b09c5d
		next = &samples[ current->next_index ];
b09c5d
		if (current->path_index == next->path_index && current->x == next->x) {
b09c5d
			current->area  += next->area;
b09c5d
			current->cover += next->cover;
b09c5d
			current->next_index = next->next_index;
b09c5d
		} else {
b09c5d
			current = next;
b09c5d
		}
b09c5d
	}
b09c5d
	
b09c5d
	// draw
b09c5d
	global float4 *pixel, *next_pixel;
b09c5d
	float cover = 0.f;
b09c5d
	float alpha;
b09c5d
	int next_index = first->next_index;
b09c5d
	while(next_index) {
b09c5d
		current = &samples[ next_index ];
b09c5d
		next_index = current->next_index;
b09c5d
		
b09c5d
		// draw current
b09c5d
		float4 color = paths[ current->path_index ].color;
b09c5d
		float alpha = min(1.f, fabs(cover + current->area))*color.w;
b09c5d
		cover += current->cover;
b09c5d
b09c5d
		pixel = &image_row[current->x];
b09c5d
		*pixel = *pixel*(1.f - alpha) + color*alpha; // TODO: valid composite blending
b09c5d
		++pixel;
b09c5d
		
b09c5d
		// draw span: current <--> next
b09c5d
		next_pixel = fabs(cover) > 0.5f && current->path_index == samples[next_index].path_index
b09c5d
				   ? &image_row[samples[next_index].x] : pixel;
b09c5d
		while(pixel < next_pixel) {
b09c5d
			*pixel = *pixel*(1.f - color.w) + color*color.w; // TODO: valid composite blending
b09c5d
			++pixel;
b09c5d
		}
b09c5d
b09c5d
		if (current->path_index != samples[next_index].path_index) cover = 0.f;
b09c5d
	}
b09c5d
}
b09c5d