Blob Blame Raw
/*
    ......... 2018 Ivan Mahonin

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/*
  samples_buffer format:
    Sample count,         // only field 'next_index' is in use and store index of next sample to allocate
    Sample rows[height],  // only fields 'next_index' are in use and store index first sample in the row 
    Sample real_samples[]
*/


typedef struct {
	float4 color;
	int invert;
	int evenodd;
	int align0;
	int align1;
} Path __attribute__((aligned (32)));

typedef struct {
	float2 coord;
	int path_index;
	int align0;
} Point __attribute__((aligned (16)));

typedef struct {
	int path_index;
	int x;
	float area;
	float cover;
	int next_index;
	int align0;
	int align1;
	int align2;
} Sample __attribute__((aligned (32))); 


kernel void reset(global Sample *samples)
{
	int id = get_global_id(0);
	samples[1+id].path_index = -1;
	samples[1+id].next_index = 0;
	if (id == 0) {
		samples->path_index = -1;
		samples->next_index = get_global_size(0) + 1;
	}
}


kernel void paths(
	int width,
	int height,
	global Sample *samples,
	global const Point *points )
{
	const float e = 1e-6f;
	
	// flip order, because we will insert samples into front of linked list 
	int id = get_global_size(0) - get_global_id(0) - 1;
	
	float2 size = (float2)((float)width, (float)height); 
	int w1 = width - 1;
	int h1 = height - 1;

	global int *next_sample = &samples->next_index;
	global Sample *rows = &samples[1];

	Point point0 = points[id];
	Point point1 = points[id+1];
	if (point0.path_index != point1.path_index) return;
	
	int path_index = point0.path_index;
	float2 p0 = point0.coord;
	float2 p1 = point1.coord;
	
	bool flipx = p1.x < p0.x;
	bool flipy = p1.y < p0.y;
	if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; }
	if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; }
	float2 d = p1 - p0;
	float kx = fabs(d.y) < e ? 1e10 : d.x/d.y;
	float ky = fabs(d.x) < e ? 1e10 : d.y/d.x;
	
	while(p0.x != p1.x || p0.y != p1.y) {
		int ix = (int)floor(p0.x + e);
		int iy = (int)floor(p0.y + e);
		if (iy > h1) break;

		float px = (float)(ix + 1);
		float py = (float)(iy + 1);
		float2 pp1 = p1;
		if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); }
		if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); }
		
		if (iy >= 0) {
			// calc values
			Sample sample;
			sample.path_index = path_index;
			sample.cover = pp1.y - p0.y;
			sample.area = px - 0.5f*(p0.x + pp1.x);
			if (flipx) { ix = w1 - ix; sample.area = 1.f - sample.area; }
			if (flipy) { iy = h1 - iy; sample.cover = -sample.cover; }
			sample.area *= sample.cover;
			sample.x = clamp(ix, 0, w1);
			
			// store in buffer
			int sample_index = atomic_inc(next_sample);
			sample.next_index = atomic_xchg(&rows[iy].next_index, sample_index);
			samples[sample_index] = sample;
		}
		
		p0 = pp1;
	}
}


kernel void draw(
	const int width,
	global float4 *image,
	global Sample *samples,
	global Path *paths )
{
	int id = get_global_id(0);

	global float4 *image_row = image + id*width;
	global Sample *first = &samples[1+id];

	int current_index;
	global Sample *prev, *current, *next;
	
	// sort
	bool repeat = true;
	while(repeat) {
		repeat        = false;
		prev          = first;
		current       = &samples[ prev->next_index ];
		while(current->next_index) {
			next = &samples[ current->next_index ];
			if ( current->path_index > next->path_index
			  || (current->path_index == next->path_index && current->x > next->x) )
			{
				// swap
				current_index = prev->next_index;
				prev->next_index = current->next_index;
				current->next_index = next->next_index;
				next->next_index = current_index;
				prev = next;
				repeat = true;
			} else {
				prev = current;
				current = next;
			}
		}
	}
	
	// merge
	current = &samples[ first->next_index ];
	float c = 0.f;
	while(current->next_index) {
		c += current->cover;
		next = &samples[ current->next_index ];
		if (current->path_index == next->path_index && current->x == next->x) {
			current->area  += next->area;
			current->cover += next->cover;
			current->next_index = next->next_index;
		} else {
			current = next;
		}
	}
	
	// draw
	global float4 *pixel, *next_pixel;
	float cover = 0.f;
	float alpha;
	int next_index = first->next_index;
	while(next_index) {
		current = &samples[ next_index ];
		next_index = current->next_index;
		
		// draw current
		float4 color = paths[ current->path_index ].color;
		float alpha = min(1.f, fabs(cover + current->area))*color.w;
		cover += current->cover;

		pixel = &image_row[current->x];
		*pixel = *pixel*(1.f - alpha) + color*alpha; // TODO: valid composite blending
		++pixel;
		
		// draw span: current <--> next
		next_pixel = fabs(cover) > 0.5f && current->path_index == samples[next_index].path_index
				   ? &image_row[samples[next_index].x] : pixel;
		while(pixel < next_pixel) {
			*pixel = *pixel*(1.f - color.w) + color*color.w; // TODO: valid composite blending
			++pixel;
		}

		if (current->path_index != samples[next_index].path_index) cover = 0.f;
	}
}