/* ......... 2018 Ivan Mahonin This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* samples_buffer format: Sample count, // only field 'next_index' is in use and store index of next sample to allocate Sample rows[height], // only fields 'next_index' are in use and store index first sample in the row Sample real_samples[] */ typedef struct { float4 color; int invert; int evenodd; int align0; int align1; } Path __attribute__((aligned (32))); typedef struct { float2 coord; int path_index; int align0; } Point __attribute__((aligned (16))); typedef struct { int path_index; int x; float area; float cover; int next_index; int align0; int align1; int align2; } Sample __attribute__((aligned (32))); kernel void reset(global Sample *samples) { int id = get_global_id(0); samples[1+id].path_index = -1; samples[1+id].next_index = 0; if (id == 0) { samples->path_index = -1; samples->next_index = get_global_size(0) + 1; } } kernel void paths( int width, int height, global Sample *samples, global const Point *points ) { const float e = 1e-6f; // flip order, because we will insert samples into front of linked list int id = get_global_size(0) - get_global_id(0) - 1; float2 size = (float2)((float)width, (float)height); int w1 = width - 1; int h1 = height - 1; global int *next_sample = &samples->next_index; global Sample *rows = &samples[1]; Point point0 = points[id]; Point point1 = points[id+1]; if (point0.path_index != point1.path_index) return; int path_index = point0.path_index; float2 p0 = point0.coord; float2 p1 = point1.coord; bool flipx = p1.x < p0.x; bool flipy = p1.y < p0.y; if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; } if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; } float2 d = p1 - p0; float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; while(p0.x != p1.x || p0.y != p1.y) { int ix = (int)floor(p0.x + e); int iy = (int)floor(p0.y + e); if (iy > h1) break; float px = (float)(ix + 1); float py = (float)(iy + 1); float2 pp1 = p1; if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); } if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); } if (iy >= 0) { // calc values Sample sample; sample.path_index = path_index; sample.cover = pp1.y - p0.y; sample.area = px - 0.5f*(p0.x + pp1.x); if (flipx) { ix = w1 - ix; sample.area = 1.f - sample.area; } if (flipy) { iy = h1 - iy; sample.cover = -sample.cover; } sample.area *= sample.cover; sample.x = clamp(ix, 0, w1); // store in buffer int sample_index = atomic_inc(next_sample); sample.next_index = atomic_xchg(&rows[iy].next_index, sample_index); samples[sample_index] = sample; } p0 = pp1; } } kernel void draw( const int width, global float4 *image, global Sample *samples, global Path *paths ) { int id = get_global_id(0); global float4 *image_row = image + id*width; global Sample *first = &samples[1+id]; int current_index; global Sample *prev, *current, *next; // sort bool repeat = true; while(repeat) { repeat = false; prev = first; current = &samples[ prev->next_index ]; while(current->next_index) { next = &samples[ current->next_index ]; if ( current->path_index > next->path_index || (current->path_index == next->path_index && current->x > next->x) ) { // swap current_index = prev->next_index; prev->next_index = current->next_index; current->next_index = next->next_index; next->next_index = current_index; prev = next; repeat = true; } else { prev = current; current = next; } } } // merge current = &samples[ first->next_index ]; float c = 0.f; while(current->next_index) { c += current->cover; next = &samples[ current->next_index ]; if (current->path_index == next->path_index && current->x == next->x) { current->area += next->area; current->cover += next->cover; current->next_index = next->next_index; } else { current = next; } } // draw global float4 *pixel, *next_pixel; float cover = 0.f; float alpha; int next_index = first->next_index; while(next_index) { current = &samples[ next_index ]; next_index = current->next_index; // draw current float4 color = paths[ current->path_index ].color; float alpha = min(1.f, fabs(cover + current->area))*color.w; cover += current->cover; pixel = &image_row[current->x]; *pixel = *pixel*(1.f - alpha) + color*alpha; // TODO: valid composite blending ++pixel; // draw span: current <--> next next_pixel = fabs(cover) > 0.5f && current->path_index == samples[next_index].path_index ? &image_row[samples[next_index].x] : pixel; while(pixel < next_pixel) { *pixel = *pixel*(1.f - color.w) + color*color.w; // TODO: valid composite blending ++pixel; } if (current->path_index != samples[next_index].path_index) cover = 0.f; } }