diff --git a/c++/contourgl/cl/contour.cl b/c++/contourgl/cl/contour.cl index 68cfaf6..a7f678a 100644 --- a/c++/contourgl/cl/contour.cl +++ b/c++/contourgl/cl/contour.cl @@ -16,13 +16,10 @@ */ kernel void clear( - global int4 *mark_buffer, - int width ) + global int2 *mark_buffer ) { - size_t id = get_global_id(0); - int c = id % width; - int4 v = { 0, 0, c | (c + 1), 0 }; - mark_buffer[id] = v; + const int2 v = { 0, 0 }; + mark_buffer[ get_global_id(0) ] = v; } kernel void path( @@ -63,20 +60,17 @@ kernel void path( if (pp1.y > py.y) pp1 = py; if (iy >= 0) { + // calc values float cover = pp1.y - p0.y; float area = px.x - 0.5f*(p0.x + pp1.x); if (flipx) { ix = w1 - ix; area = 1.f - area; } if (flipy) { iy = h1 - iy; cover = -cover; } ix = clamp(ix, 0, w1); - global int *row = mark_buffer + 4*iy*width; - global int *mark = row + 4*ix; - atomic_add(mark, (int)round(area*cover*65536.f)); + + // store in buffer + global int *mark = mark_buffer + (iy*width + ix)*2; + atomic_add(mark, (int)round(area*cover*65536.f)); atomic_add(mark + 1, (int)round(cover*65536.f)); - int iix = (ix & (ix + 1)) - 1; - while(iix > 0) { - atomic_min(row + 4*iix + 2, ix); - iix = (iix & (iix + 1)) - 1; - } } p0 = pp1; @@ -85,66 +79,38 @@ kernel void path( kernel void fill( int width, - global int4 *mark_buffer, + global int2 *mark_buffer, read_only image2d_t surface_read_image, write_only image2d_t surface_write_image, + int minx, + int maxx, float4 color, int invert, int evenodd ) { - const int scale = 65536; - const int scale2 = 2*scale; - const int scale05 = scale/2; - size_t id = get_global_id(0); - int w1 = width - 1; - global int4 *row = mark_buffer + id*width; + global int2 *row = mark_buffer + id*width; + const int2 empty_mark = { 0, 0 }; - int cover = 0; - int ialpha; - int2 c0 = { 0, id }; - int2 c1 = c0; - int4 empty_mark = { 0, 0, 0, 0 }; - while(c0.x < w1) { - int4 mark; - while(c1.x < width) { - mark = row[c1.x]; - empty_mark.z = c1.x | (c1.x + 1); - row[c1.x] = empty_mark; - if 
(mark.x || mark.y) break; - c1.x = min(mark.z, width); - } + float cover = 0.f; + for(int2 c = {minx, id}; c.x < maxx; ++c.x) { + // read mark (x: alpha, y: cover) + global int2 *mark = row + c.x; + float alpha = fabs(cover + mark->x/65536.f); + //if (evenodd) alpha = 1.f - fabs(fmod(alpha, 2.f) - 1.f); + cover += mark->y/65536.f; + *mark = empty_mark; - ialpha = abs(cover); - ialpha = evenodd ? scale - abs((ialpha % scale2) - scale) - : min(ialpha, scale); - //if (invert) ialpha = scale - ialpha; - if (ialpha > scale05) { - while(c0.x < c1.x) { - write_imagef(surface_write_image, c0, color); - ++c0.x; - } - } - - if (c1.x < width) { - ialpha = abs(mark.x + cover); - ialpha = evenodd ? scale - abs((ialpha % scale2) - scale) - : min(ialpha, scale); - //if (invert) ialpha = scale - ialpha; - if (ialpha > 4) { - float alpha = (float)ialpha/(float)scale; - float alpha_inv = 1.f - alpha; - float4 cl = read_imagef(surface_read_image, c1); - cl.x = cl.x*alpha_inv + color.x*alpha; - cl.y = cl.y*alpha_inv + color.y*alpha; - cl.z = cl.z*alpha_inv + color.z*alpha; - cl.w = min(cl.w + alpha, 1.f); - write_imagef(surface_write_image, c1, cl); - } - } + //if (invert) alpha = 1.f - alpha; + alpha *= color.w; - c0.x = c1.x + 1; - c1.x = min(mark.z, width); - cover += mark.y; + // write color + float alpha_inv = 1.f - alpha; + float4 cl = read_imagef(surface_read_image, c); + cl.x = cl.x*alpha_inv + color.x*alpha; + cl.y = cl.y*alpha_inv + color.y*alpha; + cl.z = cl.z*alpha_inv + color.z*alpha; + cl.w = min(cl.w + alpha, 1.f); + write_imagef(surface_write_image, c, cl); } } diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp index 447aab2..a3435f3 100644 --- a/c++/contourgl/clrender.cpp +++ b/c++/contourgl/clrender.cpp @@ -78,12 +78,11 @@ void ClRender::send_surface(Surface *surface) { mark_buffer = clCreateBuffer( cl.context, CL_MEM_READ_WRITE, - surface->count()*sizeof(cl_int4), NULL, + surface->count()*sizeof(cl_int2), NULL, NULL ); assert(mark_buffer); cl.err 
|= clSetKernelArg(contour_clear_kernel, 0, sizeof(mark_buffer), &mark_buffer); - cl.err |= clSetKernelArg(contour_clear_kernel, 1, sizeof(width), &width); assert(!cl.err); size_t pixels_count = (size_t)surface->count(); @@ -194,7 +193,7 @@ void ClRender::send_path(const vec2f *path, int count) { } } -void ClRender::path(int start, int count, const Color &color, bool invert, bool evenodd) { +void ClRender::path(int start, int count, const Color &color, bool invert, bool evenodd, ContextRect bounds) { //Measure t("ClRender::contour"); if (count <= 1) return; @@ -202,9 +201,11 @@ void ClRender::path(int start, int count, const Color &color, bool invert, bool // kernel args int iinvert = invert, ievenodd = evenodd; - cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(color), &color); - cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(int), &iinvert); - cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(int), &ievenodd); + cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(int), &bounds.minx); + cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(int), &bounds.maxx); + cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(color), &color); + cl.err |= clSetKernelArg(contour_fill_kernel, 7, sizeof(int), &iinvert); + cl.err |= clSetKernelArg(contour_fill_kernel, 8, sizeof(int), &ievenodd); assert(!cl.err); // build marks @@ -225,13 +226,14 @@ void ClRender::path(int start, int count, const Color &color, bool invert, bool assert(!cl.err); // fill - size_t sheight = surface->height; + sstart = bounds.miny; + scount = bounds.maxy - bounds.miny; cl.err |= clEnqueueNDRangeKernel( cl.queue, contour_fill_kernel, 1, - NULL, - &sheight, + &sstart, + &scount, NULL, 1, &path_event, diff --git a/c++/contourgl/clrender.h b/c++/contourgl/clrender.h index 649c89b..ab19a17 100644 --- a/c++/contourgl/clrender.h +++ b/c++/contourgl/clrender.h @@ -47,7 +47,7 @@ public: void send_surface(Surface *surface); Surface* receive_surface(); void send_path(const vec2f 
*path, int count); - void path(int start, int count, const Color &color, bool invert, bool evenodd); + void path(int start, int count, const Color &color, bool invert, bool evenodd, ContextRect bounds); void wait(); }; diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp index 03b134a..cda0d58 100644 --- a/c++/contourgl/test.cpp +++ b/c++/contourgl/test.cpp @@ -254,12 +254,30 @@ void Test::test_cl(Environment &e, Data &data, Surface &surface) { vector paths; vector starts(data.size()); vector counts(data.size()); + vector bounds(data.size()); for(int i = 0; i < (int)data.size(); ++i) { starts[i] = paths.size(); - for(Contour::ChunkList::const_iterator j = data[i].contour.get_chunks().begin(); j != data[i].contour.get_chunks().end(); ++j) - paths.push_back(vec2f(j->p1)); + if (!data[i].contour.get_chunks().empty()) { + Vector v = data[i].contour.get_chunks().front().p1; + bounds[i].minx = (int)floor( v.x ) - 2; + bounds[i].miny = (int)floor( v.y ) - 2; + bounds[i].maxx = (int)ceil ( v.x ) + 2; + bounds[i].maxy = (int)ceil ( v.y ) + 2; + for(Contour::ChunkList::const_iterator j = data[i].contour.get_chunks().begin(); j != data[i].contour.get_chunks().end(); ++j) { + paths.push_back(vec2f(j->p1)); + bounds[i].minx = std::min( bounds[i].minx, (int)floor( j->p1.x ) - 2 ); + bounds[i].miny = std::min( bounds[i].miny, (int)floor( j->p1.y ) - 2 ); + bounds[i].maxx = std::max( bounds[i].maxx, (int)ceil ( j->p1.x ) + 2 ); + bounds[i].maxy = std::max( bounds[i].maxy, (int)ceil ( j->p1.y ) + 2 ); + } + bounds[i].minx = std::max(0, std::min(surface.width, bounds[i].minx)); + bounds[i].miny = std::max(0, std::min(surface.height, bounds[i].miny)); + bounds[i].maxx = std::max(bounds[i].minx, std::min(surface.width, bounds[i].maxx)); + bounds[i].maxy = std::max(bounds[i].miny, std::min(surface.height, bounds[i].maxy)); + } paths.push_back(paths[starts[i]]); counts[i] = paths.size() - starts[i]; + paths.push_back(paths.front()); } ClRender clr(e.cl); @@ -268,8 +286,16 @@ 
void Test::test_cl(Environment &e, Data &data, Surface &surface) { { Measure t("render"); + + // all in one (single color) + //ContextRect bounds; + //bounds.maxx = surface.width; + //bounds.maxy = surface.height; + //clr.path(0, (int)paths.size(), Color(0.f, 0.f, 1.f, 1.f), false, false, bounds); + + // separate paths (valid colors) for(int i = 0; i < (int)data.size(); ++i) - clr.path(starts[i], counts[i], data[i].color, data[i].invert, data[i].evenodd); + clr.path(starts[i], counts[i], data[i].color, data[i].invert, data[i].evenodd, bounds[i]); clr.wait(); }