diff --git a/c++/contourgl/cl/contour-base.cl b/c++/contourgl/cl/contour-base.cl new file mode 100644 index 0000000..d854ce3 --- /dev/null +++ b/c++/contourgl/cl/contour-base.cl @@ -0,0 +1,158 @@ +/* + ......... 2015 Ivan Mahonin + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +kernel void clear( + int width, + int height, + global int4 *mark_buffer ) +{ + int id = get_global_id(0); + if (id >= width*height) return; + int c = id % width; + int4 v = { 0, 0, c | (c + 1), 0 }; + mark_buffer[id] = v; +} + +kernel void path( + int width, + int height, + global int *mark_buffer, + global float2 *points, + int begin, + int end ) +{ + const float e = 1e-6f; + int id = get_global_id(0); + if (id >= end) return; + + float2 s = { (float)width, (float)height }; + int w1 = width - 1; + int h1 = height - 1; + + float2 p0 = points[id]; + float2 p1 = points[id + 1]; + bool flipx = p1.x < p0.x; + bool flipy = p1.y < p0.y; + if (flipx) { p0.x = s.x - p0.x; p1.x = s.x - p1.x; } + if (flipy) { p0.y = s.y - p0.y; p1.y = s.y - p1.y; } + float2 d = p1 - p0; + float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; + float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; + + while(p0.x != p1.x || p0.y != p1.y) { + int ix = (int)floor(p0.x + e); + int iy = (int)floor(p0.y + e); + if (iy > h1) break; + + float2 px, py; + px.x = (float)(ix + 1); + px.y = p0.y + ky*(px.x - p0.x); + py.y = max((float)(iy + 1), 0.f); + py.x = p0.x + kx*(py.y - p0.y); + float2 pp1 = p1; + if (pp1.x > px.x) pp1 = px; + if (pp1.y > py.y) pp1 = py; + + if (iy >= 0) { + float cover = pp1.y - p0.y; + float area = px.x - 0.5f*(p0.x + pp1.x); + if (flipx) { ix = w1 - ix; area = 1.f - area; } + if (flipy) { iy = h1 - iy; cover = -cover; } + ix = clamp(ix, 0, w1); + global int *row = mark_buffer + 4*iy*width; + global int *mark = row + 4*ix; + atomic_add(mark, (int)round(area*cover*65536.f)); + atomic_add(mark + 1, (int)round(cover*65536.f)); + int iix = (ix & (ix + 1)) - 1; + while(iix > 0) { + atomic_min(row + 4*iix + 2, ix); + iix = (iix & (iix + 1)) - 1; + } + } + + p0 = pp1; + } +} + +kernel void fill( + int width, + int height, + global int4 *mark_buffer, + global float4 *image, + float4 color, + int invert, + int evenodd ) +{ + const int scale = 65536; + const int scale2 = 2*scale; + const int scale05 = scale/2; + + int id = get_global_id(0); + if (id >= height) return; + int w1 = width - 1; + global int4 *row = mark_buffer + id*width; + global float4 *image_row = image + id*width; + + int cover = 0; + int ialpha; + int2 c0 = { 0, id }; + int2 c1 = c0; + int4 empty_mark = { 0, 0, 0, 0 }; + while(c0.x < w1) { + int4 mark; + while(c1.x < width) { + mark = row[c1.x]; + empty_mark.z = c1.x | (c1.x + 1); + row[c1.x] = empty_mark; + if (mark.x || mark.y) break; + c1.x = min(mark.z, width); + } + + ialpha = abs(cover); + ialpha = evenodd ? scale - abs((ialpha % scale2) - scale) + : min(ialpha, scale); + if (invert) ialpha = scale - ialpha; + if (ialpha > scale05) { + while(c0.x < c1.x) { + image_row[c0.x] = color; + ++c0.x; + } + } + + if (c1.x >= width) return; + + ialpha = abs(mark.x + cover); + ialpha = evenodd ? scale - abs((ialpha % scale2) - scale) + : min(ialpha, scale); + if (invert) ialpha = scale - ialpha; + if (ialpha > 4) { + float alpha = (float)ialpha/(float)scale; + float alpha_inv = 1.f - alpha; + global float4 *pixel = &image_row[c1.x]; + float4 cl = *pixel; + cl.x = cl.x*alpha_inv + color.x*alpha; + cl.y = cl.y*alpha_inv + color.y*alpha; + cl.z = cl.z*alpha_inv + color.z*alpha; + cl.w = min(cl.w + alpha, 1.f); + *pixel = cl; + } + + c0.x = c1.x + 1; + c1.x = min(mark.z, width); + cover += mark.y; + } +} diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp index d4cda7a..bb079af 100644 --- a/c++/contourgl/clrender.cpp +++ b/c++/contourgl/clrender.cpp @@ -418,3 +418,213 @@ void ClRender2::wait() { } +// ------------------------------------------------ + + +ClRender3::ClRender3(ClContext &cl): + cl(cl), + contour_program(), + contour_clear_kernel(), + contour_path_kernel(), + contour_fill_kernel(), + surface(), + points_buffer(), + mark_buffer(), + surface_image(), + prev_event() +{ + contour_program = cl.load_program("contour-base.cl"); + assert(contour_program); + + contour_clear_kernel = clCreateKernel(contour_program, "clear", &cl.err); + assert(!cl.err); + assert(contour_clear_kernel); + + contour_path_kernel = clCreateKernel(contour_program, "path", &cl.err); + assert(!cl.err); + assert(contour_path_kernel); + + contour_fill_kernel = clCreateKernel(contour_program, "fill", &cl.err); + assert(!cl.err); + assert(contour_fill_kernel); +} + +ClRender3::~ClRender3() { + send_points(NULL, 0); + send_surface(NULL); + + clReleaseKernel(contour_path_kernel); + clReleaseKernel(contour_fill_kernel); + clReleaseProgram(contour_program); +} + +void ClRender3::send_surface(Surface *surface) { + if (this->surface) { + wait(); + cl.err |= clReleaseMemObject(surface_image); + assert(!cl.err); + surface_image = NULL; + } + + this->surface = surface; + + if (this->surface) { + //Measure t("ClRender::send_surface"); + + int zero_mark[4] = { }; + + surface_image = clCreateBuffer( + cl.context, CL_MEM_READ_WRITE, + surface->count()*sizeof(Color), NULL, + &cl.err ); + assert(!cl.err); + assert(surface_image); + + mark_buffer = clCreateBuffer( + cl.context, CL_MEM_READ_WRITE, + surface->count()*sizeof(zero_mark), NULL, + &cl.err ); + assert(!cl.err); + assert(mark_buffer); + + cl.err |= clEnqueueWriteBuffer( + cl.queue, surface_image, false, + 0, surface->count()*sizeof(Color), surface->data, + 0, NULL, NULL ); + assert(!cl.err); + + cl.err |= clSetKernelArg(contour_clear_kernel, 0, sizeof(surface->width), &surface->width); + cl.err |= clSetKernelArg(contour_clear_kernel, 1, sizeof(surface->height), &surface->height); + cl.err |= clSetKernelArg(contour_clear_kernel, 2, sizeof(mark_buffer), &mark_buffer); + assert(!cl.err); + + size_t count = surface->count(); + cl.err |= clEnqueueNDRangeKernel( + cl.queue, contour_clear_kernel, + 1, NULL, &count, NULL, + 0, NULL, NULL ); + assert(!cl.err); + + cl.err |= clSetKernelArg(contour_path_kernel, 0, sizeof(surface->width), &surface->width); + cl.err |= clSetKernelArg(contour_path_kernel, 1, sizeof(surface->height), &surface->height); + cl.err |= clSetKernelArg(contour_path_kernel, 2, sizeof(mark_buffer), &mark_buffer); + assert(!cl.err); + + cl.err |= clSetKernelArg(contour_fill_kernel, 0, sizeof(surface->width), &surface->width); + cl.err |= clSetKernelArg(contour_fill_kernel, 1, sizeof(surface->height), &surface->height); + cl.err |= clSetKernelArg(contour_fill_kernel, 2, sizeof(mark_buffer), &mark_buffer); + cl.err |= clSetKernelArg(contour_fill_kernel, 3, sizeof(surface_image), &surface_image); + assert(!cl.err); + + wait(); + } +} + +Surface* ClRender3::receive_surface() { + if (surface) { + //Measure t("ClRender::receive_surface"); + + cl.err |= clEnqueueReadBuffer( + cl.queue, surface_image, CL_FALSE, + 0, surface->count()*sizeof(Color), surface->data, + prev_event ? 1 : 0, + prev_event ? &prev_event : NULL, + NULL ); + assert(!cl.err); + + wait(); + } + return surface; +} + +void ClRender3::send_points(const vec2f *points, int count) { + if (points_buffer) { + wait(); + cl.err |= clReleaseMemObject(points_buffer); + assert(!cl.err); + points_buffer = NULL; + } + + if (points && count > 0) { + points_buffer = clCreateBuffer( + cl.context, CL_MEM_READ_ONLY, + count*sizeof(vec2f), NULL, + &cl.err ); + assert(!cl.err); + assert(points_buffer); + + cl.err |= clEnqueueWriteBuffer( + cl.queue, points_buffer, false, + 0, count*sizeof(vec2f), points, + 0, NULL, NULL ); + assert(!cl.err); + + cl.err |= clSetKernelArg(contour_path_kernel, 3, sizeof(points_buffer), &points_buffer); + assert(!cl.err); + + wait(); + } +} + +void ClRender3::draw(const Path &path) { + //Measure t("ClRender::contour"); + + assert(surface); + assert(points_buffer); + + int miny = max(0, path.miny); + int maxy = min(surface->height, path.maxy); + int invert_int = path.invert ? 1 : 0; + int evenodd_int = path.evenodd ? 1 : 0; + if (miny >= maxy || path.begin >= path.end) return; + + cl.err |= clSetKernelArg(contour_path_kernel, 4, sizeof(path.begin), &path.begin); + cl.err |= clSetKernelArg(contour_path_kernel, 5, sizeof(path.begin), &path.end); + assert(!cl.err); + + cl.err |= clSetKernelArg(contour_fill_kernel, 1, sizeof(maxy), &maxy); // restrict height + cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(path.color), &path.color); + cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(invert_int), &invert_int); + cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(evenodd_int), &evenodd_int); + assert(!cl.err); + + + cl_event path_event; + + size_t group_size = 1; + + size_t offset = path.begin; + size_t count = ((path.end - path.begin - 1)/group_size + 1)*group_size; + cl.err |= clEnqueueNDRangeKernel( + cl.queue, + contour_path_kernel, + 1, + &offset, + &count, + NULL,//&group_size, + prev_event ? 1 : 0, + prev_event ? &prev_event : NULL, + &path_event ); + assert(!cl.err); + + offset = miny; + count = ((maxy - miny - 1)/group_size + 1)*group_size; + cl.err |= clEnqueueNDRangeKernel( + cl.queue, + contour_fill_kernel, + 1, + &offset, + &count, + NULL,//&group_size, + 1, + &path_event, + &prev_event ); + assert(!cl.err); +} + +void ClRender3::wait() { + cl.err |= clFinish(cl.queue); + assert(!cl.err); + prev_event = NULL; +} + diff --git a/c++/contourgl/clrender.h b/c++/contourgl/clrender.h index fd32aa5..f676fa2 100644 --- a/c++/contourgl/clrender.h +++ b/c++/contourgl/clrender.h @@ -99,4 +99,43 @@ public: }; +class ClRender3 { +public: + struct Path { + int miny; + int maxy; + int begin; + int end; + Color color; + bool invert; + bool evenodd; + }; + +private: + ClContext &cl; + cl_program contour_program; + cl_kernel contour_clear_kernel; + cl_kernel contour_path_kernel; + cl_kernel contour_fill_kernel; + + Surface *surface; + cl_mem points_buffer; + cl_mem mark_buffer; + cl_mem surface_image; + cl_event prev_event; + +public: + ClRender3(ClContext &cl); + ~ClRender3(); + + void send_surface(Surface *surface); + Surface* receive_surface(); + + void send_points(const vec2f *points, int count); + + void draw(const Path &path); + void wait(); +}; + + #endif diff --git a/c++/contourgl/contourgl.cpp b/c++/contourgl/contourgl.cpp index c1c1596..e2063dd 100644 --- a/c++/contourgl/contourgl.cpp +++ b/c++/contourgl/contourgl.cpp @@ -66,6 +66,9 @@ int main() { { Surface surface(width, height); Measure t("test_lines_cl2.tga", surface, true); Test::test_cl2(e, data, surface); } + { Surface surface(width, height); + Measure t("test_lines_cl3.tga", surface, true); + Test::test_cl3(e, data, surface); } } { Measure t("test_lines_downgrade", true); Test::downgrade(data, datalow); } @@ -88,6 +91,9 @@ int main() { { Surface surface(width, height); Measure t("test_lineslow_cl2.tga", surface, true); Test::test_cl2(e, datalow, surface); } + { Surface surface(width, height); + Measure t("test_lineslow_cl3.tga", surface, true); + Test::test_cl3(e, datalow, surface); } } } diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp index bf221c6..634b79e 100644 --- a/c++/contourgl/test.cpp +++ b/c++/contourgl/test.cpp @@ -351,3 +351,66 @@ void Test::test_cl2(Environment &e, Data &data, Surface &surface) { } clr.receive_surface(); } + +void Test::test_cl3(Environment &e, Data &data, Surface &surface) { + // prepare data + vector paths; + vector points; + paths.reserve(data.size()); + for(Data::const_iterator i = data.begin(); i != data.end(); ++i) { + if (!i->contour.get_chunks().empty()) { + ClRender3::Path path = {}; + path.color = i->color; + path.invert = i->invert; + path.evenodd = i->evenodd; + + path.miny = path.maxy = (int)floor(i->contour.get_chunks().front().p1.y); + path.begin = (int)points.size(); + points.reserve(points.size() + i->contour.get_chunks().size() + 1); + for(Contour::ChunkList::const_iterator j = i->contour.get_chunks().begin(); j != i->contour.get_chunks().end(); ++j) { + int y = (int)floor(j->p1.y); + if (path.miny > y) path.miny = y; + if (path.maxy < y) path.maxy = y; + points.push_back(vec2f(j->p1)); + } + path.end = (int)points.size(); + points.push_back( points[path.begin] ); + ++path.maxy; + + paths.push_back(path); + } + } + + // draw + + ClRender3 clr(e.cl); + + // warm-up + { + clr.send_surface(&surface); clr.send_points(&points.front(), (int)points.size()); + for(int ii = 0; ii < 100; ++ii) + for(vector::const_iterator i = paths.begin(); i != paths.end(); ++i) + clr.draw(*i); + clr.wait(); clr.send_points(NULL, 0); clr.send_surface(NULL); + } + + // measure + /*{ + clr.send_surface(&surface); clr.send_points(&points.front(), (int)points.size()); + for(int ii = 0; ii < 100; ++ii) + for(vector::const_iterator i = paths.begin(); i != paths.end(); ++i) + clr.draw(*i); + clr.wait(); clr.send_points(NULL, 0); clr.send_surface(NULL); + }*/ + + // actual task + clr.send_surface(&surface); + clr.send_points(&points.front(), (int)points.size()); + { + Measure t("render"); + for(vector::const_iterator i = paths.begin(); i != paths.end(); ++i) + clr.draw(*i); + clr.wait(); + } + clr.receive_surface(); +} diff --git a/c++/contourgl/test.h b/c++/contourgl/test.h index b8437ff..6d3d3dd 100644 --- a/c++/contourgl/test.h +++ b/c++/contourgl/test.h @@ -55,6 +55,7 @@ public: static void test_sw(Environment &e, Data &data, Surface &surface); static void test_cl(Environment &e, Data &data, Surface &surface); static void test_cl2(Environment &e, Data &data, Surface &surface); + static void test_cl3(Environment &e, Data &data, Surface &surface); }; #endif