From 013f0c90a571ab3097d2be1bf16504f2ed523c06 Mon Sep 17 00:00:00 2001 From: Ivan Mahonin Date: Jul 11 2018 16:09:05 +0000 Subject: contourgl: batch rendering via OpenCL --- diff --git a/c++/contourgl/cl/contour.cl b/c++/contourgl/cl/contour.cl index a7f678a..5f6dffc 100644 --- a/c++/contourgl/cl/contour.cl +++ b/c++/contourgl/cl/contour.cl @@ -15,102 +15,161 @@ along with this program. If not, see . */ -kernel void clear( - global int2 *mark_buffer ) -{ - const int2 v = { 0, 0 }; - mark_buffer[ get_global_id(0) ] = v; -} +// paths format: +// { +// int count, +// paths: [ +// { +// int point_count, +// int flags, +// float4 color, +// points: [ float2, ... ] +// }, +// ... +// ] +// } + -kernel void path( - int width, - int height, +kernel void draw( + global char *paths_buffer, global int *mark_buffer, - global float2 *path ) + read_only image2d_t read_image, + write_only image2d_t write_image ) // assumed that read and write image is the same object { const float e = 1e-6f; - size_t id = get_global_id(0); + + int id = (int)get_global_id(0); + int count = (int)get_global_size(0); + + int paths_count = *(global int *)paths_buffer; + global char *paths = paths_buffer + sizeof(int); - float2 s = { (float)width, (float)height }; + int width = get_image_width(write_image); + int height = get_image_height(write_image); + int pixels_count = width*height; + float2 size = (float2)((float)width, (float)height); int w1 = width - 1; int h1 = height - 1; - - float2 p0 = path[id]; - float2 p1 = path[id + 1]; - bool flipx = p1.x < p0.x; - bool flipy = p1.y < p0.y; - if (flipx) { p0.x = s.x - p0.x; p1.x = s.x - p1.x; } - if (flipy) { p0.y = s.y - p0.y; p1.y = s.y - p1.y; } - float2 d = p1 - p0; - float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; - float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; - while(p0.x != p1.x || p0.y != p1.y) { - int ix = (int)floor(p0.x + e); - int iy = (int)floor(p0.y + e); - if (iy > h1) break; - - float2 px, py; - px.x = (float)(ix + 1); - px.y = p0.y + ky*(px.x - p0.x); - py.y = max((float)(iy + 1), 0.f); - py.x = p0.x + kx*(py.y - p0.y); - float2 pp1 = p1; - if (pp1.x > px.x) pp1 = px; - if (pp1.y > py.y) pp1 = py; + global int *bound_minx = (global int *)(mark_buffer + 2*pixels_count); + global int *bound_miny = bound_minx + 1; + global int *bound_maxx = bound_minx + 2; + global int *bound_maxy = bound_minx + 3; + + // clear marks + for(int i = id; i < 2*pixels_count; i += count) + mark_buffer[i] = 0; + barrier(CLK_LOCAL_MEM_FENCE); + + // draw paths + for(int p = 0; p < paths_count; ++p) { + int points_count = *(global int *)paths; paths += sizeof(int); + int flags = *(global int *)paths; paths += sizeof(int); - if (iy >= 0) { - // calc values - float cover = pp1.y - p0.y; - float area = px.x - 0.5f*(p0.x + pp1.x); - if (flipx) { ix = w1 - ix; area = 1.f - area; } - if (flipy) { iy = h1 - iy; cover = -cover; } - ix = clamp(ix, 0, w1); - - // store in buffer - global int *mark = mark_buffer + (iy*width + ix)*2; - atomic_add(mark, (int)round(area*cover*65536.f)); - atomic_add(mark + 1, (int)round(cover*65536.f)); - } + float4 color; + color.x = *(global float *)paths; paths += sizeof(float); + color.y = *(global float *)paths; paths += sizeof(float); + color.z = *(global float *)paths; paths += sizeof(float); + color.w = *(global float *)paths; paths += sizeof(float); - p0 = pp1; - } -} + global float *points = (global float *)paths; + paths += 2*points_count*sizeof(float); + + int segments_count = points_count - 1; + if (segments_count <= 0) continue; + + int invert = flags & 1; + int evenodd = flags & 2; + + *bound_minx = invert ? 0 : (int)floor(clamp(points[0] + e, 0.f, size.x - 1.f + e)); + *bound_miny = invert ? 0 : (int)floor(clamp(points[1] + e, 0.f, size.y - 1.f + e)); + *bound_maxx = invert ? w1 : *bound_minx; + *bound_maxy = invert ? h1 : *bound_miny; -kernel void fill( - int width, - global int2 *mark_buffer, - read_only image2d_t surface_read_image, - write_only image2d_t surface_write_image, - int minx, - int maxx, - float4 color, - int invert, - int evenodd ) -{ - size_t id = get_global_id(0); - global int2 *row = mark_buffer + id*width; - const int2 empty_mark = { 0, 0 }; + // trace path + for(int i = id; i < segments_count; i += count) { + int ii = 2*i; + float2 p0 = { points[ii + 0], points[ii + 1] }; + float2 p1 = { points[ii + 2], points[ii + 3] }; + + int p1x = (int)floor(clamp(p1.x + e, 0.f, size.x - 1.f + e)); + int p1y = (int)floor(clamp(p1.y + e, 0.f, size.y - 1.f + e)); + atomic_min(bound_minx, p1x - 1); + atomic_min(bound_miny, p1y - 1); + atomic_max(bound_maxx, p1x + 1); + atomic_max(bound_maxy, p1y + 1); + + bool flipx = p1.x < p0.x; + bool flipy = p1.y < p0.y; + if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; } + if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; } + float2 d = p1 - p0; + float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; + float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; + + while(p0.x != p1.x || p0.y != p1.y) { + int ix = (int)floor(p0.x + e); + int iy = (int)floor(p0.y + e); + if (iy > h1) break; - float cover = 0.f; - for(int2 c = {minx, id}; c.x < maxx; ++c.x) { - // read mark (x: alpha, y: cover) - global int2 *mark = row + c.x; - float alpha = fabs(cover + mark->x/65536.f); - //if (evenodd) alpha = 1.f - fabs(fmod(alpha, 2.f) - 1.f); - cover += mark->y/65536.f; - *mark = empty_mark; - - //if (invert) alpha = 1.f - alpha; - alpha *= color.w; + float2 px, py; + px.x = (float)(ix + 1); + px.y = p0.y + ky*(px.x - p0.x); + py.y = max((float)(iy + 1), 0.f); + py.x = p0.x + kx*(py.y - p0.y); + float2 pp1 = p1; + if (pp1.x > px.x) pp1 = px; + if (pp1.y > py.y) pp1 = py; + + if (iy >= 0) { + // calc values + float cover = pp1.y - p0.y; + float area = px.x - 0.5f*(p0.x + pp1.x); + if (flipx) { ix = w1 - ix; area = 1.f - area; } + if (flipy) { iy = h1 - iy; cover = -cover; } + ix = clamp(ix, 0, w1); + + // store in buffer + global int *mark = mark_buffer + (iy*width + ix)*2; + atomic_add(mark, (int)round(area*cover*65536.f)); + atomic_add(mark + 1, (int)round(cover*65536.f)); + } + + p0 = pp1; + } + } + barrier(CLK_LOCAL_MEM_FENCE); - // write color - float alpha_inv = 1.f - alpha; - float4 cl = read_imagef(surface_read_image, c); - cl.x = cl.x*alpha_inv + color.x*alpha; - cl.y = cl.y*alpha_inv + color.y*alpha; - cl.z = cl.z*alpha_inv + color.z*alpha; - cl.w = min(cl.w + alpha, 1.f); - write_imagef(surface_write_image, c, cl); + // fill + int2 coord; + int minx = max(*bound_minx, 0); + int miny = max(*bound_miny, 0); + int maxx = min(*bound_maxx, w1); + int maxy = min(*bound_maxy, h1); + for(coord.y = miny + id; coord.y <= maxy; coord.y += count) { + global int *mark = mark_buffer + (coord.y*width + minx)*2; + + float cover = 0.f; + for(coord.x = minx; coord.x <= maxx; ++coord.x) { + // read mark (alpha, cover) + float alpha = fabs(cover + *mark/65536.f); *mark = 0; ++mark; + cover += *mark/65536.f; *mark = 0; ++mark; + + //if (evenodd) alpha = 1.f - fabs(fmod(alpha, 2.f) - 1.f); + //if (invert) alpha = 1.f - alpha; + alpha *= color.w; + + // write color + float alpha_inv = 1.f - alpha; + float4 cl = read_imagef(read_image, coord); + cl.x = cl.x*alpha_inv + color.x*alpha; + cl.y = cl.y*alpha_inv + color.y*alpha; + cl.z = cl.z*alpha_inv + color.z*alpha; + cl.w = min(cl.w + alpha, 1.f); + write_imagef(write_image, coord, cl); + } + } + barrier(CLK_LOCAL_MEM_FENCE); } } + diff --git a/c++/contourgl/clcontext.cpp b/c++/contourgl/clcontext.cpp index d5a60a8..7f6f84d 100644 --- a/c++/contourgl/clcontext.cpp +++ b/c++/contourgl/clcontext.cpp @@ -26,7 +26,13 @@ using namespace std; -ClContext::ClContext(): err(), context(), queue() { +ClContext::ClContext(): + err(), + context(), + queue(), + max_compute_units(), + max_group_size() +{ // platform @@ -67,6 +73,12 @@ ClContext::ClContext(): err(), context(), queue() { clGetDeviceInfo(devices.front(), CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL); //cout << "Device 0 OpenCL version " << device_version << endl; + clGetDeviceInfo(devices.front(), CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, NULL); + //cout << "Device 0 max compute units " << max_compute_units << endl; + + clGetDeviceInfo(devices.front(), CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_group_size), &max_group_size, NULL); + //cout << "Device 0 max group size " << max_group_size << endl; + // context cl_context_properties context_props[] = { @@ -134,7 +146,7 @@ void ClContext::hello() { assert(!err); size_t work_group_size = sizeof(data); - cl_event event = NULL; + cl_event event1 = NULL, event2 = NULL; err = clEnqueueNDRangeKernel( queue, kernel, @@ -144,15 +156,16 @@ void ClContext::hello() { NULL, 0, NULL, - &event ); + &event1 ); assert(!err); - clWaitForEvents(1, &event); - // read - clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(data), data, 0, NULL, &event); - clWaitForEvents(1, &event); + clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(data), data, 1, &event1, &event2); + + // wait + + clWaitForEvents(1, &event2); cout << data << endl; // deinitialize diff --git a/c++/contourgl/clcontext.h b/c++/contourgl/clcontext.h index 6061d43..f73beae 100644 --- a/c++/contourgl/clcontext.h +++ b/c++/contourgl/clcontext.h @@ -31,6 +31,9 @@ public: std::vector devices; cl_command_queue queue; + unsigned int max_compute_units; + size_t max_group_size; + ClContext(); ~ClContext(); diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp index a3435f3..fe1f159 100644 --- a/c++/contourgl/clrender.cpp +++ b/c++/contourgl/clrender.cpp @@ -30,29 +30,22 @@ using namespace std; ClRender::ClRender(ClContext &cl): cl(cl), contour_program(), - contour_path_kernel(), - contour_fill_kernel(), + contour_draw_kernel(), surface(), - path_buffer(), + paths_buffer(), mark_buffer(), surface_image(), prev_event() { contour_program = cl.load_program("contour.cl"); - contour_clear_kernel = clCreateKernel(contour_program, "clear", NULL); - assert(contour_clear_kernel); - contour_path_kernel = clCreateKernel(contour_program, "path", NULL); - assert(contour_path_kernel); - contour_fill_kernel = clCreateKernel(contour_program, "fill", NULL); - assert(contour_fill_kernel); + contour_draw_kernel = clCreateKernel(contour_program, "draw", NULL); + assert(contour_draw_kernel); } ClRender::~ClRender() { send_surface(NULL); - send_path(NULL, 0); - clReleaseKernel(contour_clear_kernel); - clReleaseKernel(contour_fill_kernel); - clReleaseKernel(contour_path_kernel); + send_paths(NULL, 0); + clReleaseKernel(contour_draw_kernel); clReleaseProgram(contour_program); } @@ -73,31 +66,12 @@ void ClRender::send_surface(Surface *surface) { if (this->surface) { //Measure t("ClRender::send_surface"); - int width = surface->width; - int height = surface->height; - mark_buffer = clCreateBuffer( cl.context, CL_MEM_READ_WRITE, - surface->count()*sizeof(cl_int2), NULL, + (surface->count() + 2)*sizeof(cl_int2), NULL, // extra two values to store contour bounds NULL ); assert(mark_buffer); - cl.err |= clSetKernelArg(contour_clear_kernel, 0, sizeof(mark_buffer), &mark_buffer); - assert(!cl.err); - - size_t pixels_count = (size_t)surface->count(); - cl.err |= clEnqueueNDRangeKernel( - cl.queue, - contour_clear_kernel, - 1, - NULL, - &pixels_count, - NULL, - 0, - NULL, - NULL ); - assert(!cl.err); - cl_image_format surface_format = { }; surface_format.image_channel_order = CL_RGBA; surface_format.image_channel_data_type = CL_FLOAT; @@ -121,15 +95,9 @@ void ClRender::send_surface(Surface *surface) { 0, NULL, NULL ); assert(!cl.err); - cl.err |= clSetKernelArg(contour_path_kernel, 0, sizeof(width), &width); - cl.err |= clSetKernelArg(contour_path_kernel, 1, sizeof(height), &height); - cl.err |= clSetKernelArg(contour_path_kernel, 2, sizeof(mark_buffer), &mark_buffer); - assert(!cl.err); - - cl.err |= clSetKernelArg(contour_fill_kernel, 0, sizeof(width), &width); - cl.err |= clSetKernelArg(contour_fill_kernel, 1, sizeof(mark_buffer), &mark_buffer); - cl.err |= clSetKernelArg(contour_fill_kernel, 2, sizeof(surface_image), &surface_image); - cl.err |= clSetKernelArg(contour_fill_kernel, 3, sizeof(surface_image), &surface_image); + cl.err |= clSetKernelArg(contour_draw_kernel, 1, sizeof(mark_buffer), &mark_buffer); + cl.err |= clSetKernelArg(contour_draw_kernel, 2, sizeof(surface_image), &surface_image); + cl.err |= clSetKernelArg(contour_draw_kernel, 3, sizeof(surface_image), &surface_image); assert(!cl.err); cl.err |= clFinish(cl.queue); @@ -158,34 +126,34 @@ Surface* ClRender::receive_surface() { return surface; } -void ClRender::send_path(const vec2f *path, int count) { - if (!path_buffer && (!path || count <=0)) return; +void ClRender::send_paths(const void *paths, int size) { + if (!paths_buffer && (!paths || size <= 0)) return; cl.err |= clFinish(cl.queue); assert(!cl.err); prev_event = NULL; - if (path_buffer) { - clReleaseMemObject(path_buffer); - path_buffer = NULL; + if (paths_buffer) { + clReleaseMemObject(paths_buffer); + paths_buffer = NULL; } - if (path && count > 0) { + if (paths && size > 0) { //Measure t("ClRender::send_path"); - path_buffer = clCreateBuffer( + paths_buffer = clCreateBuffer( cl.context, CL_MEM_READ_ONLY, - count*sizeof(*path), NULL, + size, NULL, NULL ); - assert(path_buffer); + assert(paths_buffer); cl.err |= clEnqueueWriteBuffer( - cl.queue, path_buffer, CL_FALSE, - 0, count*sizeof(*path), path, + cl.queue, paths_buffer, CL_FALSE, + 0, size, paths, 0, NULL, NULL ); assert(!cl.err); - cl.err |= clSetKernelArg(contour_path_kernel, 3, sizeof(path_buffer), &path_buffer); + cl.err |= clSetKernelArg(contour_draw_kernel, 0, sizeof(paths_buffer), &paths_buffer); assert(!cl.err); cl.err |= clFinish(cl.queue); @@ -193,57 +161,29 @@ void ClRender::send_path(const vec2f *path, int count) { } } -void ClRender::path(int start, int count, const Color &color, bool invert, bool evenodd, ContextRect bounds) { +void ClRender::draw() { //Measure t("ClRender::contour"); - if (count <= 1) return; + cl_event event = prev_event; - // kernel args - - int iinvert = invert, ievenodd = evenodd; - cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(int), &bounds.minx); - cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(int), &bounds.maxx); - cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(color), &color); - cl.err |= clSetKernelArg(contour_fill_kernel, 7, sizeof(int), &iinvert); - cl.err |= clSetKernelArg(contour_fill_kernel, 8, sizeof(int), &ievenodd); - assert(!cl.err); - - // build marks - - cl_event path_event = NULL; - size_t sstart = start; - size_t scount = count-1; + size_t start = 0; cl.err |= clEnqueueNDRangeKernel( cl.queue, - contour_path_kernel, + contour_draw_kernel, 1, - &sstart, - &scount, - NULL, - prev_event ? 1 : 0, - prev_event ? &prev_event : NULL, - &path_event ); - assert(!cl.err); - - // fill - sstart = bounds.miny; - scount = bounds.maxy - bounds.miny; - cl.err |= clEnqueueNDRangeKernel( - cl.queue, - contour_fill_kernel, - 1, - &sstart, - &scount, - NULL, - 1, - &path_event, + &start, + &cl.max_group_size, + &cl.max_group_size, + event ? 1 : 0, + event ? &event : NULL, &prev_event ); assert(!cl.err); } void ClRender::wait() { if (prev_event) { - clWaitForEvents(1, &prev_event); + cl.err |= clWaitForEvents(1, &prev_event); + assert(!cl.err); prev_event = NULL; } } diff --git a/c++/contourgl/clrender.h b/c++/contourgl/clrender.h index ab19a17..db74532 100644 --- a/c++/contourgl/clrender.h +++ b/c++/contourgl/clrender.h @@ -30,12 +30,10 @@ class ClRender { private: ClContext &cl; cl_program contour_program; - cl_kernel contour_clear_kernel; - cl_kernel contour_path_kernel; - cl_kernel contour_fill_kernel; + cl_kernel contour_draw_kernel; Surface *surface; - cl_mem path_buffer; + cl_mem paths_buffer; cl_mem mark_buffer; cl_mem surface_image; cl_event prev_event; @@ -46,8 +44,8 @@ public: void send_surface(Surface *surface); Surface* receive_surface(); - void send_path(const vec2f *path, int count); - void path(int start, int count, const Color &color, bool invert, bool evenodd, ContextRect bounds); + void send_paths(const void *paths, int size); + void draw(); void wait(); }; diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp index cda0d58..90766ba 100644 --- a/c++/contourgl/test.cpp +++ b/c++/contourgl/test.cpp @@ -251,51 +251,41 @@ void Test::test_sw(Environment &e, Data &data, Surface &surface) { } void Test::test_cl(Environment &e, Data &data, Surface &surface) { - vector paths; - vector starts(data.size()); - vector counts(data.size()); - vector bounds(data.size()); - for(int i = 0; i < (int)data.size(); ++i) { - starts[i] = paths.size(); - if (!data[i].contour.get_chunks().empty()) { - Vector v = data[i].contour.get_chunks().front().p1; - bounds[i].minx = (int)floor( v.x ) - 2; - bounds[i].miny = (int)floor( v.y ) - 2; - bounds[i].maxx = (int)ceil ( v.x ) + 2; - bounds[i].maxy = (int)ceil ( v.y ) + 2; - for(Contour::ChunkList::const_iterator j = data[i].contour.get_chunks().begin(); j != data[i].contour.get_chunks().end(); ++j) { - paths.push_back(vec2f(j->p1)); - bounds[i].minx = std::min( bounds[i].minx, (int)floor( j->p1.x ) - 2 ); - bounds[i].miny = std::min( bounds[i].miny, (int)floor( j->p1.y ) - 2 ); - bounds[i].maxx = std::max( bounds[i].maxx, (int)ceil ( j->p1.x ) + 2 ); - bounds[i].maxy = std::max( bounds[i].maxy, (int)ceil ( j->p1.y ) + 2 ); - } - bounds[i].minx = std::max(0, std::min(surface.width, bounds[i].minx)); - bounds[i].miny = std::max(0, std::min(surface.height, bounds[i].miny)); - bounds[i].maxx = std::max(bounds[i].minx, std::min(surface.width, bounds[i].maxx)); - bounds[i].maxy = std::max(bounds[i].miny, std::min(surface.height, bounds[i].maxy)); + // prepare data + + vector paths(sizeof(int)); + int count = 0; + for(Data::const_iterator i = data.begin(); i != data.end(); ++i) + if (int points_count = i->contour.get_chunks().size()) { + ++count; + + int flags = 0; + if (i->invert) flags |= 1; + if (i->evenodd) flags |= 2; + + size_t s = paths.size(); + paths.resize(paths.size() + sizeof(int) + sizeof(int) + sizeof(Color) + (points_count+1)*sizeof(vec2f)); + + *(int*)&paths[s] = points_count+1; s += sizeof(int); + *(int*)&paths[s] = flags; s += sizeof(int); + *(Color*)&paths[s] = i->color; s += sizeof(Color); + vec2f *point = (vec2f*)&paths[s]; + + for(Contour::ChunkList::const_iterator j = i->contour.get_chunks().begin(); j != i->contour.get_chunks().end(); ++j, ++point) + *point = vec2f(j->p1); + *point = vec2f(i->contour.get_chunks().front().p1); } - paths.push_back(paths[starts[i]]); - counts[i] = paths.size() - starts[i]; - paths.push_back(paths.front()); - } + *(int*)&paths.front() = count; + + // draw ClRender clr(e.cl); clr.send_surface(&surface); - clr.send_path(&paths.front(), paths.size()); { Measure t("render"); - - // all in one (single color) - //ContextRect bounds; - //bounds.maxx = surface.width; - //bounds.maxy = surface.height; - //clr.path(0, (int)paths.size(), Color(0.f, 0.f, 1.f, 1.f), false, false, bounds); - - // separete path (valid colors) - for(int i = 0; i < (int)data.size(); ++i) - clr.path(starts[i], counts[i], data[i].color, data[i].invert, data[i].evenodd, bounds[i]); + clr.send_paths(&paths.front(), paths.size()); + clr.draw(); clr.wait(); }