diff --git a/c++/contourgl/cl/contour.cl b/c++/contourgl/cl/contour.cl index 2d3635f..6c9f926 100644 --- a/c++/contourgl/cl/contour.cl +++ b/c++/contourgl/cl/contour.cl @@ -15,19 +15,30 @@ along with this program. If not, see . */ +__kernel void clear2f( + __global float2 *buffer ) +{ + const float2 v = { 0.f, 0.f }; + buffer[get_global_id(0)] = v; +} + __kernel void lines( int width, __global float *lines, __global int *rows, - __global float *mark_buffer ) + __global float2 *mark_buffer ) { const float e = 1e-6f; - size_t id = get_global_id(0); - int begin = rows[id*2]; - int end = begin + rows[id*2 + 1]; - for(int i = begin; i < end; ++i) { - float2 p0 = { lines[4*i + 0], lines[4*i + 1] }; - float2 p1 = { lines[4*i + 2], lines[4*i + 3] }; + __global int *row = rows + 2*get_global_id(0); + sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE + | CLK_ADDRESS_NONE + | CLK_FILTER_NEAREST; + int w = width; + __global float *i = lines + 4*row[0]; + __global float *end = i + 4*row[1]; + for(; i < end; i += 4) { + float2 p0 = { i[0], i[1] }; + float2 p1 = { i[2], i[3] }; int iy0 = (int)floor(fmin(p0.y, p1.y) + e); int iy1 = (int)floor(fmax(p0.y, p1.y) - e); @@ -56,11 +67,10 @@ __kernel void lines( float2 ppp1 = pp1.x - x < -e ? pxa : (pp1.x - x > 1.f + e ? pxb : pp1); - float cover = ppp1.y - ppp0.y; - float area = (x + 1.f - 0.5f*(ppp0.x + ppp1.x))*cover; - __global float *mark = mark_buffer + 2*(r*width + c); - mark[0] += area; - mark[1] += cover; + float2 m; + m.y = ppp1.y - ppp0.y; + m.x = (x + 1.f - 0.5f*(ppp0.x + ppp1.x))*m.y; + mark_buffer[r*w + c] += m; } } } @@ -68,8 +78,9 @@ __kernel void lines( __kernel void fill( int width, - __global float *mark_buffer, - __global float *surface_buffer, + __global float2 *mark_buffer, + __read_only image2d_t surface_read_image, + __write_only image2d_t surface_write_image, float color_r, float color_g, float color_b, @@ -79,23 +90,28 @@ __kernel void fill( { size_t id = get_global_id(0); int w = width; - float cr = color_r; - float cg = color_g; - float cb = color_b; - float ca = color_a; - __global float *mark = mark_buffer + 2*id*w; - __global float *surface = surface_buffer + 4*id*w; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE + | CLK_ADDRESS_NONE + | CLK_FILTER_NEAREST; + float4 color = { color_r, color_g, color_b, color_a }; float cover = 0.f; - for(int i = 0; i < width; ++i, mark += 2, surface += 4) { - float alpha = fabs(*mark + cover); - alpha = evenodd ? ca*(1.f - fabs(1.f - alpha - 2.f*floor(0.5f*alpha))) + __global float2 *mark = mark_buffer + id*w; + for(int2 coord = { 0, id }; coord.x < w; ++coord.x, ++mark) { + float2 m = *mark; + + float alpha = fabs(m.x + cover); + cover += m.y; + alpha = evenodd ? (1.f - fabs(1.f - alpha - 2.f*floor(0.5f*alpha))) : fmin(alpha, 1.f); + alpha *= color.w; if (invert) alpha = 1.f - alpha; float alpha_inv = 1.f - alpha; - surface[0] = surface[0]*alpha_inv + cr*alpha; - surface[1] = surface[1]*alpha_inv + cg*alpha; - surface[2] = surface[2]*alpha_inv + cb*alpha; - surface[3] = fmin(surface[3] + ca*alpha, 1.f); - cover += mark[1]; + + float4 c = read_imagef(surface_read_image, sampler, coord); + c.x = c.x*alpha_inv + color.x*alpha; + c.y = c.y*alpha_inv + color.y*alpha; + c.z = c.z*alpha_inv + color.z*alpha; + c.w = min(c.w + alpha, 1.f); + write_imagef(surface_write_image, coord, c); } } diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp index 6a68143..1694f38 100644 --- a/c++/contourgl/clrender.cpp +++ b/c++/contourgl/clrender.cpp @@ -34,13 +34,15 @@ ClRender::ClRender(ClContext &cl): surface(), rows_buffer(), mark_buffer(), - surface_buffer(), + surface_image(), prev_event(), rows_count(), even_rows_count(), odd_rows_count() { contour_program = cl.load_program("contour.cl"); + contour_clear2f_kernel = clCreateKernel(contour_program, "clear2f", NULL); + assert(contour_clear2f_kernel); contour_lines_kernel = clCreateKernel(contour_program, "lines", NULL); assert(contour_lines_kernel); contour_fill_kernel = clCreateKernel(contour_program, "fill", NULL); @@ -49,6 +51,7 @@ ClRender::ClRender(ClContext &cl): ClRender::~ClRender() { send_surface(NULL); + clReleaseKernel(contour_clear2f_kernel); clReleaseKernel(contour_fill_kernel); clReleaseKernel(contour_lines_kernel); clReleaseProgram(contour_program); @@ -65,7 +68,7 @@ void ClRender::send_surface(Surface *surface) { rows.clear(); clReleaseMemObject(rows_buffer); clReleaseMemObject(mark_buffer); - clReleaseMemObject(surface_buffer); + clReleaseMemObject(surface_image); } this->surface = surface; @@ -87,19 +90,25 @@ void ClRender::send_surface(Surface *surface) { mark_buffer = clCreateBuffer( cl.context, CL_MEM_READ_WRITE, - marks.size()*sizeof(marks.front()), NULL, + surface->count()*sizeof(cl_float2), NULL, NULL ); assert(mark_buffer); - surface_buffer = clCreateBuffer( - cl.context, CL_MEM_READ_WRITE, - surface->data_size(), surface->data, - NULL ); - assert(surface_buffer); + cl_image_format surface_format = { }; + surface_format.image_channel_order = CL_RGBA; + surface_format.image_channel_data_type = CL_FLOAT; - cl.err |= clEnqueueWriteBuffer( - cl.queue, surface_buffer, CL_TRUE, - 0, surface->data_size(), surface->data, + surface_image = clCreateImage2D( + cl.context, CL_MEM_READ_WRITE, + &surface_format, surface->width, surface->height, + 0, NULL, NULL ); + assert(surface_image); + + size_t origin[3] = { }; + size_t region[3] = { (size_t)surface->width, (size_t)surface->height, 1 }; + cl.err |= clEnqueueWriteImage( + cl.queue, surface_image, CL_TRUE, + origin, region, 0, 0, surface->data, 0, NULL, &prev_event ); assert(!cl.err); @@ -110,9 +119,11 @@ Surface* ClRender::receive_surface() { if (surface) { //Measure t("ClRender::receive_surface"); - cl.err |= clEnqueueReadBuffer( - cl.queue, surface_buffer, CL_TRUE, - 0, surface->data_size(), surface->data, + size_t origin[3] = { }; + size_t region[3] = { (size_t)surface->width, (size_t)surface->height, 1 }; + cl.err |= clEnqueueReadImage( + cl.queue, surface_image, CL_TRUE, + origin, region, 0, 0, surface->data, prev_event ? 1 : 0, &prev_event, NULL ); assert(!cl.err); clFinish(cl.queue); @@ -218,8 +229,12 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co //Measure t("enqueue commands"); // kernel args + int width = surface->width; + cl.err |= clSetKernelArg(contour_clear2f_kernel, 0, sizeof(mark_buffer), &mark_buffer); + assert(!cl.err); + cl.err |= clSetKernelArg(contour_lines_kernel, 0, sizeof(width), &width); cl.err |= clSetKernelArg(contour_lines_kernel, 1, sizeof(lines_buffer), &lines_buffer); cl.err |= clSetKernelArg(contour_lines_kernel, 2, sizeof(rows_buffer), &rows_buffer); @@ -229,16 +244,17 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co int iinvert = invert, ievenodd = evenodd; cl.err |= clSetKernelArg(contour_fill_kernel, 0, sizeof(width), &width); cl.err |= clSetKernelArg(contour_fill_kernel, 1, sizeof(mark_buffer), &mark_buffer); - cl.err |= clSetKernelArg(contour_fill_kernel, 2, sizeof(surface_buffer), &surface_buffer); - cl.err |= clSetKernelArg(contour_fill_kernel, 3, sizeof(Color::type), &color.r); - cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(Color::type), &color.g); - cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(Color::type), &color.b); - cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(Color::type), &color.a); - cl.err |= clSetKernelArg(contour_fill_kernel, 7, sizeof(int), &iinvert); - cl.err |= clSetKernelArg(contour_fill_kernel, 8, sizeof(int), &ievenodd); + cl.err |= clSetKernelArg(contour_fill_kernel, 2, sizeof(surface_image), &surface_image); + cl.err |= clSetKernelArg(contour_fill_kernel, 3, sizeof(surface_image), &surface_image); + cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(Color::type), &color.r); + cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(Color::type), &color.g); + cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(Color::type), &color.b); + cl.err |= clSetKernelArg(contour_fill_kernel, 7, sizeof(Color::type), &color.a); + cl.err |= clSetKernelArg(contour_fill_kernel, 8, sizeof(int), &iinvert); + cl.err |= clSetKernelArg(contour_fill_kernel, 9, sizeof(int), &ievenodd); assert(!cl.err); - // prepare buffers + // init buffers cl_event prepare_buffers_events[3] = { }; @@ -254,13 +270,20 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co 0, NULL, &prepare_buffers_events[1] ); assert(!cl.err); - cl.err |= clEnqueueWriteBuffer( - cl.queue, mark_buffer, CL_TRUE, - 0, marks.size()*sizeof(marks.front()), &marks.front(), - prev_event ? 1 : 0, &prev_event, &prepare_buffers_events[2] ); + size_t count = (size_t)surface->count(); + cl.err |= clEnqueueNDRangeKernel( + cl.queue, + contour_clear2f_kernel, + 1, + NULL, + &count, + NULL, + prev_event ? 1 : 0, + &prev_event, + &prepare_buffers_events[2] ); assert(!cl.err); - // run kernels + // build marks cl_event lines_odd_event = NULL; cl.err |= clEnqueueNDRangeKernel( @@ -288,6 +311,8 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co &lines_even_event ); assert(!cl.err); + // fill + cl.err |= clEnqueueNDRangeKernel( cl.queue, contour_fill_kernel, diff --git a/c++/contourgl/clrender.h b/c++/contourgl/clrender.h index 4e7a1d9..703195b 100644 --- a/c++/contourgl/clrender.h +++ b/c++/contourgl/clrender.h @@ -30,13 +30,14 @@ class ClRender { private: ClContext &cl; cl_program contour_program; + cl_kernel contour_clear2f_kernel; cl_kernel contour_lines_kernel; cl_kernel contour_fill_kernel; Surface *surface; cl_mem rows_buffer; cl_mem mark_buffer; - cl_mem surface_buffer; + cl_mem surface_image; cl_event prev_event; size_t rows_count; diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp index 344c7ff..8185fe4 100644 --- a/c++/contourgl/test.cpp +++ b/c++/contourgl/test.cpp @@ -538,19 +538,19 @@ void Test::test4() { } } - Surface surface(width+2, height+2); + Surface surface(width, height); { // cl vector contours_cl = contours; - - Measure t("test_4_cl.tga", surface); - ClRender clr(e.cl); - clr.send_surface(&surface); - for(vector::const_iterator i = contours_cl.begin(); i != contours_cl.end(); ++i) - clr.contour(i->contour, bounds_file, i->color, i->invert, i->evenodd); - clr.receive_surface(); + { + Measure t("test_4_cl.tga", surface); + clr.send_surface(&surface); + for(vector::const_iterator i = contours_cl.begin(); i != contours_cl.end(); ++i) + clr.contour(i->contour, bounds_file, i->color, i->invert, i->evenodd); + clr.receive_surface(); + } } }