diff --git a/c++/contourgl/cl/contour.cl b/c++/contourgl/cl/contour.cl
index 2d3635f..6c9f926 100644
--- a/c++/contourgl/cl/contour.cl
+++ b/c++/contourgl/cl/contour.cl
@@ -15,19 +15,30 @@
along with this program. If not, see .
*/
+__kernel void clear2f(
+ __global float2 *buffer )
+{
+ const float2 v = { 0.f, 0.f };
+ buffer[get_global_id(0)] = v;
+}
+
__kernel void lines(
int width,
__global float *lines,
__global int *rows,
- __global float *mark_buffer )
+ __global float2 *mark_buffer )
{
const float e = 1e-6f;
- size_t id = get_global_id(0);
- int begin = rows[id*2];
- int end = begin + rows[id*2 + 1];
- for(int i = begin; i < end; ++i) {
- float2 p0 = { lines[4*i + 0], lines[4*i + 1] };
- float2 p1 = { lines[4*i + 2], lines[4*i + 3] };
+ __global int *row = rows + 2*get_global_id(0);
+ sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE
+ | CLK_ADDRESS_NONE
+ | CLK_FILTER_NEAREST;
+ int w = width;
+ __global float *i = lines + 4*row[0];
+ __global float *end = i + 4*row[1];
+ for(; i < end; i += 4) {
+ float2 p0 = { i[0], i[1] };
+ float2 p1 = { i[2], i[3] };
int iy0 = (int)floor(fmin(p0.y, p1.y) + e);
int iy1 = (int)floor(fmax(p0.y, p1.y) - e);
@@ -56,11 +67,10 @@ __kernel void lines(
float2 ppp1 = pp1.x - x < -e ? pxa
: (pp1.x - x > 1.f + e ? pxb : pp1);
- float cover = ppp1.y - ppp0.y;
- float area = (x + 1.f - 0.5f*(ppp0.x + ppp1.x))*cover;
- __global float *mark = mark_buffer + 2*(r*width + c);
- mark[0] += area;
- mark[1] += cover;
+ float2 m;
+ m.y = ppp1.y - ppp0.y;
+ m.x = (x + 1.f - 0.5f*(ppp0.x + ppp1.x))*m.y;
+ mark_buffer[r*w + c] += m;
}
}
}
@@ -68,8 +78,9 @@ __kernel void lines(
__kernel void fill(
int width,
- __global float *mark_buffer,
- __global float *surface_buffer,
+ __global float2 *mark_buffer,
+ __read_only image2d_t surface_read_image,
+ __write_only image2d_t surface_write_image,
float color_r,
float color_g,
float color_b,
@@ -79,23 +90,28 @@ __kernel void fill(
{
size_t id = get_global_id(0);
int w = width;
- float cr = color_r;
- float cg = color_g;
- float cb = color_b;
- float ca = color_a;
- __global float *mark = mark_buffer + 2*id*w;
- __global float *surface = surface_buffer + 4*id*w;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE
+ | CLK_ADDRESS_NONE
+ | CLK_FILTER_NEAREST;
+ float4 color = { color_r, color_g, color_b, color_a };
float cover = 0.f;
- for(int i = 0; i < width; ++i, mark += 2, surface += 4) {
- float alpha = fabs(*mark + cover);
- alpha = evenodd ? ca*(1.f - fabs(1.f - alpha - 2.f*floor(0.5f*alpha)))
+ __global float2 *mark = mark_buffer + id*w;
+ for(int2 coord = { 0, id }; coord.x < w; ++coord.x, ++mark) {
+ float2 m = *mark;
+
+ float alpha = fabs(m.x + cover);
+ cover += m.y;
+ alpha = evenodd ? (1.f - fabs(1.f - alpha - 2.f*floor(0.5f*alpha)))
: fmin(alpha, 1.f);
+ alpha *= color.w;
if (invert) alpha = 1.f - alpha;
float alpha_inv = 1.f - alpha;
- surface[0] = surface[0]*alpha_inv + cr*alpha;
- surface[1] = surface[1]*alpha_inv + cg*alpha;
- surface[2] = surface[2]*alpha_inv + cb*alpha;
- surface[3] = fmin(surface[3] + ca*alpha, 1.f);
- cover += mark[1];
+
+ float4 c = read_imagef(surface_read_image, sampler, coord);
+ c.x = c.x*alpha_inv + color.x*alpha;
+ c.y = c.y*alpha_inv + color.y*alpha;
+ c.z = c.z*alpha_inv + color.z*alpha;
+ c.w = min(c.w + alpha, 1.f);
+ write_imagef(surface_write_image, coord, c);
}
}
diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp
index 6a68143..1694f38 100644
--- a/c++/contourgl/clrender.cpp
+++ b/c++/contourgl/clrender.cpp
@@ -34,13 +34,15 @@ ClRender::ClRender(ClContext &cl):
surface(),
rows_buffer(),
mark_buffer(),
- surface_buffer(),
+ surface_image(),
prev_event(),
rows_count(),
even_rows_count(),
odd_rows_count()
{
contour_program = cl.load_program("contour.cl");
+ contour_clear2f_kernel = clCreateKernel(contour_program, "clear2f", NULL);
+ assert(contour_clear2f_kernel);
contour_lines_kernel = clCreateKernel(contour_program, "lines", NULL);
assert(contour_lines_kernel);
contour_fill_kernel = clCreateKernel(contour_program, "fill", NULL);
@@ -49,6 +51,7 @@ ClRender::ClRender(ClContext &cl):
ClRender::~ClRender() {
send_surface(NULL);
+ clReleaseKernel(contour_clear2f_kernel);
clReleaseKernel(contour_fill_kernel);
clReleaseKernel(contour_lines_kernel);
clReleaseProgram(contour_program);
@@ -65,7 +68,7 @@ void ClRender::send_surface(Surface *surface) {
rows.clear();
clReleaseMemObject(rows_buffer);
clReleaseMemObject(mark_buffer);
- clReleaseMemObject(surface_buffer);
+ clReleaseMemObject(surface_image);
}
this->surface = surface;
@@ -87,19 +90,25 @@ void ClRender::send_surface(Surface *surface) {
mark_buffer = clCreateBuffer(
cl.context, CL_MEM_READ_WRITE,
- marks.size()*sizeof(marks.front()), NULL,
+ surface->count()*sizeof(cl_float2), NULL,
NULL );
assert(mark_buffer);
- surface_buffer = clCreateBuffer(
- cl.context, CL_MEM_READ_WRITE,
- surface->data_size(), surface->data,
- NULL );
- assert(surface_buffer);
+ cl_image_format surface_format = { };
+ surface_format.image_channel_order = CL_RGBA;
+ surface_format.image_channel_data_type = CL_FLOAT;
- cl.err |= clEnqueueWriteBuffer(
- cl.queue, surface_buffer, CL_TRUE,
- 0, surface->data_size(), surface->data,
+ surface_image = clCreateImage2D(
+ cl.context, CL_MEM_READ_WRITE,
+ &surface_format, surface->width, surface->height,
+ 0, NULL, NULL );
+ assert(surface_image);
+
+ size_t origin[3] = { };
+ size_t region[3] = { (size_t)surface->width, (size_t)surface->height, 1 };
+ cl.err |= clEnqueueWriteImage(
+ cl.queue, surface_image, CL_TRUE,
+ origin, region, 0, 0, surface->data,
0, NULL, &prev_event );
assert(!cl.err);
@@ -110,9 +119,11 @@ Surface* ClRender::receive_surface() {
if (surface) {
//Measure t("ClRender::receive_surface");
- cl.err |= clEnqueueReadBuffer(
- cl.queue, surface_buffer, CL_TRUE,
- 0, surface->data_size(), surface->data,
+ size_t origin[3] = { };
+ size_t region[3] = { (size_t)surface->width, (size_t)surface->height, 1 };
+ cl.err |= clEnqueueReadImage(
+ cl.queue, surface_image, CL_TRUE,
+ origin, region, 0, 0, surface->data,
prev_event ? 1 : 0, &prev_event, NULL );
assert(!cl.err);
clFinish(cl.queue);
@@ -218,8 +229,12 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co
//Measure t("enqueue commands");
// kernel args
+
int width = surface->width;
+ cl.err |= clSetKernelArg(contour_clear2f_kernel, 0, sizeof(mark_buffer), &mark_buffer);
+ assert(!cl.err);
+
cl.err |= clSetKernelArg(contour_lines_kernel, 0, sizeof(width), &width);
cl.err |= clSetKernelArg(contour_lines_kernel, 1, sizeof(lines_buffer), &lines_buffer);
cl.err |= clSetKernelArg(contour_lines_kernel, 2, sizeof(rows_buffer), &rows_buffer);
@@ -229,16 +244,17 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co
int iinvert = invert, ievenodd = evenodd;
cl.err |= clSetKernelArg(contour_fill_kernel, 0, sizeof(width), &width);
cl.err |= clSetKernelArg(contour_fill_kernel, 1, sizeof(mark_buffer), &mark_buffer);
- cl.err |= clSetKernelArg(contour_fill_kernel, 2, sizeof(surface_buffer), &surface_buffer);
- cl.err |= clSetKernelArg(contour_fill_kernel, 3, sizeof(Color::type), &color.r);
- cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(Color::type), &color.g);
- cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(Color::type), &color.b);
- cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(Color::type), &color.a);
- cl.err |= clSetKernelArg(contour_fill_kernel, 7, sizeof(int), &iinvert);
- cl.err |= clSetKernelArg(contour_fill_kernel, 8, sizeof(int), &ievenodd);
+ cl.err |= clSetKernelArg(contour_fill_kernel, 2, sizeof(surface_image), &surface_image);
+ cl.err |= clSetKernelArg(contour_fill_kernel, 3, sizeof(surface_image), &surface_image);
+ cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(Color::type), &color.r);
+ cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(Color::type), &color.g);
+ cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(Color::type), &color.b);
+ cl.err |= clSetKernelArg(contour_fill_kernel, 7, sizeof(Color::type), &color.a);
+ cl.err |= clSetKernelArg(contour_fill_kernel, 8, sizeof(int), &iinvert);
+ cl.err |= clSetKernelArg(contour_fill_kernel, 9, sizeof(int), &ievenodd);
assert(!cl.err);
- // prepare buffers
+ // init buffers
cl_event prepare_buffers_events[3] = { };
@@ -254,13 +270,20 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co
0, NULL, &prepare_buffers_events[1] );
assert(!cl.err);
- cl.err |= clEnqueueWriteBuffer(
- cl.queue, mark_buffer, CL_TRUE,
- 0, marks.size()*sizeof(marks.front()), &marks.front(),
- prev_event ? 1 : 0, &prev_event, &prepare_buffers_events[2] );
+ size_t count = (size_t)surface->count();
+ cl.err |= clEnqueueNDRangeKernel(
+ cl.queue,
+ contour_clear2f_kernel,
+ 1,
+ NULL,
+ &count,
+ NULL,
+ prev_event ? 1 : 0,
+ &prev_event,
+ &prepare_buffers_events[2] );
assert(!cl.err);
- // run kernels
+ // build marks
cl_event lines_odd_event = NULL;
cl.err |= clEnqueueNDRangeKernel(
@@ -288,6 +311,8 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co
&lines_even_event );
assert(!cl.err);
+ // fill
+
cl.err |= clEnqueueNDRangeKernel(
cl.queue,
contour_fill_kernel,
diff --git a/c++/contourgl/clrender.h b/c++/contourgl/clrender.h
index 4e7a1d9..703195b 100644
--- a/c++/contourgl/clrender.h
+++ b/c++/contourgl/clrender.h
@@ -30,13 +30,14 @@ class ClRender {
private:
ClContext &cl;
cl_program contour_program;
+ cl_kernel contour_clear2f_kernel;
cl_kernel contour_lines_kernel;
cl_kernel contour_fill_kernel;
Surface *surface;
cl_mem rows_buffer;
cl_mem mark_buffer;
- cl_mem surface_buffer;
+ cl_mem surface_image;
cl_event prev_event;
size_t rows_count;
diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp
index 344c7ff..8185fe4 100644
--- a/c++/contourgl/test.cpp
+++ b/c++/contourgl/test.cpp
@@ -538,19 +538,19 @@ void Test::test4() {
}
}
- Surface surface(width+2, height+2);
+ Surface surface(width, height);
{
// cl
vector contours_cl = contours;
-
- Measure t("test_4_cl.tga", surface);
-
ClRender clr(e.cl);
- clr.send_surface(&surface);
- for(vector::const_iterator i = contours_cl.begin(); i != contours_cl.end(); ++i)
- clr.contour(i->contour, bounds_file, i->color, i->invert, i->evenodd);
- clr.receive_surface();
+ {
+ Measure t("test_4_cl.tga", surface);
+ clr.send_surface(&surface);
+ for(vector::const_iterator i = contours_cl.begin(); i != contours_cl.end(); ++i)
+ clr.contour(i->contour, bounds_file, i->color, i->invert, i->evenodd);
+ clr.receive_surface();
+ }
}
}