diff --git a/c++/contourgl/cl/contour-fs.cl b/c++/contourgl/cl/contour-fs.cl
new file mode 100644
index 0000000..84d7061
--- /dev/null
+++ b/c++/contourgl/cl/contour-fs.cl
@@ -0,0 +1,169 @@
+/*
+ ......... 2015 Ivan Mahonin
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// paths format:
+// {
+// int count,
+// paths: [
+// {
+// int point_count,
+// int flags,
+// float4 color,
+// points: [ float2, ... ]
+// },
+// ...
+// ]
+// }
+
+
+kernel void draw( // rasterizes every path of paths_buffer into image, one path after another
+ const int width,
+ const int height,
+ global int *mark_buffer, // two ints per pixel: accumulated area*cover and cover, both scaled by 65536
+ global float4 *image,
+ global const char *paths_buffer ) // packed as described in the "paths format" comment above
+{
+ const float e = 1e-6f;
+ // work is divided by local id/size and the bounds live in local memory: written for a single work-group
+ int id = (int)get_local_id(0);
+ int count = (int)get_local_size(0);
+
+ int paths_count = *(global int *)paths_buffer;
+ global const char *paths = paths_buffer + sizeof(int);
+
+ int pixels_count = width*height; // NOTE(review): never read in this kernel
+ float2 size = (float2)((float)width, (float)height);
+ int w1 = width - 1;
+ int h1 = height - 1;
+ // pixel bounding box of the path being drawn, shared by all work-items of the group
+ local int bound_minx;
+ local int bound_miny;
+ local int bound_maxx;
+ local int bound_maxy;
+
+ // draw paths
+ for(int p = 0; p < paths_count; ++p) {
+ int points_count = *(global const int *)paths; paths += sizeof(int);
+ int flags = *(global const int *)paths; paths += sizeof(int);
+
+ float4 color;
+ color.x = *(global const float *)paths; paths += sizeof(float);
+ color.y = *(global const float *)paths; paths += sizeof(float);
+ color.z = *(global const float *)paths; paths += sizeof(float);
+ color.w = *(global const float *)paths; paths += sizeof(float);
+
+ global const float *points = (global const float *)paths;
+ paths += 2*points_count*sizeof(float);
+
+ int segments_count = points_count - 1;
+ if (segments_count <= 0) continue;
+
+ bool invert = flags & 1; // flag bit 0
+ bool evenodd = flags & 2; // flag bit 1
+ // work-item 0 seeds the bounds: the whole image for inverted paths, otherwise the first point
+ if (id == 0) {
+ bound_minx = invert ? 0 : (int)floor(points[0] + e);
+ bound_miny = invert ? 0 : (int)floor(points[1] + e);
+ bound_maxx = invert ? w1 : bound_minx;
+ bound_maxy = invert ? h1 : bound_miny;
+ }
+ barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
+
+ // trace path: each work-item handles every count-th segment
+ for(int i = id; i < segments_count; i += count) {
+ int ii = 2*i;
+ float2 p0 = { points[ii + 0], points[ii + 1] };
+ float2 p1 = { points[ii + 2], points[ii + 3] };
+ // grow the shared bounds with the end point of the segment
+ int p1x = (int)floor(p1.x + e);
+ int p1y = (int)floor(p1.y + e);
+ atomic_min(&bound_minx, p1x);
+ atomic_min(&bound_miny, p1y);
+ atomic_max(&bound_maxx, p1x);
+ atomic_max(&bound_maxy, p1y);
+ // mirror the segment so that it always runs toward +x and +y; undone before storing
+ bool flipx = p1.x < p0.x;
+ bool flipy = p1.y < p0.y;
+ if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; }
+ if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; }
+ float2 d = p1 - p0;
+ float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; // dx per unit of y; 1e10 substitutes when d.y is nearly zero
+ float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; // dy per unit of x; 1e10 substitutes when d.x is nearly zero
+ // walk the segment one pixel cell at a time until p0 reaches p1
+ while(p0.x != p1.x || p0.y != p1.y) {
+ int ix = (int)floor(p0.x + e);
+ int iy = (int)floor(p0.y + e);
+ if (iy > h1) break; // mirrored segment only moves toward +y, nothing further can land inside
+ // pp1 is the end of the piece inside this cell: p1 cut at the next x and y cell boundaries
+ float px = (float)(ix + 1);
+ float py = (float)(iy + 1);
+ float2 pp1 = p1;
+ if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); }
+ if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); }
+
+ if (iy >= 0) {
+ // calc values
+ float cover = pp1.y - p0.y; // vertical extent of the piece
+ float area = px - 0.5f*(p0.x + pp1.x); // distance from the piece's mean x to the cell edge px
+ if (flipx) { ix = w1 - ix; area = 1.f - area; }
+ if (flipy) { iy = h1 - iy; cover = -cover; }
+ ix = clamp(ix, 0, w1); // columns outside the image are accumulated on the nearest edge column
+
+ // store in buffer
+ global int *mark = mark_buffer + (iy*width + ix)*2;
+ atomic_add(mark, (int)round(area*cover*65536.f));
+ atomic_add(mark + 1, (int)round(cover*65536.f));
+ }
+
+ p0 = pp1;
+ }
+ }
+ barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); // all marks and bounds are written before the fill starts
+
+ // read bounds
+ int minx = max(bound_minx, 0);
+ int miny = max(bound_miny, 0);
+ int maxx = min(bound_maxx, w1);
+ int maxy = min(bound_maxy, h1);
+ barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); // bounds are copied before work-item 0 reseeds them for the next path
+
+ // fill: each work-item handles every count-th row of the bounding box
+ for(int row = miny + id; row <= maxy; row += count) {
+ global int *mark = mark_buffer + (row*width + minx)*2;
+ global float4 *pixel = image + row*width + minx;
+ global float4 *pixel_end = pixel - minx + maxx + 1;
+ int icover = 0; // running sum of the cover marks, left to right
+
+ while(pixel < pixel_end) {
+ // read mark (alpha, cover) and clear both entries for the next path
+ int ialpha = abs(icover + *mark); *mark = 0; ++mark;
+ icover += *mark; *mark = 0; ++mark;
+ // NOTE(review): without evenodd, ialpha is not clamped to 65536 here
+ if (evenodd) ialpha = 65536 - abs(ialpha%131072 - 65536); // triangle wave with period 2*65536
+ if (invert) ialpha = 65536 - ialpha;
+
+ //if (!ialpha) continue;
+
+ // write color
+ float alpha = (float)ialpha/65536.f*color.w;
+ *pixel = *pixel*(1.f - alpha) + color*alpha;
+ ++pixel;
+ }
+ }
+ }
+}
+
diff --git a/c++/contourgl/cl/contour-sort.cl b/c++/contourgl/cl/contour-sort.cl
new file mode 100644
index 0000000..8b4a98b
--- /dev/null
+++ b/c++/contourgl/cl/contour-sort.cl
@@ -0,0 +1,214 @@
+/*
+ ......... 2018 Ivan Mahonin
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ samples_buffer format:
+ Sample count, // only field 'next_index' is in use and store index of next sample to allocate
+ Sample rows[height], // only fields 'next_index' are in use and store index first sample in the row
+ Sample real_samples[]
+*/
+
+
+typedef struct { // per-path attributes, same field order as the host-side ClRender2::Path
+ float4 color;
+ int invert;
+ int evenodd;
+ int align0; // align0/align1 only pad the fields to 32 bytes
+ int align1;
+} Path __attribute__((aligned (32)));
+
+typedef struct { // one contour vertex, same field order as the host-side ClRender2::Point
+ float2 coord;
+ int path_index; // index into the Path array; two consecutive points with equal index form a segment
+ int align0; // pads the fields to 16 bytes
+} Point __attribute__((aligned (16)));
+
+typedef struct { // linked-list node: contribution of one segment piece to one pixel
+ int path_index;
+ int x; // pixel column inside the row
+ float area; // already multiplied by cover when the paths kernel stores it
+ float cover;
+ int next_index; // index of the next Sample of the same row; 0 ends the list
+ int align0; // align0..align2 only pad the fields to 32 bytes
+ int align1;
+ int align2;
+} Sample __attribute__((aligned (32)));
+
+
+kernel void reset(global Sample *samples) {
+    // each work-item empties one row head; slot 0 is the allocator header, so row heads start at 1
+    int row = get_global_id(0);
+    global Sample *head = samples + row + 1;
+    head->path_index = -1;
+    head->next_index = 0; // 0 means "no samples in this row"
+    if (row != 0) return;
+    samples[0].path_index = -1;
+    samples[0].next_index = get_global_size(0) + 1; // first slot after the row heads
+}
+
+
+kernel void paths( // one work-item per pair of consecutive points: splits the segment into per-pixel samples
+ int width,
+ int height,
+ global Sample *samples,
+ global const Point *points )
+{
+ const float e = 1e-6f;
+
+ // flip order, because we will insert samples into front of linked list
+ int id = get_global_size(0) - get_global_id(0) - 1;
+
+ float2 size = (float2)((float)width, (float)height);
+ int w1 = width - 1;
+ int h1 = height - 1;
+
+ global int *next_sample = &samples->next_index; // allocation counter kept in samples[0]
+ global Sample *rows = &samples[1]; // row list heads start right after the header
+
+ Point point0 = points[id];
+ Point point1 = points[id+1];
+ if (point0.path_index != point1.path_index) return; // the pair straddles two paths, so it is not a segment
+
+ int path_index = point0.path_index;
+ float2 p0 = point0.coord;
+ float2 p1 = point1.coord;
+ // mirror the segment so that it always runs toward +x and +y; undone before storing
+ bool flipx = p1.x < p0.x;
+ bool flipy = p1.y < p0.y;
+ if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; }
+ if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; }
+ float2 d = p1 - p0;
+ float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; // dx per unit of y; 1e10 substitutes when d.y is nearly zero
+ float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; // dy per unit of x; 1e10 substitutes when d.x is nearly zero
+ // walk the segment one pixel cell at a time until p0 reaches p1
+ while(p0.x != p1.x || p0.y != p1.y) {
+ int ix = (int)floor(p0.x + e);
+ int iy = (int)floor(p0.y + e);
+ if (iy > h1) break; // mirrored segment only moves toward +y, nothing further can land inside
+ // pp1 is the end of the piece inside this cell: p1 cut at the next x and y cell boundaries
+ float px = (float)(ix + 1);
+ float py = (float)(iy + 1);
+ float2 pp1 = p1;
+ if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); }
+ if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); }
+
+ if (iy >= 0) {
+ // calc values
+ Sample sample;
+ sample.path_index = path_index;
+ sample.cover = pp1.y - p0.y;
+ sample.area = px - 0.5f*(p0.x + pp1.x);
+ if (flipx) { ix = w1 - ix; sample.area = 1.f - sample.area; }
+ if (flipy) { iy = h1 - iy; sample.cover = -sample.cover; }
+ sample.area *= sample.cover;
+ sample.x = clamp(ix, 0, w1);
+
+ // store in buffer
+ int sample_index = atomic_inc(next_sample); // NOTE(review): not checked against the buffer capacity
+ sample.next_index = atomic_xchg(&rows[iy].next_index, sample_index); // new list head; the old head becomes its successor
+ samples[sample_index] = sample;
+ }
+
+ p0 = pp1;
+ }
+}
+
+
+kernel void draw(
+    const int width,
+    global float4 *image,
+    global Sample *samples,
+    global Path *paths )
+{
+    // One work-item per image row: sort the row's sample list by (path_index, x), merge
+    // samples that share a path and a column, then blend pixels and solid spans into the row.
+    int id = get_global_id(0);
+    global float4 *image_row = image + id*width;
+    global Sample *first = &samples[1+id];
+
+    // An empty row has next_index == 0, which aliases samples[0]; that slot's next_index is the
+    // non-zero allocation counter, so walking it would run past the allocated samples.
+    if (!first->next_index) return;
+
+    int current_index;
+    global Sample *prev, *current, *next;
+
+    // sort: bubble sort over the singly linked list, repeated until a pass makes no swap
+    bool repeat = true;
+    while(repeat) {
+        repeat = false;
+        prev = first;
+        current = &samples[ prev->next_index ];
+        while(current->next_index) {
+            next = &samples[ current->next_index ];
+            if ( current->path_index > next->path_index
+              || (current->path_index == next->path_index && current->x > next->x) )
+            {
+                // swap: relink prev -> next -> current -> rest
+                current_index = prev->next_index;
+                prev->next_index = current->next_index;
+                current->next_index = next->next_index;
+                next->next_index = current_index;
+                prev = next;
+                repeat = true;
+            } else {
+                prev = current;
+                current = next;
+            }
+        }
+    }
+
+    // merge: fold neighbours with the same path and column into the first of them
+    current = &samples[ first->next_index ];
+    while(current->next_index) {
+        next = &samples[ current->next_index ];
+        if (current->path_index == next->path_index && current->x == next->x) {
+            current->area += next->area;
+            current->cover += next->cover;
+            current->next_index = next->next_index;
+        } else {
+            current = next;
+        }
+    }
+
+    // draw
+    global float4 *pixel, *next_pixel;
+    float cover = 0.f; // running cover of the current path, left to right
+    int next_index = first->next_index;
+    while(next_index) {
+        current = &samples[ next_index ];
+        next_index = current->next_index;
+        // draw current
+        float4 color = paths[ current->path_index ].color;
+        float alpha = min(1.f, fabs(cover + current->area))*color.w;
+        cover += current->cover;
+
+        pixel = &image_row[current->x];
+        *pixel = *pixel*(1.f - alpha) + color*alpha; // TODO: valid composite blending
+        ++pixel;
+
+        // draw span: current <--> next (at the list end samples[0].path_index is -1, so no span)
+        next_pixel = fabs(cover) > 0.5f && current->path_index == samples[next_index].path_index
+                   ? &image_row[samples[next_index].x] : pixel;
+        while(pixel < next_pixel) {
+            *pixel = *pixel*(1.f - color.w) + color*color.w; // TODO: valid composite blending
+            ++pixel;
+        }
+        if (current->path_index != samples[next_index].path_index) cover = 0.f;
+    }
+}
+
diff --git a/c++/contourgl/cl/contour.cl b/c++/contourgl/cl/contour.cl
deleted file mode 100644
index 84d7061..0000000
--- a/c++/contourgl/cl/contour.cl
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- ......... 2015 Ivan Mahonin
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see .
-*/
-
-// paths format:
-// {
-// int count,
-// paths: [
-// {
-// int point_count,
-// int flags,
-// float4 color,
-// points: [ float2, ... ]
-// },
-// ...
-// ]
-// }
-
-
-kernel void draw(
- const int width,
- const int height,
- global int *mark_buffer,
- global float4 *image,
- global const char *paths_buffer )
-{
- const float e = 1e-6f;
-
- int id = (int)get_local_id(0);
- int count = (int)get_local_size(0);
-
- int paths_count = *(global int *)paths_buffer;
- global const char *paths = paths_buffer + sizeof(int);
-
- int pixels_count = width*height;
- float2 size = (float2)((float)width, (float)height);
- int w1 = width - 1;
- int h1 = height - 1;
-
- local int bound_minx;
- local int bound_miny;
- local int bound_maxx;
- local int bound_maxy;
-
- // draw paths
- for(int p = 0; p < paths_count; ++p) {
- int points_count = *(global const int *)paths; paths += sizeof(int);
- int flags = *(global const int *)paths; paths += sizeof(int);
-
- float4 color;
- color.x = *(global const float *)paths; paths += sizeof(float);
- color.y = *(global const float *)paths; paths += sizeof(float);
- color.z = *(global const float *)paths; paths += sizeof(float);
- color.w = *(global const float *)paths; paths += sizeof(float);
-
- global const float *points = (global const float *)paths;
- paths += 2*points_count*sizeof(float);
-
- int segments_count = points_count - 1;
- if (segments_count <= 0) continue;
-
- bool invert = flags & 1;
- bool evenodd = flags & 2;
-
- if (id == 0) {
- bound_minx = invert ? 0 : (int)floor(points[0] + e);
- bound_miny = invert ? 0 : (int)floor(points[1] + e);
- bound_maxx = invert ? w1 : bound_minx;
- bound_maxy = invert ? h1 : bound_miny;
- }
- barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
-
- // trace path
- for(int i = id; i < segments_count; i += count) {
- int ii = 2*i;
- float2 p0 = { points[ii + 0], points[ii + 1] };
- float2 p1 = { points[ii + 2], points[ii + 3] };
-
- int p1x = (int)floor(p1.x + e);
- int p1y = (int)floor(p1.y + e);
- atomic_min(&bound_minx, p1x);
- atomic_min(&bound_miny, p1y);
- atomic_max(&bound_maxx, p1x);
- atomic_max(&bound_maxy, p1y);
-
- bool flipx = p1.x < p0.x;
- bool flipy = p1.y < p0.y;
- if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; }
- if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; }
- float2 d = p1 - p0;
- float kx = fabs(d.y) < e ? 1e10 : d.x/d.y;
- float ky = fabs(d.x) < e ? 1e10 : d.y/d.x;
-
- while(p0.x != p1.x || p0.y != p1.y) {
- int ix = (int)floor(p0.x + e);
- int iy = (int)floor(p0.y + e);
- if (iy > h1) break;
-
- float px = (float)(ix + 1);
- float py = (float)(iy + 1);
- float2 pp1 = p1;
- if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); }
- if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); }
-
- if (iy >= 0) {
- // calc values
- float cover = pp1.y - p0.y;
- float area = px - 0.5f*(p0.x + pp1.x);
- if (flipx) { ix = w1 - ix; area = 1.f - area; }
- if (flipy) { iy = h1 - iy; cover = -cover; }
- ix = clamp(ix, 0, w1);
-
- // store in buffer
- global int *mark = mark_buffer + (iy*width + ix)*2;
- atomic_add(mark, (int)round(area*cover*65536.f));
- atomic_add(mark + 1, (int)round(cover*65536.f));
- }
-
- p0 = pp1;
- }
- }
- barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
-
- // read bounds
- int minx = max(bound_minx, 0);
- int miny = max(bound_miny, 0);
- int maxx = min(bound_maxx, w1);
- int maxy = min(bound_maxy, h1);
- barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
-
- // fill
- for(int row = miny + id; row <= maxy; row += count) {
- global int *mark = mark_buffer + (row*width + minx)*2;
- global float4 *pixel = image + row*width + minx;
- global float4 *pixel_end = pixel - minx + maxx + 1;
- int icover = 0;
-
- while(pixel < pixel_end) {
- // read mark (alpha, cover)
- int ialpha = abs(icover + *mark); *mark = 0; ++mark;
- icover += *mark; *mark = 0; ++mark;
-
- if (evenodd) ialpha = 65536 - abs(ialpha%131072 - 65536);
- if (invert) ialpha = 65536 - ialpha;
-
- //if (!ialpha) continue;
-
- // write color
- float alpha = (float)ialpha/65536.f*color.w;
- *pixel = *pixel*(1.f - alpha) + color*alpha;
- ++pixel;
- }
- }
- }
-}
-
diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp
index d29863b..d4cda7a 100644
--- a/c++/contourgl/clrender.cpp
+++ b/c++/contourgl/clrender.cpp
@@ -38,7 +38,7 @@ ClRender::ClRender(ClContext &cl):
surface_image(),
prev_event()
{
- contour_program = cl.load_program("contour.cl");
+ contour_program = cl.load_program("contour-fs.cl");
assert(contour_program);
contour_draw_kernel = clCreateKernel(contour_program, "draw", NULL);
@@ -166,13 +166,14 @@ void ClRender::draw() {
cl_event event = prev_event;
size_t count = contour_draw_workgroup_size;
+ size_t group_size = count;
cl.err |= clEnqueueNDRangeKernel(
cl.queue,
contour_draw_kernel,
1,
NULL,
&count,
- &count,
+ &group_size,
event ? 1 : 0,
event ? &event : NULL,
&prev_event );
@@ -187,3 +188,233 @@ void ClRender::wait() {
}
}
+
+// ------------------------------------------------
+
+
+ClRender2::ClRender2(ClContext &cl): // loads contour-sort.cl, creates its three kernels and the samples buffer
+ cl(cl),
+ contour_program(),
+ contour_reset_kernel(),
+ contour_paths_kernel(),
+ contour_draw_kernel(),
+ surface(),
+ points_count(),
+ paths_buffer(),
+ points_buffer(),
+ samples_buffer(),
+ surface_image(),
+ prev_event()
+{
+ contour_program = cl.load_program("contour-sort.cl");
+ assert(contour_program);
+
+ contour_reset_kernel = clCreateKernel(contour_program, "reset", &cl.err);
+ assert(!cl.err);
+ assert(contour_reset_kernel);
+
+ contour_paths_kernel = clCreateKernel(contour_program, "paths", &cl.err);
+ assert(!cl.err);
+ assert(contour_paths_kernel);
+
+ contour_draw_kernel = clCreateKernel(contour_program, "draw", &cl.err);
+ assert(!cl.err);
+ assert(contour_draw_kernel);
+ // fixed 1 GiB device buffer for the row heads and samples, independent of the surface size
+ samples_buffer = clCreateBuffer(
+ cl.context, CL_MEM_READ_WRITE,
+ 1024*1024*1024, NULL,
+ &cl.err );
+ assert(!cl.err);
+ assert(samples_buffer);
+ // the samples buffer never changes, so it is bound once; the index is the kernel parameter position
+ cl.err |= clSetKernelArg(contour_reset_kernel, 0, sizeof(samples_buffer), &samples_buffer);
+ cl.err |= clSetKernelArg(contour_paths_kernel, 2, sizeof(samples_buffer), &samples_buffer);
+ cl.err |= clSetKernelArg(contour_draw_kernel, 2, sizeof(samples_buffer), &samples_buffer);
+ assert(!cl.err);
+}
+
+ClRender2::~ClRender2() { // releases every OpenCL object created by this renderer
+ remove_paths(); // both remove_* calls wait for the queue first
+ remove_surface();
+
+ cl.err |= clReleaseMemObject(samples_buffer);
+ assert(!cl.err);
+ samples_buffer = NULL;
+ // return values of the remaining releases are not checked
+ clReleaseKernel(contour_reset_kernel);
+ clReleaseKernel(contour_paths_kernel);
+ clReleaseKernel(contour_draw_kernel);
+ clReleaseProgram(contour_program);
+}
+
+void ClRender2::remove_surface() { // detaches the surface and frees its device image; no-op if none is attached
+    wait(); // queued commands may still reference surface_image
+    if (surface) {
+        cl.err |= clReleaseMemObject(surface_image);
+        assert(!cl.err);
+        surface_image = NULL; // drop the stale handle, as remove_paths() does for its buffers
+        surface = NULL;
+    }
+}
+
+void ClRender2::send_surface(Surface *surface) { // attaches surface and queues the upload of its pixels
+ if (!surface && !this->surface) return; // nothing attached and nothing to attach
+
+ remove_surface();
+
+ assert(surface);
+ this->surface = surface;
+
+ //Measure t("ClRender::send_surface");
+
+ surface_image = clCreateBuffer(
+ cl.context, CL_MEM_READ_WRITE,
+ surface->count()*sizeof(Color), NULL,
+ &cl.err );
+ assert(!cl.err);
+ assert(surface_image);
+ // blocking_write is false: the copy is only queued here, so surface->data has to stay valid
+ cl.err |= clEnqueueWriteBuffer(
+ cl.queue, surface_image, false,
+ 0, surface->count()*sizeof(Color), surface->data,
+ 0, NULL, NULL );
+ assert(!cl.err);
+ // bind the surface-dependent kernel arguments; the index is the kernel parameter position
+ cl.err |= clSetKernelArg(contour_paths_kernel, 0, sizeof(surface->width), &surface->width);
+ cl.err |= clSetKernelArg(contour_paths_kernel, 1, sizeof(surface->height), &surface->height);
+ cl.err |= clSetKernelArg(contour_draw_kernel, 0, sizeof(surface->width), &surface->width);
+ cl.err |= clSetKernelArg(contour_draw_kernel, 1, sizeof(surface_image), &surface_image);
+ assert(!cl.err);
+}
+
+Surface* ClRender2::receive_surface() { // copies the device image back into surface->data and returns the surface
+ if (surface) {
+ //Measure t("ClRender::receive_surface");
+ // the read is queued non-blocking behind the last draw (prev_event), if there is one
+ cl.err |= clEnqueueReadBuffer(
+ cl.queue, surface_image, CL_FALSE,
+ 0, surface->count()*sizeof(Color), surface->data,
+ prev_event ? 1 : 0,
+ prev_event ? &prev_event : NULL,
+ NULL );
+ assert(!cl.err);
+
+ wait(); // the data is only complete once the queue has finished
+ }
+ return surface; // NULL when no surface is attached
+}
+
+void ClRender2::remove_paths() { // frees the path and point buffers, if any
+ wait(); // queued commands may still reference the buffers
+
+ if (paths_buffer) {
+ cl.err |= clReleaseMemObject(paths_buffer);
+ assert(!cl.err);
+ paths_buffer = NULL;
+ }
+
+ if (points_buffer) {
+ cl.err |= clReleaseMemObject(points_buffer);
+ assert(!cl.err);
+ points_buffer = NULL;
+ points_count = 0;
+ }
+}
+
+void ClRender2::send_paths(const Path *paths, int paths_count, const Point *points, int points_count) {
+ remove_paths();
+ // replaces the device copies of the path and point arrays; both arrays must be non-empty
+ assert(paths);
+ assert(paths_count > 0);
+
+ assert(points);
+ assert(points_count > 0);
+
+ paths_buffer = clCreateBuffer(
+ cl.context, CL_MEM_READ_ONLY,
+ paths_count*sizeof(Path), NULL,
+ &cl.err );
+ assert(!cl.err);
+ assert(paths_buffer);
+
+ cl.err |= clEnqueueWriteBuffer(
+ cl.queue, paths_buffer, false,
+ 0, paths_count*sizeof(Path), paths,
+ 0, NULL, NULL );
+ assert(!cl.err);
+
+ points_buffer = clCreateBuffer(
+ cl.context, CL_MEM_READ_ONLY,
+ points_count*sizeof(Point), NULL,
+ &cl.err );
+ assert(!cl.err);
+ assert(points_buffer);
+ this->points_count = points_count; // draw() derives the paths kernel range from this
+
+ cl.err |= clEnqueueWriteBuffer(
+ cl.queue, points_buffer, false,
+ 0, points_count*sizeof(Point), points,
+ 0, NULL, NULL );
+ assert(!cl.err);
+ // the index is the kernel parameter position in contour-sort.cl
+ cl.err |= clSetKernelArg(contour_paths_kernel, 3, sizeof(points_buffer), &points_buffer);
+ cl.err |= clSetKernelArg(contour_draw_kernel, 3, sizeof(paths_buffer), &paths_buffer);
+ assert(!cl.err);
+
+ wait(); // both uploads were queued non-blocking; finish them before returning to the caller
+}
+
+void ClRender2::draw() {
+    // Queues the three kernels reset -> paths -> draw, chained by events, without blocking.
+    // prev_event ends up holding the event of the final draw kernel, so a later draw() or
+    // receive_surface() can wait on it.
+    assert(surface);          // send_surface() has to come first
+    assert(points_count > 1); // fewer than two points would give an empty paths range
+
+    cl_event wait_event = prev_event; // last queued draw, if it has not been waited for yet
+    cl_event prepare_event = NULL;
+    cl_event paths_event = NULL;
+
+    // reset: one work-item per image row
+    size_t count = surface->height;
+    cl.err |= clEnqueueNDRangeKernel(
+        cl.queue, contour_reset_kernel,
+        1, NULL, &count, NULL,
+        wait_event ? 1 : 0,
+        wait_event ? &wait_event : NULL,
+        &prepare_event );
+    assert(!cl.err);
+    // Every enqueue hands back an event reference owned by the caller; once the dependent
+    // command is queued that reference is no longer needed here and must be released.
+    if (wait_event) cl.err |= clReleaseEvent(wait_event);
+
+    // paths: one work-item per pair of consecutive points
+    count = points_count - 1;
+    cl.err |= clEnqueueNDRangeKernel(
+        cl.queue, contour_paths_kernel,
+        1, NULL, &count, NULL,
+        1, &prepare_event,
+        &paths_event );
+    assert(!cl.err);
+    cl.err |= clReleaseEvent(prepare_event);
+
+    // draw: one work-item per image row
+    count = surface->height;
+    cl.err |= clEnqueueNDRangeKernel(
+        cl.queue, contour_draw_kernel,
+        1, NULL, &count, NULL,
+        1, &paths_event,
+        &prev_event );
+    assert(!cl.err);
+    cl.err |= clReleaseEvent(paths_event);
+    assert(!cl.err);
+}
+
+void ClRender2::wait() { // blocks until the queue is idle, then drops the tracked event
+    cl.err |= clFinish(cl.queue);
+    if (prev_event) { cl.err |= clReleaseEvent(prev_event); prev_event = NULL; } // the enqueue gave us a reference to release
+    assert(!cl.err);
+}
+
+
diff --git a/c++/contourgl/clrender.h b/c++/contourgl/clrender.h
index 2724a39..fd32aa5 100644
--- a/c++/contourgl/clrender.h
+++ b/c++/contourgl/clrender.h
@@ -52,4 +52,51 @@ public:
};
+class ClRender2 { // OpenCL renderer driving the reset/paths/draw kernels of contour-sort.cl
+public:
+ struct Path { // host mirror of the Path struct in contour-sort.cl (same field order)
+ Color color; // presumably four floats to line up with the kernel's float4 — TODO confirm
+ int invert;
+ int evenodd;
+ int align0;
+ int align1;
+ };
+
+ struct Point { // host mirror of the Point struct in contour-sort.cl (same field order)
+ vec2f coord;
+ int path_index; // index of the owning entry in the Path array
+ int align0;
+ };
+
+private:
+ ClContext &cl;
+ cl_program contour_program;
+ cl_kernel contour_reset_kernel;
+ cl_kernel contour_paths_kernel;
+ cl_kernel contour_draw_kernel;
+
+ Surface *surface; // attached by send_surface(), cleared (not deleted) by remove_surface()
+ int points_count; // number of entries uploaded to points_buffer
+ cl_mem paths_buffer;
+ cl_mem points_buffer;
+ cl_mem samples_buffer; // created in the constructor, released in the destructor
+ cl_mem surface_image; // device copy of the attached surface
+ cl_event prev_event; // event of the most recent draw(), NULL after wait()
+
+public:
+ ClRender2(ClContext &cl);
+ ~ClRender2();
+ // surface upload, download and release
+ void send_surface(Surface *surface);
+ Surface* receive_surface();
+ void remove_surface();
+ // path and point upload and release
+ void send_paths(const Path *paths, int paths_count, const Point *points, int points_count);
+ void remove_paths();
+ // draw() only queues the kernels; wait() blocks until the queue has finished
+ void draw();
+ void wait();
+};
+
+
#endif
diff --git a/c++/contourgl/contourgl.cpp b/c++/contourgl/contourgl.cpp
index 238e22f..c1c1596 100644
--- a/c++/contourgl/contourgl.cpp
+++ b/c++/contourgl/contourgl.cpp
@@ -63,6 +63,9 @@ int main() {
{ Surface surface(width, height);
Measure t("test_lines_cl.tga", surface, true);
Test::test_cl(e, data, surface); }
+ { Surface surface(width, height);
+ Measure t("test_lines_cl2.tga", surface, true);
+ Test::test_cl2(e, data, surface); }
}
{ Measure t("test_lines_downgrade", true); Test::downgrade(data, datalow); }
@@ -82,6 +85,9 @@ int main() {
{ Surface surface(width, height);
Measure t("test_lineslow_cl.tga", surface, true);
Test::test_cl(e, datalow, surface); }
+ { Surface surface(width, height);
+ Measure t("test_lineslow_cl2.tga", surface, true);
+ Test::test_cl2(e, datalow, surface); }
}
}
diff --git a/c++/contourgl/measure.cpp b/c++/contourgl/measure.cpp
index a83e088..4d0413d 100644
--- a/c++/contourgl/measure.cpp
+++ b/c++/contourgl/measure.cpp
@@ -18,6 +18,8 @@
#include
#include
+#include
+
#include "measure.h"
#include "utils.h"
#include "glcontext.h"
@@ -39,14 +41,19 @@ void Measure::init() {
<< filename
<< endl << flush;
stack.push_back(this);
- t = clock();
+
+ timespec spec;
+ clock_gettime(CLOCK_MONOTONIC , &spec);
+ t = spec.tv_sec*1000000000 + spec.tv_nsec;
}
Measure::~Measure() {
if (!surface && tga) glFinish();
- clock_t dt = subs ? subs : clock() - t;
- Real ms = 1000.0*(Real)dt/(Real)(CLOCKS_PER_SEC);
+ timespec spec;
+ clock_gettime(CLOCK_MONOTONIC , &spec);
+ long long dt = subs ? subs : spec.tv_sec*1000000000 + spec.tv_nsec - t;
+ Real ms = 1000.0*(Real)dt*(Real)(1e-9);
if (!hide)
cout << string((stack.size()-1)*2, ' ') << "end "
diff --git a/c++/contourgl/measure.h b/c++/contourgl/measure.h
index ec5156a..ebd8ded 100644
--- a/c++/contourgl/measure.h
+++ b/c++/contourgl/measure.h
@@ -18,8 +18,6 @@
#ifndef _MEASURE_H_
#define _MEASURE_H_
-#include
-
#include
#include
@@ -34,8 +32,8 @@ private:
bool tga;
bool hide;
bool hide_subs;
- clock_t subs;
- clock_t t;
+ long long subs;
+ long long t;
Measure(const Measure&): surface(), tga(), hide(), hide_subs(), subs(), t() { }
Measure& operator= (const Measure&) { return *this; }
diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp
index f351efa..bf221c6 100644
--- a/c++/contourgl/test.cpp
+++ b/c++/contourgl/test.cpp
@@ -298,3 +298,56 @@ void Test::test_cl(Environment &e, Data &data, Surface &surface) {
}
clr.receive_surface();
}
+
+void Test::test_cl2(Environment &e, Data &data, Surface &surface) {
+    // Flattens every non-empty contour of data into the Path/Point arrays ClRender2 expects,
+    // renders them once into surface on the OpenCL device and reads the result back.
+    vector<ClRender2::Path> paths;
+    vector<ClRender2::Point> points;
+    paths.reserve(data.size());
+    for(Data::const_iterator i = data.begin(); i != data.end(); ++i)
+        if (int points_count = i->contour.get_chunks().size()) {
+            ClRender2::Path path;
+            path.color = i->color;
+            path.invert = i->invert ? -1 : 0;
+            path.evenodd = i->evenodd ? -1 : 0;
+            path.align0 = 0;
+            path.align1 = 0;
+            paths.push_back(path);
+
+            int first_point_index = (int)points.size();
+            int path_index = (int)paths.size() - 1;
+            points.reserve(points.size() + points_count + 1);
+            for(Contour::ChunkList::const_iterator j = i->contour.get_chunks().begin(); j != i->contour.get_chunks().end(); ++j) {
+                ClRender2::Point point;
+                point.coord = vec2f(j->p1);
+                point.path_index = path_index;
+                point.align0 = 0;
+                points.push_back(point);
+            }
+            points.push_back(points[first_point_index]); // close the contour: the last segment returns to the first point
+        }
+    if (paths.empty()) return; // nothing to render; front() on an empty vector is undefined
+
+    // draw
+    ClRender2 clr(e.cl);
+
+    // warm-up
+    {
+        //clr.send_surface(&surface);
+        //clr.send_paths(&paths.front(), (int)paths.size(), &points.front(), (int)points.size());
+        //for(int i = 0; i < 1000; ++i)
+        //    clr.draw(), clr.wait();
+        //clr.remove_paths();
+    }
+
+    // actual task
+    clr.send_surface(&surface);
+    clr.send_paths(&paths.front(), (int)paths.size(), &points.front(), (int)points.size());
+    {
+        Measure t("render"); // times only the kernels, not the transfers
+        clr.draw();
+        clr.wait();
+    }
+    clr.receive_surface();
+}
diff --git a/c++/contourgl/test.h b/c++/contourgl/test.h
index 18f2680..b8437ff 100644
--- a/c++/contourgl/test.h
+++ b/c++/contourgl/test.h
@@ -54,6 +54,7 @@ public:
static void test_gl_stencil(Environment &e, Data &data);
static void test_sw(Environment &e, Data &data, Surface &surface);
static void test_cl(Environment &e, Data &data, Surface &surface);
+ static void test_cl2(Environment &e, Data &data, Surface &surface);
};
#endif