From b09c5dfae34c23f06364624330a7b645b7ac3d3c Mon Sep 17 00:00:00 2001 From: Ivan Mahonin Date: Jul 18 2018 04:12:05 +0000 Subject: contourgl: cl sort --- diff --git a/c++/contourgl/cl/contour-fs.cl b/c++/contourgl/cl/contour-fs.cl new file mode 100644 index 0000000..84d7061 --- /dev/null +++ b/c++/contourgl/cl/contour-fs.cl @@ -0,0 +1,169 @@ +/* + ......... 2015 Ivan Mahonin + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// paths format: +// { +// int count, +// paths: [ +// { +// int point_count, +// int flags, +// float4 color, +// points: [ float2, ... ] +// }, +// ... +// ] +// } + + +kernel void draw( + const int width, + const int height, + global int *mark_buffer, + global float4 *image, + global const char *paths_buffer ) +{ + const float e = 1e-6f; + + int id = (int)get_local_id(0); + int count = (int)get_local_size(0); + + int paths_count = *(global int *)paths_buffer; + global const char *paths = paths_buffer + sizeof(int); + + int pixels_count = width*height; + float2 size = (float2)((float)width, (float)height); + int w1 = width - 1; + int h1 = height - 1; + + local int bound_minx; + local int bound_miny; + local int bound_maxx; + local int bound_maxy; + + // draw paths + for(int p = 0; p < paths_count; ++p) { + int points_count = *(global const int *)paths; paths += sizeof(int); + int flags = *(global const int *)paths; paths += sizeof(int); + + float4 color; + color.x = *(global const float *)paths; paths += sizeof(float); + color.y = *(global const float *)paths; paths += sizeof(float); + color.z = *(global const float *)paths; paths += sizeof(float); + color.w = *(global const float *)paths; paths += sizeof(float); + + global const float *points = (global const float *)paths; + paths += 2*points_count*sizeof(float); + + int segments_count = points_count - 1; + if (segments_count <= 0) continue; + + bool invert = flags & 1; + bool evenodd = flags & 2; + + if (id == 0) { + bound_minx = invert ? 0 : (int)floor(points[0] + e); + bound_miny = invert ? 0 : (int)floor(points[1] + e); + bound_maxx = invert ? w1 : bound_minx; + bound_maxy = invert ? h1 : bound_miny; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + // trace path + for(int i = id; i < segments_count; i += count) { + int ii = 2*i; + float2 p0 = { points[ii + 0], points[ii + 1] }; + float2 p1 = { points[ii + 2], points[ii + 3] }; + + int p1x = (int)floor(p1.x + e); + int p1y = (int)floor(p1.y + e); + atomic_min(&bound_minx, p1x); + atomic_min(&bound_miny, p1y); + atomic_max(&bound_maxx, p1x); + atomic_max(&bound_maxy, p1y); + + bool flipx = p1.x < p0.x; + bool flipy = p1.y < p0.y; + if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; } + if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; } + float2 d = p1 - p0; + float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; + float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; + + while(p0.x != p1.x || p0.y != p1.y) { + int ix = (int)floor(p0.x + e); + int iy = (int)floor(p0.y + e); + if (iy > h1) break; + + float px = (float)(ix + 1); + float py = (float)(iy + 1); + float2 pp1 = p1; + if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); } + if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); } + + if (iy >= 0) { + // calc values + float cover = pp1.y - p0.y; + float area = px - 0.5f*(p0.x + pp1.x); + if (flipx) { ix = w1 - ix; area = 1.f - area; } + if (flipy) { iy = h1 - iy; cover = -cover; } + ix = clamp(ix, 0, w1); + + // store in buffer + global int *mark = mark_buffer + (iy*width + ix)*2; + atomic_add(mark, (int)round(area*cover*65536.f)); + atomic_add(mark + 1, (int)round(cover*65536.f)); + } + + p0 = pp1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + // read bounds + int minx = max(bound_minx, 0); + int miny = max(bound_miny, 0); + int maxx = min(bound_maxx, w1); + int maxy = min(bound_maxy, h1); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + // fill + for(int row = miny + id; row <= maxy; row += count) { + global int *mark = mark_buffer + (row*width + minx)*2; + global float4 *pixel = image + row*width + minx; + global float4 *pixel_end = pixel - minx + maxx + 1; + int icover = 0; + + while(pixel < pixel_end) { + // read mark (alpha, cover) + int ialpha = abs(icover + *mark); *mark = 0; ++mark; + icover += *mark; *mark = 0; ++mark; + + if (evenodd) ialpha = 65536 - abs(ialpha%131072 - 65536); + if (invert) ialpha = 65536 - ialpha; + + //if (!ialpha) continue; + + // write color + float alpha = (float)ialpha/65536.f*color.w; + *pixel = *pixel*(1.f - alpha) + color*alpha; + ++pixel; + } + } + } +} + diff --git a/c++/contourgl/cl/contour-sort.cl b/c++/contourgl/cl/contour-sort.cl new file mode 100644 index 0000000..8b4a98b --- /dev/null +++ b/c++/contourgl/cl/contour-sort.cl @@ -0,0 +1,214 @@ +/* + ......... 2018 Ivan Mahonin + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* + samples_buffer format: + Sample count, // only field 'next_index' is in use and store index of next sample to allocate + Sample rows[height], // only fields 'next_index' are in use and store index first sample in the row + Sample real_samples[] +*/ + + +typedef struct { + float4 color; + int invert; + int evenodd; + int align0; + int align1; +} Path __attribute__((aligned (32))); + +typedef struct { + float2 coord; + int path_index; + int align0; +} Point __attribute__((aligned (16))); + +typedef struct { + int path_index; + int x; + float area; + float cover; + int next_index; + int align0; + int align1; + int align2; +} Sample __attribute__((aligned (32))); + + +kernel void reset(global Sample *samples) +{ + int id = get_global_id(0); + samples[1+id].path_index = -1; + samples[1+id].next_index = 0; + if (id == 0) { + samples->path_index = -1; + samples->next_index = get_global_size(0) + 1; + } +} + + +kernel void paths( + int width, + int height, + global Sample *samples, + global const Point *points ) +{ + const float e = 1e-6f; + + // flip order, because we will insert samples into front of linked list + int id = get_global_size(0) - get_global_id(0) - 1; + + float2 size = (float2)((float)width, (float)height); + int w1 = width - 1; + int h1 = height - 1; + + global int *next_sample = &samples->next_index; + global Sample *rows = &samples[1]; + + Point point0 = points[id]; + Point point1 = points[id+1]; + if (point0.path_index != point1.path_index) return; + + int path_index = point0.path_index; + float2 p0 = point0.coord; + float2 p1 = point1.coord; + + bool flipx = p1.x < p0.x; + bool flipy = p1.y < p0.y; + if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; } + if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; } + float2 d = p1 - p0; + float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; + float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; + + while(p0.x != p1.x || p0.y != p1.y) { + int ix = (int)floor(p0.x + e); + int iy = (int)floor(p0.y + e); + if (iy > h1) break; + + float px = (float)(ix + 1); + float py = (float)(iy + 1); + float2 pp1 = p1; + if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); } + if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); } + + if (iy >= 0) { + // calc values + Sample sample; + sample.path_index = path_index; + sample.cover = pp1.y - p0.y; + sample.area = px - 0.5f*(p0.x + pp1.x); + if (flipx) { ix = w1 - ix; sample.area = 1.f - sample.area; } + if (flipy) { iy = h1 - iy; sample.cover = -sample.cover; } + sample.area *= sample.cover; + sample.x = clamp(ix, 0, w1); + + // store in buffer + int sample_index = atomic_inc(next_sample); + sample.next_index = atomic_xchg(&rows[iy].next_index, sample_index); + samples[sample_index] = sample; + } + + p0 = pp1; + } +} + + +kernel void draw( + const int width, + global float4 *image, + global Sample *samples, + global Path *paths ) +{ + int id = get_global_id(0); + + global float4 *image_row = image + id*width; + global Sample *first = &samples[1+id]; + + int current_index; + global Sample *prev, *current, *next; + + // sort + bool repeat = true; + while(repeat) { + repeat = false; + prev = first; + current = &samples[ prev->next_index ]; + while(current->next_index) { + next = &samples[ current->next_index ]; + if ( current->path_index > next->path_index + || (current->path_index == next->path_index && current->x > next->x) ) + { + // swap + current_index = prev->next_index; + prev->next_index = current->next_index; + current->next_index = next->next_index; + next->next_index = current_index; + prev = next; + repeat = true; + } else { + prev = current; + current = next; + } + } + } + + // merge + current = &samples[ first->next_index ]; + float c = 0.f; + while(current->next_index) { + c += current->cover; + next = &samples[ current->next_index ]; + if (current->path_index == next->path_index && current->x == next->x) { + current->area += next->area; + current->cover += next->cover; + current->next_index = next->next_index; + } else { + current = next; + } + } + + // draw + global float4 *pixel, *next_pixel; + float cover = 0.f; + float alpha; + int next_index = first->next_index; + while(next_index) { + current = &samples[ next_index ]; + next_index = current->next_index; + + // draw current + float4 color = paths[ current->path_index ].color; + float alpha = min(1.f, fabs(cover + current->area))*color.w; + cover += current->cover; + + pixel = &image_row[current->x]; + *pixel = *pixel*(1.f - alpha) + color*alpha; // TODO: valid composite blending + ++pixel; + + // draw span: current <--> next + next_pixel = fabs(cover) > 0.5f && current->path_index == samples[next_index].path_index + ? &image_row[samples[next_index].x] : pixel; + while(pixel < next_pixel) { + *pixel = *pixel*(1.f - color.w) + color*color.w; // TODO: valid composite blending + ++pixel; + } + + if (current->path_index != samples[next_index].path_index) cover = 0.f; + } +} + diff --git a/c++/contourgl/cl/contour.cl b/c++/contourgl/cl/contour.cl deleted file mode 100644 index 84d7061..0000000 --- a/c++/contourgl/cl/contour.cl +++ /dev/null @@ -1,169 +0,0 @@ -/* - ......... 2015 Ivan Mahonin - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// paths format: -// { -// int count, -// paths: [ -// { -// int point_count, -// int flags, -// float4 color, -// points: [ float2, ... ] -// }, -// ... -// ] -// } - - -kernel void draw( - const int width, - const int height, - global int *mark_buffer, - global float4 *image, - global const char *paths_buffer ) -{ - const float e = 1e-6f; - - int id = (int)get_local_id(0); - int count = (int)get_local_size(0); - - int paths_count = *(global int *)paths_buffer; - global const char *paths = paths_buffer + sizeof(int); - - int pixels_count = width*height; - float2 size = (float2)((float)width, (float)height); - int w1 = width - 1; - int h1 = height - 1; - - local int bound_minx; - local int bound_miny; - local int bound_maxx; - local int bound_maxy; - - // draw paths - for(int p = 0; p < paths_count; ++p) { - int points_count = *(global const int *)paths; paths += sizeof(int); - int flags = *(global const int *)paths; paths += sizeof(int); - - float4 color; - color.x = *(global const float *)paths; paths += sizeof(float); - color.y = *(global const float *)paths; paths += sizeof(float); - color.z = *(global const float *)paths; paths += sizeof(float); - color.w = *(global const float *)paths; paths += sizeof(float); - - global const float *points = (global const float *)paths; - paths += 2*points_count*sizeof(float); - - int segments_count = points_count - 1; - if (segments_count <= 0) continue; - - bool invert = flags & 1; - bool evenodd = flags & 2; - - if (id == 0) { - bound_minx = invert ? 0 : (int)floor(points[0] + e); - bound_miny = invert ? 0 : (int)floor(points[1] + e); - bound_maxx = invert ? w1 : bound_minx; - bound_maxy = invert ? h1 : bound_miny; - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - // trace path - for(int i = id; i < segments_count; i += count) { - int ii = 2*i; - float2 p0 = { points[ii + 0], points[ii + 1] }; - float2 p1 = { points[ii + 2], points[ii + 3] }; - - int p1x = (int)floor(p1.x + e); - int p1y = (int)floor(p1.y + e); - atomic_min(&bound_minx, p1x); - atomic_min(&bound_miny, p1y); - atomic_max(&bound_maxx, p1x); - atomic_max(&bound_maxy, p1y); - - bool flipx = p1.x < p0.x; - bool flipy = p1.y < p0.y; - if (flipx) { p0.x = size.x - p0.x; p1.x = size.x - p1.x; } - if (flipy) { p0.y = size.y - p0.y; p1.y = size.y - p1.y; } - float2 d = p1 - p0; - float kx = fabs(d.y) < e ? 1e10 : d.x/d.y; - float ky = fabs(d.x) < e ? 1e10 : d.y/d.x; - - while(p0.x != p1.x || p0.y != p1.y) { - int ix = (int)floor(p0.x + e); - int iy = (int)floor(p0.y + e); - if (iy > h1) break; - - float px = (float)(ix + 1); - float py = (float)(iy + 1); - float2 pp1 = p1; - if (pp1.x > px) { pp1.x = px; pp1.y = p0.y + ky*(px - p0.x); } - if (pp1.y > py) { pp1.y = py; pp1.x = p0.x + kx*(py - p0.y); } - - if (iy >= 0) { - // calc values - float cover = pp1.y - p0.y; - float area = px - 0.5f*(p0.x + pp1.x); - if (flipx) { ix = w1 - ix; area = 1.f - area; } - if (flipy) { iy = h1 - iy; cover = -cover; } - ix = clamp(ix, 0, w1); - - // store in buffer - global int *mark = mark_buffer + (iy*width + ix)*2; - atomic_add(mark, (int)round(area*cover*65536.f)); - atomic_add(mark + 1, (int)round(cover*65536.f)); - } - - p0 = pp1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - // read bounds - int minx = max(bound_minx, 0); - int miny = max(bound_miny, 0); - int maxx = min(bound_maxx, w1); - int maxy = min(bound_maxy, h1); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - // fill - for(int row = miny + id; row <= maxy; row += count) { - global int *mark = mark_buffer + (row*width + minx)*2; - global float4 *pixel = image + row*width + minx; - global float4 *pixel_end = pixel - minx + maxx + 1; - int icover = 0; - - while(pixel < pixel_end) { - // read mark (alpha, cover) - int ialpha = abs(icover + *mark); *mark = 0; ++mark; - icover += *mark; *mark = 0; ++mark; - - if (evenodd) ialpha = 65536 - abs(ialpha%131072 - 65536); - if (invert) ialpha = 65536 - ialpha; - - //if (!ialpha) continue; - - // write color - float alpha = (float)ialpha/65536.f*color.w; - *pixel = *pixel*(1.f - alpha) + color*alpha; - ++pixel; - } - } - } -} - diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp index d29863b..d4cda7a 100644 --- a/c++/contourgl/clrender.cpp +++ b/c++/contourgl/clrender.cpp @@ -38,7 +38,7 @@ ClRender::ClRender(ClContext &cl): surface_image(), prev_event() { - contour_program = cl.load_program("contour.cl"); + contour_program = cl.load_program("contour-fs.cl"); assert(contour_program); contour_draw_kernel = clCreateKernel(contour_program, "draw", NULL); @@ -166,13 +166,14 @@ void ClRender::draw() { cl_event event = prev_event; size_t count = contour_draw_workgroup_size; + size_t group_size = count; cl.err |= clEnqueueNDRangeKernel( cl.queue, contour_draw_kernel, 1, NULL, &count, - &count, + &group_size, event ? 1 : 0, event ? &event : NULL, &prev_event ); @@ -187,3 +188,233 @@ void ClRender::wait() { } } + +// ------------------------------------------------ + + +ClRender2::ClRender2(ClContext &cl): + cl(cl), + contour_program(), + contour_reset_kernel(), + contour_paths_kernel(), + contour_draw_kernel(), + surface(), + points_count(), + paths_buffer(), + points_buffer(), + samples_buffer(), + surface_image(), + prev_event() +{ + contour_program = cl.load_program("contour-sort.cl"); + assert(contour_program); + + contour_reset_kernel = clCreateKernel(contour_program, "reset", &cl.err); + assert(!cl.err); + assert(contour_reset_kernel); + + contour_paths_kernel = clCreateKernel(contour_program, "paths", &cl.err); + assert(!cl.err); + assert(contour_paths_kernel); + + contour_draw_kernel = clCreateKernel(contour_program, "draw", &cl.err); + assert(!cl.err); + assert(contour_draw_kernel); + + samples_buffer = clCreateBuffer( + cl.context, CL_MEM_READ_WRITE, + 1024*1024*1024, NULL, + &cl.err ); + assert(!cl.err); + assert(samples_buffer); + + cl.err |= clSetKernelArg(contour_reset_kernel, 0, sizeof(samples_buffer), &samples_buffer); + cl.err |= clSetKernelArg(contour_paths_kernel, 2, sizeof(samples_buffer), &samples_buffer); + cl.err |= clSetKernelArg(contour_draw_kernel, 2, sizeof(samples_buffer), &samples_buffer); + assert(!cl.err); +} + +ClRender2::~ClRender2() { + remove_paths(); + remove_surface(); + + cl.err |= clReleaseMemObject(samples_buffer); + assert(!cl.err); + samples_buffer = NULL; + + clReleaseKernel(contour_reset_kernel); + clReleaseKernel(contour_paths_kernel); + clReleaseKernel(contour_draw_kernel); + clReleaseProgram(contour_program); +} + +void ClRender2::remove_surface() { + wait(); + + if (surface) { + cl.err |= clReleaseMemObject(surface_image); + assert(!cl.err); + surface = NULL; + } +} + +void ClRender2::send_surface(Surface *surface) { + if (!surface && !this->surface) return; + + remove_surface(); + + assert(surface); + this->surface = surface; + + //Measure t("ClRender::send_surface"); + + surface_image = clCreateBuffer( + cl.context, CL_MEM_READ_WRITE, + surface->count()*sizeof(Color), NULL, + &cl.err ); + assert(!cl.err); + assert(surface_image); + + cl.err |= clEnqueueWriteBuffer( + cl.queue, surface_image, false, + 0, surface->count()*sizeof(Color), surface->data, + 0, NULL, NULL ); + assert(!cl.err); + + cl.err |= clSetKernelArg(contour_paths_kernel, 0, sizeof(surface->width), &surface->width); + cl.err |= clSetKernelArg(contour_paths_kernel, 1, sizeof(surface->height), &surface->height); + cl.err |= clSetKernelArg(contour_draw_kernel, 0, sizeof(surface->width), &surface->width); + cl.err |= clSetKernelArg(contour_draw_kernel, 1, sizeof(surface_image), &surface_image); + assert(!cl.err); +} + +Surface* ClRender2::receive_surface() { + if (surface) { + //Measure t("ClRender::receive_surface"); + + cl.err |= clEnqueueReadBuffer( + cl.queue, surface_image, CL_FALSE, + 0, surface->count()*sizeof(Color), surface->data, + prev_event ? 1 : 0, + prev_event ? &prev_event : NULL, + NULL ); + assert(!cl.err); + + wait(); + } + return surface; +} + +void ClRender2::remove_paths() { + wait(); + + if (paths_buffer) { + cl.err |= clReleaseMemObject(paths_buffer); + assert(!cl.err); + paths_buffer = NULL; + } + + if (points_buffer) { + cl.err |= clReleaseMemObject(points_buffer); + assert(!cl.err); + points_buffer = NULL; + points_count = 0; + } +} + +void ClRender2::send_paths(const Path *paths, int paths_count, const Point *points, int points_count) { + remove_paths(); + + assert(paths); + assert(paths_count > 0); + + assert(points); + assert(points_count > 0); + + paths_buffer = clCreateBuffer( + cl.context, CL_MEM_READ_ONLY, + paths_count*sizeof(Path), NULL, + &cl.err ); + assert(!cl.err); + assert(paths_buffer); + + cl.err |= clEnqueueWriteBuffer( + cl.queue, paths_buffer, false, + 0, paths_count*sizeof(Path), paths, + 0, NULL, NULL ); + assert(!cl.err); + + points_buffer = clCreateBuffer( + cl.context, CL_MEM_READ_ONLY, + points_count*sizeof(Point), NULL, + &cl.err ); + assert(!cl.err); + assert(points_buffer); + this->points_count = points_count; + + cl.err |= clEnqueueWriteBuffer( + cl.queue, points_buffer, false, + 0, points_count*sizeof(Point), points, + 0, NULL, NULL ); + assert(!cl.err); + + cl.err |= clSetKernelArg(contour_paths_kernel, 3, sizeof(points_buffer), &points_buffer); + cl.err |= clSetKernelArg(contour_draw_kernel, 3, sizeof(paths_buffer), &paths_buffer); + assert(!cl.err); + + wait(); +} + +void ClRender2::draw() { + //Measure t("ClRender::contour"); + + cl_event prepare_event; + cl_event paths_event; + + size_t count = surface->height; + cl.err |= clEnqueueNDRangeKernel( + cl.queue, + contour_reset_kernel, + 1, + NULL, + &count, + NULL, + prev_event ? 1 : 0, + prev_event ? &prev_event : NULL, + &prepare_event ); + assert(!cl.err); + + count = points_count - 1; + cl.err |= clEnqueueNDRangeKernel( + cl.queue, + contour_paths_kernel, + 1, + NULL, + &count, + NULL, + 1, + &prepare_event, + &paths_event ); + assert(!cl.err); + + count = surface->height; + cl.err |= clEnqueueNDRangeKernel( + cl.queue, + contour_draw_kernel, + 1, + NULL, + &count, + NULL, + 1, + &paths_event, + &prev_event ); + assert(!cl.err); +} + +void ClRender2::wait() { + cl.err |= clFinish(cl.queue); + assert(!cl.err); + prev_event = NULL; +} + + diff --git a/c++/contourgl/clrender.h b/c++/contourgl/clrender.h index 2724a39..fd32aa5 100644 --- a/c++/contourgl/clrender.h +++ b/c++/contourgl/clrender.h @@ -52,4 +52,51 @@ public: }; +class ClRender2 { +public: + struct Path { + Color color; + int invert; + int evenodd; + int align0; + int align1; + }; + + struct Point { + vec2f coord; + int path_index; + int align0; + }; + +private: + ClContext &cl; + cl_program contour_program; + cl_kernel contour_reset_kernel; + cl_kernel contour_paths_kernel; + cl_kernel contour_draw_kernel; + + Surface *surface; + int points_count; + cl_mem paths_buffer; + cl_mem points_buffer; + cl_mem samples_buffer; + cl_mem surface_image; + cl_event prev_event; + +public: + ClRender2(ClContext &cl); + ~ClRender2(); + + void send_surface(Surface *surface); + Surface* receive_surface(); + void remove_surface(); + + void send_paths(const Path *paths, int paths_count, const Point *points, int points_count); + void remove_paths(); + + void draw(); + void wait(); +}; + + #endif diff --git a/c++/contourgl/contourgl.cpp b/c++/contourgl/contourgl.cpp index 238e22f..c1c1596 100644 --- a/c++/contourgl/contourgl.cpp +++ b/c++/contourgl/contourgl.cpp @@ -63,6 +63,9 @@ int main() { { Surface surface(width, height); Measure t("test_lines_cl.tga", surface, true); Test::test_cl(e, data, surface); } + { Surface surface(width, height); + Measure t("test_lines_cl2.tga", surface, true); + Test::test_cl2(e, data, surface); } } { Measure t("test_lines_downgrade", true); Test::downgrade(data, datalow); } @@ -82,6 +85,9 @@ int main() { { Surface surface(width, height); Measure t("test_lineslow_cl.tga", surface, true); Test::test_cl(e, datalow, surface); } + { Surface surface(width, height); + Measure t("test_lineslow_cl2.tga", surface, true); + Test::test_cl2(e, datalow, surface); } } } diff --git a/c++/contourgl/measure.cpp b/c++/contourgl/measure.cpp index a83e088..4d0413d 100644 --- a/c++/contourgl/measure.cpp +++ b/c++/contourgl/measure.cpp @@ -18,6 +18,8 @@ #include #include +#include + #include "measure.h" #include "utils.h" #include "glcontext.h" @@ -39,14 +41,19 @@ void Measure::init() { << filename << endl << flush; stack.push_back(this); - t = clock(); + + timespec spec; + clock_gettime(CLOCK_MONOTONIC , &spec); + t = spec.tv_sec*1000000000 + spec.tv_nsec; } Measure::~Measure() { if (!surface && tga) glFinish(); - clock_t dt = subs ? subs : clock() - t; - Real ms = 1000.0*(Real)dt/(Real)(CLOCKS_PER_SEC); + timespec spec; + clock_gettime(CLOCK_MONOTONIC , &spec); + long long dt = subs ? subs : spec.tv_sec*1000000000 + spec.tv_nsec - t; + Real ms = 1000.0*(Real)dt*(Real)(1e-9); if (!hide) cout << string((stack.size()-1)*2, ' ') << "end " diff --git a/c++/contourgl/measure.h b/c++/contourgl/measure.h index ec5156a..ebd8ded 100644 --- a/c++/contourgl/measure.h +++ b/c++/contourgl/measure.h @@ -18,8 +18,6 @@ #ifndef _MEASURE_H_ #define _MEASURE_H_ -#include - #include #include @@ -34,8 +32,8 @@ private: bool tga; bool hide; bool hide_subs; - clock_t subs; - clock_t t; + long long subs; + long long t; Measure(const Measure&): surface(), tga(), hide(), hide_subs(), subs(), t() { } Measure& operator= (const Measure&) { return *this; } diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp index f351efa..bf221c6 100644 --- a/c++/contourgl/test.cpp +++ b/c++/contourgl/test.cpp @@ -298,3 +298,56 @@ void Test::test_cl(Environment &e, Data &data, Surface &surface) { } clr.receive_surface(); } + +void Test::test_cl2(Environment &e, Data &data, Surface &surface) { + // prepare data + + vector paths; + vector points; + paths.reserve(data.size()); + for(Data::const_iterator i = data.begin(); i != data.end(); ++i) + if (int points_count = i->contour.get_chunks().size()) { + ClRender2::Path path; + path.color = i->color; + path.invert = i->invert ? -1 : 0; + path.evenodd = i->evenodd ? -1 : 0; + path.align0 = 0; + path.align1 = 0; + paths.push_back(path); + + int first_point_index = (int)points.size(); + int path_index = (int)paths.size() - 1; + points.reserve(points.size() + points_count + 1); + for(Contour::ChunkList::const_iterator j = i->contour.get_chunks().begin(); j != i->contour.get_chunks().end(); ++j) { + ClRender2::Point point; + point.coord = vec2f(j->p1); + point.path_index = path_index; + point.align0 = 0; + points.push_back(point); + } + points.push_back(points[first_point_index]); + } + + // draw + + ClRender2 clr(e.cl); + + // warm-up + { + //clr.send_surface(&surface); + //clr.send_paths(&paths.front(), (int)paths.size(), &points.front(), (int)points.size()); + //for(int i = 0; i < 1000; ++i) + // clr.draw(), clr.wait(); + //clr.remove_paths(); + } + + // actual task + clr.send_surface(&surface); + clr.send_paths(&paths.front(), (int)paths.size(), &points.front(), (int)points.size()); + { + Measure t("render"); + clr.draw(); + clr.wait(); + } + clr.receive_surface(); +} diff --git a/c++/contourgl/test.h b/c++/contourgl/test.h index 18f2680..b8437ff 100644 --- a/c++/contourgl/test.h +++ b/c++/contourgl/test.h @@ -54,6 +54,7 @@ public: static void test_gl_stencil(Environment &e, Data &data); static void test_sw(Environment &e, Data &data, Surface &surface); static void test_cl(Environment &e, Data &data, Surface &surface); + static void test_cl2(Environment &e, Data &data, Surface &surface); }; #endif