diff --git a/c++/contourgl/cl/contour.cl b/c++/contourgl/cl/contour.cl index bbe9324..2d3635f 100644 --- a/c++/contourgl/cl/contour.cl +++ b/c++/contourgl/cl/contour.cl @@ -16,85 +16,50 @@ */ __kernel void lines( - __global int width, + int width, __global float *lines, __global int *rows, __global float *mark_buffer ) { + const float e = 1e-6f; size_t id = get_global_id(0); - int *row = rows + id*2; - int begin = *rows; - int end = begin + rows[1]; + int begin = rows[id*2]; + int end = begin + rows[id*2 + 1]; for(int i = begin; i < end; ++i) { - float *line = lines + 4*begin; - float2 p0(*line, line[1]); - float2 p1(line[2], line[3]); + float2 p0 = { lines[4*i + 0], lines[4*i + 1] }; + float2 p1 = { lines[4*i + 2], lines[4*i + 3] }; - int iy0 = (int)floor(p0.y); - int iy1 = (int)floor(p1.x); - if (iy1 < iy0) { int sw = iy0; iy0 = iy1; iy1 = iy0; } + int iy0 = (int)floor(fmin(p0.y, p1.y) + e); + int iy1 = (int)floor(fmax(p0.y, p1.y) - e); float2 d = p1 - p0; - float2 k( fabs(d.y) < 1e-6 ? 0.0 : d.x/d.y, - fabs(d.x) < 1e-6 ? 0.0 : d.y/d.x ); + float kx = fabs(d.y) < e ? 0.f : d.x/d.y; + float ky = fabs(d.x) < e ? 0.f : d.y/d.x; for(int r = iy0; r <= iy1; ++r) { - float y = (float)iy0; + float y = (float)r; + float2 pya = { p0.x + kx*(y - p0.y), y }; + float2 pyb = { p0.x + kx*(y + 1.f - p0.y), y + 1.f }; + float2 pp0 = p0.y - y < -e ? pya + : (p0.y - y > 1.f + e ? pyb : p0); + float2 pp1 = p1.y - y < -e ? pya + : (p1.y - y > 1.f + e ? 
pyb : p1); - float2 pp0 = p0; - pp0.y -= y; - if (pp0.y < 0.0) { - pp0.y = 0.0; - pp0.x = p0.x - k.x*y; - } else - if (pp0.y > 1.0) { - pp0.y = 1.0; - pp0.x = p0.x - k.x*(y - 1.0); - } - - float2 pp1 = p1; - pp1.y -= y; - if (pp1.y < 0.0) { - pp1.y = 0.0; - pp1.x = p0.x - k.x*y; - } else - if (pp1.y > 1.0) { - pp1.y = 1.0; - pp1.x = p0.x - k.x*(y - 1.0); - } - - int ix0 = min(max((int)floor(pp0.x), 0), width); - int ix1 = min(max((int)floor(pp1.x), 0), width); - if (ix1 < ix0) { int sw = ix0; ix0 = ix1; ix1 = ix0; } + int ix0 = (int)floor(fmin(pp0.x, pp1.x) + e); + int ix1 = (int)floor(fmax(pp0.x, pp1.x) - e); for(int c = ix0; c <= ix1; ++c) { - float x = (float)ix0; + float x = (float)c; + float2 pxa = { x, p0.y + ky*(x - p0.x) }; + float2 pxb = { x + 1.f, p0.y + ky*(x + 1.f - p0.x) }; + float2 ppp0 = pp0.x - x < -e ? pxa + : (pp0.x - x > 1.f + e ? pxb : pp0); + float2 ppp1 = pp1.x - x < -e ? pxa + : (pp1.x - x > 1.f + e ? pxb : pp1); - float2 ppp0 = pp0; - ppp0.x -= x; - if (ppp0.x < 0.0) { - ppp0.x = 0.0; - ppp0.y = pp0.y - k.y*x; - } else - if (ppp0.x > 1.0) { - ppp0.x = 1.0; - ppp0.y = pp0.y - k.y*(x - 1.0); - } - - float2 ppp1 = pp1; - ppp1.x -= x; - if (ppp1.x < 0.0) { - ppp1.x = 0.0; - ppp1.y = pp0.y - k.y*x; - } else - if (ppp1.x > 1.0) { - ppp1.x = 1.0; - ppp1.y = pp0.y - k.y*(x - 1.0); - } - - float cover = ppp0.y - ppp1.y; - float area = (0.5*(ppp1.x + ppp1.x) - 1.0)*cover; - float *mark = mark_buffer + 2*(r*width + c); - *mark += area; + float cover = ppp1.y - ppp0.y; + float area = (x + 1.f - 0.5f*(ppp0.x + ppp1.x))*cover; + __global float *mark = mark_buffer + 2*(r*width + c); + mark[0] += area; mark[1] += cover; } } @@ -102,30 +67,35 @@ __kernel void lines( } __kernel void fill( - __global int width, + int width, __global float *mark_buffer, __global float *surface_buffer, - __global float color_r, - __global float color_g, - __global float color_b, - __global float color_a ) + float color_r, + float color_g, + float color_b, + float color_a, + int 
invert, + int evenodd ) { - sizet id = get_global_id(0); + size_t id = get_global_id(0); int w = width; float cr = color_r; float cg = color_g; float cb = color_b; float ca = color_a; - float *mark = mark_buffer + 2*id*width; - float *surface = surface_buffer + 4*id*width; - float cover = 0; - for(int i = 0; i < w; ++i, mark += 2, surface += 4) { - float alpha = ca*(1.0 - fabs(1.0 - 0.5*frac(fabs(2.0*(*mark + cover))))); - float alpha_inv = 1.0 - alpha; + __global float *mark = mark_buffer + 2*id*w; + __global float *surface = surface_buffer + 4*id*w; + float cover = 0.f; + for(int i = 0; i < width; ++i, mark += 2, surface += 4) { + float alpha = fabs(*mark + cover); + alpha = evenodd ? ca*(1.f - fabs(1.f - alpha - 2.f*floor(0.5f*alpha))) + : fmin(alpha, 1.f); + if (invert) alpha = 1.f - alpha; + float alpha_inv = 1.f - alpha; surface[0] = surface[0]*alpha_inv + cr*alpha; surface[1] = surface[1]*alpha_inv + cg*alpha; surface[2] = surface[2]*alpha_inv + cb*alpha; - surface[3] = surface[3]*alpha_inv + ca*alpha; + surface[3] = fmin(surface[3] + ca*alpha, 1.f); cover += mark[1]; } } diff --git a/c++/contourgl/clcontext.cpp b/c++/contourgl/clcontext.cpp index 3a478dc..15c876d 100644 --- a/c++/contourgl/clcontext.cpp +++ b/c++/contourgl/clcontext.cpp @@ -43,6 +43,11 @@ ClContext::ClContext(): err(), context(), queue() { assert(!err); cout << "Use CL platform 0 by " << vendor << endl; + char platform_version[256]; + err = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, NULL); + assert(!err); + cout << "Platform 0 OpenCL version " << platform_version << endl; + // devices cl_uint device_count = 0; @@ -54,6 +59,14 @@ ClContext::ClContext(): err(), context(), queue() { err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, devices.size(), &devices.front(), NULL); assert(!err); + char device_name[256]; + clGetDeviceInfo(devices.front(), CL_DEVICE_NAME, sizeof(device_name), device_name, NULL); + cout << "Device 0 name " << device_name 
<< endl; + + char device_version[256]; + clGetDeviceInfo(devices.front(), CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL); + cout << "Device 0 OpenCL version " << device_version << endl; + // context context = clCreateContext(0, 1, &devices.front(), NULL, NULL, &err); @@ -61,7 +74,7 @@ ClContext::ClContext(): err(), context(), queue() { // command queue - cl_command_queue queue = clCreateCommandQueue(context, devices[0], 0, NULL); + queue = clCreateCommandQueue(context, devices[0], 0, NULL); assert(queue); } @@ -78,7 +91,15 @@ cl_program ClContext::load_program(const std::string &filename) { cl_program program = clCreateProgramWithSource(context, 1, &text_pointer, NULL, NULL); assert(program); - err = clBuildProgram(program, devices.size(), &devices.front(), "", NULL, NULL); + err = clBuildProgram(program, 1, &devices.front(), "", NULL, NULL); + if (err) { + size_t size; + clGetProgramBuildInfo(program, devices.front(), CL_PROGRAM_BUILD_LOG, 0, NULL, &size); + char *log = new char[size]; + clGetProgramBuildInfo(program, devices.front(), CL_PROGRAM_BUILD_LOG, size, log, NULL); + cout << log << endl; + delete[] log; + } assert(!err); return program; diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp index b0d8b0a..81e3de4 100644 --- a/c++/contourgl/clrender.cpp +++ b/c++/contourgl/clrender.cpp @@ -57,7 +57,7 @@ void ClRender::send_surface(Surface *surface) { if (this->surface == surface) return; cl.err = clFinish(cl.queue); - assert(cl.err); + assert(!cl.err); if (this->surface) { rows.clear(); @@ -69,12 +69,13 @@ void ClRender::send_surface(Surface *surface) { this->surface = surface; if (this->surface) { - Measure t("ClRender::send_surface"); + //Measure t("ClRender::send_surface"); rows_count = surface->height; even_rows_count = (rows_count+1)/2; odd_rows_count = rows_count - even_rows_count; rows.resize(rows_count); + marks.resize(surface->count()); rows_buffer = clCreateBuffer( cl.context, CL_MEM_READ_ONLY, @@ -84,61 +85,63 @@ 
void ClRender::send_surface(Surface *surface) { mark_buffer = clCreateBuffer( cl.context, CL_MEM_READ_WRITE, - surface->count()*sizeof(vec2f), NULL, + marks.size()*sizeof(marks.front()), NULL, NULL ); assert(mark_buffer); surface_buffer = clCreateBuffer( cl.context, CL_MEM_READ_WRITE, - surface->data_size(), NULL, + surface->data_size(), surface->data, NULL ); assert(surface_buffer); + cl_event event = NULL; cl.err |= clEnqueueWriteBuffer( cl.queue, surface_buffer, CL_TRUE, 0, surface->data_size(), surface->data, - 0, NULL, NULL ); + 0, NULL, &event ); + clWaitForEvents(1, &event); cl.err |= clFinish(cl.queue); - assert(cl.err); + assert(!cl.err); } } Surface* ClRender::receive_surface() { if (surface) { - Measure t("ClRender::receive_surface"); + //Measure t("ClRender::receive_surface"); - cl.err |= clFinish(cl.queue); + cl_event event = NULL; cl.err |= clEnqueueReadBuffer( cl.queue, surface_buffer, CL_TRUE, - 0, sizeof(surface->data_size()), surface->data, - 0, NULL, NULL ); - cl.err |= clFinish(cl.queue); - assert(cl.err); + 0, surface->data_size(), surface->data, + 0, NULL, &event ); + assert(!cl.err); + clWaitForEvents(1, &event); } return surface; } void ClRender::contour(const Contour &contour, const Rect &rect, const Color &color, bool invert, bool evenodd) { - Measure t("ClRender::contour"); + //Measure t("ClRender::contour"); Contour transformed, splitted; Rect to(1.0, 1.0, surface->width - 1.0, surface->height - 1.0); { - Measure t("clone"); + //Measure t("clone"); transformed = contour; } { - Measure t("transform"); + //Measure t("transform"); transformed.transform(rect, to); } { - Measure t("split"); - transformed.allow_split_lines = true; + //Measure t("split"); + splitted.allow_split_lines = true; transformed.split(splitted, to, Vector(0.5, 0.5)); } @@ -147,10 +150,10 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co vector line_rows; { - Measure t("sort lines"); + //Measure t("sort lines"); // reset rows - for(int i 
= 1; i < (int)rows_count; ++i) + for(int i = 0; i < (int)rows_count; ++i) rows[i].second = 0; // count lines @@ -167,8 +170,15 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co line2f l(vec2f(prev), vec2f(i->p1)); l.p0.x = min(max(l.p0.x, x0), x1); l.p1.x = min(max(l.p1.x, x0), x1); + assert( (int)floorf(l.p0.x) >= 0 && (int)floorf(l.p0.x) < surface->width + && (int)floorf(l.p1.x) >= 0 && (int)floorf(l.p1.x) < surface->width + && (int)floorf(l.p0.y) >= 0 && (int)floorf(l.p0.y) < surface->height + && (int)floorf(l.p1.y) >= 0 && (int)floorf(l.p1.y) < surface->height + && abs((int)floorf(l.p1.x) - (int)floorf(l.p0.x)) <= 1 + && abs((int)floorf(l.p1.y) - (int)floorf(l.p0.y)) <= 1 ); int row = (int)floorf(min(l.p0.y, l.p1.y)); row = row % 2 ? row/2 : even_rows_count + row/2; + assert(row >= 0 && row < (int)rows_count); line_rows.push_back(row); lines.push_back(l); ++rows[row].second; @@ -185,14 +195,18 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co // make sorted list sorted_lines.resize(lines_count); - for(int i = 0; i < lines_count; ++i) + for(int i = 0; i < lines_count; ++i) { + assert(rows[line_rows[i]].first > 0 && rows[line_rows[i]].first <= lines_count); sorted_lines[ --rows[line_rows[i]].first ] = lines[i]; + } } + if (sorted_lines.empty()) return; + cl_mem lines_buffer = NULL; { - Measure t("create lines buffer"); + //Measure t("create lines buffer"); lines_buffer = clCreateBuffer( cl.context, CL_MEM_READ_ONLY, @@ -202,7 +216,7 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co } { - Measure t("enqueue commands"); + //Measure t("enqueue commands"); clFinish(cl.queue); @@ -215,7 +229,7 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co cl.err |= clSetKernelArg(contour_lines_kernel, 3, sizeof(mark_buffer), &mark_buffer); assert(!cl.err); - // TODO: invert, evenodd + int iinvert = invert, ievenodd = evenodd; cl.err |= 
clSetKernelArg(contour_fill_kernel, 0, sizeof(width), &width); cl.err |= clSetKernelArg(contour_fill_kernel, 1, sizeof(mark_buffer), &mark_buffer); cl.err |= clSetKernelArg(contour_fill_kernel, 2, sizeof(surface_buffer), &surface_buffer); @@ -223,6 +237,8 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(Color::type), &color.g); cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(Color::type), &color.b); cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(Color::type), &color.a); + cl.err |= clSetKernelArg(contour_fill_kernel, 7, sizeof(int), &iinvert); + cl.err |= clSetKernelArg(contour_fill_kernel, 8, sizeof(int), &ievenodd); assert(!cl.err); // prepare buffers @@ -233,23 +249,24 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co cl.queue, lines_buffer, CL_TRUE, 0, sorted_lines.size()*sizeof(sorted_lines.front()), &sorted_lines.front(), 0, NULL, &prepare_buffers_events[0] ); + assert(!cl.err); cl.err |= clEnqueueWriteBuffer( cl.queue, rows_buffer, CL_TRUE, 0, rows.size()*sizeof(rows.front()), &rows.front(), 0, NULL, &prepare_buffers_events[1] ); + assert(!cl.err); - vec2f pattern; - cl.err |= clEnqueueFillBuffer( - cl.queue, mark_buffer, - &pattern, sizeof(pattern), - 0, surface->count()*sizeof(vec2f), + cl.err |= clEnqueueWriteBuffer( + cl.queue, mark_buffer, CL_TRUE, + 0, marks.size()*sizeof(marks.front()), &marks.front(), 0, NULL, &prepare_buffers_events[2] ); + assert(!cl.err); // run kernels cl_event lines_odd_event = NULL; - cl.err = clEnqueueNDRangeKernel( + cl.err |= clEnqueueNDRangeKernel( cl.queue, contour_lines_kernel, 1, @@ -262,7 +279,7 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co assert(!cl.err); cl_event lines_even_event = NULL; - cl.err = clEnqueueNDRangeKernel( + cl.err |= clEnqueueNDRangeKernel( cl.queue, contour_lines_kernel, 1, @@ -275,7 +292,7 @@ void ClRender::contour(const 
Contour &contour, const Rect &rect, const Color &co assert(!cl.err); cl_event fill_event = NULL; - cl.err = clEnqueueNDRangeKernel( + cl.err |= clEnqueueNDRangeKernel( cl.queue, contour_fill_kernel, 1, @@ -291,7 +308,7 @@ void ClRender::contour(const Contour &contour, const Rect &rect, const Color &co } { - Measure t("release lines buffer"); + //Measure t("release lines buffer"); clReleaseMemObject(lines_buffer); } } diff --git a/c++/contourgl/clrender.h b/c++/contourgl/clrender.h index 874f757..097c9f0 100644 --- a/c++/contourgl/clrender.h +++ b/c++/contourgl/clrender.h @@ -44,6 +44,7 @@ private: typedef std::pair Row; std::vector rows; + std::vector marks; public: ClRender(ClContext &cl); diff --git a/c++/contourgl/contour.cpp b/c++/contourgl/contour.cpp index 54b0866..7329a9f 100644 --- a/c++/contourgl/contour.cpp +++ b/c++/contourgl/contour.cpp @@ -94,7 +94,7 @@ void Contour::line_split( { Vector p = (p0 + p1)*0.5; line_split(ref_line_bounds, bounds, min_size, p, level-1); - line_split(ref_line_bounds, bounds, min_size, p, level-1); + line_split(ref_line_bounds, bounds, min_size, p1, level-1); return; } } @@ -197,10 +197,8 @@ void Contour::split(Contour &c, const Rect &bounds, const Vector &min_size) cons line_bounds.p1 = c.current(); break; case LINE: - c.line_split(line_bounds, bounds, min_size, i->p1); - break; case CLOSE: - c.close(); + c.line_split(line_bounds, bounds, min_size, i->p1); break; case CONIC: { diff --git a/c++/contourgl/contourgl.cpp b/c++/contourgl/contourgl.cpp index b1a2546..207bfa4 100644 --- a/c++/contourgl/contourgl.cpp +++ b/c++/contourgl/contourgl.cpp @@ -28,8 +28,8 @@ int main() { Test test(e); e.cl.hello(); - test.test2(); - test.test3(); + //test.test2(); + //test.test3(); test.test4(); cout << "done" << endl; diff --git a/c++/contourgl/measure.cpp b/c++/contourgl/measure.cpp index cda2e07..f5ff2d1 100644 --- a/c++/contourgl/measure.cpp +++ b/c++/contourgl/measure.cpp @@ -36,6 +36,7 @@ Measure::Measure(const std::string &filename): 
sub_tasks(), t() { + cout << string(stack.size()*2, ' ') << "begin " << filename << endl << flush; stack.push_back(this); t = clock(); } @@ -47,7 +48,7 @@ Measure::Measure(const std::string &filename, Surface &surface): sub_tasks(), t() { - cout << string(stack.size()*2, ' ') << "begin " << filename; + cout << string(stack.size()*2, ' ') << "begin " << filename << endl << flush; stack.push_back(this); t = clock(); } @@ -56,10 +57,10 @@ Measure::~Measure() { if (!surface && tga) glFinish(); clock_t dt = sub_tasks ? sub_tasks : clock() - t; - Real ms = 1000.0*(Real)(clock() - t)/(Real)(CLOCKS_PER_SEC); + Real ms = 1000.0*(Real)dt/(Real)(CLOCKS_PER_SEC); - cout << setw(8) << fixed << setprecision(3) - << string(stack.size()*2, ' ') << "end " + cout << string((stack.size()-1)*2, ' ') << "end " + << setw(8) << fixed << setprecision(3) << ms << " ms - " << filename << endl << flush; diff --git a/c++/contourgl/swrender.h b/c++/contourgl/swrender.h index 3a4530f..44d67b0 100644 --- a/c++/contourgl/swrender.h +++ b/c++/contourgl/swrender.h @@ -45,7 +45,7 @@ public: { clear(); } ~Surface() - { delete data; } + { delete[] data; } void clear() { memset(data, 0, count()*sizeof(Color)); } int count() const { return width*height; } diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp index e703cac..a9129f1 100644 --- a/c++/contourgl/test.cpp +++ b/c++/contourgl/test.cpp @@ -536,11 +536,12 @@ void Test::test4() { } } + Surface surface(width+2, height+2); + { // cl vector contours_cl = contours; - Surface surface(width+2, height+2); Measure t("test_4_cl.tga", surface);