From 77d27147b0ae8e0da0add026b17d908361d4269c Mon Sep 17 00:00:00 2001 From: Ivan Mahonin Date: Jul 25 2018 08:42:31 +0000 Subject: contourgl: cl optimize7 --- diff --git a/c++/contourgl/cl/contour-base.cl b/c++/contourgl/cl/contour-base.cl index 6106fc6..b6e4eca 100644 --- a/c++/contourgl/cl/contour-base.cl +++ b/c++/contourgl/cl/contour-base.cl @@ -39,9 +39,11 @@ kernel void path( int height, global int *marks, global float2 *points, - int4 bounds ) + int end, + int minx ) { int id = get_global_id(0); + if (id >= end) return; float2 p0 = points[id]; float2 p1 = points[id + 1]; @@ -86,7 +88,7 @@ kernel void path( row += 2; iix = (ix & (ix + 1)) - 1; - while(iix >= bounds.s0) { + while(iix >= minx) { atomic_min(row + 4*iix, ix); iix = (iix & (iix + 1)) - 1; } @@ -103,22 +105,23 @@ kernel void fill( global int4 *marks, global float4 *image, float4 color, - int2 boundsx ) + int4 bounds ) { + if (get_global_id(0) >= bounds.s3) return; int id = width*(int)get_global_id(0); marks += id; image += id; global int4 *mark; global float4 *pixel; - //prefetch(row + boundsx.s0, boundsx.s1 - boundsx.s0); - //prefetch(image_row + boundsx.s0, boundsx.s1 - boundsx.s0); + //prefetch(row + bounds.s0, bounds.s2 - bounds.s0); + //prefetch(image_row + bounds.s0, bounds.s2 - bounds.s0); int4 m; float alpha; //int ialpha; - int icover = 0, c0 = boundsx.s0, c1 = boundsx.s0; - while(c1 < boundsx.s1) { + int icover = 0, c0 = bounds.s0, c1 = bounds.s0; + while(c1 < bounds.s2) { //ialpha = abs(icover); //ialpha = evenodd ? ONE - abs((ialpha % TWO) - ONE) // : min(ialpha, ONE); diff --git a/c++/contourgl/clrender.cpp b/c++/contourgl/clrender.cpp index 03f32a3..587b703 100644 --- a/c++/contourgl/clrender.cpp +++ b/c++/contourgl/clrender.cpp @@ -583,26 +583,35 @@ void ClRender3::draw(const Path &path) { vec2i boundsx(bounds.minx, bounds.maxx); - cl.err |= clSetKernelArg(contour_path_kernel, 4, sizeof(bounds), &bounds); + cl.err |= clSetKernelArg(contour_path_kernel, 4, sizeof(path.end), &path.end); + cl.err |= clSetKernelArg(contour_path_kernel, 5, sizeof(bounds.minx), &bounds.minx); assert(!cl.err); cl.err |= clSetKernelArg(contour_fill_kernel, 3, sizeof(path.color), &path.color); - cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(boundsx), &boundsx); + cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(bounds), &bounds); assert(!cl.err); - size_t offset = path.begin; - size_t count = path.end - path.begin; + size_t group_size, offset, count; + + offset = path.begin; + count = path.end - path.begin - 1; + group_size = 8; + + count = ((count - 1)/group_size + 1)*group_size; cl.err |= clEnqueueNDRangeKernel( cl.queue, contour_path_kernel, - 1, &offset, &count, NULL, + 1, &offset, &count, &group_size, 0, NULL, NULL ); assert(!cl.err); offset = bounds.miny; count = bounds.maxy - bounds.miny; + group_size = 3; + + count = ((count - 1)/group_size + 1)*group_size; cl.err |= clEnqueueNDRangeKernel( cl.queue, contour_fill_kernel, - 1, &offset, &count, NULL, + 1, &offset, &count, &group_size, 0, NULL, NULL ); assert(!cl.err); } diff --git a/c++/contourgl/contourgl.cpp b/c++/contourgl/contourgl.cpp index 93615fa..06ddd49 100644 --- a/c++/contourgl/contourgl.cpp +++ b/c++/contourgl/contourgl.cpp @@ -88,9 +88,9 @@ int main() { */ { Environment e(width, height, false, false, 8); - { Surface surface(width, height); - Measure t("test_lineslow_sw.tga", surface, true); - Test::test_sw(e, datalow, surface); } + //{ Surface surface(width, height); + // Measure t("test_lineslow_sw.tga", surface, true); + // Test::test_sw(e, datalow, surface); } /* { Surface surface(width, height); Measure t("test_lineslow_cl.tga", surface, true); diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp index b0434f3..fc75a99 100644 --- a/c++/contourgl/test.cpp +++ b/c++/contourgl/test.cpp @@ -233,18 +233,42 @@ void Test::test_gl_stencil(Environment &e, Data &data) { } void Test::test_sw(Environment &e, Data &data, Surface &surface) { - vector polyspans(data.size()); - { - Measure t("polyspans"); + const int warm_up_count = 1000; + const int measure_count = 1000; + Surface surface_tmp(surface.width, surface.height); + + // warm-up + for(int ii = 0; ii < warm_up_count; ++ii) { + vector polyspans(data.size()); for(int i = 0; i < (int)data.size(); ++i) { polyspans[i].init(0, 0, surface.width, surface.height); data[i].contour.to_polyspan(polyspans[i]); polyspans[i].sort_marks(); } + for(int i = 0; i < (int)data.size(); ++i) + SwRender::polyspan(surface_tmp, polyspans[i], data[i].color, data[i].evenodd, data[i].invert); } - { - Measure t("render"); + // measure + for(int ii = 0; ii < measure_count; ++ii) { + Measure t("render", false, true); + vector polyspans(data.size()); + for(int i = 0; i < (int)data.size(); ++i) { + polyspans[i].init(0, 0, surface.width, surface.height); + data[i].contour.to_polyspan(polyspans[i]); + polyspans[i].sort_marks(); + } + for(int i = 0; i < (int)data.size(); ++i) + SwRender::polyspan(surface_tmp, polyspans[i], data[i].color, data[i].evenodd, data[i].invert); + } + + { // draw + vector polyspans(data.size()); + for(int i = 0; i < (int)data.size(); ++i) { + polyspans[i].init(0, 0, surface.width, surface.height); + data[i].contour.to_polyspan(polyspans[i]); + polyspans[i].sort_marks(); + } for(int i = 0; i < (int)data.size(); ++i) SwRender::polyspan(surface, polyspans[i], data[i].color, data[i].evenodd, data[i].invert); }