diff --git a/c++/contourgl/Makefile b/c++/contourgl/Makefile
index 09ef1f2..c8d388c 100644
--- a/c++/contourgl/Makefile
+++ b/c++/contourgl/Makefile
@@ -43,7 +43,9 @@ SOURCES = \
utils.cpp
ifdef CUDA
- SOURCES += cudacontext.cpp
+ SOURCES += \
+ cudacontext.cpp \
+ cudarender.cpp
CUDA_SOURCES = \
cuda/contour.cu \
cuda/hello.cu
diff --git a/c++/contourgl/contourgl.cpp b/c++/contourgl/contourgl.cpp
index 06ddd49..9c24d70 100644
--- a/c++/contourgl/contourgl.cpp
+++ b/c++/contourgl/contourgl.cpp
@@ -98,10 +98,13 @@ int main() {
{ Surface surface(width, height);
Measure t("test_lineslow_cl2.tga", surface, true);
Test::test_cl2(e, datalow, surface); }
- */
{ Surface surface(width, height);
Measure t("test_lineslow_cl3.tga", surface, true);
Test::test_cl3(e, datalow, surface); }
+ */
+ { Surface surface(width, height);
+ Measure t("test_lineslow_cu.tga", surface, true);
+ Test::test_cu(e, datalow, surface); }
}
}
diff --git a/c++/contourgl/cuda/contour.cu b/c++/contourgl/cuda/contour.cu
index e69de29..db537b2 100644
--- a/c++/contourgl/cuda/contour.cu
+++ b/c++/contourgl/cuda/contour.cu
@@ -0,0 +1,151 @@
+/*
+ ......... 2018 Ivan Mahonin
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+extern "C" {
+
+#define ONE 65536
+#define TWO 131072 // (ONE)*2
+#define HALF 32768 // (ONE)/2
+#define ONE_F 65536.f // (float)(ONE)
+#define DIV_ONE_F 0.0000152587890625f // 1.f/(ONE_F)
+
+
+__global__ void clear(
+ int width,
+ int height,
+ int4 *marks )
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int c = id % width;
+ marks[id] = make_int4(0, 0, c | (c + 1), 0);
+}
+
+__global__ void path(
+ int width,
+ int height,
+ int *marks,
+ const float2 *points,
+ int begin,
+ int end,
+ int minx )
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x + begin;
+ if (id >= end) return;
+ float2 p0 = points[id];
+ float2 p1 = points[id + 1];
+
+ bool flipx = p1.x < p0.x;
+ bool flipy = p1.y < p0.y;
+ if (flipx) { p0.x = (float)width - p0.x; p1.x = (float)width - p1.x; }
+ if (flipy) { p0.y = (float)height - p0.y; p1.y = (float)height - p1.y; }
+
+ float2 d;
+ d.x = p1.x - p0.x;
+ d.y = p1.y - p0.y;
+ float kx = d.x/d.y;
+ float ky = d.y/d.x;
+ int w1 = width - 1;
+ int h1 = height - 1;
+
+ while(p0.x != p1.x || p0.y != p1.y) {
+ int ix = (int)p0.x;
+ int iy = max((int)p0.y, 0);
+ if (iy > h1) return;
+
+ float2 px, py;
+ px.x = (float)(ix + 1);
+ py.y = (float)(iy + 1);
+ ix = max(0, min(w1, ix));
+
+ px.y = p0.y + ky*(px.x - p0.x);
+ py.x = p0.x + kx*(py.y - p0.y);
+
+ float2 pp1 = p1;
+ if (pp1.x > px.x) pp1 = px;
+ if (pp1.y > py.y) pp1 = py;
+
+ float cover = (pp1.y - p0.y)*ONE_F;
+ float area = px.x - 0.5f*(p0.x + pp1.x);
+ if (flipx) { ix = w1 - ix; area = 1.f - area; }
+ if (flipy) { iy = h1 - iy; cover = -cover; }
+
+ int *row = marks + 4*iy*width;
+ atomicAdd(
+ (unsigned long long*)(row + 4*ix),
+ ((unsigned long long)(unsigned int)(int)(cover) << 32)
+ | (unsigned long long)(unsigned int)((int)(area*cover)) );
+ //row[4*ix] += (int)(area*cover);
+ //row[4*ix + 1] += (int)(cover);
+ //atomicAdd(row + 4*ix, (int)(area*cover));
+ //atomicAdd(row + 4*ix + 1, (int)(cover));
+
+ row += 2;
+ int iix = (ix & (ix + 1)) - 1;
+ while(iix >= minx) {
+ atomicMin(row + 4*iix, ix);
+ iix = (iix & (iix + 1)) - 1;
+ }
+
+ p0 = pp1;
+ }
+}
+
+__global__ void fill(
+ int width,
+ int4 *marks,
+ float4 *image,
+ float4 color,
+ int4 bounds )
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x + bounds.y;
+ if (id >= bounds.w) return;
+ id *= width;
+ marks += id;
+ image += id;
+
+ int4 *mark;
+ float4 *pixel;
+
+ int4 m;
+ int icover = 0, c0 = bounds.x, c1 = bounds.x;
+ while(c1 < bounds.z) {
+ if (abs(icover) > HALF)
+ while(c0 < c1)
+ image[c0++] = color;
+
+ mark = &marks[c1];
+ m = *mark;
+ *mark = make_int4(0, 0, c1 | (c1 + 1), 0);
+
+ float alpha = (float)abs(m.x + icover)*DIV_ONE_F;
+ float one_alpha = 1.f - alpha;
+
+ pixel = &image[c1];
+ float4 p = *pixel;
+ p.x = p.x*one_alpha + color.x*alpha;
+ p.y = p.y*one_alpha + color.y*alpha;
+ p.z = p.z*one_alpha + color.z*alpha;
+ p.w = p.w*one_alpha + color.w*alpha;
+ *pixel = p;
+
+ icover += m.y;
+ c0 = c1 + 1;
+ c1 = m.z;
+ }
+}
+
+}
\ No newline at end of file
diff --git a/c++/contourgl/cudacontext.cpp b/c++/contourgl/cudacontext.cpp
index 3901098..d9d978d 100644
--- a/c++/contourgl/cudacontext.cpp
+++ b/c++/contourgl/cudacontext.cpp
@@ -86,7 +86,7 @@ CudaContext::CudaContext():
err = cuCtxCreate(&context, CU_CTX_SCHED_AUTO, device);
assert(!err);
- hello();
+ //hello();
}
CudaContext::~CudaContext() {
diff --git a/c++/contourgl/cudarender.cpp b/c++/contourgl/cudarender.cpp
new file mode 100644
index 0000000..70eff45
--- /dev/null
+++ b/c++/contourgl/cudarender.cpp
@@ -0,0 +1,193 @@
+/*
+ ......... 2018 Ivan Mahonin
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+#include
+
+#include
+#include
+
+#include "cudarender.h"
+#include "measure.h"
+
+
+using namespace std;
+
+
+CudaRender::CudaRender(CudaContext &cu):
+ cu(cu),
+ contour_module(),
+ contour_clear_kernel(),
+ contour_path_kernel(),
+ contour_fill_kernel(),
+ surface(),
+ points_buffer(),
+ mark_buffer(),
+ surface_image()
+{
+ cu.err = cuModuleLoad(&contour_module, "cuda/contour.ptx");
+ assert(!cu.err);
+
+ cu.err = cuModuleGetFunction(&contour_clear_kernel, contour_module, "clear");
+ assert(!cu.err);
+
+ cu.err = cuModuleGetFunction(&contour_path_kernel, contour_module, "path");
+ assert(!cu.err);
+
+ cu.err = cuModuleGetFunction(&contour_fill_kernel, contour_module, "fill");
+ assert(!cu.err);
+}
+
+CudaRender::~CudaRender() {
+ send_points(NULL, 0);
+ send_surface(NULL);
+
+ cu.err = cuModuleUnload(contour_module);
+ assert(!cu.err);
+}
+
+void CudaRender::send_surface(Surface *surface) {
+ if (this->surface) {
+ wait();
+
+ cu.err = cuMemFree(surface_image);
+ assert(!cu.err);
+ surface_image = 0;
+
+ cu.err = cuMemFree(mark_buffer);
+ assert(!cu.err);
+ mark_buffer = 0;
+ }
+
+ this->surface = surface;
+
+ if (this->surface) {
+ int zero_mark[4] = { };
+
+ cu.err = cuMemAlloc(&surface_image, surface->data_size());
+ assert(!cu.err);
+
+ cu.err = cuMemcpyHtoD(surface_image, surface->data, surface->data_size());
+ assert(!cu.err);
+
+ cu.err = cuMemAlloc(&mark_buffer, surface->count()*sizeof(zero_mark));
+ assert(!cu.err);
+
+ size_t group_size = 32;
+ size_t count = surface->count();
+ cu.err = cuLaunchKernel(
+ contour_clear_kernel,
+ (count - 1)/group_size + 1, 1, 1,
+ group_size, 1, 1,
+ 0, 0, 0,
+ CudaParams()
+ .add(surface->width)
+ .add(surface->height)
+ .add(mark_buffer)
+ .get_extra() );
+ assert(!cu.err);
+
+ wait();
+ }
+}
+
+Surface* CudaRender::receive_surface() {
+ if (surface) {
+ wait();
+ cu.err = cuMemcpyDtoH(surface->data, surface_image, surface->data_size());
+ assert(!cu.err);
+ }
+ return surface;
+}
+
+void CudaRender::send_points(const vec2f *points, int count) {
+ if (points_buffer) {
+ wait();
+ cu.err = cuMemFree(points_buffer);
+ assert(!cu.err);
+ points_buffer = 0;
+ }
+
+ if (points && count > 0) {
+ cu.err = cuMemAlloc(&points_buffer, count*sizeof(vec2f));
+ assert(!cu.err);
+
+ cu.err = cuMemcpyHtoD(points_buffer, points, count*sizeof(vec2f));
+ assert(!cu.err);
+ }
+}
+
+void CudaRender::draw(const Path &path) {
+ assert(surface);
+ assert(points_buffer);
+
+ ContextRect bounds;
+ bounds.minx = max(1, path.bounds.minx);
+ bounds.maxx = min(surface->width, path.bounds.maxx);
+ bounds.miny = max(0, path.bounds.miny);
+ bounds.maxy = min(surface->height, path.bounds.maxy);
+ if ( bounds.minx >= bounds.maxx
+ || bounds.miny >= bounds.maxy
+ || path.begin >= path.end ) return;
+
+ vec2i boundsx(bounds.minx, bounds.maxx);
+
+ size_t group_size, count;
+
+ count = path.end - path.begin;
+ group_size = 8;
+
+ count = (count - 1)/group_size + 1;
+ cu.err = cuLaunchKernel(
+ contour_path_kernel,
+ count, 1, 1,
+ group_size, 1, 1,
+ 0, 0, 0,
+ CudaParams()
+ .add(surface->width)
+ .add(surface->height)
+ .add(mark_buffer)
+ .add(points_buffer)
+ .add(path.begin)
+ .add(path.end)
+ .add(bounds.minx)
+ .get_extra() );
+ assert(!cu.err);
+
+ count = bounds.maxy - bounds.miny;
+ group_size = 1;
+
+ count = (count - 1)/group_size + 1;
+ cu.err = cuLaunchKernel(
+ contour_fill_kernel,
+ count, 1, 1,
+ group_size, 1, 1,
+ 0, 0, 0,
+ CudaParams()
+ .add(surface->width)
+ .add(mark_buffer)
+ .add(surface_image)
+ .add(path.color, 16)
+ .add(bounds, 16)
+ .get_extra() );
+ assert(!cu.err);
+}
+
+void CudaRender::wait() {
+ cu.err = cuStreamSynchronize(0);
+ assert(!cu.err);
+}
+
diff --git a/c++/contourgl/cudarender.h b/c++/contourgl/cudarender.h
new file mode 100644
index 0000000..7eae406
--- /dev/null
+++ b/c++/contourgl/cudarender.h
@@ -0,0 +1,66 @@
+/*
+ ......... 2018 Ivan Mahonin
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+#ifndef _CUDARENDER_H_
+#define _CUDARENDER_H_
+
+#include
+
+#include "cudacontext.h"
+#include "geometry.h"
+#include "contour.h"
+#include "swrender.h"
+
+
+class CudaRender {
+public:
+ struct Path {
+ ContextRect bounds;
+ int begin;
+ int end;
+ Color color;
+ bool invert;
+ bool evenodd;
+ };
+
+private:
+ CudaContext &cu;
+ CUmodule contour_module;
+ CUfunction contour_clear_kernel;
+ CUfunction contour_path_kernel;
+ CUfunction contour_fill_kernel;
+
+ Surface *surface;
+ CUdeviceptr points_buffer;
+ CUdeviceptr mark_buffer;
+ CUdeviceptr surface_image;
+
+public:
+ CudaRender(CudaContext &cl);
+ ~CudaRender();
+
+ void send_surface(Surface *surface);
+ Surface* receive_surface();
+
+ void send_points(const vec2f *points, int count);
+
+ void draw(const Path &path);
+ void wait();
+};
+
+
+#endif
diff --git a/c++/contourgl/test.cpp b/c++/contourgl/test.cpp
index fc75a99..e32b29c 100644
--- a/c++/contourgl/test.cpp
+++ b/c++/contourgl/test.cpp
@@ -26,6 +26,9 @@
#include "utils.h"
#include "clrender.h"
+#ifdef CUDA
+#include "cudarender.h"
+#endif
using namespace std;
@@ -444,3 +447,74 @@ void Test::test_cl3(Environment &e, Data &data, Surface &surface) {
}
clr.receive_surface();
}
+
+void Test::test_cu(Environment &e, Data &data, Surface &surface) {
+#ifdef CUDA
+ // prepare data
+ vector paths;
+ vector points;
+ paths.reserve(data.size());
+ for(Data::const_iterator i = data.begin(); i != data.end(); ++i) {
+ if (!i->contour.get_chunks().empty()) {
+ CudaRender::Path path = {};
+ path.color = i->color;
+ path.invert = i->invert;
+ path.evenodd = i->evenodd;
+
+ path.bounds.minx = path.bounds.maxx = (int)floor(i->contour.get_chunks().front().p1.x);
+ path.bounds.miny = path.bounds.maxy = (int)floor(i->contour.get_chunks().front().p1.y);
+ path.begin = (int)points.size();
+ points.reserve(points.size() + i->contour.get_chunks().size() + 1);
+ for(Contour::ChunkList::const_iterator j = i->contour.get_chunks().begin(); j != i->contour.get_chunks().end(); ++j) {
+ int x = (int)floor(j->p1.x);
+ int y = (int)floor(j->p1.y);
+ if (path.bounds.minx > x) path.bounds.minx = x;
+ if (path.bounds.maxx < x) path.bounds.maxx = x;
+ if (path.bounds.miny > y) path.bounds.miny = y;
+ if (path.bounds.maxy < y) path.bounds.maxy = y;
+ points.push_back(vec2f(j->p1));
+ }
+ path.end = (int)points.size();
+ points.push_back( points[path.begin] );
+ ++path.bounds.maxx;
+ ++path.bounds.maxy;
+
+ paths.push_back(path);
+ }
+ }
+
+ // draw
+
+ CudaRender cur(e.cu);
+
+ // warm-up
+ cur.send_surface(&surface);
+ cur.send_points(&points.front(), (int)points.size());
+ for(int ii = 0; ii < 1000; ++ii)
+ for(vector::const_iterator i = paths.begin(); i != paths.end(); ++i)
+ cur.draw(*i);
+ cur.wait();
+
+ // measure
+ {
+ for(int ii = 0; ii < 1000; ++ii) {
+ Measure t("render", false, true);
+ for(vector::const_iterator i = paths.begin(); i != paths.end(); ++i)
+ cur.draw(*i);
+ cur.wait();
+ }
+ }
+ cur.send_points(NULL, 0);
+ cur.send_surface(NULL);
+
+ // actual task
+ cur.send_surface(&surface);
+ cur.send_points(&points.front(), (int)points.size());
+ {
+ for(vector::const_iterator i = paths.begin(); i != paths.end(); ++i)
+ cur.draw(*i);
+ cur.wait();
+ }
+ cur.receive_surface();
+#endif
+}
diff --git a/c++/contourgl/test.h b/c++/contourgl/test.h
index 6d3d3dd..a96d7c8 100644
--- a/c++/contourgl/test.h
+++ b/c++/contourgl/test.h
@@ -56,6 +56,7 @@ public:
static void test_cl(Environment &e, Data &data, Surface &surface);
static void test_cl2(Environment &e, Data &data, Surface &surface);
static void test_cl3(Environment &e, Data &data, Surface &surface);
+ static void test_cu(Environment &e, Data &data, Surface &surface);
};
#endif