Blame c++/contourgl/clrender.cpp

572d9c
/*
572d9c
    ......... 2015 Ivan Mahonin
572d9c
572d9c
    This program is free software: you can redistribute it and/or modify
572d9c
    it under the terms of the GNU General Public License as published by
572d9c
    the Free Software Foundation, either version 3 of the License, or
572d9c
    (at your option) any later version.
572d9c
572d9c
    This program is distributed in the hope that it will be useful,
572d9c
    but WITHOUT ANY WARRANTY; without even the implied warranty of
572d9c
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
572d9c
    GNU General Public License for more details.
572d9c
572d9c
    You should have received a copy of the GNU General Public License
572d9c
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
572d9c
*/
572d9c
f83e6b
#include <cassert>
f83e6b
f83e6b
#include <algorithm>
f83e6b
572d9c
#include "clrender.h"
f83e6b
#include "measure.h"
572d9c
572d9c
572d9c
using namespace std;
572d9c
572d9c
f83e6b
// Creates a renderer bound to the given OpenCL context wrapper.
// Loads the "contour.cl" program and creates its "lines" and "fill" kernels.
// All other members are value-initialized, so no surface is attached until
// send_surface() is called.
ClRender::ClRender(ClContext &cl):
	cl(cl),
	contour_program(), contour_lines_kernel(), contour_fill_kernel(),
	surface(),
	rows_buffer(), mark_buffer(), surface_buffer(),
	rows_count(), even_rows_count(), odd_rows_count()
{
	// program that holds both kernels
	contour_program = cl.load_program("contour.cl");

	// kernel "lines"
	contour_lines_kernel = clCreateKernel(
		contour_program,
		"lines",
		NULL );
	assert(contour_lines_kernel);

	// kernel "fill"
	contour_fill_kernel = clCreateKernel(
		contour_program,
		"fill",
		NULL );
	assert(contour_fill_kernel);
}
572d9c
572d9c
// Detaches the current surface (which releases the surface-related buffers,
// see send_surface) and then releases the kernels and the program created
// by the constructor.
ClRender::~ClRender()
{
	// detach surface, if any
	send_surface(NULL);

	// kernels go first, then the program they were created from
	clReleaseKernel( contour_fill_kernel );
	clReleaseKernel( contour_lines_kernel );
	clReleaseProgram( contour_program );
}
f83e6b
f83e6b
// Attaches `surface` to the renderer (or detaches the current one when
// `surface` is NULL).
//
// Buffers that belong to the previously attached surface are released.
// For a new surface three device buffers are created (rows, marks and
// pixels) and the pixel data of the surface is uploaded into surface_buffer.
// Calling with the already attached surface is a no-op.
void ClRender::send_surface(Surface *surface) {
	if (this->surface == surface) return;

	// wait for all pending commands before touching the buffers;
	// OpenCL reports success as CL_SUCCESS == 0
	cl.err = clFinish(cl.queue);
	assert(!cl.err);

	if (this->surface) {
		rows.clear();
		clReleaseMemObject(rows_buffer);
		clReleaseMemObject(mark_buffer);
		clReleaseMemObject(surface_buffer);

		// do not keep dangling handles or stale sizes around
		rows_buffer = NULL;
		mark_buffer = NULL;
		surface_buffer = NULL;
		rows_count = 0;
		even_rows_count = 0;
		odd_rows_count = 0;
	}

	this->surface = surface;

	if (this->surface) {
		Measure t("ClRender::send_surface");

		// rows are processed in two groups: rows with even index and rows with odd index
		rows_count = surface->height;
		even_rows_count = (rows_count+1)/2;
		odd_rows_count = rows_count - even_rows_count;
		rows.resize(rows_count);

		// per-row (offset, count) table, filled for each contour
		rows_buffer = clCreateBuffer(
			cl.context, CL_MEM_READ_ONLY,
			rows.size()*sizeof(rows.front()), NULL,
			NULL );
		assert(rows_buffer);

		// one vec2f per pixel of the surface
		mark_buffer = clCreateBuffer(
			cl.context, CL_MEM_READ_WRITE,
			surface->count()*sizeof(vec2f), NULL,
			NULL );
		assert(mark_buffer);

		// pixels of the surface
		surface_buffer = clCreateBuffer(
			cl.context, CL_MEM_READ_WRITE,
			surface->data_size(), NULL,
			NULL );
		assert(surface_buffer);

		// upload pixels (blocking write)
		cl.err |= clEnqueueWriteBuffer(
			cl.queue, surface_buffer, CL_TRUE,
			0, surface->data_size(), surface->data,
			0, NULL, NULL );

		cl.err |= clFinish(cl.queue);
		assert(!cl.err);
	}
}
f83e6b
f83e6b
// Downloads the pixels from surface_buffer back into the attached surface.
//
// Returns the attached surface, or NULL when no surface is attached
// (nothing is transferred in that case).
Surface* ClRender::receive_surface() {
	if (surface) {
		Measure t("ClRender::receive_surface");

		// make sure all rendering commands have completed
		cl.err |= clFinish(cl.queue);

		// blocking read of the whole pixel buffer:
		// the size is data_size() bytes, not the size of the value returned by data_size()
		cl.err |= clEnqueueReadBuffer(
			cl.queue, surface_buffer, CL_TRUE,
			0, surface->data_size(), surface->data,
			0, NULL, NULL );

		cl.err |= clFinish(cl.queue);

		// OpenCL reports success as CL_SUCCESS == 0
		assert(!cl.err);
	}
	return surface;
}
572d9c
f83e6b
d989ab
void ClRender::contour(const Contour &contour, const Rect &rect, const Color &color, bool invert, bool evenodd) {
f83e6b
	Measure t("ClRender::contour");
f83e6b
f83e6b
	Contour transformed, splitted;
f83e6b
	Rect to(1.0, 1.0, surface->width - 1.0, surface->height - 1.0);
f83e6b
f83e6b
	{
f83e6b
		Measure t("clone");
f83e6b
		transformed = contour;
f83e6b
	}
f83e6b
f83e6b
	{
f83e6b
		Measure t("transform");
f83e6b
		transformed.transform(rect, to);
f83e6b
	}
f83e6b
f83e6b
	{
f83e6b
		Measure t("split");
d989ab
		transformed.allow_split_lines = true;
f83e6b
		transformed.split(splitted, to, Vector(0.5, 0.5));
f83e6b
	}
f83e6b
f83e6b
	vector<line2f> lines;</line2f>
f83e6b
	vector<line2f> sorted_lines;</line2f>
f83e6b
	vector<int> line_rows;</int>
f83e6b
f83e6b
	{
f83e6b
		Measure t("sort lines");
f83e6b
f83e6b
		// reset rows
f83e6b
		for(int i = 1; i < (int)rows_count; ++i)
f83e6b
			rows[i].second = 0;
f83e6b
f83e6b
		// count lines
f83e6b
		Vector prev;
f83e6b
		lines.reserve(splitted.get_chunks().size());
f83e6b
		line_rows.reserve(splitted.get_chunks().size());
f83e6b
		float x0 = (float)to.p0.x;
f83e6b
		float x1 = (float)to.p1.x;
f83e6b
		for(Contour::ChunkList::const_iterator i = splitted.get_chunks().begin(); i != splitted.get_chunks().end(); ++i) {
f83e6b
			if ( i->type == Contour::LINE
f83e6b
			  || i->type == Contour::CLOSE )
f83e6b
			{
f83e6b
				if (i->p1.y > to.p0.y && i->p1.y < to.p1.y) {
f83e6b
					line2f l(vec2f(prev), vec2f(i->p1));
f83e6b
					l.p0.x = min(max(l.p0.x, x0), x1);
f83e6b
					l.p1.x = min(max(l.p1.x, x0), x1);
f83e6b
					int row = (int)floorf(min(l.p0.y, l.p1.y));
f83e6b
					row = row % 2 ? row/2 : even_rows_count + row/2;
f83e6b
					line_rows.push_back(row);
f83e6b
					lines.push_back(l);
f83e6b
					++rows[row].second;
f83e6b
				}
f83e6b
			}
f83e6b
			prev = i->p1;
f83e6b
		}
f83e6b
f83e6b
		// calc rows offsets
f83e6b
		int lines_count = (int)lines.size();
f83e6b
		rows[0].first = rows[0].second;
f83e6b
		for(int i = 1; i < (int)rows_count; ++i)
f83e6b
			rows[i].first = rows[i-1].first + rows[i].second;
f83e6b
f83e6b
		// make sorted list
f83e6b
		sorted_lines.resize(lines_count);
f83e6b
		for(int i = 0; i < lines_count; ++i)
f83e6b
			sorted_lines[ --rows[line_rows[i]].first ] = lines[i];
f83e6b
	}
f83e6b
f83e6b
	cl_mem lines_buffer = NULL;
f83e6b
f83e6b
	{
f83e6b
		Measure t("create lines buffer");
f83e6b
f83e6b
		lines_buffer = clCreateBuffer(
f83e6b
			cl.context, CL_MEM_READ_ONLY,
f83e6b
			sorted_lines.size()*sizeof(sorted_lines.front()), NULL,
f83e6b
			NULL );
f83e6b
		assert(lines_buffer);
f83e6b
	}
f83e6b
f83e6b
	{
f83e6b
		Measure t("enqueue commands");
f83e6b
f83e6b
		clFinish(cl.queue);
f83e6b
f83e6b
		// kernel args
d989ab
		int width = surface->width;
f83e6b
d989ab
		cl.err |= clSetKernelArg(contour_lines_kernel, 0, sizeof(width), &width);
d989ab
		cl.err |= clSetKernelArg(contour_lines_kernel, 1, sizeof(lines_buffer), &lines_buffer);
d989ab
		cl.err |= clSetKernelArg(contour_lines_kernel, 2, sizeof(rows_buffer), &rows_buffer);
d989ab
		cl.err |= clSetKernelArg(contour_lines_kernel, 3, sizeof(mark_buffer), &mark_buffer);
f83e6b
		assert(!cl.err);
f83e6b
d989ab
		// TODO: invert, evenodd
d989ab
		cl.err |= clSetKernelArg(contour_fill_kernel, 0, sizeof(width), &width);
d989ab
		cl.err |= clSetKernelArg(contour_fill_kernel, 1, sizeof(mark_buffer), &mark_buffer);
d989ab
		cl.err |= clSetKernelArg(contour_fill_kernel, 2, sizeof(surface_buffer), &surface_buffer);
d989ab
		cl.err |= clSetKernelArg(contour_fill_kernel, 3, sizeof(Color::type), &color.r);
d989ab
		cl.err |= clSetKernelArg(contour_fill_kernel, 4, sizeof(Color::type), &color.g);
d989ab
		cl.err |= clSetKernelArg(contour_fill_kernel, 5, sizeof(Color::type), &color.b);
d989ab
		cl.err |= clSetKernelArg(contour_fill_kernel, 6, sizeof(Color::type), &color.a);
f83e6b
		assert(!cl.err);
f83e6b
f83e6b
		// prepare buffers
f83e6b
f83e6b
		cl_event prepare_buffers_events[3] = { };
f83e6b
f83e6b
		cl.err |= clEnqueueWriteBuffer(
f83e6b
			cl.queue, lines_buffer, CL_TRUE,
f83e6b
			0, sorted_lines.size()*sizeof(sorted_lines.front()), &sorted_lines.front(),
f83e6b
			0, NULL, &prepare_buffers_events[0] );
f83e6b
f83e6b
		cl.err |= clEnqueueWriteBuffer(
f83e6b
			cl.queue, rows_buffer, CL_TRUE,
f83e6b
			0, rows.size()*sizeof(rows.front()), &rows.front(),
f83e6b
			0, NULL, &prepare_buffers_events[1] );
f83e6b
f83e6b
		vec2f pattern;
f83e6b
		cl.err |= clEnqueueFillBuffer(
f83e6b
			cl.queue, mark_buffer,
f83e6b
			&pattern, sizeof(pattern),
f83e6b
			0, surface->count()*sizeof(vec2f),
f83e6b
			0, NULL, &prepare_buffers_events[2] );
f83e6b
f83e6b
		// run kernels
f83e6b
f83e6b
		cl_event lines_odd_event = NULL;
f83e6b
		cl.err = clEnqueueNDRangeKernel(
f83e6b
			cl.queue,
f83e6b
			contour_lines_kernel,
f83e6b
			1,
f83e6b
			NULL,
f83e6b
			&even_rows_count,
f83e6b
			NULL,
f83e6b
			3,
f83e6b
			prepare_buffers_events,
f83e6b
			&lines_odd_event );
f83e6b
		assert(!cl.err);
f83e6b
f83e6b
		cl_event lines_even_event = NULL;
f83e6b
		cl.err = clEnqueueNDRangeKernel(
f83e6b
			cl.queue,
f83e6b
			contour_lines_kernel,
f83e6b
			1,
f83e6b
			&even_rows_count,
f83e6b
			&odd_rows_count,
f83e6b
			NULL,
f83e6b
			1,
f83e6b
			&lines_odd_event,
f83e6b
			&lines_even_event );
f83e6b
		assert(!cl.err);
f83e6b
f83e6b
		cl_event fill_event = NULL;
f83e6b
		cl.err = clEnqueueNDRangeKernel(
f83e6b
			cl.queue,
f83e6b
			contour_fill_kernel,
f83e6b
			1,
f83e6b
			NULL,
f83e6b
			&rows_count,
f83e6b
			NULL,
f83e6b
			1,
f83e6b
			&lines_even_event,
f83e6b
			&fill_event );
f83e6b
		assert(!cl.err);
f83e6b
f83e6b
		clWaitForEvents(1, &fill_event);
f83e6b
	}
f83e6b
f83e6b
	{
f83e6b
		Measure t("release lines buffer");
f83e6b
		clReleaseMemObject(lines_buffer);
f83e6b
	}
572d9c
}
572d9c
572d9c
572d9c
572d9c
void SwRenderAlt::line(const Vector &p0, const Vector &p1) {
572d9c
	int iy0 = min(max((int)floor(p0.y), 0), height);
572d9c
	int iy1 = min(max((int)floor(p1.y), 0), height);
572d9c
	if (iy1 < iy0) swap(iy0, iy1);
572d9c
572d9c
	Vector d = p1 - p0;
572d9c
	Vector k( fabs(d.y) < 1e-6 ? 0.0 : d.x/d.y,
572d9c
		      fabs(d.x) < 1e-6 ? 0.0 : d.y/d.x );
572d9c
572d9c
	for(int r = iy0; r <= iy1; ++r) {
572d9c
		Real y = (Real)iy0;
572d9c
572d9c
		Vector pp0 = p0;
572d9c
		pp0.y -= y;
572d9c
		if (pp0.y < 0.0) {
572d9c
			pp0.y = 0.0;
572d9c
			pp0.x = p0.x - k.x*y;
572d9c
		} else
572d9c
		if (pp0.y > 1.0) {
572d9c
			pp0.y = 1.0;
572d9c
			pp0.x = p0.x - k.x*(y - 1.0);
572d9c
		}
572d9c
572d9c
		Vector pp1 = p1;
572d9c
		pp1.y -= y;
572d9c
		if (pp1.y < 0.0) {
572d9c
			pp1.y = 0.0;
572d9c
			pp1.x = p0.x - k.x*y;
572d9c
		} else
572d9c
		if (pp1.y > 1.0) {
572d9c
			pp1.y = 1.0;
572d9c
			pp1.x = p0.x - k.x*(y - 1.0);
572d9c
		}
572d9c
572d9c
		int ix0 = min(max((int)floor(pp0.x), 0), width);
572d9c
		int ix1 = min(max((int)floor(pp1.x), 0), width);
572d9c
		if (ix1 < ix0) swap(ix0, ix1);
572d9c
		for(int c = ix0; c <= ix1; ++c) {
572d9c
			Real x = (Real)ix0;
572d9c
572d9c
			Vector ppp0 = pp0;
572d9c
			ppp0.x -= x;
572d9c
			if (ppp0.x < 0.0) {
572d9c
				ppp0.x = 0.0;
572d9c
				ppp0.y = pp0.y - k.y*x;
572d9c
			} else
572d9c
			if (ppp0.x > 1.0) {
572d9c
				ppp0.x = 1.0;
572d9c
				ppp0.y = pp0.y - k.y*(x - 1.0);
572d9c
			}
572d9c
572d9c
			Vector ppp1 = pp1;
572d9c
			ppp1.x -= x;
572d9c
			if (ppp1.x < 0.0) {
572d9c
				ppp1.x = 0.0;
572d9c
				ppp1.y = pp0.y - k.y*x;
572d9c
			} else
572d9c
			if (ppp1.x > 1.0) {
572d9c
				ppp1.x = 1.0;
572d9c
				ppp1.y = pp0.y - k.y*(x - 1.0);
572d9c
			}
572d9c
572d9c
			Real cover = ppp0.y - ppp1.y;
572d9c
			Real area = (0.5*(ppp1.x + ppp1.x) - 1.0)*cover;
572d9c
			(*this)[r][c].add(area, cover);
572d9c
		}
572d9c
	}
572d9c
}