From e865c950724aa517cd4c7d814b2451fb1c07e847 Mon Sep 17 00:00:00 2001
From: Ivan Mahonin
Date: Mar 16 2023 06:13:46 +0000
Subject: neural project

---

diff --git a/projects/neural/build-trainer.sh b/projects/neural/build-trainer.sh
new file mode 100755
index 0000000..72d6fa9
--- /dev/null
+++ b/projects/neural/build-trainer.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -e
+
+
+if [ "$1" == "debug" ]; then
+  c++ -Wall -g -O0 -pthread trainer.cpp -lm -o trainer-dbg
+  echo done debug
+else
+  c++ -Wall -DNDEBUG -O3 -pthread trainer.cpp -lm -o trainer
+  echo done release
+fi
+
diff --git a/projects/neural/data b/projects/neural/data
new file mode 120000
index 0000000..c5a9c9a
--- /dev/null
+++ b/projects/neural/data
@@ -0,0 +1 @@
+../../simple/neural/data
\ No newline at end of file
diff --git a/projects/neural/layer.all.inc.cpp b/projects/neural/layer.all.inc.cpp
new file mode 100644
index 0000000..2ccae82
--- /dev/null
+++ b/projects/neural/layer.all.inc.cpp
@@ -0,0 +1,9 @@
+#ifndef LAYER_ALL_INC_CPP
+#define LAYER_ALL_INC_CPP
+
+
+#include "layer.simple.inc.cpp"
+#include "layer.conv.inc.cpp"
+
+
+#endif
diff --git a/projects/neural/layer.all.test.inc.cpp b/projects/neural/layer.all.test.inc.cpp
new file mode 100644
index 0000000..8d42289
--- /dev/null
+++ b/projects/neural/layer.all.test.inc.cpp
@@ -0,0 +1,21 @@
+#ifndef LAYER_ALL_TEST_INC_CPP
+#define LAYER_ALL_TEST_INC_CPP
+
+
+
+#include "layer.simple.test.inc.cpp"
+#include "layer.conv.test.inc.cpp"
+
+
+class AllTest: public Test {
+public:
+  static bool test(const char *name = "all") {
+    Stage st(name);
+    SimpleTest::test();
+    ConvTest::test();
+    return st;
+  }
+};
+
+
+#endif
diff --git a/projects/neural/layer.conv.inc.cpp b/projects/neural/layer.conv.inc.cpp
new file mode 100644
index 0000000..c0b6e82
--- /dev/null
+++ b/projects/neural/layer.conv.inc.cpp
@@ -0,0 +1,334 @@
+#ifndef LAYER_CONV_INC_CPP
+#define LAYER_CONV_INC_CPP
+
+
+
+#include "layer.simple.inc.cpp"
+
+
+
+struct Kernel {
+  int sx, sy;
+  int dx, dy;
+  int ox, oy;
+
+  inline Kernel():
+    sx(), sy(), dx(), dy(), ox(), oy() { }
+  inline Kernel(int sx, int sy, int dx, int dy, int ox, int oy):
+    sx(sx), sy(sy), dx(dx), dy(dy), ox(ox), oy(oy) { }
+  inline Kernel(int s, int d, int o):
+    sx(s), sy(s), dx(d), dy(d), ox(o), oy(o) { }
+  inline operator bool() const
+    { return sx > 0 && sy > 0 && dx > 0 && dy > 0; }
+
+
+  void print(const char *prefix = nullptr) const {
+    if (prefix && *prefix) printf("%s: ", prefix);
+    printf("x(sdo): %d %d %d, y(sdo): %d %d %d\n", sx, dx, ox, sy, dy, oy);
+  }
+  void printYX(const char *prefix = nullptr) const {
+    if (prefix && *prefix) printf("%s: ", prefix);
+    printf("y(sdo): %d %d %d, x(sdo): %d %d %d\n", sy, dy, oy, sx, dx, ox);
+  }
+};
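+
+// A Kernel maps output cell (cx, cy) of the current layout to an input
+// window of sx*sy cells whose top-left corner is at
+// (pl.x0 + (cx - cl.x0)*dx + ox, pl.y0 + (cy - cl.y0)*dy + oy).
+// The guarded sketch below is illustrative only (not used by the project):
+// it prints the window read by each output cell for Kernel(5, 2, -2) over
+// an input padded with expandXY(2), matching ConvTest's "square" case.
+#if 0
+#include <cstdio>
+int main() {
+  Kernel k(5, 2, -2);                    // 5x5 window, stride 2, offset -2
+  int plx0 = 2, ply0 = 2;                // active origin after expandXY(2)
+  for(int cy = 0; cy < 2; ++cy)
+  for(int cx = 0; cx < 2; ++cx) {
+    int px = plx0 + cx*k.dx + k.ox;      // left edge of the input window
+    int py = ply0 + cy*k.dy + k.oy;      // top edge of the input window
+    printf("out(%d,%d) <- in[%d..%d)x[%d..%d)\n",
+           cx, cy, px, px + k.sx, py, py + k.sy);
+  }
+  return 0;
+}
+#endif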
+
+
+template<class Iter>
+void iterateTestConvolution(Layout cl, Layout pl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  if (!cl) return;
+  assert(pl);
+  assert(k);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
+  assert(pl.x0 + k.ox >= 0 && pl.x0 + (cl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
+  assert(pl.y0 + k.oy >= 0 && pl.y0 + (cl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
+
+  for(int cy = cl.y0; cy < cl.y1; ++cy)
+  for(int cx = cl.x0; cx < cl.x1; ++cx)
+  for(int cz = cl.z0; cz < cl.z1; ++cz) {
+    int ci = (cy*cl.sx + cx)*cl.sz + cz;
+    Neuron &cn = c_neurons[ci];
+    typename Iter::AccumType a = {};
+    Iter::init(cn, a);
+
+    for(int ky = 0; ky < k.sy; ++ky)
+    for(int kx = 0; kx < k.sx; ++kx)
+    for(int pz = pl.z0; pz < pl.z1; ++pz) {
+      int wi = ((cy - cl.y0)*cl.getW() + cx - cl.x0)*cl.getD() + cz - cl.z0;
+      wi = ((wi*k.sy + ky)*k.sx + kx)*pl.getD() + pz - pl.z0;
+      Weight &w = weights[wi];
+
+      int px = pl.x0 + (cx - cl.x0)*k.dx + k.ox + kx;
+      int py = pl.y0 + (cy - cl.y0)*k.dy + k.oy + ky;
+      int pi = (py*pl.sx + px)*pl.sz + pz;
+      Neuron &pn = p_neurons[pi];
+
+      Iter::iter(pn, w, a);
+    }
+
+    Iter::done(cn, a);
+  }
+}
+
+
+
+
+template<class Iter>
+void iterateConvolution(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  if (!cl) return;
+  assert(pl);
+  assert(wl);
+  assert(k);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
+  assert(cl.isSubLayoutOf(wl));
+  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
+  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
+
+  int c_h = cl.getH();
+  int c_w = cl.getW();
+  int c_d = cl.getD();
+  int c_swz = c_w*cl.sz;
+  int c_shxz = c_h*cl.sx*cl.sz;
+  int c_dx = cl.sz - c_d;
+  int c_dy = (cl.sx - c_w)*cl.sz;
+
+  int p_d = pl.getD();
+  int p_dx = k.dx*pl.sz;
+  int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
+
+  int k_sxd = k.sx*p_d;
+  int k_syxd = k.sy*k_sxd;
+  int p_ddy = (pl.sx - k.sx)*pl.sz;
+  int p_ddx = pl.sz - p_d;
+
+  int w_w = wl.getW();
+  int w_d = wl.getD();
+  int w_dx = (w_d - c_d)*k_syxd;
+  int w_dy = (w_w - c_w)*w_d*k_syxd;
+
+  int cx0 = cl.x0 - wl.x0;
+  int cy0 = cl.y0 - wl.y0;
+  int cz0 = cl.z0 - wl.z0;
+
+  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
+  Neuron *ipn = p_neurons + ((pl.y0 + cy0*k.dy + k.oy)*pl.sx + pl.x0 + cx0*k.dx + k.ox)*pl.sz + pl.z0;
+  Weight *iw = weights + ((cy0*w_w + cx0)*w_d + cz0)*k_syxd;
+
+  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy, iw += w_dy)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx, iw += w_dx)
+  for(Neuron *e = icn + c_d; icn < e; ++icn) {
+    typename Iter::AccumType a;
+    Iter::init(*icn, a);
+
+    Neuron *iipn = ipn;
+    for(Weight *e = iw + k_syxd; iw < e; iipn += p_ddy)
+    for(Weight *e = iw + k_sxd; iw < e; iipn += p_ddx)
+    for(Weight *e = iw + p_d; iw < e; ++iw, ++iipn)
+      Iter::iter(*iipn, *iw, a);
+
+    Iter::done(*icn, a);
+  }
+}
+
+template<class Iter>
+void iterateConvolutionPoint(Layout cl, Layout pl, Layout wl, Kernel k, int kx, int ky, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  if (!cl) return;
+  assert(pl);
+  assert(wl);
+  assert(k);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
+  assert(cl.isSubLayoutOf(wl));
+  assert(kx >= 0 && kx < k.sx);
+  assert(ky >= 0 && ky < k.sy);
+  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
+  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
+
+  int c_h = cl.getH();
+  int c_w = cl.getW();
+  int c_d = cl.getD();
+  int c_swz = c_w*cl.sz;
+  int c_shxz = c_h*cl.sx*cl.sz;
+  int c_dx = cl.sz - c_d;
+  int c_dy = (cl.sx - c_w)*cl.sz;
+
+  int p_d = pl.getD();
+  int p_dx = k.dx*pl.sz;
+  int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
+
+  int k_sxd = k.sx*p_d;
+  int k_syxd = k.sy*k_sxd;
+
+  int w_w = wl.getW();
+  int w_d = wl.getD();
+  int w_dz = k_syxd - p_d;
+  int w_dx = (w_d - c_d)*k_syxd;
+  int w_dy = (w_w - c_w)*w_d*k_syxd;
+
+  int cx0 = cl.x0 - wl.x0;
+  int cy0 = cl.y0 - wl.y0;
+  int cz0 = cl.z0 - wl.z0;
+
+  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
+  Neuron *ipn = p_neurons + ((pl.y0 + cy0*k.dy + k.oy + ky)*pl.sx + pl.x0 + cx0*k.dx + k.ox + kx)*pl.sz + pl.z0;
+  Weight *iw = weights + ((cy0*w_w + cx0)*w_d + cz0)*k_syxd + ky*k_sxd + kx*p_d;
+
+  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy, iw += w_dy)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx, iw += w_dx)
+  for(Neuron *e = icn + c_d; icn < e; ++icn, ipn -= p_d, iw += w_dz)
+  for(Neuron *e = ipn + p_d; ipn < e; ++ipn, ++iw)
+    Iter::iter2(*icn, *ipn, *iw);
+}
+
+
+
+template<Func func>
+class LayerConv: public Layer {
+public:
+  Kernel kernel;
+
+  LayerConv(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
+    Layer(&prev, layout, layout.getActiveCount()*kernel.sx*kernel.sy*prev.back().layout.getD(), weights),
+    kernel(kernel)
+  {
+    assert(kernel);
+    if (ownWeights) fillWeights(-1, 1);
+  }
+
+
+  void pass(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
+    };
+    iterateConvolution<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
+  }
+
+
+  void backpassWeights(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.v * a.v; }
+    };
+    iterateConvolution<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
+  }
+
+
+  void backpassDeltas(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.d * w.w; }
+      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
+    };
+    int ksx = kernel.sx, ksy = kernel.sy;
+    for(int kx = 0; kx < ksx; ++kx)
+    for(int ky = 0; ky < ksy; ++ky) {
+      iterateConvolutionPoint<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, kx, ky, neurons, prev->neurons, weights);
+      barrier.wait();
+    }
+    iterateNeurons<I>(prev->mtLayouts[barrier.tid], prev->neurons);
+  }
+
+
+  void testPass() override {
+    struct I: public Iter {
+      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
+    };
+    iterateTestConvolution<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
+  }
+
+
+  void testBackpass() override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; w.w += a.v * n.v; }
+      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
+    };
+    clearAccum();
+    iterateTestConvolution<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
+    iterateNeurons<I>(prev->layout, prev->neurons);
+    clearAccum();
+  }
+};
+
+
+
+template<Func func>
+class LayerDeconv: public Layer {
+public:
+  Kernel kernel;
+
+  LayerDeconv(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
+    Layer(&prev, layout, prev.back().layout.getActiveCount()*kernel.sx*kernel.sy*layout.getD(), weights),
+    kernel(kernel)
+  {
+    assert(kernel);
+    if (ownWeights) fillWeights(-1, 1);
+  }
+
+
+  void pass(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.v * w.w; }
+      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
+    };
+    int k_sx = kernel.sx, k_sy = kernel.sy;
+    for(int kx = 0; kx < k_sx; ++kx)
+    for(int ky = 0; ky < k_sy; ++ky) {
+      iterateConvolutionPoint<I>(prev->mtLayouts[barrier.tid], layout, prev->layout, kernel, kx, ky, prev->neurons, neurons, weights);
+      barrier.wait();
+    }
+    iterateNeurons<I>(mtLayouts[barrier.tid], neurons);
+  }
+
+
+  void backpassWeights(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
+    };
+    // same geometry as pass(): prev is the convolution-output side here
+    iterateConvolution<I>(prev->mtLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
+  }
+
+
+  void backpassDeltas(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
+    };
+    iterateConvolution<I>(prev->mtLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
+  }
+
+
+  void testPass() override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
+      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
+    };
+    clearAccum();
+    iterateTestConvolution<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
+    iterateNeurons<I>(layout, neurons);
+    clearAccum();
+  }
+
+
+  void testBackpass() override {
+    struct I: public Iter {
+      struct AccumType: public Accum { NeuronReal vv; };
+      static inline void init(Neuron &n, AccumType &a) { a.v = 0; a.vv = n.v; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; w.w += n.d * a.vv; }
+      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
+    };
+    iterateTestConvolution<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
+  }
+};
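+
+// The iterate* functions are templates over an "Iter" policy: the static
+// init/iter/done hooks are inlined into the innermost loops, so every pass
+// above compiles into a specialized loop with no virtual dispatch. A sketch
+// of a custom policy under the same contract (illustrative, kept disabled;
+// SumAbsWeights is not part of the project):
+#if 0
+struct SumAbsWeights: public Iter {
+  static AccumReal total;  // single-threaded accumulation, for brevity
+  static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+  static inline void iter(Neuron&, Weight &w, AccumType &a) { a.v += w.w < 0 ? -w.w : w.w; }
+  static inline void done(Neuron&, AccumType &a) { total += a.v; }
+};
+AccumReal SumAbsWeights::total = 0;
+
+// usage, with a LayerConv-like geometry:
+//   iterateConvolution<SumAbsWeights>(cl, pl, cl, kernel, neurons, prevNeurons, weights);
+#endif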
+
+#endif
diff --git a/projects/neural/layer.conv.test.inc.cpp b/projects/neural/layer.conv.test.inc.cpp
new file mode 100644
index 0000000..7124b30
--- /dev/null
+++ b/projects/neural/layer.conv.test.inc.cpp
@@ -0,0 +1,171 @@
+#ifndef LAYER_CONV_TEST_INC_CPP
+#define LAYER_CONV_TEST_INC_CPP
+
+
+
+#include "layer.test.inc.cpp"
+#include "layer.conv.inc.cpp"
+
+
+class ConvTest: public Test {
+public:
+  static void init(const Layout &cl, const Layout &pl, const Kernel &k, bool shared = false)
+    { Test::init(cl.getCount(), pl.getCount(), (shared ? 1 : cl.getActiveCount())*k.sx*k.sy*pl.getD()); }
+
+
+  static bool verifyWeights(const char *name, const Layout &cl, const Layout &pl, const Kernel &k) {
+    Stage st(name);
+    for(int cy = cl.y0; cy < cl.y1; ++cy)
+    for(int cx = cl.x0; cx < cl.x1; ++cx)
+    for(int cz = cl.z0; cz < cl.z1; ++cz) {
+      int ci = (cy*cl.sx + cx)*cl.sz + cz;
+      for(int ky = 0; ky < k.sy; ++ky)
+      for(int kx = 0; kx < k.sx; ++kx)
+      for(int pz = pl.z0; pz < pl.z1; ++pz) {
+        int wi = ((cy - cl.y0)*cl.getW() + cx - cl.x0)*cl.getD() + cz - cl.z0;
+        wi = ((wi*k.sy + ky)*k.sx + kx)*pl.getD() + pz - pl.z0;
+
+        int px = pl.x0 + (cx - cl.x0)*k.dx + k.ox + kx;
+        int py = pl.y0 + (cy - cl.y0)*k.dy + k.oy + ky;
+        if ( px < pl.x0 || px >= pl.x1
+          || py < pl.y0 || py >= pl.y1 ) continue;
+
+        int pi = (py*pl.sx + px)*pl.sz + pz;
+
+        int s = (int)p_neurons.size();
+        int w = weights[wi].i;
+        int i = ci*s + pi + 1;
+
+        if (w != i) {
+          int ww = w;
+          int wpz = ww%pl.sz; ww /= pl.sz;
+          int wpx = ww%pl.sx; ww /= pl.sx;
+          int wpy = ww%pl.sy; ww /= pl.sy;
+          int wcz = ww%cl.sz; ww /= cl.sz;
+          int wcx = ww%cl.sx; ww /= cl.sx;
+          int wcy = ww;
+
+          printf(
+            "wrong index: %d = ((%d*%d + %d)*%d + %d)*%d + (%d*%d + %d)*%d + %d + 1,\n"
+            "expected: %d = ((%d*%d + %d)*%d + %d)*%d + (%d*%d + %d)*%d + %d + 1\n"
+            "wi %d, ky %d, kx %d \n",
+            w,
+            wcy, cl.sx, wcx, cl.sz, wcz, s,
+            wpy, pl.sx, wpx, pl.sz, wpz,
+            i,
+            cy, cl.sx, cx, cl.sz, cz, s,
+            py, pl.sx, px, pl.sz, pz,
+            wi, ky, kx );
+          pl.printYXZ("prev layout");
+          cl.printYXZ("curr layout");
+          k.printYX("kernel");
+          ++errors;
+          return st;
+        }
+      }
+    }
+    return st;
+  }
+
+  static bool testIterators(const char *name, const Layout &cl, const Layout &pl, const Kernel &k, int threads) {
+    Stage st(name);
+
+    assert(cl && pl && k && threads > 0);
+    Layout::List clist, plist;
+    cl.split(clist, threads);
+    pl.split(plist, threads);
+
+    struct I: public Iter {
+      static inline void init(Neuron &n, Iter::AccumType &a) { ++n.a.i; a.i = (AccumInt)(&n - c_neurons.data()); }
+      static inline void iter(Neuron &n, Weight &w, Iter::AccumType &a) {
+        if (w.i)
+          ++errors;
+        w.i = (WeightInt)(&n - p_neurons.data() + a.i*p_neurons.size() + 1);
+      }
+      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) {
+        if (w.i)
+          ++errors;
+        w.i = (WeightInt)((&cn - c_neurons.data())*p_neurons.size() + &pn - p_neurons.data() + 1);
+        pn.v = pn.v + 1;
+      }
+    };
+
+    if (threads == 1) {
+      Stage st("iterateTestConvolution");
+      init(cl, pl, k);
+      iterateTestConvolution<I>(cl, pl, k, c_neurons.data(), p_neurons.data(), weights.data());
+      verifyNeurons("conv-neurons", cl, c_neurons.data());
+      verifyWeights("conv-weights", cl, pl, k);
+    }
+
+    {
+      Stage st("iterateConvolution");
+      init(cl, pl, k);
+      for(int i = 0; i < threads; ++i)
+        iterateConvolution<I>(clist[i], pl, cl, k, c_neurons.data(), p_neurons.data(), weights.data());
+      verifyNeurons("conv-neurons", cl, c_neurons.data());
+      verifyWeights("conv-weights", cl, pl, k);
+    }
+
+    {
+      Stage st("iterateConvolutionPoint");
+      init(cl, pl, k);
+      int e = errors;
+      for(int ky = 0; ky < k.sy && errors == e; ++ky)
+      for(int kx = 0; kx < k.sx && errors == e; ++kx) {
+        for(int i = 0; i < threads; ++i)
+          iterateConvolutionPoint<I>(clist[i], pl, cl, k, kx, ky, c_neurons.data(), p_neurons.data(), weights.data());
+        if (!verifyNeuronsAccum(pl, p_neurons.data(), cl.getD(), true))
+          printf("kx: %d, ky: %d\n", kx, ky), k.printYX("kernel");
+      }
+      verifyNeurons("conv-neurons", pl, p_neurons.data(), true);
+      verifyWeights("conv-weights", cl, pl, k);
+    }
+
+    return st;
+  }
+
+  static bool testIterators(const char *name, const Layout &cl, const Layout &pl, const Kernel &k) {
+    Stage st(name);
+    testIterators( "single-thread", cl, pl, k, 1 );
+    testIterators( "2-threads",     cl, pl, k, 2 );
+    testIterators( "7-threads",     cl, pl, k, 7 );
+    testIterators( "8-threads",     cl, pl, k, 8 );
+    testIterators( "512-threads",   cl, pl, k, 512 );
+    return st;
+  }
+
+  static bool test(const char *name, const Layout &cl, const Layout &pl, const Kernel &k) {
+    Stage st(name);
+
+    testIterators("iterators", cl, pl, k);
+
+    {
+      Layer l(nullptr, pl);
+      new LayerConv<funcSigmoidExp>(l, cl, k);
+      Test::testLayer("LayerConv", l);
+    }
+
+    {
+      // deconvolution maps the small layout onto the large one,
+      // so the roles of cl and pl swap here
+      Layer l(nullptr, cl);
+      new LayerDeconv<funcSigmoidExp>(l, pl, k);
+      Test::testLayer("LayerDeconv", l);
+    }
+
+    return st;
+  }
+
+  static bool test(const char *name = "convolution") {
+    Stage st(name);
+    test( "square", Layout(64, 64, 4), Layout(128, 128, 4).expandXY(2), Kernel(5, 2, -2) );
+    test( "rect1",  Layout(63, 43, 5), Layout( 63, 85, 3).expandX(2).expandY(3),       Kernel(5, 7, 1, 2, -2, -3) );
+    test( "rect2",  Layout(43, 63, 3), Layout( 85, 63, 5).expandX(3).expandY(3, 2),    Kernel(7, 5, 2, 1, -3, -2) );
+    test( "rect3",  Layout(64, 48, 5), Layout( 64, 96, 3).expandX(1, 2).expandY(3, 1), Kernel(4, 6, 1, 2, -1, -3) );
+    test( "pad",    Layout(64, 48, 5).expandX(3, 4).expandY(4, 3).expandZ(5, 4),
+                    Layout(64, 96, 3).expandX(6, 5).expandY(7, 6).expandZ(0, 1), Kernel(4, 6, 1, 2, -1, -3) );
+    return st;
+  }
+};
+
+
+#endif
diff --git a/projects/neural/layer.inc.cpp b/projects/neural/layer.inc.cpp
new file mode 100644
index 0000000..ad7c516
--- /dev/null
+++ b/projects/neural/layer.inc.cpp
@@ -0,0 +1,223 @@
+#ifndef LAYER_INC_CPP
+#define LAYER_INC_CPP
+
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cmath>
+#include <cassert>
+
+#include <atomic>
+#include <thread>
+#include <vector>
+
+
+#include "layout.inc.cpp"
+
+
+
+typedef double WeightReal;
+typedef double NeuronReal;
+typedef double AccumReal;
+
+typedef int WeightInt;
+typedef int AccumInt;
+
+
+#define RANDOM_MAX 0x7fffffff
+inline unsigned int randomNext(unsigned int prev)
+  { return (1103515245*prev + 12345) & RANDOM_MAX; }
+
+
+struct Accum {
+  union { AccumReal v; AccumInt i; };
+};
+
+
+struct Neuron {
+  NeuronReal v, d;
+  Accum a;
+};
+
+
+struct Weight {
+  union { WeightReal w; WeightInt i; };
+};
+
+
+struct Iter {
+  typedef Accum AccumType;
+  typedef NeuronReal* DataType;
+  typedef AccumType DataAccumType;
+  static inline void init(Neuron&, AccumType&) { }
+  static inline void iter(Neuron&, Weight&, AccumType&) { }
+  static inline void done(Neuron&, AccumType&) { }
+  static inline void iter2(Neuron&, Neuron&, Weight&) { }
+  static inline void iter3(Neuron&) { }
+  static inline void iter4(Neuron&, DataType, DataAccumType&) { }
+};
+
+
+class Barrier {
+private:
+  std::atomic<unsigned int> &counter;
+  unsigned int next;
+public:
+  const unsigned int tid;
+  const unsigned int threads;
+
+  Barrier(const Barrier&) = delete;
+  inline Barrier(std::atomic<unsigned int> &counter, unsigned int tid, unsigned int threads):
+    counter(counter), next(), tid(tid), threads(threads) { assert(tid < threads); }
+  inline void wait() { next += threads; ++counter; while(counter < next); }
+  inline void subwait() { while(counter < next + tid); }
+};
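+
+// Barrier is a monotonic spin barrier: all threads share one atomic counter,
+// each wait() raises the thread-local target by `threads` and spins until
+// every peer has incremented the counter. A standalone usage sketch
+// (illustrative only, kept disabled):
+#if 0
+void worker(std::atomic<unsigned int> *counter, unsigned int tid, unsigned int threads) {
+  Barrier barrier(*counter, tid, threads);
+  // phase 1: each thread works on its own slice here
+  barrier.wait();                       // no thread enters phase 2 early
+  if (!tid) printf("phase 1 done\n");   // single-threaded step, like applyDelta()
+  barrier.wait();
+  // phase 2 ...
+}
+int main() {
+  std::atomic<unsigned int> counter(0);
+  std::vector<std::thread> pool;
+  for(unsigned int i = 1; i < 4; ++i) pool.emplace_back(worker, &counter, i, 4u);
+  worker(&counter, 0, 4);
+  for(std::thread &t: pool) t.join();
+  return 0;
+}
+#endif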
+
+
+struct Stat {
+  int neurons;
+  int activeNeurons;
+  int weights;
+  int links;
+  size_t memsize;
+
+  Stat(): neurons(), activeNeurons(), weights(), links(), memsize() { }
+
+  Stat& operator+= (const Stat &b) {
+    neurons += b.neurons;
+    activeNeurons += b.activeNeurons;
+    weights += b.weights;
+    links += b.links;
+    memsize += b.memsize;
+    return *this;
+  }
+
+  void print(const char *prefix = nullptr) const {
+    if (prefix && *prefix) printf("%s: ", prefix);
+    printf("neurons: %d / %d, links %d / %d, memSize: %llu\n", activeNeurons, neurons, weights, links, (unsigned long long)memsize);
+  }
+};
+
+
+class Layer {
+public:
+  Layer *prev, *next;
+
+  Layout layout;
+
+  Neuron *neurons;
+  int neuronsCount;
+
+  Weight *weights;
+  int weightsCount;
+  bool ownWeights;
+
+  const char *filename;
+
+  Stat stat;
+
+  Layout::List mtLayouts;
+
+
+  Layer(Layer *prev, const Layout &layout, int weightsCount = 0, Weight *weights = nullptr):
+    prev(prev ? &prev->back() : nullptr),
+    next(),
+    layout(layout),
+    neurons(),
+    neuronsCount(layout.getCount()),
+    weights(weights),
+    weightsCount(weightsCount),
+    ownWeights(!weights && weightsCount),
+    filename()
+  {
+    assert(layout);
+    assert(neuronsCount > 0);
+    assert(weightsCount >= 0);
+    assert(!prev == !weightsCount);
+
+    if (this->prev) this->prev->next = this;
+    if (neuronsCount) {
+      neurons = new Neuron[neuronsCount];
+      memset(neurons, 0, sizeof(*neurons)*neuronsCount);
+    }
+    if (ownWeights) {
+      this->weights = new Weight[weightsCount];
+      memset(this->weights, 0, sizeof(*this->weights)*weightsCount);
+    }
+
+    stat.neurons = neuronsCount;
+    stat.activeNeurons = layout.getActiveCount();
+    stat.weights = weightsCount;
+    stat.links = weightsCount;
+    stat.memsize = neuronsCount*sizeof(*neurons);
+    if (ownWeights) stat.memsize += weightsCount*sizeof(*weights);
+  }
+
+
+  virtual ~Layer() {
+    if (next) delete next;
+    if (neurons) delete[] neurons;
+    if (ownWeights) delete[] weights;
+  }
+
+
+  inline Layer& front()
+    { Layer *l = this; while(l->prev) l = l->prev; return *l; }
+  inline Layer& back()
+    { Layer *l = this; while(l->next) l = l->next; return *l; }
+  inline Stat sumStat() const
+    { Stat s; for(const Layer *l = this; l; l = l->next) s += l->stat; return s; }
+
+  bool save() const {
+    if (filename && weightsCount) {
+      FILE *f = fopen(filename, "wb");
+      if (!f)
+        return printf("cannot open file for write: %s\n", filename), false;
+      if (!fwrite(weights, sizeof(*weights)*weightsCount, 1, f))
+        return fclose(f), printf("cannot write to file: %s\n", filename), false;
+      fclose(f);
+    }
+    return !next || next->save();
+  }
+
+
+  bool load() {
+    if (filename && weightsCount) {
+      FILE *f = fopen(filename, "rb");
+      if (!f)
+        return printf("cannot open file for read: %s\n", filename), false;
+      if (!fread(weights, sizeof(*weights)*weightsCount, 1, f))
+        return fclose(f), printf("cannot read from file: %s\n", filename), false;
+      fclose(f);
+    }
+    return !next || next->load();
+  }
+
+
+  void clearAccum() {
+    Accum a = {};
+    for(Neuron *in = neurons, *e = in + neuronsCount; in < e; ++in)
+      in->a = a;
+  }
+
+
+  void fillWeights(WeightReal wmin, WeightReal wmax) {
+    WeightReal k = (wmax - wmin)/RAND_MAX;
+    for(Weight *iw = weights, *e = iw + weightsCount; iw < e; ++iw)
+      iw->w = rand()*k + wmin;
+  }
+
+
+  virtual void split(int threadsCount)
+    { layout.split(mtLayouts, threadsCount); }
+  virtual void pass(Barrier &barrier) { }
+  virtual void backpassWeights(Barrier &barrier) { }
+  virtual void backpassDeltas(Barrier &barrier) { }
+
+  virtual void testPass() { }
+  virtual void testBackpass() { }
+};
+
+
+#endif
diff --git a/projects/neural/layer.simple.inc.cpp b/projects/neural/layer.simple.inc.cpp
new file mode 100644
index 0000000..1f2a7b7
--- /dev/null
+++ b/projects/neural/layer.simple.inc.cpp
@@ -0,0 +1,309 @@
+#ifndef LAYER_SIMPLE_INC_CPP
+#define LAYER_SIMPLE_INC_CPP
+
+
+#include "layer.inc.cpp"
+
+
+typedef void Func(Neuron &n, AccumReal s);
+
+
+inline void funcSigmoidExp(Neuron &n, AccumReal s) {
+  //if (s > 5) s = 5; else if (s < -5) s = -5;
+  AccumReal ss = 1/(1 + std::exp(-s)); n.v = ss; n.d = ss * (1-ss);
+}
+
+
+template<class Iter>
+inline void iterateNeurons(const Layout &l, Neuron *neurons) {
+  if (!l) return;
+  assert(neurons);
+
+  int h = l.y1 - l.y0;
+  int w = l.x1 - l.x0;
+  int d = l.z1 - l.z0;
+  int sz = l.sz;
+  int sxz = l.sx*sz;
+  int swz = w*sz;
+  int shxz = h*sxz;
+  int dy = sxz - swz;
+  int dx = sz - d;
+
+  Neuron *in = neurons + l.y0*sxz + l.x0*sz + l.z0;
+
+  for(Neuron *e = in + shxz; in < e; in += dy)
+  for(Neuron *e = in + swz; in < e; in += dx)
+  for(Neuron *e = in + d; in < e; ++in)
+    Iter::iter3(*in);
+}
+
+
+template<class Iter>
+inline void iterateNeurons2(Layout l, Layout dl, Neuron *neurons, typename Iter::DataType data, int stride = 1, typename Iter::DataAccumType *accum = nullptr) {
+  if (!l) return;
+  assert(dl);
+  assert(neurons);
+  assert(l.isSubLayoutOf(dl));
+
+  // guard against the default accum so that *accum below stays valid
+  typename Iter::DataAccumType dummyAccum = {};
+  if (!accum) accum = &dummyAccum;
+
+  int h = l.getH();
+  int w = l.getW();
+  int d = l.getD();
+  int sxz = l.sx*l.sz;
+  int swz = w*l.sz;
+  int shxz = h*sxz;
+  int dy = sxz - swz;
+  int dx = l.sz - d;
+
+  int d_w = dl.getW();
+  int d_d = dl.getD();
+  int d_dx = (d_d - d)*stride;
+  int d_dy = (d_w - w)*d_d*stride;
+
+  Neuron *in = neurons + l.y0*sxz + l.x0*l.sz + l.z0;
+  data += (((l.y0 - dl.y0)*d_w + l.x0 - dl.x0)*d_d + l.z0 - dl.z0)*stride;
+
+  for(Neuron *e = in + shxz; in < e; in += dy, data += d_dy)
+  for(Neuron *e = in + swz; in < e; in += dx, data += d_dx)
+  for(Neuron *e = in + d; in < e; ++in, data += stride)
+    Iter::iter4(*in, data, *accum);
+}
+
+
+template<class Iter>
+inline void iterateSimple(Layout cl, Layout pl, Layout wl, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  if (!cl) return;
+  assert(pl);
+  assert(wl);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
+  assert(cl.isSubLayoutOf(wl));
+
+  int c_h = cl.getH();
+  int c_w = cl.getW();
+  int c_d = cl.getD();
+  int c_sxz = cl.sx*cl.sz;
+  int c_swz = c_w*cl.sz;
+  int c_shxz = c_h*c_sxz;
+  int c_dy = c_sxz - c_swz;
+  int c_dx = cl.sz - c_d;
+
+  int p_h = pl.getH();
+  int p_w = pl.getW();
+  int p_d = pl.getD();
+  int p_sxz = pl.sx*pl.sz;
+  int p_swz = p_w*pl.sz;
+  int p_shxz = p_h*p_sxz;
+  int p_dy = p_sxz - p_swz;
+  int p_dx = pl.sz - p_d;
+
+  int w_w = wl.getW();
+  int w_d = wl.getD();
+  int w_dz = p_h*p_w*p_d;
+  int w_dx = (w_d - c_d)*w_dz;
+  int w_dy = (w_w - c_w)*w_d*w_dz;
+
+  Neuron *icn = c_neurons + (cl.y0*c_sxz + cl.x0*cl.sz + cl.z0);
+  p_neurons += pl.y0*p_sxz + pl.x0*pl.sz + pl.z0;
+
+  Weight *iw = weights + (((cl.y0 - wl.y0)*w_w + cl.x0 - wl.x0)*w_d + cl.z0 - wl.z0)*w_dz;
+
+  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, iw += w_dy)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, iw += w_dx)
+  for(Neuron *e = icn + c_d; icn < e; ++icn) {
+    typename Iter::AccumType a;
+    Iter::init(*icn, a);
+
+    Neuron *ipn = p_neurons;
+    for(Neuron *e = ipn + p_shxz; ipn < e; ipn += p_dy)
+    for(Neuron *e = ipn + p_swz; ipn < e; ipn += p_dx)
+    for(Neuron *e = ipn + p_d; ipn < e; ++ipn, ++iw)
+      Iter::iter(*ipn, *iw, a);
+
+    Iter::done(*icn, a);
+  }
+}
+
+
+template<class Iter>
+void iterateSimpleInv(Layout cl, Layout pl, Layout wl, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  if (!cl) return;
+  assert(pl);
+  assert(wl);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
+  assert(cl.isSubLayoutOf(wl));
+
+  int c_h = cl.getH();
+  int c_w = cl.getW();
+  int c_d = cl.getD();
+  int c_sxz = cl.sx*cl.sz;
+  int c_swz = c_w*cl.sz;
+  int c_shxz = c_h*c_sxz;
+  int c_dy = c_sxz - c_swz;
+  int c_dx = cl.sz - c_d;
+
+  int p_h = pl.getH();
+  int p_w = pl.getW();
+  int p_d = pl.getD();
+  int p_sxz = pl.sx*pl.sz;
+  int p_swz = p_w*pl.sz;
+  int p_shxz = p_h*p_sxz;
+  int p_dy = p_sxz - p_swz;
+  int p_dx = pl.sz - p_d;
+
+  int w_w = wl.getW();
+  int w_h = wl.getH();
+  int w_d = wl.getD();
+  int w_ddz = w_h*w_w*w_d;
+  int w_dx = w_d - c_d;
+  int w_dy = (w_w - c_w)*w_d;
+
+  Neuron *icn = c_neurons + (cl.y0*c_sxz + cl.x0*cl.sz + cl.z0);
+  p_neurons += pl.y0*p_sxz + pl.x0*pl.sz + pl.z0;
+
+  Weight *iw = weights + ((cl.y0 - wl.y0)*w_w + cl.x0 - wl.x0)*w_d + cl.z0 - wl.z0;
+
+  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, iw += w_dy)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, iw += w_dx)
+  for(Neuron *e = icn + c_d; icn < e; ++icn, ++iw) {
+    typename Iter::AccumType a;
+    Iter::init(*icn, a);
+
+    Weight *iiw = iw;
+    Neuron *ipn = p_neurons;
+    for(Neuron *e = ipn + p_shxz; ipn < e; ipn += p_dy)
+    for(Neuron *e = ipn + p_swz; ipn < e; ipn += p_dx)
+    for(Neuron *e = ipn + p_d; ipn < e; ++ipn, iiw += w_ddz)
+      Iter::iter(*ipn, *iiw, a);
+
+    Iter::done(*icn, a);
+  }
+}
+
+
+Layout optimizeLayoutSimple(const Layout &layout) {
+  Layout l = layout;
+  if (l.x0 == 0 && l.x1 == l.sx)
+    { l.x0 = l.y0*l.sx; l.x1 *= l.y1; l.sx *= l.sy; l.y0 = 0; l.y1 = l.sy = 1; }
+  if (l.z0 == 0 && l.z1 == l.sz)
+    { l.z0 = l.x0*l.sz; l.z1 *= l.x1; l.sz *= l.sx; l.x0 = 0; l.x1 = l.sx = 1; }
+  return l;
+}
+
+
+template<Func func>
+class LayerSimple: public Layer {
+public:
+  Layout optLayout;
+  Layout prevOptLayout;
+  Layout::List mtOptLayouts;
+  Layout::List mtPrevOptLayouts;
+
+
+  LayerSimple(Layer &prev, const Layout &layout, Weight *weights = nullptr):
+    Layer(&prev, layout, layout.getActiveCount() * prev.back().layout.getActiveCount(), weights),
+    optLayout(optimizeLayoutSimple(layout)),
+    prevOptLayout(optimizeLayoutSimple(this->prev->layout))
+  {
+    if (ownWeights) fillWeights(-1, 1);
+  }
+
+
+  void split(int threadsCount) override {
+    Layer::split(threadsCount);
+    optLayout.split(mtOptLayouts, threadsCount);
+    prevOptLayout.split(mtPrevOptLayouts, threadsCount);
+  }
+
+
+  void pass(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
+    };
+    iterateSimple<I>(mtOptLayouts[barrier.tid], prevOptLayout, optLayout, neurons, prev->neurons, weights);
+  }
+
+
+  void backpassWeights(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.v * a.v; }
+    };
+    iterateSimple<I>(mtOptLayouts[barrier.tid], prevOptLayout, optLayout, neurons, prev->neurons, weights);
+  }
+
+
+  void backpassDeltas(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
+    };
+    iterateSimpleInv<I>(mtPrevOptLayouts[barrier.tid], optLayout, prevOptLayout, prev->neurons, neurons, weights);
+  }
+
+
+  void testPass() override {
+    Layout cl = layout;
+    Layout pl = prev->layout;
+
+    for(int cy = cl.y0; cy < cl.y1; ++cy)
+    for(int cx = cl.x0; cx < cl.x1; ++cx)
+    for(int cz = cl.z0; cz < cl.z1; ++cz) {
+      AccumReal a = 0;
+      Neuron &cn = neurons[ (cy*cl.sx + cx)*cl.sz + cz ];
+      int wi = ((cy-cl.y0)*cl.getW() + cx-cl.x0)*cl.getD() + cz-cl.z0;
+
+      for(int py = pl.y0; py < pl.y1; ++py)
+      for(int px = pl.x0; px < pl.x1; ++px)
+      for(int pz = pl.z0; pz < pl.z1; ++pz) {
+        Neuron &pn = prev->neurons[ (py*pl.sx + px)*pl.sz + pz ];
+        int wii = ((wi*pl.getH() + py-pl.y0)*pl.getW() + px-pl.x0)*pl.getD() + pz-pl.z0;
+        Weight &w = weights[wii];
+        a += pn.v * w.w;
+      }
+
+      func(cn, a);
+    }
+  }
+
+
+  void testBackpass() override {
+    Layout cl = layout;
+    Layout pl = prev->layout;
+
+    for(int i = 0; i < prev->neuronsCount; ++i)
+      prev->neurons[i].a.v = 0;
+
+    for(int cy = cl.y0; cy < cl.y1; ++cy)
+    for(int cx = cl.x0; cx < cl.x1; ++cx)
+    for(int cz = cl.z0; cz < cl.z1; ++cz) {
+      Neuron &cn = neurons[ (cy*cl.sx + cx)*cl.sz + cz ];
+      int wi = ((cy-cl.y0)*cl.getW() + cx-cl.x0)*cl.getD() + cz-cl.z0;
+
+      for(int py = pl.y0; py < pl.y1; ++py)
+      for(int px = pl.x0; px < pl.x1; ++px)
+      for(int pz = pl.z0; pz < pl.z1; ++pz) {
+        Neuron &pn = prev->neurons[ (py*pl.sx + px)*pl.sz + pz ];
+        int wii = ((wi*pl.getH() + py-pl.y0)*pl.getW() + px-pl.x0)*pl.getD() + pz-pl.z0;
+        Weight &w = weights[wii];
+
+        pn.a.v += w.w * cn.d;
+        w.w += pn.v * cn.d;
+      }
+    }
+
+    for(int i = 0; i < prev->neuronsCount; ++i) {
+      Neuron &pn = prev->neurons[i];
+      pn.d *= pn.a.v;
+      pn.a.v = 0;
+    }
+  }
+};
+
+
+#endif
diff --git a/projects/neural/layer.simple.test.inc.cpp b/projects/neural/layer.simple.test.inc.cpp
new file mode 100644
index 0000000..7fef70e
--- /dev/null
+++ b/projects/neural/layer.simple.test.inc.cpp
@@ -0,0 +1,192 @@
+#ifndef LAYER_SIMPLE_TEST_INC_CPP
+#define LAYER_SIMPLE_TEST_INC_CPP
+
+
+#include "layer.test.inc.cpp"
+#include "layer.simple.inc.cpp"
+
+
+class SimpleTest: public Test {
+public:
+  static void init(const Layout &cl, const Layout &pl = Layout())
+    { Test::init(cl.getCount(), pl.getCount(), cl.getActiveCount()*pl.getActiveCount()); }
+
+
+  static bool verifyWeights(const char *name, const Layout &cl, const Layout &pl) {
+    Stage st(name);
+    for(int cy = cl.y0; cy < cl.y1; ++cy)
+    for(int cx = cl.x0; cx < cl.x1; ++cx)
+    for(int cz = cl.z0; cz < cl.z1; ++cz) {
+      int ci = (cy*cl.sx + cx)*cl.sz + cz;
+
+      for(int py = pl.y0; py < pl.y1; ++py)
+      for(int px = pl.x0; px < pl.x1; ++px)
+      for(int pz = pl.z0; pz < pl.z1; ++pz) {
+        int pi = (py*pl.sx + px)*pl.sz + pz;
+
+        int wi = ((cy - cl.y0)*cl.getW() + cx - cl.x0)*cl.getD() + cz - cl.z0;
+        wi = ((wi*pl.getH() + py - pl.y0)*pl.getW() + px - pl.x0)*pl.getD() + pz - pl.z0;
+
+        int s = (int)p_neurons.size();
+        int w = weights[wi].i;
+        int i = ci*s + pi + 1;
+
+        if (w != i) {
+          int ww = w;
+          int wpz = ww%pl.sz; ww /= pl.sz;
+          int wpx = ww%pl.sx; ww /= pl.sx;
+          int wpy = ww%pl.sy; ww /= pl.sy;
+          int wcz = ww%cl.sz; ww /= cl.sz;
+          int wcx = ww%cl.sx; ww /= cl.sx;
+          int wcy = ww;
+
+          printf(
+            "wrong index: %d = ((%d*%d + %d)*%d + %d)*%d + (%d*%d + %d)*%d + %d + 1,\n"
+            "expected: %d = ((%d*%d + %d)*%d + %d)*%d + (%d*%d + %d)*%d + %d + 1\n"
+            "wi = %d\n",
+            w,
+            wcy, cl.sx, wcx, cl.sz, wcz, s,
+            wpy, pl.sx, wpx, pl.sz, wpz,
+            i,
+            cy, cl.sx, cx, cl.sz, cz, s,
+            py, pl.sx, px, pl.sz, pz,
+            wi );
+          pl.printYXZ("prev layout");
+          cl.printYXZ("curr layout");
+          ++errors;
+          return st;
+        }
+      }
+    }
+    return st;
+  }
+
+
+  static bool testIterators(const char *name, const Layout &cl, const Layout &pl, const Layout &ocl, const Layout &opl, int threads) {
+    Stage st(name);
+
+    assert(cl && pl && ocl && opl && threads > 0);
+    Layout::List oclist, oplist;
+    ocl.split(oclist, threads);
+    opl.split(oplist, threads);
+
+    struct I: public Iter {
+      typedef int DataType;
+      typedef int DataAccumType;
+      static inline void init(Neuron &n, Iter::AccumType &a) { ++n.a.i; a.i = (AccumInt)(&n - c_neurons.data()); }
+      static inline void iter(Neuron &n, Weight &w, Iter::AccumType &a) {
+        if (w.i)
+          ++errors;
+        w.i = (WeightInt)(&n - p_neurons.data() + a.i*p_neurons.size() + 1);
+      }
+      static inline void iter3(Neuron &n) { ++n.a.i; }
+      static inline void iter4(Neuron &n, DataType d, DataAccumType &a) { n.a.i = d; ++a; }
+    };
+
+    struct IB: public Iter {
+      static inline void init(Neuron &n, Iter::AccumType &a) { ++n.a.i; a.i = (AccumInt)(&n - p_neurons.data()); }
+      static inline void iter(Neuron &n, Weight &w, Iter::AccumType &a) {
+        if (w.i)
+          ++errors;
+        w.i = (WeightInt)((&n - c_neurons.data())*p_neurons.size() + a.i + 1);
+      }
+    };
+
+    {
+      Stage st("iterateNeurons");
+      init(cl);
+      for(int i = 0; i < threads; ++i)
+        iterateNeurons<I>(oclist[i], c_neurons.data());
+      verifyNeurons("check-neurons", cl, c_neurons.data());
+    }
+
+    {
+      Stage st("iterateNeurons2");
+      init(cl);
+      for(int i = 0; i < threads; ++i) {
+        int a = 5, aa = a + oclist[i].getActiveCount();
+        iterateNeurons2<I>(oclist[i], ocl, c_neurons.data(), 5, 3, &a);
+        if (a != aa) {
+          printf("wrong accum value %d, expected %d, tid: %d/%d\n", a, aa, i, threads);
+          oclist[i].printYXZ("sub layout");
+          ocl.printYXZ("orig layout");
+        }
+      }
+      verifyNeuronIndices("check-neuron-indices", cl, c_neurons.data(), 5, 3);
+    }
+
+    {
+      Stage st("iterateSimple");
+      init(cl, pl);
+      for(int i = 0; i < threads; ++i)
+        iterateSimple<I>(oclist[i], opl, ocl, c_neurons.data(), p_neurons.data(), weights.data());
+      verifyNeurons("check-neurons", cl, c_neurons.data());
+      verifyWeights("check-weights", cl, pl);
+    }
+
+    {
+      Stage st("iterateSimpleInv");
+      init(cl, pl);
+      for(int i = 0; i < threads; ++i)
+        iterateSimpleInv<IB>(oplist[i], ocl, opl, p_neurons.data(), c_neurons.data(), weights.data());
+      verifyNeurons("check-neurons", pl, p_neurons.data());
+      verifyWeights("check-weights", cl, pl);
+    }
+
+    return st;
+  }
+
+
+  static bool testIterators(const char *name, const Layout &cl, const Layout &pl) {
+    Stage st(name);
+
+    {
+      Stage st("plain");
+      testIterators( "single-thread", cl, pl, cl, pl, 1 );
+      testIterators( "2-threads",     cl, pl, cl, pl, 2 );
+      testIterators( "7-threads",     cl, pl, cl, pl, 7 );
+      testIterators( "8-threads",     cl, pl, cl, pl, 8 );
+      testIterators( "512-threads",   cl, pl, cl, pl, 512 );
+    }
+
+    {
+      Stage st("optimized");
+      Layout ocl = optimizeLayoutSimple(cl);
+      Layout opl = optimizeLayoutSimple(pl);
+      testIterators( "single-thread", cl, pl, ocl, opl, 1 );
+      testIterators( "2-threads",     cl, pl, ocl, opl, 2 );
+      testIterators( "7-threads",     cl, pl, ocl, opl, 7 );
+      testIterators( "8-threads",     cl, pl, ocl, opl, 8 );
+      testIterators( "512-threads",   cl, pl, ocl, opl, 512 );
+    }
+
+    return st;
+  }
+
+
+  static bool test(const char *name, const Layout &cl, const Layout &pl) {
+    Stage st(name);
+
+    testIterators(name, cl, pl);
+
+    {
+      Layer l(nullptr, pl);
+      new LayerSimple<funcSigmoidExp>(l, cl);
+      Test::testLayer("LayerSimple", l);
+    }
+
+    return st;
+  }
+
+
+  static bool test(const char *name = "simple") {
+    Stage st(name);
+    test("square-16x8", Layout(8, 8, 4), Layout(16, 16, 3));
+    test("random-rect", Layout( 7, 4, 3).expandX(1, 2).expandY(3, 1).expandZ(5, 8),
+                        Layout(13, 9, 4).expandX(2, 0).expandY(5, 3).expandZ(3, 1) );
+    return st;
+  }
+};
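+
+// Both test harnesses tag every weight with the 1-based pair index
+// w.i = ci*p_count + pi + 1, where ci/pi are flat neuron indices; 0 means
+// "never visited", so double visits and skipped links are both detectable,
+// and a bad tag can be decoded back into its (current, prev) pair. A tiny
+// worked round trip of that encoding (illustrative, kept disabled):
+#if 0
+#include <cstdio>
+int main() {
+  int p_count = 7;               // hypothetical prev-neuron count
+  int ci = 3, pi = 5;            // one (current, prev) neuron pair
+  int w = ci*p_count + pi + 1;   // tag stored in Weight::i -> 27
+  printf("tag %d -> ci %d, pi %d\n", w, (w - 1)/p_count, (w - 1)%p_count);
+  return 0;
+}
+#endif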
+
+
+#endif
diff --git a/projects/neural/layer.test.inc.cpp b/projects/neural/layer.test.inc.cpp
new file mode 100644
index 0000000..83327c2
--- /dev/null
+++ b/projects/neural/layer.test.inc.cpp
@@ -0,0 +1,253 @@
+#ifndef LAYER_TEST_INC_CPP
+#define LAYER_TEST_INC_CPP
+
+
+#include <thread>
+
+#include "layer.inc.cpp"
+
+
+
+class Test {
+public:
+  class Stage {
+  public:
+    const int errors;
+    inline explicit Stage(const char *name): errors(Test::errors) {
+      for(int i = 0; i < level; ++i) printf("- ");
+      printf("%s\n", name);
+      fflush(stdout);
+      ++level;
+    }
+    inline ~Stage() {
+      --level;
+      if (!*this) {
+        for(int i = 0; i < level; ++i) printf("- ");
+        printf("FAILED\n");
+      }
+      fflush(stdout);
+    }
+    operator bool() { return Test::errors == errors; }
+  };
+
+private:
+  static int level;
+
+protected:
+  static std::vector<Neuron> c_neurons;
+  static std::vector<Neuron> p_neurons;
+  static std::vector<Weight> weights;
+
+public:
+  static int errors;
+
+
+  static void init(int c_count, int p_count, int w_count) {
+    Neuron n = {};
+    Weight w = {};
+
+    c_neurons.clear();
+    p_neurons.clear();
+    weights.clear();
+    c_neurons.resize(c_count, n);
+    p_neurons.resize(p_count, n);
+    weights.resize(w_count, w);
+  }
+
+
+  static bool verifyNeurons(const char *name, const Layout &l, const Neuron *neurons, bool ignorePadded = false) {
+    Stage st(name);
+    for(int y = 0; y < l.sy; ++y)
+    for(int x = 0; x < l.sx; ++x)
+    for(int z = 0; z < l.sz; ++z) {
+      int n = neurons[ (y*l.sx + x)*l.sz + z ].a.i;
+      int i = x >= l.x0 && x < l.x1
+           && y >= l.y0 && y < l.y1
+           && z >= l.z0 && z < l.z1;
+      if (ignorePadded ? i && n != i : n != i) {
+        printf(
+          "wrong neuron mark %d, expected %d (%d, %d, %d)\n",
+          n, i, y, x, z );
+        l.printYXZ("layout");
+        ++errors;
+        return st;
+      }
+    }
+    return st;
+  }
+
+
+  static bool verifyNeuronIndices(const char *name, const Layout &l, const Neuron *neurons, int base = 1, int stride = 1) {
+    Stage st(name);
+    for(int y = 0; y < l.sy; ++y)
+    for(int x = 0; x < l.sx; ++x)
+    for(int z = 0; z < l.sz; ++z) {
+      bool active = x >= l.x0 && x < l.x1
+                 && y >= l.y0 && y < l.y1
+                 && z >= l.z0 && z < l.z1;
+
+      int n = neurons[ (y*l.sx + x)*l.sz + z ].a.i;
+      int i = (((y - l.y0)*l.getW() + x - l.x0)*l.getD() + z - l.z0)*stride + base;
+
+      if (!active) i = 0;
+
+      if (n != i) {
+        printf(
+          "wrong neuron mark %d, expected %d (%d, %d, %d)\n",
+          n, i, y, x, z );
+        l.printYXZ("layout");
+        ++errors;
+        return st;
+      }
+    }
+    return st;
+  }
+
+
+  static bool verifyNeuronsAccum(const Layout &l, Neuron *neurons, int accum = 1, bool ignoreBounds = false) {
+    for(int y = 0; y < l.sy; ++y)
+    for(int x = 0; x < l.sx; ++x)
+    for(int z = 0; z < l.sz; ++z) {
+      Neuron &n = neurons[ (y*l.sx + x)*l.sz + z ];
+      int i = ( x >= l.x0 && x < l.x1
+             && y >= l.y0 && y < l.y1
+             && z >= l.z0 && z < l.z1 )*accum;
+      if (ignoreBounds) i = accum;
+      if (n.v != 0 && n.v != i) {
+        printf(
+          "wrong neuron mark %g, expected 0 or %d (%d, %d, %d)\n",
+          n.v, i, y, x, z );
+        l.printYXZ("layout");
+        ++errors;
+        return false;
+      }
+      if (n.v) n.a.i = 1;
+      n.v = 0;
+    }
+    return true;
+  }
+
+
+  static bool testLayer(const char *name, Layer &l) {
+    Stage st(name);
+
+    assert(l.next);
+    Layer &p = l;
+    Layer &c = *l.next;
+
+
+    struct H {
+      Layer &p;
+      Layer &c;
+
+      std::vector<std::thread*> threads;
+      std::atomic<unsigned int> counter;
+
+      H(Layer &p, Layer &c): p(p), c(c), counter(0) { }
+
+      void prepareData() {
+        memcpy(c.neurons, c_neurons.data(), c.neuronsCount*sizeof(Neuron));
+        memcpy(p.neurons, p_neurons.data(), p.neuronsCount*sizeof(Neuron));
+        memcpy(c.weights, weights.data(), c.weightsCount*sizeof(Weight));
+      }
+
+      void applyDelta() {
+        for(int i = 0; i < c.neuronsCount; ++i)
+          c.neurons[i].d *= c_neurons[i].v - c.neurons[i].v;
+      }
+
+      void func(int tid) {
+        Barrier barrier(counter, tid, threads.size());
+        c.pass(barrier);
+        barrier.wait();
+        if (!tid) applyDelta();
+        barrier.wait();
+        c.backpassDeltas(barrier);
+        barrier.wait();
+        c.backpassWeights(barrier);
+      }
+
+      bool test(const char *name, int threadsCount) {
+        Stage st(name);
+
+        assert(threadsCount > 0);
+
+        counter = 0;
+        threads.clear();
+        threads.resize(threadsCount, nullptr);
+
+        prepareData();
+
+        p.split(threadsCount);
+        c.split(threadsCount);
+        for(int i = 1; i < threadsCount; ++i) threads[i] = new std::thread(&H::func, this, i);
+        func(0);
+        for(int i = 1; i < threadsCount; ++i) { threads[i]->join(); delete threads[i]; }
+        threads.clear();
+
+        for(int i = 0; i < c.neuronsCount; ++i) {
+          NeuronReal a = c.neurons[i].v;
+          NeuronReal b = c_neurons[i + c.neuronsCount].v;
+          if (fabs(a - b) > 1e-6)
+            { printf("result differs at neuron %d, was %g, expected %g\n", i, a, b); ++errors; break; }
+        }
+
+        for(int i = 0; i < p.neuronsCount; ++i) {
+          NeuronReal a = p.neurons[i].d;
+          NeuronReal b = p_neurons[i + p.neuronsCount].d;
+          if (fabs(a - b) > 1e-6)
+            { printf("delta differs at neuron %d, was %g, expected %g\n", i, a, b); ++errors; break; }
+        }
+
+        for(int i = 0; i < c.weightsCount; ++i) {
+          WeightReal a = c.weights[i].w;
+          WeightReal b = weights[i + c.weightsCount].w;
+          if (fabs(a - b) > 1e-6)
+            { printf("weight differs at %d, was %g, expected %g\n", i, a, b); ++errors; break; }
+        }
+
+        if (!st) {
+          p.layout.printYXZ("prev layout");
+          c.layout.printYXZ("curr layout");
+        }
+
+        return st;
+      }
+    } h(p, c);
+
+    // make base data
+
+    init(c.neuronsCount*2, p.neuronsCount*2, c.weightsCount*2);
+    for(int i = 0; i < c.neuronsCount; ++i) c_neurons[i].v = rand()/(NeuronReal)RAND_MAX;
+    for(int i = 0; i < p.neuronsCount; ++i) p_neurons[i].v = rand()/(NeuronReal)RAND_MAX;
+    memcpy(weights.data(), c.weights, c.weightsCount*sizeof(Weight));
+
+    h.prepareData();
+    c.testPass();
+    h.applyDelta();
+    c.testBackpass();
+
+    memcpy(&c_neurons[c.neuronsCount], c.neurons, c.neuronsCount*sizeof(Neuron));
+    memcpy(&p_neurons[p.neuronsCount], p.neurons, p.neuronsCount*sizeof(Neuron));
+    memcpy(&weights[c.weightsCount], c.weights, c.weightsCount*sizeof(Weight));
+
+    h.test("single-thread", 1);
+    h.test("2-threads", 2);
+    h.test("7-threads", 7);
+    h.test("8-threads", 8);
+    //h.test("512-threads", 512);
+
+    return st;
+  }
+};
+
+
+int Test::level = 0;
+std::vector<Neuron> Test::c_neurons;
+std::vector<Neuron> Test::p_neurons;
+std::vector<Weight> Test::weights;
+int Test::errors = 0;
+
+
+
+#endif
diff --git a/projects/neural/layout.inc.cpp b/projects/neural/layout.inc.cpp
new file mode 100644
index 0000000..83a78a0
--- /dev/null
+++ b/projects/neural/layout.inc.cpp
@@ -0,0 +1,129 @@
+#ifndef LAYOUT_INC_CPP
+#define LAYOUT_INC_CPP
+
+
+#include <cstdio>
+#include <cassert>
+
+#include <vector>
+#include <algorithm>
+
+
+struct Layout {
+  typedef std::vector<Layout> List;
+
+  int sx, sy, sz;
+  int x0, x1;
+  int y0, y1;
+  int z0, z1;
+
+  inline Layout(): sx(), sy(), sz(), x0(), x1(), y0(), y1(), z0(), z1() { }
+
+  explicit inline Layout(int sx, int sy = 1, int sz = 1):
+    sx(sx), sy(sy), sz(sz), x0(), x1(sx), y0(), y1(sy), z0(), z1(sz) { }
+
+
+  inline Layout& expandX  (int e0, int e1) { return sx += e0+e1, x0 += e0, x1 += e0, *this; }
+  inline Layout& expandY  (int e0, int e1) { return sy += e0+e1, y0 += e0, y1 += e0, *this; }
+  inline Layout& expandZ  (int e0, int e1) { return sz += e0+e1, z0 += e0, z1 += e0, *this; }
+  inline Layout& expandXY (int e0, int e1) { return expandX (e0, e1).expandY(e0, e1); }
+  inline Layout& expandXYZ(int e0, int e1) { return expandXY(e0, e1).expandZ(e0, e1); }
+  inline Layout& expandX  (int e) { return expandX  (e, e); }
+  inline Layout& expandY  (int e) { return expandY  (e, e); }
+  inline Layout& expandZ  (int e) { return expandZ  (e, e); }
+  inline Layout& expandXY (int e) { return expandXY (e, e); }
+  inline Layout& expandXYZ(int e) { return expandXYZ(e, e); }
+
+  inline Layout& padX  (int p0, int p1) { return x0 += p0, x1 -= p1, *this; }
+  inline Layout& padY  (int p0, int p1) { return y0 += p0, y1 -= p1, *this; }
+  inline Layout& padZ  (int p0, int p1) { return z0 += p0, z1 -= p1, *this; }
+  inline Layout& padXY (int p0, int p1) { return padX (p0, p1).padY(p0, p1); }
+  inline Layout& padXYZ(int p0, int p1) { return padXY(p0, p1).padZ(p0, p1); }
+  inline Layout& padX  (int p) { return padX  (p, p); }
+  inline Layout& padY  (int p) { return padY  (p, p); }
+  inline Layout& padZ  (int p) { return padZ  (p, p); }
+  inline Layout& padXY (int p) { return padXY (p, p); }
+  inline Layout& padXYZ(int p) { return padXYZ(p, p); }
+
+
+  inline int getW() const { return x1 - x0; }
+  inline int getH() const { return y1 - y0; }
+  inline int getD() const { return z1 - z0; }
+
+  inline int getCount() const { return sx*sy*sz; }
+  inline int getActiveCount() const { return getW()*getH()*getD(); }
+
+
+  inline operator bool() const {
+    return x0 >= 0 && x0 < x1 && x1 <= sx
+        && y0 >= 0 && y0 < y1 && y1 <= sy
+        && z0 >= 0 && z0 < z1 && z1 <= sz;
+  }
+
+
+  inline bool isSameSizeWith(const Layout &b) const
+    { return sx == b.sx && sy == b.sy && sz == b.sz; }
+  inline bool isSameActiveSizeWith(const Layout &b) const
+    { return getW() == b.getW() && getH() == b.getH() && getD() == b.getD(); }
+
+  inline bool isSubLayoutOf(const Layout &b) const
+    { return isSameSizeWith(b) && b.x0 <= x0 && x0 < x1 && x1 <= b.x1
+                               && b.y0 <= y0 && y0 < y1 && y1 <= b.y1
+                               && b.z0 <= z0 && z0 < z1 && z1 <= b.z1; }
+  inline bool isParentLayoutOf(const Layout &b) const
+    { return b.isSubLayoutOf(*this); }
+
+
+  void splitX(List &list, int count) const {
+    if (count <= 0) return list.clear();
+    list.resize(count);
+    int v = x0, s = x1 - v;
+    for(int i = 0; i < count; ++i) {
+      Layout &l = list[i] = *this;
+      l.x0 = v;
+      l.x1 = (v += s/count + (i < s%count));
+    }
+  }
+
+  void splitY(List &list, int count) const {
+    if (count <= 0) return list.clear();
+    list.resize(count);
+    int v = y0, s = y1 - v;
+    for(int i = 0; i < count; ++i) {
+      Layout &l = list[i] = *this;
+      l.y0 = v;
+      l.y1 = (v += s/count + (i < s%count));
+    }
+  }
+
+  void splitZ(List &list, int count) const {
+    if (count <= 0) return list.clear();
+    list.resize(count);
+    int v = z0, s = z1 - v;
+    for(int i = 0; i < count; ++i) {
+      Layout &l = list[i] = *this;
+      l.z0 = v;
+      l.z1 = (v += s/count + (i < s%count));
+    }
+  }
+
+  void split(List &list, int count) const {
+    int h = getH(), w = getW(), d = getD();
+    if (h >= w && h >= d) splitY(list, count); else
+    if (w >= d)           splitX(list, count); else
+                          splitZ(list, count);
+  }
+
+  void print(const char *prefix = nullptr) const {
+    if (prefix && *prefix) printf("%s: ", prefix);
+    printf("x: %d (%d-%d), y: %d (%d-%d), z: %d (%d-%d)\n", sx, x0, x1, sy, y0, y1, sz, z0, z1);
+  }
+  void printYXZ(const char *prefix = nullptr) const {
+    if (prefix && *prefix) printf("%s: ", prefix);
+    printf("y: %d (%d-%d), x: %d (%d-%d), z: %d (%d-%d)\n", sy, y0, y1, sx, x0, x1, sz, z0, z1);
+  }
+};
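+
+// A Layout is a padded box: (sx, sy, sz) is the allocated size and
+// [x0,x1)x[y0,y1)x[z0,z1) the active region. expand* grows the allocation
+// around the active cells; pad* shrinks the active region inside a fixed
+// allocation. A small worked sketch (illustrative, kept disabled):
+#if 0
+#include <cstdio>
+int main() {
+  Layout l = Layout(128, 128, 4).expandXY(2);
+  l.print("padded");  // x: 132 (2-130), y: 132 (2-130), z: 4 (0-4)
+  printf("allocated %d, active %d\n", l.getCount(), l.getActiveCount());
+  // allocated 132*132*4 = 69696, active 128*128*4 = 65536
+  return 0;
+}
+#endif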
+
+
+
+
+#endif
diff --git a/projects/neural/train.digit.inc.cpp b/projects/neural/train.digit.inc.cpp
new file mode 100644
index 0000000..9b3bc9e
--- /dev/null
+++ b/projects/neural/train.digit.inc.cpp
@@ -0,0 +1,94 @@
+#ifndef TRAIN_DIGIT_INC_CPP
+#define TRAIN_DIGIT_INC_CPP
+
+
+#include "train.inc.cpp"
+#include "layer.simple.inc.cpp"
+
+
+class TrainerDigit: public Trainer {
+protected:
+  std::vector<unsigned char> data;
+  std::vector<int> shuffle;
+  Layout ofl, obl;
+  Layout::List oflist, oblist;
+  int stride, count;
+
+public:
+  TrainerDigit(): stride(), count() { }
+
+  bool loadSymbolMap(const char *filename) {
+    data.clear();
+
+    FILE *f = fopen(filename, "rb");
+    if (!f)
+      return printf("cannot open file for read: %s\n", filename), false;
+    fseek(f, 0, SEEK_END);
+    size_t fs = ftello(f);
+    fseek(f, 0, SEEK_SET);
+
+    data.resize(fs, 0);
+    if (!fread(data.data(), fs, 1, f))
+      return printf("cannot read from file: %s\n", filename), fclose(f), data.clear(), false;
+
+    fclose(f);
+    return true;
+  }
+
+protected:
+  bool prepare() override {
+    ofl = optimizeLayoutSimple(fl->layout);
+    obl = optimizeLayoutSimple(bl->layout);
+    ofl.split(oflist, threadsCount);
+    obl.split(oblist, threadsCount);
+    stride = ofl.getActiveCount() + 1;
+    count = data.size()/stride;
+    if (count <= 0) return false;
+    shuffle.resize(count);
+    for(int i = 0; i < count; ++i)
+      shuffle[i] = i;
+    return true;
+  }
+
+
+  bool prepareBlock() override {
+    int cnt = itersPerBlock > count ? count : itersPerBlock;
+    for(int i = 0; i < cnt; ++i) {
+      int j = rand()%count;
+      if (i != j) std::swap(shuffle[i], shuffle[j]);
+    }
+    return true;
+  }
+
+
+  void loadData(Barrier &barrier, int, int iter) override {
+    struct I: public Iter {
+      typedef const unsigned char* DataType;
+      static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; }
+    };
+    const unsigned char *id = data.data() + shuffle[iter%count]*stride;
+    iterateNeurons2<I>(oflist[barrier.tid], ofl, fl->neurons, id);
+  }
+
+
+  AccumReal verifyDataMain(int, int iter) override {
+    struct I: public Iter {
+      typedef int DataType;
+      struct DataAccumType { int ri, mi; NeuronReal m; };
+      static inline void iter4(Neuron &n, DataType d, DataAccumType &a) {
+        NeuronReal v1 = d == a.ri;
+        NeuronReal v0 = n.v;
+        n.d *= v1 - v0;
+        if (a.m < v0) { a.m = v0; a.mi = d; }
+      }
+    };
+
+    I::DataAccumType a = { data[ (shuffle[iter%count] + 1)*stride - 1 ], 0, 0 };
+    iterateNeurons2<I>(obl, obl, bl->neurons, 0, 1, &a);
+
+    return a.mi != a.ri;
+  }
+};
+
+
+#endif
diff --git a/projects/neural/train.image.inc.cpp b/projects/neural/train.image.inc.cpp
new file mode 100644
index 0000000..d7dd9a2
--- /dev/null
+++ b/projects/neural/train.image.inc.cpp
@@ -0,0 +1,204 @@
+#ifndef TRAIN_IMAGE_INC_CPP
+#define TRAIN_IMAGE_INC_CPP
+
+
+#include "train.inc.cpp"
+#include "layer.simple.inc.cpp"
+
+
+class TrainerImage: public Trainer {
+protected:
+  std::vector<unsigned char> data;
+  std::vector<int> shuffle;
+  const char *datafile;
+  const char *outfile;
+  Layout ofl, obl;
+  Layout::List oflist, oblist;
+  int stride, count;
+
+public:
+  TrainerImage(): datafile(), outfile(), stride(), count() { }
+
+  bool configure(const char *datafile, const char *outfile) {
+    this->datafile = datafile;
+    this->outfile = outfile;
+    return true;
+  }
+
+  // The original patch lost this function's header; the body matches
+  // TrainerDigit::loadSymbolMap, so it is restored here under an assumed name.
+  bool loadImageData(const char *filename) {
+    data.clear();
+
+    FILE *f = fopen(filename, "rb");
+    if (!f)
+      return printf("cannot open file for read: %s\n", filename), false;
+    fseek(f, 0, SEEK_END);
+    size_t fs = ftello(f);
+    fseek(f, 0, SEEK_SET);
+
+    data.resize(fs, 0);
+    if (!fread(data.data(), fs, 1, f))
+      return printf("cannot read from file: %s\n", filename), fclose(f), data.clear(), false;
+
+    fclose(f);
+    return true;
+  }
+
+
+  // Legacy single-threaded trainer kept for reference: it targets an older
+  // Layer interface (l.size, l.a, bl.da, l.pass(), bl.backpass(), l.save(file))
+  // and a tgaSave() helper that are not part of this patch, so it is disabled.
+#if 0
+  void imgTrain(Layer &l, const char *datafile, int size, const char *outfile, double trainRatio, int count) {
+    Layer &bl = l.back();
+
+    assert(!l.prev);
+    assert(datafile);
+    assert(count > 0 && size > 0);
+    assert(l.size == size);
+    assert(bl.size == size);
+
+    int blockSize = 1000; //1024*1024*1024/size;
+    assert(blockSize > 0);
+
+    FILE *f = fopen(datafile, "rb");
+    if (!f)
+      { printf("cannot open file: %s\n", datafile); return; }
+    fseeko64(f, 0, SEEK_END);
+    long long fsize = ftello64(f);
+    int xCount = (int)(fsize/size);
+    if (xCount <= 0)
+      { printf("no tests in file: %s\n", datafile); return; }
+
+    int *block = new int[blockSize*2];
+    int *shuffle = block + blockSize;
+    double *results = new double[blockSize];
+    unsigned char *blockData = new unsigned char[(blockSize + 1)*size];
+    unsigned char *blockResData = blockData + blockSize*size;
+    bool err = false;
+
+    for(int j = 0; j < blockSize; ++j)
+      { shuffle[j] = j; results[j] = 0; }
+
+    int blocksCount = (count - 1)/blockSize + 1;
+
+    printf("training %d (%d x %d blocks), tests: %d, ratio: %f:\n", blocksCount*blockSize, blocksCount, blockSize, xCount, trainRatio);
+
+    double avgSum = 0;
+    for(int i = 0; i < blocksCount; ++i) {
+      for(int j = 0; j < blockSize; ++j) {
+        block[j] = rand()%xCount;
+        std::swap(shuffle[i], shuffle[rand()%blockSize]);
+      }
+      std::sort(block, block + blockSize);
+
+      for(int j = 0; j < blockSize; ++j) {
+        fseeko64(f, block[j]*(long long)size, SEEK_SET);
+        if (!fread(blockData + j*size, size, 1, f))
+          { printf("cannot read data from file: %s\n", datafile); err = true; break; }
+      }
+      if (err) break;
+
+      printf("  next data block loaded\n");
+
+      double sumQ = 0;
+      for(int j = 0; j < blockSize; ++j) {
+        unsigned char *data = blockData + shuffle[j]*size;
+        for(double *ia = l.a, *e = ia + l.size; ia < e; ++ia, ++data)
+          *ia = *data/255.0;
+
+        double firstQ = 0, q = 0;
+        for(int repeat = 0; repeat < 1; ++repeat) {
+          l.pass();
+
+          for(double *ia = l.a, *iba = bl.a, *ibda = bl.da, *e = ia + l.size; ia < e; ++ia, ++iba, ++ibda) {
+            double d = *ia - *iba;
+            *ibda = d;
+            q += d*d;
+          }
+          q /= size;
+          if (!repeat) firstQ = q;
+
+          bl.backpass(trainRatio);
+        }
+
+        sumQ += firstQ;
+        avgSum += firstQ - results[j];
+        results[j] = firstQ;
+        int avgCnt = i ? blockSize : j + 1;
+        printf("  %4d: total: %6d, avg result: %f, last result: %f -> %f\n", j+1, i*blockSize+j+1, avgSum/avgCnt, firstQ, q);
+      }
+
+      printf("%4d: total: %6d, avg result: %f\n", i+1, (i+1)*blockSize, sumQ/blockSize);
+
+      if (outfile && !l.save(outfile))
+        { printf("cannot save neural network weights to file: %s\n", outfile); err = true; break; }
+
+      unsigned char *data = blockResData;
+      for(double *iba = bl.a, *e = iba + bl.size; iba < e; ++iba, ++data)
+        *data = (unsigned char)(*iba*255.999);
+      tgaSave("data/output/sampleX.tga", blockData + shuffle[blockSize-1]*size, 256, 256, 3);
+      tgaSave("data/output/sampleY.tga", blockResData, 256, 256, 3);
+    }
+
+    delete[] block;
+    delete[] results;
+    delete[] blockData;
+
+    printf("finished\n");
+  }
+#endif
+
+
+protected:
+  bool prepare() override {
+    ofl = optimizeLayoutSimple(fl->layout);
+    obl = optimizeLayoutSimple(bl->layout);
+    assert(ofl && obl);
+    assert(ofl.getActiveCount() == obl.getActiveCount());
+
+    ofl.split(oflist, threadsCount);
+    obl.split(oblist, threadsCount);
+    stride = ofl.getActiveCount() + 1;
+    count = data.size()/stride;
+    if (count <= 0) return false;
+    shuffle.resize(count);
+    for(int i = 0; i < count; ++i)
+      shuffle[i] = i;
+    return true;
+  }
+
+
+  bool prepareBlock() override {
+    int cnt = itersPerBlock > count ? count : itersPerBlock;
+    for(int i = 0; i < cnt; ++i) {
+      int j = rand()%count;
+      if (i != j) std::swap(shuffle[i], shuffle[j]);
+    }
+    return true;
+  }
+
+
+  void loadData(Barrier &barrier, int, int iter) override {
+    struct I: public Iter {
+      typedef const unsigned char* DataType;
+      static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; }
+    };
+    const unsigned char *id = data.data() + shuffle[iter%count]*stride;
+    iterateNeurons2<I>(oflist[barrier.tid], ofl, fl->neurons, id);
+  }
+
+
+  AccumReal verifyDataMain(int, int iter) override {
+    struct I: public Iter {
+      typedef int DataType;
+      struct DataAccumType { int ri, mi; NeuronReal m; };
+      static inline void iter4(Neuron &n, DataType d, DataAccumType &a) {
+        NeuronReal v1 = d == a.ri;
+        NeuronReal v0 = n.v;
+        n.d *= v1 - v0;
+        if (a.m < v0) { a.m = v0; a.mi = d; }
+      }
+    };
+
+    I::DataAccumType a = { data[ (shuffle[iter%count] + 1)*stride - 1 ], 0, 0 };
+    iterateNeurons2<I>(obl, obl, bl->neurons, 0, 1, &a);
+
+    return a.mi != a.ri;
+  }
+};
+
+
+#endif
diff --git a/projects/neural/train.inc.cpp b/projects/neural/train.inc.cpp
new file mode 100644
index 0000000..8806713
--- /dev/null
+++ b/projects/neural/train.inc.cpp
@@ -0,0 +1,195 @@
+#ifndef NNTRAIN_INC_CPP
+#define NNTRAIN_INC_CPP
+
+
+#include <chrono>
+#include <thread>
+
+
+#include "layer.inc.cpp"
+
+
+long long timeUs() {
+  static std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
+  return (long long)std::chrono::duration_cast<std::chrono::microseconds>( std::chrono::steady_clock::now() - begin ).count();
+}
+
+
+class Trainer {
+private:
+  std::atomic<unsigned int> barrierCounter;
+  std::vector<AccumReal> qualities;
+
+public:
+  Layer *layer;
+  AccumReal ratio;
+  int threadsCount;
+  int itersPerBlock;
+  int blocksPerSaving;
+  int blocksCount;
+  AccumReal qmin;
+
+protected:
+  volatile bool doBackpassAtThisIteration;
+  Layer *fl;
+  Layer *bl;
+
+  virtual bool prepare() { return true; }
+  virtual bool prepareBlock() { return true; }
+  virtual void finishBlock() { }
+  virtual void finish() { }
+
+  virtual void loadData(Barrier &barrier, int block, int iter) { }
+  virtual AccumReal verifyData(Barrier &barrier, int block, int iter) { return 0; }
+
+  virtual void loadDataMain(int block, int iter) { }
+  virtual AccumReal verifyDataMain(int block, int iter) { return 0; }
+
+private:
+  void threadFunc(int tid, int block) {
+    Barrier barrier(barrierCounter, tid, threadsCount);
+
+    volatile AccumReal &sumQ = qualities[tid] = 0;
+    for(int i = 0; i < itersPerBlock; ++i) {
+      barrier.wait();
+      loadData(barrier, block, i);
+      barrier.wait();
+      if (!tid) loadDataMain(block, i);
+
+      for(Layer *l = fl->next; l; l = l->next) {
+        barrier.wait();
+        l->pass(barrier);
+      }
+
+      barrier.wait();
+      sumQ += verifyData(barrier, block, i);
+      barrier.wait();
+      if (!tid) {
+        doBackpassAtThisIteration = true;
+        sumQ += verifyDataMain(block, i);
+      }
+
+      barrier.wait();
+      if (ratio > 0 && doBackpassAtThisIteration) {
+        for(Layer *l = bl; l->prev && l->prev->prev; l = l->prev) {
+          barrier.wait();
+          l->backpassDeltas(barrier);
+        }
+        for(Layer *l = bl; l->prev; l = l->prev) {
+          barrier.wait();
+          l->backpassWeights(barrier);
+        }
+      }
+    }
+  }
+
+
+  AccumReal runThreads(int block) {
+    barrierCounter = 0;
+    std::vector<std::thread*> t(threadsCount, nullptr);
+    for(int i = 1; i < threadsCount; ++i)
+      t[i] = new std::thread(&Trainer::threadFunc, this, i, block);
+    threadFunc(0, block);
+
+    AccumReal result = qualities[0];
+    for(int i = 1; i < threadsCount; ++i)
+      { t[i]->join(); delete t[i]; result += qualities[i]; }
+    return result / itersPerBlock;
+  }
+
+
+public:
+  Trainer():
+    barrierCounter(0),
+    layer(),
+    ratio(),
+    threadsCount(1),
+    itersPerBlock(100),
+    blocksPerSaving(),
+    blocksCount(1000),
+    qmin(),
+    doBackpassAtThisIteration(),
+    fl(),
+    bl() { }
+
+
+  Trainer& configure(
+    Layer &layer,
+    AccumReal ratio,
+    int threadsCount,
+    int itersPerBlock,
+    int blocksPerSaving,
+    int blocksCount,
+    AccumReal qmin )
+  {
+    this->layer = &layer;
+    this->ratio = ratio;
+    this->threadsCount = threadsCount;
+    this->itersPerBlock = itersPerBlock;
+    this->blocksPerSaving = blocksPerSaving;
+    this->blocksCount = blocksCount;
+    this->qmin = qmin;
+    return *this;
+  }
+
+
+  AccumReal run() {
+    assert(layer && !layer->prev && layer->next);
+    assert(threadsCount > 0);
+    assert(itersPerBlock > 0);
+
+    printf("training: threads %d, itersPerBlock %d, ratio: %lf\n", threadsCount, itersPerBlock, ratio);
+
+    fl = layer;
+    bl = &layer->back();
+
+    qualities.clear();
+    qualities.resize(threadsCount, 0);
+    for(Layer *l = layer; l; l = l->next)
+      l->split(threadsCount);
+
+    if (!prepare())
+      return printf("cannot prepare\n"), -1;
+
+    AccumReal result = -1;
+    long long fullTimeStartUs = timeUs();
+    int i = 0;
+    while(true) {
+      if (!prepareBlock()) {
+        printf("cannot prepare block\n");
+        result = -1;
+        break;
+      }
+
+      long long runTimeUs = timeUs();
+      result = runThreads(i);
+      runTimeUs = timeUs() - runTimeUs;
+
+      finishBlock();
+
+      long long t = timeUs();
+      long long fullTimeUs = t - fullTimeStartUs;
+      fullTimeStartUs = t;
+      ++i;
+
+      printf("%4d, total %7d, avg.result %f, time: %f / %f\n", i, i*itersPerBlock, result, runTimeUs*0.000001, fullTimeUs*0.000001);
+
+      bool done = (blocksCount > 0 && i >= blocksCount) || result <= qmin;
+
+      if (ratio > 0 && (blocksPerSaving <= 0 || i%blocksPerSaving == 0 || done) && !layer->save()) {
+        printf("saving failed\n");
+        result = -1;
+        break;
+      }
+
+      if (done) break;
+    }
+
+    finish();
+
+    return result;
+  }
+};
+
+
+#endif
diff --git a/projects/neural/trainer.cpp b/projects/neural/trainer.cpp
new file mode 100644
index 0000000..51149f5
--- /dev/null
+++ b/projects/neural/trainer.cpp
@@ -0,0 +1,47 @@
+
+
+#include <ctime>
+
+#include "layer.all.inc.cpp"
+#include "layer.all.test.inc.cpp"
+#include "train.digit.inc.cpp"
+
+
+bool runTests() {
+  if (!AllTest::test()) return false;
+  return printf("success\n"), true;
+}
+
+
+int main() {
+  srand(time(NULL));
+
+  //return !runTests();
+
+  #define FILENAME "data/output/weights.bin" // 28x28
+
+  printf("create neural network\n");
+  Layer l(nullptr, Layout(28, 28)); l.filename = FILENAME "1";
+  //(new LayerSimple<funcSigmoidExp>(l, Layout(256)))->filename = FILENAME "2";
+  //(new LayerSimple<funcSigmoidExp>(l, Layout(64)))->filename = FILENAME "3";
+  //(new LayerSimple<funcSigmoidExp>(l, Layout(128)))->filename = FILENAME "4";
+  //(new LayerSimple<funcSigmoidExp>(l, Layout(64)))->filename = FILENAME "5";
+  //(new LayerSimple<funcSigmoidExp>(l, Layout(128)))->filename = FILENAME "5";
+  (new LayerSimple<funcSigmoidExp>(l, Layout(32)))->filename = FILENAME "6";
+  //(new LayerSimple<funcSigmoidExp>(l, Layout(16)))->filename = FILENAME "7";
+  (new LayerSimple<funcSigmoidExp>(l, Layout(10)))->filename = FILENAME "8";
+
+  l.sumStat().print();
+
+  printf("load training data\n");
+  TrainerDigit t;
+  if (!t.loadSymbolMap("data/symbols-data.bin")) return 1;
+
+  //printf("try load previously saved network\n"); l.load();
+
+  printf("train\n");
+  t.configure(l, 0.5, 4, 1000000, 0, 0, 0.0000001).run();
+
+  return 0;
+}
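+
+
+// A convolutional variant of the network above, built from the same pieces;
+// the feature-map size and kernel are illustrative and chosen to satisfy the
+// geometry asserts in iterateConvolution ((12-1)*2 + 0 + 5 = 27 <= 28).
+// Kept disabled: it would replace main() above.
+#if 0
+int main() {
+  srand(time(NULL));
+
+  Layer l(nullptr, Layout(28, 28));
+  // 12x12x4 feature map; Kernel(5, 2, 0) = 5x5 window, stride 2, no offset
+  new LayerConv<funcSigmoidExp>(l, Layout(12, 12, 4), Kernel(5, 2, 0));
+  new LayerSimple<funcSigmoidExp>(l, Layout(10));
+  l.sumStat().print();
+
+  TrainerDigit t;
+  if (!t.loadSymbolMap("data/symbols-data.bin")) return 1;
+  t.configure(l, 0.5, 4, 1000, 0, 1000, 0.0000001).run();
+  return 0;
+}
+#endif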