From 8e5348c557b3104a62756f182bd9d46933448abb Mon Sep 17 00:00:00 2001
From: Ivan Mahonin
Date: Mar 17 2023 14:03:54 +0000
Subject: neural: convolution shared

---

diff --git a/projects/neural/build-view-digits.sh b/projects/neural/build-view-digits.sh
new file mode 100755
index 0000000..9615903
--- /dev/null
+++ b/projects/neural/build-view-digits.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+set -e
+
+export PKG_CONFIG_PATH="$HOME/opt/helianthus-release/lib/pkgconfig:/usr/local/lib/pkgconfig:/usr/lib/pkgconfig:/usr/lib/x86_64-linux-gnu/pkgconfig"
+
+if [ "$1" == "debug" ]; then
+  c++ -Wall `pkg-config --cflags --libs helianthus` -g -O0 -pthread view-digits.cpp -lm -o view-digits-dbg
+  echo done debug
+else
+  c++ -Wall `pkg-config --cflags --libs helianthus` -DNDEBUG -O3 -pthread view-digits.cpp -lm -o view-digits
+  echo done release
+fi
+
diff --git a/projects/neural/layer.conv.inc.cpp b/projects/neural/layer.conv.inc.cpp
index 02ffb6c..efd72ac 100644
--- a/projects/neural/layer.conv.inc.cpp
+++ b/projects/neural/layer.conv.inc.cpp
@@ -293,7 +293,7 @@ public:
       static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
       static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
     };
-    iterateConvolution<I>(mtPrevLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
+    iterateConvolution<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
   }
 
@@ -303,7 +303,7 @@ public:
       static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
       static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
     };
-    iterateConvolution<I>(mtPrevLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
+    iterateConvolution<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
   }
 
diff --git a/projects/neural/layer.conv.shared.inc.cpp b/projects/neural/layer.conv.shared.inc.cpp
new file mode 100644
index 0000000..dee1861
--- /dev/null
+++ b/projects/neural/layer.conv.shared.inc.cpp
@@ -0,0 +1,333 @@
+#ifndef LAYER_CONV_SHARED_INC_CPP
+#define LAYER_CONV_SHARED_INC_CPP
+
+
+
+#include "layer.conv.inc.cpp"
+
+
+
+template<class Iter>
+void iterateTestConvolutionShared(Layout cl, Layout pl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  if (!cl) return;
+  assert(pl);
+  assert(k);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
+  assert(pl.x0 + k.ox >= 0 && pl.x0 + (cl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
+  assert(pl.y0 + k.oy >= 0 && pl.y0 + (cl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
+
+  for(int cy = cl.y0; cy < cl.y1; ++cy)
+  for(int cx = cl.x0; cx < cl.x1; ++cx)
+  for(int cz = cl.z0; cz < cl.z1; ++cz) {
+    int ci = (cy*cl.sx + cx)*cl.sz + cz;
+    Neuron &cn = c_neurons[ci];
+    typename Iter::AccumType a = {};
+    Iter::init(cn, a);
+
+    for(int ky = 0; ky < k.sy; ++ky)
+    for(int kx = 0; kx < k.sx; ++kx)
+    for(int pz = pl.z0; pz < pl.z1; ++pz) {
+      int wi = (ky*k.sx + kx)*pl.getD() + pz - pl.z0;
+      Weight &w = weights[wi];
+
+      int px = pl.x0 + (cx - cl.x0)*k.dx + k.ox + kx;
+      int py = pl.y0 + (cy - cl.y0)*k.dy + k.oy + ky;
+      int pi = (py*pl.sx + px)*pl.sz + pz;
+      Neuron &pn = p_neurons[pi];
+
+      Iter::iter(pn, w, a);
+    }
+
+    Iter::done(cn, a);
+  }
+}
+
+
+template<class Iter>
+void iterateConvolutionShared(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  if (!cl) return;
+  assert(pl);
+  assert(wl);
+  assert(k);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
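+  // cl is one thread's sub-block of the current layer, wl the full layout
+  // of that layer: window positions in pl are computed relative to wl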
+  assert(cl.isSubLayoutOf(wl));
+  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
+  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
+
+  int c_h = cl.getH();
+  int c_w = cl.getW();
+  int c_d = cl.getD();
+  int c_swz = c_w*cl.sz;
+  int c_shxz = c_h*cl.sx*cl.sz;
+  int c_dx = cl.sz - c_d;
+  int c_dy = (cl.sx - c_w)*cl.sz;
+
+  int p_d = pl.getD();
+  int p_dx = k.dx*pl.sz;
+  int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
+
+  int k_sxd = k.sx*p_d;
+  int p_ddy = (pl.sx - k.sx)*pl.sz;
+  int p_ddx = pl.sz - p_d;
+
+  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
+  Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox)*pl.sz + pl.z0;
+  Weight *ew = weights + k.sy*k_sxd;
+
+  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
+  for(Neuron *e = icn + c_d; icn < e; ++icn) {
+    typename Iter::AccumType a;
+    Iter::init(*icn, a);
+
+    Neuron *iipn = ipn;
+    for(Weight *iw = weights; iw < ew; iipn += p_ddy)
+    for(Weight *e = iw + k_sxd; iw < e; iipn += p_ddx)
+    for(Weight *e = iw + p_d; iw < e; ++iw, ++iipn)
+      Iter::iter(*iipn, *iw, a);
+
+    Iter::done(*icn, a);
+  }
+}
+
+
+template<class Iter>
+void iterateConvolutionSharedPoint(Layout cl, Layout pl, Layout wl, Kernel k, int kx, int ky, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  if (!cl) return;
+  assert(pl);
+  assert(wl);
+  assert(k);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
+  assert(cl.isSubLayoutOf(wl));
+  assert(kx >= 0 && kx < k.sx);
+  assert(ky >= 0 && ky < k.sy);
+  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
+  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
+
+  int c_h = cl.getH();
+  int c_w = cl.getW();
+  int c_d = cl.getD();
+  int c_swz = c_w*cl.sz;
+  int c_shxz = c_h*cl.sx*cl.sz;
+  int c_dx = cl.sz - c_d;
+  int c_dy = (cl.sx - c_w)*cl.sz;
+
+  int p_d = pl.getD();
+  int p_dx = k.dx*pl.sz;
+  int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
+
+  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
+  Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy + ky)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox + kx)*pl.sz + pl.z0;
+  weights += (ky*k.sx + kx)*p_d;
+  Weight *ew = weights + p_d;
+
+  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
+  for(Neuron *e = icn + c_d; icn < e; ++icn, ipn -= p_d)
+  for(Weight *iw = weights; iw < ew; ++ipn, ++iw)
+    Iter::iter2(*icn, *ipn, *iw);
+}
+
+
+
+
+class LayerConvSharedBase: public Layer {
+public:
+  std::vector<Weight> mtWeights;
+
+  using Layer::Layer;
+
+
+  void split(int threadsCount) override {
+    Layer::split(threadsCount);
+    Weight w = {};
+    mtWeights.clear();
+    mtWeights.resize(threadsCount*weightsCount, w);
+  }
+
+
+  inline void sumWeights(int tid, int threads) {
+    int wc = weightsCount;
+    Weight *iw = weights + tid;
+    Weight *ia = mtWeights.data() + tid;
+    Weight *ea = mtWeights.data() + threads*wc;
+    for(Weight *ew = weights + wc; iw < ew; iw += threads, ia += threads) {
+      WeightReal w = iw->w;
+      for(Weight *iia = ia; iia < ea; iia += wc)
+        w += iia->w, iia->w = 0;
+      iw->w = w;
+    }
+  }
+};
+
+
+
+template<void func(Neuron&, AccumReal)>
+class LayerConvShared: public LayerConvSharedBase {
+public:
+  Kernel kernel;
+
+
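+  // a single kernel (kernel.sx*kernel.sy weights per input channel) is
+  // shared by every output neuron, hence weightsCount below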
+  LayerConvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
+    LayerConvSharedBase(&prev, layout, kernel.sx*kernel.sy*prev.back().layout.getD(), weights),
+    kernel(kernel)
+  {
+    assert(kernel);
+    stat.links = weightsCount*neuronsCount;
+    if (ownWeights) fillWeights(-1, 1);
+  }
+
+
+  void pass(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
+    };
+    iterateConvolutionShared<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
+  }
+
+
+  void backpassWeights(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.v * a.v; }
+    };
+    iterateConvolutionShared<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, &mtWeights[barrier.tid * weightsCount]);
+    barrier.wait();
+    sumWeights(barrier.tid, barrier.threads);
+  }
+
+
+  void backpassDeltas(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.d * w.w; }
+      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
+    };
+    int ksx = kernel.sx, ksy = kernel.sy;
+    for(int kx = 0; kx < ksx; ++kx)
+    for(int ky = 0; ky < ksy; ++ky) {
+      iterateConvolutionSharedPoint<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, kx, ky, neurons, prev->neurons, weights);
+      barrier.wait();
+    }
+    iterateNeurons<I>(mtPrevLayouts[barrier.tid], prev->neurons);
+  }
+
+
+  void testPass() override {
+    struct I: public Iter {
+      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
+    };
+    iterateTestConvolutionShared<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
+  }
+
+
+  void testBackpass() override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
+      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
+    };
+    struct IW: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += a.v * n.v; }
+    };
+    clearAccum();
+    iterateTestConvolutionShared<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
+    iterateTestConvolutionShared<IW>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
+    iterateNeurons<I>(prev->layout, prev->neurons);
+    clearAccum();
+  }
+};
+
+
+
+template<void func(Neuron&, AccumReal)>
+class LayerDeconvShared: public LayerConvSharedBase {
+public:
+  Kernel kernel;
+
+
+  LayerDeconvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
+    LayerConvSharedBase(&prev, layout, kernel.sx*kernel.sy*layout.getD(), weights),
+    kernel(kernel)
+  {
+    assert(kernel);
+    stat.links = weightsCount*neuronsCount;
+    if (ownWeights) fillWeights(-1, 1);
+  }
+
+
+  void pass(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.v * w.w; }
+      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
+    };
+    int k_sx = kernel.sx, k_sy = kernel.sy;
+    for(int kx = 0; kx < k_sx; ++kx)
+    for(int ky = 0; ky < k_sy; ++ky) {
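+      // scatter one kernel point at a time; the barrier below keeps
+      // threads from accumulating into the same target neuron at once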
+      iterateConvolutionSharedPoint<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, kx, ky, prev->neurons, neurons, weights);
+      barrier.wait();
+    }
+    iterateNeurons<I>(mtLayouts[barrier.tid], neurons);
+  }
+
+
+  void backpassWeights(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
+    };
+    iterateConvolutionShared<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, &mtWeights[barrier.tid * weightsCount]);
+    barrier.wait();
+    sumWeights(barrier.tid, barrier.threads);
+  }
+
+
+  void backpassDeltas(Barrier &barrier) override {
+    struct I: public Iter {
+      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
+    };
+    iterateConvolutionShared<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
+  }
+
+
+  void testPass() override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
+      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
+    };
+    clearAccum();
+    iterateTestConvolutionShared<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
+    iterateNeurons<I>(layout, neurons);
+    clearAccum();
+  }
+
+
+  void testBackpass() override {
+    struct I: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = 0; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
+      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
+    };
+    struct IW: public Iter {
+      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
+      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
+    };
+    iterateTestConvolutionShared<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
+    iterateTestConvolutionShared<IW>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
+  }
+};
+
+#endif
diff --git a/projects/neural/layer.conv.test.inc.cpp b/projects/neural/layer.conv.test.inc.cpp
index 7124b30..c0b4be8 100644
--- a/projects/neural/layer.conv.test.inc.cpp
+++ b/projects/neural/layer.conv.test.inc.cpp
@@ -5,6 +5,7 @@
 
 #include "layer.test.inc.cpp"
 #include "layer.conv.inc.cpp"
+#include "layer.conv.shared.inc.cpp"
 
 
 class ConvTest: public Test {
@@ -147,11 +148,23 @@ public:
     }
 
     {
-      Layer l(nullptr, pl);
-      new LayerConv(l, cl, k);
+      Layer l(nullptr, cl);
+      new LayerDeconv(l, pl, k);
       Test::testLayer("LayerDeconv", l);
     }
 
+    {
+      Layer l(nullptr, pl);
+      new LayerConvShared(l, cl, k);
+      Test::testLayer("LayerConvShared", l);
+    }
+
+    {
+      Layer l(nullptr, cl);
+      new LayerDeconvShared(l, pl, k);
+      Test::testLayer("LayerDeconvShared", l);
+    }
+
     return st;
   }
 
diff --git a/projects/neural/layer.simple.inc.cpp b/projects/neural/layer.simple.inc.cpp
index 1f2a7b7..1da0312 100644
--- a/projects/neural/layer.simple.inc.cpp
+++ b/projects/neural/layer.simple.inc.cpp
@@ -14,6 +14,17 @@ inline void funcSigmoidExp(Neuron &n, AccumReal s) {
 }
 
 
+inline void funcSigmoidExp2(Neuron &n, AccumReal s) {
+  if (s > 5) s = 5; else if (s < -5) s = -5;
+  AccumReal ss = 1/(1 + std::exp(-s)); n.v = ss; n.d = 0;//ss * (1-ss) * 0.1;
+}
+
+
+inline void funcReLU(Neuron &n, AccumReal s)
+  { n.v = s > 0 ? s : 0; n.d = s > 0; }
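+// n.d holds the ReLU derivative (1 for s > 0, else 0) consumed by the
+// backward pass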
+
+
+
 template<class Iter>
 inline void iterateNeurons(const Layout &l, Neuron *neurons) {
   if (!l) return;
diff --git a/projects/neural/train.digit.inc.cpp b/projects/neural/train.digit.inc.cpp
index 9b3bc9e..8277f42 100644
--- a/projects/neural/train.digit.inc.cpp
+++ b/projects/neural/train.digit.inc.cpp
@@ -34,6 +34,21 @@ public:
     fclose(f);
     return true;
   }
+
+  static void printSymbol(const unsigned char *data, int w, int h, int index = -1) {
+    if (index >= 0) printf("\nsymbol %d (%d):\n", (int)data[w*h], index);
+    else printf("\nsymbol %d:\n", (int)data[w*h]);
+    for(int i = 0; i < h; ++i) {
+      for(int j = 0; j < w; ++j) printf("%s", data[i*w+j] > 128u ? "#" : " ");
+      printf("\n");
+    }
+    printf("\n");
+  }
+
+  void printSymbol(int index) {
+    const Layout &l = layer->layout;
+    printSymbol(&data[(l.getActiveCount()+1)*index], l.getW(), l.getH(), index);
+  }
 
 protected:
   bool prepare() override {
@@ -71,22 +86,38 @@ protected:
   }
 
 
-  AccumReal verifyDataMain(int, int iter) override {
+  Quality verifyData(Barrier &barrier, int, int iter) override {
+    Quality q = {};
+    if (barrier.tid) return q;
+
     struct I: public Iter {
       typedef int DataType;
-      struct DataAccumType { int ri, mi; NeuronReal m; };
+      struct DataAccumType { int ri, mi; NeuronReal m, ratio, q; };
       static inline void iter4(Neuron &n, DataType d, DataAccumType &a) {
         NeuronReal v1 = d == a.ri;
         NeuronReal v0 = n.v;
-        n.d *= v1 - v0;
+        NeuronReal diff = v1 - v0;
+        n.d *= diff*a.ratio;
+        a.q += diff*diff;
         if (a.m < v0) { a.m = v0; a.mi = d; }
       }
     };
-    I::DataAccumType a = { data[ (shuffle[iter%count] + 1)*stride - 1 ], 0, 0 };
+    int index = shuffle[iter%count];
+    if (index == 59915) {
+      ++skipBackpass;
+      return q;
+    }
+
+    I::DataAccumType a = { data[ (index + 1)*stride - 1 ], 0, 0, ratio };
     iterateNeurons2<I>(obl, obl, bl->neurons, 0, 1, &a);
-    return a.mi != a.ri;
+    q.train = sqrt(a.q/obl.getActiveCount());
+    q.human = a.mi != a.ri;
+    //if (!q.human && q.train < 0.01) ++skipBackpass;
+    //if (!q.human) ++skipBackpass;
+    //if (q.human) printSymbol(index);
+    return q;
   }
 };
 
diff --git a/projects/neural/train.inc.cpp b/projects/neural/train.inc.cpp
index a99b9f3..8582aa4 100644
--- a/projects/neural/train.inc.cpp
+++ b/projects/neural/train.inc.cpp
@@ -15,10 +15,34 @@ long long timeUs() {
 }
 
 
+struct Quality {
+  AccumReal train;
+  AccumReal human;
+
+  inline Quality& operator+=(const Quality &b) {
+    train += b.train;
+    human += b.human;
+    return *this;
+  }
+
+  inline Quality& operator*=(AccumReal x) {
+    train *= x;
+    human *= x;
+    return *this;
+  }
+
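+  // order by the human-readable error first; the RMS training error
+  // only breaks ties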
+  inline bool operator<(const Quality &b) const {
+    return human < b.human ? true
+         : b.human < human ? false
+         : train < b.train;
+  }
+};
+
+
 class Trainer {
 private:
   std::atomic<int> barrierCounter;
-  std::vector<AccumReal> qualities;
+  std::vector<Quality> qualities;
 
 public:
   Layer *layer;
@@ -30,7 +54,7 @@ public:
   AccumReal qmin;
 
 protected:
-  volatile bool doBackpassAtThisIteration;
+  std::atomic<int> skipBackpass;
   Layer *fl;
   Layer *bl;
@@ -40,21 +64,16 @@ protected:
 
   virtual void finish() { }
   virtual void loadData(Barrier &barrier, int block, int iter) { }
-  virtual AccumReal verifyData(Barrier &barrier, int block, int iter) { return 0; }
-
-  virtual void loadDataMain(int block, int iter) { };
-  virtual AccumReal verifyDataMain(int block, int iter) { return 0; };
+  virtual Quality verifyData(Barrier &barrier, int block, int iter) { return Quality{}; }
 
 private:
   void threadFunc(int tid, int block) {
     Barrier barrier(barrierCounter, tid, threadsCount);
-    volatile AccumReal &sumQ = qualities[tid] = 0;
+    Quality sumQ = {};
 
     for(int i = 0; i < itersPerBlock; ++i) {
       barrier.wait();
       loadData(barrier, block, i);
-      barrier.wait();
-      if (!tid) loadDataMain(block, i);
 
       for(Layer *l = fl->next; l; l = l->next) {
         barrier.wait();
@@ -62,15 +81,14 @@ private:
       }
 
       barrier.wait();
+      skipBackpass = 0;
       sumQ += verifyData(barrier, block, i);
+      barrier.wait();
 
-      if (!tid) {
-        doBackpassAtThisIteration = true;
-        sumQ += verifyDataMain(block, i);
-      }
+      bool skipBp = skipBackpass;
       barrier.wait();
 
-      if (ratio > 0 && doBackpassAtThisIteration) {
+      if (ratio > 0 && !skipBp) {
         for(Layer *l = bl; l->prev && l->prev->prev; l = l->prev) {
           barrier.wait();
           l->backpassDeltas(barrier);
@@ -81,20 +99,21 @@ private:
         }
       }
     }
+    qualities[tid] = sumQ;
  }
 
 
-  AccumReal runThreads(int block) {
+  Quality runThreads(int block) {
     barrierCounter = 0;
     std::vector<std::thread*> t(threadsCount, nullptr);
     for(int i = 1; i < threadsCount; ++i)
       t[i] = new std::thread(&Trainer::threadFunc, this, i, block);
     threadFunc(0, block);
-    AccumReal result = qualities[0];
+    Quality result = qualities[0];
     for(int i = 1; i < threadsCount; ++i) {
       t[i]->join();
       delete t[i];
       result += qualities[i];
     }
-    return result / itersPerBlock;
+    return result *= 1/(AccumReal)itersPerBlock;
   }
 
@@ -108,7 +127,7 @@ public:
     blocksPerSaving(),
     blocksCount(1000),
     qmin(),
-    doBackpassAtThisIteration(),
+    skipBackpass(0),
     fl(),
     bl() { }
 
@@ -133,31 +152,40 @@ public:
   }
 
 
-  AccumReal run() {
+  Quality run() {
     assert(layer && !layer->prev && layer->next);
     assert(threadsCount > 0);
     assert(itersPerBlock > 0);
 
+    Quality bad = {INFINITY, INFINITY};
+
     printf("training: threads %d, itersPerBlock %d, ratio: %lf\n", threadsCount, itersPerBlock, ratio);
+    fflush(stdout);
 
     fl = layer;
     bl = &layer->back();
 
     qualities.clear();
-    qualities.resize(threadsCount, 0);
+    qualities.resize(threadsCount, Quality{});
     for(Layer *l = layer; l; l = l->next)
       l->split(threadsCount);
 
     if (!prepare())
-      return printf("cannot prepare\n"), bad;
+      return printf("cannot prepare\n"), bad;
 
-    AccumReal result = -1;
+
+
+    AccumReal ratioCopy = ratio;
+    Quality result = bad, best = result, saved = result;
     long long fullTimeStartUs = timeUs();
+    ratio = 0;
     int i = 0;
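+    // ratio is restored at the bottom of the loop, so only the first
+    // block runs with backpropagation disabled (baseline measurement)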
+    int bps = blocksPerSaving > 0 ? blocksPerSaving : 1;
+    int nextSave = i + bps;
     while(true) {
       if (!prepareBlock()) {
         printf("cannot prepare block\n");
-        result = -1;
+        result = bad;
         break;
       };
@@ -172,18 +200,33 @@
       fullTimeStartUs = t;
 
       ++i;
-      printf("%4d, total %7d, avg.result %f, time: %f / %f\n", i, i*itersPerBlock, result, runTimeUs*0.000001, fullTimeUs*0.000001);
-
-      bool done = (blocksCount > 0 && i >= blocksCount) || result <= qmin;
-
-      if (ratio > 0 && (blocksPerSaving <= 0 || i%blocksPerSaving == 0 || done) && !layer->save()) {
-        printf("saving failed\n");
-        result = -1;
-        break;
+      if (i == 1) saved = result;
+      bool good = result < best;
+      bool done = (blocksCount > 0 && i >= blocksCount) || result.human <= qmin;
+      bool saving = ratio > 0 && (i >= nextSave || done) && result < saved;
+      if (good) best = result;
+
+      printf("%4d, total %7d, avg.result %f (%f), best %f (%f), time: %f / %f%s\n",
+        i, i*itersPerBlock,
+        result.human, result.train, best.human, best.train,
+        runTimeUs*0.000001, fullTimeUs*0.000001,
+        (saving ? ", saving" : "" ) );
+      fflush(stdout);
+
+      if (saving) {
+        if (!layer->save()) {
+          printf("saving failed\n");
+          result = bad;
+          break;
+        }
+        saved = result;
+        nextSave += bps;
       }
 
       if (done) break;
+      ratio = ratioCopy;
     }
+    ratio = ratioCopy;
 
     finish();
 
diff --git a/projects/neural/trainer.cpp b/projects/neural/trainer.cpp
index d8f8424..e14f3e6 100644
--- a/projects/neural/trainer.cpp
+++ b/projects/neural/trainer.cpp
@@ -16,31 +16,33 @@ bool runTests() {
 
 int main() {
   srand(time(NULL));
-  return !runTests();
+  //return !runTests();
 
-  #define FILENAME "data/output/weights.bin" // 28x28
+  //#define FILENAME "data/output/weights-digit.bin"
+  #define FILENAME "data/output/weights-digit-conv.bin"
 
   printf("create neural network\n");
-  Layer l(nullptr, Layout(28, 28)); l.filename = FILENAME "1";
-  //(new LayerSimple(l, Layout(256)))->filename = FILENAME "2";
-  //(new LayerSimple(l, Layout(64)))->filename = FILENAME "3";
-  //(new LayerSimple(l, Layout(128)))->filename = FILENAME "4";
-  //(new LayerSimple(l, Layout(64)))->filename = FILENAME "5";
-  //(new LayerSimple(l, Layout(128)))->filename = FILENAME "5";
-  (new LayerSimple(l, Layout(32)))->filename = FILENAME "6";
-  //(new LayerSimple(l, Layout(16)))->filename = FILENAME "7";
-  (new LayerSimple(l, Layout(10)))->filename = FILENAME "8";
+  //Layer l( nullptr, Layout(28, 28) );
+  //(new LayerSimple( l, Layout(256) ))->filename = FILENAME "1";
+  //(new LayerSimple( l, Layout(64) ))->filename = FILENAME "2";
+  //(new LayerSimple( l, Layout(10) ))->filename = FILENAME "3";
+
+  Layer l(nullptr, Layout(28, 28));
+  (new LayerConvShared(l, Layout(11, 11, 16), Kernel(6, 2, 0)))->filename = FILENAME "1";
+  (new LayerSimple(l, Layout(64)))->filename = FILENAME "2";
+  (new LayerSimple(l, Layout(10)))->filename = FILENAME "3";
 
   l.sumStat().print();
 
   printf("load training data\n");
   TrainerDigit t;
-  if (!t.loadSymbolMap("data/symbols-data.bin")) return 1;
+  if (!t.loadSymbolMap("data/symbols-data.bin")) return 1; // 28x28
 
   //printf("try load previously saved network\n");
   l.load();
 
   printf("train\n");
-  t.configure(l, 0.5, 4, 1000000, 0, 0, 0.0000001).run();
+  //t.configure(l, 0.5, 8, 70000, 0, 0, 0.00001).run();
+  t.configure(l, 0.1, 8, 70000, 0, 0, 0.00001).run();
 
   return 0;
 }
 
diff --git a/projects/neural/view-digits.cpp b/projects/neural/view-digits.cpp
new file mode 100644
index 0000000..525ff90
--- /dev/null
+++ b/projects/neural/view-digits.cpp
@@ -0,0 +1,154 @@
+
+#include <helianthus.h>
+
+
+#include "layer.all.inc.cpp"
+
+
+Layer *nl;
+Framebuffer fb, fbMin;
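+// fb is the 512x512 canvas the user draws on; fbMin holds the 28x28
+// downscaled copy that is fed into the network input layer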
+Animation fbAnim, fbMinAnim;
+
+int wasPressed;
+double prevX, prevY;
+
+
+
+void prepareImage() {
+  int w, h;
+  unsigned char *pixels = NULL;
+
+  saveState();
+  target(fb);
+  imageFromViewport(&w, &h, &pixels);
+  restoreState();
+  if (!pixels) return;
+
+  int x0 = w, y0 = h, x1 = 0, y1 = 0;
+  for(int y = 0; y < h; ++y) {
+    for(int x = 0; x < w; ++x) {
+      if (imageGetPixel(w, h, pixels, x, y) != 0x000000ff) {
+        if (x0 > x) x0 = x;
+        if (x1 < x) x1 = x;
+        if (y0 > y) y0 = y;
+        if (y1 < y) y1 = y;
+      }
+    }
+  }
+  free(pixels);
+  pixels = NULL;
+
+  if (x1 < x0 || y1 < y0) return;
+
+  int fw = framebufferGetWidth(fbMin);
+  int fh = framebufferGetHeight(fbMin);
+
+  double wx = x1 - x0 + 1;
+  double wy = y1 - y0 + 1;
+  double s = (fw - 4)/(double)(wx > wy ? wx : wy);
+  double cx = (x0 + x1)/2.0;
+  double cy = (y0 + y1)/2.0;
+
+  double xx = fw/2 - s*cx;
+  double yy = fh/2 - s*cy;
+  double ww = s*w;
+  double hh = s*h;
+
+  saveState();
+  target(fbMin);
+  noStroke();
+  rectTextured(fbAnim, xx, yy, ww, hh);
+  imageFromViewport(&w, &h, &pixels);
+  restoreState();
+
+  if (!pixels) return;
+  Neuron *in = nl->neurons;
+  for(int y = 0; y < h; ++y)
+  for(int x = 0; x < w; ++x)
+    (in++)->v = colorGetValue(imageGetPixel(w, h, pixels, x, y));
+  free(pixels); // imageFromViewport allocates a fresh buffer each call
+}
+
+
+void init() {
+  background(COLOR_BLACK);
+  stroke(COLOR_WHITE);
+  fb = createFramebufferEx(512, 512, NULL, FALSE, FALSE, TRUE);
+  fbMin = createFramebufferEx(28, 28, NULL, FALSE, FALSE, TRUE);
+  fbAnim = createAnimationFromFramebuffer(fb);
+  fbMinAnim = createAnimationFromFramebuffer(fbMin);
+
+  saveState();
+  target(fb);
+  clear();
+  target(fbMin);
+  clear();
+  restoreState();
+
+  #define FILENAME "data/weights-digit.bin"
+  nl = new Layer( nullptr, Layout(28, 28));
+  (new LayerSimple ( *nl, Layout(256) ))->filename = FILENAME "1";
+  (new LayerSimple ( *nl, Layout( 64) ))->filename = FILENAME "2";
+  (new LayerSimple ( *nl, Layout( 10) ))->filename = FILENAME "3";
+  nl->load();
+}
+
+
+void draw() {
+  saveState();
+
+  if (mouseDown("left")) {
+    double x = mouseX(), y = mouseY();
+    if (!wasPressed) prevX = x, prevY = y;
+
+    saveState();
+    strokeWidth(32);
+    target(fb);
+    line(prevX, prevY, x, y);
+    restoreState();
+
+    prevX = x, prevY = y;
+    wasPressed = TRUE;
+  } else {
+    wasPressed = FALSE;
+  }
+
+  if (keyWentDown("space")) {
+    prepareImage();
+    for(Layer *l = nl->next; l; l = l->next)
+      l->testPass();
+    saveState();
+    target(fb);
+    clear();
+    restoreState();
+  }
+
+  noStroke();
+  rectTextured(fbAnim, 0, 0, 512, 512);
+
+  stroke(COLOR_WHITE);
+  rectTextured(fbMinAnim, 16, 16, 28, 28);
+
+  noFill();
+
+  Layer &nlb = nl->back();
+  textSize(8);
+  int res = 0;
+  for(int i = 0; i < 10; ++i) {
+    if (nlb.neurons[i].v > nlb.neurons[res].v) res = i;
+    textf(16, 90+8*i, "%d: %lf", i, nlb.neurons[i].v);
+  }
+  textSize(16);
+  textf(16, 60, "%d", res);
+
+  restoreState();
+}
+
+
+int main(int largc, char **largv) {
+  windowSetVariableFrameRate();
+  windowSetInit(&init);
+  windowSetDraw(&draw);
+  windowRun();
+  return 0;
+}