#ifndef LAYER_CONV_SHARED_INC_CPP
#define LAYER_CONV_SHARED_INC_CPP


#include "layer.conv.inc.cpp"

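// Reference implementation of one shared-weight convolution pass: walks the
// current layer `cl` by explicit indices and feeds every (current neuron,
// previous neuron, weight) triple to Iter. Slow but straightforward; the
// test passes use it to cross-check the pointer-arithmetic versions below.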
template<typename Iter>
void iterateTestConvolutionShared(Layout cl, Layout pl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (cl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (cl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  for(int cy = cl.y0; cy < cl.y1; ++cy)
  for(int cx = cl.x0; cx < cl.x1; ++cx)
  for(int cz = cl.z0; cz < cl.z1; ++cz) {
    int ci = (cy*cl.sx + cx)*cl.sz + cz;
    Neuron &cn = c_neurons[ci];
    typename Iter::AccumType a = {};
    Iter::init(cn, a);

    for(int ky = 0; ky < k.sy; ++ky)
    for(int kx = 0; kx < k.sx; ++kx)
    for(int pz = pl.z0; pz < pl.z1; ++pz) {
      int wi = (((cz - cl.z0)*k.sy + ky)*k.sx + kx)*pl.getD() + pz - pl.z0;
      Weight &w = weights[wi];

      int px = pl.x0 + (cx - cl.x0)*k.dx + k.ox + kx;
      int py = pl.y0 + (cy - cl.y0)*k.dy + k.oy + ky;
      int pi = (py*pl.sx + px)*pl.sz + pz;
      Neuron &pn = p_neurons[pi];

      Iter::iter(pn, w, a);
    }

    Iter::done(cn, a);
  }
}

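// Optimized equivalent of iterateTestConvolutionShared: every index is
// folded into precomputed pointer strides, so the nested loops only advance
// raw pointers. `wl` is the full layout the weights were laid out for;
// `cl` may be a per-thread sub-layout of it.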
template<typename Iter>
void iterateConvolutionShared(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(wl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(cl.isSubLayoutOf(wl));
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  // current-layer strides: c_dx/c_dy skip from the end of one depth run
  // to the start of the next column/row
  int c_h    = cl.getH();
  int c_w    = cl.getW();
  int c_d    = cl.getD();
  int c_swz  = c_w*cl.sz;
  int c_shxz = c_h*cl.sx*cl.sz;
  int c_dx   = cl.sz - c_d;
  int c_dy   = (cl.sx - c_w)*cl.sz;

  // previous-layer strides follow the kernel step (k.dx, k.dy)
  int p_d    = pl.getD();
  int p_dx   = k.dx*pl.sz;
  int p_dy   = k.dy*pl.sx*pl.sz - c_w*p_dx;

  // strides for walking one kernel window inside the previous layer
  int k_sxd  = k.sx*p_d;
  int k_syxd = k.sy*k_sxd;
  int p_ddy  = (pl.sx - k.sx)*pl.sz;
  int p_ddx  = pl.sz - p_d;

  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
  Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox)*pl.sz + pl.z0;

  weights += (cl.z0 - wl.z0)*k_syxd;
  Weight *iw = weights;

  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
  for(Neuron *e = icn +  c_swz; icn < e; icn += c_dx, ipn += p_dx, iw = weights)
  for(Neuron *e = icn +    c_d; icn < e; ++icn) {
    typename Iter::AccumType a;
    Iter::init(*icn, a);

    Neuron *iipn = ipn;
    for(Weight *e = iw + k_syxd; iw < e; iipn += p_ddy)
    for(Weight *e = iw +  k_sxd; iw < e; iipn += p_ddx)
    for(Weight *e = iw +    p_d; iw < e; ++iw, ++iipn)
      Iter::iter(*iipn, *iw, a);

    Iter::done(*icn, a);
  }
}

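// Same traversal as iterateConvolutionShared, restricted to a single kernel
// cell (kx, ky); Iter::iter2 sees each (current, previous, weight) triple.
// Callers loop over kernel cells with a barrier between steps: for a fixed
// (kx, ky) each current neuron maps to a distinct previous neuron, which
// keeps concurrent scatter-writes from colliding.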
template<typename Iter>
void iterateConvolutionSharedPoint(Layout cl, Layout pl, Layout wl, Kernel k, int kx, int ky, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(wl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(cl.isSubLayoutOf(wl));
  assert(kx >= 0 && kx < k.sx);
  assert(ky >= 0 && ky < k.sy);
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  int c_h    = cl.getH();
  int c_w    = cl.getW();
  int c_d    = cl.getD();
  int c_swz  = c_w*cl.sz;
  int c_shxz = c_h*cl.sx*cl.sz;
  int c_dx   = cl.sz - c_d;
  int c_dy   = (cl.sx - c_w)*cl.sz;

  int p_d    = pl.getD();
  int p_dx   = k.dx*pl.sz;
  int p_dy   = k.dy*pl.sx*pl.sz - c_w*p_dx;

  // skip the rest of the kernel window when stepping one output depth slice
  int w_dz   = (k.sy*k.sx - 1)*p_d;

  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
  Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy + ky)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox + kx)*pl.sz + pl.z0;
  weights += (((cl.z0 - wl.z0)*k.sy + ky)*k.sx + kx)*p_d;
  Weight *iw = weights;

  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
  for(Neuron *e = icn +  c_swz; icn < e; icn += c_dx, ipn += p_dx, iw = weights)
  for(Neuron *e = icn +    c_d; icn < e; ++icn,       ipn -= p_d,  iw += w_dz)
  for(Weight *e = iw  +    p_d; iw  < e; ++ipn, ++iw)
    Iter::iter2(*icn, *ipn, *iw);
}

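// Gaussian-damped random initialization for shared convolution weights:
// uniform noise shaped by an exp(-r^2) envelope over the kernel window,
// then rescaled so the total envelope mass is proportional to the depth.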
void fillConvolutionWeights(int kx, int ky, int kz, int count, Weight *weights) {
  double kr = 1.5; // envelope radius in normalized kernel coordinates
  double sum = 0;

  Weight *iw = weights;
  for(int i = 0; i < count; ++i)
  for(int y = 0; y < ky; ++y)
  for(int x = 0; x < kx; ++x)
  for(int z = 0; z < kz; ++z, ++iw) {
    // map the kernel cell into [-kr, kr]; assumes kx, ky > 1
    double dx = (2.0*x/(kx-1) - 1)*kr;
    double dy = (2.0*y/(ky-1) - 1)*kr;
    double e = exp( -dx*dx - dy*dy );
    sum += e;
    iw->w = (WeightReal)( (rand()/(double)RAND_MAX*2 - 1)*e );
    //iw->w = (WeightReal)( rand()/(double)RAND_MAX*e );
  }

  WeightReal k = (WeightReal)(10*kz/sum);
  Weight *ew = iw;
  for(iw = weights; iw < ew; ++iw) iw->w *= k;
}

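// Common base for the shared-weight layers below: holds the kernel and a
// per-thread copy of the weight deltas (mtWeights), so backpassWeights can
// accumulate without locks and merge the copies in sumWeights().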
class LayerConvSharedBase: public Layer {
public:
  Kernel kernel;
  std::vector<Weight> mtWeights;


  LayerConvSharedBase(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
    Layer(&prev, layout, kernel.sx * kernel.sy * layout.getD() * prev.back().layout.getD(), weights),
    kernel(kernel)
  {
    assert(kernel);
    stat.links = weightsCount * layout.getW() * layout.getH();
    if (ownWeights) fillWeights(-1, 1);
  }


  void split(int threadsCount) override {
    Layer::split(threadsCount);
    Weight w = {};
    mtWeights.clear();
    mtWeights.resize(threadsCount*weightsCount, w);
  }


  // fold every thread's delta copy back into the shared weights; each
  // thread owns an interleaved subset (tid, tid + threads, ...), so no
  // synchronization is needed beyond the barrier before the call
  inline void sumWeights(int tid, int threads) {
    int wc = weightsCount;
    Weight *iw = weights + tid;
    Weight *ia = mtWeights.data() + tid;
    Weight *ea = mtWeights.data() + threads*wc;
    for(Weight *ew = weights + wc; iw < ew; iw += threads, ia += threads) {
      WeightReal w = iw->w;
      for(Weight *iia = ia; iia < ea; iia += wc)
        w += iia->w, iia->w = 0;
      iw->w = w;
    }
  }
};

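// Forward convolution layer with weights shared across (x, y);
// `func` is the activation applied to each accumulated sum in done().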
template<Func func>
class LayerConvShared: public LayerConvSharedBase {
public:
  LayerConvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
    LayerConvSharedBase(prev, layout, kernel, weights)
  {
    stat.links = weightsCount * layout.getW() * layout.getH();
    if (ownWeights) fillConvolutionWeights(kernel.sx, kernel.sy, this->prev->layout.getD(), layout.getD(), this->weights);
  }


  void pass(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
    };
    iterateConvolutionShared<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
  }


  void backpassWeights(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.v * a.v; }
    };
    iterateConvolutionShared<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, &mtWeights[barrier.tid * weightsCount]);
    barrier.wait();
    sumWeights(barrier.tid, barrier.threads);
  }


  void backpassDeltas(Barrier &barrier) override {
    struct I: public Iter {
      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.d * w.w; }
      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
    };
    int ksx = kernel.sx, ksy = kernel.sy;
    for(int kx = 0; kx < ksx; ++kx)
    for(int ky = 0; ky < ksy; ++ky) {
      iterateConvolutionSharedPoint<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, kx, ky, neurons, prev->neurons, weights);
      barrier.wait();
    }
    iterateNeurons<I>(mtPrevLayouts[barrier.tid], prev->neurons);
  }


  void testPass() override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
    };
    iterateTestConvolutionShared<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
  }


  void testBackpass() override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
    };
    struct IW: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += a.v * n.v; }
    };
    clearAccum();
    iterateTestConvolutionShared<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
    iterateTestConvolutionShared<IW>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
    iterateNeurons<I>(prev->layout, prev->neurons);
    clearAccum();
  }


  bool saveDemo() override
    { return !filename || saveConvDemoImage( filename, layout.getD(), kernel.sx, kernel.sy, prev->layout.getD(), weights ); }
};

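// Transposed ("deconvolution") counterpart of LayerConvShared: the kernel
// scatters each input neuron into the larger output layout, so the forward
// pass reuses the per-kernel-cell iteration with barriers, and the roles of
// the two layers are swapped in every iterate* call.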
template<Func func>
class LayerDeconvShared: public LayerConvSharedBase {
public:
  LayerDeconvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
    LayerConvSharedBase(prev, layout, kernel, weights)
  {
    stat.links = weightsCount * this->prev->layout.getW() * this->prev->layout.getH();
    if (ownWeights) fillConvolutionWeights(kernel.sx, kernel.sy, layout.getD(), this->prev->layout.getD(), this->weights);
  }


  void pass(Barrier &barrier) override {
    struct I: public Iter {
      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.v * w.w; }
      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
    };
    int k_sx = kernel.sx, k_sy = kernel.sy;
    for(int kx = 0; kx < k_sx; ++kx)
    for(int ky = 0; ky < k_sy; ++ky) {
      iterateConvolutionSharedPoint<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, kx, ky, prev->neurons, neurons, weights);
      barrier.wait();
    }
    iterateNeurons<I>(mtLayouts[barrier.tid], neurons);
  }


  void backpassWeights(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
    };
    iterateConvolutionShared<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, &mtWeights[barrier.tid * weightsCount]);
    barrier.wait();
    sumWeights(barrier.tid, barrier.threads);
  }


  void backpassDeltas(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
    };
    iterateConvolutionShared<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
  }


  void testPass() override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
    };
    clearAccum();
    iterateTestConvolutionShared<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
    iterateNeurons<I>(layout, neurons);
    clearAccum();
  }


  void testBackpass() override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
    };
    struct IW: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
    };
    iterateTestConvolutionShared<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
    iterateTestConvolutionShared<IW>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
  }


  bool saveDemo() override
    { return !filename || saveConvDemoImage( filename, prev->layout.getD(), kernel.sx, kernel.sy, layout.getD(), weights ); }
};

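// A minimal wiring sketch. All names below are hypothetical: the Layout and
// Kernel constructor arguments and the `funcSigmoidExp` activation are
// assumptions, not this project's verified API; see layer.inc.cpp and
// layer.conv.inc.cpp for the real signatures.
//
//   Layer input(nullptr, Layout(64, 64, 3));
//   new LayerConvShared<funcSigmoidExp>(input, Layout(32, 32, 8), Kernel(4, 2, -1));
//   new LayerDeconvShared<funcSigmoidExp>(input.back(), Layout(64, 64, 3), Kernel(4, 2, -1));
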
#endif