|
|
8e5348 |
#ifndef LAYER_CONV_SHARED_INC_CPP
|
|
|
8e5348 |
#define LAYER_CONV_SHARED_INC_CPP
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
#include "layer.conv.inc.cpp"
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
template<typename iter=""></typename>
|
|
|
8e5348 |
void iterateTestConvolutionShared(Layout cl, Layout pl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
|
|
|
8e5348 |
if (!cl) return;
|
|
|
8e5348 |
assert(pl);
|
|
|
8e5348 |
assert(k);
|
|
|
8e5348 |
assert(c_neurons);
|
|
|
8e5348 |
assert(p_neurons);
|
|
|
8e5348 |
assert(weights);
|
|
|
8e5348 |
assert(pl.x0 + k.ox >= 0 && pl.x0 + (cl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
|
|
|
8e5348 |
assert(pl.y0 + k.oy >= 0 && pl.y0 + (cl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
|
|
|
8e5348 |
|
|
|
8e5348 |
for(int cy = cl.y0; cy < cl.y1; ++cy)
|
|
|
8e5348 |
for(int cx = cl.x0; cx < cl.x1; ++cx)
|
|
|
8e5348 |
for(int cz = cl.z0; cz < cl.z1; ++cz) {
|
|
|
8e5348 |
int ci = (cy*cl.sx + cx)*cl.sz + cz;
|
|
|
8e5348 |
Neuron &cn = c_neurons[ci];
|
|
|
8e5348 |
typename Iter::AccumType a = {};
|
|
|
8e5348 |
Iter::init(cn, a);
|
|
|
8e5348 |
|
|
|
8e5348 |
for(int ky = 0; ky < k.sy; ++ky)
|
|
|
8e5348 |
for(int kx = 0; kx < k.sx; ++kx)
|
|
|
8e5348 |
for(int pz = pl.z0; pz < pl.z1; ++pz) {
|
|
|
8e5348 |
int wi = (ky*k.sx + kx)*pl.getD() + pz - pl.z0;
|
|
|
8e5348 |
Weight &w = weights[wi];
|
|
|
8e5348 |
|
|
|
8e5348 |
int px = pl.x0 + (cx - cl.x0)*k.dx + k.ox + kx;
|
|
|
8e5348 |
int py = pl.y0 + (cy - cl.y0)*k.dy + k.oy + ky;
|
|
|
8e5348 |
int pi = (py*pl.sx + px)*pl.sz + pz;
|
|
|
8e5348 |
Neuron &pn = p_neurons[pi];
|
|
|
8e5348 |
|
|
|
8e5348 |
Iter::iter(pn, w, a);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
Iter::done(cn, a);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
template<typename iter=""></typename>
|
|
|
8e5348 |
void iterateConvolutionShared(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
|
|
|
8e5348 |
if (!cl) return;
|
|
|
8e5348 |
assert(pl);
|
|
|
8e5348 |
assert(wl);
|
|
|
8e5348 |
assert(k);
|
|
|
8e5348 |
assert(c_neurons);
|
|
|
8e5348 |
assert(p_neurons);
|
|
|
8e5348 |
assert(weights);
|
|
|
8e5348 |
assert(cl.isSubLayoutOf(wl));
|
|
|
8e5348 |
assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
|
|
|
8e5348 |
assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
|
|
|
8e5348 |
|
|
|
8e5348 |
int c_h = cl.getH();
|
|
|
8e5348 |
int c_w = cl.getW();
|
|
|
8e5348 |
int c_d = cl.getD();
|
|
|
8e5348 |
int c_swz = c_w*cl.sz;
|
|
|
8e5348 |
int c_shxz = c_h*cl.sx*cl.sz;
|
|
|
8e5348 |
int c_dx = cl.sz - c_d;
|
|
|
8e5348 |
int c_dy = (cl.sx - c_w)*cl.sz;
|
|
|
8e5348 |
|
|
|
8e5348 |
int p_d = pl.getD();
|
|
|
8e5348 |
int p_dx = k.dx*pl.sz;
|
|
|
8e5348 |
int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
|
|
|
8e5348 |
|
|
|
8e5348 |
int k_sxd = k.sx*p_d;
|
|
|
8e5348 |
int p_ddy = (pl.sx - k.sx)*pl.sz;
|
|
|
8e5348 |
int p_ddx = pl.sz - p_d;
|
|
|
8e5348 |
|
|
|
8e5348 |
Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
|
|
|
8e5348 |
Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox)*pl.sz + pl.z0;
|
|
|
8e5348 |
Weight *ew = weights + k.sy*k_sxd;
|
|
|
8e5348 |
|
|
|
8e5348 |
for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
|
|
|
8e5348 |
for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
|
|
|
8e5348 |
for(Neuron *e = icn + c_d; icn < e; ++icn) {
|
|
|
8e5348 |
typename Iter::AccumType a;
|
|
|
8e5348 |
Iter::init(*icn, a);
|
|
|
8e5348 |
|
|
|
8e5348 |
Neuron *iipn = ipn;
|
|
|
8e5348 |
for(Weight *iw = weights; iw < ew; iipn += p_ddy)
|
|
|
8e5348 |
for(Weight *e = iw + k_sxd; iw < e; iipn += p_ddx)
|
|
|
8e5348 |
for(Weight *e = iw + p_d; iw < e; ++iw, ++iipn)
|
|
|
8e5348 |
Iter::iter(*iipn, *iw, a);
|
|
|
8e5348 |
|
|
|
8e5348 |
Iter::done(*icn, a);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
template<typename iter=""></typename>
|
|
|
8e5348 |
void iterateConvolutionSharedPoint(Layout cl, Layout pl, Layout wl, Kernel k, int kx, int ky, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
|
|
|
8e5348 |
if (!cl) return;
|
|
|
8e5348 |
assert(pl);
|
|
|
8e5348 |
assert(wl);
|
|
|
8e5348 |
assert(k);
|
|
|
8e5348 |
assert(c_neurons);
|
|
|
8e5348 |
assert(p_neurons);
|
|
|
8e5348 |
assert(weights);
|
|
|
8e5348 |
assert(cl.isSubLayoutOf(wl));
|
|
|
8e5348 |
assert(kx >= 0 && kx < k.sx);
|
|
|
8e5348 |
assert(ky >= 0 && ky < k.sy);
|
|
|
8e5348 |
assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
|
|
|
8e5348 |
assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
|
|
|
8e5348 |
|
|
|
8e5348 |
int c_h = cl.getH();
|
|
|
8e5348 |
int c_w = cl.getW();
|
|
|
8e5348 |
int c_d = cl.getD();
|
|
|
8e5348 |
int c_swz = c_w*cl.sz;
|
|
|
8e5348 |
int c_shxz = c_h*cl.sx*cl.sz;
|
|
|
8e5348 |
int c_dx = cl.sz - c_d;
|
|
|
8e5348 |
int c_dy = (cl.sx - c_w)*cl.sz;
|
|
|
8e5348 |
|
|
|
8e5348 |
int p_d = pl.getD();
|
|
|
8e5348 |
int p_dx = k.dx*pl.sz;
|
|
|
8e5348 |
int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
|
|
|
8e5348 |
|
|
|
8e5348 |
Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
|
|
|
8e5348 |
Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy + ky)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox + kx)*pl.sz + pl.z0;
|
|
|
8e5348 |
weights += (ky*k.sx + kx)*p_d;
|
|
|
8e5348 |
Weight *ew = weights + p_d;
|
|
|
8e5348 |
|
|
|
8e5348 |
for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
|
|
|
8e5348 |
for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
|
|
|
8e5348 |
for(Neuron *e = icn + c_d; icn < e; ++icn, ipn -= p_d)
|
|
|
8e5348 |
for(Weight *iw = weights; iw < ew; ++ipn, ++iw)
|
|
|
8e5348 |
Iter::iter2(*icn, *ipn, *iw);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
class LayerConvSharedBase: public Layer {
|
|
|
8e5348 |
public:
|
|
|
8e5348 |
std::vector<weight> mtWeights;</weight>
|
|
|
8e5348 |
|
|
|
8e5348 |
using Layer::Layer;
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void split(int threadsCount) override {
|
|
|
8e5348 |
Layer::split(threadsCount);
|
|
|
8e5348 |
Weight w = {};
|
|
|
8e5348 |
mtWeights.clear();
|
|
|
8e5348 |
mtWeights.resize(threadsCount*weightsCount, w);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
inline void sumWeights(int tid, int threads) {
|
|
|
8e5348 |
int wc = weightsCount;
|
|
|
8e5348 |
Weight *iw = weights + tid;
|
|
|
8e5348 |
Weight *ia = mtWeights.data() + tid;
|
|
|
8e5348 |
Weight *ea = mtWeights.data() + threads*wc;
|
|
|
8e5348 |
for(Weight *ew = weights + wc; iw < ew; iw += threads, ia += threads) {
|
|
|
8e5348 |
WeightReal w = iw->w;
|
|
|
8e5348 |
for(Weight *iia = ia; iia < ea; iia += wc)
|
|
|
8e5348 |
w += iia->w, iia->w = 0;
|
|
|
8e5348 |
iw->w = w;
|
|
|
8e5348 |
}
|
|
|
8e5348 |
}
|
|
|
8e5348 |
};
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
template<func func=""></func>
|
|
|
8e5348 |
class LayerConvShared: public LayerConvSharedBase {
|
|
|
8e5348 |
public:
|
|
|
8e5348 |
Kernel kernel;
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
LayerConvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
|
|
|
8e5348 |
LayerConvSharedBase(&prev, layout, kernel.sx*kernel.sy*prev.back().layout.getD(), weights),
|
|
|
8e5348 |
kernel(kernel)
|
|
|
8e5348 |
{
|
|
|
8e5348 |
assert(kernel);
|
|
|
8e5348 |
stat.links = weightsCount*neuronsCount;
|
|
|
8e5348 |
if (ownWeights) fillWeights(-1, 1);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void pass(Barrier &barrier) override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron&, AccumType &a) { a.v = 0; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
|
|
|
8e5348 |
static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
iterateConvolutionShared(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void backpassWeights(Barrier &barrier) override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.v * a.v; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
iterateConvolutionShared(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, &mtWeights[barrier.tid * weightsCount]);
|
|
|
8e5348 |
barrier.wait();
|
|
|
8e5348 |
sumWeights(barrier.tid, barrier.threads);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void backpassDeltas(Barrier &barrier) override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.d * w.w; }
|
|
|
8e5348 |
static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
int ksx = kernel.sx, ksy = kernel.sy;
|
|
|
8e5348 |
for(int kx = 0; kx < ksx; ++kx)
|
|
|
8e5348 |
for(int ky = 0; ky < ksy; ++ky) {
|
|
|
8e5348 |
iterateConvolutionSharedPoint(mtLayouts[barrier.tid], prev->layout, layout, kernel, kx, ky, neurons, prev->neurons, weights);
|
|
|
8e5348 |
barrier.wait();
|
|
|
8e5348 |
}
|
|
|
8e5348 |
iterateNeurons(mtPrevLayouts[barrier.tid], prev->neurons);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void testPass() override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron&, AccumType &a) { a.v = 0; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
|
|
|
8e5348 |
static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
iterateTestConvolutionShared(layout, prev->layout, kernel, neurons, prev->neurons, weights);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void testBackpass() override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
|
|
|
8e5348 |
static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
struct IW: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += a.v * n.v; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
clearAccum();
|
|
|
8e5348 |
iterateTestConvolutionShared(layout, prev->layout, kernel, neurons, prev->neurons, weights);
|
|
|
8e5348 |
iterateTestConvolutionShared<iw>(layout, prev->layout, kernel, neurons, prev->neurons, weights);</iw>
|
|
|
8e5348 |
iterateNeurons(prev->layout, prev->neurons);
|
|
|
8e5348 |
clearAccum();
|
|
|
8e5348 |
}
|
|
|
8e5348 |
};
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
template<func func=""></func>
|
|
|
8e5348 |
class LayerDeconvShared: public LayerConvSharedBase {
|
|
|
8e5348 |
public:
|
|
|
8e5348 |
Kernel kernel;
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
LayerDeconvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
|
|
|
8e5348 |
LayerConvSharedBase(&prev, layout, kernel.sx*kernel.sy*layout.getD(), weights),
|
|
|
8e5348 |
kernel(kernel)
|
|
|
8e5348 |
{
|
|
|
8e5348 |
assert(kernel);
|
|
|
8e5348 |
stat.links = weightsCount*neuronsCount;
|
|
|
8e5348 |
if (ownWeights) fillWeights(-1, 1);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void pass(Barrier &barrier) override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.v * w.w; }
|
|
|
8e5348 |
static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
int k_sx = kernel.sx, k_sy = kernel.sy;
|
|
|
8e5348 |
for(int kx = 0; kx < k_sx; ++kx)
|
|
|
8e5348 |
for(int ky = 0; ky < k_sy; ++ky) {
|
|
|
8e5348 |
iterateConvolutionSharedPoint(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, kx, ky, prev->neurons, neurons, weights);
|
|
|
8e5348 |
barrier.wait();
|
|
|
8e5348 |
}
|
|
|
8e5348 |
iterateNeurons(mtLayouts[barrier.tid], neurons);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void backpassWeights(Barrier &barrier) override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
iterateConvolutionShared(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, &mtWeights[barrier.tid * weightsCount]);
|
|
|
8e5348 |
barrier.wait();
|
|
|
8e5348 |
sumWeights(barrier.tid, barrier.threads);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void backpassDeltas(Barrier &barrier) override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron&, AccumType &a) { a.v = 0; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
|
|
|
8e5348 |
static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
iterateConvolutionShared(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void testPass() override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
|
|
|
8e5348 |
static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
clearAccum();
|
|
|
8e5348 |
iterateTestConvolutionShared(prev->layout, layout, kernel, prev->neurons, neurons, weights);
|
|
|
8e5348 |
iterateNeurons(layout, neurons);
|
|
|
8e5348 |
clearAccum();
|
|
|
8e5348 |
}
|
|
|
8e5348 |
|
|
|
8e5348 |
|
|
|
8e5348 |
void testBackpass() override {
|
|
|
8e5348 |
struct I: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron &n, AccumType &a) { a.v = 0; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
|
|
|
8e5348 |
static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
struct IW: public Iter {
|
|
|
8e5348 |
static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
|
|
|
8e5348 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
|
|
|
8e5348 |
};
|
|
|
8e5348 |
iterateTestConvolutionShared(prev->layout, layout, kernel, prev->neurons, neurons, weights);
|
|
|
8e5348 |
iterateTestConvolutionShared<iw>(prev->layout, layout, kernel, prev->neurons, neurons, weights);</iw>
|
|
|
8e5348 |
}
|
|
|
8e5348 |
};
|
|
|
8e5348 |
|
|
|
8e5348 |
#endif
|