|
|
e865c9 |
#ifndef LAYER_SIMPLE_INC_CPP
|
|
|
e865c9 |
#define LAYER_SIMPLE_INC_CPP
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
#include "layer.inc.cpp"
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
typedef void Func(Neuron &n, AccumReal s);
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
// Logistic activation.
// Stores 1/(1 + exp(-s)) in n.v and the matching derivative term
// v*(1 - v) in n.d.
inline void funcSigmoidExp(Neuron &n, AccumReal s) {
  // clamping of the input is currently switched off:
  //if (s > 5) s = 5; else if (s < -5) s = -5;
  AccumReal activation = 1/(1 + std::exp(-s));
  n.v = activation;
  n.d = activation * (1-activation);
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
template<typename iter=""></typename>
|
|
|
e865c9 |
inline void iterateNeurons(const Layout &l, Neuron *neurons) {
|
|
|
e865c9 |
if (!l) return;
|
|
|
e865c9 |
assert(neurons);
|
|
|
e865c9 |
|
|
|
e865c9 |
int h = l.y1 - l.y0;
|
|
|
e865c9 |
int w = l.x1 - l.x0;
|
|
|
e865c9 |
int d = l.z1 - l.z0;
|
|
|
e865c9 |
int sz = l.sz;
|
|
|
e865c9 |
int sxz = l.sx*sz;
|
|
|
e865c9 |
int swz = w*sz;
|
|
|
e865c9 |
int shxz = h*sxz;
|
|
|
e865c9 |
int dy = sxz - swz;
|
|
|
e865c9 |
int dx = sz - d;
|
|
|
e865c9 |
|
|
|
e865c9 |
Neuron *in = neurons + l.y0*sxz + l.x0*sz + l.z0;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(Neuron *e = in + shxz; in < e; in += dy)
|
|
|
e865c9 |
for(Neuron *e = in + swz; in < e; in += dx)
|
|
|
e865c9 |
for(Neuron *e = in + d; in < e; ++in)
|
|
|
e865c9 |
Iter::iter3(*in);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
template<typename iter=""></typename>
|
|
|
e865c9 |
inline void iterateNeurons2(Layout l, Layout dl, Neuron *neurons, typename Iter::DataType data, int stride = 1, typename Iter::DataAccumType *accum = nullptr) {
|
|
|
e865c9 |
if (!l) return;
|
|
|
e865c9 |
assert(dl);
|
|
|
e865c9 |
assert(neurons);
|
|
|
e865c9 |
assert(l.isSubLayoutOf(dl));
|
|
|
e865c9 |
|
|
|
e865c9 |
int h = l.getH();
|
|
|
e865c9 |
int w = l.getW();
|
|
|
e865c9 |
int d = l.getD();
|
|
|
e865c9 |
int sxz = l.sx*l.sz;
|
|
|
e865c9 |
int swz = w*l.sz;
|
|
|
e865c9 |
int shxz = h*sxz;
|
|
|
e865c9 |
int dy = sxz - swz;
|
|
|
e865c9 |
int dx = l.sz - d;
|
|
|
e865c9 |
|
|
|
e865c9 |
int d_w = dl.getW();
|
|
|
e865c9 |
int d_d = dl.getD();
|
|
|
e865c9 |
int d_dx = (d_d - d)*stride;
|
|
|
e865c9 |
int d_dy = (d_w - w)*d_d*stride;
|
|
|
e865c9 |
|
|
|
e865c9 |
Neuron *in = neurons + l.y0*sxz + l.x0*l.sz + l.z0;
|
|
|
e865c9 |
data += (((l.y0 - dl.y0)*d_w + l.x0 - dl.x0)*d_d + l.z0 - dl.z0)*stride;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(Neuron *e = in + shxz; in < e; in += dy, data += d_dy)
|
|
|
e865c9 |
for(Neuron *e = in + swz; in < e; in += dx, data += d_dx)
|
|
|
e865c9 |
for(Neuron *e = in + d; in < e; ++in, data += stride)
|
|
|
e865c9 |
Iter::iter4(*in, data, *accum);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
template<typename iter=""></typename>
|
|
|
e865c9 |
inline void iterateSimple(Layout cl, Layout pl, Layout wl, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
|
|
|
e865c9 |
if (!cl) return;
|
|
|
e865c9 |
assert(pl);
|
|
|
e865c9 |
assert(wl);
|
|
|
e865c9 |
assert(c_neurons);
|
|
|
e865c9 |
assert(p_neurons);
|
|
|
e865c9 |
assert(weights);
|
|
|
e865c9 |
assert(cl.isSubLayoutOf(wl));
|
|
|
e865c9 |
|
|
|
e865c9 |
int c_h = cl.getH();
|
|
|
e865c9 |
int c_w = cl.getW();
|
|
|
e865c9 |
int c_d = cl.getD();
|
|
|
e865c9 |
int c_sxz = cl.sx*cl.sz;
|
|
|
e865c9 |
int c_swz = c_w*cl.sz;
|
|
|
e865c9 |
int c_shxz = c_h*c_sxz;
|
|
|
e865c9 |
int c_dy = c_sxz - c_swz;
|
|
|
e865c9 |
int c_dx = cl.sz - c_d;
|
|
|
e865c9 |
|
|
|
e865c9 |
int p_h = pl.getH();
|
|
|
e865c9 |
int p_w = pl.getW();
|
|
|
e865c9 |
int p_d = pl.getD();
|
|
|
e865c9 |
int p_sxz = pl.sx*pl.sz;
|
|
|
e865c9 |
int p_swz = p_w*pl.sz;
|
|
|
e865c9 |
int p_shxz = p_h*p_sxz;
|
|
|
e865c9 |
int p_dy = p_sxz - p_swz;
|
|
|
e865c9 |
int p_dx = pl.sz - p_d;
|
|
|
e865c9 |
|
|
|
e865c9 |
int w_w = wl.getW();
|
|
|
e865c9 |
int w_d = wl.getD();
|
|
|
e865c9 |
int w_dz = p_h*p_w*p_d;
|
|
|
e865c9 |
int w_dx = (w_d - c_d)*w_dz;
|
|
|
e865c9 |
int w_dy = (w_w - c_w)*w_d*w_dz;
|
|
|
e865c9 |
|
|
|
e865c9 |
Neuron *icn = c_neurons + (cl.y0*c_sxz + cl.x0*cl.sz + cl.z0);
|
|
|
e865c9 |
p_neurons += pl.y0*p_sxz + pl.x0*pl.sz + pl.z0;
|
|
|
e865c9 |
|
|
|
e865c9 |
Weight *iw = weights + (((cl.y0 - wl.y0)*w_w + cl.x0 - wl.x0)*w_d + cl.z0 - wl.z0)*w_dz;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, iw += w_dy)
|
|
|
e865c9 |
for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, iw += w_dx)
|
|
|
e865c9 |
for(Neuron *e = icn + c_d; icn < e; ++icn) {
|
|
|
e865c9 |
typename Iter::AccumType a;
|
|
|
e865c9 |
Iter::init(*icn, a);
|
|
|
e865c9 |
|
|
|
e865c9 |
Neuron *ipn = p_neurons;
|
|
|
e865c9 |
for(Neuron *e = ipn + p_shxz; ipn < e; ipn += p_dy)
|
|
|
e865c9 |
for(Neuron *e = ipn + p_swz; ipn < e; ipn += p_dx)
|
|
|
e865c9 |
for(Neuron *e = ipn + p_d; ipn < e; ++ipn, ++iw)
|
|
|
e865c9 |
Iter::iter(*ipn, *iw, a);
|
|
|
e865c9 |
|
|
|
e865c9 |
Iter::done(*icn, a);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
template<typename iter=""></typename>
|
|
|
e865c9 |
void iterateSimpleInv(Layout cl, Layout pl, Layout wl, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
|
|
|
e865c9 |
if (!cl) return;
|
|
|
e865c9 |
assert(pl);
|
|
|
e865c9 |
assert(wl);
|
|
|
e865c9 |
assert(c_neurons);
|
|
|
e865c9 |
assert(p_neurons);
|
|
|
e865c9 |
assert(weights);
|
|
|
e865c9 |
assert(cl.isSubLayoutOf(wl));
|
|
|
e865c9 |
|
|
|
e865c9 |
int c_h = cl.getH();
|
|
|
e865c9 |
int c_w = cl.getW();
|
|
|
e865c9 |
int c_d = cl.getD();
|
|
|
e865c9 |
int c_sxz = cl.sx*cl.sz;
|
|
|
e865c9 |
int c_swz = c_w*cl.sz;
|
|
|
e865c9 |
int c_shxz = c_h*c_sxz;
|
|
|
e865c9 |
int c_dy = c_sxz - c_swz;
|
|
|
e865c9 |
int c_dx = cl.sz - c_d;
|
|
|
e865c9 |
|
|
|
e865c9 |
int p_h = pl.getH();
|
|
|
e865c9 |
int p_w = pl.getW();
|
|
|
e865c9 |
int p_d = pl.getD();
|
|
|
e865c9 |
int p_sxz = pl.sx*pl.sz;
|
|
|
e865c9 |
int p_swz = p_w*pl.sz;
|
|
|
e865c9 |
int p_shxz = p_h*p_sxz;
|
|
|
e865c9 |
int p_dy = p_sxz - p_swz;
|
|
|
e865c9 |
int p_dx = pl.sz - p_d;
|
|
|
e865c9 |
|
|
|
e865c9 |
int w_w = wl.getW();
|
|
|
e865c9 |
int w_h = wl.getH();
|
|
|
e865c9 |
int w_d = wl.getD();
|
|
|
e865c9 |
int w_ddz = w_h*w_w*w_d;
|
|
|
e865c9 |
int w_dx = w_d - c_d;
|
|
|
e865c9 |
int w_dy = (w_w - c_w)*w_d;
|
|
|
e865c9 |
|
|
|
e865c9 |
Neuron *icn = c_neurons + (cl.y0*c_sxz + cl.x0*cl.sz + cl.z0);
|
|
|
e865c9 |
p_neurons += pl.y0*p_sxz + pl.x0*pl.sz + pl.z0;
|
|
|
e865c9 |
|
|
|
e865c9 |
Weight *iw = weights + ((cl.y0 - wl.y0)*w_w + cl.x0 - wl.x0)*w_d + cl.z0 - wl.z0;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, iw += w_dy)
|
|
|
e865c9 |
for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, iw += w_dx)
|
|
|
e865c9 |
for(Neuron *e = icn + c_d; icn < e; ++icn, ++iw) {
|
|
|
e865c9 |
typename Iter::AccumType a;
|
|
|
e865c9 |
Iter::init(*icn, a);
|
|
|
e865c9 |
|
|
|
e865c9 |
Weight *iiw = iw;
|
|
|
e865c9 |
Neuron *ipn = p_neurons;
|
|
|
e865c9 |
for(Neuron *e = ipn + p_shxz; ipn < e; ipn += p_dy)
|
|
|
e865c9 |
for(Neuron *e = ipn + p_swz; ipn < e; ipn += p_dx)
|
|
|
e865c9 |
for(Neuron *e = ipn + p_d; ipn < e; ++ipn, iiw += w_ddz)
|
|
|
e865c9 |
Iter::iter(*ipn, *iiw, a);
|
|
|
e865c9 |
|
|
|
e865c9 |
Iter::done(*icn, a);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
// Returns a copy of `layout` in which dimensions are merged where possible.
//
// Step 1: when the active x range is the whole stride (x0 == 0, x1 == sx),
//         the y dimension is folded into x and y becomes a single row.
// Step 2: when the active z range is the whole stride (z0 == 0, z1 == sz),
//         the (possibly already merged) x dimension is folded into z and x
//         becomes a single column.
Layout optimizeLayoutSimple(const Layout &layout) {
  Layout merged = layout;

  if (merged.x0 == 0 && merged.x1 == merged.sx) {
    merged.x0 = merged.y0*merged.sx;
    merged.x1 *= merged.y1;
    merged.sx *= merged.sy;
    merged.y0 = 0;
    merged.sy = 1;
    merged.y1 = merged.sy;
  }

  // uses the x values produced by step 1, so the order of the steps matters
  if (merged.z0 == 0 && merged.z1 == merged.sz) {
    merged.z0 = merged.x0*merged.sz;
    merged.z1 *= merged.x1;
    merged.sz *= merged.sx;
    merged.x0 = 0;
    merged.sx = 1;
    merged.x1 = merged.sx;
  }

  return merged;
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
template<func func=""></func>
|
|
|
e865c9 |
class LayerSimple: public Layer {
|
|
|
e865c9 |
public:
|
|
|
e865c9 |
Layout optLayout;
|
|
|
e865c9 |
Layout prevOptLayout;
|
|
|
e865c9 |
Layout::List mtOptLayouts;
|
|
|
e865c9 |
Layout::List mtPrevOptLayouts;
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
LayerSimple(Layer &prev, const Layout &layout, Weight *weights = nullptr):
|
|
|
e865c9 |
Layer(&prev, layout, layout.getActiveCount() * prev.back().layout.getActiveCount(), weights),
|
|
|
e865c9 |
optLayout(optimizeLayoutSimple(layout)),
|
|
|
e865c9 |
prevOptLayout(optimizeLayoutSimple(this->prev->layout))
|
|
|
e865c9 |
{
|
|
|
e865c9 |
if (ownWeights) fillWeights(-1, 1);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
void split(int threadsCount) override {
|
|
|
e865c9 |
Layer::split(threadsCount);
|
|
|
e865c9 |
optLayout.split(mtOptLayouts, threadsCount);
|
|
|
e865c9 |
prevOptLayout.split(mtPrevOptLayouts, threadsCount);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
void pass(Barrier &barrier) override {
|
|
|
e865c9 |
struct I: public Iter {
|
|
|
e865c9 |
static inline void init(Neuron&, AccumType &a) { a.v = 0; }
|
|
|
e865c9 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
|
|
|
e865c9 |
static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
|
|
|
e865c9 |
};
|
|
|
e865c9 |
iterateSimple(mtOptLayouts[barrier.tid], prevOptLayout, optLayout, neurons, prev->neurons, weights);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
void backpassWeights(Barrier &barrier) override {
|
|
|
e865c9 |
struct I: public Iter {
|
|
|
e865c9 |
static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
|
|
|
e865c9 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.v * a.v; }
|
|
|
e865c9 |
};
|
|
|
e865c9 |
iterateSimple(mtOptLayouts[barrier.tid], prevOptLayout, optLayout, neurons, prev->neurons, weights);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
void backpassDeltas(Barrier &barrier) override {
|
|
|
e865c9 |
struct I: public Iter {
|
|
|
e865c9 |
static inline void init(Neuron&, AccumType &a) { a.v = 0; }
|
|
|
e865c9 |
static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
|
|
|
e865c9 |
static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
|
|
|
e865c9 |
};
|
|
|
e865c9 |
iterateSimpleInv(mtPrevOptLayouts[barrier.tid], optLayout, prevOptLayout, prev->neurons, neurons, weights);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
void testPass() override {
|
|
|
e865c9 |
Layout cl = layout;
|
|
|
e865c9 |
Layout pl = prev->layout;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(int cy = cl.y0; cy < cl.y1; ++cy)
|
|
|
e865c9 |
for(int cx = cl.x0; cx < cl.x1; ++cx)
|
|
|
e865c9 |
for(int cz = cl.z0; cz < cl.z1; ++cz) {
|
|
|
e865c9 |
AccumReal a = 0;
|
|
|
e865c9 |
Neuron &cn = neurons[ (cy*cl.sx + cx)*cl.sz + cz ];
|
|
|
e865c9 |
int wi = ((cy-cl.y0)*cl.getW() + cx-cl.x0)*cl.getD() + cz-cl.z0;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(int py = pl.y0; py < pl.y1; ++py)
|
|
|
e865c9 |
for(int px = pl.x0; px < pl.x1; ++px)
|
|
|
e865c9 |
for(int pz = pl.z0; pz < pl.z1; ++pz) {
|
|
|
e865c9 |
Neuron &pn = prev->neurons[ (py*pl.sx + px)*pl.sz + pz ];
|
|
|
e865c9 |
int wii = ((wi*pl.getH() + py-pl.y0)*pl.getW() + px-pl.x0)*pl.getD() + pz-pl.z0;
|
|
|
e865c9 |
Weight &w = weights[wii];
|
|
|
e865c9 |
a += pn.v * w.w;
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
func(cn, a);
|
|
|
e865c9 |
}
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
void testBackpass() override {
|
|
|
e865c9 |
Layout cl = layout;
|
|
|
e865c9 |
Layout pl = prev->layout;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(int i = 0; i < prev->neuronsCount; ++i)
|
|
|
e865c9 |
prev->neurons[i].a.v = 0;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(int cy = cl.y0; cy < cl.y1; ++cy)
|
|
|
e865c9 |
for(int cx = cl.x0; cx < cl.x1; ++cx)
|
|
|
e865c9 |
for(int cz = cl.z0; cz < cl.z1; ++cz) {
|
|
|
e865c9 |
Neuron &cn = neurons[ (cy*cl.sx + cx)*cl.sz + cz ];
|
|
|
e865c9 |
int wi = ((cy-cl.y0)*cl.getW() + cx-cl.x0)*cl.getD() + cz-cl.z0;
|
|
|
e865c9 |
|
|
|
e865c9 |
for(int py = pl.y0; py < pl.y1; ++py)
|
|
|
e865c9 |
for(int px = pl.x0; px < pl.x1; ++px)
|
|
|
e865c9 |
for(int pz = pl.z0; pz < pl.z1; ++pz) {
|
|
|
e865c9 |
Neuron &pn = prev->neurons[ (py*pl.sx + px)*pl.sz + pz ];
|
|
|
e865c9 |
int wii = ((wi*pl.getH() + py-pl.y0)*pl.getW() + px-pl.x0)*pl.getD() + pz-pl.z0;
|
|
|
e865c9 |
Weight &w = weights[wii];
|
|
|
e865c9 |
|
|
|
e865c9 |
pn.a.v += w.w * cn.d;
|
|
|
e865c9 |
w.w += pn.v * cn.d;
|
|
|
e865c9 |
}
|
|
|
e865c9 |
}
|
|
|
e865c9 |
|
|
|
e865c9 |
for(int i = 0; i < prev->neuronsCount; ++i) {
|
|
|
e865c9 |
Neuron &pn = prev->neurons[i];
|
|
|
e865c9 |
pn.d *= pn.a.v;
|
|
|
e865c9 |
pn.a.v = 0;
|
|
|
e865c9 |
}
|
|
|
e865c9 |
}
|
|
|
e865c9 |
};
|
|
|
e865c9 |
|
|
|
e865c9 |
|
|
|
e865c9 |
#endif
|