#ifndef LAYER_CONVSUB_SHARED_INC_CPP
#define LAYER_CONVSUB_SHARED_INC_CPP


#include "layer.conv.inc.cpp"


template<typename iter=""></typename>
|
|
|
b579b3 |
void iterateConvolutionShared2(Layout cl, Layout pl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
|
|
|
b579b3 |
assert(cl);
|
|
|
b579b3 |
assert(pl);
|
|
|
b579b3 |
assert(k);
|
|
|
b579b3 |
assert(c_neurons);
|
|
|
b579b3 |
assert(p_neurons);
|
|
|
b579b3 |
assert(weights);
|
|
|
b579b3 |
assert(!cl.hasPadZ());
|
|
|
b579b3 |
assert(!pl.hasPadZ());
|
|
|
b579b3 |
assert(pl.x0 + k.ox >= 0 && pl.x0 + (cl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
|
|
|
b579b3 |
assert(pl.y0 + k.oy >= 0 && pl.y0 + (cl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
|
|
|
b579b3 |
|
|
|
b579b3 |
int c_h = cl.getH();
|
|
|
b579b3 |
int c_w = cl.getW();
|
|
|
b579b3 |
int c_swz = c_w*cl.sz;
|
|
|
b579b3 |
int c_shxz = c_h*cl.sx*cl.sz;
|
|
|
b579b3 |
int c_dx = cl.sz - c_d;
|
|
|
b579b3 |
int c_dy = (cl.sx - c_w)*cl.sz;
|
|
|
b579b3 |
|
|
|
b579b3 |
int p_d = pl.getD();
|
|
|
b579b3 |
int p_dkx = pl.sx - k.sx
|
|
|
b579b3 |
int p_dx = k.dx*pl.sz;
|
|
|
b579b3 |
int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
|
|
|
b579b3 |
|
|
|
b579b3 |
c_neurons += (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
|
|
|
b579b3 |
p_neurons += ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox)*pl.sz + pl.z0;
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = 0; ky < k.sy; ++ky, p_neurons += p_dkx)
|
|
|
b579b3 |
for(int kx = 0; kx < k.sx; ++kx, p_neurons += pl.sz) {
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
|
|
|
b579b3 |
Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy + ky)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox + kx)*pl.sz + pl.z0;
|
|
|
b579b3 |
weights += (ky*k.sx + kx)*p_d;
|
|
|
b579b3 |
Weight *ew = weights + p_d;
|
|
|
b579b3 |
|
|
|
b579b3 |
for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
|
|
|
b579b3 |
for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
|
|
|
b579b3 |
for(Neuron *e = icn + c_d; icn < e; ++icn, ipn -= p_d)
|
|
|
b579b3 |
for(Weight *iw = weights; iw < ew; ++ipn, ++iw)
|
|
|
b579b3 |
Iter::iter2(*icn, *ipn, *iw);
|
|
|
b579b3 |
}


template<func func=""></func>
|
|
|
b579b3 |
class LayerSub: public Layer {
|
|
|
b579b3 |
public:
|
|
|
b579b3 |
Layout optLayout;
|
|
|
b579b3 |
Layout::List mtOptLayouts;
|
|
|
b579b3 |
std::vector<neuron*> choosen;</neuron*>
|
|
|
b579b3 |
|
|
|
b579b3 |
LayerSub(Layer &prev, const Layout &layout):
|
|
|
b579b3 |
Layer(&prev, layout),
|
|
|
b579b3 |
optLayout(optimizeLayoutSimple(layout)),
|
|
|
b579b3 |
choosen(layout.getActiveCount(), nullptr)
|
|
|
b579b3 |
{ }
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
void split(int threadsCount) override {
|
|
|
b579b3 |
Layer::split(threadsCount);
|
|
|
b579b3 |
optLayout.split(mtOptLayouts, threadsCount);
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
void pass(Barrier &barrier) override {
|
|
|
b579b3 |
Layout cl = mtLayouts[barrier.tid];
|
|
|
b579b3 |
Layout pl = prev->layout;
|
|
|
b579b3 |
Layout wl = layout;
|
|
|
b579b3 |
if (!cl) return;
|
|
|
b579b3 |
|
|
|
b579b3 |
assert(pl.getW() == wl.getW()*2);
|
|
|
b579b3 |
assert(pl.getH() == wl.getH()*2);
|
|
|
b579b3 |
assert(pl.getD() == wl.getD());
|
|
|
b579b3 |
assert(cl.isSubLayoutOf(wl));
|
|
|
b579b3 |
|
|
|
b579b3 |
int c_h = cl.getH();
|
|
|
b579b3 |
int c_w = cl.getW();
|
|
|
b579b3 |
int c_d = cl.getD();
|
|
|
b579b3 |
int c_sxz = cl.sx*cl.sz;
|
|
|
b579b3 |
int c_swz = c_w*cl.sz;
|
|
|
b579b3 |
int c_shxz = c_h*c_sxz;
|
|
|
b579b3 |
int c_dy = c_sxz - c_swz;
|
|
|
b579b3 |
int c_dx = cl.sz - c_d;
|
|
|
b579b3 |
|
|
|
b579b3 |
int w_d = wl.getD();
|
|
|
b579b3 |
int w_w = wl.getW();
|
|
|
b579b3 |
int w_dy = (w_w - c_w)*w_d;
|
|
|
b579b3 |
int w_dx = w_d - c_d;
|
|
|
b579b3 |
|
|
|
b579b3 |
int p_dy = (pl.sx - c_w)*pl.sz*2;
|
|
|
b579b3 |
int p_dx = pl.sz*2 - c_d;
|
|
|
b579b3 |
|
|
|
b579b3 |
int p_i1 = pl.sz;
|
|
|
b579b3 |
int p_i2 = pl.sx*pl.sz;
|
|
|
b579b3 |
int p_i3 = p_i1 + p_i2;
|
|
|
b579b3 |
|
|
|
b579b3 |
int cx0 = cl.x0 - wl.x0;
|
|
|
b579b3 |
int cy0 = cl.y0 - wl.y0;
|
|
|
b579b3 |
int cz0 = cl.z0 - wl.z0;
|
|
|
b579b3 |
|
|
|
b579b3 |
Neuron *icn = neurons + (cl.y0*c_sxz + cl.x0*cl.sz + cl.z0);
|
|
|
b579b3 |
Neuron *ipn = prev->neurons + ((pl.y0 + cy0*2)*pl.sx + pl.x0 + cx0*2)*pl.sz + pl.z0 + cz0;
|
|
|
b579b3 |
Neuron **icc = choosen.data() + (cy0*w_w + cx0)*w_d + cz0;
|
|
|
b579b3 |
|
|
|
b579b3 |
for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy, icc += w_dy)
|
|
|
b579b3 |
for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx, icc += w_dx)
|
|
|
b579b3 |
for(Neuron *e = icn + c_d; icn < e; ++icn, ++ipn, ++icc) {
|
|
|
b579b3 |
Neuron *iipn = ipn, *pn = iipn;
|
|
|
b579b3 |
NeuronReal v = pn->v, d = pn->d;
|
|
|
b579b3 |
pn->d = 0;
|
|
|
b579b3 |
|
|
|
b579b3 |
iipn = ipn + p_i1;
|
|
|
b579b3 |
if (v < iipn->v) { v = iipn->v; d = iipn->d; pn = iipn; }
|
|
|
b579b3 |
iipn->d = 0;
|
|
|
b579b3 |
|
|
|
b579b3 |
iipn = ipn + p_i2;
|
|
|
b579b3 |
if (v < iipn->v) { v = iipn->v; d = iipn->d; pn = iipn; }
|
|
|
b579b3 |
iipn->d = 0;
|
|
|
b579b3 |
|
|
|
b579b3 |
iipn = ipn + p_i3;
|
|
|
b579b3 |
if (v < iipn->v) { v = iipn->v; d = iipn->d; pn = iipn; }
|
|
|
b579b3 |
iipn->d = 0;
|
|
|
b579b3 |
|
|
|
b579b3 |
func(*icn, v);
|
|
|
b579b3 |
icn->d *= d;
|
|
|
b579b3 |
*icc = pn;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
void backpassDeltas(Barrier &barrier) override {
|
|
|
b579b3 |
Layout cl = mtOptLayouts[barrier.tid];
|
|
|
b579b3 |
Layout wl = optLayout;
|
|
|
b579b3 |
if (!cl) return;
|
|
|
b579b3 |
|
|
|
b579b3 |
int c_h = cl.getH();
|
|
|
b579b3 |
int c_w = cl.getW();
|
|
|
b579b3 |
int c_d = cl.getD();
|
|
|
b579b3 |
int c_sxz = cl.sx*cl.sz;
|
|
|
b579b3 |
int c_swz = c_w*cl.sz;
|
|
|
b579b3 |
int c_shxz = c_h*c_sxz;
|
|
|
b579b3 |
int c_dy = c_sxz - c_swz;
|
|
|
b579b3 |
int c_dx = cl.sz - c_d;
|
|
|
b579b3 |
|
|
|
b579b3 |
int w_d = wl.getD();
|
|
|
b579b3 |
int w_w = wl.getW();
|
|
|
b579b3 |
int w_dy = (w_w - c_w)*w_d;
|
|
|
b579b3 |
int w_dx = w_d - c_d;
|
|
|
b579b3 |
|
|
|
b579b3 |
Neuron *icn = neurons + (cl.y0*c_sxz + cl.x0*cl.sz + cl.z0);
|
|
|
b579b3 |
Neuron **icc = choosen.data() + ((cl.y0 - wl.y0)*w_w + cl.x0 - wl.x0)*w_d + cl.z0 - wl.z0;
|
|
|
b579b3 |
|
|
|
b579b3 |
for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, icc += w_dy)
|
|
|
b579b3 |
for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, icc += w_dx)
|
|
|
b579b3 |
for(Neuron *e = icn + c_d; icn < e; ++icn, ++icc) {
|
|
|
b579b3 |
assert(*icc);
|
|
|
b579b3 |
(*icc)->d = icn->d;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
void testPass() override {
|
|
|
b579b3 |
Layout cl = layout;
|
|
|
b579b3 |
Layout pl = prev->layout;
|
|
|
b579b3 |
|
|
|
b579b3 |
assert(pl.getW() == cl.getW()*2);
|
|
|
b579b3 |
assert(pl.getH() == cl.getH()*2);
|
|
|
b579b3 |
assert(pl.getD() == cl.getD());
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int cy = cl.y0; cy < cl.y1; ++cy)
|
|
|
b579b3 |
for(int cx = cl.x0; cx < cl.x1; ++cx)
|
|
|
b579b3 |
for(int cz = cl.z0; cz < cl.z1; ++cz) {
|
|
|
b579b3 |
int ci = (cy*cl.sx + cx)*cl.sz + cz;
|
|
|
b579b3 |
Neuron &cn = neurons[ci];
|
|
|
b579b3 |
|
|
|
b579b3 |
Neuron *c = nullptr;
|
|
|
b579b3 |
NeuronReal v = 0, d = 0;
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = 0; ky < 2; ++ky)
|
|
|
b579b3 |
for(int kx = 0; kx < 2; ++kx) {
|
|
|
b579b3 |
int px = pl.x0 + (cx - cl.x0)*2 + kx;
|
|
|
b579b3 |
int py = pl.y0 + (cy - cl.y0)*2 + ky;
|
|
|
b579b3 |
int pz = pl.z0 + cz - cl.z0;
|
|
|
b579b3 |
|
|
|
b579b3 |
Neuron &pn = prev->neurons[ (py*pl.sx + px)*pl.sz + pz ];
|
|
|
b579b3 |
if (!c || v < pn.v) { v = pn.v; d = pn.d; c = &pn; }
|
|
|
b579b3 |
pn.d = 0;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
assert(c);
|
|
|
b579b3 |
c->d = d;
|
|
|
b579b3 |
func(cn, v);
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
void testBackpass() override {
|
|
|
b579b3 |
Layout cl = layout;
|
|
|
b579b3 |
Layout pl = prev->layout;
|
|
|
b579b3 |
|
|
|
b579b3 |
assert(pl.getW() == cl.getW()*2);
|
|
|
b579b3 |
assert(pl.getH() == cl.getH()*2);
|
|
|
b579b3 |
assert(pl.getD() == cl.getD());
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int cy = cl.y0; cy < cl.y1; ++cy)
|
|
|
b579b3 |
for(int cx = cl.x0; cx < cl.x1; ++cx)
|
|
|
b579b3 |
for(int cz = cl.z0; cz < cl.z1; ++cz) {
|
|
|
b579b3 |
int ci = (cy*cl.sx + cx)*cl.sz + cz;
|
|
|
b579b3 |
Neuron &cn = neurons[ci];
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = 0; ky < 2; ++ky)
|
|
|
b579b3 |
for(int kx = 0; kx < 2; ++kx) {
|
|
|
b579b3 |
int px = pl.x0 + (cx - cl.x0)*2 + kx;
|
|
|
b579b3 |
int py = pl.y0 + (cy - cl.y0)*2 + ky;
|
|
|
b579b3 |
int pz = pl.z0 + cz - cl.z0;
|
|
|
b579b3 |
|
|
|
b579b3 |
Neuron &pn = prev->neurons[ (py*pl.sx + px)*pl.sz + pz ];
|
|
|
b579b3 |
pn.d *= cn.d;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
};


#endif