#ifndef NNLAYER_CONV_INC_CPP
#define NNLAYER_CONV_INC_CPP
#include "nnlayer.inc.cpp"
template<bool RaLU, int Size, int Step = 1, int Padding = 0>
class LayerConvolution: public Layer {
public:
enum { W = Size, WW = W*W, D = Step, P = Padding, P2 = P*2 };
int sx, sy, sz;
int psx, psy, psz;
LayerConvolution(Layer &prev, int sx, int sy, int sz):
Layer(&prev, sx*sy*sz),
sx(sx), sy(sy), sz(sz),
psx((sx-P2-1)*D + W), psy((sy-P2-1)*D + W), psz(this->prev->size/(psx*psy))
{
assert(sx > 0 && sy > 0 && sz > 0);
assert(psx > 0 && psy > 0 && psz > 0);
assert(psx*psy*psz == this->prev->size);
links = wsize = WW*psz*sz*(sy-P2)*(sx-P2);
w = new double[wsize];
double k = RaLU ? 1.0/(WW*psz*sz) : 1;
for(double *iw = w, *e = iw + wsize; iw < e; ++iw)
*iw = (rand()/(double)RAND_MAX*2 - 1)*k;
memsize += wsize*sizeof(double);
}
Layer& pass() override {
const int asy = sx*(sy-P2);
const int asx = sx-P2;
const int adz = sx*P2;
const int wdz = WW*psz;
const int pady = D*(psx-asx);
const int paddz = psx*(psy - W);
const int paddy = psx - W;
double *pa = prev->a;
double *ia = a + P*sx + P;
double *iw = w;
double *ipa = pa;
for(double *e = ia + asy*sz; ia < e; ia += adz, ipa = pa) {
for(double *e = ia + asy; ia < e; ia += P2, ipa += pady) {
for(double *e = ia + asx; ia < e; ++ia, ipa += D) {
double s = 0;
double *iipa = ipa;
for(double *ew = iw + wdz; iw < ew; iipa += paddz)
for(int yy = 0; yy < W; ++yy, iipa += paddy)
for(int xx = 0; xx < W; ++xx, ++iw, ++iipa)
s += *iipa * *iw;
*ia = RaLU ? (s < -1 ? -1 : s > 1 ? 1 : s) : 1/(1 + exp(-s)); // RaLU : sigmoid
}
}
}
return next ? next->pass() : *this;
}
template<bool Deep>
Layer& backpassT(double trainRatio) {
const int asy = sx*(sy-P2);
const int asx = sx-P2;
const int adz = sx*P2;
const int wdz = WW*psz;
const int pady = D*(psx-asx);
const int paddz = psx*(psy - W);
const int paddy = psx - W;
double *pa = prev->a;
double *pda = prev->da;
double *ia = a + P*sx + P;
double *iw = w;
double *ida = da + P*sx + P;
double *ipa = pa;
double *ipda = pda;
if (Deep) memset(pda, 0, sizeof(*pda)*prev->size);
for(double *e = ia + asy*sz; ia < e; ia += adz, ida += adz, ipa = pa, ipda = pda) {
for(double *e = ia + asy; ia < e; ia += P2, ida += P2, ipa += pady, ipda += pady) {
for(double *e = ia + asx; ia < e; ++ia, ++ida, ipa += D, ipda += D) {
double ds;
if (RaLU) {
double a = *ia;
if (a == -1 || a == 1) { iw += wdz; continue; }
ds = *ida; // RaLU derivation * *ida
} else {
double a = *ia;
ds = a * (1-a) * *ida; // sigmoid derivation * *ida
}
double dst = ds*trainRatio;
double *iipa = ipa;
double *iipda = ipda;
for(double *ew = iw + wdz; iw < ew; iipa += paddz, iipda += paddz)
for(int yy = 0; yy < W; ++yy, iipa += paddy, iipda += paddy)
for(int xx = 0; xx < W; ++xx, ++iw, ++iipa, ++iipda) {
if (Deep) *iipda += ds * *iw;
*iw += dst * *iipa;
}
}
}
}
return prev->backpass(trainRatio);
}
Layer& backpass(double trainRatio) override
{ return prev->prev ? backpassT<true>(trainRatio) : backpassT<false>(trainRatio); }
};
template<int Size, int Step = 1, int Padding = 0>
class LayerConvolutionShared: public Layer {
public:
enum { W = Size, WW = W*W, D = Step, P = Padding, P2 = P*2 };
double *dw;
int sx, sy, sz;
int psx, psy, psz;
LayerConvolutionShared(Layer &prev, int sx, int sy, int sz):
Layer(&prev, sx*sy*sz),
sx(sx), sy(sy), sz(sz),
psx((sx-P2-1)*D + W), psy((sy-P2-1)*D + W), psz(this->prev->size/(psx*psy))
{
assert(sx > 0 && sy > 0 && sz > 0);
assert(psx > 0 && psy > 0 && psz > 0);
assert(psx*psy*psz == this->prev->size);
wsize = WW*psz*sz;
links = wsize*(sy-P2)*(sx-P2);
w = new double[wsize + WW*psz];
dw = w + wsize;
for(double *iw = w, *e = iw + wsize; iw < e; ++iw)
*iw = (rand()/(double)RAND_MAX*2 - 1)*1;
memsize += (wsize + WW*psz)*sizeof(double);
}
Layer& pass() override {
const int asy = sx*(sy-P2);
const int asx = sx-P2;
const int adz = sx*P2;
const int wdz = WW*psz;
const int pady = D*(psx-asx);
const int paddz = psx*(psy - W);
const int paddy = psx - W;
double *pa = prev->a;
double *ia = a + P*sx + P;
double *iw = w;
double *ipa = pa;
for(double *e = ia + asy*sz; ia < e; ia += adz, iw += wdz, ipa = pa) {
double *eew = iw + wdz;
for(double *e = ia + asy; ia < e; ia += P2, ipa += pady) {
for(double *e = ia + asx; ia < e; ++ia, ipa += D) {
double s = 0;
double *iipa = ipa;
for(double *iiw = iw; iiw < eew; iipa += paddz)
for(int yy = 0; yy < W; ++yy, iipa += paddy)
for(int xx = 0; xx < W; ++xx, ++iiw, ++iipa)
s += *iipa * *iiw;
*ia = 1/(1 + exp(-s)); // sigmoid
}
}
}
return next ? next->pass() : *this;
}
template<bool Deep>
Layer& backpassT(double trainRatio) {
const int asy = sx*(sy-P2);
const int asx = sx-P2;
const int adz = sx*P2;
const int wdz = WW*psz;
const int pady = D*(psx-asx);
const int paddz = psx*(psy - W);
const int paddy = psx - W;
double *dw = this->dw;
double *edw = dw + wdz;
double *pa = prev->a;
double *pda = prev->da;
double *ia = a + P*sx + P;
double *iw = w;
double *ida = da + P*sx + P;
double *ipa = pa;
double *ipda = pda;
if (Deep) memset(pda, 0, sizeof(*pda)*prev->size);
for(double *e = ia + asy*sz; ia < e; ia += adz, ida += adz, ipa = pa, ipda = pda) {
memset(dw, 0, sizeof(*dw) * wdz);
for(double *e = ia + asy; ia < e; ia += P2, ida += P2, ipa += pady, ipda += pady) {
for(double *e = ia + asx; ia < e; ++ia, ++ida, ipa += D, ipda += D) {
const double a = *ia;
const double ds = a * (1-a) * *ida; // sigmoid derivation * *ida
const double dst = ds*trainRatio;
double *iiw = iw;
double *iipa = ipa;
double *iipda = ipda;
for(double *idw = dw; idw < edw; iipa += paddz, iipda += paddz)
for(int yy = 0; yy < W; ++yy, iipa += paddy, iipda += paddy)
for(int xx = 0; xx < W; ++xx, ++iiw, ++idw, ++iipa, ++iipda) {
if (Deep) *iipda += ds * *iiw;
*idw += dst * *iipa;
}
}
}
for(double *idw = dw; idw < edw; ++iw, ++idw)
*iw += *idw;
}
return prev->backpass(trainRatio);
}
Layer& backpass(double trainRatio) override
{ return prev->prev ? backpassT<true>(trainRatio) : backpassT<false>(trainRatio); }
};
#endif