#ifndef SEGMENT_CX4_INC_CPP
#define SEGMENT_CX4_INC_CPP


#include "segment.inc.cpp"
#include "func.inc.cpp"
#include "layer.conv.inc.cpp"


class SegmentCx4: public Segment {
|
|
|
b579b3 |
public:
|
|
|
b579b3 |
enum {
|
|
|
b579b3 |
KSX = 4,
|
|
|
b579b3 |
KSY = 4,
|
|
|
b579b3 |
SX = 12,
|
|
|
b579b3 |
SY = 12,
|
|
|
b579b3 |
MSX = 5,
|
|
|
b579b3 |
MSY = 5,
|
|
|
b579b3 |
};
|
|
|
b579b3 |
|
|
|
b579b3 |
const int msx, msy, msz;
|
|
|
b579b3 |
|
|
|
b579b3 |
Neuron *m_neurons;
|
|
|
b579b3 |
Neuron *b_neurons;
|
|
|
b579b3 |
|
|
|
b579b3 |
SegmentCx4(int sz, int msz, Weight *weights = nullptr):
|
|
|
b579b3 |
Segment(SX, SY, sz, msz*KSY*KSX*sz, weights), msx(MSX), msy(MSY), msz(msz)
|
|
|
b579b3 |
{
|
|
|
b579b3 |
m_neurons = new Neuron[msx*msy*msz + sx*sy*sz];
|
|
|
b579b3 |
b_neurons = m_neurons + msx*msy*msz;
|
|
|
b579b3 |
clear();
|
|
|
b579b3 |
}
|
|
|
b579b3 |
~SegmentCx4()
|
|
|
b579b3 |
{ delete[] m_neurons; }
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
void clear() override
|
|
|
b579b3 |
{ memset(m_neurons, 0, sizeof(*m_neurons)*(msx*msy*msz + sx*sy*sz)); }
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
inline void check(int x, int y, int z) {
|
|
|
b579b3 |
Segment::check(x, y, z);
|
|
|
b579b3 |
assert(layout.getD() == sz);
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
Quality pass(Barrier &barrier, int x, int y, int z, NeuronReal trainRatio) override {
|
|
|
b579b3 |
check(x, y, z);
|
|
|
b579b3 |
|
|
|
b579b3 |
Layout l = layout;
|
|
|
b579b3 |
const int ksx = 4, ksy = 4;
|
|
|
b579b3 |
int tid = barrier.tid;
|
|
|
b579b3 |
int threads = barrier.threads;
|
|
|
b579b3 |
|
|
|
b579b3 |
int sx = this->sx;
|
|
|
b579b3 |
int sy = this->sy;
|
|
|
b579b3 |
int sz = this->sz;
|
|
|
b579b3 |
int msx = this->msx;
|
|
|
b579b3 |
int msy = this->msy;
|
|
|
b579b3 |
int msz = this->msz;
|
|
|
b579b3 |
|
|
|
b579b3 |
int ksxyz = ksx*ksy*sz;
|
|
|
b579b3 |
int fv_dkx = l.sz - sz;
|
|
|
b579b3 |
int fv_dky = (l.sx - ksx)*l.sz;
|
|
|
b579b3 |
|
|
|
b579b3 |
NeuronReal *f_values = this->f_values + (y*l.sx + x)*l.sz + z;
|
|
|
b579b3 |
|
|
|
b579b3 |
// stage 1: pass from front to mid
|
|
|
b579b3 |
|
|
|
b579b3 |
Weight *iw = weights + tid*ksxyz;
|
|
|
b579b3 |
Neuron *imn = m_neurons + tid;
|
|
|
b579b3 |
NeuronReal *ifv = f_values;
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int mz = tid; mz < msz; mz += threads, iw += threads*ksxyz, imn += threads - msx*msy*msz, ifv = f_values)
|
|
|
b579b3 |
for(int my = 0; my < MSY; ++my, ifv += 2*(l.sx - MSX)*l.sz)
|
|
|
b579b3 |
for(int mx = 0; mx < MSX; ++mx, imn += msz, ifv += 2*l.sz) {
|
|
|
b579b3 |
AccumReal a = 0;
|
|
|
b579b3 |
|
|
|
b579b3 |
Weight *iiw = iw;
|
|
|
b579b3 |
NeuronReal *iifv = ifv;
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = 0; ky < KSY; ++ky, iifv += fv_dky)
|
|
|
b579b3 |
for(int kx = 0; kx < KSX; ++kx, iifv += fv_dkx)
|
|
|
b579b3 |
for(Weight *e = iiw + sz; iiw < e; ++iiw, ++iifv)
|
|
|
b579b3 |
a += *iifv * iiw->w;
|
|
|
b579b3 |
|
|
|
b579b3 |
if (a > 0) imn->v = a, imn->d = 1; else imn->v = imn->d = 0;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
barrier.wait();
|
|
|
b579b3 |
|
|
|
b579b3 |
// stage 2: pass from mid to back and verify
|
|
|
b579b3 |
|
|
|
b579b3 |
AccumReal qa = 0;
|
|
|
b579b3 |
for(int by = 2 + tid; by < 10; by += threads)
|
|
|
b579b3 |
for(int bx = 2; bx < 10; ++bx)
|
|
|
b579b3 |
for(int bz = 0; bz < sz; ++bz) {
|
|
|
b579b3 |
AccumReal a = 0;
|
|
|
b579b3 |
Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = by%2; ky < ksy; ky += 2)
|
|
|
b579b3 |
for(int kx = bx%2; kx < ksx; kx += 2) {
|
|
|
b579b3 |
int mx = (bx - kx)/2;
|
|
|
b579b3 |
int my = (by - ky)/2;
|
|
|
b579b3 |
assert(mx >= 0 && mx < msx && (bx - kx)%2 == 0);
|
|
|
b579b3 |
assert(my >= 0 && my < msy && (by - ky)%2 == 0);
|
|
|
b579b3 |
for(int mz = 0; mz < msz; ++mz) {
|
|
|
b579b3 |
Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
|
|
|
b579b3 |
Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + bz ];
|
|
|
b579b3 |
a += mn.v * w.w;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
if (a > 0) bn.v = a, bn.d = 1; else bn.v = bn.d = 0;
|
|
|
b579b3 |
|
|
|
b579b3 |
NeuronReal fn = f_values[ (by*l.sx + bx)*l.sz + bz ];
|
|
|
b579b3 |
NeuronReal d = fn - bn.v;
|
|
|
b579b3 |
bn.d *= d*trainRatio;
|
|
|
b579b3 |
qa += d*d;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
Quality q(qa/(64*sz));
|
|
|
b579b3 |
|
|
|
b579b3 |
if (trainRatio <= 0) return q;
|
|
|
b579b3 |
|
|
|
b579b3 |
barrier.wait();
|
|
|
b579b3 |
|
|
|
b579b3 |
// stage 3: backpass deltas
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int mz = tid; mz < msz; mz += threads)
|
|
|
b579b3 |
for(int my = 1; my < 4; ++my)
|
|
|
b579b3 |
for(int mx = 1; mx < 4; ++mx) {
|
|
|
b579b3 |
AccumReal a = 0;
|
|
|
b579b3 |
Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = 0; ky < ksy; ++ky)
|
|
|
b579b3 |
for(int kx = 0; kx < ksx; ++kx)
|
|
|
b579b3 |
for(int kz = 0; kz < sz; ++kz) {
|
|
|
b579b3 |
int bx = mx*2 + kx;
|
|
|
b579b3 |
int by = my*2 + ky;
|
|
|
b579b3 |
Neuron &bn = b_neurons[ (by*sx + bx)*sz + kz ];
|
|
|
b579b3 |
Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
|
|
|
b579b3 |
a += bn.d * w.w;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
mn.d *= a;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
barrier.wait();
|
|
|
b579b3 |
|
|
|
b579b3 |
// stage 4: update weights
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int mz = tid; mz < msz; mz += threads)
|
|
|
b579b3 |
for(int by = 4; by < 8; ++by)
|
|
|
b579b3 |
for(int bx = 4; bx < 8; ++bx)
|
|
|
b579b3 |
for(int bz = 0; bz < sz; ++bz) {
|
|
|
b579b3 |
Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
|
|
|
b579b3 |
NeuronReal fv = f_values[ (by*l.sx + bx)*l.sz + bz ];
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = by%2; ky < ksy; ky += 2)
|
|
|
b579b3 |
for(int kx = bx%2; kx < ksx; kx += 2) {
|
|
|
b579b3 |
int mx = (bx - kx)/2;
|
|
|
b579b3 |
int my = (by - ky)/2;
|
|
|
b579b3 |
assert(mx >= 1 && mx < 4 && (bx - kx)%2 == 0);
|
|
|
b579b3 |
assert(my >= 1 && my < 4 && (by - ky)%2 == 0);
|
|
|
b579b3 |
Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
|
|
|
b579b3 |
Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + bz ];
|
|
|
b579b3 |
w.w += bn.d*mn.v + mn.d*fv;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
return q;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
Quality testPass(int x, int y, int z, NeuronReal trainRatio) override {
|
|
|
b579b3 |
check(x, y, z);
|
|
|
b579b3 |
|
|
|
b579b3 |
Layout l = layout;
|
|
|
b579b3 |
const int ksx = 4, ksy = 4;
|
|
|
b579b3 |
|
|
|
b579b3 |
// stage 1: pass
|
|
|
b579b3 |
|
|
|
b579b3 |
clear();
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int my = 0; my < msy; ++my)
|
|
|
b579b3 |
for(int mx = 0; mx < msx; ++mx)
|
|
|
b579b3 |
for(int mz = 0; mz < msz; ++mz) {
|
|
|
b579b3 |
AccumReal a = 0;
|
|
|
b579b3 |
Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = 0; ky < ksy; ++ky)
|
|
|
b579b3 |
for(int kx = 0; kx < ksx; ++kx)
|
|
|
b579b3 |
for(int kz = 0; kz < sz; ++kz) {
|
|
|
b579b3 |
int fx = x + mx*2 + kx;
|
|
|
b579b3 |
int fy = y + my*2 + ky;
|
|
|
b579b3 |
int fz = z + kz;
|
|
|
b579b3 |
NeuronReal fv = f_values[ (fy*l.sx + fx)*l.sz + fz ];
|
|
|
b579b3 |
Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
|
|
|
b579b3 |
a += fv * w.w;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
if (a < 0) { mn.v = mn.d = 0; continue; }
|
|
|
b579b3 |
mn.v = a; mn.d = 1;
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = 0; ky < ksy; ++ky)
|
|
|
b579b3 |
for(int kx = 0; kx < ksx; ++kx)
|
|
|
b579b3 |
for(int kz = 0; kz < sz; ++kz) {
|
|
|
b579b3 |
int bx = mx*2 + kx;
|
|
|
b579b3 |
int by = my*2 + ky;
|
|
|
b579b3 |
int bz = kz;
|
|
|
b579b3 |
Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
|
|
|
b579b3 |
Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
|
|
|
b579b3 |
bn.a.v += a * w.w;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
// stage 2: finalize values and verify
|
|
|
b579b3 |
|
|
|
b579b3 |
AccumReal qa = 0;
|
|
|
b579b3 |
for(int by = 2; by < 10; ++by)
|
|
|
b579b3 |
for(int bx = 2; bx < 10; ++bx)
|
|
|
b579b3 |
for(int bz = 0; bz < sz; ++bz) {
|
|
|
b579b3 |
Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
|
|
|
b579b3 |
if (bn.a.v > 0) bn.v = bn.a.v, bn.d = 1; else bn.v = bn.d = 0;
|
|
|
b579b3 |
bn.a.v = 0;
|
|
|
b579b3 |
|
|
|
b579b3 |
NeuronReal fn = f_values[ ((y + by)*l.sx + x + bx)*l.sz + z + bz ];
|
|
|
b579b3 |
NeuronReal d = fn - bn.v;
|
|
|
b579b3 |
bn.d *= d*trainRatio;
|
|
|
b579b3 |
qa += d*d;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
Quality q(qa/(64*sz));
|
|
|
b579b3 |
|
|
|
b579b3 |
if (trainRatio <= 0) return q;
|
|
|
b579b3 |
|
|
|
b579b3 |
// stage 3: backpass deltas
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int my = 0; my < msy; ++my)
|
|
|
b579b3 |
for(int mx = 0; mx < msx; ++mx)
|
|
|
b579b3 |
for(int mz = 0; mz < msz; ++mz) {
|
|
|
b579b3 |
AccumReal a = 0;
|
|
|
b579b3 |
Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = 0; ky < ksy; ++ky)
|
|
|
b579b3 |
for(int kx = 0; kx < ksx; ++kx)
|
|
|
b579b3 |
for(int kz = 0; kz < sz; ++kz) {
|
|
|
b579b3 |
int bx = mx*2 + kx;
|
|
|
b579b3 |
int by = my*2 + ky;
|
|
|
b579b3 |
int bz = kz;
|
|
|
b579b3 |
Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
|
|
|
b579b3 |
Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
|
|
|
b579b3 |
a += bn.d * w.w;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
mn.d *= a;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
// stage 4: update weights
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int by = 4; by < 8; ++by)
|
|
|
b579b3 |
for(int bx = 4; bx < 8; ++bx)
|
|
|
b579b3 |
for(int bz = 0; bz < sz; ++bz) {
|
|
|
b579b3 |
Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
|
|
|
b579b3 |
NeuronReal fv = f_values[ ((y + by)*l.sx + x + bx)*l.sz + z + bz ];
|
|
|
b579b3 |
|
|
|
b579b3 |
for(int ky = by%2; ky < ksy; ky += 2)
|
|
|
b579b3 |
for(int kx = bx%2; kx < ksx; kx += 2)
|
|
|
b579b3 |
for(int mz = 0; mz < msz; ++mz) {
|
|
|
b579b3 |
int mx = (bx - kx)/2;
|
|
|
b579b3 |
int my = (by - ky)/2;
|
|
|
b579b3 |
assert(mx >= 1 && mx < 4 && (bx - kx)%2 == 0);
|
|
|
b579b3 |
assert(my >= 1 && my < 4 && (by - ky)%2 == 0);
|
|
|
b579b3 |
Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
|
|
|
b579b3 |
Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + bz ];
|
|
|
b579b3 |
w.w += bn.d*mn.v + mn.d*fv;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
return q;
|
|
|
b579b3 |
}
|
|
|
b579b3 |
|
|
|
b579b3 |
|
|
|
b579b3 |
bool saveDemo() override
|
|
|
b579b3 |
{ return !filename || saveConvDemoImage(filename, msz, 4, 4, sz, weights); }
|
|
|
b579b3 |
};


#endif
