#ifndef LAYER_CONV_SHARED_INC_CPP
#define LAYER_CONV_SHARED_INC_CPP


#include "layer.conv.inc.cpp"

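// Reference implementation of one shared-weight convolution pass: walks the
// current layer `cl` by explicit indices and feeds every (current neuron,
// previous neuron, weight) triple to Iter. Slow but straightforward; the
// test passes use it to cross-check the pointer-arithmetic versions below.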
template<typename Iter>
void iterateTestConvolutionShared(Layout cl, Layout pl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (cl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (cl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  for(int cy = cl.y0; cy < cl.y1; ++cy)
  for(int cx = cl.x0; cx < cl.x1; ++cx)
  for(int cz = cl.z0; cz < cl.z1; ++cz) {
    int ci = (cy*cl.sx + cx)*cl.sz + cz;
    Neuron &cn = c_neurons[ci];
    typename Iter::AccumType a = {};
    Iter::init(cn, a);

    for(int ky = 0; ky < k.sy; ++ky)
    for(int kx = 0; kx < k.sx; ++kx)
    for(int pz = pl.z0; pz < pl.z1; ++pz) {
      int wi = (((cz - cl.z0)*k.sy + ky)*k.sx + kx)*pl.getD() + pz - pl.z0;
      Weight &w = weights[wi];

      int px = pl.x0 + (cx - cl.x0)*k.dx + k.ox + kx;
      int py = pl.y0 + (cy - cl.y0)*k.dy + k.oy + ky;
      int pi = (py*pl.sx + px)*pl.sz + pz;
      Neuron &pn = p_neurons[pi];

      Iter::iter(pn, w, a);
    }

    Iter::done(cn, a);
  }
}

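// Optimized equivalent of iterateTestConvolutionShared: every index is
// folded into precomputed pointer strides, so the nested loops only advance
// raw pointers. `wl` is the full layout the weights were laid out for;
// `cl` may be a per-thread sub-layout of it.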
template<typename Iter>
void iterateConvolutionShared(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(wl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(cl.isSubLayoutOf(wl));
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  // current-layer strides: c_dx/c_dy skip from the end of one depth run
  // to the start of the next column/row
  int c_h    = cl.getH();
  int c_w    = cl.getW();
  int c_d    = cl.getD();
  int c_swz  = c_w*cl.sz;
  int c_shxz = c_h*cl.sx*cl.sz;
  int c_dx   = cl.sz - c_d;
  int c_dy   = (cl.sx - c_w)*cl.sz;

  // previous-layer strides follow the kernel step (k.dx, k.dy)
  int p_d    = pl.getD();
  int p_dx   = k.dx*pl.sz;
  int p_dy   = k.dy*pl.sx*pl.sz - c_w*p_dx;

  // strides for walking one kernel window inside the previous layer
  int k_sxd  = k.sx*p_d;
  int k_syxd = k.sy*k_sxd;
  int p_ddy  = (pl.sx - k.sx)*pl.sz;
  int p_ddx  = pl.sz - p_d;

  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
  Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox)*pl.sz + pl.z0;

  weights += (cl.z0 - wl.z0)*k_syxd;
  Weight *iw = weights;

  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
  for(Neuron *e = icn +  c_swz; icn < e; icn += c_dx, ipn += p_dx, iw = weights)
  for(Neuron *e = icn +    c_d; icn < e; ++icn) {
    typename Iter::AccumType a;
    Iter::init(*icn, a);

    Neuron *iipn = ipn;
    for(Weight *e = iw + k_syxd; iw < e; iipn += p_ddy)
    for(Weight *e = iw +  k_sxd; iw < e; iipn += p_ddx)
    for(Weight *e = iw +    p_d; iw < e; ++iw, ++iipn)
      Iter::iter(*iipn, *iw, a);

    Iter::done(*icn, a);
  }
}

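// Same traversal as iterateConvolutionShared, restricted to a single kernel
// cell (kx, ky); Iter::iter2 sees each (current, previous, weight) triple.
// Callers loop over kernel cells with a barrier between steps: for a fixed
// (kx, ky) each current neuron maps to a distinct previous neuron, which
// keeps concurrent scatter-writes from colliding.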
template<typename Iter>
void iterateConvolutionSharedPoint(Layout cl, Layout pl, Layout wl, Kernel k, int kx, int ky, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(wl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(cl.isSubLayoutOf(wl));
  assert(kx >= 0 && kx < k.sx);
  assert(ky >= 0 && ky < k.sy);
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  int c_h    = cl.getH();
  int c_w    = cl.getW();
  int c_d    = cl.getD();
  int c_swz  = c_w*cl.sz;
  int c_shxz = c_h*cl.sx*cl.sz;
  int c_dx   = cl.sz - c_d;
  int c_dy   = (cl.sx - c_w)*cl.sz;

  int p_d    = pl.getD();
  int p_dx   = k.dx*pl.sz;
  int p_dy   = k.dy*pl.sx*pl.sz - c_w*p_dx;

  // skip the rest of the kernel window when stepping one output depth slice
  int w_dz   = (k.sy*k.sx - 1)*p_d;

  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
  Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy + ky)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox + kx)*pl.sz + pl.z0;
  weights += (((cl.z0 - wl.z0)*k.sy + ky)*k.sx + kx)*p_d;
  Weight *iw = weights;

  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
  for(Neuron *e = icn +  c_swz; icn < e; icn += c_dx, ipn += p_dx, iw = weights)
  for(Neuron *e = icn +    c_d; icn < e; ++icn,       ipn -= p_d,  iw += w_dz)
  for(Weight *e = iw  +    p_d; iw  < e; ++ipn, ++iw)
    Iter::iter2(*icn, *ipn, *iw);
}

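// Gaussian-damped random initialization for shared convolution weights:
// uniform noise shaped by an exp(-r^2) envelope over the kernel window,
// then rescaled so the total envelope mass is proportional to the depth.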
void fillConvolutionWeights(int kx, int ky, int kz, int count, Weight *weights) {
  double kr = 1.5; // envelope radius in normalized kernel coordinates
  double sum = 0;

  Weight *iw = weights;
  for(int i = 0; i < count; ++i)
  for(int y = 0; y < ky; ++y)
  for(int x = 0; x < kx; ++x)
  for(int z = 0; z < kz; ++z, ++iw) {
    // map the kernel cell into [-kr, kr]; assumes kx, ky > 1
    double dx = (2.0*x/(kx-1) - 1)*kr;
    double dy = (2.0*y/(ky-1) - 1)*kr;
    double e = exp( -dx*dx - dy*dy );
    sum += e;
    iw->w = (WeightReal)( (rand()/(double)RAND_MAX*2 - 1)*e );
    //iw->w = (WeightReal)( rand()/(double)RAND_MAX*e );
  }

  WeightReal k = (WeightReal)(10*kz/sum);
  Weight *ew = iw;
  for(iw = weights; iw < ew; ++iw) iw->w *= k;
}

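// Common base for the shared-weight layers below: holds the kernel and a
// per-thread copy of the weight deltas (mtWeights), so backpassWeights can
// accumulate without locks and merge the copies in sumWeights().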
class LayerConvSharedBase: public Layer {
public:
  Kernel kernel;
  std::vector<Weight> mtWeights;


  LayerConvSharedBase(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
    Layer(&prev, layout, kernel.sx * kernel.sy * layout.getD() * prev.back().layout.getD(), weights),
    kernel(kernel)
  {
    assert(kernel);
    stat.links = weightsCount * layout.getW() * layout.getH();
    if (ownWeights) fillWeights(-1, 1);
  }


  void split(int threadsCount) override {
    Layer::split(threadsCount);
    Weight w = {};
    mtWeights.clear();
    mtWeights.resize(threadsCount*weightsCount, w);
  }


  // fold every thread's delta copy back into the shared weights; each
  // thread owns an interleaved subset (tid, tid + threads, ...), so no
  // synchronization is needed beyond the barrier before the call
  inline void sumWeights(int tid, int threads) {
    int wc = weightsCount;
    Weight *iw = weights + tid;
    Weight *ia = mtWeights.data() + tid;
    Weight *ea = mtWeights.data() + threads*wc;
    for(Weight *ew = weights + wc; iw < ew; iw += threads, ia += threads) {
      WeightReal w = iw->w;
      for(Weight *iia = ia; iia < ea; iia += wc)
        w += iia->w, iia->w = 0;
      iw->w = w;
    }
  }
};

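// Forward convolution layer with weights shared across (x, y);
// `func` is the activation applied to each accumulated sum in done().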
template<Func func>
class LayerConvShared: public LayerConvSharedBase {
public:
  LayerConvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
    LayerConvSharedBase(prev, layout, kernel, weights)
  {
    stat.links = weightsCount * layout.getW() * layout.getH();
    if (ownWeights) fillConvolutionWeights(kernel.sx, kernel.sy, this->prev->layout.getD(), layout.getD(), this->weights);
  }


  void pass(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
    };
    iterateConvolutionShared<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
  }


  void backpassWeights(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.v * a.v; }
    };
    iterateConvolutionShared<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, &mtWeights[barrier.tid * weightsCount]);
    barrier.wait();
    sumWeights(barrier.tid, barrier.threads);
  }


  void backpassDeltas(Barrier &barrier) override {
    struct I: public Iter {
      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.d * w.w; }
      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
    };
    int ksx = kernel.sx, ksy = kernel.sy;
    for(int kx = 0; kx < ksx; ++kx)
    for(int ky = 0; ky < ksy; ++ky) {
      iterateConvolutionSharedPoint<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, kx, ky, neurons, prev->neurons, weights);
      barrier.wait();
    }
    iterateNeurons<I>(mtPrevLayouts[barrier.tid], prev->neurons);
  }


  void testPass() override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
    };
    iterateTestConvolutionShared<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
  }


  void testBackpass() override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
    };
    struct IW: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += a.v * n.v; }
    };
    clearAccum();
    iterateTestConvolutionShared<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
    iterateTestConvolutionShared<IW>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
    iterateNeurons<I>(prev->layout, prev->neurons);
    clearAccum();
  }


  bool saveDemo() override
    { return !filename || saveConvDemoImage( filename, layout.getD(), kernel.sx, kernel.sy, prev->layout.getD(), weights ); }
};

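// Transposed ("deconvolution") counterpart of LayerConvShared: the kernel
// scatters each input neuron into the larger output layout, so the forward
// pass reuses the per-kernel-cell iteration with barriers, and the roles of
// the two layers are swapped in every iterate* call.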
template<Func func>
class LayerDeconvShared: public LayerConvSharedBase {
public:
  LayerDeconvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
    LayerConvSharedBase(prev, layout, kernel, weights)
  {
    stat.links = weightsCount * this->prev->layout.getW() * this->prev->layout.getH();
    if (ownWeights) fillConvolutionWeights(kernel.sx, kernel.sy, layout.getD(), this->prev->layout.getD(), this->weights);
  }


  void pass(Barrier &barrier) override {
    struct I: public Iter {
      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.v * w.w; }
      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
    };
    int k_sx = kernel.sx, k_sy = kernel.sy;
    for(int kx = 0; kx < k_sx; ++kx)
    for(int ky = 0; ky < k_sy; ++ky) {
      iterateConvolutionSharedPoint<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, kx, ky, prev->neurons, neurons, weights);
      barrier.wait();
    }
    iterateNeurons<I>(mtLayouts[barrier.tid], neurons);
  }


  void backpassWeights(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
    };
    iterateConvolutionShared<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, &mtWeights[barrier.tid * weightsCount]);
    barrier.wait();
    sumWeights(barrier.tid, barrier.threads);
  }


  void backpassDeltas(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
    };
    iterateConvolutionShared<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
  }


  void testPass() override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
    };
    clearAccum();
    iterateTestConvolutionShared<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
    iterateNeurons<I>(layout, neurons);
    clearAccum();
  }


  void testBackpass() override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
    };
    struct IW: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
    };
    iterateTestConvolutionShared<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
    iterateTestConvolutionShared<IW>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
  }


  bool saveDemo() override
    { return !filename || saveConvDemoImage( filename, prev->layout.getD(), kernel.sx, kernel.sy, layout.getD(), weights ); }
};

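// A minimal wiring sketch. All names below are hypothetical: the Layout and
// Kernel constructor arguments and the `funcSigmoidExp` activation are
// assumptions, not this project's verified API; see layer.inc.cpp and
// layer.conv.inc.cpp for the real signatures.
//
//   Layer input(nullptr, Layout(64, 64, 3));
//   new LayerConvShared<funcSigmoidExp>(input, Layout(32, 32, 8), Kernel(4, 2, -1));
//   new LayerDeconvShared<funcSigmoidExp>(input.back(), Layout(64, 64, 3), Kernel(4, 2, -1));
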
#endif