projects/neural/layer.conv.inc.cpp

#ifndef LAYER_CONV_INC_CPP
#define LAYER_CONV_INC_CPP


#include "tga.inc.cpp"
#include "font.inc.cpp"
#include "layer.simple.inc.cpp"

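// Geometry of a convolution kernel: sx/sy is the window size, dx/dy the stride
// between neighbouring output cells, and ox/oy the offset of the window inside
// the previous layer. operator bool() reports whether the kernel is usable
// (positive sizes and strides).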
struct Kernel {
  int sx, sy;
  int dx, dy;
  int ox, oy;

  inline Kernel():
    sx(), sy(), dx(), dy(), ox(), oy() { }
  inline Kernel(int sx, int sy, int dx, int dy, int ox, int oy):
    sx(sx), sy(sy), dx(dx), dy(dy), ox(ox), oy(oy) { }
  inline Kernel(int s, int d, int o):
    sx(s), sy(s), dx(d), dy(d), ox(o), oy(o) { }
  inline operator bool() const
    { return sx > 0 && sy > 0 && dx > 0 && dy > 0; }


  void print(const char *prefix = nullptr) const {
    if (prefix && *prefix) printf("%s: ", prefix);
    printf("x(sdo): %d %d %d, y(sdo): %d %d %d\n", sx, dx, ox, sy, dy, oy);
  }
  void printYX(const char *prefix = nullptr) const {
    if (prefix && *prefix) printf("%s: ", prefix);
    printf("y(sdo): %d %d %d, x(sdo): %d %d %d\n", sy, dy, oy, sx, dx, ox);
  }
};

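// Reference convolution iterator: visits every output neuron of layout cl and
// every kernel tap in the previous layout pl, recomputing neuron and weight
// indices from coordinates on each step. Iter provides the accumulator type and
// the init/iter/done callbacks; the test* passes use this slow version to
// cross-check the pointer-based iterators below.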
template<typename Iter>
void iterateTestConvolution(Layout cl, Layout pl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (cl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (cl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  for(int cy = cl.y0; cy < cl.y1; ++cy)
  for(int cx = cl.x0; cx < cl.x1; ++cx)
  for(int cz = cl.z0; cz < cl.z1; ++cz) {
    int ci = (cy*cl.sx + cx)*cl.sz + cz;
    Neuron &cn = c_neurons[ci];
    typename Iter::AccumType a = {};
    Iter::init(cn, a);

    for(int ky = 0; ky < k.sy; ++ky)
    for(int kx = 0; kx < k.sx; ++kx)
    for(int pz = pl.z0; pz < pl.z1; ++pz) {
      int wi = ((cy - cl.y0)*cl.getW() + cx - cl.x0)*cl.getD() + cz - cl.z0;
      wi = ((wi*k.sy + ky)*k.sx + kx)*pl.getD() + pz - pl.z0;
      Weight &w = weights[wi];

      int px = pl.x0 + (cx - cl.x0)*k.dx + k.ox + kx;
      int py = pl.y0 + (cy - cl.y0)*k.dy + k.oy + ky;
      int pi = (py*pl.sx + px)*pl.sz + pz;
      Neuron &pn = p_neurons[pi];

      Iter::iter(pn, w, a);
    }

    Iter::done(cn, a);
  }
}

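// Optimized convolution iterator: same traversal as iterateTestConvolution, but
// indices are replaced by precomputed pointer deltas (the c_d*, p_d*, w_d*
// values below). wl is the full weight layout; cl may be a sub-layout of it, so
// each worker thread can be handed its own slice of the output.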
template<typename Iter>
void iterateConvolution(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(wl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(cl.isSubLayoutOf(wl));
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  int c_h    = cl.getH();
  int c_w    = cl.getW();
  int c_d    = cl.getD();
  int c_swz  = c_w*cl.sz;
  int c_shxz = c_h*cl.sx*cl.sz;
  int c_dx   = cl.sz - c_d;
  int c_dy   = (cl.sx - c_w)*cl.sz;

  int p_d    = pl.getD();
  int p_dx   = k.dx*pl.sz;
  int p_dy   = k.dy*pl.sx*pl.sz - c_w*p_dx;

  int k_sxd  = k.sx*p_d;
  int k_syxd = k.sy*k_sxd;
  int p_ddy  = (pl.sx - k.sx)*pl.sz;
  int p_ddx  = pl.sz - p_d;

  int w_w    = wl.getW();
  int w_d    = wl.getD();
  int w_dx   = (w_d - c_d)*k_syxd;
  int w_dy   = (w_w - c_w)*w_d*k_syxd;

  int cx0    = cl.x0 - wl.x0;
  int cy0    = cl.y0 - wl.y0;
  int cz0    = cl.z0 - wl.z0;

  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
  Neuron *ipn = p_neurons + ((pl.y0 + cy0*k.dy + k.oy)*pl.sx + pl.x0 + cx0*k.dx + k.ox)*pl.sz + pl.z0;
  Weight *iw = weights + ((cy0*w_w + cx0)*w_d + cz0)*k_syxd;

  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy, iw += w_dy)
  for(Neuron *e = icn +  c_swz; icn < e; icn += c_dx, ipn += p_dx, iw += w_dx)
  for(Neuron *e = icn +    c_d; icn < e; ++icn) {
    typename Iter::AccumType a;
    Iter::init(*icn, a);

    Neuron *iipn = ipn;
    for(Weight *e = iw + k_syxd; iw < e; iipn += p_ddy)
    for(Weight *e = iw +  k_sxd; iw < e; iipn += p_ddx)
    for(Weight *e = iw +    p_d; iw < e; ++iw, ++iipn)
      Iter::iter(*iipn, *iw, a);

    Iter::done(*icn, a);
  }
}

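// Convolution iterator restricted to a single kernel tap (kx, ky): each output
// neuron is visited once and Iter::iter2 receives the matching previous-layer
// neuron and weight. The backward passes run this once per tap with a barrier
// in between, so writes into the shared previous-layer accumulators from
// different taps stay separated.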
template<typename Iter>
void iterateConvolutionPoint(Layout cl, Layout pl, Layout wl, Kernel k, int kx, int ky, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
  if (!cl) return;
  assert(pl);
  assert(wl);
  assert(k);
  assert(c_neurons);
  assert(p_neurons);
  assert(weights);
  assert(cl.isSubLayoutOf(wl));
  assert(kx >= 0 && kx < k.sx);
  assert(ky >= 0 && ky < k.sy);
  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);

  int c_h    = cl.getH();
  int c_w    = cl.getW();
  int c_d    = cl.getD();
  int c_swz  = c_w*cl.sz;
  int c_shxz = c_h*cl.sx*cl.sz;
  int c_dx   = cl.sz - c_d;
  int c_dy   = (cl.sx - c_w)*cl.sz;

  int p_d    = pl.getD();
  int p_dx   = k.dx*pl.sz;
  int p_dy   = k.dy*pl.sx*pl.sz - c_w*p_dx;

  int k_sxd  = k.sx*p_d;
  int k_syxd = k.sy*k_sxd;

  int w_w    = wl.getW();
  int w_d    = wl.getD();
  int w_dz   = k_syxd - p_d;
  int w_dx   = (w_d - c_d)*k_syxd;
  int w_dy   = (w_w - c_w)*w_d*k_syxd;

  int cx0    = cl.x0 - wl.x0;
  int cy0    = cl.y0 - wl.y0;
  int cz0    = cl.z0 - wl.z0;

  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
  Neuron *ipn = p_neurons + ((pl.y0 + cy0*k.dy + k.oy + ky)*pl.sx + pl.x0 + cx0*k.dx + k.ox + kx)*pl.sz + pl.z0;
  Weight *iw = weights + ((cy0*w_w + cx0)*w_d + cz0)*k_syxd + ky*k_sxd + kx*p_d;

  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy, iw += w_dy)
  for(Neuron *e = icn +  c_swz; icn < e; icn += c_dx, ipn += p_dx, iw += w_dx)
  for(Neuron *e = icn +    c_d; icn < e; ++icn,       ipn -= p_d,  iw += w_dz)
  for(Neuron *e = ipn +    p_d; ipn < e; ++ipn, ++iw)
    Iter::iter2(*icn, *ipn, *iw);
}

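// Saves `count` kernels of size ksx*ksy*ksz as a TGA image for visual
// inspection: a combined color row (meaningful when ksz is 3) plus one
// grayscale row per kernel channel. Weights are scaled by the largest absolute
// value, which is also printed into the image.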
bool saveConvDemoImage(const char *filename, int count, int ksx, int ksy, int ksz, const Weight *weights) {
  int cols = count;
  int rows = ksz + 1;
  int w = 1 + cols*(ksx + 1);
  int h = 10 + rows*(ksy + 1);
  std::vector<unsigned char> pixels(w*h*3, 0);

  WeightReal range = 0;
  for(const Weight *iw = weights, *e = iw + count*ksx*ksy*ksz; iw < e; ++iw) {
    WeightReal r = fabs(iw->w);
    if (range < r) range = r;
  }

  const unsigned char white[] = { 255, 255, 255 };
  imgPrintf(pixels.data(), w, h, 3, 1, 1, white, "%f", range);

  // rgb row

  for(int i = 0; i < count; ++i)
  for(int ky = 0; ky < ksy; ++ky)
  for(int kx = 0; kx < ksx; ++kx) {
    int y0 = 10;
    int x0 = i*(ksx + 1) + 1;
    unsigned char *p = &pixels[ ((y0 + ky)*w + x0 + kx)*3 ];

    for(int kz = 0; kz < 3; ++kz) {
      if (kz < ksz) {
        WeightReal x = weights[ ((i*ksy + ky)*ksx + kx)*ksz + kz ].w;
        x /= range;
        x = (x + 0.5)*256;
        unsigned char c = x < 0 ? 0 : x > 255 ? 255 : (unsigned char)x;
        p[kz] = c;
      } else {
        p[kz] = 0;
      }
    }
  }

  // gray rows

  for(int i = 0; i < count; ++i)
  for(int kz = 0; kz < ksz; ++kz)
  for(int ky = 0; ky < ksy; ++ky)
  for(int kx = 0; kx < ksx; ++kx) {
    WeightReal x = weights[ ((i*ksy + ky)*ksx + kx)*ksz + kz ].w;
    x /= range;
    x = (x + 0.5)*256;
    unsigned char c = x < 0 ? 0 : x > 255 ? 255 : (unsigned char)x;

    int y0 = (kz + 1)*(ksy + 1) + 10;
    int x0 = i*(ksx + 1) + 1;
    unsigned char *p = &pixels[ ((y0 + ky)*w + x0 + kx)*3 ];
    p[0] = p[1] = p[2] = c;

    //if (c ==   0) p[0] = p[1] = 0; // blue on underflow
    //if (c == 255) p[1] = p[2] = 0; // red on overflow
  }

  std::string fn(filename);
  fn += ".tga";
  return tgaSave(fn.c_str(), pixels.data(), w, h, 3);
}

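// Convolution layer: every output neuron reads an sx*sy window across all
// channels of the previous layer, and `func` is the activation applied in
// done(). Judging by the weight count, there is a separate kernel per output
// position and channel, so weights are not shared between positions (locally
// connected rather than a classic shared-weight convolution).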
template<Func func>
class LayerConv: public Layer {
public:
  Kernel kernel;

  LayerConv(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
    Layer(&prev, layout, layout.getActiveCount()*kernel.sx*kernel.sy*prev.back().layout.getD(), weights),
    kernel(kernel)
  {
    assert(kernel);
    if (ownWeights) fillWeights(-1, 1);
  }


  void pass(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
    };
    iterateConvolution<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
  }


  void backpassWeights(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.v * a.v; }
    };
    iterateConvolution<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, neurons, prev->neurons, weights);
  }


  void backpassDeltas(Barrier &barrier) override {
    struct I: public Iter {
      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.d * w.w; }
      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
    };
    int ksx = kernel.sx, ksy = kernel.sy;
    for(int kx = 0; kx < ksx; ++kx)
    for(int ky = 0; ky < ksy; ++ky) {
      iterateConvolutionPoint<I>(mtLayouts[barrier.tid], prev->layout, layout, kernel, kx, ky, neurons, prev->neurons, weights);
      barrier.wait();
    }
    iterateNeurons<I>(mtPrevLayouts[barrier.tid], prev->neurons);
  }


  void testPass() override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.v * w.w; }
      static inline void done(Neuron &n, AccumType &a) { func(n, a.v); }
    };
    iterateTestConvolution<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
  }


  void testBackpass() override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.d; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; w.w += a.v * n.v; }
      static inline void iter3(Neuron &n) { n.d *= n.a.v; n.a.v = 0; }
    };
    clearAccum();
    iterateTestConvolution<I>(layout, prev->layout, kernel, neurons, prev->neurons, weights);
    iterateNeurons<I>(prev->layout, prev->neurons);
    clearAccum();
  }
};

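// Transposed counterpart of LayerConv: the forward pass scatters each
// previous-layer neuron into an sx*sy window of this layer by running
// iterateConvolutionPoint with the layer roles swapped, and the backward
// passes reuse iterateConvolution in the same swapped order.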
template<Func func>
class LayerDeconv: public Layer {
public:
  Kernel kernel;

  LayerDeconv(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
    Layer(&prev, layout, prev.back().layout.getActiveCount()*kernel.sx*kernel.sy*layout.getD(), weights),
    kernel(kernel)
  {
    assert(kernel);
    if (ownWeights) fillWeights(-1, 1);
  }


  void pass(Barrier &barrier) override {
    struct I: public Iter {
      static inline void iter2(Neuron &cn, Neuron &pn, Weight &w) { pn.a.v += cn.v * w.w; }
      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
    };
    int k_sx = kernel.sx, k_sy = kernel.sy;
    for(int kx = 0; kx < k_sx; ++kx)
    for(int ky = 0; ky < k_sy; ++ky) {
      iterateConvolutionPoint<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, kx, ky, prev->neurons, neurons, weights);
      barrier.wait();
    }
    iterateNeurons<I>(mtLayouts[barrier.tid], neurons);
  }


  void backpassWeights(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { w.w += n.d * a.v; }
    };
    iterateConvolution<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
  }


  void backpassDeltas(Barrier &barrier) override {
    struct I: public Iter {
      static inline void init(Neuron&, AccumType &a) { a.v = 0; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; }
      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
    };
    iterateConvolution<I>(mtPrevLayouts[barrier.tid], layout, prev->layout, kernel, prev->neurons, neurons, weights);
  }


  void testPass() override {
    struct I: public Iter {
      static inline void init(Neuron &n, AccumType &a) { a.v = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { n.a.v += a.v * w.w; }
      static inline void iter3(Neuron &n) { func(n, n.a.v); n.a.v = 0; }
    };
    clearAccum();
    iterateTestConvolution<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
    iterateNeurons<I>(layout, neurons);
    clearAccum();
  }


  void testBackpass() override {
    struct I: public Iter {
      struct AccumType: public Accum { NeuronReal vv; };
      static inline void init(Neuron &n, AccumType &a) { a.v = 0; a.vv = n.v; }
      static inline void iter(Neuron &n, Weight &w, AccumType &a) { a.v += n.d * w.w; w.w += n.d * a.vv; }
      static inline void done(Neuron &n, AccumType &a) { n.d *= a.v; }
    };
    iterateTestConvolution<I>(prev->layout, layout, kernel, prev->neurons, neurons, weights);
  }
};


#endif