Blame projects/neural/segment.conv.inc.cpp

Ivan Mahonin e4740d
#ifndef SEGMENT_CONV_INC_CPP
Ivan Mahonin e4740d
#define SEGMENT_CONV_INC_CPP
Ivan Mahonin e4740d
Ivan Mahonin e4740d
Ivan Mahonin e4740d
#include "segment.inc.cpp"
Ivan Mahonin e4740d
#include "func.inc.cpp"
Ivan Mahonin e4740d
#include "layer.conv.inc.cpp"
Ivan Mahonin e4740d
Ivan Mahonin e4740d
Ivan Mahonin e4740d
class SegmentConv: public Segment {
Ivan Mahonin e4740d
public:
Ivan Mahonin e4740d
  enum {
Ivan Mahonin e4740d
    KSX = 4,
Ivan Mahonin e4740d
    KSY = 4,
Ivan Mahonin e4740d
  };
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  const int msx, msy, msz;
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  NeuronReal *m_values;
Ivan Mahonin e4740d
  NeuronReal *b_values;
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  SegmentConv(int sx, int sy, int sz, int msz, Weight *weights = nullptr):
Ivan Mahonin e4740d
    Segment(sx, sy, sz, msz*KSY*KSX*sz, weights), msx((sx - KSX)/2 + 1), msy((sy - KSY)/2 + 1), msz(msz)
Ivan Mahonin e4740d
  {
Ivan Mahonin e4740d
    assert(msx > 0);
Ivan Mahonin e4740d
    assert(msy > 0);
Ivan Mahonin e4740d
    assert(msz > 0);
Ivan Mahonin e4740d
    m_values = new NeuronReal[msx*msy*msz + sx*sy*sz];
Ivan Mahonin e4740d
    b_values = m_values + msx*msy*msz;
Ivan Mahonin e4740d
    clear();
Ivan Mahonin e4740d
  }
Ivan Mahonin e4740d
  ~SegmentConv()
Ivan Mahonin e4740d
    { delete[] m_values; }  
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  void clear() override
Ivan Mahonin e4740d
    { memset(m_values, 0, sizeof(*m_values)*(msx*msy*msz + sx*sy*sz)); }
Ivan Mahonin e4740d
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
  inline void check(int x, int y, int z) {
Ivan Mahonin e4740d
    Segment::check(x, y, z);
Ivan Mahonin e4740d
    assert(layout.getD() == sz);
Ivan Mahonin e4740d
  }
Ivan Mahonin e4740d
Ivan Mahonin e4740d
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  Quality pass(Barrier &barrier, int x, int y, int z, NeuronReal trainRatio) override {
Ivan Mahonin e4740d
    check(x, y, z);
Ivan Mahonin e4740d
    Layout l = layout;
Ivan Mahonin e4740d
    NeuronReal *f_values = this->f_values + (y*l.sx + x)*l.sz + z;
Ivan Mahonin e4740d
  }
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  __attribute__((always_inline))
Ivan Mahonin e4740d
  inline Quality pass(Barrier &barrier, NeuronReal *f_values, NeuronReal trainRatio) {
Ivan Mahonin e4740d
    Layout l = layout;
Ivan Mahonin e4740d
    int tid = barrier.tid;
Ivan Mahonin e4740d
    int threads = barrier.threads;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    int sx = this->sx;
Ivan Mahonin e4740d
    //int sy = this->sy;
Ivan Mahonin e4740d
    int sz = this->sz;
Ivan Mahonin e4740d
    int msx = this->msx;
Ivan Mahonin e4740d
    int msy = this->msy;
Ivan Mahonin e4740d
    int msz = this->msz;
Ivan Mahonin e4740d
    int msxz = msx*msz;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    int ksxz = KSX*sz;
Ivan Mahonin e4740d
    int ksyxz = KSY*ksxz;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    int fv_dkx = l.sz - sz;
Ivan Mahonin e4740d
    int fv_dky = (l.sx - KSX)*l.sz;
Ivan Mahonin e4740d
    int fv_dmx = 2*l.sz;
Ivan Mahonin e4740d
    int fv_dmy = 2*(l.sx - msx)*l.sz;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    int mn_dtz = threads - msx*msy*msz;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    // stage 1: pass from front to mid
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    int f_sxz = l.sx*l.sz;
Ivan Mahonin e4740d
    int f_sz2 = l.sz*2;
Ivan Mahonin e4740d
    int f_sxz2 = l.sx*f_sz2;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    int m_cnt = msx*msy*msz;
Ivan Mahonin e4740d
    int mi0 = m_cnt*tid/threads;
Ivan Mahonin e4740d
    int mi1 = m_cnt*(tid+1)/threads;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    for(int i = mi0; i < mi1; ++i) {
Ivan Mahonin e4740d
      int my = i/msxz;
Ivan Mahonin e4740d
      int mx = i%msxz/msz;
Ivan Mahonin e4740d
      int mz = i%msz;
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      AccumReal a = 0;
Ivan Mahonin e4740d
      int wi = i*ksyxz;
Ivan Mahonin e4740d
      int fvi = my*f_sxz2 + mx*f_sz2 + mz;
Ivan Mahonin e4740d
      for(int ky = 0; ky < KSY; ++ky, fvi += f_sxz, wi += ksxz) {
Ivan Mahonin e4740d
        Weight *iw = &weights[wi];
Ivan Mahonin e4740d
        NeuronReal *ifv = &f_values[fvi];
Ivan Mahonin e4740d
        for(int i = 0; i < ksxz; ++i)
Ivan Mahonin e4740d
          a += ifv[i]*iw[i].w;
Ivan Mahonin e4740d
      }
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      m_values[i] = a > 0 ? a : 0;
Ivan Mahonin e4740d
    }
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    barrier.wait();
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    // stage 2: pass from mid to back and verify
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    AccumReal qa = 0;
Ivan Mahonin e4740d
    for(int by = 2 + tid; by < 10; by += threads)
Ivan Mahonin e4740d
    for(int bx = 2; bx < 10; ++bx)
Ivan Mahonin e4740d
    for(int bz = 0; bz < sz; ++bz) {
Ivan Mahonin e4740d
      AccumReal a = 0;
Ivan Mahonin e4740d
      Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      for(int ky = by%2; ky < KSY; ky += 2)
Ivan Mahonin e4740d
      for(int kx = bx%2; kx < KSX; kx += 2) {
Ivan Mahonin e4740d
        int mx = (bx - kx)/2;
Ivan Mahonin e4740d
        int my = (by - ky)/2;
Ivan Mahonin e4740d
        assert(mx >= 0 && mx < msx && (bx - kx)%2 == 0);
Ivan Mahonin e4740d
        assert(my >= 0 && my < msy && (by - ky)%2 == 0);
Ivan Mahonin e4740d
        for(int mz = 0; mz < msz; ++mz) {
Ivan Mahonin e4740d
          Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
Ivan Mahonin e4740d
          Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + bz ];
Ivan Mahonin e4740d
          a += mn.v * w.w;
Ivan Mahonin e4740d
        }
Ivan Mahonin e4740d
      }
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      if (a > 0) bn.v = a, bn.d = 1; else bn.v = bn.d = 0;
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      NeuronReal fn = f_values[ (by*l.sx + bx)*l.sz + bz ];
Ivan Mahonin e4740d
      NeuronReal d = fn - bn.v;
Ivan Mahonin e4740d
      bn.d *= d*trainRatio;
Ivan Mahonin e4740d
      qa += d*d;
Ivan Mahonin e4740d
    }
Ivan Mahonin e4740d
    Quality q(qa/(64*sz));
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    if (trainRatio <= 0) return q;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    barrier.wait();
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    // stage 3: backpass deltas
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    for(int mz = tid; mz < msz; mz += threads)
Ivan Mahonin e4740d
    for(int my = 1; my < 4; ++my)
Ivan Mahonin e4740d
    for(int mx = 1; mx < 4; ++mx) {
Ivan Mahonin e4740d
      AccumReal a = 0;
Ivan Mahonin e4740d
      Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      for(int ky = 0; ky < ksy; ++ky)
Ivan Mahonin e4740d
      for(int kx = 0; kx < ksx; ++kx)
Ivan Mahonin e4740d
      for(int kz = 0; kz < sz;  ++kz) {
Ivan Mahonin e4740d
        int bx = mx*2 + kx;
Ivan Mahonin e4740d
        int by = my*2 + ky;
Ivan Mahonin e4740d
        Neuron &bn = b_neurons[ (by*sx + bx)*sz + kz ];
Ivan Mahonin e4740d
        Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
Ivan Mahonin e4740d
        a += bn.d * w.w;
Ivan Mahonin e4740d
      }
Ivan Mahonin e4740d
      mn.d *= a;
Ivan Mahonin e4740d
    }
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    barrier.wait();
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    // stage 4: update weights
Ivan Mahonin e4740d
Ivan Mahonin e4740d
    for(int mz = tid; mz < msz; mz += threads)
Ivan Mahonin e4740d
    for(int by = 4; by <  8; ++by)
Ivan Mahonin e4740d
    for(int bx = 4; bx <  8; ++bx)
Ivan Mahonin e4740d
    for(int bz = 0; bz < sz; ++bz) {
Ivan Mahonin e4740d
      Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
Ivan Mahonin e4740d
      NeuronReal fv = f_values[ (by*l.sx + bx)*l.sz + bz ];
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      for(int ky = by%2; ky < ksy; ky += 2)
Ivan Mahonin e4740d
      for(int kx = bx%2; kx < ksx; kx += 2) {
Ivan Mahonin e4740d
        int mx = (bx - kx)/2;
Ivan Mahonin e4740d
        int my = (by - ky)/2;
Ivan Mahonin e4740d
        assert(mx >= 1 && mx < 4 && (bx - kx)%2 == 0);
Ivan Mahonin e4740d
        assert(my >= 1 && my < 4 && (by - ky)%2 == 0);
Ivan Mahonin e4740d
        Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
Ivan Mahonin e4740d
        Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + bz ];
Ivan Mahonin e4740d
        w.w += bn.d*mn.v + mn.d*fv;
Ivan Mahonin e4740d
      }
Ivan Mahonin e4740d
    }
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    return q;
Ivan Mahonin e4740d
  }
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  Quality testPass(int x, int y, int z, NeuronReal trainRatio) override {
Ivan Mahonin e4740d
    check(x, y, z);
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    Layout l = layout;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    // stage 1: pass
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    clear();
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    for(int my = 0; my < msy; ++my)
Ivan Mahonin e4740d
    for(int mx = 0; mx < msx; ++mx)
Ivan Mahonin e4740d
    for(int mz = 0; mz < msz; ++mz) {
Ivan Mahonin e4740d
      AccumReal a = 0;
Ivan Mahonin e4740d
      for(int ky = 0; ky < KSY; ++ky)
Ivan Mahonin e4740d
      for(int kx = 0; kx < KSX; ++kx)
Ivan Mahonin e4740d
      for(int kz = 0; kz < sz;  ++kz) {
Ivan Mahonin e4740d
        int fx = x + mx*2 + kx;
Ivan Mahonin e4740d
        int fy = y + my*2 + ky;
Ivan Mahonin e4740d
        int fz = z + kz;
Ivan Mahonin e4740d
        NeuronReal fv = f_values[ (fy*l.sx + fx)*l.sz + fz ];
Ivan Mahonin e4740d
        Weight &w = weights[ ((mz*KSY + ky)*KSX + kx)*sz + kz ];
Ivan Mahonin e4740d
        a += fv * w.w;
Ivan Mahonin e4740d
      }
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      NeuronReal &mv = m_values[ (my*msx + mx)*msz + mz ];
Ivan Mahonin e4740d
      if (a < 0) { mv = 0; continue; }
Ivan Mahonin e4740d
      mv = a;
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      for(int ky = 0; ky < KSY; ++ky)
Ivan Mahonin e4740d
      for(int kx = 0; kx < KSX; ++kx)
Ivan Mahonin e4740d
      for(int kz = 0; kz < sz;  ++kz) {
Ivan Mahonin e4740d
        int bx = mx*2 + kx;
Ivan Mahonin e4740d
        int by = my*2 + ky;
Ivan Mahonin e4740d
        int bz = kz;
Ivan Mahonin e4740d
        NeuronReal &bv = b_values[ (by*sx + bx)*sz + bz ];
Ivan Mahonin e4740d
        Weight &w = weights[ ((mz*KSY + ky)*KSX + kx)*sz + kz ];
Ivan Mahonin e4740d
        bv += a * w.w;
Ivan Mahonin e4740d
      }
Ivan Mahonin e4740d
    }
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    // stage 2: finalize values and verify
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    AccumReal qa = 0;
Ivan Mahonin e4740d
    for(int by = 0; by < sy; ++by)
Ivan Mahonin e4740d
    for(int bx = 0; bx < sx; ++bx)
Ivan Mahonin e4740d
    for(int bz = 0; bz < sz; ++bz) {
Ivan Mahonin e4740d
        NeuronReal fn = f_values[ ((y + by)*l.sx + x + bx)*l.sz + z + bz ];
Ivan Mahonin e4740d
        NeuronReal &bv = b_values[ (by*sx + bx)*sz + bz ];
Ivan Mahonin e4740d
        if (bv > 0) {
Ivan Mahonin e4740d
          NeuronReal d = fn - bv;
Ivan Mahonin e4740d
          bv = d*trainRatio;
Ivan Mahonin e4740d
          qa += d*d;
Ivan Mahonin e4740d
        } else {
Ivan Mahonin e4740d
          bv = 0;
Ivan Mahonin e4740d
          qa += fn*fn;
Ivan Mahonin e4740d
        }
Ivan Mahonin e4740d
    }
Ivan Mahonin e4740d
    Quality q(qa/(KSX*KSY*sz));
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    if (trainRatio <= 0) return q;
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    // stage 3: backpass deltas and update weights
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    for(int my = 0; my < msy; ++my)
Ivan Mahonin e4740d
    for(int mx = 0; mx < msx; ++mx)
Ivan Mahonin e4740d
    for(int mz = 0; mz < msz; ++mz) {
Ivan Mahonin e4740d
      NeuronReal mv = m_values[ (my*msx + mx)*msz + mz ];
Ivan Mahonin e4740d
      if (!mv) continue;
Ivan Mahonin e4740d
      
Ivan Mahonin e4740d
      AccumReal a = 0;
Ivan Mahonin e4740d
      for(int ky = 0; ky < KSY; ++ky)
Ivan Mahonin e4740d
      for(int kx = 0; kx < KSX; ++kx)
Ivan Mahonin e4740d
      for(int kz = 0; kz < sz;  ++kz) {
Ivan Mahonin e4740d
        int bx = mx*2 + kx;
Ivan Mahonin e4740d
        int by = my*2 + ky;
Ivan Mahonin e4740d
        int bz = kz;
Ivan Mahonin e4740d
        NeuronReal bv = b_values[ (by*sx + bx)*sz + bz ];
Ivan Mahonin e4740d
        Weight &w = weights[ ((mz*KSY + ky)*KSX + kx)*sz + kz ];
Ivan Mahonin e4740d
        a += bv * w.w;
Ivan Mahonin e4740d
      }
Ivan Mahonin e4740d
Ivan Mahonin e4740d
      for(int ky = 0; ky < KSY; ++ky)
Ivan Mahonin e4740d
      for(int kx = 0; kx < KSX; ++kx)
Ivan Mahonin e4740d
      for(int kz = 0; kz < sz;  ++kz) {
Ivan Mahonin e4740d
        int bx = mx*2 + kx;
Ivan Mahonin e4740d
        int by = my*2 + ky;
Ivan Mahonin e4740d
        int bz = kz;
Ivan Mahonin e4740d
        NeuronReal fv = f_values[ ((y + by)*l.sx + x + bx)*l.sz + z + bz ];
Ivan Mahonin e4740d
        NeuronReal bv = b_values[ (by*sx + bx)*sz + bz ];
Ivan Mahonin e4740d
        Weight &w = weights[ ((mz*KSY + ky)*KSX + kx)*sz + kz ];
Ivan Mahonin e4740d
        w.w += bv*mv + fv*a;
Ivan Mahonin e4740d
      }
Ivan Mahonin e4740d
    }
Ivan Mahonin e4740d
    
Ivan Mahonin e4740d
    return q;
Ivan Mahonin e4740d
  }
Ivan Mahonin e4740d
Ivan Mahonin e4740d
  
Ivan Mahonin e4740d
  bool saveDemo() override
Ivan Mahonin e4740d
    { return !filename || saveConvDemoImage(filename, msz, KSX, KSY, sz, weights); }
Ivan Mahonin e4740d
};
Ivan Mahonin e4740d
Ivan Mahonin e4740d
Ivan Mahonin e4740d
Ivan Mahonin e4740d
Ivan Mahonin e4740d
#endif
Ivan Mahonin e4740d
Ivan Mahonin e4740d