Blame simple/neural/nnlayer3.mt.inc.cpp

Ivan Mahonin 53488e
#ifndef NNLAYER3_MT_INC_CPP
Ivan Mahonin 53488e
#define NNLAYER3_MT_INC_CPP
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
#include "nnlayer3.inc.cpp"
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
#include <atomic>
Ivan Mahonin 53488e
#include <thread>
Ivan Mahonin 53488e
#include <vector>
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
/// Busy-waiting rendezvous point for a fixed group of threads.
///
/// Every participating thread owns its own Barrier object, but all of them
/// refer to the same atomic counter, which must start at zero. Each call to
/// wait() adds one to the shared counter and spins until the counter has
/// reached `threads` increments for the current round.
///
/// Counter and target are compared through their modular difference, so the
/// barrier keeps working after the 32-bit counter wraps around (the plain
/// `counter < next` test would invert at the wrap point). This is valid as
/// long as counter and target are never more than 2^31 apart, which holds
/// for any realistic thread count.
class Barrier {
private:
  std::atomic<unsigned int> &counter; // shared by all threads of the group
  const unsigned int threads;         // number of increments that complete one round
  unsigned int next;                  // counter value this thread waits for

  /// True while the shared counter has not yet reached `target`.
  /// Unsigned subtraction is modular: a "negative" distance shows up as a
  /// value in the upper half of the range.
  inline bool behind(unsigned int target) const
    { return counter.load() - target > (~0u >> 1); }

public:
  /// @param counter  shared atomic counter, expected to be zero when the
  ///                 first Barrier of the group is created
  /// @param threads  number of threads that take part in every wait()
  inline Barrier(std::atomic<unsigned int> &counter, unsigned int threads): counter(counter), threads(threads), next() { }

  /// Announce arrival and spin until all `threads` participants have arrived.
  inline void wait() { next += threads; ++counter; while(behind(next)); }

  /// Spin until at least `tid` participants have arrived in the round that
  /// follows the last completed wait(). Does not touch the counter itself.
  inline void subwait(int tid) { while(behind(next + tid)); }
};
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
class TrainMT {
Ivan Mahonin 53488e
private:
Ivan Mahonin 53488e
  struct LDesc {
Ivan Mahonin 53488e
    int y0, y1;
Ivan Mahonin 53488e
    double sumQ;
Ivan Mahonin 53488e
    LDesc(): y0(), y1(), sumQ() { }
Ivan Mahonin 53488e
  };
Ivan Mahonin 53488e
Ivan Mahonin 53488e
public:
Ivan Mahonin 53488e
  Layer *layer;
Ivan Mahonin 53488e
  const unsigned char *dataX;
Ivan Mahonin 53488e
  const unsigned char *dataY;
Ivan Mahonin 53488e
  int strideX;
Ivan Mahonin 53488e
  int strideY;
Ivan Mahonin 53488e
  int *shuffle;
Ivan Mahonin 53488e
  int count;
Ivan Mahonin 53488e
  Real trainRatio;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
  TrainMT():
Ivan Mahonin 53488e
    layer(),
Ivan Mahonin 53488e
    dataX(),
Ivan Mahonin 53488e
    dataY(),
Ivan Mahonin 53488e
    strideX(),
Ivan Mahonin 53488e
    strideY(),
Ivan Mahonin 53488e
    shuffle(),
Ivan Mahonin 53488e
    count(),
Ivan Mahonin 53488e
    trainRatio() { }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
private:
Ivan Mahonin 53488e
  void trainFunc(int tid, int threads, std::atomic<unsigned int> &barrierCounter, LDesc *ldescs) {
Ivan Mahonin 53488e
    Barrier barrier(barrierCounter, threads);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    Layer &fl = *layer;
Ivan Mahonin 53488e
    Layer &bl = layer->back();
Ivan Mahonin 53488e
    int layersCount = fl.totalLayers();
Ivan Mahonin 53488e
    LDesc *fld = ldescs, *bld = fld + layersCount - 1;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    Real trainRatio = this->trainRatio;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    int fsxz = fl.sx*fl.sz;
Ivan Mahonin 53488e
    int bsxz = bl.sx*bl.sz;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    //barrier.subwait(tid);
Ivan Mahonin 53488e
    //for(LDesc *ld = fld; ld <= bld; ++ld)
Ivan Mahonin 53488e
    //  printf("t%d %d %d %d\n", tid, (int)(ld-fld), ld->y0, ld->y1);
Ivan Mahonin 53488e
    //barrier.wait();
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    const unsigned char *dataX = this->dataX + fsxz*fld->y0;
Ivan Mahonin 53488e
    const unsigned char *dataY = this->dataY + bsxz*bld->y0;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    double sumQ = 0;
Ivan Mahonin 53488e
    for(int i = 0; i < count; ++i) {
Ivan Mahonin 53488e
      int ii = shuffle[i];
Ivan Mahonin 53488e
      const unsigned char *curX = dataX + strideX*ii;
Ivan Mahonin 53488e
      const unsigned char *curY = dataY + strideY*ii;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      barrier.wait();
Ivan Mahonin 53488e
      const unsigned char *px = curX;
Ivan Mahonin 53488e
      for(Neuron *in = fl.neurons + fsxz*fld->y0, *e = fl.neurons + fsxz*fld->y1; in < e; ++in, ++px)
Ivan Mahonin 53488e
        in->v = Real(*px)*Real(1/255.0);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      LDesc *ld = fld + 1;
Ivan Mahonin 53488e
      for(Layer *l = fl.next; l; l = l->next, ++ld) {
Ivan Mahonin 53488e
        barrier.wait();
Ivan Mahonin 53488e
        l->pass(ld->y0, ld->y1);
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      double q = 0;
Ivan Mahonin 53488e
      const unsigned char *py = curY;
Ivan Mahonin 53488e
      for(Neuron *in = bl.neurons + bsxz*bld->y0, *e = bl.neurons + bsxz*bld->y1; in < e; ++in, ++py) {
Ivan Mahonin 53488e
        Real v = (in->v - 0.25)*2;
Ivan Mahonin 53488e
        Real d = Real(*py)*Real(1/255.0) - v;
Ivan Mahonin 53488e
        in->d *= d * trainRatio;
Ivan Mahonin 53488e
        d *= d;
Ivan Mahonin 53488e
        q += d*d;
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
      sumQ += q;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      if (trainRatio > 0) {
Ivan Mahonin 53488e
        ld = bld;
Ivan Mahonin 53488e
        for(Layer *l = &bl; l->prev; l = l->prev, --ld) {
Ivan Mahonin 53488e
          if (!l->prev->prev) {
Ivan Mahonin 53488e
            barrier.wait();
Ivan Mahonin 53488e
            l->backpassWeights(ld->y0, ld->y1);
Ivan Mahonin 53488e
            break;
Ivan Mahonin 53488e
          } else
Ivan Mahonin 53488e
          if (l->next) {
Ivan Mahonin 53488e
            barrier.wait();
Ivan Mahonin 53488e
            l->backpassTpl<true>(ld->y0, ld->y1);
Ivan Mahonin 53488e
            //l->backpassTpl<false>(ld->y0, ld->y1);
Ivan Mahonin 53488e
            //barrier.wait();
Ivan Mahonin 53488e
            //l->next->backpassWeights(ld[1].y0, ld[1].y1);
Ivan Mahonin 53488e
          }
Ivan Mahonin 53488e
        }
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      //if (!tid) printf(" - %d, %f, %f\n", i, q, sumQ);
Ivan Mahonin 53488e
    }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    ldescs->sumQ = sumQ;
Ivan Mahonin 53488e
  }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
public:
Ivan Mahonin 53488e
  double train(int threads) {
Ivan Mahonin 53488e
    assert(threads > 0);
Ivan Mahonin 53488e
    assert(layer && !layer->prev);
Ivan Mahonin 53488e
    assert(dataX && dataY && shuffle);
Ivan Mahonin 53488e
    assert(count > 0);
Ivan Mahonin 53488e
    assert(trainRatio >= 0);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    int layersCount = layer->totalLayers();
Ivan Mahonin 53488e
    assert(layersCount > 0);
Ivan Mahonin 53488e
    std::vector<LDesc> ldescs( threads*layersCount );
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    int layerId = 0;
Ivan Mahonin 53488e
    for(Layer *l = layer; l; l = l->next, ++layerId) {
Ivan Mahonin 53488e
      assert(layerId < layersCount);
Ivan Mahonin 53488e
      int tsy = l->sy/threads;
Ivan Mahonin 53488e
      for(int tid = 0; tid < threads; ++tid) {
Ivan Mahonin 53488e
        LDesc &desc = ldescs[tid*layersCount + layerId];
Ivan Mahonin 53488e
        desc.y0 = tid*tsy;
Ivan Mahonin 53488e
        desc.y1 = desc.y0 + tsy;
Ivan Mahonin 53488e
        if (tid == threads-1) desc.y1 = l->sy;
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
    }
Ivan Mahonin 53488e
    assert(layerId == layersCount);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    std::atomic<unsigned int> barrierCounter(0);
Ivan Mahonin 53488e
    std::vector<std::thread*> t(threads - 1);
Ivan Mahonin 53488e
    for(int i = 1; i < threads; ++i)
Ivan Mahonin 53488e
      t[i-1] = new std::thread(&TrainMT::trainFunc, this, i, threads, std::ref(barrierCounter), &ldescs[i*layersCount]);
Ivan Mahonin 53488e
    trainFunc(0, threads, barrierCounter, &ldescs[0]);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    double result = ldescs[0].sumQ;
Ivan Mahonin 53488e
    for(int i = 1; i < threads; ++i)
Ivan Mahonin 53488e
      { t[i-1]->join(); delete t[i-1]; result += ldescs[i*layersCount].sumQ; }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    return sqrt(sqrt( result/(count * layer->back().countNeurons()) ));
Ivan Mahonin 53488e
  }
Ivan Mahonin 53488e
};
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
#endif