Blame simple/neural/nnlayer2.mt.inc.cpp

Ivan Mahonin 53488e
#ifndef NNLAYER2_MT_INC_CPP
Ivan Mahonin 53488e
#define NNLAYER2_MT_INC_CPP
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
#include "nnlayer2.inc.cpp"
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
#include <atomic>
Ivan Mahonin 53488e
#include <thread>
Ivan Mahonin 53488e
#include <vector>
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
/// Spinning rendezvous point shared by a fixed group of threads.
///
/// Every participating thread owns its own Barrier object, but all of them
/// reference the same atomic counter. Each call to wait() increments the
/// shared counter once and then busy-waits until the counter has reached
/// `threads` increments for the current round.
class Barrier {
private:
  std::atomic<unsigned int> &counter; // shared arrival counter (one increment per wait() call)
  const unsigned int threads;         // number of participants expected per round
  unsigned int next;                  // counter value that completes this object's current round
public:
  /// @param counter  atomic shared by all participants; must outlive this object
  /// @param threads  number of threads that call wait() in every round
  Barrier(std::atomic<unsigned int> &counter, unsigned int threads): counter(counter), threads(threads), next() { }

  /// Registers the arrival of the calling thread and spins until all
  /// `threads` participants have arrived for this round.
  void wait() {
    next += threads;
    ++counter;
    // Compare by modular distance rather than with `<`: both values grow
    // without bound and eventually wrap around. While the counter is behind
    // the target, `counter - next` wraps into the upper half of the unsigned
    // range; once the target is reached the difference is a small
    // non-negative number.
    while((counter.load() - next) > (~0u >> 1)) { }
  }
};
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
class TrainMT {
Ivan Mahonin 53488e
private:
Ivan Mahonin 53488e
  struct LDesc {
Ivan Mahonin 53488e
    int nb, ne, lb, le;
Ivan Mahonin 53488e
    double sumQ;
Ivan Mahonin 53488e
    LDesc(): nb(), ne(), lb(), le(), sumQ() { }
Ivan Mahonin 53488e
  };
Ivan Mahonin 53488e
Ivan Mahonin 53488e
public:
Ivan Mahonin 53488e
  Layer *layer;
Ivan Mahonin 53488e
  const unsigned char *dataX;
Ivan Mahonin 53488e
  const unsigned char *dataY;
Ivan Mahonin 53488e
  int strideX;
Ivan Mahonin 53488e
  int strideY;
Ivan Mahonin 53488e
  int *shuffle;
Ivan Mahonin 53488e
  int count;
Ivan Mahonin 53488e
  Real trainRatio;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
  TrainMT():
Ivan Mahonin 53488e
    layer(),
Ivan Mahonin 53488e
    dataX(),
Ivan Mahonin 53488e
    dataY(),
Ivan Mahonin 53488e
    strideX(),
Ivan Mahonin 53488e
    strideY(),
Ivan Mahonin 53488e
    shuffle(),
Ivan Mahonin 53488e
    count(),
Ivan Mahonin 53488e
    trainRatio() { }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
private:
Ivan Mahonin 53488e
  void trainFunc(int tid, int threads, std::atomic<unsigned int> &barrierCounter, LDesc *ldescs) {
Ivan Mahonin 53488e
    Barrier barrier(barrierCounter, threads);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    Layer &fl = *layer;
Ivan Mahonin 53488e
    Layer &bl = layer->back();
Ivan Mahonin 53488e
    int layersCount = fl.totalLayers();
Ivan Mahonin 53488e
    LDesc *fld = ldescs, *bld = fld + layersCount - 1;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    Real trainRatio = this->trainRatio;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    double sumQ = 0;
Ivan Mahonin 53488e
    for(int i = 0; i < count; ++i) {
Ivan Mahonin 53488e
      int ii = shuffle[i];
Ivan Mahonin 53488e
      const unsigned char *curX = dataX + strideX*ii;
Ivan Mahonin 53488e
      const unsigned char *curY = dataY + strideY*ii;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      const unsigned char *px = curX;
Ivan Mahonin 53488e
      for(Neuron *in = fl.neurons + fld->nb, *e = fl.neurons + fld->ne; in < e; ++in, ++px)
Ivan Mahonin 53488e
        in->v = Real(*px)*Real(1/255.0);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      LDesc *ld = fld + 1;
Ivan Mahonin 53488e
      for(Layer *l = fl.next; l; l = l->next, ++ld) {
Ivan Mahonin 53488e
        barrier.wait();
Ivan Mahonin 53488e
        l->pass(ld->nb, ld->ne);
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      double q = 0;
Ivan Mahonin 53488e
      const unsigned char *py = curY;
Ivan Mahonin 53488e
      for(Neuron *in = bl.neurons + bld->nb, *e = bl.neurons + bld->ne; in < e; ++in, ++py) {
Ivan Mahonin 53488e
        Real d = Real(*py)*Real(1/255.0) - in->v;
Ivan Mahonin 53488e
        in->d *= d * trainRatio;
Ivan Mahonin 53488e
        q += d*d;
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
      sumQ += q;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      if (trainRatio > 0) {
Ivan Mahonin 53488e
        ld = bld - 1;
Ivan Mahonin 53488e
        for(Layer *l = bl.prev; l; l = l->prev, --ld) {
Ivan Mahonin 53488e
          barrier.wait();
Ivan Mahonin 53488e
          l->backpass(ld->lb, ld->le);
Ivan Mahonin 53488e
        }
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      //if (!tid) printf(" - %d, %f, %f\n", i, q, sumQ);
Ivan Mahonin 53488e
    }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    ldescs->sumQ = sumQ;
Ivan Mahonin 53488e
  }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
public:
Ivan Mahonin 53488e
  double train(int threads) {
Ivan Mahonin 53488e
    assert(threads > 0);
Ivan Mahonin 53488e
    assert(layer && !layer->prev);
Ivan Mahonin 53488e
    assert(dataX && dataY && shuffle);
Ivan Mahonin 53488e
    assert(count > 0);
Ivan Mahonin 53488e
    assert(trainRatio >= 0);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    int layersCount = layer->totalLayers();
Ivan Mahonin 53488e
    assert(layersCount > 0);
Ivan Mahonin 53488e
    std::vector<LDesc> ldescs( threads*layersCount );
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    int layerId = 0;
Ivan Mahonin 53488e
    for(Layer *l = layer; l; l = l->next, ++layerId) {
Ivan Mahonin 53488e
      assert(layerId < layersCount);
Ivan Mahonin 53488e
      int tsize = l->size/threads;
Ivan Mahonin 53488e
      for(int tid = 0; tid < threads; ++tid) {
Ivan Mahonin 53488e
        LDesc &desc = ldescs[tid*layersCount + layerId];
Ivan Mahonin 53488e
        desc.nb = tid*tsize;
Ivan Mahonin 53488e
        desc.ne = desc.nb + tsize;
Ivan Mahonin 53488e
        if (tid == threads-1) desc.ne = l->size;
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
      if (int lsize = l->size*l->lsize) {
Ivan Mahonin 53488e
        int tlsize = lsize/threads;
Ivan Mahonin 53488e
        int ipn = l->links[ l->lfirst ].nprev;
Ivan Mahonin 53488e
        int tid = 0;
Ivan Mahonin 53488e
        int count = 0;
Ivan Mahonin 53488e
Ivan Mahonin 53488e
        ldescs[tid*layersCount + layerId].lb = l->lfirst;
Ivan Mahonin 53488e
        if (threads > 1) {
Ivan Mahonin 53488e
          for(int il = l->lfirst; il != lsize; il = l->links[il].lnext, ++count) {
Ivan Mahonin 53488e
            Link &link = l->links[il];
Ivan Mahonin 53488e
            if (ipn != link.nprev) {
Ivan Mahonin 53488e
              if (count >= tlsize) {
Ivan Mahonin 53488e
                ldescs[tid*layersCount + layerId].le = il;
Ivan Mahonin 53488e
                ++tid;
Ivan Mahonin 53488e
                count -= tlsize;
Ivan Mahonin 53488e
                ldescs[tid*layersCount + layerId].lb = il;
Ivan Mahonin 53488e
                if (tid == threads - 1) break;
Ivan Mahonin 53488e
              }
Ivan Mahonin 53488e
              ipn = link.nprev;
Ivan Mahonin 53488e
            }
Ivan Mahonin 53488e
          }
Ivan Mahonin 53488e
        }
Ivan Mahonin 53488e
        ldescs[tid*layersCount + layerId].le = lsize;
Ivan Mahonin 53488e
      }
Ivan Mahonin 53488e
    }
Ivan Mahonin 53488e
    assert(layerId == layersCount);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    std::atomic<unsigned int> barrierCounter(0);
Ivan Mahonin 53488e
    std::vector<std::thread*> t(threads - 1);
Ivan Mahonin 53488e
    for(int i = 1; i < threads; ++i)
Ivan Mahonin 53488e
      t[i-1] = new std::thread(&TrainMT::trainFunc, this, i, threads, std::ref(barrierCounter), &ldescs[i*layersCount]);
Ivan Mahonin 53488e
    trainFunc(0, threads, barrierCounter, &ldescs[0]);
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    double result = ldescs[0].sumQ;
Ivan Mahonin 53488e
    for(int i = 1; i < threads; ++i)
Ivan Mahonin 53488e
      { t[i-1]->join(); delete t[i-1]; result += ldescs[i*layersCount].sumQ; }
Ivan Mahonin 53488e
Ivan Mahonin 53488e
    return result/(count * layer->back().size);
Ivan Mahonin 53488e
  }
Ivan Mahonin 53488e
};
Ivan Mahonin 53488e
Ivan Mahonin 53488e
Ivan Mahonin 53488e
#endif