// TrainerCx4: segment trainer fed with crops of images read from a raw image file.
#ifndef TRAIN_CX4_INC_CPP
#define TRAIN_CX4_INC_CPP


#include "train.segment.inc.cpp"
#include "segment.cx4.inc.cpp"
#include "layer.inc.cpp"


/// Segment trainer whose samples are crops of fixed-size 8-bit images stored
/// back to back in a single file (`infile`).
///
/// Two modes, selected by `layerPre`:
///  - `layerPre` set:  every loaded image is pushed through the layers from
///    `layerFull` to `layerPre` once (see preprocess()) and the resulting neuron
///    values are cached in `values`; loadData() then only selects an image and
///    a crop offset.
///  - `layerPre` null: the raw bytes of the loaded images are kept in `data`
///    and loadData() converts one crop to NeuronReal on every call.
///
/// `segment`, `threadsCount`, `measuresPerBlock`, `x`, `y` and `z` are not
/// declared here - presumably inherited from TrainerSegment.
class TrainerCx4: public TrainerSegment {
protected:
  FILE *f;                               ///< input file, open between prepare() and finish()
  std::vector<unsigned char> data;       ///< raw image bytes (one image with layerPre, else imagesInMemory images)
  std::vector<NeuronReal> values;        ///< preprocessed images (layerPre) or the current crop (raw mode)
  std::vector<NeuronReal> valuesMeasure; ///< measuresPerBlock crops used for measuring
  std::vector<unsigned char> tmpdata;    ///< scratch buffer for the demo images written by finishBlock()
  std::vector<int> shuffle;              ///< permutation of file image indices; the first imagesInMemory are loaded
  
  Layout trainLayout;                    ///< layout assigned to the segment for training samples
  Layout measureLayout;                  ///< layout assigned to the segment for measure samples
  
  size_t imageSize;                      ///< bytes per image in the file
  size_t preparedImageSize;              ///< values per preprocessed image; stays 0 when layerPre is null
  int imagesInFile;
  int imagesInMemory;
  
  // written in prepareBlock(), read by the threads in loadData()
  // NOTE(review): volatile gives no synchronization; presumably the caller's barrier orders these accesses - confirm
  volatile unsigned int seed;

public:
  Layer *layerFull;      ///< first layer, receives the raw image (required)
  Layer *layerPre;       ///< optional layer whose output becomes the training input
  int loadImagesCount;   ///< upper bound for the number of images kept in memory (must be > 0)
  int blocksPerLoading;  ///< images are reloaded every blocksPerLoading blocks (<= 0 - never)
  
  const char *infile;    ///< path of the raw image file (required)
  const char *cachefile; ///< not used by this class
  const char *outfile;   ///< prefix for the demo files written by finishBlock(), may be null
  
  TrainerCx4():
    f(),
    imageSize(),
    preparedImageSize(),
    imagesInFile(),
    imagesInMemory(),
    seed(),
    layerFull(),
    layerPre(),
    loadImagesCount(),
    blocksPerLoading(1),
    infile(),
    cachefile(),
    outfile() { }

protected:
  /// Converts one raw image to preprocessed values:
  /// src bytes -> neurons of layerFull (scaled by 1/255) -> passFull -> neurons of layerPre -> dst.
  void preprocess(unsigned char *src, NeuronReal *dst) {
    // loads bytes into neuron values
    struct IL: public Iter {
      typedef const unsigned char* DataType;
      static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; }
    };
    // stores neuron values
    struct IS: public Iter {
      typedef NeuronReal* DataType;
      static inline void iter4(Neuron &n, DataType d, DataAccumType&) { *d = n.v; }
    };
    
    Layer &fl = *layerFull;
    Layer &bl = *layerPre;

    iterateNeurons2<IL>(fl.layout, fl.layout, fl.neurons, src);
    fl.passFull(&bl, threadsCount);
    iterateNeurons2<IS>(bl.layout, bl.layout, bl.neurons, dst);
  }
  
  
  /// Reads image `fromIndex` of the file into memory slot `toIndex`.
  /// With layerPre the raw bytes go to the single-image buffer and the
  /// preprocessed result is stored in slot toIndex of `values`.
  /// On any I/O failure the file is closed, `f` is reset and false is returned.
  bool loadImage(int fromIndex, int toIndex) {
    if (!f) return false; // file was already closed by an earlier failure
    
    unsigned char *src = data.data();
    if (!layerPre) src += toIndex*imageSize;
    
    if ( fseeko64(f, fromIndex*imageSize, SEEK_SET) != 0
      || !fread(src, imageSize, 1, f) )
      return fclose(f), f = nullptr, false;
    
    if (layerPre) preprocess(src, values.data() + toIndex*preparedImageSize);
    
    return true;
  }
  
  
  /// Picks a new random set of imagesInMemory images and loads them.
  /// Returns false as soon as one image cannot be read.
  bool loadImages() {
    // partial shuffle: only the first imagesInMemory entries are used below
    for(int i = 0; i < imagesInMemory; ++i) {
      int j = rand()%imagesInFile;
      if (i != j) std::swap(shuffle[i], shuffle[j]);
    }
    
    // order the reads by position in the file (the set sorts by file index first)
    typedef std::pair<int, int> Pair;
    typedef std::set<Pair> Set;
    Set set;
    for(int i = 0; i < imagesInMemory; ++i)
      set.insert(Pair(shuffle[i], i));
    for(Set::iterator i = set.begin(); i != set.end(); ++i)
      if (!loadImage(i->first, i->second))
        return false; // f is closed now, further reads are impossible
    
    return true;
  }
  
  
  /// Fills valuesMeasure with measuresPerBlock random crops (sx*sy*sz values each)
  /// taken from the images currently in memory.
  void prepareMeasure() {
    if (measuresPerBlock <= 0) return;
    int sy = segment->sy;
    int sx = segment->sx;
    int sz = segment->sz;
    int sxz = sx*sz;
    int w = (layerPre ? layerPre : layerFull)->layout.getW();
    int h = (layerPre ? layerPre : layerFull)->layout.getH();
    int rowstride = w*sz;
    NeuronReal *dst = valuesMeasure.data();
    for(int i = 0; i < measuresPerBlock; ++i) {
      int index = rand()%imagesInMemory;
      int x = rand()%(w - sx + 1);
      int y = rand()%(h - sy + 1);
      if (layerPre) {
        // copy the crop row by row from the preprocessed image
        const NeuronReal *src = values.data() + index*preparedImageSize + y*rowstride + x*sz;
        for(int j = 0; j < sy; ++j, src += rowstride, dst += sxz)
          memcpy(dst, src, sxz*sizeof(*dst));
      } else {
        // raw images are imageSize bytes apart (preparedImageSize is 0 in this mode),
        // same addressing as in loadData()
        const unsigned char *src = data.data() + index*imageSize + y*rowstride + x*sz;
        for(int j = 0; j < sy; ++j, src += rowstride - sxz)
        for(int k = 0; k < sxz; ++k, ++src, ++dst)
          *dst = *src/(NeuronReal)255;
      }
    }
  }
  
  
  /// Opens the input file, sizes all buffers, loads the first set of images
  /// and prepares the measure samples.
  /// Returns false when the file cannot be opened or read, or holds no complete image.
  bool prepare() override {
    assert(infile);
    assert(layerFull);
    assert(loadImagesCount > 0);

    Layer &fl = layerFull->front();
    Layer &bl = layerFull->back();
    
    imageSize = fl.layout.getActiveCount();
    if (!imageSize) return false; // would divide by zero below
    
    if (f) fclose(f), f = nullptr; // do not leak the handle of a previous prepare()
    f = fopen(infile, "rb");
    if (!f) return false;
    fseeko64(f, 0, SEEK_END);
    long long fileSize = ftello64(f);
    if (fileSize < 0) return fclose(f), f = nullptr, false;
    imagesInFile = fileSize/imageSize;
    if (imagesInFile < 1) return fclose(f), f = nullptr, false;
    imagesInMemory = loadImagesCount > imagesInFile ? imagesInFile : loadImagesCount;
    
    for(Layer *l = layerFull; l; l = l->next)
      l->split(threadsCount);
    
    // layout of the images the crops are taken from
    Layout l = layerPre ? layerPre->layout : layerFull->layout;
    assert(l.getW() >= segment->sx);
    assert(l.getH() >= segment->sy);
    assert(l.getD() == segment->sz);
    
    measureLayout = Layout(segment->sx, segment->sy, segment->sz);
    valuesMeasure.resize(measuresPerBlock * measureLayout.getActiveCount());
    if (layerPre) {
      // one raw image buffer, all preprocessed images cached
      assert(l);
      preparedImageSize = layerPre->layout.getActiveCount();
      trainLayout = Layout(l.getW(), l.getH(), l.getD());
      data.resize(imageSize);
      values.resize(imagesInMemory * preparedImageSize);
    } else {
      // all raw images cached, values holds only the current crop
      trainLayout = measureLayout;
      data.resize(imagesInMemory * imageSize);
      values.resize(segment->sx * segment->sy * segment->sz);
    }

    segment->f_values = values.data();
    segment->layout = trainLayout;
    // finishBlock() stores both an input and an output image in tmpdata
    tmpdata.resize(bl.layout.getActiveCount());
    if (tmpdata.size() < imageSize) tmpdata.resize(imageSize);
    
    size_t memsize = data.size()*sizeof(data.front())
                   + values.size()*sizeof(values.front())
                   + valuesMeasure.size()*sizeof(valuesMeasure.front())
                   + tmpdata.size()*sizeof(tmpdata.front());
    printf("allocated size: %lld\n", (long long)(memsize));

    shuffle.resize(imagesInFile);
    for(int i = 0; i < imagesInFile; ++i)
      shuffle[i] = i;

    if (!loadImages()) return false;
    prepareMeasure();
    return true;
  }
  
  
  /// Closes the input file if it is still open.
  void finish() override
    { if (f) fclose(f), f = nullptr; }

    
  /// Reloads the images every blocksPerLoading blocks (never for block 0)
  /// and draws the seed used by loadData() for this block.
  /// Returns false when reloading fails.
  bool prepareBlock(int block, bool measureOnly) override {
    if (block > 0 && blocksPerLoading > 0 && (block % blocksPerLoading) == 0 && !loadImages())
      return false;
    seed = rand();
    return true;
  }
  
  
  /// When `outfile` is set: writes a random input image to "<outfile>.0.tga",
  /// the output of the last layer for it to "<outfile>.1.tga" and calls
  /// segment->saveDemo(). The two images are skipped when the input cannot be read.
  void finishBlock(int block) override {
    if (!outfile) return;
    
    // loads bytes into neuron values
    struct IL: public Iter {
      typedef const unsigned char* DataType;
      static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; }
    };
    // stores neuron values as bytes, clamped to 0..255
    struct IS: public Iter {
      typedef unsigned char* DataType;
      static inline void iter4(Neuron &n, DataType d, DataAccumType&) { *d = n.v < 0 ? 0 : n.v > 1 ? 255 : (unsigned char)(n.v*255.999); }
    };
    
    if (f && imagesInFile > 0) {
      Layer &fl = *layerFull;
      Layer &bl = fl.back();

      std::string outfile0(outfile);
      std::string outfile1 = outfile0 + ".1.tga";
      outfile0 += ".0.tga";

      int index = rand()%imagesInFile;
      // do not save or process the buffer unless a whole image was read into it
      if ( fseeko64(f, index*imageSize, SEEK_SET) == 0
        && fread(tmpdata.data(), imageSize, 1, f) == 1 )
      {
        tgaSave(outfile0.c_str(), tmpdata.data(), fl.layout.getW(), fl.layout.getH(), fl.layout.getD());

        iterateNeurons2<IL>(fl.layout, fl.layout, fl.neurons, tmpdata.data());
        fl.passFull(&bl, threadsCount);
        
        iterateNeurons2<IS>(bl.layout, bl.layout, bl.neurons, tmpdata.data());
        tgaSave(outfile1.c_str(), tmpdata.data(), bl.layout.getW(), bl.layout.getH(), bl.layout.getD());
      }
    }
    
    segment->saveDemo();
  }


  /// Selects the sample for iteration `iter`. Called by every thread of the barrier.
  ///  - measureOnly: thread 0 points the segment at measure crop `iter`.
  ///  - layerPre:    thread 0 picks an image and a crop offset (x, y) inside it.
  ///  - raw mode:    all threads convert their rows of the chosen crop into `values`,
  ///                 thread 0 points the segment at it.
  /// The choice is derived from `seed` and `iter` only, so in raw mode every
  /// thread computes the same index and offsets.
  void loadData(Barrier &barrier, int block, int iter, bool measureOnly) override {
    int tid = barrier.tid;
    int threads = barrier.threads;
    int sx = segment->sx;
    int sy = segment->sy;
    int sz = segment->sz;
    int sxz = sx*sz;

    if (measureOnly) {
      if (!tid) {
        segment->layout = measureLayout;
        segment->f_values = valuesMeasure.data() + iter*sy*sxz;
        x = y = z = 0;
      }
    } else
    if (layerPre) {
      if (!tid) {
        // NOTE(review): `seed & iter` is 0 for iter == 0 whatever the seed is;
        // looks like `^` or `+` was intended - confirm before changing the sample sequence
        unsigned int s = randomNext(seed & iter);
        int index = (s = randomNext(s))%imagesInMemory;
        x = (s = randomNext(s)) % (layerPre->layout.getW() - sx + 1);
        y = (s = randomNext(s)) % (layerPre->layout.getH() - sy + 1);
        z = 0;
        segment->layout = trainLayout;
        segment->f_values = values.data() + index*preparedImageSize;
      }
    } else {
      int w = layerFull->layout.getW();
      int h = layerFull->layout.getH();
      
      // NOTE(review): same `seed & iter` remark as in the layerPre branch
      unsigned int s = randomNext(seed & iter);
      int index = (s = randomNext(s))%imagesInMemory;
      int x0    = (s = randomNext(s))%(w - sx + 1);
      int y0    = (s = randomNext(s))%(h - sy + 1);
      
      // this thread converts crop rows tid, tid + threads, tid + 2*threads, ...
      int rowstride = w*sz;
      int dr = rowstride*threads - sxz; // source step from the end of a row to the next own row
      int vdr = sxz*(threads - 1);      // same step in the destination

      const unsigned char *id0 = data.data() + index*imageSize + y0*rowstride + x0*sz;
      const unsigned char *id = id0 + tid*rowstride;
      NeuronReal *iv = values.data() + tid*sxz;
      
      for(const unsigned char *e = id0 + sy*rowstride; id < e; id += dr, iv += vdr)
      for(const unsigned char *e = id + sxz;        id < e; ++id, ++iv)
        *iv = *id/(NeuronReal)255;
      
      if (!tid) {
        segment->layout = trainLayout;
        segment->f_values = values.data();
        x = 0, y = 0, z = 0;
      }
    }
  }
};


#endif