#ifndef TRAIN_CX4_INC_CPP
#define TRAIN_CX4_INC_CPP
#include "train.segment.inc.cpp"
#include "segment.cx4.inc.cpp"
#include "layer.inc.cpp"
class TrainerCx4: public TrainerSegment {
protected:
FILE *f;
std::vector<unsigned char> data;
std::vector<NeuronReal> values;
std::vector<NeuronReal> valuesMeasure;
std::vector<unsigned char> tmpdata;
std::vector<int> shuffle;
Layout trainLayout;
Layout measureLayout;
size_t imageSize;
size_t preparedImageSize;
int imagesInFile;
int imagesInMemory;
volatile unsigned int seed;
public:
Layer *layerFull;
Layer *layerPre;
int loadImagesCount;
int blocksPerLoading;
const char *infile;
const char *cachefile;
const char *outfile;
TrainerCx4():
f(),
imageSize(),
preparedImageSize(),
imagesInFile(),
imagesInMemory(),
seed(),
layerFull(),
layerPre(),
loadImagesCount(),
blocksPerLoading(1),
infile(),
cachefile(),
outfile() { }
protected:
void preprocess(unsigned char *src, NeuronReal *dst) {
struct IL: public Iter {
typedef const unsigned char* DataType;
static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; }
};
struct IS: public Iter {
typedef NeuronReal* DataType;
static inline void iter4(Neuron &n, DataType d, DataAccumType&) { *d = n.v; }
};
Layer &fl = *layerFull;
Layer &bl = *layerPre;
iterateNeurons2<IL>(fl.layout, fl.layout, fl.neurons, src);
fl.passFull(&bl, threadsCount);
iterateNeurons2<IS>(bl.layout, bl.layout, bl.neurons, dst);
}
bool loadImage(int fromIndex, int toIndex) {
unsigned char *src = data.data();
if (!layerPre) src += toIndex*imageSize;
fseeko64(f, fromIndex*imageSize, SEEK_SET);
if (!fread(src, imageSize, 1, f))
return fclose(f), f = nullptr, false;
if (layerPre) preprocess(src, values.data() + toIndex*preparedImageSize);
return true;
}
bool loadImages() {
for(int i = 0; i < imagesInMemory; ++i) {
int j = rand()%imagesInFile;
if (i != j) std::swap(shuffle[i], shuffle[j]);
}
typedef std::pair<int, int> Pair;
typedef std::set<Pair> Set;
Set set;
for(int i = 0; i < imagesInMemory; ++i)
set.insert(Pair(shuffle[i], i));
for(Set::iterator i = set.begin(); i != set.end(); ++i)
loadImage(i->first, i->second);
return true;
}
void prepareMeasure() {
if (measuresPerBlock <= 0) return;
int sy = segment->sy;
int sx = segment->sx;
int sz = segment->sz;
int sxz = sx*sz;
int w = (layerPre ? layerPre : layerFull)->layout.getW();
int h = (layerPre ? layerPre : layerFull)->layout.getH();
int rowstride = w*sz;
NeuronReal *dst = valuesMeasure.data();
for(int i = 0; i < measuresPerBlock; ++i) {
int index = rand()%imagesInMemory;
int x = rand()%(w - sx + 1);
int y = rand()%(h - sy + 1);
if (layerPre) {
const NeuronReal *src = values.data() + index*preparedImageSize + y*rowstride + x*sz;
for(int j = 0; j < sy; ++j, src += rowstride, dst += sxz)
memcpy(dst, src, sxz*sizeof(*dst));
} else {
const unsigned char *src = data.data() + index*preparedImageSize + y*rowstride + x*sz;
for(int j = 0; j < sy; ++j, src += rowstride - sxz)
for(int k = 0; k < sxz; ++k, ++src, ++dst)
*dst = *src/(NeuronReal)255;
}
}
}
bool prepare() override {
assert(infile);
assert(layerFull);
assert(loadImagesCount > 0);
Layer &fl = layerFull->front();
Layer &bl = layerFull->back();
imageSize = fl.layout.getActiveCount();
f = fopen(infile, "rb");
if (!f) return false;
fseeko64(f, 0, SEEK_END);
imagesInFile = ftello64(f)/imageSize;
if (imagesInFile < 1) return fclose(f), f = nullptr, false;
imagesInMemory = loadImagesCount > imagesInFile ? imagesInFile : loadImagesCount;
for(Layer *l = layerFull; l; l = l->next)
l->split(threadsCount);
Layout l = layerPre ? layerPre->layout : layerFull->layout;
assert(l.getW() >= segment->sx);
assert(l.getH() >= segment->sy);
assert(l.getD() == segment->sz);
measureLayout = Layout(segment->sx, segment->sy, segment->sz);
valuesMeasure.resize(measuresPerBlock * measureLayout.getActiveCount());
if (layerPre) {
assert(l);
preparedImageSize = layerPre->layout.getActiveCount();
trainLayout = Layout(l.getW(), l.getH(), l.getD());
data.resize(imageSize);
values.resize(imagesInMemory * preparedImageSize);
} else {
trainLayout = measureLayout;
data.resize(imagesInMemory * imageSize);
values.resize(segment->sx * segment->sy * segment->sz);
}
segment->f_values = values.data();
segment->layout = trainLayout;
tmpdata.resize(bl.layout.getActiveCount());
if (tmpdata.size() < imageSize) tmpdata.resize(imageSize);
size_t memsize = data.size()*sizeof(data.front())
+ values.size()*sizeof(values.front())
+ valuesMeasure.size()*sizeof(valuesMeasure.front())
+ tmpdata.size()*sizeof(tmpdata.front());
printf("allocated size: %lld\n", (long long)(memsize));
shuffle.resize(imagesInFile);
for(int i = 0; i < imagesInFile; ++i)
shuffle[i] = i;
if (!loadImages()) return false;
prepareMeasure();
return true;
}
void finish() override
{ if (f) fclose(f), f = nullptr; }
bool prepareBlock(int block, bool measureOnly) override {
if (block > 0 && blocksPerLoading > 0 && (block % blocksPerLoading) == 0 && !loadImages())
return false;
seed = rand();
return true;
}
void finishBlock(int block) override {
if (outfile) {
struct IL: public Iter {
typedef const unsigned char* DataType;
static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; }
};
struct IS: public Iter {
typedef unsigned char* DataType;
static inline void iter4(Neuron &n, DataType d, DataAccumType&) { *d = n.v < 0 ? 0 : n.v > 1 ? 255 : (unsigned char)(n.v*255.999); }
};
Layer &fl = *layerFull;
Layer &bl = fl.back();
std::string outfile0(outfile);
std::string outfile1 = outfile0 + ".1.tga";
outfile0 += ".0.tga";
int index = rand()%imagesInFile;
fseeko64(f, index*imageSize, SEEK_SET);
fread(tmpdata.data(), imageSize, 1, f);
tgaSave(outfile0.c_str(), tmpdata.data(), fl.layout.getW(), fl.layout.getH(), fl.layout.getD());
iterateNeurons2<IL>(fl.layout, fl.layout, fl.neurons, tmpdata.data());
fl.passFull(&bl, threadsCount);
iterateNeurons2<IS>(bl.layout, bl.layout, bl.neurons, tmpdata.data());
tgaSave(outfile1.c_str(), tmpdata.data(), bl.layout.getW(), bl.layout.getH(), bl.layout.getD());
segment->saveDemo();
}
}
void loadData(Barrier &barrier, int block, int iter, bool measureOnly) override {
int tid = barrier.tid;
int threads = barrier.threads;
int sx = segment->sx;
int sy = segment->sy;
int sz = segment->sz;
int sxz = sx*sz;
if (measureOnly) {
if (!tid) {
segment->layout = measureLayout;
segment->f_values = valuesMeasure.data() + iter*sy*sxz;
x = y = z = 0;
}
} else
if (layerPre) {
if (!tid) {
unsigned int s = randomNext(seed & iter);
int index = (s = randomNext(s))%imagesInMemory;
x = (s = randomNext(s)) % (layerPre->layout.getW() - sx + 1);
y = (s = randomNext(s)) % (layerPre->layout.getH() - sy + 1);
z = 0;
segment->layout = trainLayout;
segment->f_values = values.data() + index*preparedImageSize;
}
} else {
int w = layerFull->layout.getW();
int h = layerFull->layout.getH();
unsigned int s = randomNext(seed & iter);
int index = (s = randomNext(s))%imagesInMemory;
int x0 = (s = randomNext(s))%(w - sx + 1);
int y0 = (s = randomNext(s))%(h - sy + 1);
int rowstride = w*sz;
int dr = rowstride*threads - sxz;
int vdr = sxz*(threads - 1);
const unsigned char *id0 = data.data() + index*imageSize + y0*rowstride + x0*sz;
const unsigned char *id = id0 + tid*rowstride;
NeuronReal *iv = values.data() + tid*sxz;
for(const unsigned char *e = id0 + sy*rowstride; id < e; id += dr, iv += vdr)
for(const unsigned char *e = id + sxz; id < e; ++id, ++iv)
*iv = *id/(NeuronReal)255;
if (!tid) {
segment->layout = trainLayout;
segment->f_values = values.data();
x = 0, y = 0, z = 0;
}
}
}
};
#endif