diff --git a/simple/neural/build-nn-trainer-img-pp.sh b/simple/neural/build-nn-trainer-img-pp.sh
new file mode 100755
index 0000000..b6c6c7d
--- /dev/null
+++ b/simple/neural/build-nn-trainer-img-pp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -e
+
+c++ -Wall -DNDEBUG -O3 nn-trainer-img.cpp -lm -o nn-trainer-img-pp
+
+
+
+
diff --git a/simple/neural/build-nn-trainer2-pp.sh b/simple/neural/build-nn-trainer2-pp.sh
new file mode 100755
index 0000000..cc78b35
--- /dev/null
+++ b/simple/neural/build-nn-trainer2-pp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -e
+
+c++ -Wall -DNDEBUG -O3 nn-trainer2.cpp -lm -o nn-trainer2-pp
+
+
+
+
diff --git a/simple/neural/build-nn-trainer3-img-pp.sh b/simple/neural/build-nn-trainer3-img-pp.sh
new file mode 100755
index 0000000..5605767
--- /dev/null
+++ b/simple/neural/build-nn-trainer3-img-pp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -e
+
+c++ -Wall -DNDEBUG -O3 -pthread nn-trainer3-img.cpp -lm -o nn-trainer3-img-pp
+
+
+
+
diff --git a/simple/neural/build-nn-trainer3-pp.sh b/simple/neural/build-nn-trainer3-pp.sh
new file mode 100755
index 0000000..c777057
--- /dev/null
+++ b/simple/neural/build-nn-trainer3-pp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -e
+
+c++ -Wall -DNDEBUG -O3 -pthread nn-trainer3.cpp -lm -o nn-trainer3-pp
+
+
+
+
diff --git a/simple/neural/nn-trainer-img.cpp b/simple/neural/nn-trainer-img.cpp
new file mode 100644
index 0000000..28312c5
--- /dev/null
+++ b/simple/neural/nn-trainer-img.cpp
@@ -0,0 +1,157 @@
+
+#include <cstdio>
+#include <cstdlib>
+#include <ctime>
+
+#include <algorithm>
+
+#include "nnlayer.lnk.inc.cpp"
+#include "tga.inc.cpp"
+
+
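+// Trains straight from a raw on-disk image dataset: each block draws
+// blockSize random records, sorts the file offsets so reads stay mostly
+// sequential, then presents the records in shuffled order; `results` holds
+// the previous error per slot so avgSum tracks a sliding block average.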
+void imgTrain(Layer &l, const char *datafile, int size, const char *outfile, double trainRatio, int count) {
+  Layer &bl = l.back();
+
+  assert(!l.prev);
+  assert(datafile);
+  assert(count > 0 && size > 0);
+  assert(l.size == size);
+  assert(bl.size == size);
+
+  int blockSize = 1000;//1024*1024*1024/size;
+  assert(blockSize > 0);
+
+  FILE *f = fopen(datafile, "rb");
+  if (!f)
+    { printf("cannot open file: %s\n", datafile); return; }
+  fseeko64(f, 0, SEEK_END);
+  long long fsize = ftello64(f);
+  int xCount = (int)(fsize/size);
+  if (xCount <= 0)
+    { printf("no tests in file: %s\n", datafile); fclose(f); return; }
+
+  int *block = new int[blockSize*2];
+  int *shuffle = block + blockSize;
+  double *results = new double[blockSize];
+  unsigned char *blockData = new unsigned char[(blockSize + 1)*size];
+  unsigned char *blockResData = blockData + blockSize*size;
+  bool err = false;
+
+  for(int j = 0; j < blockSize; ++j)
+    { shuffle[j] = j; results[j] = 0; }
+
+  int blocksCount = (count - 1)/blockSize + 1;
+
+  printf("training %d (%d x %d blocks), tests: %d, ratio: %f:\n", blocksCount*blockSize, blocksCount, blockSize, xCount, trainRatio);
+
+  double avgSum = 0;
+  for(int i = 0; i < blocksCount; ++i) {
+    for(int j = 0; j < blockSize; ++j) {
+      block[j] = rand()%xCount;
+      std::swap(shuffle[j], shuffle[rand()%blockSize]);
+    }
+    std::sort(block, block + blockSize);
+
+    for(int j = 0; j < blockSize; ++j) {
+      fseeko64(f, block[j]*(long long)size, SEEK_SET);
+      if (!fread(blockData + j*size, size, 1, f))
+        { printf("cannot read data from file: %s\n", datafile); err = true; break; }
+    }
+    if (err) break;
+
+    printf("  next data block loaded\n");
+
+    double sumQ = 0;
+    for(int j = 0; j < blockSize; ++j) {
+      unsigned char *data = blockData + shuffle[j]*size;
+      for(double *ia = l.a, *e = ia + l.size; ia < e; ++ia, ++data)
+        *ia = *data/255.0;
+
+      double firstQ = 0, q = 0;
+      for(int repeat = 0; repeat < 1; ++repeat) {
+        l.pass();
+
+        for(double *ia = l.a, *iba = bl.a, *ibda = bl.da, *e = ia + l.size; ia < e; ++ia, ++iba, ++ibda) {
+          double d = *ia - *iba;
+          *ibda = d;
+          q += d*d;
+        }
+        q /= size;
+        if (!repeat) firstQ = q;
+
+        bl.backpass(trainRatio);
+      }
+
+      sumQ += firstQ;
+      avgSum += firstQ - results[j];
+      results[j] = firstQ;
+      int avgCnt = i ? blockSize : j + 1;
+      printf("  %4d: total: %6d, avg result: %f, last result: %f -> %f\n", j+1, i*blockSize+j+1, avgSum/avgCnt, firstQ, q);
+    }
+
+    printf("%4d: total: %6d, avg result: %f\n", i+1, (i+1)*blockSize, sumQ/blockSize);
+
+    if (outfile && !l.save(outfile))
+      { printf("cannot save neural network weights to file: %s\n", outfile); err = true; break; }
+
+    unsigned char *data = blockResData;
+    for(double *iba = bl.a, *e = iba + bl.size; iba < e; ++iba, ++data)
+      *data = (unsigned char)(*iba*255.999);
+    tgaSave("data/output/sampleX.tga", blockData + shuffle[blockSize-1]*size, 256, 256, 3);
+    tgaSave("data/output/sampleY.tga", blockResData, 256, 256, 3);
+  }
+
+  fclose(f);
+  delete[] block;
+  delete[] results;
+  delete[] blockData;
+
+  printf("finished\n");
+}
+
+
+int main() {
+  srand(time(NULL));
+
+  //const char *datafile = "data/img512-data.bin";
+  //const char *outfile = "data/output/img512-weights.bin";
+  const char *datafile = "data/img256-data.bin";
+  const char *outfile = "data/output/img256-weights.bin";
+
+  printf("create neural network\n");
+  //Layer l(nullptr, 512*512*3);
+  //new LayerLinkConvolution(l, 512, 512, 3, 256, 256, 3, 32);
+  //new LayerLinkConvolution(l, 256, 256, 3, 128, 128, 3, 64);
+  //new LayerLinkConvolution(l, 128, 128, 3, 64, 64, 3, 128);
+  //new LayerLinkConvolution(l, 64, 64, 3, 32, 32, 3, 256);
+  //new LayerLinkConvolution(l, 32, 32, 3, 16, 16, 4, 256);
+  //new LayerLinkConvolution(l, 16, 16, 4, 16, 16, 4, 256);
+  //new LayerLinkConvolution(l, 16, 16, 4, 16, 16, 4, 256);
+  //new LayerLinkConvolution(l, 16, 16, 4, 32, 32, 3, 256);
+  //new LayerLinkConvolution(l, 32, 32, 3, 64, 64, 3, 128);
+  //new LayerLinkConvolution(l, 64, 64, 3, 128, 128, 3, 64);
+  //new LayerLinkConvolution(l, 128, 128, 3, 256, 256, 3, 32);
+  //new LayerLinkConvolution(l, 256, 256, 3, 512, 512, 3, 16);
+
+  Layer l(nullptr, 256*256*3);
+  new LayerLinkConvolution(l, 256, 256, 3, 128, 128, 3, 32);
+  new LayerLinkConvolution(l, 128, 128, 3, 64, 64, 3, 64);
+  new LayerLinkConvolution(l, 64, 64, 3, 32, 32, 3, 128);
+  new LayerLinkConvolution(l, 32, 32, 3, 16, 16, 4, 256);
+  new LayerLinkConvolution(l, 16, 16, 4, 16, 16, 4, 256);
+  new LayerLinkConvolution(l, 16, 16, 4, 16, 16, 4, 256);
+  new LayerLinkConvolution(l, 16, 16, 4, 32, 32, 3, 128);
+  new LayerLinkConvolution(l, 32, 32, 3, 64, 64, 3, 64);
+  new LayerLinkConvolution(l, 64, 64, 3, 128, 128, 3, 32);
+  new LayerLinkConvolution(l, 128, 128, 3, 256, 256, 3, 16);
+
+  printf("  neurons: %d, links %d, memSize: %llu\n", l.totalSize(), l.totalLinks(), (unsigned long long)l.totalMemSize());
+
+  printf("try load previously saved network\n");
+  l.load(outfile);
+
+  printf("train\n");
+  imgTrain(l, datafile, l.size, outfile, 0.1, 1000000);
+
+  return 0;
+}
+
diff --git a/simple/neural/nn-trainer.cpp b/simple/neural/nn-trainer.cpp
index 2465ed2..c6af313 100644
--- a/simple/neural/nn-trainer.cpp
+++ b/simple/neural/nn-trainer.cpp
@@ -39,12 +39,12 @@ int main() {
   //new LayerSimple(l, 10);
 
   Layer l(nullptr, 784);
-  new LayerLinkConvolution(l, 28, 28, 1, 22, 22, 1, 60, 1, 3);
-  new LayerLinkConvolution(l, 22, 22, 1, 14, 14, 1, 100, 1, 4);
-  new LayerLinkConvolution(l, 14, 14, 1, 4, 4, 1, 140, 1, 5);
+  new LayerLinkConvolution(l, 28, 28, 1, 22, 22, 1, 60);
+  new LayerLinkConvolution(l, 22, 22, 1, 14, 14, 1, 100);
+  new LayerLinkConvolution(l, 14, 14, 1, 4, 4, 1, 140);
   new LayerSimple(l, 10);
 
-  printf("  neurons: %d, links %d\n", l.totalSize(), l.totalLinks());
+  printf("  neurons: %d, links %d, memSize: %llu\n", l.totalSize(), l.totalLinks(), (unsigned long long)l.totalMemSize());
 
   //printf("try load previously saved network\n");
   //l.load(filename);
diff --git a/simple/neural/nn-trainer2.cpp b/simple/neural/nn-trainer2.cpp
new file mode 100644
index 0000000..152b452
--- /dev/null
+++ b/simple/neural/nn-trainer2.cpp
@@ -0,0 +1,96 @@
+
+#include <cstdio>
+#include <ctime>
+
+#include "nnlayer2.inc.cpp"
+#include "nnlayer2.conv.inc.cpp"
+#include "nnlayer2.mt.inc.cpp"
+
+
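+// Loads the whole training set into memory: each record is sizeX input bytes
+// plus one class-index byte, expanded in place into a one-hot row of sizeY
+// bytes (255 marks the correct class); training then runs on random
+// mini-batches of blockSize shuffled records.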
+bool train(const char *infile, const char *outfile, Layer &l, int blockSize, int totalCount, Real trainRatio) {
+  assert(blockSize > 0);
+  int blockCount = totalCount/blockSize;
+  assert(blockCount > 0);
+  assert(!l.prev);
+  assert(l.size && l.back().size);
+
+  printf("load training data\n");
+
+  FILE *f = fopen(infile, "rb");
+  if (!f)
+    return printf("cannot open file '%s' for read\n", infile), false;
+  fseek(f, 0, SEEK_END);
+  int fs = ftell(f);
+  fseek(f, 0, SEEK_SET);
+
+  int sizeX = l.size;
+  int sizeY = l.back().size;
+  int count = fs/(sizeX+1);
+  if (count < blockSize)
+    return printf("file '%s' is smaller than the minimal size\n", infile), fclose(f), false;
+
+  unsigned char *data = new unsigned char[(sizeX + sizeY)*count];
+  memset(data, 0, (sizeX + sizeY)*count);
+  for(int i = 0; i < count; ++i) {
+    unsigned char *d = data + (sizeX + sizeY)*i;
+    if (!fread(d, sizeX+1, 1, f) || d[sizeX] >= sizeY)
+      return printf("cannot read from file '%s'\n", infile), delete[] data, fclose(f), false;
+    d += sizeX;
+    unsigned char c = *d;
+    *d = 0;
+    d[c] = 255;
+  }
+  fclose(f);
+
+  printf("train %d x %d = %d, ratio: %f\n", blockCount, blockSize, blockCount*blockSize, trainRatio);
+
+  int *shuffle = new int[blockSize];
+  TrainMT tmt;
+  tmt.layer = &l;
+  tmt.dataX = data;
+  tmt.dataY = data + sizeX;
+  tmt.strideX = tmt.strideY = sizeX + sizeY;
+  tmt.shuffle = shuffle;
+  tmt.count = blockSize;
+  tmt.trainRatio = trainRatio;
+  for(int i = 0; i < blockCount; ++i) {
+    for(int j = 0; j < blockSize; ++j)
+      shuffle[j] = rand()%count;
+    Real res = tmt.train(8);
+    printf("%4d, total %7d, avg.result %f\n", i+1, (i+1)*blockSize, res);
+    if ( ((i+1)%10) == 0 || i+1 == blockCount ) {
+      if (!l.saveAll(outfile)) return delete[] data, delete[] shuffle, false;
+      printf("  saved\n");
+    }
+  }
+
+  delete[] shuffle;
+  delete[] data;
+  return true;
+}
+
+
+int main() {
+  srand(time(NULL));
+
+  const char *infile = "data/symbols-data.bin"; // 28x28
+  const char *outfile = "data/output/weights.bin";
+
+  printf("create neural network\n");
+  Layer l(nullptr, 784);
+  createConv(l, 28, 28, 1, 22, 22, 1, 60);
+  createConv(l, 22, 22, 1, 14, 14, 1, 100);
+  createConv(l, 14, 14, 1, 4, 4, 1, 140);
+  createConv(l, 4, 4, 1, 1, 1, 10, 16);
+
+  printf("  neurons: %d, links %d, memSize: %llu\n", l.totalNeurons(), l.totalLinks(), (unsigned long long)l.totalMemSize());
+
+  //printf("try load previously saved network\n");
+  //l.loadAll(outfile);
+
+  printf("train\n");
+  train(infile, outfile, l, 10000, 2000000, 0.1);
+
+  return 0;
+}
+
diff --git a/simple/neural/nn-trainer3-img.cpp b/simple/neural/nn-trainer3-img.cpp
new file mode 100644
index 0000000..3290622
--- /dev/null
+++ b/simple/neural/nn-trainer3-img.cpp
@@ -0,0 +1,146 @@
+
+#include <cstdio>
+#include <cstdlib>
+#include <ctime>
+
+#include <algorithm>
+#include <chrono>
+
+#include "nnlayer3.mt.inc.cpp"
+#include "tga.inc.cpp"
+
+
+long long timeUs() {
+  static std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
+  return (long long)std::chrono::duration_cast<std::chrono::microseconds>( std::chrono::steady_clock::now() - begin ).count();
+}
+
+
+void imgTrain(Layer &l, const char *datafile, int size, const char *outfile, int blockSize, int blocksCount, Real trainRatio, int threads) {
+  Layer &fl = l.front();
+  Layer &bl = l.back();
+
+  assert(!l.prev);
+  assert(datafile);
+  assert(size > 0);
+  assert(fl.countNeurons() == size);
+  assert(bl.countNeurons() == size);
+
+  assert(blockSize > 0);
+  assert(blocksCount > 0);
+  assert(trainRatio > 0);
+  assert(threads > 0);
+
+  FILE *f = fopen(datafile, "rb");
+  if (!f)
+    { printf("cannot open file: %s\n", datafile); return; }
+  fseeko64(f, 0, SEEK_END);
+  long long fsize = ftello64(f);
+  int xCount = (int)(fsize/size);
+  if (xCount <= 0)
+    { printf("no tests in file: %s\n", datafile); fclose(f); return; }
+
+  printf("allocate %lld bytes for tests\n", ((long long)blockSize + 1)*size);
+
+  int *block = new int[blockSize*2];
+  int *shuffle = block + blockSize;
+  unsigned char *blockData = new unsigned char[(blockSize + 1)*size];
+  unsigned char *blockResData = blockData + blockSize*size;
+  bool err = false;
+
+  for(int j = 0; j < blockSize; ++j)
+    shuffle[j] = j;
+
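+  // Autoencoder-style setup: input and target both point at the same image
+  // block, so the network is trained to reproduce its own input.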
+  TrainMT tmt;
+  tmt.layer = &fl;
+  tmt.dataX = blockData;
+  tmt.dataY = blockData;
+  tmt.strideX = tmt.strideY = size;
+  tmt.shuffle = shuffle;
+  tmt.count = blockSize;
+  tmt.trainRatio = trainRatio;
+
+  printf("training %d (%d x %d blocks), tests: %d, ratio: %f:\n", blocksCount*blockSize, blocksCount, blockSize, xCount, trainRatio);
+
+  long long t0 = timeUs();
+  for(int i = 0; i < blocksCount; ++i) {
+    for(int j = 0; j < blockSize; ++j) {
+      block[j] = rand()%xCount;
+      std::swap(shuffle[j], shuffle[rand()%blockSize]);
+    }
+    std::sort(block, block + blockSize);
+
+    for(int j = 0; j < blockSize; ++j) {
+      fseeko64(f, block[j]*(long long)size, SEEK_SET);
+      if (!fread(blockData + j*size, size, 1, f))
+        { printf("cannot read data from file: %s\n", datafile); err = true; break; }
+    }
+    if (err) break;
+
+    //printf("  next data block loaded\n");
+
+    long long t = timeUs();
+    double res = tmt.train(threads);
+    long long dt = timeUs() - t;
+
+    if (outfile && !fl.saveAll(outfile))
+      { printf("cannot save neural network weights to file: %s\n", outfile); err = true; break; }
+
+    unsigned char *data = blockResData;
+    for(Neuron *ibn = bl.neurons, *e = ibn + size; ibn < e; ++ibn, ++data) {
+      Real v = (ibn->v - 0.25)*2;
+      *data = v < 0 ? 0u : v > 1 ? 255u : (unsigned char)(v * 255.999);
+    }
+    tgaSave("data/output/sampleX.tga", blockData + shuffle[blockSize-1]*size, 256, 256, 3);
+    tgaSave("data/output/sampleY.tga", blockResData, 256, 256, 3);
+
+    long long t1 = timeUs();
+    long long dt0 = t1 - t0;
+    t0 = t1;
+
+    printf("%4d: total: %6d, avg result: %f, time: %f + %f = %f\n", i+1, (i+1)*blockSize, res, (dt0-dt)*0.000001, dt*0.000001, dt0*0.000001);
+  }
+
+  fclose(f);
+  delete[] block;
+  delete[] blockData;
+
+  printf("finished\n");
+}
+
+
+int main() {
+  srand(time(NULL));
+
+  const char *datafile = "data/img256-data.bin";
+  const char *outfile = "data/output/img256-weights.bin";
+
+  printf("create neural network\n");
+
+  Layer l(nullptr, 256, 256, 3);
+  //new Layer(&l, 128, 128, 3, 6);
+  //new Layer(&l, 64, 64, 3, 8);
+  //new Layer(&l, 32, 32, 3, 11);
+  //new Layer(&l, 16, 16, 4, 16);
+  //new Layer(&l, 16, 16, 4, 16);
+  //new Layer(&l, 16, 16, 4, 16);
+  //new Layer(&l, 32, 32, 3, 11);
+  //new Layer(&l, 64, 64, 3, 8);
+  //new Layer(&l, 128, 128, 3, 6);
+  new Layer(&l, 256, 256, 3, 4);
+  new Layer(&l, 256, 256, 3, 4);
+  new Layer(&l, 256, 256, 3, 4);
+
+  printf("  neurons: %d, links %d, memSize: %llu\n", l.totalNeurons(), l.totalLinks(), (unsigned long long)l.totalMemSize());
+
+  if (outfile) {
+    printf("try load previously saved network\n");
+    l.loadAll(outfile);
+  }
+
+  printf("train\n");
+  imgTrain(l, datafile, l.countNeurons(), outfile, 1000, 10000, 0.1, 4);
+
+  return 0;
+}
+
diff --git a/simple/neural/nn-trainer3.cpp b/simple/neural/nn-trainer3.cpp
new file mode 100644
index 0000000..327bfec
--- /dev/null
+++ b/simple/neural/nn-trainer3.cpp
@@ -0,0 +1,112 @@
+
+#include <cstdio>
+#include <ctime>
+
+#include <chrono>
+
+#include "nnlayer3.inc.cpp"
+#include "nnlayer3.mt.inc.cpp"
+
+
+long long timeUs() {
+  static std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
+  return (long long)std::chrono::duration_cast<std::chrono::microseconds>( std::chrono::steady_clock::now() - begin ).count();
+}
+
+
+bool train(const char *infile, const char *outfile, Layer &l, int blockSize, int totalCount, Real trainRatio) {
+  assert(blockSize > 0);
+  int blockCount = totalCount/blockSize;
+  assert(blockCount > 0);
+  assert(!l.prev);
+  assert(l.countNeurons() && l.back().countNeurons());
+
+  printf("load training data\n");
+
+  FILE *f = fopen(infile, "rb");
+  if (!f)
+    return printf("cannot open file '%s' for read\n", infile), false;
+  fseek(f, 0, SEEK_END);
+  int fs = ftell(f);
+  fseek(f, 0, SEEK_SET);
+
+  int sizeX = l.countNeurons();
+  int sizeY = l.back().countNeurons();
+  int count = fs/(sizeX+1);
+  if (count < blockSize)
+    return printf("file '%s' is smaller than the minimal size\n", infile), fclose(f), false;
+
+  unsigned char *data = new unsigned char[(sizeX + sizeY)*count];
+  memset(data, 0, (sizeX + sizeY)*count);
+  for(int i = 0; i < count; ++i) {
+    unsigned char *d = data + (sizeX + sizeY)*i;
+    if (!fread(d, sizeX+1, 1, f) || d[sizeX] >= sizeY)
+      return printf("cannot read from file '%s'\n", infile), delete[] data, fclose(f), false;
+    d += sizeX;
+    unsigned char c = *d;
+    *d = 0;
+    d[c] = 255;
+  }
+  fclose(f);
+
+  printf("train %d x %d = %d, ratio: %f\n", blockCount, blockSize, blockCount*blockSize, trainRatio);
+
+  int *shuffle = new int[blockSize];
+  TrainMT tmt;
+  tmt.layer = &l;
+  tmt.dataX = data;
+  tmt.dataY = data + sizeX;
+  tmt.strideX = tmt.strideY = sizeX + sizeY;
+  tmt.shuffle = shuffle;
+  tmt.count = blockSize;
+  tmt.trainRatio = trainRatio;
+
+  long long timeStartUs = timeUs();
+  for(int i = 0; i < blockCount; ++i) {
+    long long timeBlockStartUs = timeUs();
+
+    for(int j = 0; j < blockSize; ++j)
+      shuffle[j] = rand()%count;
+    double res = tmt.train(4);
+
+    long long dt = timeUs() - timeBlockStartUs;
+
+    printf("%4d, total %7d, avg.result %f, time: %f\n", i+1, (i+1)*blockSize, res, dt*0.000001);
+    if ( ((i+1)%100) == 0 || i+1 == blockCount ) {
+      if (!l.saveAll(outfile)) return delete[] data, delete[] shuffle, false;
+      printf("  saved\n");
+    }
+  }
+
+  long long dt = timeUs() - timeStartUs;
+  printf("finished in time: %f\n", dt*0.000001);
+
+  delete[] shuffle;
+  delete[] data;
+  return true;
+}
+
+
+int main() {
+  srand(time(NULL));
+
+  const char *infile = "data/symbols-data.bin"; // 28x28
+  const char *outfile = "data/output/weights.bin";
+
+  printf("create neural network\n");
+  Layer l(nullptr, 28, 28, 1);
+  new Layer(&l, 14, 14, 1, 28);
+  new Layer(&l, 7, 7, 1, 14);
+  new Layer(&l, 1, 1, 10, 7);
+
+  printf("  neurons: %d, links %d, memSize: %llu\n", l.totalNeurons(), l.totalLinks(), (unsigned long long)l.totalMemSize());
+
+  //printf("try load previously saved network\n");
+  //l.loadAll(outfile);
+
+  printf("train\n");
+  train(infile, outfile, l, 10000, 1000000, 0.01);
+
+  return 0;
+}
+
diff --git a/simple/neural/nnlayer.conv.inc.cpp b/simple/neural/nnlayer.conv.inc.cpp
index ff792a5..e05c8e6 100644
--- a/simple/neural/nnlayer.conv.inc.cpp
+++ b/simple/neural/nnlayer.conv.inc.cpp
@@ -26,6 +26,7 @@ public:
     double k = RaLU ? 1.0/(WW*psz*sz) : 1;
     for(double *iw = w, *e = iw + wsize; iw < e; ++iw)
       *iw = (rand()/(double)RAND_MAX*2 - 1)*k;
+    memsize += wsize*sizeof(double);
   }
 
   Layer& pass() override {
@@ -133,6 +134,7 @@ public:
     dw = w + wsize;
     for(double *iw = w, *e = iw + wsize; iw < e; ++iw)
       *iw = (rand()/(double)RAND_MAX*2 - 1)*1;
+    memsize += (wsize + WW*psz)*sizeof(double);
   }
 
   Layer& pass() override {
diff --git a/simple/neural/nnlayer.inc.cpp b/simple/neural/nnlayer.inc.cpp
index a3cef7e..3edf30b 100644
--- a/simple/neural/nnlayer.inc.cpp
+++ b/simple/neural/nnlayer.inc.cpp
@@ -13,17 +13,19 @@
 class Layer {
 public:
   Layer *prev, *next;
+  size_t memsize;
   int size, wsize, links;
   double *a, *da, *w;
 
   Layer(Layer *prev, int size):
-    prev(), next(), size(size), wsize(), links(), w()
+    prev(), next(), memsize(), size(size), wsize(), links(), w()
   {
     assert(size > 0);
     a = new double[size*2];
     da = a + size;
     memset(a, 0, sizeof(*a)*size*2);
     if (prev) (this->prev = &prev->back())->next = this;
+    memsize += size*2*sizeof(double);
   }
 
   virtual ~Layer() {
@@ -43,6 +45,8 @@ public:
 
   inline Layer& back()
     { Layer *l = this; while(l->next) l = l->next; return *l; }
+  inline size_t totalMemSize() const
+    { size_t s = 0; for(const Layer *l = this; l; l = l->next) s += l->memsize; return s; }
   inline int totalSize() const
     { int c = 0; for(const Layer *l = this; l; l = l->next) c += l->size; return c; }
   inline int totalLinks() const
@@ -111,6 +115,7 @@ public:
     double k = 1.0/this->prev->size;
     for(double *iw = w, *e = iw + wsize; iw < e; ++iw)
       *iw = (rand()/(double)RAND_MAX*2 - 1)*k;
+    memsize += wsize*sizeof(double);
   }
 
   Layer& pass() override {
diff --git a/simple/neural/nnlayer.lnk.inc.cpp b/simple/neural/nnlayer.lnk.inc.cpp
index 3efc334..d96a9c6 100644
--- a/simple/neural/nnlayer.lnk.inc.cpp
+++ b/simple/neural/nnlayer.lnk.inc.cpp
@@ -23,11 +23,20 @@ public:
     memset(wa, 0, sizeof(*wa)*wsize*2);
     for(double *iw = w, *e = iw + wsize; iw < e; ++iw)
       *iw = rand()/(double)RAND_MAX*2 - 1;
+    memsize += wsize*sizeof(double) + wsize*2*sizeof(double*);
   }
 
   ~LayerLink()
     { delete[] wa; }
 
+  bool selfCheck() const {
+    for(int i = 0; i < wsize; ++i)
+      if ( !wa[i] || wa[i] < prev->a || wa[i] >= prev->a + prev->size
+        || !wda[i] || wda[i] < prev->da || wda[i] >= prev->da + prev->size )
+        return false;
+    return true;
+  }
+
   Layer& pass() override {
     double *ia = a;
     double *iw = w;
@@ -72,69 +81,64 @@ public:
 
 class LayerLinkConvolution: public LayerLink {
 public:
-  LayerLinkConvolution(Layer &prev, int psx, int psy, int psz, int sx, int sy, int sz, int lsize, int step = 1, int pad = 0):
+  LayerLinkConvolution(Layer &prev, int psx, int psy, int psz, int sx, int sy, int sz, int lsize):
     LayerLink(prev, sx*sy*sz, lsize*psz)
   {
     assert(psx > 0 && psy > 0 && psz > 0);
     assert(sx > 0 && sy > 0 && sz > 0);
-    assert(step > 0 && pad >= 0);
     assert(psx*psy*psz == this->prev->size);
-    assert(pad + (sx-1)*step < psx);
-    assert(pad + (sy-1)*step < psy);
+    assert(lsize > 0 && lsize <= psx*psy);
 
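+    // Candidate offsets are ordered by jittered squared radius: rnd[] breaks
+    // ties pseudo-randomly, so each output neuron picks a roughly circular
+    // patch of lsize source cells around its mapped center.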
-    int hs = (int)ceil(sqrt(lsize)) + 1;
+    int hs = (int)sqrt(lsize*1.5) + 2;
     int s = hs*2 + 1;
+
     struct Point {
       int x, y, r;
-      inline bool operator< (const Point &p) const { return r < p.r; }
+      inline bool operator<(const Point &b) const
+        { return r < b.r; }
     } *points = new Point[s*s], *p = points;
-    for(int y = -hs; y <= hs; ++y) {
-      for(int x = -hs; x <= hs; ++x, ++p) {
-        int sector = x >= 0 && y > 0 ? 1
-                   : y >= 0 && x < 0 ? 2
-                   : x <= 0 && y < 0 ? 3
-                   : y <= 0 && x > 0 ? 4
-                   : 0;
-        int subsector = sector % 2 ? abs(x) >= abs(y) : abs(y) >= abs(x);
-        p->x = x;
-        p->y = y;
-        p->r = (x*x + y*y)*10 + sector*2 + subsector;
-      }
-    }
-    std::sort(points, points + s*s);
+    int r = 0;
+    static const int rnd[] = { 9, 12, 4, 6, 0, 15, 13, 8, 2, 3, 10, 1, 5, 11, 14, 7 };
+    for(int y = -hs; y <= hs; ++y)
+      for(int x = -hs; x <= hs; ++x, ++r, ++p)
+        { p->x = x, p->y = y, p->r = (x*x + y*y)*16 + rnd[r%16]; }
+    std::sort(points, p);
 
     int *order = new int[lsize];
-    for(int z = 0; z < sz; ++z) {
-      for(int y = 0; y < sy; ++y) {
-        for(int x = 0; x < sx; ++x) {
-          p = points;
-          int *io = order;
-          for(int i = 0; i < lsize; ++i, ++io) {
-            int xx, yy;
-            do {
-              xx = pad + x*step + p->x;
-              yy = pad + y*step + p->y;
-              ++p;
-            } while(xx < 0 || yy < 0 || xx >= psx || yy >= psy);
-            *io = yy*psx + xx;
-          }
-          std::sort(order, order + lsize);
-
-          double **iwa  = &wa [ (z*sx*sy + y*sx + x)*this->lsize ];
-          double **iwda = &wda[ (z*sx*sy + y*sx + x)*this->lsize ];
-          for(int pz = 0; pz < psz; ++pz) {
-            for(int i = 0; i < lsize; ++i, ++iwa, ++iwda) {
-              *iwa  = &this->prev->a [pz*psx*psy + order[i]];
-              *iwda = &this->prev->da[pz*psx*psy + order[i]];
+
+    for(int y = 0; y < sy; ++y) {
+      for(int x = 0; x < sx; ++x) {
+        int cx = (int)((x + 0.5)/(sx + 1)*(psx + 1));
+        int cy = (int)((y + 0.5)/(sy + 1)*(psy + 1));
+
+        p = points;
+        for(int l = 0; l < lsize; ++l) {
+          int px, py;
+          do { px = cx + p->x; py = cy + p->y; ++p; }
+          while(px < 0 || py < 0 || px >= psx || py >= psy);
+          order[l] = py*psx + px;
+        }
+        std::sort(order, order + lsize);
+
+        for(int z = 0; z < sz; ++z) {
+          for(int l = 0; l < lsize; ++l) {
+            for(int pz = 0; pz < psz; ++pz) {
+              int i = (((y*sx + x)*sz + z)*lsize + l)*psz + pz;
+              int pi = order[l]*psz + pz;
+              assert(i >= 0 && i < wsize);
+              assert(pi >= 0 && pi < this->prev->size);
+              wa[i]  = &this->prev->a[pi];
+              wda[i] = &this->prev->da[pi];
             }
           }
         }
       }
     }
-    delete[] order;
     delete[] points;
+    delete[] order;
+    assert(selfCheck());
   }
 };
diff --git a/simple/neural/nnlayer2.conv.inc.cpp b/simple/neural/nnlayer2.conv.inc.cpp
new file mode 100644
index 0000000..b5f8d20
--- /dev/null
+++ b/simple/neural/nnlayer2.conv.inc.cpp
@@ -0,0 +1,77 @@
+#ifndef NNLAYER2_CONV_INC_CPP
+#define NNLAYER2_CONV_INC_CPP
+
+
+#include "nnlayer2.inc.cpp"
+
+
+
+Layer* createConv(Layer &prev, int psx, int psy, int psz, int sx, int sy, int sz, int lsize) {
+  struct Point {
+    int x, y, r;
+    inline bool operator<(const Point &b) const { return r < b.r; }
+  };
+
+  static const int hs = 256;
+  static const int s = hs*2 + 1;
+  static const int rnd[] = { 9, 12, 4, 6, 0, 15, 13, 8, 2, 3, 10, 1, 5, 11, 14, 7 };
+  static std::vector<Point> points;
+
+  if (points.empty()) {
+    points.resize(s*s);
+    Point *p = points.data();
+    for(int y = -hs, r = 0; y <= hs; ++y)
+      for(int x = -hs; x <= hs; ++x, ++r, ++p)
+        { p->x = x, p->y = y, p->r = (x*x + y*y)*16 + rnd[r%16]; }
+    std::sort(points.begin(), points.end());
+  }
+
+  Layer &pl = prev.back();
+  assert(psx > 0 && psy > 0 && psz > 0);
+  assert(sx > 0 && sy > 0 && sz > 0);
+  assert(psx*psy*psz == pl.size);
+  assert(lsize > 0 && lsize <= psx*psy);
+
+  Layer &cl = *new Layer(&pl, sx*sy*sz, lsize*psz);
+  assert(cl.size && cl.lsize);
+
+  const Point *pb = points.data();
+  int *order = new int[lsize];
+
+  for(int y = 0; y < sy; ++y) {
+    for(int x = 0; x < sx; ++x) {
+      int cx = (int)((x + 0.5)/(sx + 1)*(psx + 1));
+      int cy = (int)((y + 0.5)/(sy + 1)*(psy + 1));
+
+      const Point *p = pb;
+      for(int l = 0; l < lsize; ++l) {
+        int px, py;
+        do { assert(p < pb + points.size()); px = cx + p->x; py = cy + p->y; ++p; }
+        while(px < 0 || py < 0 || px >= psx || py >= psy);
+        order[l] = py*psx + px;
+      }
+      std::sort(order, order + lsize);
+
+      for(int z = 0; z < sz; ++z) {
+        for(int l = 0; l < lsize; ++l) {
+          for(int pz = 0; pz < psz; ++pz) {
+            int i = (((y*sx + x)*sz + z)*lsize + l)*psz + pz;
+            int pi = order[l]*psz + pz;
+            assert(i >= 0 && i < cl.size*cl.lsize);
+            assert(pi >= 0 && pi < pl.size);
+            cl.links[i].nprev = pi;
+          }
+        }
+      }
+    }
+  }
+
+  delete[] order;
+  cl.prepareBackLinks();
+
+  return &cl;
+}
+
+
+
+#endif
diff --git a/simple/neural/nnlayer2.inc.cpp b/simple/neural/nnlayer2.inc.cpp
new file mode 100644
index 0000000..fb4a44e
--- /dev/null
+++ b/simple/neural/nnlayer2.inc.cpp
@@ -0,0 +1,258 @@
+#ifndef NNLAYER2_INC_CPP
+#define NNLAYER2_INC_CPP
+
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cmath>
+
+#include <algorithm>
+#include <vector>
+
+
+typedef float Real;
+
+
+template<typename T>
+bool twrite(const T &x, FILE *f)
+  { return fwrite(&x, sizeof(x), 1, f); }
+template<typename T>
+bool tread(T &x, FILE *f)
+  { return fread(&x, sizeof(x), 1, f); }
+
+
+struct Link;
+struct Neuron {
+  Real v, d;
+  inline Neuron(): v(), d() { }
+};
+
+struct Link {
+  int nprev, lnext;
+  Real w;
+  inline Link(): nprev(), lnext(), w((Real)rand()/(Real)RAND_MAX*2 - 1) { }
+};
+
+
+class Layer {
+public:
+  Layer *prev, *next;
+
+  int size, lsize;
+  Neuron *neurons;
+  Link *links;
+  int lfirst;
+
+  explicit Layer(Layer *prev = nullptr, int size = 0, int lsize = 0):
+    prev(), next(), size(), lsize(), neurons(), links(), lfirst()
+  {
+    while(prev && prev->next) prev = prev->next;
+    if (prev) prev->next = this;
+    this->prev = prev;
+    init(size, lsize);
+  }
+
+  virtual ~Layer() {
+    if (next) delete next;
+    if (prev) prev->next = nullptr;
+    delete[] neurons;
+    if (links) delete[] links;
+  }
+
+  bool init(int size, int lsize = 0) {
+    clear();
+    if (size <= 0) return false;
+    if (lsize < 0) lsize = 0;
+
+    if (prev ? lsize <= 0 : lsize) return false;
+    if (size) neurons = new Neuron[size];
+    if (lsize) links = new Link[lsize*size];
+    this->size = size;
+    this->lsize = lsize;
+    return true;
+  }
+
+  void clear() {
+    if (neurons) delete[] neurons;
+    if (links) delete[] links;
+    this->size = lsize = 0;
+    neurons = nullptr;
+    links = nullptr;
+    lfirst = 0;
+  }
+
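+  // Threads all links into a single list (via lnext) ordered by source
+  // neuron nprev, so the backward pass can visit every link feeding the
+  // same previous-layer neuron consecutively; ordering is established by
+  // repeated adjacent swaps (a bubble sort over the list).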
+  void prepareBackLinks() {
+    if (!links || lsize <= 0) return;
+
+    lfirst = 0;
+    int lend = size*lsize;
+    for(Link *il = links, *e = il + lend; il < e; ++il) {
+      assert(prev && prev->neurons && il->nprev >= 0 && il->nprev < prev->size);
+      il->lnext = il - links + 1;
+    }
+
+    while(true) {
+      bool done = true;
+      for(int *il = &lfirst; links[*il].lnext != lend; il = &links[*il].lnext) {
+        int a = *il, b = links[a].lnext;
+        if (links[a].nprev > links[b].nprev) {
+          links[a].lnext = links[b].lnext;
+          links[b].lnext = a;
+          *il = b;
+          done = false;
+        }
+      }
+      if (done) break;
+    }
+
+    #ifndef NDEBUG
+    if (lsize) {
+      int next = 0;
+      for(int il = lfirst; il != lend; il = links[il].lnext) {
+        assert(links[il].nprev == next || links[il].nprev == next-1);
+        if (links[il].nprev == next) ++next;
+      }
+      assert(next == prev->size);
+    }
+    #endif
+  }
+
+  bool save(FILE *f) const {
+    if (!twrite(size, f) || !twrite(lsize, f))
+      return false;
+    for(Link *il = links, *e = il + lsize*size; il < e; ++il)
+      if (!twrite(il->nprev, f) || !twrite(il->w, f))
+        return false;
+    return true;
+  }
+
+  bool load(FILE *f) {
+    clear();
+    int size = 0, lsize = 0;
+    if (!tread(size, f) || !tread(lsize, f) || !init(size, lsize))
+      return false;
+    for(Link *il = links, *e = il + lsize*size; il < e; ++il)
+      if (!tread(il->nprev, f) || !tread(il->w, f) || !prev || !prev->neurons || il->nprev < 0 || il->nprev >= prev->size)
+        return false;
+    prepareBackLinks();
+    return true;
+  }
+
+  bool saveAll(FILE *f) const
+    { return save(f) && (!next || next->saveAll(f)); }
+  bool loadAll(FILE *f)
+    { return load(f) && (!next || next->loadAll(f)); }
+
+  bool saveAll(const char *filename) const {
+    assert(!prev);
+    FILE *f = fopen(filename, "wb");
+    if (!f)
+      { printf("cannot open file for write: %s\n", filename); return false; }
+    int count = totalLayers();
+    if (!twrite(count, f) || !saveAll(f))
+      { printf("cannot save to file: %s\n", filename); fclose(f); return false; }
+    fclose(f);
+    return true;
+  }
+
+  bool loadAll(const char *filename) {
+    assert(!prev);
+    FILE *f = fopen(filename, "rb");
+    if (!f)
+      { printf("cannot open file for read: %s\n", filename); return false; }
+    int count = 0;
+    if (!tread(count, f) || count <= 0)
+      { printf("cannot load from file: %s\n", filename); fclose(f); return false; }
+    if (next)
+      delete next;
+    while(--count)
+      new Layer(this);
+    if (!loadAll(f))
+      { printf("cannot load from file: %s\n", filename); fclose(f); return false; }
+    fclose(f);
+    return true;
+  }
+
+  int totalLayers() const
+    { int t = 0; for(const Layer *l = this; l; l = l->next) ++t; return t; }
+  int totalNeurons() const
+    { int t = 0; for(const Layer *l = this; l; l = l->next) t += l->size; return t; }
+  int totalLinks() const
+    { int t = 0; for(const Layer *l = this; l; l = l->next) t += l->size*l->lsize; return t; }
+  inline size_t totalMemSize() const
+    { return totalNeurons()*sizeof(Neuron) + totalLinks()*sizeof(Link); }
+
+  inline Layer& front()
+    { Layer *l = this; while(l->prev) l = l->prev; return *l; }
+  inline Layer& back()
+    { Layer *l = this; while(l->next) l = l->next; return *l; }
+
+  void pass(int nb, int ne) {
+    assert(prev);
+    int lsize = this->lsize;
+    Neuron *pn = prev->neurons;
+    Link *il = links + nb*lsize;
+    for(Neuron *in = neurons + nb, *e = neurons + ne; in < e; ++in) {
+      double s = 0;
+      for(Link *e = il + lsize; il < e; ++il)
+        s += pn[il->nprev].v * il->w;
+
+      // exp sigmoid
+      double ss = 1/(1 + exp(-s));
+      in->v = ss;
+      in->d = ss * (1-ss);
+
+      // 1/(x+1) sigmoid
+      //double ss = 1/(1+fabs(s));
+      //double ss2 = ss*0.5;
+      //in->v = s > 0 ? 1 - ss2 : ss2;
+      //in->d = ss2 * ss;
+    }
+  }
+
+  template<bool hasPrev>
+  void backpassTpl(int lb, int le) {
+    assert(hasPrev == (bool)prev);
+    if (lb == le) return;
+    Link *links = this->links;
+    Neuron *neurons = this->neurons;
+    Neuron *pneurons = prev->neurons;
+
+    double s = 0;
+    int ipn = links[lb].nprev;
+    for(int il = lb; il != le; ) {
+      Link &l = links[il];
+      if (hasPrev)
+        if (ipn != l.nprev)
+          { pneurons[ipn].d *= s; s = 0; ipn = l.nprev; }
+      Neuron &n = neurons[ il/lsize ];
+      Real d = n.d;
+      if (hasPrev) s += d * l.w;
+      l.w += d * pneurons[l.nprev].v;
+      il = l.lnext;
+    }
+    if (hasPrev)
+      pneurons[ipn].d *= s;
+
+    assert(le == size*lsize || ipn < links[le].nprev);
+  }
+
+  void backpass(int lb, int le) {
+    if (prev) backpassTpl<true>(lb, le); else backpassTpl<false>(lb, le);
+  }
+
+  void passAll() {
+    if (prev) pass(0, size);
+    if (next) next->passAll();
+  }
+
+  void backpassAll(Real k) {
+    backpass(lfirst, size*lsize);
+    if (prev) prev->backpassAll(k);
+  }
+};
+
+
+#endif
diff --git a/simple/neural/nnlayer2.mt.inc.cpp b/simple/neural/nnlayer2.mt.inc.cpp
new file mode 100644
index 0000000..23ea906
--- /dev/null
+++ b/simple/neural/nnlayer2.mt.inc.cpp
@@ -0,0 +1,167 @@
+#ifndef NNLAYER2_MT_INC_CPP
+#define NNLAYER2_MT_INC_CPP
+
+
+#include "nnlayer2.inc.cpp"
+
+
+#include <thread>
+#include <atomic>
+#include <vector>
+
+
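+// Minimal spinning barrier: every thread bumps the shared monotonic counter
+// once per wait() and spins until all `threads` increments of the current
+// round have arrived; `next` is this thread's expected counter value.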
+class Barrier {
+private:
+  std::atomic<unsigned int> &counter;
+  const unsigned int threads;
+  unsigned int next;
+public:
+  inline Barrier(std::atomic<unsigned int> &counter, unsigned int threads): counter(counter), threads(threads), next() { }
+  inline void wait() { next += threads; ++counter; while(counter < next); }
+};
+
+
+class TrainMT {
+private:
+  struct LDesc {
+    int nb, ne, lb, le;
+    double sumQ;
+    LDesc(): nb(), ne(), lb(), le(), sumQ() { }
+  };
+
+public:
+  Layer *layer;
+  const unsigned char *dataX;
+  const unsigned char *dataY;
+  int strideX;
+  int strideY;
+  int *shuffle;
+  int count;
+  Real trainRatio;
+
+  TrainMT():
+    layer(),
+    dataX(),
+    dataY(),
+    strideX(),
+    strideY(),
+    shuffle(),
+    count(),
+    trainRatio() { }
+
+private:
+  void trainFunc(int tid, int threads, std::atomic<unsigned int> &barrierCounter, LDesc *ldescs) {
+    Barrier barrier(barrierCounter, threads);
+
+    Layer &fl = *layer;
+    Layer &bl = layer->back();
+    int layersCount = fl.totalLayers();
+    LDesc *fld = ldescs, *bld = fld + layersCount - 1;
+
+    Real trainRatio = this->trainRatio;
+
+    double sumQ = 0;
+    for(int i = 0; i < count; ++i) {
+      int ii = shuffle[i];
+      const unsigned char *curX = dataX + strideX*ii;
+      const unsigned char *curY = dataY + strideY*ii;
+
+      const unsigned char *px = curX + fld->nb;
+      for(Neuron *in = fl.neurons + fld->nb, *e = fl.neurons + fld->ne; in < e; ++in, ++px)
+        in->v = Real(*px)*Real(1/255.0);
+
+      LDesc *ld = fld + 1;
+      for(Layer *l = fl.next; l; l = l->next, ++ld) {
+        barrier.wait();
+        l->pass(ld->nb, ld->ne);
+      }
+
+      double q = 0;
+      const unsigned char *py = curY + bld->nb;
+      for(Neuron *in = bl.neurons + bld->nb, *e = bl.neurons + bld->ne; in < e; ++in, ++py) {
+        Real d = Real(*py)*Real(1/255.0) - in->v;
+        in->d *= d * trainRatio;
+        q += d*d;
+      }
+      sumQ += q;
+
+      if (trainRatio > 0) {
+        ld = bld;
+        for(Layer *l = &bl; l->prev; l = l->prev, --ld) {
+          barrier.wait();
+          l->backpass(ld->lb, ld->le);
+        }
+      }
+
+      //if (!tid) printf(" - %d, %f, %f\n", i, q, sumQ);
+    }
+
+    ldescs->sumQ = sumQ;
+  }
+
+public:
+  double train(int threads) {
+    assert(threads > 0);
+    assert(layer && !layer->prev);
+    assert(dataX && dataY && shuffle);
+    assert(count > 0);
+    assert(trainRatio >= 0);
+
+    int layersCount = layer->totalLayers();
+    assert(layersCount > 0);
+    std::vector<LDesc> ldescs( threads*layersCount );
+
+    int layerId = 0;
+    for(Layer *l = layer; l; l = l->next, ++layerId) {
+      assert(layerId < layersCount);
+      int tsize = l->size/threads;
+      for(int tid = 0; tid < threads; ++tid) {
+        LDesc &desc = ldescs[tid*layersCount + layerId];
+        desc.nb = tid*tsize;
+        desc.ne = desc.nb + tsize;
+        if (tid == threads-1) desc.ne = l->size;
+      }
+
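+      // Partition each layer's link list between threads, cutting only where
+      // nprev changes, so no two threads ever update the same previous-layer
+      // neuron's delta during the backward pass.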
+      if (int lsize = l->size*l->lsize) {
+        int tlsize = lsize/threads;
+        int ipn = l->links[ l->lfirst ].nprev;
+        int tid = 0;
+        int count = 0;
+
+        ldescs[tid*layersCount + layerId].lb = l->lfirst;
+        if (threads > 1) {
+          for(int il = l->lfirst; il != lsize; il = l->links[il].lnext, ++count) {
+            Link &link = l->links[il];
+            if (ipn != link.nprev) {
+              if (count >= tlsize) {
+                ldescs[tid*layersCount + layerId].le = il;
+                ++tid;
+                count -= tlsize;
+                ldescs[tid*layersCount + layerId].lb = il;
+                if (tid == threads - 1) break;
+              }
+              ipn = link.nprev;
+            }
+          }
+        }
+        ldescs[tid*layersCount + layerId].le = lsize;
+      }
+    }
+    assert(layerId == layersCount);
+
+    std::atomic<unsigned int> barrierCounter(0);
+    std::vector<std::thread*> t(threads - 1);
+    for(int i = 1; i < threads; ++i)
+      t[i-1] = new std::thread(&TrainMT::trainFunc, this, i, threads, std::ref(barrierCounter), &ldescs[i*layersCount]);
+    trainFunc(0, threads, barrierCounter, &ldescs[0]);
+
+    double result = ldescs[0].sumQ;
+    for(int i = 1; i < threads; ++i)
+      { t[i-1]->join(); delete t[i-1]; result += ldescs[i*layersCount].sumQ; }
+
+    return result/(count * layer->back().size);
+  }
+};
+
+
+#endif
diff --git a/simple/neural/nnlayer3.inc.cpp b/simple/neural/nnlayer3.inc.cpp
new file mode 100644
index 0000000..4151893
--- /dev/null
+++ b/simple/neural/nnlayer3.inc.cpp
@@ -0,0 +1,370 @@
+#ifndef NNLAYER3_INC_CPP
+#define NNLAYER3_INC_CPP
+
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cmath>
+
+#include <algorithm>
+#include <vector>
+
+
+typedef float Real;
+
+
+template<typename T>
+bool twrite(const T &x, FILE *f)
+  { return fwrite(&x, sizeof(T), 1, f); }
+template<typename T>
+bool tread(T &x, FILE *f)
+  { return fread(&x, sizeof(T), 1, f); }
+
+
+struct Neuron {
+  Real v, d;
+};
+
+
+class Layer {
+public:
+  Layer *prev, *next;
+
+  int sx, sy, sz, sl;
+  Neuron *neurons;
+  Real *weights;
+  int *invWeights;
+  int *invNeurons;
+
+
+  explicit Layer(Layer *prev = nullptr, int sx = 0, int sy = 0, int sz = 0, int sl = 0):
+    prev(), next(), sx(), sy(), sz(), sl(), neurons(), weights(), invWeights(), invNeurons()
+  {
+    while(prev && prev->next) prev = prev->next;
+    if (prev) prev->next = this;
+    this->prev = prev;
+    init(sx, sy, sz, sl);
+  }
+
+
+  virtual ~Layer() {
+    clear();
+    if (next) delete next;
+    if (prev) prev->next = nullptr;
+  }
+
+
+  void clear() {
+    if (next) next->clear();
+    if (neurons) delete[] neurons;
+    if (weights) delete[] weights;
+    if (invWeights) delete[] invWeights;
+    sx = sy = sz = sl = 0;
+    neurons = nullptr;
+    weights = nullptr;
+    invWeights = nullptr;
+    invNeurons = nullptr;
+  }
+
+
+  bool init(int sx, int sy, int sz, int sl = 0) {
+    clear();
+    if (prev && !prev->countNeurons()) return false;
+    if (sx <= 0 || sy <= 0 || sz <= 0) return false;
+    if (prev ? (sl <= 0 || sl > prev->sx || sl > prev->sy) : sl != 0) return false;
+    printf("init %d %d %d %d\n", sx, sy, sz, sl);
+
+    this->sx = sx;
+    this->sy = sy;
+    this->sz = sz;
+
+    int size = sx*sy*sz;
+    neurons = new Neuron[size];
+    memset(neurons, 0, sizeof(*neurons)*size);
+
+    if (prev) {
+      int psx = prev->sx;
+      int psy = prev->sy;
+      int psz = prev->sz;
+      int psize = psx*psy*psz;
+
+      this->sl = sl;
+      int wsize = size*sl*sl*psz;
+      weights = new Real[wsize];
+      double k = 1.0/(sl*sl*psz);
+      for(int i = 0; i < wsize; ++i)
+        weights[i] = Real( (rand()/(double)RAND_MAX*2 - 1)*k );
+
+      struct Link {
+        int n, w;
+        inline bool operator< (const Link &b) const
+          { return n < b.n ? true : (b.n < n ? false : w < b.w); }
+      } *links = new Link[wsize], *il = links;
+
+      int dx = sx > 1 ? sx - 1 : 1;
+      int dy = sy > 1 ? sy - 1 : 1;
+
+      for(int y = 0; y < sy; ++y)
+        for(int x = 0; x < sx; ++x) {
+          int py = y*(psy-sl)/dy;
+          int px = x*(psx-sl)/dx;
+          assert(py >= 0 && py <= psy-sl);
+          assert(px >= 0 && px <= psx-sl);
+
+          for(int z = 0; z < sz; ++z)
+            for(int ly = 0; ly < sl; ++ly)
+              for(int lx = 0; lx < sl; ++lx)
+                for(int pz = 0; pz < psz; ++pz, ++il) {
+                  assert(il < links + wsize);
+                  il->n = ((py+ly)*psx + px+lx)*psz + pz;
+                  il->w = il - links;
+                  assert(il->n >= 0 && il->n < psize);
+                }
+        }
+      assert(il == links + wsize);
+      std::sort(links, il);
+
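+      // Build a CSR-like inverse index: invNeurons[p]..invNeurons[p+1] bounds
+      // the range of invWeights entries listing every weight that reads
+      // previous-layer neuron p; the backward pass walks these ranges.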
+      invWeights = new int[wsize + psize + 1];
+      invNeurons = invWeights + wsize;
+      invNeurons[0] = 0;
+      int pni = 0;
+      for(int i = 0; i < wsize; ++i) {
+        assert(pni == links[i].n || pni == links[i].n + 1);
+        invWeights[i] = links[i].w;
+        if (pni == links[i].n)
+          invNeurons[pni++] = i;
+      }
+      assert(pni == psize);
+      invNeurons[psize] = wsize;
+
+      delete[] links;
+    }
+    return true;
+  }
+
+  bool save(FILE *f) const {
+    return twrite(sx, f) && twrite(sy, f) && twrite(sz, f) && twrite(sl, f)
+        && (!weights || fwrite(weights, sizeof(*weights)*sx*sy*sz*sl*sl*prev->sz, 1, f));
+  }
+
+  bool load(FILE *f) {
+    clear();
+    int sx = 0, sy = 0, sz = 0, sl = 0;
+    return tread(sx, f) && tread(sy, f) && tread(sz, f) && tread(sl, f) && init(sx, sy, sz, sl)
+        && (!weights || fread(weights, sizeof(*weights)*sx*sy*sz*sl*sl*prev->sz, 1, f));
+  }
+
+  bool saveAll(FILE *f) const
+    { return save(f) && (!next || next->saveAll(f)); }
+  bool loadAll(FILE *f)
+    { return load(f) && (!next || next->loadAll(f)); }
+
+  bool saveAll(const char *filename) const {
+    assert(!prev);
+    FILE *f = fopen(filename, "wb");
+    if (!f)
+      { printf("cannot open file for write: %s\n", filename); return false; }
+    int count = totalLayers();
+    if (!twrite(count, f) || !saveAll(f))
+      { printf("cannot save to file: %s\n", filename); fclose(f); return false; }
+    fclose(f);
+    return true;
+  }
+
+  bool loadAll(const char *filename) {
+    assert(!prev);
+    FILE *f = fopen(filename, "rb");
+    if (!f)
+      { printf("cannot open file for read: %s\n", filename); return false; }
+    int count = 0;
+    if (!tread(count, f) || count <= 0)
+      { printf("cannot load from file: %s\n", filename); fclose(f); return false; }
+    if (next)
+      delete next;
+    while(--count)
+      new Layer(this);
+    if (!loadAll(f))
+      { printf("cannot load from file: %s\n", filename); fclose(f); return false; }
+    fclose(f);
+    return true;
+  }
+
+  inline int countNeurons() const
+    { return sx*sy*sz; }
+  inline int linksPerNeuron() const
+    { return prev ? sl*sl*prev->sz : 0; }
+  inline int countLinks() const
+    { return countNeurons() * linksPerNeuron(); }
+  inline size_t memSize() const {
+    return sizeof(*neurons)*countNeurons()
+         + (sizeof(*weights) + sizeof(*invWeights))*countLinks()
+         + (prev ? sizeof(*invNeurons)*(prev->countNeurons() + 1) : 0);
+  }
+
+  int totalLayers() const
+    { int t = 0; for(const Layer *l = this; l; l = l->next) ++t; return t; }
+  int totalNeurons() const
+    { int t = 0; for(const Layer *l = this; l; l = l->next) t += l->countNeurons(); return t; }
+  int totalLinks() const
+    { int t = 0; for(const Layer *l = this; l; l = l->next) t += l->countLinks(); return t; }
+  size_t totalMemSize() const
+    { size_t t = 0; for(const Layer *l = this; l; l = l->next) t += l->memSize(); return t; }
+
+  inline Layer& front()
+    { Layer *l = this; while(l->prev) l = l->prev; return *l; }
+  inline Layer& back()
+    { Layer *l = this; while(l->next) l = l->next; return *l; }
+
+
+  void pass(int y0, int y1) {
+    if (!prev) return;
+
+    int sx = this->sx;
+    int sy = this->sy;
+    int sz = this->sz;
+    int sxz = sx*sz;
+
+    int psx = prev->sx;
+    int psy = prev->sy;
+    int psz = prev->sz;
+    int psxz = psx*psz;
+    Neuron *pneurons = prev->neurons;
+
+    int sl = this->sl;
+    int slpz = sl*psz;
+    int sllpz = sl*slpz;
+
+    int ldy = psxz - slpz;
+
+    int kpx = psx-sl, dpx = sx>1 ? sx-1 : 1;
+    int kpy = psy-sl, dpy = sy>1 ? sy-1 : 1;
+
+    Real *iw = weights + y0*sxz*sllpz;
+    Neuron *in = neurons + y0*sxz;
+    int fpy = y0*kpy;
+    for(Neuron *e = in + (y1-y0)*sxz; in < e; fpy += kpy) {
+      Neuron *pnrow = pneurons + fpy/dpy*psxz;
+
+      int fpx = 0;
+      for(Neuron *e = in + sxz; in < e; fpx += kpx) {
+        Neuron *pn = pnrow + fpx/dpx*psz;
+
+        for(Neuron *e = in + sz; in < e; ++in) {
+          double s = 0;
+
+          Neuron *ipn = pn;
+          for(Real *e = iw + sllpz; iw < e; ipn += ldy)
+            for(Real *e = iw + slpz; iw < e; ++iw, ++ipn)
+              s += *iw * ipn->v;
+
+          // exp sigmoid
+          //double ss = 1/(1 + exp(-s));
+          //in->v = ss;
+          //in->d = ss * (1-ss);
+
+          // 1/(x+1) sigmoid
+          double ss = 1/(1+fabs(s));
+          double ss2 = ss*0.5;
+          in->v = s > 0 ? 1 - ss2 : ss2;
+          in->d = ss2 * ss;
+        }
+      }
+    }
+  }
+
+
+  void backpassWeights(int y0, int y1) {
+    assert(prev);
+
+    int sx = this->sx;
+    int sy = this->sy;
+    int sz = this->sz;
+    int sxz = sx*sz;
+
+    int psx = prev->sx;
+    int psy = prev->sy;
+    int psz = prev->sz;
+    int psxz = psx*psz;
+    Neuron *pneurons = prev->neurons;
+
+    int sl = this->sl;
+    int slpz = sl*psz;
+    int sllpz = sl*slpz;
+
+    int ldy = psxz - slpz;
+
+    int kpx = psx-sl, dpx = sx>1 ? sx-1 : 1;
+    int kpy = psy-sl, dpy = sy>1 ? sy-1 : 1;
+
+    Real *iw = weights + y0*sxz*sllpz;
+    Neuron *in = neurons + y0*sxz;
+    int fpy = y0*kpy;
+    for(Neuron *e = in + (y1-y0)*sxz; in < e; fpy += kpy) {
+      Neuron *pnrow = pneurons + fpy/dpy*psxz;
+
+      int fpx = 0;
+      for(Neuron *e = in + sxz; in < e; fpx += kpx) {
+        Neuron *pn = pnrow + fpx/dpx*psz;
+
+        for(Neuron *e = in + sz; in < e; ++in) {
+          Real d = in->d;
+
+          Neuron *ipn = pn;
+          for(Real *e = iw + sllpz; iw < e; ipn += ldy)
+            for(Real *e = iw + slpz; iw < e; ++iw, ++ipn)
+              *iw += ipn->v * d;
+        }
+      }
+    }
+  }
+
+
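+  // Backward pass seen from the input side of the next layer: for each neuron
+  // of this layer, walk its inverse-index range over the next layer's weights,
+  // accumulate the error signal s and (if WithWeights) update those weights
+  // in the same sweep.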
+  template<bool WithWeights>
+  void backpassTpl(int y0, int y1) {
+    assert(next);
+
+    Neuron *nneurons = next->neurons;
+    Real *nweights = next->weights;
+    int *nInvWeights = next->invWeights;
+    int lpn = next->linksPerNeuron();
+
+    int sxz = sx*sz;
+
+    Neuron *in = neurons + y0*sxz;
+    int *inni = next->invNeurons + y0*sxz;
+    for(Neuron *e = in + (y1-y0)*sxz; in < e; ++in) {
+      Real v = in->v;
+      double s = 0;
+
+      int *iw = nInvWeights + *inni++;
+      for(int *e = nInvWeights + *inni; iw < e; ++iw) {
+        int wi = *iw;
+        Real d = nneurons[wi/lpn].d;
+        Real &nw = nweights[wi];
+        s += nw * d;
+        if (WithWeights) nw += d * v;
+      }
+
+      in->d *= s;
+    }
+  }
+
+  void passAll() {
+    if (prev) pass(0, sy);
+    if (next) next->passAll();
+  }
+
+  void backpassAll() {
+    if (!prev)
+      return;
+    if (next)
+      backpassTpl<true>(0, sy);
+    if (!prev->prev)
+      { backpassWeights(0, sy); return; }
+    return prev->backpassAll();
+  }
+};
+
+
+#endif
diff --git a/simple/neural/nnlayer3.mt.inc.cpp b/simple/neural/nnlayer3.mt.inc.cpp
new file mode 100644
index 0000000..584467a
--- /dev/null
+++ b/simple/neural/nnlayer3.mt.inc.cpp
@@ -0,0 +1,167 @@
+#ifndef NNLAYER3_MT_INC_CPP
+#define NNLAYER3_MT_INC_CPP
+
+
+#include "nnlayer3.inc.cpp"
+
+
+#include <thread>
+#include <atomic>
+#include <vector>
+
+
+class Barrier {
+private:
+  std::atomic<unsigned int> &counter;
+  const unsigned int threads;
+  unsigned int next;
+public:
+  inline Barrier(std::atomic<unsigned int> &counter, unsigned int threads): counter(counter), threads(threads), next() { }
+  inline void wait() { next += threads; ++counter; while(counter < next); }
+  inline void subwait(int tid) { while(counter < next + tid); }
+};
+
+
+class TrainMT {
+private:
+  struct LDesc {
+    int y0, y1;
+    double sumQ;
+    LDesc(): y0(), y1(), sumQ() { }
+  };
+
+public:
+  Layer *layer;
+  const unsigned char *dataX;
+  const unsigned char *dataY;
+  int strideX;
+  int strideY;
+  int *shuffle;
+  int count;
+  Real trainRatio;
+
+  TrainMT():
+    layer(),
+    dataX(),
+    dataY(),
+    strideX(),
+    strideY(),
+    shuffle(),
+    count(),
+    trainRatio() { }
+
+private:
+  void trainFunc(int tid, int threads, std::atomic<unsigned int> &barrierCounter, LDesc *ldescs) {
+    Barrier barrier(barrierCounter, threads);
+
+    Layer &fl = *layer;
+    Layer &bl = layer->back();
+    int layersCount = fl.totalLayers();
+    LDesc *fld = ldescs, *bld = fld + layersCount - 1;
+
+    Real trainRatio = this->trainRatio;
+
+    int fsxz = fl.sx*fl.sz;
+    int bsxz = bl.sx*bl.sz;
+
+    //barrier.subwait(tid);
+    //for(LDesc *ld = fld; ld <= bld; ++ld)
+    //  printf("t%d %d %d %d\n", tid, (int)(ld-fld), ld->y0, ld->y1);
+    //barrier.wait();
+
+    const unsigned char *dataX = this->dataX + fsxz*fld->y0;
+    const unsigned char *dataY = this->dataY + bsxz*bld->y0;
+
+    double sumQ = 0;
+    for(int i = 0; i < count; ++i) {
+      int ii = shuffle[i];
+      const unsigned char *curX = dataX + strideX*ii;
+      const unsigned char *curY = dataY + strideY*ii;
+
+      barrier.wait();
+      const unsigned char *px = curX;
+      for(Neuron *in = fl.neurons + fsxz*fld->y0, *e = fl.neurons + fsxz*fld->y1; in < e; ++in, ++px)
+        in->v = Real(*px)*Real(1/255.0);
+
+      LDesc *ld = fld + 1;
+      for(Layer *l = fl.next; l; l = l->next, ++ld) {
+        barrier.wait();
+        l->pass(ld->y0, ld->y1);
+      }
+
+      double q = 0;
+      const unsigned char *py = curY;
+      for(Neuron *in = bl.neurons + bsxz*bld->y0, *e = bl.neurons + bsxz*bld->y1; in < e; ++in, ++py) {
+        Real v = (in->v - 0.25)*2;
+        Real d = Real(*py)*Real(1/255.0) - v;
+        in->d *= d * trainRatio;
+        d *= d;
+        q += d*d;
+      }
+      sumQ += q;
+
+      if (trainRatio > 0) {
+        ld = bld;
+        for(Layer *l = &bl; l->prev; l = l->prev, --ld) {
+          if (l->next) {
+            barrier.wait();
+            l->backpassTpl<true>(ld->y0, ld->y1);
+            //l->backpassTpl<false>(ld->y0, ld->y1);
+            //barrier.wait();
+            //l->next->backpassWeights(ld[1].y0, ld[1].y1);
+          }
+          if (!l->prev->prev) {
+            barrier.wait();
+            l->backpassWeights(ld->y0, ld->y1);
+            break;
+          }
+        }
+      }
+
+      //if (!tid) printf(" - %d, %f, %f\n", i, q, sumQ);
+    }
+
+    ldescs->sumQ = sumQ;
+  }
+
+public:
+  double train(int threads) {
+    assert(threads > 0);
+    assert(layer && !layer->prev);
+    assert(dataX && dataY && shuffle);
+    assert(count > 0);
+    assert(trainRatio >= 0);
+
+    int layersCount = layer->totalLayers();
+    assert(layersCount > 0);
+    std::vector<LDesc> ldescs( threads*layersCount );
+
+    int layerId = 0;
+    for(Layer *l = layer; l; l = l->next, ++layerId) {
+      assert(layerId < layersCount);
+      int tsy = l->sy/threads;
+      for(int tid = 0; tid < threads; ++tid) {
+        LDesc &desc = ldescs[tid*layersCount + layerId];
+        desc.y0 = tid*tsy;
+        desc.y1 = desc.y0 + tsy;
+        if (tid == threads-1) desc.y1 = l->sy;
+      }
+    }
+    assert(layerId == layersCount);
+
+    std::atomic<unsigned int> barrierCounter(0);
+    std::vector<std::thread*> t(threads - 1);
+    for(int i = 1; i < threads; ++i)
+      t[i-1] = new std::thread(&TrainMT::trainFunc, this, i, threads, std::ref(barrierCounter), &ldescs[i*layersCount]);
+    trainFunc(0, threads, barrierCounter, &ldescs[0]);
+
+    double result = ldescs[0].sumQ;
+    for(int i = 1; i < threads; ++i)
+      { t[i-1]->join(); delete t[i-1]; result += ldescs[i*layersCount].sumQ; }
+
+    return sqrt(sqrt( result/(count * layer->back().countNeurons()) ));
+  }
+};
+
+
+#endif
diff --git a/simple/neural/tga.inc.cpp b/simple/neural/tga.inc.cpp
new file mode 100644
index 0000000..bd21033
--- /dev/null
+++ b/simple/neural/tga.inc.cpp
@@ -0,0 +1,62 @@
+#ifndef TGA_INC_CPP
+#define TGA_INC_CPP
+
+
+#include <cstdio>
+
+
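+// Writes an uncompressed true-color TGA (image type 2): an 18-byte header,
+// then pixels in BGR(A) order with rows emitted bottom-up to match TGA's
+// default bottom-left origin.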
+bool tgaSave(const char *filename, const unsigned char *data, int w, int h, int ch) {
+  if (!data || w <= 0 || h <= 0 || w > 0xffff || h > 0xffff || (ch != 3 && ch != 4)) {
+    printf("ERROR: cannot save image (bad image): %s\n", filename);
+    return false;
+  }
+
+  FILE *f = fopen(filename, "wb");
+  if (!f) {
+    printf("ERROR: cannot open file: %s\n", filename);
+    return false;
+  }
+
+  #pragma pack(push,1)
+  struct Header {
+    unsigned char idLength;
+    unsigned char colormapType;
+    unsigned char imageType;
+    unsigned char colormapIndex[2];
+    unsigned char colormapLength[2];
+    unsigned char colormapSize;
+    unsigned char xOrigin[2];
+    unsigned char yOrigin[2];
+    unsigned char width[2];
+    unsigned char height[2];
+    unsigned char pixelSize;
+    unsigned char attributes;
+  };
+  #pragma pack(pop)
+  Header header = {};
+  header.imageType = 2;
+  header.width[0] = w;
+  header.width[1] = w >> 8;
+  header.height[0] = h;
+  header.height[1] = h >> 8;
+  header.pixelSize = ch == 4 ? 32 : 24;
+  fwrite(&header, sizeof(header), 1, f);
+
+  int rowSize = w*ch;
+  int size = h*rowSize;
+  const unsigned char *row = data + size;
+  for(unsigned short r = h; r; --r, row -= rowSize) {
+    for(const unsigned char *c = row - rowSize; c < row; c += ch) {
+      fputc(c[2], f);
+      fputc(c[1], f);
+      fputc(c[0], f);
+      if (ch == 4) fputc(c[3], f);
+    }
+  }
+  fclose(f);
+
+  return true;
+}
+
+
+#endif