|
|
53488e |
#ifndef NNLAYER2_MT_INC_CPP
|
|
|
53488e |
#define NNLAYER2_MT_INC_CPP
|
|
|
53488e |
|
|
|
53488e |
|
|
|
53488e |
#include "nnlayer2.inc.cpp"

#include <atomic>
#include <cassert>
#include <thread>
#include <vector>
|
|
|
53488e |
|
|
|
53488e |
|
|
|
53488e |
// Spin barrier used to synchronize the training worker threads between
// layer passes.  Each thread owns its own Barrier instance; they all share
// one std::atomic counter.  On every wait() each thread bumps the shared
// counter once and busy-waits until all `threads` participants of this
// round have done the same (the per-thread `next` target advances by
// `threads` each round, so the single counter serves every round without
// ever being reset).
class Barrier {
private:
  std::atomic<unsigned int> &counter;  // shared arrival counter (never reset)
  const unsigned int threads;          // number of participating threads
  unsigned int next;                   // this thread's target for the current round

public:
  // `counter` must start equal for all participants (typically 0) and be
  // shared by exactly `threads` Barrier instances.
  inline Barrier(std::atomic<unsigned int> &counter, unsigned int threads): counter(counter), threads(threads), next() { }

  // Announce arrival and spin until every thread has arrived this round.
  // NOTE(review): pure busy-wait by design — the workers are expected to
  // be pinned to dedicated cores; no sleep/yield is performed.
  inline void wait() { next += threads; ++counter; while(counter < next); }
};
|
|
|
53488e |
|
|
|
53488e |
|
|
|
53488e |
class TrainMT {
|
|
|
53488e |
private:
|
|
|
53488e |
struct LDesc {
|
|
|
53488e |
int nb, ne, lb, le;
|
|
|
53488e |
double sumQ;
|
|
|
53488e |
LDesc(): nb(), ne(), lb(), le(), sumQ() { }
|
|
|
53488e |
};
|
|
|
53488e |
|
|
|
53488e |
public:
|
|
|
53488e |
Layer *layer;
|
|
|
53488e |
const unsigned char *dataX;
|
|
|
53488e |
const unsigned char *dataY;
|
|
|
53488e |
int strideX;
|
|
|
53488e |
int strideY;
|
|
|
53488e |
int *shuffle;
|
|
|
53488e |
int count;
|
|
|
53488e |
Real trainRatio;
|
|
|
53488e |
|
|
|
53488e |
TrainMT():
|
|
|
53488e |
layer(),
|
|
|
53488e |
dataX(),
|
|
|
53488e |
dataY(),
|
|
|
53488e |
strideX(),
|
|
|
53488e |
strideY(),
|
|
|
53488e |
shuffle(),
|
|
|
53488e |
count(),
|
|
|
53488e |
trainRatio() { }
|
|
|
53488e |
|
|
|
53488e |
private:
|
|
|
53488e |
void trainFunc(int tid, int threads, std::atomic<unsigned int=""> &barrierCounter, LDesc *ldescs) {</unsigned>
|
|
|
53488e |
Barrier barrier(barrierCounter, threads);
|
|
|
53488e |
|
|
|
53488e |
Layer &fl = *layer;
|
|
|
53488e |
Layer &bl = layer->back();
|
|
|
53488e |
int layersCount = fl.totalLayers();
|
|
|
53488e |
LDesc *fld = ldescs, *bld = fld + layersCount - 1;
|
|
|
53488e |
|
|
|
53488e |
Real trainRatio = this->trainRatio;
|
|
|
53488e |
|
|
|
53488e |
double sumQ = 0;
|
|
|
53488e |
for(int i = 0; i < count; ++i) {
|
|
|
53488e |
int ii = shuffle[i];
|
|
|
53488e |
const unsigned char *curX = dataX + strideX*ii;
|
|
|
53488e |
const unsigned char *curY = dataY + strideY*ii;
|
|
|
53488e |
|
|
|
53488e |
const unsigned char *px = curX;
|
|
|
53488e |
for(Neuron *in = fl.neurons + fld->nb, *e = fl.neurons + fld->ne; in < e; ++in, ++px)
|
|
|
53488e |
in->v = Real(*px)*Real(1/255.0);
|
|
|
53488e |
|
|
|
53488e |
LDesc *ld = fld + 1;
|
|
|
53488e |
for(Layer *l = fl.next; l; l = l->next, ++ld) {
|
|
|
53488e |
barrier.wait();
|
|
|
53488e |
l->pass(ld->nb, ld->ne);
|
|
|
53488e |
}
|
|
|
53488e |
|
|
|
53488e |
double q = 0;
|
|
|
53488e |
const unsigned char *py = curY;
|
|
|
53488e |
for(Neuron *in = bl.neurons + bld->nb, *e = bl.neurons + bld->ne; in < e; ++in, ++py) {
|
|
|
53488e |
Real d = Real(*py)*Real(1/255.0) - in->v;
|
|
|
53488e |
in->d *= d * trainRatio;
|
|
|
53488e |
q += d*d;
|
|
|
53488e |
}
|
|
|
53488e |
sumQ += q;
|
|
|
53488e |
|
|
|
53488e |
if (trainRatio > 0) {
|
|
|
53488e |
ld = bld - 1;
|
|
|
53488e |
for(Layer *l = bl.prev; l; l = l->prev, --ld) {
|
|
|
53488e |
barrier.wait();
|
|
|
53488e |
l->backpass(ld->lb, ld->le);
|
|
|
53488e |
}
|
|
|
53488e |
}
|
|
|
53488e |
|
|
|
53488e |
//if (!tid) printf(" - %d, %f, %f\n", i, q, sumQ);
|
|
|
53488e |
}
|
|
|
53488e |
|
|
|
53488e |
ldescs->sumQ = sumQ;
|
|
|
53488e |
}
|
|
|
53488e |
|
|
|
53488e |
public:
|
|
|
53488e |
double train(int threads) {
|
|
|
53488e |
assert(threads > 0);
|
|
|
53488e |
assert(layer && !layer->prev);
|
|
|
53488e |
assert(dataX && dataY && shuffle);
|
|
|
53488e |
assert(count > 0);
|
|
|
53488e |
assert(trainRatio >= 0);
|
|
|
53488e |
|
|
|
53488e |
int layersCount = layer->totalLayers();
|
|
|
53488e |
assert(layersCount > 0);
|
|
|
53488e |
std::vector<ldesc> ldescs( threads*layersCount );</ldesc>
|
|
|
53488e |
|
|
|
53488e |
int layerId = 0;
|
|
|
53488e |
for(Layer *l = layer; l; l = l->next, ++layerId) {
|
|
|
53488e |
assert(layerId < layersCount);
|
|
|
53488e |
int tsize = l->size/threads;
|
|
|
53488e |
for(int tid = 0; tid < threads; ++tid) {
|
|
|
53488e |
LDesc &desc = ldescs[tid*layersCount + layerId];
|
|
|
53488e |
desc.nb = tid*tsize;
|
|
|
53488e |
desc.ne = desc.nb + tsize;
|
|
|
53488e |
if (tid == threads-1) desc.ne = l->size;
|
|
|
53488e |
}
|
|
|
53488e |
|
|
|
53488e |
if (int lsize = l->size*l->lsize) {
|
|
|
53488e |
int tlsize = lsize/threads;
|
|
|
53488e |
int ipn = l->links[ l->lfirst ].nprev;
|
|
|
53488e |
int tid = 0;
|
|
|
53488e |
int count = 0;
|
|
|
53488e |
|
|
|
53488e |
ldescs[tid*layersCount + layerId].lb = l->lfirst;
|
|
|
53488e |
if (threads > 1) {
|
|
|
53488e |
for(int il = l->lfirst; il != lsize; il = l->links[il].lnext, ++count) {
|
|
|
53488e |
Link &link = l->links[il];
|
|
|
53488e |
if (ipn != link.nprev) {
|
|
|
53488e |
if (count >= tlsize) {
|
|
|
53488e |
ldescs[tid*layersCount + layerId].le = il;
|
|
|
53488e |
++tid;
|
|
|
53488e |
count -= tlsize;
|
|
|
53488e |
ldescs[tid*layersCount + layerId].lb = il;
|
|
|
53488e |
if (tid == threads - 1) break;
|
|
|
53488e |
}
|
|
|
53488e |
ipn = link.nprev;
|
|
|
53488e |
}
|
|
|
53488e |
}
|
|
|
53488e |
}
|
|
|
53488e |
ldescs[tid*layersCount + layerId].le = lsize;
|
|
|
53488e |
}
|
|
|
53488e |
}
|
|
|
53488e |
assert(layerId == layersCount);
|
|
|
53488e |
|
|
|
53488e |
std::atomic<unsigned int=""> barrierCounter(0);</unsigned>
|
|
|
53488e |
std::vector<std::thread*> t(threads - 1);</std::thread*>
|
|
|
53488e |
for(int i = 1; i < threads; ++i)
|
|
|
53488e |
t[i-1] = new std::thread(&TrainMT::trainFunc, this, i, threads, std::ref(barrierCounter), &ldescs[i*layersCount]);
|
|
|
53488e |
trainFunc(0, threads, barrierCounter, &ldescs[0]);
|
|
|
53488e |
|
|
|
53488e |
double result = ldescs[0].sumQ;
|
|
|
53488e |
for(int i = 1; i < threads; ++i)
|
|
|
53488e |
{ t[i-1]->join(); delete t[i-1]; result += ldescs[i*layersCount].sumQ; }
|
|
|
53488e |
|
|
|
53488e |
return result/(count * layer->back().size);
|
|
|
53488e |
}
|
|
|
53488e |
};
|
|
|
53488e |
|
|
|
53488e |
|
|
|
53488e |
#endif
|