From 56d550c3a17f4338432a078af51802922718a7ac Mon Sep 17 00:00:00 2001
From: Ivan Mahonin
Date: Feb 11 2023 16:26:59 +0000
Subject: neural: cpp

---
diff --git a/simple/neural/build-nn-trainer-pp.sh b/simple/neural/build-nn-trainer-pp.sh
new file mode 100755
index 0000000..ad7e1a6
--- /dev/null
+++ b/simple/neural/build-nn-trainer-pp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -e
+
+c++ -Wall -DNDEBUG -O3 nn-trainer.cpp -lm -o nn-trainer-pp
+
+
+
+
diff --git a/simple/neural/convertsyms.c b/simple/neural/convertsyms.c
new file mode 100644
index 0000000..b0d02cd
--- /dev/null
+++ b/simple/neural/convertsyms.c
@@ -0,0 +1,81 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+int main() {
+  int sizeX = 28;
+  int sizeY = 28;
+  int border = 2;
+  int outSizeX = 22;
+  int outSizeY = 22;
+  const char *filename = "data/symbols-data.bin";
+  const char *outFilename = "data/output/symbols22-data.bin";
+
+
+  printf("read data from '%s'\n", filename);
+  FILE *f = fopen(filename, "rb");
+  if (!f)
+    return printf("cannot open file\n"), 1;
+  fseek(f, 0, SEEK_END);
+  size_t fs = ftell(f);
+  fseek(f, 0, SEEK_SET);
+
+  size_t testSize = sizeX*sizeY + 1;
+  int count = fs/testSize;
+  if (!count)
+    return printf("file is smaller than the minimal size\n"), fclose(f), 1;
+
+  unsigned char *data = calloc(testSize, count);
+  if (!fread(data, testSize*count, 1, f))
+    return printf("cannot read\n"), free(data), fclose(f), 1;
+  fclose(f);
+
+  printf("write converted data to '%s'\n", outFilename);
+  f = fopen(outFilename, "wb");
+  if (!f)
+    return printf("cannot open file\n"), 1;
+
+  unsigned char *img = calloc(outSizeX*outSizeY+1, 1);
+  int sx = sizeX - 2*border - 1, sy = sizeY - 2*border - 1;
+  int osx = outSizeX - 2*border - 1, osy = outSizeY - 2*border - 1;
+  for(int i = 0; i < count; ++i) {
+    unsigned char *in = data + testSize*i;
+    for(int y = 0; y <= osy; ++y) {
+      int y0 = y*sy;
+      int py = y0%osy;
+      y0 /= osy;
+      int y1 = y0 < sy ? y0 + 1 : sy; /* lower neighbour row for the bilinear blend, clamped */
+      for(int x = 0; x <= osx; ++x) {
+        int x0 = x*sx;
+        int px = x0%osx;
+        x0 /= osx;
+        int x1 = x0 < sx ? x0 + 1 : sx; /* right neighbour column, clamped */
+
+        int p00 = in[(border+y0)*sizeX + border + x0];
+        int p01 = in[(border+y0)*sizeX + border + x1];
+        int p0 = p00*(osx-px) + p01*px;
+
+        int p10 = in[(border+y1)*sizeX + border + x0];
+        int p11 = in[(border+y1)*sizeX + border + x1];
+        int p1 = p10*(osx-px) + p11*px;
+
+        int p = p0*(osy-py) + p1*py;
+        p = (p + osx*osy/2)/(osx*osy); /* round the fixed-point bilinear sample back to 0..255 */
+
+        img[(y+border)*outSizeX + border + x] = (unsigned char)p;
+      }
+    }
+    img[outSizeX*outSizeY] = in[sizeX*sizeY]; /* copy the label byte */
+    if (!fwrite(img, outSizeX*outSizeY+1, 1, f))
+      return printf("cannot write\n"), free(data), free(img), fclose(f), 1;
+  }
+  free(data);
+  free(img);
+
+  fclose(f);
+
+  printf("done\n");
+  return 0;
+}
diff --git a/simple/neural/data/symbols22-data.bin b/simple/neural/data/symbols22-data.bin
new file mode 100644
index 0000000..a04f592
Binary files /dev/null and b/simple/neural/data/symbols22-data.bin differ
diff --git a/simple/neural/nn-trainer.cpp b/simple/neural/nn-trainer.cpp
new file mode 100644
index 0000000..6ecc350
--- /dev/null
+++ b/simple/neural/nn-trainer.cpp
@@ -0,0 +1,37 @@
+
+#include <cmath>
+#include <ctime>
+
+#include "nntrain.inc.cpp"
+
+
+int main() {
+  srand(time(NULL));
+
+  printf("load training data\n");
+  Trainer t;
+  if (!t.loadSymbolMap("data/symbols-data.bin", 784, 10)) return 1;
+
+  printf("create neural network\n");
+  double tr = 0.1;
+  Layer l(nullptr, 784, tr);
+  new LayerPlain(l, 256, tr);
+  new LayerPlain(l, 128, tr);
+  new LayerPlain(l, 64, tr);
+  new LayerPlain(l, 64, tr);
+  new LayerPlain(l, 10, tr);
+
+  printf("try load previously saved network\n");
+  l.load("data/weights.bin");
+
+  printf("train\n");
+  double k = pow(0.5, 0.125);
+  for(double q = k; q > 0.05; q *= k)
+    t.train(l, 10, 100, q);
+
+  printf("save neural network weights\n");
+  if (!l.save("data/output/weights.bin")) return 1;
+
+  return 0;
+}
+
diff --git a/simple/neural/nnlayer.inc.c b/simple/neural/nnlayer.inc.c
index 040de48..0709426 100644
--- a/simple/neural/nnlayer.inc.c
+++ b/simple/neural/nnlayer.inc.c
@@ -14,7 +14,7 @@ typedef struct NeuralLayer {
   struct NeuralLayer *prev, *next;
   int size;
   double trainRatio;
-  double *a, *d, *da, *w;
+  double *a, *b, *d, *da, *w;
 } NeuralLayer;
 
 
@@ -26,13 +26,14 @@ NeuralLayer* nlNew(NeuralLayer *prev, int size, double trainRatio) {
   nl->size = size;
   nl->prev = prev;
   nl->trainRatio = trainRatio;
-  nl->a = calloc(sizeof(*nl->a), size*3);
+  nl->a = calloc(sizeof(*nl->a), size*4);
   nl->d = nl->a + size;
   nl->da = nl->d + size;
   if (prev) {
     assert(prev->size > 0);
-    nl->w = calloc(sizeof(*nl->w), size*prev->size);
-    for(double *p = nl->w, *e = p + size*prev->size; p < e; ++p)
+    nl->b = calloc(sizeof(*nl->b), size*(1 + prev->size));
+    nl->w = nl->b + size;
+    for(double *p = nl->b, *e = p + size*(1 + prev->size); p < e; ++p)
       *p = rand()/(double)RAND_MAX*2 - 1;
     prev->next = nl;
   }
@@ -44,7 +45,7 @@ void nlFree(NeuralLayer *nl) {
   if (nl->next) nlFree(nl->next);
   if (nl->prev) nl->prev->next = NULL;
   free(nl->a);
-  if (nl->prev) free(nl->w);
+  if (nl->prev) free(nl->b);
   free(nl);
 }
 
@@ -57,7 +58,7 @@ NeuralLayer* nlBack(NeuralLayer *nl)
 
 int nlToStream(NeuralLayer *nl, FILE *f) {
   if (nl->prev)
-    if (1 != fwrite(nl->w, sizeof(double) * nl->size * nl->prev->size, 1, f))
+    if (!fwrite(nl->b, sizeof(double) * nl->size * (1 + nl->prev->size), 1, f))
       return 0;
   return nl->next ? nlToStream(nl->next, f) : 1;
 }
@@ -65,7 +66,7 @@ int nlToStream(NeuralLayer *nl, FILE *f) {
 
 int nlFromStream(NeuralLayer *nl, FILE *f) {
   if (nl->prev)
-    if (1 != fread(nl->w, sizeof(double) * nl->size * nl->prev->size, 1, f))
+    if (!fread(nl->b, sizeof(double) * nl->size * (1 + nl->prev->size), 1, f))
       return 0;
   return nl->next ? nlFromStream(nl->next, f) : 1;
 }
@@ -98,8 +99,8 @@ NeuralLayer* nlPass(NeuralLayer *nl) {
   if (nlp) {
     double *nlpa = nlp->a, *ee = nlpa + nlp->size;
     double *w = nl->w;
-    for(double *a = nl->a, *d = nl->d, *e = a + nl->size; a < e; ++a, ++d) {
-      double s = 0;
+    for(double *a = nl->a, *b = nl->b, *d = nl->d, *e = a + nl->size; a < e; ++a, ++b, ++d) {
+      double s = *b;
       for(double *pa = nlpa; pa < ee; ++pa, ++w)
         s += *w * *pa;
       double ex = exp(-s);
@@ -121,17 +122,19 @@ NeuralLayer* nlBackpass(NeuralLayer *nl) {
   double *w = nl->w;
   if (nlp->prev) {
     memset(nlp->da, 0, sizeof(*nlp->da) * nlp->size);
-    for(double *d = nl->d, *da = nl->da, *e = d + nl->size; d < e; ++d, ++da) {
+    for(double *b = nl->b, *d = nl->d, *da = nl->da, *e = b + nl->size; b < e; ++b, ++d, ++da) {
       double ds = *d * *da;
       double dst = ds*tr;
+      *b += dst; /* bias update; same sign as the weight update below */
       for(double *pa = nlpa, *pda = nlpda; pa < ee; ++pa, ++pda, ++w) {
         *pda += ds * *w;
         *w += dst * *pa;
       }
     }
   } else {
-    for(double *d = nl->d, *da = nl->da, *e = d + nl->size; d < e; ++d, ++da) {
+    for(double *b = nl->b, *d = nl->d, *da = nl->da, *e = b + nl->size; b < e; ++b, ++d, ++da) {
       double dst = *d * *da * tr;
+      *b += dst;
       for(double *pa = nlpa; pa < ee; ++pa, ++w)
         *w += dst * *pa;
     }
diff --git a/simple/neural/nnlayer.inc.cpp b/simple/neural/nnlayer.inc.cpp
new file mode 100644
index 0000000..a697dc5
--- /dev/null
+++ b/simple/neural/nnlayer.inc.cpp
@@ -0,0 +1,255 @@
+#ifndef NNLAYER_INC_CPP
+#define NNLAYER_INC_CPP
+
+
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+
+
+class Layer {
+public:
+  Layer *prev, *next;
+  int size;
+  double trainRatio;
+  double *a, *d, *da;
+
+  Layer(Layer *prev, int size, double trainRatio = 0):
+    prev(), next(), size(size), trainRatio(trainRatio)
+  {
+    assert(size > 0);
+    a = new double[size*3];
+    d = a + size;
+    da = d + size;
+    memset(a, 0, sizeof(*a)*size*3);
+    if (prev) (this->prev = &prev->back())->next = this;
+  }
+
+  inline Layer& front()
+    { Layer *l = this; while(l->prev) l = l->prev; return *l; }
+  inline Layer& back()
+    { Layer *l = this; while(l->next) l = l->next; return *l; }
+
+  virtual ~Layer() {
+    if (next) delete next;
+    if (prev) prev->next = nullptr;
+    delete[] a;
+  }
+
+  virtual bool toStream(FILE *f)
+    { return next ? next->toStream(f) : true; }
+  virtual bool fromStream(FILE *f)
+    { return next ? next->fromStream(f) : true; }
+
+  virtual Layer& pass()
+    { return next ? next->pass() : *this; }
+  virtual Layer& backpass()
+    { return prev ? prev->backpass() : *this; }
+
+  bool save(const char *filename) {
+    assert(!prev);
+    FILE *f = fopen(filename, "wb");
+    if (!f) return printf("cannot open file '%s' for write\n", filename), false;
+    if (!toStream(f)) return printf("cannot write to file '%s'\n", filename), fclose(f), false;
+    fclose(f);
+    return true;
+  }
+
+  bool load(const char *filename) {
+    assert(!prev);
+    FILE *f = fopen(filename, "rb");
+    if (!f) return printf("cannot open file '%s' for read\n", filename), false;
+    if (!fromStream(f)) return printf("cannot read from file '%s'\n", filename), fclose(f), false;
+    fclose(f);
+    return true;
+  }
+
+  double trainPass(double *x, double *y, double qmin) {
+    assert(!prev);
+    double *fa = a;
+    a = x;
+    Layer &b = pass();
+
+    double qmax = 0;
+    for(double *pa = b.a, *pda = b.da, *e = pa + b.size; pa < e; ++pa, ++pda, ++y) {
+      double d = *y - *pa;
+      *pda = d;
+      double q = fabs(d);
+      if (qmax < q) qmax = q;
+    }
+    if (qmax > qmin) b.backpass();
+
+    a = fa;
+    return qmax;
+  }
+};
+
+
+class LayerPlain: public Layer {
+public:
+  double *b, *w;
+
+  LayerPlain(Layer &prev, int size, double trainRatio = 0):
+    Layer(&prev, size, trainRatio)
+  {
+    b = new double[size*(1 + prev.size)];
+    w = b + size;
+    for(double *p = b, *e = p + size*(1 + prev.size); p < e; ++p)
+      *p = rand()/(double)RAND_MAX*2 - 1;
+  }
+
+  ~LayerPlain()
+    { delete[] b; }
+
+  bool toStream(FILE *f) override
+    { return fwrite(b, sizeof(double)*size*(1 + prev->size), 1, f) && (!next || next->toStream(f)); }
+  bool fromStream(FILE *f) override
+    { return fread (b, sizeof(double)*size*(1 + prev->size), 1, f) && (!next || next->fromStream(f)); }
+
+  Layer& pass() override {
+    double *prevA = prev->a, *ee = prevA + prev->size;
+    double *pw = w;
+    for(double *pa = a, *pb = b, *pd = d, *e = pa + size; pa < e; ++pa, ++pb, ++pd) {
+      double s = *pb;
+      for(double *ppa = prevA; ppa < ee; ++ppa, ++pw)
+        s += *ppa * *pw;
+      double ex = exp(-s);
+      double ex1 = ex + 1;
+      double ex2 = 1/ex1;
+      *pa = ex2;        // sigmoid
+      *pd = ex*ex2*ex2; // sigmoid derivative
+    }
+    return next ? next->pass() : *this;
+  }
+
+  Layer& backpass() override {
+    double tr = trainRatio;
+    double *prevA = prev->a, *prevDa = prev->da, *ee = prevA + prev->size;
+    double *pw = w;
+    if (prev->prev) {
+      memset(prevDa, 0, sizeof(*prev->da) * prev->size);
+      for(double *pb = b, *pd = d, *pda = da, *e = pb + size; pb < e; ++pb, ++pd, ++pda) {
+        double ds = *pd * *pda;
+        double dst = ds*tr;
+        *pb += dst;
+        for(double *ppa = prevA, *ppda = prevDa; ppa < ee; ++ppa, ++ppda, ++pw) {
+          *ppda += ds * *pw;
+          *pw += dst * *ppa;
+        }
+      }
+    } else {
+      for(double *pb = b, *pd = d, *pda = da, *e = pb + size; pb < e; ++pb, ++pd, ++pda) {
+        double dst = *pd * *pda * tr;
+        *pb += dst;
+        for(double *ppa = prevA; ppa < ee; ++ppa, ++pw)
+          *pw += dst * *ppa;
+      }
+    }
+    return prev->backpass();
+  }
+};
+
+
+class LayerConvolution: public Layer {
+public:
+  enum { W = 4, D = W-2, WW = W*W };
+
+  double *b, *w, *dw;
+  int sx, sy, sz;
+  int psx, psy, psz;
+
+  LayerConvolution(Layer &prev, int sx, int sy, int sz, double trainRatio = 0):
+    Layer(&prev, sx*sy*sz, trainRatio),
+    sx(sx), sy(sy), sz(sz),
+    psx(sx*2 + D), psy(sy*2 + D), psz(prev.size/(psx*psy))
+  {
+    assert(sx > 0 && sy > 0 && sz > 0);
+    assert(psx > 0 && psy > 0 && psz > 0);
+    assert(psx*psy*psz == prev.size);
+    b = new double[size + WW*psz*(sz+1)];
+    w = b + size;
+    dw = w + WW*psz*sz;
+    for(double *p = b, *e = p + (size + WW*psz*sz); p < e; ++p)
+      *p = rand()/(double)RAND_MAX*2 - 1;
+  }
+
+  ~LayerConvolution()
+    { delete[] b; }
+
+  bool toStream(FILE *f) override
+    { return fwrite(b, sizeof(double)*(size + WW*psz*sz), 1, f) && (!next || next->toStream(f)); }
+  bool fromStream(FILE *f) override
+    { return fread (b, sizeof(double)*(size + WW*psz*sz), 1, f) && (!next || next->fromStream(f)); }
+
+  Layer& pass() override {
+    double *pa = prev->a;
+    // strides for the stride-2 WxW window: stepPA jumps from the end of an output row
+    // to the next input row pair, stepZ from a window in one input slice to the next slice
+    int sxy = sx*sy, stepW = psz*WW, stepPA = (psx - sx)*2, stepIPA = psx - W, stepZ = psx*(psy - W);
+    for(double *ia = a, *ib = b, *id = d, *iw = w, *ea = ia + size; ia < ea; iw += stepW) {
+      double *eew = iw + stepW;
+      for(double *ipa = pa, *ea = ia + sxy; ia < ea; ipa += stepPA) {
+        for(double *ea = ia + sx; ia < ea; ++ia, ++ib, ++id, ipa += 2) {
+          double s = *ib;
+          for(double *iiw = iw, *iipa = ipa; iiw < eew; iipa += stepZ)
+            for(int yy = 0; yy < W; ++yy, iipa += stepIPA)
+              for(int xx = 0; xx < W; ++xx, ++iipa, ++iiw)
+                s += *iipa * *iiw;
+          double ex = exp(-s);
+          double ex1 = ex + 1;
+          double ex2 = 1/ex1;
+          *ia = ex2;        // sigmoid
+          *id = ex*ex2*ex2; // sigmoid derivative
+        }
+      }
+    }
+    return next ? next->pass() : *this;
+  }
+
+  Layer& backpass() override {
+    double tr = trainRatio;
+    int sxy = sx*sy, stepW = psz*WW, stepPA = (psx - sx)*2, stepIPA = psx - W, stepZ = psx*(psy - W);
+    double *dw = this->dw, *edw = dw + stepW, *pa = prev->a, *pda = prev->da;
+    if (prev->prev) {
+      memset(pda, 0, sizeof(*pda) * prev->size);
+      for(double *ib = b, *id = d, *ida = da, *iw = w, *eda = ida + size; ida < eda; iw += stepW) {
+        memset(dw, 0, sizeof(*dw) * stepW);
+        for(double *ipa = pa, *ipda = pda, *eda = ida + sxy; ida < eda; ipa += stepPA, ipda += stepPA) {
+          for(double *eda = ida + sx; ida < eda; ++ib, ++id, ++ida, ipa += 2, ipda += 2) {
+            double ds = *id * *ida;
+            double dst = ds*tr;
+            *ib += dst;
+            for(double *iiw = iw, *idw = dw, *iipa = ipa, *iipda = ipda; idw < edw; iipa += stepZ, iipda += stepZ)
+              for(int yy = 0; yy < W; ++yy, iipa += stepIPA, iipda += stepIPA)
+                for(int xx = 0; xx < W; ++xx, ++iipa, ++iipda, ++iiw, ++idw)
+                  { *iipda += ds * *iiw; *idw += dst * *iipa; }
+          }
+        }
+        for(double *iiw = iw, *idw = dw; idw < edw; ++iiw, ++idw)
+          *iiw += *idw; // flush the gradients accumulated in dw into the weights
+      }
+    } else {
+      for(double *ib = b, *id = d, *ida = da, *iw = w, *eda = ida + size; ida < eda; iw += stepW) {
+        memset(dw, 0, sizeof(*dw) * stepW);
+        for(double *ipa = pa, *eda = ida + sxy; ida < eda; ipa += stepPA) {
+          for(double *eda = ida + sx; ida < eda; ++ib, ++id, ++ida, ipa += 2) {
+            double dst = *id * *ida * tr;
+            *ib += dst;
+            for(double *idw = dw, *iipa = ipa; idw < edw; iipa += stepZ)
+              for(int yy = 0; yy < W; ++yy, iipa += stepIPA)
+                for(int xx = 0; xx < W; ++xx, ++iipa, ++idw)
+                  *idw += dst * *iipa;
+          }
+        }
+        for(double *iiw = iw, *idw = dw; idw < edw; ++iiw, ++idw)
+          *iiw += *idw;
+      }
+    }
+    return prev->backpass();
+  }
+};
+
+
+#endif
diff --git a/simple/neural/nntrain.inc.c b/simple/neural/nntrain.inc.c
index 7eb97e3..418cffe 100644
--- a/simple/neural/nntrain.inc.c
+++ b/simple/neural/nntrain.inc.c
@@ -107,7 +107,7 @@ NeuralTrainer* ntNewSymbolMap(const char *filename, int sizeX, int sizeY) {
 
   unsigned char *data = calloc(testSize, count);
   if (count != fread(data, testSize, count, f))
-    return printf("cannot read from file '%s'\n", filename), fclose(f), NULL;
+    return printf("cannot read from file '%s'\n", filename), free(data), fclose(f), NULL;
 
   fclose(f);
 
diff --git a/simple/neural/nntrain.inc.c.copy b/simple/neural/nntrain.inc.c.copy
deleted file mode 100644
index 0482775..0000000
--- a/simple/neural/nntrain.inc.c.copy
+++ /dev/null
@@ -1,119 +0,0 @@
-#ifndef NNTRAIN_INC_C
-#define NNTRAIN_INC_C
-
-
-#include "nnlayer.inc.c"
-
-
-typedef struct NeuralTrainer {
-  int sizeX, sizeY, count;
-  double *x, *y;
-} NeuralTrainer;
-
-
-
-NeuralTrainer* ntNew(int sizeX, int sizeY, int count) {
-  assert(sizeX > 0);
-  assert(sizeY > 0);
-  assert(count > 0);
-  NeuralTrainer *nt = calloc(sizeof(NeuralTrainer), 1);
-  nt->sizeX = sizeX;
-  nt->sizeY = sizeY;
-  nt->count = count;
-  nt->x = calloc(sizeof(double)*(sizeX + sizeY)*count, 1);
-  nt->y = nt->x + sizeX*count;
-  return nt;
-}
-
-
-void ntFree(NeuralTrainer *nt) {
-  free(nt->x);
-  free(nt);
-}
-
-
-double ntTrain(NeuralTrainer *nt, NeuralLayer *nl, int blocks, int blockSize, int repeats, double qmin, double qminSample) {
-  assert(!nl->prev);
-  assert(nt->sizeX == nl->size);
-  assert(nt->sizeY == nlBack(nl)->size);
-
-  printf("training: %d x %d x %d = %d:\n", blocks, blockSize, repeats, blocks*blockSize*repeats);
-  double qmax, qsum;
-  int total = 0;
-  int samples = 0;
-  for(int i = 0; i < blocks; ++i) {
-    qmax = qsum = 0;
-    int repeatsPerBlock = 0;
-    for(int j = 0; j < blockSize; ++j, ++samples) {
-      int index = rand() % nt->count;
-      double *x = nt->x + nt->sizeX*index;
-      double *y = nt->y + nt->sizeY*index;
-      double q = 0;
-      for(int k = 0; k < repeats; ++k) {
-        double qq = nlTrainPass(nl, x, y);
-        ++total, ++repeatsPerBlock;
-        if (!k) q = qq;
-        if (qq <= qminSample) break;
-      }
-      if (qmax < q) qmax = q;
-      qsum += q;
-    }
-    printf(" blocks %d (samples: %d, total: %d (%lf)): %lf, %lf\n", i+1, samples, total, repeatsPerBlock/(double)blockSize - 1, qmax, qsum/blockSize);
-    if (qmax <= qminSample) break;
-  }
-  printf("done\n");
-
-  return qmax;
-}
-
-
-NeuralTrainer* ntNewSymbolMap(const char *filename, int sizeX, int sizeY) {
-  FILE *f = fopen(filename, "rb");
-  if (!f)
-    return printf("cannot open file '%s' for read\n", filename), NULL;
-  fseek(f, 0, SEEK_END);
-  size_t fs = ftell(f);
-  fseek(f, 0, SEEK_SET);
-
-  size_t testSize = sizeX + 1;
-  int count = fs/testSize;
-  if (!count)
-    return printf("file '%s' is lesser minimal size\n", filename), fclose(f), NULL;
-
-  unsigned char *data = calloc(testSize, count);
-  if (count != fread(data, testSize, count, f))
-    return printf("cannot read from file '%s'\n", filename), fclose(f), NULL;
-
-  fclose(f);
-
-  NeuralTrainer *nt = ntNew(sizeX, sizeY, count);
-  const unsigned char *d = data;
-  double *x = nt->x, *y = nt->y, *ey = y + sizeY*count;
-  const double delta = 0;
-  for(double *p = y; p < ey; ++p) *p = delta;
-  while(y < ey) {
-    for(double *e = x + sizeX; x < e; ++x, ++d)
-      *x = *d/255.0;
-    assert(*d < sizeY);
-    y[*d++] = 1 - delta;
-    y += sizeY;
-  }
-  return nt;
-}
-
-
-void ntPrintSymbol(NeuralTrainer *nt, int index, int width) {
-  assert(index >= 0 && index < nt->count);
-  assert(width > 0);
-  for(int i = 0; i < nt->sizeX; ++i) {
-    if (i && !(i % width)) printf("\n");
-    printf("%c", nt->x[nt->sizeX*index + i] > 0 ? '#' : '.');
-  }
-  printf("\n");
-  for(int i = 0; i < nt->sizeY; ++i)
-    printf(" %4.1lf", nt->y[nt->sizeY*index + i]);
-  printf("\n");
-}
-
-
-#endif
diff --git a/simple/neural/nntrain.inc.cpp b/simple/neural/nntrain.inc.cpp
new file mode 100644
index 0000000..1dcc54b
--- /dev/null
+++ b/simple/neural/nntrain.inc.cpp
@@ -0,0 +1,155 @@
+#ifndef NNTRAIN_INC_CPP
+#define NNTRAIN_INC_CPP
+
+
+#include "nnlayer.inc.cpp"
+
+
+class Trainer {
+public:
+  int sizeX, sizeY, count;
+  double *x, *y;
+
+
+  Trainer(): sizeX(), sizeY(), count(), x(), y() { }
+
+  Trainer(int sizeX, int sizeY, int count): Trainer()
+    { init(sizeX, sizeY, count); }
+
+  ~Trainer()
+    { if (count) delete[] x; }
+
+
+  void init(int sizeX, int sizeY, int count) {
+    assert(sizeX > 0);
+    assert(sizeY > 0);
+    assert(count > 0);
+    this->sizeX = sizeX;
+    this->sizeY = sizeY;
+    this->count = count;
+    x = new double[(sizeX + sizeY)*count];
+    y = x + sizeX*count;
+    memset(x, 0, sizeof(*x)*(sizeX + sizeY)*count);
+  }
+
+  void deinit() {
+    if (!count) return;
+    delete[] x;
+    sizeX = sizeY = count = 0;
+    x = y = nullptr;
+  }
+
+
+  double train(Layer &l, int successCount, int blockSize, double qmin) {
+    assert(count);
+    assert(!l.prev);
+    assert(sizeX == l.size);
+    assert(sizeY == l.back().size);
+    assert(blockSize > 0 && qmin > 0);
+
+    printf("training: %d, %lf\n", blockSize, qmin);
+    double **blockXY = new double*[blockSize*2];
+    double qmin2 = qmin*0.9;
+    double qmin3 = qmin2*0.9;
+
+    int success = 0;
+    int total = 0;
+    int repeats, blockRepeats;
+    double qmax0, qsum0, qmax, qsum;
+    for(int i = 0; i < 10000; ++i) {
+      for(int i = 0; i < blockSize; ++i) {
+        int index = rand() % count;
+        blockXY[i*2 + 0] = x + sizeX*index;
+        blockXY[i*2 + 1] = y + sizeY*index;
+      }
+
+      repeats = blockRepeats = 0;
+      qmax0 = qsum0 = 0;
+      for(int i = 0; i < 1000; ++i) {
+        double **xy = blockXY;
+        qmax = 0, qsum = 0;
+        for(int i = 0; i < blockSize; ++i, xy += 2) {
+          double q0 = 0;
+          for(int i = 0; i < 100; ++i) {
+            double q = l.trainPass(xy[0], xy[1], qmin3);
+            if (!i) q0 = q;
+            ++repeats;
+            if (q < qmin3) break;
+          }
+          qsum += q0;
+          if (qmax < q0) qmax = q0;
+        }
+        if (!i) { qmax0 = qmax; qsum0 = qsum; }
+        ++blockRepeats;
+        if (qmax <= qmin2) break;
+      }
+      total += repeats;
+
+      printf(" blocks %d (samples: %d, total: %d, repeats: %3d (%lf)): %lf -> %lf, %lf -> %lf\n",
+        i+1, (i+1)*blockSize, total, blockRepeats-1, repeats/(double)(blockRepeats*blockSize) - 1, qmax0, qmax, qsum0/blockSize, qsum/blockSize);
+
+      if (qmax0 > qmin) success = 0; else
+        if (++success == successCount) break;
+    }
+
+    delete[] blockXY; // allocated with new[], so delete[] rather than free()
+    printf("done\n");
+    return qmax0;
+  }
+
+
+  bool loadSymbolMap(const char *filename, int sizeX, int sizeY) {
+    deinit();
+
+    FILE *f = fopen(filename, "rb");
+    if (!f)
+      return printf("cannot open file '%s' for read\n", filename), false;
+    fseek(f, 0, SEEK_END);
+    size_t fs = ftell(f);
+    fseek(f, 0, SEEK_SET);
+
+    size_t testSize = sizeX + 1;
+    int count = fs/testSize;
+    if (!count)
+      return printf("file '%s' is smaller than the minimal size\n", filename), fclose(f), false;
+
+    unsigned char *data = new unsigned char[testSize*count];
+    memset(data, 0, testSize*count);
+    if (!fread(data, testSize*count, 1, f))
+      return printf("cannot read from file '%s'\n", filename), delete[] data, fclose(f), false;
+
+    fclose(f);
+
+    init(sizeX, sizeY, count);
+    const unsigned char *pd = data;
+    const double delta = 0;
+    double *ey = y + sizeY*count;
+    for(double *py = y; py < ey; ++py) *py = delta;
+    for(double *px = x, *py = y; py < ey; py += sizeY) {
+      for(double *ex = px + sizeX; px < ex; ++px, ++pd)
+        *px = *pd/255.0;
+      assert(*pd < sizeY);
+      py[*pd++] = 1 - delta;
+    }
+    delete[] data;
+
+    return true;
+  }
+
+
+  void printSymbol(int index, int width) {
+    assert(index >= 0 && index < count);
+    assert(width > 0);
+    for(int i = 0; i < sizeX; ++i) {
+      if (i && !(i % width)) printf("\n");
+      printf("%c", x[sizeX*index + i] > 0 ? '#' : '.');
+    }
+    printf("\n");
+    for(int i = 0; i < sizeY; ++i)
+      printf(" %4.1lf", y[sizeY*index + i]);
+    printf("\n");
+  }
+};
+
+
+#endif