From b579b360d819a65c1901f7256583ec5a7f462a6d Mon Sep 17 00:00:00 2001
From: Ivan Mahonin
Date: Mar 21 2023 13:44:43 +0000
Subject: neural: segment

---

diff --git a/projects/neural/common.inc.cpp b/projects/neural/common.inc.cpp
new file mode 100644
index 0000000..40efc35
--- /dev/null
+++ b/projects/neural/common.inc.cpp
@@ -0,0 +1,167 @@
+#ifndef COMMON_INC_CPP
+#define COMMON_INC_CPP
+
+
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include <atomic>
+#include <chrono>
+#include <string>
+#include <thread>
+#include <vector>
+#include <algorithm>
+
+
+#include "layout.inc.cpp"
+
+
+typedef double WeightReal;
+typedef double NeuronReal;
+typedef double AccumReal;
+
+typedef int WeightInt;
+typedef int AccumInt;
+
+
+
+#define RANDOM_MAX 0x7fffffff
+inline unsigned int randomNext(unsigned int prev)
+  { return (1103515245*prev + 12345) & RANDOM_MAX; }
+inline unsigned int randomBranch(unsigned int seed)
+  { return randomNext(seed + 1); }
+
+inline void busyloop(unsigned int count)
+  { while(count--) __asm__ __volatile__(""); }
+
+
+inline long long timeUs() {
+  static std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
+  return (long long)std::chrono::duration_cast<std::chrono::microseconds>( std::chrono::steady_clock::now() - begin ).count();
+}
+
+
+
+struct Accum {
+  union { AccumReal v; AccumInt i; };
+};
+
+
+struct Neuron {
+  NeuronReal v, d;
+  Accum a;
+};
+
+
+struct Weight {
+  union { WeightReal w; WeightInt i; };
+};
+
+
+struct Iter {
+  typedef Accum AccumType;
+  typedef NeuronReal* DataType;
+  typedef AccumType DataAccumType;
+  static inline void init(Neuron&, AccumType&) { }
+  static inline void iter(Neuron&, Weight&, AccumType&) { }
+  static inline void done(Neuron&, AccumType&) { }
+  static inline void iter2(Neuron&, Neuron&, Weight&) { }
+  static inline void iter3(Neuron&) { }
+  static inline void iter4(Neuron&, DataType, DataAccumType&) { }
+};
+
+
+
+class Barrier {
+private:
+  std::atomic<unsigned int> &counter;
+  unsigned int next;
+  unsigned int busyseed;
+public:
+  const unsigned int tid;
+  const unsigned int threads;
+  unsigned int seed;
+
+  Barrier(const Barrier&) = delete;
+  inline Barrier(std::atomic<unsigned int> &counter, unsigned int tid, unsigned int threads, unsigned int seed):
+    counter(counter), next(), busyseed(randomBranch(seed)), tid(tid), threads(threads), seed(seed) { assert(tid < threads); }
+
+  //inline void busyloop() { }
+  inline void busyloop(unsigned int maxCycles = 4096) { ::busyloop( (busyseed = randomNext(busyseed))%maxCycles ); }
+  inline unsigned int rand() { return seed = randomNext(seed); }
+  inline void wait() { next += threads; ++counter; while(counter < next) busyloop(); }
+  inline void subwait() { while(counter < next + tid) busyloop(); }
+
+};
+
+
+struct Stat {
+  int neurons;
+  int activeNeurons;
+  int weights;
+  int links;
+  size_t memsize;
+
+  Stat(): neurons(), activeNeurons(), weights(), links(), memsize() { }
+
+  Stat& operator+= (const Stat &b) {
+    neurons += b.neurons;
+    activeNeurons += b.activeNeurons;
+    weights += b.weights;
+    links += b.links;
+    memsize += b.memsize;
+    return *this;
+  }
+
+  void print(const char *prefix = nullptr) const {
+    if (prefix && *prefix) printf("%s: ", prefix);
+    printf("neurons: %d / %d, links %d / %d, memSize: %llu\n", activeNeurons, neurons, weights, links, (unsigned long long)memsize);
+  }
+};
+
+
+struct Quality {
+  AccumReal train;
+  AccumReal human;
+
+  inline Quality(AccumReal train, AccumReal human): train(train), human(human) {}
+  inline explicit Quality(AccumReal train = 0): Quality(train, train) {}
+  inline static Quality nan() { return Quality(NAN); }
+  inline static Quality bad() { return Quality(INFINITY); }
+
+  inline Quality& operator+=(const Quality &b)
+    { train += b.train; human += b.human; return *this; }
+  inline Quality& operator*=(AccumReal x)
+    { train *= x; human *= x; return *this; }
+  inline bool operator<(const Quality &b) const {
+    return human < b.human ? true
+         : b.human < human ? false
+         : train < b.train;
+  }
+};
+
+
+struct QualityPair {
+  Quality measure;
+  Quality train;
+
+  inline explicit QualityPair(const Quality &measure = Quality(), const Quality &train = Quality()):
+    measure(measure), train(train) { }
+
+  inline QualityPair& operator+=(const QualityPair &b)
+    { measure += b.measure; train += b.train; return *this; }
+  inline QualityPair& operator*=(AccumReal x)
+    { measure *= x; train *= x; return *this; }
+  inline bool operator<(const QualityPair &b) const {
+    return measure < b.measure ? true
+         : b.measure < measure ? false
+         : train < b.train;
+  }
+};
+
+
+#endif
+
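Editor's note on the Barrier above: it is the only synchronization primitive the whole patch relies on. Every worker bumps one shared atomic counter and spins (with a randomized busyloop to decorrelate the spinning) until all threads of the current step have arrived. A minimal usage sketch, assuming common.inc.cpp is included; thread count and seeds are arbitrary:

    // Sketch: two-phase parallel work driven by Barrier.
    void worker(std::atomic<unsigned int> &counter, unsigned int tid, unsigned int threads) {
      Barrier barrier(counter, tid, threads, /*seed*/ 12345 + tid);
      // ... phase 1: work on this thread's slice ...
      barrier.wait();   // nobody proceeds until every thread finished phase 1
      // ... phase 2: may now read the phase-1 results of other threads ...
    }

    int main() {
      std::atomic<unsigned int> counter(0);
      const unsigned int threads = 4;
      std::vector<std::thread> pool;
      for(unsigned int tid = 1; tid < threads; ++tid)
        pool.emplace_back(worker, std::ref(counter), tid, threads);
      worker(counter, 0, threads);
      for(std::thread &t: pool) t.join();
    }

The scheme only works when all participants call wait() the same number of times, which is exactly the contract the layer passes below maintain.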
diff --git a/projects/neural/font.data.inc.cpp b/projects/neural/font.data.inc.cpp
new file mode 100644
index 0000000..30256af
--- /dev/null
+++ b/projects/neural/font.data.inc.cpp
@@ -0,0 +1,157 @@
+#ifndef FONT_DATA_INC_CPP
+#define FONT_DATA_INC_CPP
+
+/**
+ * 8x8 monochrome bitmap fonts for rendering
+ * Author: Daniel Hepper <daniel@hepper.net>
+ *
+ * License: Public Domain
+ *
+ * Based on:
+ * // Summary: font8x8.h
+ * // 8x8 monochrome bitmap fonts for rendering
+ * //
+ * // Author:
+ * //     Marcel Sondaar
+ * //     International Business Machines (public domain VGA fonts)
+ * //
+ * // License:
+ * //     Public Domain
+ *
+ * Fetched from: http://dimensionalrift.homelinux.net/combuster/mos3/?p=viewsource&file=/modules/gfx/font8_8.asm
+ **/
+
+// Constant: font8x8_basic
+// Contains an 8x8 font map for unicode points U+0000 - U+007F (basic latin)
+const unsigned char font8x8data[128][8] = {
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0000 (nul)
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0001
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0002
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0003
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0004
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0005
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0006
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0007
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0008
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0009
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+000A
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+000B
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+000C
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+000D
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+000E
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+000F
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0010
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0011
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0012
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0013
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0014
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0015
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0016
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0017
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0018
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0019
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+001A
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+001B
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+001C
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+001D
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+001E
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+001F
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0020 (space)
+  { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00},   // U+0021 (!)
+  { 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0022 (")
+  { 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00},   // U+0023 (#)
+  { 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00},   // U+0024 ($)
+  { 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00},   // U+0025 (%)
+  { 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00},   // U+0026 (&)
+  { 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0027 (')
+  { 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00},   // U+0028 (()
+  { 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00},   // U+0029 ())
+  { 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00},   // U+002A (*)
+  { 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00},   // U+002B (+)
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06},   // U+002C (,)
+  { 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00},   // U+002D (-)
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00},   // U+002E (.)
+  { 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00},   // U+002F (/)
+  { 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00},   // U+0030 (0)
+  { 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00},   // U+0031 (1)
+  { 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00},   // U+0032 (2)
+  { 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00},   // U+0033 (3)
+  { 0x38, 0x3C, 0x36, 0x33, 0x7F, 0x30, 0x78, 0x00},   // U+0034 (4)
+  { 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00},   // U+0035 (5)
+  { 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00},   // U+0036 (6)
+  { 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00},   // U+0037 (7)
+  { 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00},   // U+0038 (8)
+  { 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00},   // U+0039 (9)
+  { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00},   // U+003A (:)
+  { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06},   // U+003B (;)
+  { 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00},   // U+003C (<)
+  { 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00},   // U+003D (=)
+  { 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00},   // U+003E (>)
+  { 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00},   // U+003F (?)
+  { 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00},   // U+0040 (@)
+  { 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00},   // U+0041 (A)
+  { 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00},   // U+0042 (B)
+  { 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00},   // U+0043 (C)
+  { 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00},   // U+0044 (D)
+  { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00},   // U+0045 (E)
+  { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00},   // U+0046 (F)
+  { 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00},   // U+0047 (G)
+  { 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00},   // U+0048 (H)
+  { 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00},   // U+0049 (I)
+  { 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00},   // U+004A (J)
+  { 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00},   // U+004B (K)
+  { 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00},   // U+004C (L)
+  { 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00},   // U+004D (M)
+  { 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00},   // U+004E (N)
+  { 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00},   // U+004F (O)
+  { 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00},   // U+0050 (P)
+  { 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00},   // U+0051 (Q)
+  { 0x3F, 0x66, 0x66, 0x3E, 0x36, 0x66, 0x67, 0x00},   // U+0052 (R)
+  { 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00},   // U+0053 (S)
+  { 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00},   // U+0054 (T)
+  { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00},   // U+0055 (U)
+  { 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00},   // U+0056 (V)
+  { 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00},   // U+0057 (W)
+  { 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00},   // U+0058 (X)
+  { 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00},   // U+0059 (Y)
+  { 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00},   // U+005A (Z)
+  { 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00},   // U+005B ([)
+  { 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00},   // U+005C (\)
+  { 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00},   // U+005D (])
+  { 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00},   // U+005E (^)
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF},   // U+005F (_)
+  { 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+0060 (`)
+  { 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00},   // U+0061 (a)
+  { 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00},   // U+0062 (b)
+  { 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00},   // U+0063 (c)
+  { 0x38, 0x30, 0x30, 0x3e, 0x33, 0x33, 0x6E, 0x00},   // U+0064 (d)
+  { 0x00, 0x00, 0x1E, 0x33, 0x3f, 0x03, 0x1E, 0x00},   // U+0065 (e)
+  { 0x1C, 0x36, 0x06, 0x0f, 0x06, 0x06, 0x0F, 0x00},   // U+0066 (f)
+  { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F},   // U+0067 (g)
+  { 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00},   // U+0068 (h)
+  { 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00},   // U+0069 (i)
+  { 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E},   // U+006A (j)
+  { 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00},   // U+006B (k)
+  { 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00},   // U+006C (l)
+  { 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00},   // U+006D (m)
+  { 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00},   // U+006E (n)
+  { 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00},   // U+006F (o)
+  { 0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F},   // U+0070 (p)
+  { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78},   // U+0071 (q)
+  { 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00},   // U+0072 (r)
+  { 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00},   // U+0073 (s)
+  { 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00},   // U+0074 (t)
+  { 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00},   // U+0075 (u)
+  { 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00},   // U+0076 (v)
+  { 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00},   // U+0077 (w)
+  { 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00},   // U+0078 (x)
+  { 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F},   // U+0079 (y)
+  { 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00},   // U+007A (z)
+  { 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 0x00},   // U+007B ({)
+  { 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00},   // U+007C (|)
+  { 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00},   // U+007D (})
+  { 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},   // U+007E (~)
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}    // U+007F
+};
+
+#endif
diff --git a/projects/neural/font.inc.cpp b/projects/neural/font.inc.cpp
new file mode 100644
index 0000000..b8f6aa8
--- /dev/null
+++ b/projects/neural/font.inc.cpp
@@ -0,0 +1,42 @@
+#ifndef FONT_INC_CPP
+#define FONT_INC_CPP
+
+#include <cstdio>
+#include <cstring>
+#include <cstdarg>
+
+#include "font.data.inc.cpp"
+
+
+
+void imgPrint(unsigned char *data, int w, int h, int ch, int x, int y, const unsigned char *color, const char *text) {
+  int x0 = x;
+  while(unsigned char c = (unsigned char)*text++) {
+    if (c == '\n') { x = x0; y += 8; continue; }
+    const unsigned char *sym = font8x8data[c];
+    for(int yy = y, ey = y + 8; yy < ey; ++yy, ++sym) {
+      if (yy >= 0 && yy < h) {
+        unsigned char row = *sym;
+        for(int xx = x; row; ++xx, row >>= 1)
+          if ((row & 1) && xx >= 0 && xx < w)   // xx < w keeps the glyph clipped to the image width
+            memcpy(data + (yy*w + xx)*ch, color, ch);
+      }
+    }
+    x += 8;
+  }
+}
+
+
+void imgPrintf(unsigned char *data, int w, int h, int ch, int x, int y, const unsigned char *color, const char *format, ...) {
+  char buf[1024] = {};
+  va_list args;
+  va_start(args, format);
+  vsnprintf(buf, sizeof(buf), format, args);
+  va_end(args);
+  imgPrint(data, w, h, ch, x, y, color, buf);
+}
+
+
+
+#endif
diff --git a/projects/neural/func.inc.cpp b/projects/neural/func.inc.cpp
new file mode 100644
index 0000000..b5c4cc3
--- /dev/null
+++ b/projects/neural/func.inc.cpp
@@ -0,0 +1,29 @@
+#ifndef FUNC_INC_CPP
+#define FUNC_INC_CPP
+
+
+#include "common.inc.cpp"
+
+
+
+typedef void Func(Neuron &n, AccumReal s);
+
+
+inline void funcSigmoidExp(Neuron &n, AccumReal s) {
+  //if (s > 5) s = 5; else if (s < -5) s = -5;
+  AccumReal ss = 1/(1 + std::exp(-s)); n.v = ss; n.d = ss * (1-ss);
+}
+
+
+inline void funcSigmoidExp2(Neuron &n, AccumReal s) {
+  //if (s > 5) s = 5; else if (s < -5) s = -5;
+  AccumReal ss = 1/(1 + std::exp(-s)); n.v = ss; n.d = 0; //ss * (1-ss) * 0.1;
+}
+
+
+inline void funcReLU(Neuron &n, AccumReal s)
+  { n.v = s > 0 ? s : 0; n.d = s > 0; }
+
+
+
+#endif
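Editor's note on the Func contract just defined: a Func receives the accumulated weighted sum s and must fill in both the activation n.v and its derivative n.d, which the backpass later multiplies into the delta. A tiny sketch of how a layer applies one, illustrative only and not part of the patch (applyActivation is an invented name):

    // Sketch: applying an activation Func after accumulation.
    inline void applyActivation(Neuron &n, AccumReal sum, Func *f) {
      f(n, sum);   // fills n.v (value) and n.d (derivative at sum)
    }
    // e.g. applyActivation(neuron, s, &funcReLU);
    // funcSigmoidExp stores d = v*(1-v), the usual sigmoid shortcut;
    // funcSigmoidExp2 deliberately zeroes d, freezing training through it.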
diff --git a/projects/neural/layer.all.test.inc.cpp b/projects/neural/layer.all.test.inc.cpp
deleted file mode 100644
index 8d42289..0000000
--- a/projects/neural/layer.all.test.inc.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef LAYER_ALL_TEST_INC_CPP
-#define LAYER_ALL_TEST_INC_CPP
-
-
-
-#include "layer.simple.test.inc.cpp"
-#include "layer.conv.test.inc.cpp"
-
-
-class AllTest: public Test {
-public:
-  static bool test(const char *name = "all") {
-    Stage st(name);
-    SimpleTest::test();
-    ConvTest::test();
-    return st;
-  }
-};
-
-
-#endif
diff --git a/projects/neural/layer.conv.inc.cpp b/projects/neural/layer.conv.inc.cpp
index efd72ac..a9aa454 100644
--- a/projects/neural/layer.conv.inc.cpp
+++ b/projects/neural/layer.conv.inc.cpp
@@ -3,6 +3,8 @@
 
 
 
+#include "tga.inc.cpp"
+#include "font.inc.cpp"
 #include "layer.simple.inc.cpp"
 
 
@@ -186,6 +188,71 @@ void iterateConvolutionPoint(Layout cl, Layout pl, Layout wl, Kernel k, int kx,
 
 
 
+bool saveConvDemoImage(const char *filename, int count, int ksx, int ksy, int ksz, const Weight *weights) {
+  int cols = count;
+  int rows = ksz + 1;
+  int w = 1 + cols*(ksx + 1);
+  int h = 10 + rows*(ksy + 1);
+  std::vector<unsigned char> pixels(w*h*3, 0);
+
+  WeightReal range = 0;
+  for(const Weight *iw = weights, *e = iw + count*ksx*ksy*ksz; iw < e; ++iw) {
+    WeightReal r = fabs(iw->w);
+    if (range < r) range = r;
+  }
+
+  const unsigned char white[] = { 255, 255, 255 };
+  imgPrintf(pixels.data(), w, h, 3, 1, 1, white, "%f", range);
+
+  // rgb row
+
+  for(int i = 0; i < count; ++i)
+  for(int ky = 0; ky < ksy; ++ky)
+  for(int kx = 0; kx < ksx; ++kx) {
+    int y0 = 10;
+    int x0 = i*(ksx + 1) + 1;
+    unsigned char *p = &pixels[ ((y0 + ky)*w + x0 + kx)*3 ];
+
+    for(int kz = 0; kz < 3; ++kz) {
+      if (kz < ksz) {
+        WeightReal x = weights[ ((i*ksy + ky)*ksx + kx)*ksz + kz ].w;   // stride is ksz, matching the actual weight layout
+        x /= range;
+        x = (x + 0.5)*256;
+        unsigned char c = x < 0 ? 0 : x > 255 ? 255 : (unsigned char)x;
+        p[kz] = c;
+      } else {
+        p[kz] = 0;
+      }
+    }
+  }
+
+  // gray rows
+
+  for(int i = 0; i < count; ++i)
+  for(int kz = 0; kz < ksz; ++kz)
+  for(int ky = 0; ky < ksy; ++ky)
+  for(int kx = 0; kx < ksx; ++kx) {
+    WeightReal x = weights[ ((i*ksy + ky)*ksx + kx)*ksz + kz ].w;
+    x /= range;
+    x = (x + 0.5)*256;
+    unsigned char c = x < 0 ? 0 : x > 255 ? 255 : (unsigned char)x;
+
+    int y0 = (kz + 1)*(ksy + 1) + 10;
+    int x0 = i*(ksx + 1) + 1;
+    unsigned char *p = &pixels[ ((y0 + ky)*w + x0 + kx)*3 ];
+    p[0] = p[1] = p[2] = c;
+
+    //if (c == 0) p[0] = p[1] = 0;   // blue on underflow
+    //if (c == 255) p[1] = p[2] = 0; // red on overflow
+  }
+
+  std::string fn(filename);
+  fn += ".tga";
+  return tgaSave(fn.c_str(), pixels.data(), w, h, 3);
+}
+
+
+
 template<Func func>
 class LayerConv: public Layer {
 public:
diff --git a/projects/neural/layer.conv.shared.inc.cpp b/projects/neural/layer.conv.shared.inc.cpp
index 366093f..f729bc4 100644
--- a/projects/neural/layer.conv.shared.inc.cpp
+++ b/projects/neural/layer.conv.shared.inc.cpp
@@ -29,7 +29,7 @@ void iterateTestConvolutionShared(Layout cl, Layout pl, Kernel k, Neuron *c_neur
   for(int ky = 0; ky < k.sy; ++ky)
   for(int kx = 0; kx < k.sx; ++kx)
   for(int pz = pl.z0; pz < pl.z1; ++pz) {
-    int wi = (ky*k.sx + kx)*pl.getD() + pz - pl.z0;
+    int wi = (((cz - cl.z0)*k.sy + ky)*k.sx + kx)*pl.getD() + pz - pl.z0;
     Weight &w = weights[wi];
 
     int px = pl.x0 + (cx - cl.x0)*k.dx + k.ox + kx;
@@ -46,7 +46,7 @@ void iterateTestConvolutionShared(Layout cl, Layout pl, Kernel k, Neuron *c_neur
 
 
 template<class Iter>
-void iterateConvolutionSharedDyn(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+void iterateConvolutionShared(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
   if (!cl) return;
   assert(pl);
   assert(wl);
@@ -71,122 +71,32 @@ void iterateConvolutionSharedDyn(Layout cl, Layout pl, Layout wl, Kernel k, Neur
   int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
 
   int k_sxd = k.sx*p_d;
+  int k_syxd = k.sy*k_sxd;
 
   int p_ddy = (pl.sx - k.sx)*pl.sz;
   int p_ddx = pl.sz - p_d;
 
   Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
   Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox)*pl.sz + pl.z0;
-  Weight *ew = weights + k.sy*k_sxd;
-
-  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
-  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
-  for(Neuron *e = icn + c_d; icn < e; ++icn) {
-    typename Iter::AccumType a;
-    Iter::init(*icn, a);
-
-    Neuron *iipn = ipn;
-    for(Weight *iw = weights; iw < ew; iipn += p_ddy)
-    for(Weight *e = iw + k_sxd; iw < e; iipn += p_ddx)
-    for(Weight *e = iw + p_d; iw < e; ++iw, ++iipn)
-      Iter::iter(*iipn, *iw, a);
-
-    Iter::done(*icn, a);
-  }
-}
-
-
-template<int KSX, int KSY, int PD, class Iter>
-void iterateConvolutionSharedXYD(Layout cl, Layout pl, Layout wl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
-  if (!cl) return;
-  assert(pl);
-  assert(wl);
-  assert(k);
-  assert(c_neurons);
-  assert(p_neurons);
-  assert(weights);
-  assert(cl.isSubLayoutOf(wl));
-  assert(pl.x0 + k.ox >= 0 && pl.x0 + (wl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
-  assert(pl.y0 + k.oy >= 0 && pl.y0 + (wl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
-  assert(KSX == k.sx);
-  assert(KSY == k.sy);
-  assert(PD == pl.getD());
-
-  int c_h = cl.getH();
-  int c_w = cl.getW();
-  int c_d = cl.getD();
-  int c_swz = c_w*cl.sz;
-  int c_shxz = c_h*cl.sx*cl.sz;
-  int c_dx = cl.sz - c_d;
-  int c_dy = (cl.sx - c_w)*cl.sz;
-
-  int p_dx = k.dx*pl.sz;
-  int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
-
-  int p_ddy = (pl.sx - KSX)*pl.sz;
-  int p_ddx = pl.sz - PD;
-
-  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
-  Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox)*pl.sz + pl.z0;
+  weights += (cl.z0 - wl.z0)*k_syxd;
+  Weight *iw = weights;
 
   for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
-  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx, iw = weights)
   for(Neuron *e = icn + c_d; icn < e; ++icn) {
     typename Iter::AccumType a;
     Iter::init(*icn, a);
 
     Neuron *iipn = ipn;
-    Weight *iw = weights;
-    for(int i = 0; i < KSY; ++i, iipn += p_ddy)
-    for(int i = 0; i < KSX; ++i, iipn += p_ddx)
-    for(int i = 0; i < PD; ++i, ++iw, ++iipn)
+    for(Weight *e = iw + k_syxd; iw < e; iipn += p_ddy)
+    for(Weight *e = iw + k_sxd; iw < e; iipn += p_ddx)
+    for(Weight *e = iw + p_d; iw < e; ++iw, ++iipn)
       Iter::iter(*iipn, *iw, a);
 
     Iter::done(*icn, a);
   }
 }
 
-typedef void (*iterateConvolutionSharedFunc)(Layout, Layout, Layout, Kernel, Neuron*, Neuron*, Weight*);
-
-template<int KSX, int KSY, class Iter>
-iterateConvolutionSharedFunc getIterateConvolutionSharedFuncXY(int pd) {
-  if (pd <= 8) switch(pd) {
-    case 1: return &iterateConvolutionSharedXYD<KSX, KSY, 1, Iter>;
-    case 2: return &iterateConvolutionSharedXYD<KSX, KSY, 2, Iter>;
-    case 3: return &iterateConvolutionSharedXYD<KSX, KSY, 3, Iter>;
-    case 4: return &iterateConvolutionSharedXYD<KSX, KSY, 4, Iter>;
-    case 5: return &iterateConvolutionSharedXYD<KSX, KSY, 5, Iter>;
-    case 6: return &iterateConvolutionSharedXYD<KSX, KSY, 6, Iter>;
-    case 7: return &iterateConvolutionSharedXYD<KSX, KSY, 7, Iter>;
-    case 8: return &iterateConvolutionSharedXYD<KSX, KSY, 8, Iter>;
-  }
-  return &iterateConvolutionSharedDyn<Iter>;
-}
-
-
-template<class Iter>
-iterateConvolutionSharedFunc getIterateConvolutionSharedFunc(int ksx, int ksy, int pd) {
-  if (0 && ksx == ksy && pd <= 8) switch(ksx) {
-    case 1: return getIterateConvolutionSharedFuncXY<1, 1, Iter>(pd);
-    case 2: return getIterateConvolutionSharedFuncXY<2, 2, Iter>(pd);
-    case 3: return getIterateConvolutionSharedFuncXY<3, 3, Iter>(pd);
-    case 4: return getIterateConvolutionSharedFuncXY<4, 4, Iter>(pd);
-    case 5: return getIterateConvolutionSharedFuncXY<5, 5, Iter>(pd);
-    case 6: return getIterateConvolutionSharedFuncXY<6, 6, Iter>(pd);
-    case 7: return getIterateConvolutionSharedFuncXY<7, 7, Iter>(pd);
-    case 8: return getIterateConvolutionSharedFuncXY<8, 8, Iter>(pd);
-  }
-  return &iterateConvolutionSharedDyn<Iter>;
-}
-
-
-template<class Iter>
-void iterateConvolutionShared(const Layout &cl, const Layout &pl, const Layout &wl, const Kernel &k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
-  iterateConvolutionSharedFunc f = getIterateConvolutionSharedFunc<Iter>(k.sx, k.sy, pl.getD());
-  f(cl, pl, wl, k, c_neurons, p_neurons, weights);
-}
-
-
-
 template<class Iter>
 void iterateConvolutionSharedPoint(Layout cl, Layout pl, Layout wl, Kernel k, int kx, int ky, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
@@ -214,29 +124,62 @@ void iterateConvolutionSharedPoint(Layout cl, Layout pl, Layout wl, Kernel k, in
   int p_d = pl.getD();
   int p_dx = k.dx*pl.sz;
   int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
+
+  int w_dz = (k.sy*k.sx - 1)*p_d;
 
   Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
   Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy + ky)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox + kx)*pl.sz + pl.z0;
-  weights += (ky*k.sx + kx)*p_d;
-  Weight *ew = weights + p_d;
+  weights += (((cl.z0 - wl.z0)*k.sy + ky)*k.sx + kx)*p_d;
+  Weight *iw = weights;
 
   for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
-  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
-  for(Neuron *e = icn + c_d; icn < e; ++icn, ipn -= p_d)
-  for(Weight *iw = weights; iw < ew; ++ipn, ++iw)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx, iw = weights)
+  for(Neuron *e = icn + c_d; icn < e; ++icn, ipn -= p_d, iw += w_dz)
+  for(Weight *e = iw + p_d; iw < e; ++ipn, ++iw)
     Iter::iter2(*icn, *ipn, *iw);
 }
 
 
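Editor's note: the substantive change in these hunks is the weight indexing. Shared kernels are now stored per output channel, so the flat index of a weight is (((cz*ksy + ky)*ksx + kx)*pd + pz). A small helper making that layout explicit, illustrative only (the function name is invented):

    // Sketch: flat index into the shared convolution weight array,
    // laid out as [outputChannel][kernelY][kernelX][inputChannel].
    inline int sharedWeightIndex(int cz, int ky, int kx, int pz,
                                 int ksy, int ksx, int pd) {
      return ((cz*ksy + ky)*ksx + kx)*pd + pz;
    }
    // Total size: outputChannels * ksy * ksx * pd, which is exactly the
    // weightsCount that LayerConvSharedBase computes below.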
+void fillConvolutionWeights(int kx, int ky, int kz, int count, Weight *weights) {
+  double kr = 1.5;
+  double sum = 0;
+
+  Weight *iw = weights;
+  for(int i = 0; i < count; ++i)
+  for(int y = 0; y < ky; ++y)
+  for(int x = 0; x < kx; ++x)
+  for(int z = 0; z < kz; ++z, ++iw) {
+    double dx = (2.0*x/(kx-1) - 1)*kr;
+    double dy = (2.0*y/(ky-1) - 1)*kr;
+    double e = exp( -dx*dx - dy*dy );
+    sum += e;
+    iw->w = (WeightReal)( (rand()/(double)RAND_MAX*2 - 1)*e );
+    //iw->w = (WeightReal)( rand()/(double)RAND_MAX*e );
+  }
+
+  WeightReal k = (WeightReal)(10*kz/sum);
+  Weight *ew = iw;
+  for(iw = weights; iw < ew; ++iw) iw->w *= k;
+}
+
+
+
 class LayerConvSharedBase: public Layer {
 public:
+  Kernel kernel;
   std::vector<Weight> mtWeights;
 
-  using Layer::Layer;
-
+  LayerConvSharedBase(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
+    Layer(&prev, layout, kernel.sx * kernel.sy * layout.getD() * prev.back().layout.getD(), weights),
+    kernel(kernel)
+  {
+    assert(kernel);
+    stat.links = weightsCount * layout.getW() * layout.getH();
+    if (ownWeights) fillWeights(-1, 1);
+  }
+
+
   void split(int threadsCount) override {
     Layer::split(threadsCount);
     Weight w = {};
@@ -264,16 +207,11 @@ public:
 template<Func func>
 class LayerConvShared: public LayerConvSharedBase {
 public:
-  Kernel kernel;
-
   LayerConvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
-    LayerConvSharedBase(&prev, layout, kernel.sx*kernel.sy*prev.back().layout.getD(), weights),
-    kernel(kernel)
+    LayerConvSharedBase(prev, layout, kernel, weights)
   {
-    assert(kernel);
-    stat.links = weightsCount*neuronsCount;
-    if (ownWeights) fillWeights(-1, 1);
+    stat.links = weightsCount * layout.getW() * layout.getH();
+    if (ownWeights) fillConvolutionWeights(kernel.sx, kernel.sy, this->prev->layout.getD(), layout.getD(), this->weights);
   }
 
@@ -339,6 +277,10 @@ public:
     iterateNeurons(prev->layout, prev->neurons);
     clearAccum();
   }
+
+
+  bool saveDemo() override
+    { return !filename || saveConvDemoImage( filename, layout.getD(), kernel.sx, kernel.sy, prev->layout.getD(), weights ); }
 };
 
 
@@ -346,16 +288,11 @@ public:
 template<Func func>
 class LayerDeconvShared: public LayerConvSharedBase {
 public:
-  Kernel kernel;
-
   LayerDeconvShared(Layer &prev, const Layout &layout, const Kernel &kernel, Weight *weights = nullptr):
-    LayerConvSharedBase(&prev, layout, kernel.sx*kernel.sy*layout.getD(), weights),
-    kernel(kernel)
+    LayerConvSharedBase(prev, layout, kernel, weights)
  {
-    assert(kernel);
-    stat.links = weightsCount*neuronsCount;
-    if (ownWeights) fillWeights(-1, 1);
+    stat.links = weightsCount * this->prev->layout.getW() * this->prev->layout.getH();
+    if (ownWeights) fillConvolutionWeights(kernel.sx, kernel.sy, layout.getD(), this->prev->layout.getD(), this->weights);
   }
 
@@ -421,6 +358,10 @@ public:
     iterateTestConvolutionShared(prev->layout, layout, kernel, prev->neurons, neurons, weights);
     iterateTestConvolutionShared(prev->layout, layout, kernel, prev->neurons, neurons, weights);
   }
+
+
+  bool saveDemo() override
+    { return !filename || saveConvDemoImage( filename, prev->layout.getD(), kernel.sx, kernel.sy, layout.getD(), weights ); }
 };
 
 #endif
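Editor's note: fillConvolutionWeights draws each weight uniformly at random and shapes it with a Gaussian envelope exp(-dx*dx - dy*dy) centered on the kernel, then rescales all weights so the summed envelope is fixed at 10*kz. A usage sketch with invented sizes (not part of the patch):

    // Sketch: initializing a bank of 8 shared 4x4 kernels over a
    // 3-channel previous layer. With kr = 1.5, the envelope is ~1 at
    // the kernel center and exp(-4.5) ~ 0.011 at a corner, so border
    // weights start out near zero.
    std::vector<Weight> w(8*4*4*3);
    fillConvolutionWeights(/*kx*/4, /*ky*/4, /*kz*/3, /*count*/8, w.data());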
diff --git a/projects/neural/layer.conv.test.inc.cpp b/projects/neural/layer.conv.test.inc.cpp
index c0b4be8..c4bd34f 100644
--- a/projects/neural/layer.conv.test.inc.cpp
+++ b/projects/neural/layer.conv.test.inc.cpp
@@ -8,7 +8,7 @@
 #include "layer.conv.shared.inc.cpp"
 
 
-class ConvTest: public Test {
+class ConvTest: public LayerTest {
 public:
   static void init(const Layout &cl, const Layout &pl, const Kernel &k, bool shared = false)
     { Test::init(cl.getCount(), pl.getCount(), (shared ? 1 : cl.getActiveCount())*k.sx*k.sy*pl.getD()); }
@@ -144,25 +144,25 @@ public:
   {
     Layer l(nullptr, pl);
     new LayerConv<funcSigmoidExp>(l, cl, k);
-    Test::testLayer("LayerConv", l);
+    testLayer("LayerConv", l);
   }
 
   {
     Layer l(nullptr, cl);
     new LayerDeconv<funcSigmoidExp>(l, pl, k);
-    Test::testLayer("LayerDeconv", l);
+    testLayer("LayerDeconv", l);
   }
 
   {
     Layer l(nullptr, pl);
     new LayerConvShared<funcSigmoidExp>(l, cl, k);
-    Test::testLayer("LayerConvShared", l);
+    testLayer("LayerConvShared", l);
   }
 
   {
     Layer l(nullptr, cl);
     new LayerDeconvShared<funcSigmoidExp>(l, pl, k);
-    Test::testLayer("LayerDeconvShared", l);
+    testLayer("LayerDeconvShared", l);
   }
 
   return st;
diff --git a/projects/neural/layer.convsub.shared.inc.cpp b/projects/neural/layer.convsub.shared.inc.cpp
new file mode 100644
index 0000000..3223870
--- /dev/null
+++ b/projects/neural/layer.convsub.shared.inc.cpp
@@ -0,0 +1,236 @@
+#ifndef LAYER_CONVSUB_SHARED_INC_CPP
+#define LAYER_CONVSUB_SHARED_INC_CPP
+
+
+#include "layer.conv.inc.cpp"
+
+
+template<class Iter>
+void iterateConvolutionShared2(Layout cl, Layout pl, Kernel k, Neuron *c_neurons, Neuron *p_neurons, Weight *weights) {
+  assert(cl);
+  assert(pl);
+  assert(k);
+  assert(c_neurons);
+  assert(p_neurons);
+  assert(weights);
+  assert(!cl.hasPadZ());
+  assert(!pl.hasPadZ());
+  assert(pl.x0 + k.ox >= 0 && pl.x0 + (cl.getW()-1)*k.dx + k.ox + k.sx <= pl.sx);
+  assert(pl.y0 + k.oy >= 0 && pl.y0 + (cl.getH()-1)*k.dy + k.oy + k.sy <= pl.sy);
+
+  int c_h = cl.getH();
+  int c_w = cl.getW();
+  int c_d = cl.getD();
+  int c_swz = c_w*cl.sz;
+  int c_shxz = c_h*cl.sx*cl.sz;
+  int c_dx = cl.sz - c_d;
+  int c_dy = (cl.sx - c_w)*cl.sz;
+
+  int p_d = pl.getD();
+  int p_dkx = (pl.sx - k.sx)*pl.sz;
+  int p_dx = k.dx*pl.sz;
+  int p_dy = k.dy*pl.sx*pl.sz - c_w*p_dx;
+
+  // NOTE: everything below is unfinished scaffolding copied from
+  // iterateConvolutionSharedPoint; wl, kx and ky are not defined in
+  // this scope and the ky/kx loop body is still empty.
+  c_neurons += (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
+  p_neurons += ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox)*pl.sz + pl.z0;
+
+  for(int ky = 0; ky < k.sy; ++ky, p_neurons += p_dkx)
+  for(int kx = 0; kx < k.sx; ++kx, p_neurons += pl.sz) {
+  }
+
+
+  Neuron *icn = c_neurons + (cl.y0*cl.sx + cl.x0)*cl.sz + cl.z0;
+  Neuron *ipn = p_neurons + ((pl.y0 + (cl.y0 - wl.y0)*k.dy + k.oy + ky)*pl.sx + pl.x0 + (cl.x0 - wl.x0)*k.dx + k.ox + kx)*pl.sz + pl.z0;
+  weights += (ky*k.sx + kx)*p_d;
+  Weight *ew = weights + p_d;
+
+  for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy)
+  for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx)
+  for(Neuron *e = icn + c_d; icn < e; ++icn, ipn -= p_d)
+  for(Weight *iw = weights; iw < ew; ++ipn, ++iw)
+    Iter::iter2(*icn, *ipn, *iw);
+}
+
+
+
+
+template<Func func>
+class LayerSub: public Layer {
+public:
+  Layout optLayout;
+  Layout::List mtOptLayouts;
+  std::vector<Neuron*> choosen;
+
+  LayerSub(Layer &prev, const Layout &layout):
+    Layer(&prev, layout),
+    optLayout(optimizeLayoutSimple(layout)),
+    choosen(layout.getActiveCount(), nullptr)
+  { }
+
+
+  void split(int threadsCount) override {
+    Layer::split(threadsCount);
+    optLayout.split(mtOptLayouts, threadsCount);
+  }
+
+
+  void pass(Barrier &barrier) override {
+    Layout cl = mtLayouts[barrier.tid];
+    Layout pl = prev->layout;
+    Layout wl = layout;
+    if (!cl) return;
+
+    assert(pl.getW() == wl.getW()*2);
+    assert(pl.getH() == wl.getH()*2);
+    assert(pl.getD() == wl.getD());
+    assert(cl.isSubLayoutOf(wl));
+
+    int c_h = cl.getH();
+    int c_w = cl.getW();
+    int c_d = cl.getD();
+    int c_sxz = cl.sx*cl.sz;
+    int c_swz = c_w*cl.sz;
+    int c_shxz = c_h*c_sxz;
+    int c_dy = c_sxz - c_swz;
+    int c_dx = cl.sz - c_d;
+
+    int w_d = wl.getD();
+    int w_w = wl.getW();
+    int w_dy = (w_w - c_w)*w_d;
+    int w_dx = w_d - c_d;
+
+    int p_dy = (pl.sx - c_w)*pl.sz*2;
+    int p_dx = pl.sz*2 - c_d;
+
+    int p_i1 = pl.sz;
+    int p_i2 = pl.sx*pl.sz;
+    int p_i3 = p_i1 + p_i2;
+
+    int cx0 = cl.x0 - wl.x0;
+    int cy0 = cl.y0 - wl.y0;
+    int cz0 = cl.z0 - wl.z0;
+
+    Neuron *icn = neurons + (cl.y0*c_sxz + cl.x0*cl.sz + cl.z0);
+    Neuron *ipn = prev->neurons + ((pl.y0 + cy0*2)*pl.sx + pl.x0 + cx0*2)*pl.sz + pl.z0 + cz0;
+    Neuron **icc = choosen.data() + (cy0*w_w + cx0)*w_d + cz0;
+
+    for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, ipn += p_dy, icc += w_dy)
+    for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, ipn += p_dx, icc += w_dx)
+    for(Neuron *e = icn + c_d; icn < e; ++icn, ++ipn, ++icc) {
+      Neuron *iipn = ipn, *pn = iipn;
+      NeuronReal v = pn->v, d = pn->d;
+      pn->d = 0;
+
+      iipn = ipn + p_i1;
+      if (v < iipn->v) { v = iipn->v; d = iipn->d; pn = iipn; }
+      iipn->d = 0;
+
+      iipn = ipn + p_i2;
+      if (v < iipn->v) { v = iipn->v; d = iipn->d; pn = iipn; }
+      iipn->d = 0;
+
+      iipn = ipn + p_i3;
+      if (v < iipn->v) { v = iipn->v; d = iipn->d; pn = iipn; }
+      iipn->d = 0;
+
+      func(*icn, v);
+      icn->d *= d;
+      *icc = pn;
+    }
+  }
+
+
+  void backpassDeltas(Barrier &barrier) override {
+    Layout cl = mtOptLayouts[barrier.tid];
+    Layout wl = optLayout;
+    if (!cl) return;
+
+    int c_h = cl.getH();
+    int c_w = cl.getW();
+    int c_d = cl.getD();
+    int c_sxz = cl.sx*cl.sz;
+    int c_swz = c_w*cl.sz;
+    int c_shxz = c_h*c_sxz;
+    int c_dy = c_sxz - c_swz;
+    int c_dx = cl.sz - c_d;
+
+    int w_d = wl.getD();
+    int w_w = wl.getW();
+    int w_dy = (w_w - c_w)*w_d;
+    int w_dx = w_d - c_d;
+
+    Neuron *icn = neurons + (cl.y0*c_sxz + cl.x0*cl.sz + cl.z0);
+    Neuron **icc = choosen.data() + ((cl.y0 - wl.y0)*w_w + cl.x0 - wl.x0)*w_d + cl.z0 - wl.z0;
+
+    for(Neuron *e = icn + c_shxz; icn < e; icn += c_dy, icc += w_dy)
+    for(Neuron *e = icn + c_swz; icn < e; icn += c_dx, icc += w_dx)
+    for(Neuron *e = icn + c_d; icn < e; ++icn, ++icc) {
+      assert(*icc);
+      (*icc)->d = icn->d;
+    }
+  }
+
+
+  void testPass() override {
+    Layout cl = layout;
+    Layout pl = prev->layout;
+
+    assert(pl.getW() == cl.getW()*2);
+    assert(pl.getH() == cl.getH()*2);
+    assert(pl.getD() == cl.getD());
+
+    for(int cy = cl.y0; cy < cl.y1; ++cy)
+    for(int cx = cl.x0; cx < cl.x1; ++cx)
+    for(int cz = cl.z0; cz < cl.z1; ++cz) {
+      int ci = (cy*cl.sx + cx)*cl.sz + cz;
+      Neuron &cn = neurons[ci];
+
+      Neuron *c = nullptr;
+      NeuronReal v = 0, d = 0;
+
+      for(int ky = 0; ky < 2; ++ky)
+      for(int kx = 0; kx < 2; ++kx) {
+        int px = pl.x0 + (cx - cl.x0)*2 + kx;
+        int py = pl.y0 + (cy - cl.y0)*2 + ky;
+        int pz = pl.z0 + cz - cl.z0;
+
+        Neuron &pn = prev->neurons[ (py*pl.sx + px)*pl.sz + pz ];
+        if (!c || v < pn.v) { v = pn.v; d = pn.d; c = &pn; }
+        pn.d = 0;
+      }
+
+      assert(c);
+      c->d = d;
+      func(cn, v);
+    }
+  }
+
+
+  void testBackpass() override {
+    Layout cl = layout;
+    Layout pl = prev->layout;
+
+    assert(pl.getW() == cl.getW()*2);
+    assert(pl.getH() == cl.getH()*2);
+    assert(pl.getD() == cl.getD());
+
+    for(int cy = cl.y0; cy < cl.y1; ++cy)
+    for(int cx = cl.x0; cx < cl.x1; ++cx)
+    for(int cz = cl.z0; cz < cl.z1; ++cz) {
+      int ci = (cy*cl.sx + cx)*cl.sz + cz;
+      Neuron &cn = neurons[ci];
+
+      for(int ky = 0; ky < 2; ++ky)
+      for(int kx = 0; kx < 2; ++kx) {
+        int px = pl.x0 + (cx - cl.x0)*2 + kx;
+        int py = pl.y0 + (cy - cl.y0)*2 + ky;
+        int pz = pl.z0 + cz - cl.z0;
+
+        Neuron &pn = prev->neurons[ (py*pl.sx + px)*pl.sz + pz ];
+        pn.d *= cn.d;
+      }
+    }
+  }
+};
+
+
+#endif
diff --git a/projects/neural/layer.inc.cpp b/projects/neural/layer.inc.cpp
index 0d6447a..fe357de 100644
--- a/projects/neural/layer.inc.cpp
+++ b/projects/neural/layer.inc.cpp
@@ -2,118 +2,57 @@
 #define LAYER_INC_CPP
 
 
-#include <cassert>
-#include <cmath>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
+#include "common.inc.cpp"
 
-#include <atomic>
-#include <string>
-#include <thread>
-#include <vector>
 
-#include "layout.inc.cpp"
-
-
-
-typedef double WeightReal;
-typedef double NeuronReal;
-typedef double AccumReal;
-
-typedef int WeightInt;
-typedef int AccumInt;
-
-
-#define RANDOM_MAX 0x7fffffff
-inline unsigned int randomNext(unsigned int prev)
-  { return (1103515245*prev + 12345) & RANDOM_MAX; }
-inline unsigned int randomBranch(unsigned int seed)
-  { return randomNext(seed + 1); }
-
-inline void busyloop(unsigned int count)
-  { while(count--) __asm__ __volatile__(""); }
-
-
-struct Accum {
-  union { AccumReal v; AccumInt i; };
-};
-
-
-struct Neuron {
-  NeuronReal v, d;
-  Accum a;
-};
-
-
-struct Weight {
-  union { WeightReal w; WeightInt i; };
-};
-
-
-struct Iter {
-  typedef Accum AccumType;
-  typedef NeuronReal* DataType;
-  typedef AccumType DataAccumType;
-  static inline void init(Neuron&, AccumType&) { }
-  static inline void iter(Neuron&, Weight&, AccumType&) { }
-  static inline void done(Neuron&, AccumType&) { }
-  static inline void iter2(Neuron&, Neuron&, Weight&) { }
-  static inline void iter3(Neuron&) { }
-  static inline void iter4(Neuron&, DataType, DataAccumType&) { }
-};
-
-
-class Barrier {
-private:
-  std::atomic<unsigned int> &counter;
-  unsigned int next;
-  unsigned int busyseed;
+class WeightHolder {
 public:
-  const unsigned int tid;
-  const unsigned int threads;
-  unsigned int seed;
+  const int weightsCount;
+  Weight *weights;
 
-  Barrier(const Barrier&) = delete;
-  inline Barrier(std::atomic<unsigned int> &counter, unsigned int tid, unsigned int threads, unsigned int seed):
-    counter(counter), next(), busyseed(randomBranch(seed)), tid(tid), threads(threads), seed(seed) { assert(tid < threads); }
-
-  //inline void busyloop() { }
-  inline void busyloop(unsigned int maxCycles = 4096) { ::busyloop( (busyseed = randomNext(busyseed))%maxCycles ); }
-  inline unsigned int rand() { return seed = randomNext(seed); }
-  inline void wait() { next += threads; ++counter; while(counter < next) busyloop(); }
-  inline void subwait() { while(counter < next + tid) busyloop(); }
-
-};
-
-
-struct Stat {
-  int neurons;
-  int activeNeurons;
-  int weights;
-  int links;
-  size_t memsize;
+  const char *filename;
+
+  explicit WeightHolder(int weightsCount = 0, Weight *weights = nullptr):
+    weightsCount(weightsCount), weights(weights), filename()
+  { assert(weightsCount >= 0); }
+
+
+  virtual ~WeightHolder() { }
+
+
+  bool save(bool demoOnly = false) {
+    if (filename && weightsCount && !demoOnly) {
+      FILE *f = fopen(filename, "wb");
+      if (!f)
+        return printf("cannot open file for write: %s\n", filename), false;
+      if (!fwrite(weights, sizeof(*weights)*weightsCount, 1, f))
+        return fclose(f), printf("cannot write to file: %s\n", filename), false;
+      fclose(f);
+    }
+    return saveDemo();
+  }
 
-  Stat(): neurons(), activeNeurons(), weights(), links(), memsize() { }
 
+  bool load() {
+    if (filename && weightsCount) {
+      FILE *f = fopen(filename, "rb");
+      if (!f)
+        return printf("cannot open file for read: %s\n", filename), false;
+      if (!fread(weights, sizeof(*weights)*weightsCount, 1, f))
+        return fclose(f), printf("cannot read from file: %s\n", filename), false;
+      fclose(f);
+    }
+    return true;
+  }
+
-  Stat& operator+= (const Stat &b) {
-    neurons += b.neurons;
-    activeNeurons += b.activeNeurons;
-    weights += b.weights;
-    links += b.links;
-    memsize += b.memsize;
-    return *this;
-  }
 
-  void print(const char *prefix = nullptr) const {
-    if (prefix && *prefix) printf("%s: ", prefix);
-    printf("neurons: %d / %d, links %d / %d, memSize: %llu\n", activeNeurons, neurons, weights, links, (unsigned long long)memsize);
-  }
+  virtual bool saveDemo() { return true; }
 };
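Editor's note: WeightHolder now owns the raw fwrite/fread snapshot logic that used to live in Layer::save/load, and the file format is nothing more than the Weight array written verbatim. A round trip therefore looks like this (a sketch; the .wgt path is invented):

    // Sketch: snapshotting weights through WeightHolder.
    WeightHolder holder(count, weightsPtr);   // count, weightsPtr: caller-owned storage
    holder.filename = "layer0.wgt";           // hypothetical path
    holder.save();   // writes count*sizeof(Weight) bytes, then calls saveDemo()
    holder.load();   // reads them back verbatim

Because Weight is a union of WeightReal and WeightInt, the snapshot is architecture-dependent (endianness and sizeof(double)), which is fine for the local experiments this project targets.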
 
 
-class Layer {
+
+class Layer: public WeightHolder {
 public:
   Layer *prev, *next;
 
@@ -122,11 +61,9 @@ public:
   Neuron *neurons;
   int neuronsCount;
 
-  Weight *weights;
-  int weightsCount;
   bool ownWeights;
 
-  const char *filename;
+  bool skipTrain;
 
   Stat stat;
 
@@ -135,15 +72,14 @@ public:
 
 
   Layer(Layer *prev, const Layout &layout, int weightsCount = 0, Weight *weights = nullptr):
+    WeightHolder(weightsCount, weights),
     prev(prev ? &prev->back() : nullptr),
     next(),
     layout(layout),
     neurons(),
     neuronsCount(layout.getCount()),
-    weights(weights),
-    weightsCount(weightsCount),
     ownWeights(!weights && weightsCount),
-    filename()
+    skipTrain()
   {
     assert(layout);
     assert(neuronsCount > 0);
@@ -169,7 +105,7 @@ public:
   }
 
 
-  virtual ~Layer() {
+  ~Layer() {
     if (next) delete next;
     if (neurons) delete[] neurons;
     if (ownWeights) delete[] weights;
@@ -183,32 +119,12 @@ public:
   }
 
 
   inline Stat sumStat() const
     { Stat s; for(const Layer *l = this; l; l = l->next) s += l->stat; return s; }
 
-  bool save() const {
-    if (filename && weightsCount) {
-      FILE *f = fopen(filename, "wb");
-      if (!f)
-        return printf("cannot open file for write: %s\n", filename), false;
-      if (!fwrite(weights, sizeof(*weights)*weightsCount, 1, f))
-        return fclose(f), printf("cannot write to file: %s\n", filename), false;
-      fclose(f);
-    }
-    return !next || next->save();
-  }
-
-
-  bool load() {
-    if (filename && weightsCount) {
-      FILE *f = fopen(filename, "rb");
-      if (!f)
-        return printf("cannot open file for read: %s\n", filename), false;
-      if (!fread(weights, sizeof(*weights)*weightsCount, 1, f))
-        return fclose(f), printf("cannot read from file: %s\n", filename), false;
-      fclose(f);
-    }
-    return !next || next->load();
-  }
+  bool save(bool demoOnly = false)
+    { return WeightHolder::save(demoOnly) && (!next || next->save(demoOnly)); }
+  bool load()
+    { return WeightHolder::load() && (!next || next->load()); }
 
 
   void clearAccum() {
     Accum a = {};
     for(Neuron *in = neurons, *e = in + neuronsCount; in < e; ++in)
@@ -221,8 +137,8 @@ public:
     for(Weight *iw = weights, *e = iw + weightsCount; iw < e; ++iw)
       iw->w = rand()*k + wmin;
   }
 
 
   virtual void split(int threadsCount) {
     layout.split(mtLayouts, threadsCount);
     if (prev) prev->layout.split(mtPrevLayouts, threadsCount);
@@ -235,10 +151,33 @@ public:
   virtual void testBackpass() { }
 
 
-  virtual void clGetThreadsData(std::vector &data) { }
-  virtual void clGetPassProgram(std::string &text) { }
-  virtual void clGetBackpassWeightsProgram(std::string &text) { }
-  virtual void clGetBackpassDeltasProgram(std::string &text) { }
+  void passFull(const Layer *last = nullptr, int threadsCount = 1) {
+    struct H {
+      Layer &layer;
+      const Layer *last;
+      std::atomic<unsigned int> barrierCounter;
+      std::vector<std::thread*> threads;
+
+      H(Layer &layer, const Layer *last, int threadsCount): layer(layer), last(last), barrierCounter(0), threads(threadsCount) { }
+
+      void func(int tid, unsigned int seed) {
+        Barrier barrier(barrierCounter, tid, threads.size(), seed);
+        for(Layer *l = layer.next; l; l = l->next) {
+          l->pass(barrier);
+          if (l == last || !l->next) break;
+          barrier.wait();
+        }
+      }
+    } h(*this, last, threadsCount);
+
+    for(Layer *l = this; l; l = l->next)
+      l->split(threadsCount);
+    for(int i = 1; i < threadsCount; ++i)
+      h.threads[i] = new std::thread(&H::func, &h, i, rand());
+    h.func(0, rand());
+    for(int i = 1; i < threadsCount; ++i)
+      { h.threads[i]->join(); delete h.threads[i]; }
+  }
 };
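Editor's note: passFull is the new convenience driver. It splits every layer for threadsCount workers, runs the forward pass layer by layer with a Barrier::wait() between layers, and joins the helper threads. A hypothetical usage, with layer types and sizes invented for the example:

    // Sketch: forward pass over a small chain with 4 worker threads.
    Layer input(nullptr, Layout(28, 28, 1));
    new LayerSimple<funcReLU>(input, Layout(10, 10, 8));  // links itself into the chain
    // ... fill input.neurons[..].v with the sample ...
    input.passFull(nullptr, 4);   // runs every layer after the input

Note that pass() itself never calls wait(); the inter-layer barrier lives in the driver, so a custom training loop can interleave extra per-layer work at the same points.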
diff --git a/projects/neural/layer.simple.inc.cpp b/projects/neural/layer.simple.inc.cpp
index 9b4d28f..847d1be 100644
--- a/projects/neural/layer.simple.inc.cpp
+++ b/projects/neural/layer.simple.inc.cpp
@@ -3,25 +3,9 @@
 
 
 #include "layer.inc.cpp"
+#include "func.inc.cpp"
 
 
-typedef void Func(Neuron &n, AccumReal s);
-
-
-inline void funcSigmoidExp(Neuron &n, AccumReal s) {
-  //if (s > 5) s = 5; else if (s < -5) s = -5;
-  AccumReal ss = 1/(1 + std::exp(-s)); n.v = ss; n.d = ss * (1-ss);
-}
-
-
-inline void funcSigmoidExp2(Neuron &n, AccumReal s) {
-  //if (s > 5) s = 5; else if (s < -5) s = -5;
-  AccumReal ss = 1/(1 + std::exp(-s)); n.v = ss; n.d = 0; //ss * (1-ss) * 0.1;
-}
-
-
-inline void funcReLU(Neuron &n, AccumReal s)
-  { n.v = s > 0 ? s : 0; n.d = s > 0; }
 
 
 
@@ -218,7 +202,7 @@ public:
     optLayout(optimizeLayoutSimple(layout)),
     prevOptLayout(optimizeLayoutSimple(this->prev->layout))
   {
-    if (ownWeights) fillWeights(-1, 1);
+    if (ownWeights) fillWeights(-0.5, 0.5);
   }
 
diff --git a/projects/neural/layer.simple.test.inc.cpp b/projects/neural/layer.simple.test.inc.cpp
index fc6add5..e025591 100644
--- a/projects/neural/layer.simple.test.inc.cpp
+++ b/projects/neural/layer.simple.test.inc.cpp
@@ -7,7 +7,7 @@
 #include "layer.sub.inc.cpp"
 
 
-class SimpleTest: public Test {
+class SimpleTest: public LayerTest {
 public:
   static void init(const Layout &cl, const Layout &pl = Layout())
     { Test::init(cl.getCount(), pl.getCount(), cl.getActiveCount()*pl.getActiveCount()); }
@@ -173,7 +173,7 @@ public:
   {
     Layer l(nullptr, pl);
     new LayerSimple<funcSigmoidExp>(l, cl);
-    Test::testLayer("LayerSimple", l);
+    testLayer("LayerSimple", l);
   }
 
   {
@@ -184,7 +184,7 @@ public:
 
     Layer l(nullptr, ppl);
     new LayerSub<funcSigmoidExp>(l, cl);
-    Test::testLayer("LayerSub", l);
+    testLayer("LayerSub", l);
   }
 
   return st;
diff --git a/projects/neural/layer.test.inc.cpp b/projects/neural/layer.test.inc.cpp
index ec5a7bf..a12c154 100644
--- a/projects/neural/layer.test.inc.cpp
+++ b/projects/neural/layer.test.inc.cpp
@@ -2,132 +2,12 @@
 #define LAYER_TEST_INC_CPP
 
 
-#include <vector>
+#include "test.inc.cpp"
 
-#include "layer.inc.cpp"
-
 
-class Test {
+class LayerTest: public Test {
 public:
-  class Stage {
-  public:
-    const int errors;
-    inline explicit Stage(const char *name): errors(Test::errors) {
-      for(int i = 0; i < level; ++i) printf("- ");
-      printf("%s\n", name);
-      fflush(stdout);
-      ++level;
-    }
-    inline ~Stage() {
-      --level;
-      if (!*this) {
-        for(int i = 0; i < level; ++i) printf("- ");
-        printf("FAILED\n");
-      }
-      fflush(stdout);
-    }
-    operator bool() { return Test::errors == errors; }
-  };
-
-private:
-  static int level;
-
-protected:
-  static std::vector<Neuron> c_neurons;
-  static std::vector<Neuron> p_neurons;
-  static std::vector<Weight> weights;
-
-public:
-  static int errors;
-
-
-  static void init(int c_count, int p_count, int w_count) {
-    Neuron n = {};
-    Weight w = {};
-
-    c_neurons.clear();
-    p_neurons.clear();
-    weights.clear();
-    c_neurons.resize(c_count, n);
-    p_neurons.resize(p_count, n);
-    weights.resize(w_count, w);
-  }
-
-
-  static bool verifyNeurons(const char *name, const Layout &l, const Neuron *neurons, bool ignorePadded = false) {
-    Stage st(name);
-    for(int y = 0; y < l.sy; ++y)
-    for(int x = 0; x < l.sx; ++x)
-    for(int z = 0; z < l.sz; ++z) {
-      int n = neurons[ (y*l.sx + x)*l.sz + z ].a.i;
-      int i = x >= l.x0 && x < l.x1
-           && y >= l.y0 && y < l.y1
-           && z >= l.z0 && z < l.z1;
-      if (ignorePadded ? i && n != i : n != i) {
-        printf(
-          "wrong neuron mark %d, expected %d (%d, %d, %d)\n",
-          n, i, y, x, z );
-        l.printYXZ("layout");
-        ++errors;
-        return st;
-      }
-    }
-    return st;
-  }
-
-
-  static bool verifyNeuronIndices(const char *name, const Layout &l, const Neuron *neurons, int base = 1, int stride = 1) {
-    Stage st(name);
-    for(int y = 0; y < l.sy; ++y)
-    for(int x = 0; x < l.sx; ++x)
-    for(int z = 0; z < l.sz; ++z) {
-      bool active = x >= l.x0 && x < l.x1
-                 && y >= l.y0 && y < l.y1
-                 && z >= l.z0 && z < l.z1;
-
-      int n = neurons[ (y*l.sx + x)*l.sz + z ].a.i;
-      int i = (((y - l.y0)*l.getW() + x - l.x0)*l.getD() + z - l.z0)*stride + base;
-
-      if (!active) i = 0;
-
-      if (n != i) {
-        printf(
-          "wrong neuron mark %d, expected %d (%d, %d, %d)\n",
-          n, i, y, x, z );
-        l.printYXZ("layout");
-        ++errors;
-        return st;
-      }
-    }
-    return st;
-  }
-
-
-  static bool verifyNeuronsAccum(const Layout &l, Neuron *neurons, int accum = 1, bool ignoreBounds = false) {
-    for(int y = 0; y < l.sy; ++y)
-    for(int x = 0; x < l.sx; ++x)
-    for(int z = 0; z < l.sz; ++z) {
-      Neuron &n = neurons[ (y*l.sx + x)*l.sz + z ];
-      int i = ( x >= l.x0 && x < l.x1
-             && y >= l.y0 && y < l.y1
-             && z >= l.z0 && z < l.z1 )*accum;
-      if (ignoreBounds) i = accum;
-      if (n.v != 0 && n.v != i) {
-        printf(
-          "wrong neuron mark %g, expected 0 or %d (%d, %d, %d)\n",
-          n.v, i, y, x, z );
-        l.printYXZ("layout");
-        ++errors;
-        return false;
-      }
-      if (n.v) n.a.i = 1;
-      n.v = 0;
-    }
-    return true;
-  }
-
-
   static bool testLayer(const char *name, Layer &l) {
     Stage st(name);
@@ -261,12 +141,4 @@ public:
 };
 
 
-int Test::level = 0;
-std::vector<Neuron> Test::c_neurons;
-std::vector<Neuron> Test::p_neurons;
-std::vector<Weight> Test::weights;
-int Test::errors = 0;
-
-
-
 #endif
diff --git a/projects/neural/layout.inc.cpp b/projects/neural/layout.inc.cpp
index 83a78a0..7f02b03 100644
--- a/projects/neural/layout.inc.cpp
+++ b/projects/neural/layout.inc.cpp
@@ -45,6 +45,10 @@ struct Layout {
   inline Layout& padXY (int p) { return padXY (p, p); }
   inline Layout& padXYZ(int p) { return padXYZ(p, p); }
 
+  inline bool hasPadX() const { return x0 > 0 || x1 < sx; }
+  inline bool hasPadY() const { return y0 > 0 || y1 < sy; }
+  inline bool hasPadZ() const { return z0 > 0 || z1 < sz; }
+  inline bool hasPad() const { return hasPadX() || hasPadY() || hasPadZ(); }
 
   inline int getW() const { return x1 - x0; }
   inline int getH() const { return y1 - y0; }
diff --git a/projects/neural/segment.cx4.inc.cpp b/projects/neural/segment.cx4.inc.cpp
new file mode 100644
index 0000000..61234ba
--- /dev/null
+++ b/projects/neural/segment.cx4.inc.cpp
@@ -0,0 +1,292 @@
+#ifndef SEGMENT_CX4_INC_CPP
+#define SEGMENT_CX4_INC_CPP
+
+
+#include "segment.inc.cpp"
+#include "func.inc.cpp"
+#include "layer.conv.inc.cpp"
+
+
+class SegmentCx4: public Segment {
+public:
+  enum {
+    KSX = 4,
+    KSY = 4,
+    SX = 12,
+    SY = 12,
+    MSX = 5,
+    MSY = 5,
+  };
+
+  const int msx, msy, msz;
+
+  Neuron *m_neurons;
+  Neuron *b_neurons;
+
+  SegmentCx4(int sz, int msz, Weight *weights = nullptr):
+    Segment(SX, SY, sz, msz*KSY*KSX*sz, weights), msx(MSX), msy(MSY), msz(msz)
+  {
+    m_neurons = new Neuron[msx*msy*msz + sx*sy*sz];
+    b_neurons = m_neurons + msx*msy*msz;
+    clear();
+  }
+  ~SegmentCx4()
+    { delete[] m_neurons; }
+
+
+  void clear() override
+    { memset(m_neurons, 0, sizeof(*m_neurons)*(msx*msy*msz + sx*sy*sz)); }
+
+
+  inline void check(int x, int y, int z) {
+    Segment::check(x, y, z);
+    assert(layout.getD() == sz);
+  }
+
+
+
+  Quality pass(Barrier &barrier, int x, int y, int z, NeuronReal trainRatio) override {
+    check(x, y, z);
+
+    Layout l = layout;
+    const int ksx = 4, ksy = 4;
+    int tid = barrier.tid;
+    int threads = barrier.threads;
+
+    int sx = this->sx;
+    int sy = this->sy;
+    int sz = this->sz;
+    int msx = this->msx;
+    int msy = this->msy;
+    int msz = this->msz;
+
+    int ksxyz = ksx*ksy*sz;
+    int fv_dkx = l.sz - sz;
+    int fv_dky = (l.sx - ksx)*l.sz;
+
+    NeuronReal *f_values = this->f_values + (y*l.sx + x)*l.sz + z;
+
+    // stage 1: pass from front to mid
+
+    Weight *iw = weights + tid*ksxyz;
+    Neuron *imn = m_neurons + tid;
+    NeuronReal *ifv = f_values;
+
+    for(int mz = tid; mz < msz; mz += threads, iw += threads*ksxyz, imn += threads - msx*msy*msz, ifv = f_values)
+    for(int my = 0; my < MSY; ++my, ifv += 2*(l.sx - MSX)*l.sz)
+    for(int mx = 0; mx < MSX; ++mx, imn += msz, ifv += 2*l.sz) {
+      AccumReal a = 0;
+
+      Weight *iiw = iw;
+      NeuronReal *iifv = ifv;
+
+      for(int ky = 0; ky < KSY; ++ky, iifv += fv_dky)
+      for(int kx = 0; kx < KSX; ++kx, iifv += fv_dkx)
+      for(Weight *e = iiw + sz; iiw < e; ++iiw, ++iifv)
+        a += *iifv * iiw->w;
+
+      if (a > 0) imn->v = a, imn->d = 1; else imn->v = imn->d = 0;
+    }
+
+    barrier.wait();
+
+    // stage 2: pass from mid to back and verify
+
+    AccumReal qa = 0;
+    for(int by = 2 + tid; by < 10; by += threads)
+    for(int bx = 2; bx < 10; ++bx)
+    for(int bz = 0; bz < sz; ++bz) {
+      AccumReal a = 0;
+      Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
+
+      for(int ky = by%2; ky < ksy; ky += 2)
+      for(int kx = bx%2; kx < ksx; kx += 2) {
+        int mx = (bx - kx)/2;
+        int my = (by - ky)/2;
+        assert(mx >= 0 && mx < msx && (bx - kx)%2 == 0);
+        assert(my >= 0 && my < msy && (by - ky)%2 == 0);
+        for(int mz = 0; mz < msz; ++mz) {
+          Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
+          Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + bz ];
+          a += mn.v * w.w;
+        }
+      }
+
+      if (a > 0) bn.v = a, bn.d = 1; else bn.v = bn.d = 0;
+
+      NeuronReal fn = f_values[ (by*l.sx + bx)*l.sz + bz ];
+      NeuronReal d = fn - bn.v;
+      bn.d *= d*trainRatio;
+      qa += d*d;
+    }
+    Quality q(qa/(64*sz));
+
+    if (trainRatio <= 0) return q;
+
+    barrier.wait();
+
+    // stage 3: backpass deltas
+
+    for(int mz = tid; mz < msz; mz += threads)
+    for(int my = 1; my < 4; ++my)
+    for(int mx = 1; mx < 4; ++mx) {
+      AccumReal a = 0;
+      Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
+
+      for(int ky = 0; ky < ksy; ++ky)
+      for(int kx = 0; kx < ksx; ++kx)
+      for(int kz = 0; kz < sz; ++kz) {
+        int bx = mx*2 + kx;
+        int by = my*2 + ky;
+        Neuron &bn = b_neurons[ (by*sx + bx)*sz + kz ];
+        Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
+        a += bn.d * w.w;
+      }
+      mn.d *= a;
+    }
+
+    barrier.wait();
+
+    // stage 4: update weights
+
+    for(int mz = tid; mz < msz; mz += threads)
+    for(int by = 4; by < 8; ++by)
+    for(int bx = 4; bx < 8; ++bx)
+    for(int bz = 0; bz < sz; ++bz) {
+      Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
+      NeuronReal fv = f_values[ (by*l.sx + bx)*l.sz + bz ];
+
+      for(int ky = by%2; ky < ksy; ky += 2)
+      for(int kx = bx%2; kx < ksx; kx += 2) {
+        int mx = (bx - kx)/2;
+        int my = (by - ky)/2;
+        assert(mx >= 1 && mx < 4 && (bx - kx)%2 == 0);
+        assert(my >= 1 && my < 4 && (by - ky)%2 == 0);
+        Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
+        Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + bz ];
+        w.w += bn.d*mn.v + mn.d*fv;
+      }
+    }
+
+    return q;
+  }
+
+
+
+  Quality testPass(int x, int y, int z, NeuronReal trainRatio) override {
+    check(x, y, z);
+
+    Layout l = layout;
+    const int ksx = 4, ksy = 4;
+
+    // stage 1: pass
+
+    clear();
+
+    for(int my = 0; my < msy; ++my)
+    for(int mx = 0; mx < msx; ++mx)
+    for(int mz = 0; mz < msz; ++mz) {
+      AccumReal a = 0;
+      Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
+
+      for(int ky = 0; ky < ksy; ++ky)
+      for(int kx = 0; kx < ksx; ++kx)
+      for(int kz = 0; kz < sz; ++kz) {
+        int fx = x + mx*2 + kx;
+        int fy = y + my*2 + ky;
+        int fz = z + kz;
+        NeuronReal fv = f_values[ (fy*l.sx + fx)*l.sz + fz ];
+        Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
+        a += fv * w.w;
+      }
+
+      if (a <= 0) { mn.v = mn.d = 0; continue; }   // <= matches the a > 0 test in pass()
+      mn.v = a; mn.d = 1;
+
+      for(int ky = 0; ky < ksy; ++ky)
+      for(int kx = 0; kx < ksx; ++kx)
+      for(int kz = 0; kz < sz; ++kz) {
+        int bx = mx*2 + kx;
+        int by = my*2 + ky;
+        int bz = kz;
+        Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
+        Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
+        bn.a.v += a * w.w;
+      }
+    }
+
+    // stage 2: finalize values and verify
+
+    AccumReal qa = 0;
+    for(int by = 2; by < 10; ++by)
+    for(int bx = 2; bx < 10; ++bx)
+    for(int bz = 0; bz < sz; ++bz) {
+      Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
+      if (bn.a.v > 0) bn.v = bn.a.v, bn.d = 1; else bn.v = bn.d = 0;
+      bn.a.v = 0;
+
+      NeuronReal fn = f_values[ ((y + by)*l.sx + x + bx)*l.sz + z + bz ];
+      NeuronReal d = fn - bn.v;
+      bn.d *= d*trainRatio;
+      qa += d*d;
+    }
+    Quality q(qa/(64*sz));
+
+    if (trainRatio <= 0) return q;
+
+    // stage 3: backpass deltas
+
+    for(int my = 0; my < msy; ++my)
+    for(int mx = 0; mx < msx; ++mx)
+    for(int mz = 0; mz < msz; ++mz) {
+      AccumReal a = 0;
+      Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
+
+      for(int ky = 0; ky < ksy; ++ky)
+      for(int kx = 0; kx < ksx; ++kx)
+      for(int kz = 0; kz < sz; ++kz) {
+        int bx = mx*2 + kx;
+        int by = my*2 + ky;
+        int bz = kz;
+        Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
+        Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + kz ];
+        a += bn.d * w.w;
+      }
+      mn.d *= a;
+    }
+
+    // stage 4: update weights
+
+    for(int by = 4; by < 8; ++by)
+    for(int bx = 4; bx < 8; ++bx)
+    for(int bz = 0; bz < sz; ++bz) {
+      Neuron &bn = b_neurons[ (by*sx + bx)*sz + bz ];
+      NeuronReal fv = f_values[ ((y + by)*l.sx + x + bx)*l.sz + z + bz ];
+
+      for(int ky = by%2; ky < ksy; ky += 2)
+      for(int kx = bx%2; kx < ksx; kx += 2)
+      for(int mz = 0; mz < msz; ++mz) {
+        int mx = (bx - kx)/2;
+        int my = (by - ky)/2;
+        assert(mx >= 1 && mx < 4 && (bx - kx)%2 == 0);
+        assert(my >= 1 && my < 4 && (by - ky)%2 == 0);
+        Neuron &mn = m_neurons[ (my*msx + mx)*msz + mz ];
+        Weight &w = weights[ ((mz*ksy + ky)*ksx + kx)*sz + bz ];
+        w.w += bn.d*mn.v + mn.d*fv;
+      }
+    }
+
+    return q;
+  }
+
+
+  bool saveDemo() override
+    { return !filename || saveConvDemoImage(filename, msz, 4, 4, sz, weights); }
+};
+
+
+
+
+#endif
+
diff --git a/projects/neural/segment.cx4.test.inc.cpp b/projects/neural/segment.cx4.test.inc.cpp
new file mode 100644
index 0000000..6143900
--- /dev/null
+++ b/projects/neural/segment.cx4.test.inc.cpp
@@ -0,0 +1,32 @@
+#ifndef SEGMENT_CX4_TEST_INC_CPP
+#define SEGMENT_CX4_TEST_INC_CPP
+
+
+#include "segment.test.inc.cpp"
+#include "segment.cx4.inc.cpp"
+
+
+class Cx4Test: public SegmentTest {
+public:
+  static bool test(const char *name, const Layout &l, int msz, int x, int y, int z) {
+    Stage st(name);
+
+    {
+      SegmentCx4 s(l.getD(), msz);
+      testSegment("SegmentCx4", s, l, x, y, z, 0.001);
+    }
+
+    return st;
+  }
+
+
+  static bool test(const char *name = "cx4") {
+    Stage st(name);
+    test("square-16x3", Layout(16, 16, 3), 5, 1, 2, 0);
+    test("random-rect", Layout(23, 58, 4).expandX(2, 0).expandY(5, 3).expandZ(3, 1), 7, 4, 5, 3);
+    return st;
+  }
+};
+
+
+#endif
diff --git a/projects/neural/segment.inc.cpp b/projects/neural/segment.inc.cpp
new file mode 100644
0000000..ad4e3ea --- /dev/null +++ b/projects/neural/segment.inc.cpp @@ -0,0 +1,42 @@ +#ifndef SEGMENT_INC_CPP +#define SEGMENT_INC_CPP + + +#include "layer.inc.cpp" + + +class Segment: public WeightHolder { +public: + const int sx, sy, sz; + + Layout layout; + NeuronReal *f_values; + + Segment(int sx, int sy, int sz, int weightsCount, Weight *weights = nullptr): + WeightHolder(weightsCount, weights), sx(sx), sy(sy), sz(sz), f_values() { } + + virtual ~Segment() { } + + virtual void clear() { } + virtual void split(int threadsCount) { } + + virtual Quality pass(Barrier &barrier, int x, int y, int z, NeuronReal trainRatio) { return barrier.tid ? Quality() : testPass(x, y, z, trainRatio); } + virtual Quality testPass(int x, int y, int z, NeuronReal trainRatio) { return Quality::bad(); } + + + inline void check(int x, int y, int z) { + #ifndef NDEBUG + Layout l = layout; + assert(l); + assert(f_values); + assert(weights); + assert(l.x0 <= x && x+sx <= l.x1); + assert(l.y0 <= y && y+sy <= l.y1); + assert(l.z0 <= z && z+sz <= l.z1); + #endif + } +}; + + +#endif + diff --git a/projects/neural/segment.test.inc.cpp b/projects/neural/segment.test.inc.cpp new file mode 100644 index 0000000..1b7da72 --- /dev/null +++ b/projects/neural/segment.test.inc.cpp @@ -0,0 +1,109 @@ +#ifndef SEGMENT_TEST_INC_CPP +#define SEGMENT_TEST_INC_CPP + + +#include "test.inc.cpp" +#include "segment.inc.cpp" + + + +class SegmentTest: public Test { +public: + static bool testSegment(const char *name, Segment &segment, Layout l, int x, int y, int z, NeuronReal trainRatio) { + Stage st(name); + + struct H { + Segment &segment; + int x, y, z; + Quality testQ; + NeuronReal ratio; + + std::vector threads; + std::vector qualities; + std::atomic counter; + + H(Segment &segment, int x, int y, int z, NeuronReal ratio): segment(segment), x(x), y(y), z(z), ratio(ratio), counter(0) { } + + void prepareData() + { memcpy(segment.weights, weights.data(), segment.weightsCount*sizeof(Weight)); } + + void func(int tid, unsigned int seed) { + Barrier barrier(counter, tid, threads.size(), seed); + qualities[tid] = segment.pass(barrier, x, y, z, ratio); + } + + bool test(const char *name, int threadsCount) { + Stage st(name); + + assert(threadsCount > 0); + counter = 0; + threads.clear(); + qualities.clear(); + threads.resize(threadsCount, nullptr); + qualities.resize(threadsCount); + + prepareData(); + + segment.split(threadsCount); + for(int i = 1; i < threadsCount; ++i) threads[i] = new std::thread(&H::func, this, i, rand()); + func(0, rand()); + + Quality q = qualities[0]; + for(int i = 1; i < threadsCount; ++i) { threads[i]->join(); delete threads[i]; q += qualities[i]; } + threads.clear(); + + if ( fabs(q.train - testQ.train) > 1e-10 + || fabs(q.human - testQ.human) > 1e-10 ) + { + printf("qualities differs, was %g (%g), expected %g (%g)\n", + q.human, q.train, testQ.human, testQ.train ); + ++errors; + } + + for(int i = 0; i < segment.weightsCount; ++i) { + WeightReal a = segment.weights[i].w; + WeightReal b = weights[i + segment.weightsCount].w; + if (fabs(a - b) > 1e-10) { + printf("weights differs at %d, was %g, expected %g\n", i, a, b); + segment.layout.printYXZ("layout"); + ++errors; break; + } + } + + return st; + } + } h(segment, x, y, z, trainRatio); + + + assert(segment.weightsCount > 0); + + int valuesCount = l.getCount(); + init(0, 0, segment.weightsCount*3, valuesCount); + + for(int i = 0; i < valuesCount; ++i) + values[i] = rand()/(NeuronReal)RAND_MAX; + for(int i = 0; i < segment.weightsCount; ++i) + weights[i].w = 
(WeightReal)(2.0*rand()/RAND_MAX - 1); + + segment.layout = l; + segment.f_values = values.data(); + segment.weights = &weights[segment.weightsCount]; + segment.check(x, y, z); + + h.prepareData(); + h.testQ = segment.testPass(x, y, z, trainRatio); + segment.weights += segment.weightsCount; + + h.test("single-thread", 1); + h.test("single-thread-repeat", 1); + h.test("2-threads", 2); + h.test("7-threads", 7); + h.test("7-threads-repeat", 7); + h.test("8-threads", 8); + + return st; + } +}; + + +#endif diff --git a/projects/neural/test.all.inc.cpp b/projects/neural/test.all.inc.cpp new file mode 100644 index 0000000..227f226 --- /dev/null +++ b/projects/neural/test.all.inc.cpp @@ -0,0 +1,23 @@ +#ifndef TEST_ALL_INC_CPP +#define TEST_ALL_INC_CPP + + + +#include "layer.simple.test.inc.cpp" +#include "layer.conv.test.inc.cpp" +#include "segment.cx4.test.inc.cpp" + + +class AllTest: public Test { +public: + static bool test(const char *name = "all") { + Stage st(name); + //SimpleTest::test(); + //ConvTest::test(); + Cx4Test::test(); + return st; + } +}; + + +#endif diff --git a/projects/neural/test.inc.cpp b/projects/neural/test.inc.cpp new file mode 100644 index 0000000..d8cd8b6 --- /dev/null +++ b/projects/neural/test.inc.cpp @@ -0,0 +1,144 @@ +#ifndef TEST_INC_CPP +#define TEST_INC_CPP + + +#include "common.inc.cpp" + + + +class Test { +public: + class Stage { + public: + const int errors; + inline explicit Stage(const char *name): errors(Test::errors) { + for(int i = 0; i < level; ++i) printf("- "); + printf("%s\n", name); + fflush(stdout); + ++level; + } + inline ~Stage() { + --level; + if (!*this) { + for(int i = 0; i < level; ++i) printf("- "); + printf("FAILED\n"); + } + fflush(stdout); + } + operator bool() { return Test::errors == errors; } + }; + +private: + static int level; + +protected: + static std::vector c_neurons; + static std::vector p_neurons; + static std::vector weights; + static std::vector values; + +public: + static int errors; + + + static void init(int c_count, int p_count, int w_count, int v_count = 0) { + Neuron n = {}; + Weight w = {}; + + c_neurons.clear(); + p_neurons.clear(); + weights.clear(); + values.clear(); + + c_neurons.resize(c_count, n); + p_neurons.resize(p_count, n); + weights.resize(w_count, w); + values.resize(v_count, 0); + } + + + static bool verifyNeurons(const char *name, const Layout &l, const Neuron *neurons, bool ignorePadded = false) { + Stage st(name); + for(int y = 0; y < l.sy; ++y) + for(int x = 0; x < l.sx; ++x) + for(int z = 0; z < l.sz; ++z) { + int n = neurons[ (y*l.sx + x)*l.sz + z ].a.i; + int i = x >= l.x0 && x < l.x1 + && y >= l.y0 && y < l.y1 + && z >= l.z0 && z < l.z1; + if (ignorePadded ? 
i && n != i : n != i) { + printf( + "wrong neuron mark %d, expected %d (%d, %d, %d)\n", + n, i, y, x, z ); + l.printYXZ("layout"); + ++errors; + return st; + } + } + return st; + } + + + static bool verifyNeuronIndices(const char *name, const Layout &l, const Neuron *neurons, int base = 1, int stride = 1) { + Stage st(name); + for(int y = 0; y < l.sy; ++y) + for(int x = 0; x < l.sx; ++x) + for(int z = 0; z < l.sz; ++z) { + bool active = x >= l.x0 && x < l.x1 + && y >= l.y0 && y < l.y1 + && z >= l.z0 && z < l.z1; + + int n = neurons[ (y*l.sx + x)*l.sz + z ].a.i; + int i = (((y - l.y0)*l.getW() + x - l.x0)*l.getD() + z - l.z0)*stride + base; + + if (!active) i = 0; + + if (n != i) { + printf( + "wrong neuron mark %d, expected %d (%d, %d, %d)\n", + n, i, y, x, z ); + l.printYXZ("layout"); + ++errors; + return st; + } + } + return st; + } + + + static bool verifyNeuronsAccum(const Layout &l, Neuron *neurons, int accum = 1, bool ignoreBounds = false) { + for(int y = 0; y < l.sy; ++y) + for(int x = 0; x < l.sx; ++x) + for(int z = 0; z < l.sz; ++z) { + Neuron &n = neurons[ (y*l.sx + x)*l.sz + z ]; + int i = ( x >= l.x0 && x < l.x1 + && y >= l.y0 && y < l.y1 + && z >= l.z0 && z < l.z1 )*accum; + if (ignoreBounds) i = accum; + if (n.v != 0 && n.v != i) { + printf( + "wrong neuron mark %g, expected 0 or %d (%d, %d, %d)\n", + n.v, i, y, x, z ); + l.printYXZ("layout"); + ++errors; + return false; + } + if (n.v) n.a.i = 1; + n.v = 0; + } + return true; + } +}; + + +int Test::level = 0; +std::vector Test::c_neurons; +std::vector Test::p_neurons; +std::vector Test::weights; +std::vector Test::values; +int Test::errors = 0; + + + +#endif + diff --git a/projects/neural/tga.inc.cpp b/projects/neural/tga.inc.cpp new file mode 100644 index 0000000..a013b09 --- /dev/null +++ b/projects/neural/tga.inc.cpp @@ -0,0 +1,61 @@ +#ifndef TGA_INC_CPP +#define TGA_INC_CPP + + +#include + + +bool tgaSave(const char *filename, const unsigned char *data, int w, int h, int ch) { + if (!data || w <= 0 || h <= 0 || w > 0xffff || h > 0xffff || (ch != 3 && ch != 4)) { + printf("ERROR: cannot save image (bad image): %s\n", filename); + return false; + } + + FILE *f = fopen(filename, "wb"); + if (!f) { + printf("ERROR: cannot open file: %s\n", filename); + return false; + } + + #pragma pack(push,1) + struct Header { + unsigned char idLength; + unsigned char colormapType; + unsigned char imageType; + unsigned char colormapIndex[2]; + unsigned char colormapLength[2]; + unsigned char colormapSize; + unsigned char xOrigin[2]; + unsigned char yOrigin[2]; + unsigned char width[2]; + unsigned char height[2]; + unsigned char pixelSize; + unsigned char attributes; + }; + #pragma pack(pop) + Header header = {}; + header.imageType = 2; + header.width[0] = w; + header.width[1] = w >> 8; + header.height[0] = h; + header.height[1] = h >> 8; + header.pixelSize = ch == 4 ? 
32 : 24; + fwrite(&header, sizeof(header), 1, f); + + int rowSize = w*ch; + const unsigned char *row = data + h*rowSize; + for(unsigned short r = h; r; --r, row -= rowSize) { + for(const unsigned char *c = row - rowSize; c < row; c += ch) { + fputc(c[2], f); + fputc(c[1], f); + fputc(c[0], f); + if (ch == 4) fputc(c[3], f); + } + } + fclose(f); + + return true; +} + + +#endif diff --git a/projects/neural/train.cx4.inc.cpp b/projects/neural/train.cx4.inc.cpp new file mode 100644 index 0000000..54831ad --- /dev/null +++ b/projects/neural/train.cx4.inc.cpp @@ -0,0 +1,293 @@ +#ifndef TRAIN_CX4_INC_CPP +#define TRAIN_CX4_INC_CPP + + +#include "train.segment.inc.cpp" +#include "segment.cx4.inc.cpp" +#include "layer.inc.cpp" + + +class TrainerCx4: public TrainerSegment { +protected: + FILE *f; + std::vector data; + std::vector values; + std::vector valuesMeasure; + std::vector tmpdata; + std::vector shuffle; + + Layout trainLayout; + Layout measureLayout; + + size_t imageSize; + size_t preparedImageSize; + int imagesInFile; + int imagesInMemory; + + volatile unsigned int seed; + +public: + Layer *layerFull; + Layer *layerPre; + int loadImagesCount; + int blocksPerLoading; + + const char *infile; + const char *cachefile; + const char *outfile; + + TrainerCx4(): + f(), + imageSize(), + preparedImageSize(), + imagesInFile(), + imagesInMemory(), + seed(), + layerFull(), + layerPre(), + loadImagesCount(), + blocksPerLoading(1), + infile(), + cachefile(), + outfile() { } + +protected: + void preprocess(unsigned char *src, NeuronReal *dst) { + struct IL: public Iter { + typedef const unsigned char* DataType; + static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; } + }; + struct IS: public Iter { + typedef NeuronReal* DataType; + static inline void iter4(Neuron &n, DataType d, DataAccumType&) { *d = n.v; } + }; + + Layer &fl = *layerFull; + Layer &bl = *layerPre; + + iterateNeurons2(fl.layout, fl.layout, fl.neurons, src); + fl.passFull(&bl, threadsCount); + iterateNeurons2(bl.layout, bl.layout, bl.neurons, dst); + } + + + bool loadImage(int fromIndex, int toIndex) { + unsigned char *src = data.data(); + if (!layerPre) src += toIndex*imageSize; + + fseeko64(f, fromIndex*imageSize, SEEK_SET); + if (!fread(src, imageSize, 1, f)) + return fclose(f), f = nullptr, false; + + if (layerPre) preprocess(src, values.data() + toIndex*preparedImageSize); + + return true; + } + + + bool loadImages() { + for(int i = 0; i < imagesInMemory; ++i) { + int j = rand()%imagesInFile; + if (i != j) std::swap(shuffle[i], shuffle[j]); + } + + typedef std::pair Pair; + typedef std::set Set; + Set set; + for(int i = 0; i < imagesInMemory; ++i) + set.insert(Pair(shuffle[i], i)); + for(Set::iterator i = set.begin(); i != set.end(); ++i) + loadImage(i->first, i->second); + + return true; + } + + + void prepareMeasure() { + if (measuresPerBlock <= 0) return; + int sy = segment->sy; + int sx = segment->sx; + int sz = segment->sz; + int sxz = sx*sz; + int w = (layerPre ? layerPre : layerFull)->layout.getW(); + int h = (layerPre ? 
layerPre : layerFull)->layout.getH(); + int rowstride = w*sz; + NeuronReal *dst = valuesMeasure.data(); + for(int i = 0; i < measuresPerBlock; ++i) { + int index = rand()%imagesInMemory; + int x = rand()%(w - sx + 1); + int y = rand()%(h - sy + 1); + if (layerPre) { + const NeuronReal *src = values.data() + index*preparedImageSize + y*rowstride + x*sz; + for(int j = 0; j < sy; ++j, src += rowstride, dst += sxz) + memcpy(dst, src, sxz*sizeof(*dst)); + } else { + // raw images are packed with stride imageSize (preparedImageSize stays zero when layerPre is unset) + const unsigned char *src = data.data() + index*imageSize + y*rowstride + x*sz; + for(int j = 0; j < sy; ++j, src += rowstride - sxz) + for(int k = 0; k < sxz; ++k, ++src, ++dst) + *dst = *src/(NeuronReal)255; + } + } + } + + + bool prepare() override { + assert(infile); + assert(layerFull); + assert(loadImagesCount > 0); + + Layer &fl = layerFull->front(); + Layer &bl = layerFull->back(); + + imageSize = fl.layout.getActiveCount(); + f = fopen(infile, "rb"); + if (!f) return false; + fseeko64(f, 0, SEEK_END); + imagesInFile = ftello64(f)/imageSize; + if (imagesInFile < 1) return fclose(f), f = nullptr, false; + imagesInMemory = loadImagesCount > imagesInFile ? imagesInFile : loadImagesCount; + + + Layout l = layerPre ? layerPre->layout : layerFull->layout; + assert(l.getW() >= segment->sx); + assert(l.getH() >= segment->sy); + assert(l.getD() == segment->sz); + + measureLayout = Layout(segment->sx, segment->sy, segment->sz); + valuesMeasure.resize(measuresPerBlock * measureLayout.getActiveCount()); + if (layerPre) { + assert(l); + preparedImageSize = layerPre->layout.getActiveCount(); + trainLayout = Layout(l.getW(), l.getH(), l.getD()); + data.resize(imageSize); + values.resize(imagesInMemory * preparedImageSize); + } else { + trainLayout = measureLayout; + data.resize(imagesInMemory * imageSize); + values.resize(segment->sx * segment->sy * segment->sz); + } + + segment->f_values = values.data(); + segment->layout = trainLayout; + tmpdata.resize(bl.layout.getActiveCount()); + if (tmpdata.size() < imageSize) tmpdata.resize(imageSize); + + size_t memsize = data.size()*sizeof(data.front()) + + values.size()*sizeof(values.front()) + + valuesMeasure.size()*sizeof(valuesMeasure.front()) + + tmpdata.size()*sizeof(tmpdata.front()); + printf("allocated size: %lld\n", (long long)(memsize)); + + shuffle.resize(imagesInFile); + for(int i = 0; i < imagesInFile; ++i) + shuffle[i] = i; + + if (!loadImages()) return false; + prepareMeasure(); + return true; + } + + + void finish() override + { if (f) fclose(f), f = nullptr; } + + + bool prepareBlock(int block, bool measureOnly) override { + if (block > 0 && blocksPerLoading > 0 && (block % blocksPerLoading) == 0 && !loadImages()) + return false; + seed = rand(); + return true; + } + + + void finishBlock(int block) override { + if (outfile) { + struct IL: public Iter { + typedef const unsigned char* DataType; + static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; } + }; + struct IS: public Iter { + typedef unsigned char* DataType; + static inline void iter4(Neuron &n, DataType d, DataAccumType&) { *d = n.v < 0 ? 0 : n.v > 1 ?
255 : (unsigned char)(n.v*255.999); } + }; + + Layer &fl = *layerFull; + Layer &bl = fl.back(); + + std::string outfile0(outfile); + std::string outfile1 = outfile0 + ".1.tga"; + outfile0 += ".0.tga"; + + int index = rand()%imagesInFile; + fseeko64(f, index*imageSize, SEEK_SET); + fread(tmpdata.data(), imageSize, 1, f); + tgaSave(outfile0.c_str(), tmpdata.data(), fl.layout.getW(), fl.layout.getH(), fl.layout.getD()); + + iterateNeurons2(fl.layout, fl.layout, fl.neurons, tmpdata.data()); + fl.passFull(&bl, threadsCount); + + iterateNeurons2(bl.layout, bl.layout, bl.neurons, tmpdata.data()); + tgaSave(outfile1.c_str(), tmpdata.data(), bl.layout.getW(), bl.layout.getH(), bl.layout.getD()); + + segment->saveDemo(); + } + } + + + void loadData(Barrier &barrier, int block, int iter, bool measureOnly) override { + int tid = barrier.tid; + int threads = barrier.threads; + int sx = segment->sx; + int sy = segment->sy; + int sz = segment->sz; + int sxz = sx*sz; + + if (measureOnly) { + if (!tid) { + segment->layout = measureLayout; + segment->f_values = valuesMeasure.data() + iter*sy*sxz; + x = y = z = 0; + } + } else + if (layerPre) { + if (!tid) { + unsigned int s = randomNext(seed & iter); + int index = (s = randomNext(s))%imagesInMemory; + x = (s = randomNext(s)) % (layerPre->layout.getW() - sx + 1); + y = (s = randomNext(s)) % (layerPre->layout.getH() - sy + 1); + z = 0; + segment->layout = trainLayout; + segment->f_values = values.data() + index*preparedImageSize; + } + } else { + int w = layerFull->layout.getW(); + int h = layerFull->layout.getH(); + + unsigned int s = randomNext(seed & iter); + int index = (s = randomNext(s))%imagesInMemory; + int x0 = (s = randomNext(s))%(w - sx + 1); + int y0 = (s = randomNext(s))%(h - sy + 1); + + int rowstride = w*sz; + int dr = rowstride*threads - sxz; + int vdr = sxz*(threads - 1); + + const unsigned char *id0 = data.data() + index*imageSize + y0*rowstride + x0*sz; + const unsigned char *id = id0 + tid*rowstride; + NeuronReal *iv = values.data() + tid*sxz; + + for(const unsigned char *e = id0 + sy*rowstride; id < e; id += dr, iv += vdr) + for(const unsigned char *e = id + sxz; id < e; ++id, ++iv) + *iv = *id/(NeuronReal)255; + + if (!tid) { + segment->layout = trainLayout; + segment->f_values = values.data(); + x = 0, y = 0, z = 0; + } + } + } +}; + + +#endif diff --git a/projects/neural/train.digit.inc.cpp b/projects/neural/train.digit.inc.cpp index 8277f42..f868ca0 100644 --- a/projects/neural/train.digit.inc.cpp +++ b/projects/neural/train.digit.inc.cpp @@ -87,7 +87,7 @@ protected: Quality verifyData(Barrier &barrier, int, int iter) override { - Quality q = {}; + Quality q; if (barrier.tid) return q; struct I: public Iter { diff --git a/projects/neural/train.image.inc.cpp b/projects/neural/train.image.inc.cpp index d7dd9a2..18c5c46 100644 --- a/projects/neural/train.image.inc.cpp +++ b/projects/neural/train.image.inc.cpp @@ -2,6 +2,8 @@ #define TRAIN_IMAGE_INC_CPP +#include + #include "train.inc.cpp" #include "layer.simple.inc.cpp" @@ -9,194 +11,168 @@ class TrainerImage: public Trainer { protected: std::vector data; - std::vector shuffle; - const char *datafile; - const char *outfile; - Layout ofl, obl; - Layout::List oflist, oblist; - int stride, count; + std::vector tmpdata; + std::vector shuffle; + std::vector shuffle2; + Layout pbl; + Layout::List flist, blist; + FILE *f; + size_t imgsize; + int count; + int workCount; public: - TrainerImage(): stride(), count() { } + int pad; + const char *datafile; + const char *outfile; + Layer *dataLayer; + 
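+ // Shuffle scheme used below: 'shuffle' is a permutation of all 'count'
+ // images in 'datafile'; loadBlocks() partially re-shuffles it and loads the
+ // first 'workCount' entries into 'data', walking a sorted set of
+ // (fileIndex, slot) pairs so the reads advance through the file in order.
+ // 'shuffle2' is re-shuffled once per block in prepareBlock() and only
+ // permutes the presentation order of the images already in memory. 'pad'
+ // shrinks the verification layout 'pbl' (see prepare() and verifyData()),
+ // excluding a pad-wide border of the back layer from both the deltas and
+ // the reported quality, which verifyData() returns as an RMS error.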
+ TrainerImage(): f(), imgsize(), count(), workCount(), pad(), datafile(), outfile(), dataLayer() { } - bool configure(const char *datafile, const char *outfile) { - this->datafile = datafile; - this->outfile = outfile; - } +protected: + bool prepare() override { + assert(datafile); + assert(fl->layout.getD() == 3); + + Layer *dl = dataLayer ? dataLayer : fl; + assert(dl->layout.getW() == bl->layout.getW()); + assert(dl->layout.getH() == bl->layout.getH()); + assert(dl->layout.getD() == bl->layout.getD()); + + imgsize = fl->layout.getActiveCount(); + fl->layout.split(flist, threadsCount); + bl->layout.split(blist, threadsCount); + pbl = bl->layout; + pbl.padXY(pad); + + f = fopen(datafile, "rb"); + if (!f) return false; - data.clear(); + fseeko64(f, 0, SEEK_END); + long long size = ftello64(f); + count = size/imgsize; + if (count < 1) return fclose(f), f = nullptr, false; - FILE *f = fopen(filename, "rb"); - if (!f) - return printf("cannot open file for read: %s\n", filename), false; - fseek(f, 0, SEEK_END); - size_t fs = ftello(f); - fseek(f, 0, SEEK_SET); + workCount = itersPerBlock > count ? count : itersPerBlock; + printf("allocated size: %lld\n", (long long)(imgsize*workCount)); + data.resize(workCount*imgsize); - data.resize(fs, 0); - if (!fread(data.data(), fs, 1, f)) - return printf("cannot read from file: %s\n", filename), fclose(f), data.clear(), false; + shuffle.resize(count); + for(int i = 0; i < count; ++i) + shuffle[i] = i; - fclose(f); - return true; + shuffle2.resize(workCount); + for(int i = 0; i < workCount; ++i) + shuffle2[i] = i; + + return loadBlocks(); + //return true; } - -void imgTrain(Layer &l, const char *datafile, int size, const char *outfile, double trainRatio, int count) { - Layer &bl = l.back(); - - assert(!l.prev); - assert(datafile); - assert(count > 0 && size > 0); - assert(l.size == size); - assert(bl.size == size); - - int blockSize = 1000;//1024*1024*1024/size; - assert(blockSize > 0); - - FILE *f = fopen(datafile, "rb"); - if (!f) - { printf("cannot open file: %s\n", datafile); return; } - fseeko64(f, 0, SEEK_END); - long long fsize = ftello64(f); - int xCount = (int)(fsize/size); - if (xCount <= 0) - { printf("no tests in file: %s\n", datafile); return; } - - int *block = new int[blockSize*2]; - int *shuffle = block + blockSize; - double *results = new double[blockSize]; - unsigned char *blockData = new unsigned char[(blockSize + 1)*size]; - unsigned char *blockResData = blockData + blockSize*size; - bool err = false; - - for(int j = 0; j < blockSize; ++j) - { shuffle[j] = j; results[j] = 0; } - - int blocksCount = (count - 1)/blockSize + 1; - - printf("training %d (%d x %d blocks), tests: %d, ratio: %f:\n", blocksCount*blockSize, blocksCount, blockSize, xCount, trainRatio); - - double avgSum = 0; - for(int i = 0; i < blocksCount; ++i) { - for(int j = 0; j < blockSize; ++j) { - block[j] = rand()%xCount; - std::swap(shuffle[i], shuffle[rand()%blockSize]); - } - std::sort(block, block + blockSize); + + void finish() override + { if (f) fclose(f), f = nullptr; } - for(int j = 0; j < blockSize; ++j) { - fseeko64(f, block[j]*(long long)size, SEEK_SET); - if (!fread(blockData + j*size, size, 1, f)) - { printf("cannot read data from file: %s\n", datafile); err = true; break; } + + bool loadBlocks() { + for(int i = 0; i < workCount; ++i) { + int j = rand()%count; + if (i != j) std::swap(shuffle[i], shuffle[j]); } - if (err) break; - - printf(" next data block loaded\n"); - - double sumQ = 0; - for(int j = 0; j < blockSize; ++j) { - unsigned char *data = 
blockData + shuffle[j]*size; - for(double *ia = l.a, *e = ia + l.size; ia < e; ++ia, ++data) - *ia = *data/255.0; - - double firstQ = 0, q = 0; - for(int repeat = 0; repeat < 1; ++repeat) { - l.pass(); - - for(double *ia = l.a, *iba = bl.a, *ibda = bl.da, *e = ia + l.size; ia < e; ++ia, ++iba, ++ibda) { - double d = *ia - *iba; - *ibda = d; - q += d*d; - } - q /= size; - if (!repeat) firstQ = q; - - bl.backpass(trainRatio); - } - - sumQ += firstQ; - avgSum += firstQ - results[j]; - results[j] = firstQ; - int avgCnt = i ? blockSize : j + 1; - printf(" %4d: total: %6d, avg result: %f, last result: %f -> %f\n", j+1, i*blockSize+j+1, avgSum/avgCnt, firstQ, q); + + typedef std::pair Pair; + typedef std::set Set; + Set set; + for(int i = 0; i < workCount; ++i) + set.insert(Pair(shuffle[i], i)); + + for(Set::iterator i = set.begin(); i != set.end(); ++i) { + fseeko64(f, i->first*imgsize, SEEK_SET); + if (!fread(data.data() + i->second*imgsize, imgsize, 1, f)) + return fclose(f), f = nullptr, false; } - - printf("%4d: total: %6d, avg result: %f\n", i+1, (i+1)*blockSize, sumQ/blockSize); - - if (outfile && !l.save(outfile)) - { printf("cannot save neural network weights to file: %s\n", outfile); err = true; break; } - - unsigned char *data = blockResData; - for(double *iba = bl.a, *e = iba + bl.size; iba < e; ++iba, ++data) - *data = (unsigned char)(*iba*255.999); - tgaSave("data/output/sampleX.tga", blockData + shuffle[blockSize-1]*size, 256, 256, 3); - tgaSave("data/output/sampleY.tga", blockResData, 256, 256, 3); - } - - delete[] block; - delete[] results; - delete[] blockData; - - printf("finished\n"); -} - - -protected: - bool prepare() override { - ofl = optimizeLayoutSimple(fl->layout); - obl = optimizeLayoutSimple(bl->layout); - assert(ofl && obl); - assert(ofl.getActiveCount() == obl.getActiveCount()); - ofl.split(oflist, threadsCount); - obl.split(oblist, threadsCount); - stride = ofl.getActiveCount() + 1; - count = data.size()/stride; - if (count <= 0) return false; - shuffle.resize(count); - for(int i = 0; i < count; ++i) - shuffle[i] = i; return true; } - bool prepareBlock() override { - int cnt = itersPerBlock > count ? count : itersPerBlock; - for(int i = 0; i < cnt; ++i) { - int j = rand()%count; - if (i != j) std::swap(shuffle[i], shuffle[j]); + for(int i = 0; i < workCount; ++i) { + int j = rand()%workCount; + if (i != j) std::swap(shuffle2[i], shuffle2[j]); } + //return loadBlocks(); return true; } + + + void finishBlock() override { + if (outfile && !dataLayer) { + std::string outfile0(outfile); + std::string outfile1 = outfile0 + ".1.tga"; + outfile0 += ".0.tga"; + + unsigned char *id0 = data.data() + shuffle2[(itersPerBlock-1)%workCount]*imgsize; + tgaSave(outfile0.c_str(), id0, fl->layout.getW(), fl->layout.getH(), fl->layout.getD()); + + struct I: public Iter { + typedef unsigned char* DataType; + static inline void iter4(Neuron &n, DataType d, DataAccumType&) { *d = n.v < 0 ? 0 : n.v > 1 ? 
255 : (unsigned char)(n.v*255.999); } + }; + + tmpdata.resize(imgsize); + unsigned char *id1 = tmpdata.data(); + iterateNeurons2(bl->layout, bl->layout, bl->neurons, id1); + tgaSave(outfile1.c_str(), id1, bl->layout.getW(), bl->layout.getH(), bl->layout.getD()); + } + } void loadData(Barrier &barrier, int, int iter) override { struct I: public Iter { - typedef const unsigned char* Type; - static inline void iter4(Neuron &n, Type d, AccumType&) { n.v = *d/(NeuronReal)255; } + typedef const unsigned char* DataType; + static inline void iter4(Neuron &n, DataType d, DataAccumType&) { n.v = *d/(NeuronReal)255; } }; - const unsigned char *id = data.data() + shuffle[iter%count]*stride; - iterateNeurons2(oflist[barrier.tid], ofl, fl->neurons, id); + const unsigned char *id = data.data() + shuffle2[iter%workCount]*imgsize; + iterateNeurons2(flist[barrier.tid], fl->layout, fl->neurons, id); } - AccumReal verifyDataMain(int, int iter) override { - struct I: public Iter { - typedef int Type; - struct AccumType { int ri, mi; NeuronReal m; }; - static inline void iter4(Neuron &n, Type d, AccumType &a) { - NeuronReal v1 = d == a.ri; - NeuronReal v0 = n.v; - n.d *= v1 - v0; - if (a.m < v0) { a.m = v0; a.mi = d; } - } - }; + Quality verifyData(Barrier &barrier, int, int iter) override { + Layout l = blist[barrier.tid]; + Layout dl = bl->layout; + Layout pl = pbl; + + int d = l.getD(); + int w = l.getW(); + int dx = l.sz - d; + int dy = (l.sx - w)*l.sz; + int ddx = dl.getD(); + int ddy = (dl.getW() - w)*ddx; + + AccumReal aq = 0; + NeuronReal ratio = this->ratio; + Neuron *in = bl->neurons + (l.y0*l.sx + l.x0)*l.sz + l.z0; + const unsigned char *id = data.data() + shuffle2[iter%workCount]*imgsize + ((l.y0-dl.y0)*l.sx + l.x0-dl.x0)*l.sz + l.z0-dl.z0; - I::AccumType a = { data[ (shuffle[iter%count] + 1)*stride - 1 ], 0, 0 }; - iterateNeurons2(obl, obl, bl->neurons, 0, 1, &a); + for(int y = l.y0; y < l.y1; ++y, in += dy, id += ddy) { + bool outside = y < pl.y0 || y >= pl.y1; + for(int x = l.x0; x < l.x1; ++x, in += dx, id += ddx) { + if (outside || x < pl.x0 || x >= pl.x1) { + for(Neuron *e = in + d; in < e; ++in) in->d = 0; + } else { + const unsigned char *iid = id; + for(Neuron *e = in + d; in < e; ++in, ++iid) { + NeuronReal v1 = *iid/(NeuronReal)255; + NeuronReal v0 = in->v; + NeuronReal diff = v1 - v0; + in->d *= diff*ratio; + aq += diff*diff; + } + } + } + } - return a.mi != a.ri; + return Quality( sqrt(aq/pbl.getActiveCount()) ); } }; diff --git a/projects/neural/train.inc.cpp b/projects/neural/train.inc.cpp index 21a8a56..41fd23a 100644 --- a/projects/neural/train.inc.cpp +++ b/projects/neural/train.inc.cpp @@ -2,43 +2,9 @@ #define TRAIN_INC_CPP -#include -#include - - #include "layer.inc.cpp" -long long timeUs() { - static std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); - return (long long)std::chrono::duration_cast( std::chrono::steady_clock::now() - begin ).count(); -} - - -struct Quality { - AccumReal train; - AccumReal human; - - inline Quality& operator+=(const Quality &b) { - train += b.train; - human += b.human; - return *this; - } - - inline Quality& operator*=(AccumReal x) { - train *= x; - human *= x; - return *this; - } - - inline bool operator<(const Quality &b) const { - return human < b.human ? true - : b.human < human ? 
false - : train < b.train; - } -}; - - class Trainer { private: std::atomic barrierCounter; @@ -57,6 +23,7 @@ protected: std::atomic skipBackpass; Layer *fl; Layer *bl; + Layer *ffl; virtual bool prepare() { return true; } virtual bool prepareBlock() { return true; } @@ -70,7 +37,7 @@ private: void threadFunc(int tid, unsigned int seed, int block) { Barrier barrier(barrierCounter, tid, threadsCount, seed); - Quality sumQ = {}; + Quality sumQ; for(int i = 0; i < itersPerBlock; ++i) { barrier.wait(); loadData(barrier, block, i); @@ -88,14 +55,16 @@ private: bool skipBp = skipBackpass; barrier.wait(); - if (ratio > 0 && !skipBp) { - for(Layer *l = bl; l->prev && l->prev->prev; l = l->prev) { + if (ffl && ratio > 0 && !skipBp) { + for(Layer *l = bl; l != ffl; l = l->prev) { barrier.wait(); l->backpassDeltas(barrier); } for(Layer *l = bl; l->prev; l = l->prev) { - barrier.wait(); - l->backpassWeights(barrier); + if (!l->skipTrain) { + barrier.wait(); + l->backpassWeights(barrier); + } } } } @@ -107,8 +76,8 @@ private: barrierCounter = 0; std::vector t(threadsCount, nullptr); for(int i = 1; i < threadsCount; ++i) - t[i] = new std::thread(&Trainer::threadFunc, this, i, block, rand()); - threadFunc(0, block, rand()); + t[i] = new std::thread(&Trainer::threadFunc, this, i, rand(), block); + threadFunc(0, rand(), block); Quality result = qualities[0]; for(int i = 1; i < threadsCount; ++i) @@ -164,6 +133,8 @@ public: fl = layer; bl = &layer->back(); + ffl = fl->next; + while(ffl && ffl->skipTrain) ffl = ffl->next; qualities.clear(); qualities.resize(threadsCount, Quality{}); diff --git a/projects/neural/train.segment.inc.cpp b/projects/neural/train.segment.inc.cpp new file mode 100644 index 0000000..836ab3e --- /dev/null +++ b/projects/neural/train.segment.inc.cpp @@ -0,0 +1,178 @@ +#ifndef TRAIN_SEGMENT_INC_CPP +#define TRAIN_SEGMENT_INC_CPP + + +#include "segment.inc.cpp" +#include "layer.inc.cpp" + + +class TrainerSegment { +private: + std::atomic barrierCounter; + std::vector qualities; + +public: + Segment *segment; + AccumReal ratio; + int threadsCount; + + int measuresPerBlock; + int trainsPerBlock; + int blocksPerSaving; + + int blocksCount; + AccumReal qmin; + +protected: + volatile int x, y, z; + + virtual bool prepare() { return true; } + virtual bool prepareBlock(int block, bool measureOnly) { return true; } + virtual void finishBlock(int block) { } + virtual void finish() { } + + virtual void loadData(Barrier &barrier, int block, int iter, bool measure) { } + +private: + void threadFunc(int tid, unsigned int seed, int block, bool measureOnly) { + Barrier barrier(barrierCounter, tid, threadsCount, seed); + + QualityPair q; + if (!measureOnly) { + for(int i = 0; i < trainsPerBlock; ++i) { + barrier.wait(); + loadData(barrier, block, i, false); + barrier.wait(); + q.train += segment->pass(barrier, x, y, z, ratio); + } + } + + for(int i = 0; i < measuresPerBlock; ++i) { + barrier.wait(); + loadData(barrier, block, i, true); + barrier.wait(); + q.measure += segment->pass(barrier, x, y, z, 0); + } + + qualities[tid] = q; + } + + + QualityPair runThreads(int block, bool measureOnly) { + barrierCounter = 0; + std::vector t(threadsCount, nullptr); + for(int i = 1; i < threadsCount; ++i) + t[i] = new std::thread(&TrainerSegment::threadFunc, this, i, rand(), block, measureOnly); + threadFunc(0, rand(), block, measureOnly); + + QualityPair q = qualities[0]; + for(int i = 1; i < threadsCount; ++i) + { t[i]->join(); delete t[i]; q += qualities[i]; } + + q.measure *= 1/(AccumReal)measuresPerBlock; + 
q.train *= 1/(AccumReal)trainsPerBlock; + return q; + } + + +public: + TrainerSegment(): + barrierCounter(0), + segment(), + ratio(), + threadsCount(1), + measuresPerBlock(), + trainsPerBlock(), + blocksPerSaving(), + blocksCount(), + qmin(), + x(), y(), z() + { } + + + QualityPair run() { + int trainsPerBlock = ratio > 0 ? this->trainsPerBlock : 0; + int blocksCount = trainsPerBlock > 0 || this->blocksCount > 0 ? this->blocksCount : 1; + + assert(segment); + assert(threadsCount > 0); + assert(measuresPerBlock >= 0); + assert(trainsPerBlock >= 0); + assert(measuresPerBlock + trainsPerBlock > 0); + + QualityPair bad(Quality::bad(), Quality::bad()); + + printf("training segment: threads %d, trainsPerBlock %d, measuresPerBlock %d, ratio: %lf\n", threadsCount, trainsPerBlock, measuresPerBlock, ratio); + fflush(stdout); + + qualities.clear(); + qualities.resize(threadsCount); + segment->split(threadsCount); + + if (!prepare()) + return printf("cannot prepare\n"), bad; + + + QualityPair result = bad, best = result, saved = result; + long long fullTimeStartUs = timeUs(); + int i = 0; + int bps = blocksPerSaving > 0 ? blocksPerSaving : 1; + int nextSave = i + bps; + while(true) { + bool measureOnly = measuresPerBlock > 0 && (!i || trainsPerBlock <= 0); + + if (!prepareBlock(i, measureOnly)) { + printf("cannot prepare block\n"); + result = bad; + break; + }; + + long long runTimeUs = timeUs(); + result = runThreads(i, measureOnly); + runTimeUs = timeUs() - runTimeUs; + + finishBlock(i); + + long long t = timeUs(); + long long fullTimeUs = t - fullTimeStartUs; + fullTimeStartUs = t; + ++i; + + Quality q = measuresPerBlock > 0 ? result.measure : result.train; + + if (i == 1) saved = result; + bool good = result < best; + bool done = (blocksCount > 0 && i >= blocksCount) || q.human <= qmin; + bool saving = !measureOnly && ratio > 0 && (i >= nextSave || done) && result < saved; + if (good) best = result; + + Quality bq = measuresPerBlock > 0 ? best.measure : best.train; + + printf("%4d, total %7d, avg.result %12g (%12g), best %12g (%12g), time: %f / %f%s\n", + i, i*trainsPerBlock, + q.human, q.train, bq.human, bq.train, + runTimeUs*0.000001, fullTimeUs*0.000001, + (saving ? 
", saving" : "" ) ); + fflush(stdout); + + if (saving) { + if (!segment->save()) { + printf("saving failed\n"); + result = bad; + break; + } + saved = result; + nextSave += bps; + } + + if (done) break; + } + + finish(); + + return result; + } +}; + + +#endif diff --git a/projects/neural/trainer.cpp b/projects/neural/trainer.cpp index 6567a45..820f172 100644 --- a/projects/neural/trainer.cpp +++ b/projects/neural/trainer.cpp @@ -3,8 +3,10 @@ #include #include "layer.all.inc.cpp" -#include "layer.all.test.inc.cpp" +#include "test.all.inc.cpp" #include "train.digit.inc.cpp" +#include "train.image.inc.cpp" +#include "train.cx4.inc.cpp" bool runTests() { @@ -13,40 +15,149 @@ bool runTests() { } -int main() { - srand(time(NULL)); +bool trainDigits() { + #define FILENAME "data/output/weights-digit.bin" - //return !runTests(); + printf("create neural network\n"); + Layer l( nullptr, Layout(28, 28) ); + (new LayerSimple( l, Layout(256) ))->filename = FILENAME "1"; + (new LayerSimple( l, Layout(64) ))->filename = FILENAME "2"; + (new LayerSimple( l, Layout(10) ))->filename = FILENAME "3"; + l.sumStat().print(); - //#define FILENAME "data/output/weights-digit.bin" + #undef FILENAME + + printf("load training data\n"); + TrainerDigit t; + if (!t.loadSymbolMap("data/symbols-data.bin")) return 1; // 28x28 + + printf("try load previously saved network\n"); l.load(); + t.configure(l, 0.5, 8, 70000, 0, 0, 0.00001).run(); + + return true; +} + + +bool trainDigitsConv() { #define FILENAME "data/output/weights-digit-conv.bin" printf("create neural network\n"); - //Layer l( nullptr, Layout(28, 28) ); - //(new LayerSimple( l, Layout(256) ))->filename = FILENAME "1"; - //(new LayerSimple( l, Layout(64) ))->filename = FILENAME "2"; - //(new LayerSimple( l, Layout(10) ))->filename = FILENAME "3"; - Layer l(nullptr, Layout(28, 28)); - (new LayerConvShared(l, Layout(24, 24, 6), Kernel(5, 1, 0)))->filename = FILENAME "1"; - (new LayerSub(l, Layout(12, 12, 6)))->filename = FILENAME "2"; - (new LayerConvShared(l, Layout(8, 8, 48), Kernel(5, 1, 0)))->filename = FILENAME "3"; - (new LayerSub(l, Layout(4, 4, 48)))->filename = FILENAME "4"; - (new LayerSimple(l, Layout(64)))->filename = FILENAME "5"; - (new LayerSimple(l, Layout(10)))->filename = FILENAME "6"; + Layer *ll[10] = {}; + ll[1] = new LayerConvShared(l, Layout(12, 12, 6), Kernel(4, 2, 0)); ll[1]->filename = FILENAME "1"; + ll[2] = new LayerConvShared(l, Layout(4, 4, 12), Kernel(4, 2, 0)); ll[2]->filename = FILENAME "2"; + ll[3] = new LayerSimple(l, Layout(64)); ll[3]->filename = FILENAME "3"; + ll[4] = new LayerSimple(l, Layout(10)); ll[4]->filename = FILENAME "4"; + + #undef FILENAME l.sumStat().print(); printf("load training data\n"); TrainerDigit t; if (!t.loadSymbolMap("data/symbols-data.bin")) return 1; // 28x28 - //printf("try load previously saved network\n"); l.load(); + printf("try load previously saved network\n"); l.load(); - printf("train\n"); + //ll[1]->skipTrain = true; + //ll[2]->skipTrain = true; + + t.configure(l, 0.01, 8, 70000, 0, 0, 0.00001).run(); //t.configure(l, 0.5, 8, 70000, 0, 0, 0.00001).run(); - t.configure(l, 0.5, 8, 7000, 0, 0, 0.00001).run(); + + return true; +} + + +bool trainImage() { + #define FILENAME "data/output/weights-image.bin" + + printf("create neural network\n"); + Layer l(nullptr, Layout(128, 128, 3)); + Layer *ll[20] = {}; + ll[ 1] = new LayerConvShared(l, Layout(63, 63, 24), Kernel(4, 2, 0)); ll[1]->filename = FILENAME "1"; + ll[ 2] = new LayerConvShared(l, Layout(29, 29, 48), Kernel(5, 2, 0)); ll[2]->filename = 
FILENAME "2"; + //ll[ 3] = new LayerConvShared(l, Layout(14, 14, 24), Kernel(4, 2, 0)); ll[3]->filename = FILENAME "3"; + //ll[ 4] = new LayerConvShared(l, Layout( 6, 6, 48), Kernel(4, 2, 0)); ll[4]->filename = FILENAME "4"; + //ll[ 5] = new LayerConvShared(l, Layout( 2, 2, 96), Kernel(4, 2, 0)); ll[5]->filename = FILENAME "5"; + //ll[ 6] = new LayerDeconvShared(l, Layout( 6, 6, 48), Kernel(4, 2, 0), ll[5]->weights); + //ll[ 7] = new LayerDeconvShared(l, Layout( 14, 14, 24), Kernel(4, 2, 0), ll[4]->weights); + //ll[ 8] = new LayerDeconvShared(l, Layout( 30, 30, 12), Kernel(4, 2, 0), ll[3]->weights); + ll[ 9] = new LayerDeconvShared(l, Layout( 63, 63, 24), Kernel(5, 2, 0), ll[2]->weights); + ll[10] = new LayerDeconvShared(l, Layout(128, 128, 3), Kernel(4, 2, 0), ll[1]->weights); + + l.sumStat().print(); - return 0; + printf("try load previously saved network\n"); l.load(); + + ll[1]->skipTrain = true; + ll[10]->skipTrain = true; + + + TrainerImage t; + t.pad = 16; + t.datafile = "data/img128-data.bin"; + t.outfile = FILENAME ".test"; + + t.configure(l, 0.00001, 8, 1000, 0, 0, 0.00001).run(); + + #undef FILENAME + return true; +} + + +bool trainCx4() { + #define FILENAME "data/output/weights-cx4.bin" + + printf("create neural network\n"); + Layer l(nullptr, Layout(512, 512, 3).expandXY(2)); + Layer *fl[20] = { &l }; + int cnt = 1; + fl[cnt] = new LayerConvShared(l, Layout(257, 257, 24).expandXY(3), Kernel(4, 2, -2)); fl[cnt]->filename = FILENAME "1"; ++cnt; + fl[cnt] = new LayerConvShared(l, Layout(130, 130, 48), Kernel(4, 2, -2)); fl[cnt]->filename = FILENAME "2"; ++cnt; + fl[cnt] = new LayerConvShared(l, Layout( 66, 66, 96), Kernel(4, 2, -2)); fl[cnt]->filename = FILENAME "3"; ++cnt; + //fl[cnt] = new LayerConvShared(l, Layout( 6, 6, 48), Kernel(4, 2, 0)); fl[cnt]->filename = FILENAME "4"; ++cnt; + //fl[cnt] = new LayerConvShared(l, Layout( 2, 2, 96), Kernel(4, 2, 0)); fl[cnt]->filename = FILENAME "5"; ++cnt; + for(int i = cnt-1; i > 0; --i) { + Layer *bl = new LayerDeconvShared(l, fl[i-1]->layout, dynamic_cast*>(fl[i])->kernel, fl[i]->weights); + if (i < cnt-1) fl[i]->skipTrain = bl->skipTrain = true; + } + + l.sumStat().print(); + + printf("try load previously saved network\n"); l.load(); + + SegmentCx4 s(fl[cnt-2]->layout.getD(), fl[cnt-1]->layout.getD(), fl[cnt-1]->weights); + s.filename = fl[cnt-1]->filename; + + TrainerCx4 t; + t.layerFull = &l; + t.layerPre = cnt > 2 ? fl[cnt-2] : nullptr; + t.segment = &s; + t.ratio = 0.000001; + t.threadsCount = 8; + t.measuresPerBlock = 1000; + t.trainsPerBlock = 10000; + t.loadImagesCount = 100; + t.blocksPerLoading = 10; + t.qmin = 0.00001; + t.infile = "data/img512-data.bin"; + t.outfile = FILENAME ".test"; + + t.run(); + + #undef FILENAME + return true; +} + + +int main() { + srand(time(NULL)); + + //return !runTests(); + //return !trainDigits(); + //return !trainDigitsConv(); + //return !trainImage(); + return !trainCx4(); }