kusano 7d535a
/*
kusano 7d535a
    datagen.c - compressible data generator test tool
kusano 7d535a
    Copyright (C) Yann Collet 2012-2015
kusano 7d535a
kusano 7d535a
    GPL v2 License
kusano 7d535a
kusano 7d535a
    This program is free software; you can redistribute it and/or modify
kusano 7d535a
    it under the terms of the GNU General Public License as published by
kusano 7d535a
    the Free Software Foundation; either version 2 of the License, or
kusano 7d535a
    (at your option) any later version.
kusano 7d535a
kusano 7d535a
    This program is distributed in the hope that it will be useful,
kusano 7d535a
    but WITHOUT ANY WARRANTY; without even the implied warranty of
kusano 7d535a
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
kusano 7d535a
    GNU General Public License for more details.
kusano 7d535a
kusano 7d535a
    You should have received a copy of the GNU General Public License along
kusano 7d535a
    with this program; if not, write to the Free Software Foundation, Inc.,
kusano 7d535a
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
kusano 7d535a
kusano 7d535a
    You can contact the author at :
kusano 7d535a
   - ZSTD source repository : https://github.com/Cyan4973/zstd
kusano 7d535a
   - Public forum : https://groups.google.com/forum/#!forum/lz4c
kusano 7d535a
*/
kusano 7d535a
kusano 7d535a
/**************************************
kusano 7d535a
*  Includes
kusano 7d535a
**************************************/
kusano 7d535a
#include <stdlib.h>    /* malloc */
kusano 7d535a
#include <stdio.h>     /* FILE, fwrite */
kusano 7d535a
#include <string.h>    /* memcpy, memset */
kusano 7d535a
kusano 7d535a
kusano 7d535a
/**************************************
kusano 7d535a
*  Basic Types
kusano 7d535a
**************************************/
kusano 7d535a
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
/* C99 and later : take the exact-width integer types from <stdint.h> */
# include <stdint.h>
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#else
/* Older compilers : fall back on the native types.
   NOTE(review): this assumes char/short/int/long long are 8/16/32/64 bits wide - confirm for the target. */
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
#endif
kusano 7d535a
kusano 7d535a
kusano 7d535a
/**************************************
kusano 7d535a
*  OS-specific Includes
kusano 7d535a
**************************************/
kusano 7d535a
/* SET_BINARY_MODE(file) :
   on DOS / OS2 / Windows / Cygwin builds, switches the descriptor behind `file`
   to _O_BINARY through _setmode(); on every other platform it expands to nothing. */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
#  include <fcntl.h>   /* _O_BINARY */
#  include <io.h>      /* _setmode, _isatty */
#  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
#else
#  define SET_BINARY_MODE(file)
#endif
kusano 7d535a
kusano 7d535a
kusano 7d535a
/**************************************
kusano 7d535a
*  Constants
kusano 7d535a
**************************************/
kusano 7d535a
/* Size suffix : written after a number, so that `32 KB` expands to `32 *(1 <<10)` */
#define KB *(1 <<10)

/* Multiplier (PRIME1) and xor mask (PRIME2) applied by RDG_rand() */
#define PRIME1   2654435761U
#define PRIME2   2246822519U
kusano 7d535a
kusano 7d535a
kusano 7d535a
/**************************************
kusano 7d535a
*  Local types
kusano 7d535a
**************************************/
kusano 7d535a
/* The literal table holds 2^LTLOG entries; LTMASK reduces any value to a valid index */
#define LTLOG 13
#define LTSIZE (1<<LTLOG)
#define LTMASK (LTSIZE-1)
kusano 7d535a
/* Literal distribution table : LTSIZE byte values, looked up with a random index masked by LTMASK */
typedef BYTE litDistribTable[LTSIZE];
kusano 7d535a
kusano 7d535a
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*********************************************************
kusano 7d535a
*  Local Functions
kusano 7d535a
*********************************************************/
kusano 7d535a
#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
kusano 7d535a
static unsigned int RDG_rand(U32* src)
kusano 7d535a
{
kusano 7d535a
    U32 rand32 = *src;
kusano 7d535a
    rand32 *= PRIME1;
kusano 7d535a
    rand32 ^= PRIME2;
kusano 7d535a
    rand32  = RDG_rotl32(rand32, 13);
kusano 7d535a
    *src = rand32;
kusano 7d535a
    return rand32;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
/* RDG_fillLiteralDistrib() :
   fills the LTSIZE entries of `lt` with consecutive runs of identical byte values.
   Each run covers the fraction `ld` of the entries still free, plus one,
   so early characters get long runs and later ones get shorter runs.
   Characters start at '0', increase by one per run, and wrap from '}' back to '('.
   When ld == 0.0 every run has length 1 and the characters cycle through 0..255. */
static void RDG_fillLiteralDistrib(litDistribTable lt, double ld)
{
    U32 i = 0;
    BYTE character = '0';
    BYTE firstChar = '(';
    BYTE lastChar = '}';

    if (ld==0.0)
    {
        /* no skew : use the whole byte range */
        character = 0;
        firstChar = 0;
        lastChar = 255;
    }
    while (i < LTSIZE)
    {
        /* run length : share of the remaining entries, never less than 1 */
        U32 weight = (U32)((double)(LTSIZE - i) * ld) + 1;
        U32 end;
        /* clamp to the free space; compared this way so that weight + i cannot wrap */
        if (weight > LTSIZE - i) weight = LTSIZE - i;
        end = i + weight;
        while (i < end) lt[i++] = character;
        character++;
        if (character > lastChar) character = firstChar;
    }
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
/* RDG_genChar() :
   advances the generator at `seed` once and returns the entry of `lt`
   selected by the low LTLOG bits of the new random value. */
static BYTE RDG_genChar(U32* seed, const litDistribTable lt)
{
    return lt[RDG_rand(seed) & LTMASK];
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
#define RDG_DICTSIZE    (32 KB)
kusano 7d535a
#define RDG_RAND15BITS  ((RDG_rand(seed) >> 3) & 32767)
kusano 7d535a
#define RDG_RANDLENGTH  ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
kusano 7d535a
void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr)
kusano 7d535a
{
kusano 7d535a
    BYTE* buffPtr = (BYTE*)buffer;
kusano 7d535a
    const U32 matchProba32 = (U32)(32768 * matchProba);
kusano 7d535a
    size_t pos = prefixSize;
kusano 7d535a
    U32* seed = seedPtr;
kusano 7d535a
kusano 7d535a
    /* special case */
kusano 7d535a
    while (matchProba >= 1.0)
kusano 7d535a
    {
kusano 7d535a
        size_t size0 = RDG_rand(seed) & 3;
kusano 7d535a
        size0  = (size_t)1 << (16 + size0 * 2);
kusano 7d535a
        size0 += RDG_rand(seed) & (size0-1);   /* because size0 is power of 2*/
kusano 7d535a
        if (buffSize < pos + size0)
kusano 7d535a
        {
kusano 7d535a
            memset(buffPtr+pos, 0, buffSize-pos);
kusano 7d535a
            return;
kusano 7d535a
        }
kusano 7d535a
        memset(buffPtr+pos, 0, size0);
kusano 7d535a
        pos += size0;
kusano 7d535a
        buffPtr[pos-1] = RDG_genChar(seed, lt);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* init */
kusano 7d535a
    if (pos==0) buffPtr[0] = RDG_genChar(seed, lt), pos=1;
kusano 7d535a
kusano 7d535a
    /* Generate compressible data */
kusano 7d535a
    while (pos < buffSize)
kusano 7d535a
    {
kusano 7d535a
        /* Select : Literal (char) or Match (within 32K) */
kusano 7d535a
        if (RDG_RAND15BITS < matchProba32)
kusano 7d535a
        {
kusano 7d535a
            /* Copy (within 32K) */
kusano 7d535a
            size_t match;
kusano 7d535a
            size_t d;
kusano 7d535a
            int length = RDG_RANDLENGTH + 4;
kusano 7d535a
            U32 offset = RDG_RAND15BITS + 1;
kusano 7d535a
            if (offset > pos) offset = (U32)pos;
kusano 7d535a
            match = pos - offset;
kusano 7d535a
            d = pos + length;
kusano 7d535a
            if (d > buffSize) d = buffSize;
kusano 7d535a
            while (pos < d) buffPtr[pos++] = buffPtr[match++];
kusano 7d535a
        }
kusano 7d535a
        else
kusano 7d535a
        {
kusano 7d535a
            /* Literal (noise) */
kusano 7d535a
            size_t d;
kusano 7d535a
            size_t length = RDG_RANDLENGTH;
kusano 7d535a
            d = pos + length;
kusano 7d535a
            if (d > buffSize) d = buffSize;
kusano 7d535a
            while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt);
kusano 7d535a
        }
kusano 7d535a
    }
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
kusano 7d535a
{
kusano 7d535a
    litDistribTable lt;
kusano 7d535a
    if (litProba==0.0) litProba = matchProba / 4.5;
kusano 7d535a
    RDG_fillLiteralDistrib(lt, litProba);
kusano 7d535a
    RDG_genBlock(buffer, size, 0, matchProba, lt, &seed);
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
#define RDG_BLOCKSIZE (128 KB)
kusano 7d535a
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
kusano 7d535a
{
kusano 7d535a
    BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE];
kusano 7d535a
    U64 total = 0;
kusano 7d535a
    size_t genBlockSize = RDG_BLOCKSIZE;
kusano 7d535a
    litDistribTable lt;
kusano 7d535a
kusano 7d535a
    /* init */
kusano 7d535a
    if (litProba==0.0) litProba = matchProba / 4.5;
kusano 7d535a
    RDG_fillLiteralDistrib(lt, litProba);
kusano 7d535a
    SET_BINARY_MODE(stdout);
kusano 7d535a
kusano 7d535a
    /* Generate dict */
kusano 7d535a
    RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed);
kusano 7d535a
kusano 7d535a
    /* Generate compressible data */
kusano 7d535a
    while (total < size)
kusano 7d535a
    {
kusano 7d535a
        RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed);
kusano 7d535a
        if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
kusano 7d535a
        total += genBlockSize;
kusano 7d535a
        fwrite(buff, 1, genBlockSize, stdout);
kusano 7d535a
        /* update dict */
kusano 7d535a
        memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
kusano 7d535a
    }
kusano 7d535a
}