From 9953f6caffc56ec053630f433d3e6a6db70c920d Mon Sep 17 00:00:00 2001
From: Ivan Mahonin
Date: Aug 15 2024 07:17:45 +0000
Subject: initial commit

---

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aca8da4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+arch
\ No newline at end of file
diff --git a/arch.c b/arch.c
new file mode 100644
index 0000000..e4b1947
--- /dev/null
+++ b/arch.c
@@ -0,0 +1,227 @@
+
+#include "arch.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+enum {
+    NO_CODE = 0xffff,     // "no child" marker in Entry::children
+    DICT_SIZE = 0xffff }; // capacity of the dictionary, in entries
+
+
+// Zeroes *a, allocates the dictionary and registers the 256 single-byte
+// strings as codes 0..255. Returns RES_DONE, or RES_FAIL when out of memory.
+int arch_init(Arch *a) {
+    memset(a, 0, sizeof(*a));
+    // one whole Entry per slot; sizeof(Entry*) made the array far too small
+    a->entries = calloc(DICT_SIZE, sizeof(*a->entries));
+    if (!a->entries) return RES_FAIL;
+    for(int i = 0; i < 256; ++i) {
+        Entry *e = a->entries + i;
+        memset(e->children, 0xff, sizeof(e->children)); // all NO_CODE
+        e->value = i;
+        a->root.children[i] = i;
+    }
+    a->nextcode = 0x100;
+    a->codebits = 9;
+    return RES_DONE;
+}
+
+
+// Frees the dictionary and zeroes *a. The memory behind rb/wb is not freed.
+void arch_deinit(Arch *a) {
+    free(a->entries);
+    memset(a, 0, sizeof(*a));
+}
+
+
+// Marks e as most recently used by moving it to the tail of the first..last
+// list. An entry without a successor (the tail itself, or an entry that is
+// not linked at all) stays where it is. Returns e.
+static Entry* arch_entry_use(Arch *a, Entry *e) {
+    if (!e->next) return e;
+    if (e->prev) e->prev->next = e->next;
+            else a->first = e->next;
+    e->next->prev = e->prev;
+    e->prev = a->last;
+    e->next = NULL;
+    a->last->next = e;
+    a->last = e;
+    return e;
+}
+
+
+// Registers the string "string of parent + value" and returns its entry.
+// A fresh code is taken while the dictionary has room (the code size grows
+// when nextcode reaches a power of two); after that the head of the
+// first..last list, i.e. the least recently used entry, is recycled.
+// NOTE(review): a recycled entry keeps its children, so its whole subtree
+// moves under the new parent, and recycling an ancestor of `parent` would
+// create a cycle - confirm whether this needs handling.
+static Entry* arch_entry_add(Arch *a, Entry *parent, Byte value) {
+    Entry *e;
+    Code nextcode = a->nextcode;
+    if (nextcode < DICT_SIZE) {
+        ++a->nextcode;
+        if (!(nextcode & a->nextcode)) ++a->codebits;
+        assert(a->codebits <= 16);
+        e = &a->entries[nextcode];
+        memset(e->children, 0xff, sizeof(e->children));
+    } else {
+        // unlink the head of the list
+        e = a->first;
+        assert(e);
+        assert(!e->prev);
+        assert(e->next);
+        a->first = e->next;
+        e->next->prev = NULL;
+
+        // detach it from its old parent
+        assert(e->parent);
+        assert(e->parent->children[e->value] == e - a->entries);
+        e->parent->children[e->value] = NO_CODE;
+    }
+
+    assert(parent);
+    assert(parent->children[value] == NO_CODE);
+    e->parent = parent;
+    e->value = value; // was never stored, although every user reads it
+    parent->children[value] = e - a->entries;
+
+    // a new entry is the most recently used one: append it to the tail
+    e->prev = a->last; e->next = NULL;
+    if (a->last) a->last->next = e;
+            else a->first = e;
+    a->last = e;
+
+    return e;
+}
+
+
+// Compresses rb into wb. A regular token is the code of the longest known
+// prefix (codebits bits) followed by the byte that extends it (8 bits); the
+// extended string becomes a new dictionary entry. If the final input ends
+// inside a known string, the token is the unassigned code `nextcode`
+// followed by the code of that string (codebits bits each), which is the
+// layout arch_inflate() reads.
+// Pass final != 0 once rb holds the end of the input. Returns RES_RETRY when
+// wb has to be flushed or more input is needed, RES_DONE when all input is
+// encoded and the last byte is padded, RES_FAIL on error.
+int arch_deflate(Arch *a, int final) {
+    if (a->rb.bits) return RES_FAIL;
+    while(1) {
+        // buf_write_bits() refuses to write with less than 5 free bytes
+        if (a->wb.end - a->wb.cur < 5) return RES_RETRY;
+
+        // the decoder reads a token before it adds the entry the token
+        // creates, so use the code size and marker that are valid now
+        Bits codebits = a->codebits;
+        Word marker = a->nextcode;
+
+        Entry *e = &a->root;
+        Byte *bp = a->rb.cur;
+        int added = 0;
+        while(1) {
+            Byte b;
+            if (!buf_read_byte(&a->rb, &b)) {
+                // out of input: emit the match only at the very end,
+                // otherwise rewind and wait for more data
+                if (!final) a->rb.cur = bp;
+                break;
+            }
+            if (e->children[b] == NO_CODE) {
+                e = arch_entry_add(a, e, b);
+                added = 1;
+                break;
+            }
+            e = arch_entry_use(a, a->entries + e->children[b]);
+        }
+        if (bp == a->rb.cur) break;
+
+        assert(e);
+        assert(e != &a->root);
+        Word w;
+        Bits bits;
+        if (added) {
+            w = ((Word)(e->parent - a->entries) << 8) | e->value;
+            bits = codebits + 8;
+        } else {
+            w = (marker << codebits) | (Word)(e - a->entries);
+            bits = 2*codebits;
+        }
+        if ( !buf_write_bits(&a->wb, w, bits) ) return RES_FAIL;
+    }
+
+    if (final && a->rb.cur >= a->rb.end) { buf_write_pad(&a->wb); return RES_DONE; }
+    return RES_RETRY;
+}
+
+
+// Writes the string of e into wb, first byte first, refreshing the usage
+// order of every entry on the way exactly like the walk in arch_deflate().
+// `count` is the number of bytes the caller still writes after e's byte.
+// Returns 0, writing nothing, if wb has no room for all of them; else 1.
+static int arch_entry_write(Arch *a, Entry *e, TinySize count) {
+    if (!e->parent) {
+        // first byte of the string: it needs room too, not only `count`
+        if ((TinySize)(a->wb.end - a->wb.cur) < count + 1) return 0;
+    } else {
+        if (!arch_entry_write(a, e->parent, count+1)) return 0;
+    }
+    buf_write_byte(&a->wb, e->value);
+    arch_entry_use(a, e);
+    return 1;
+}
+
+
+// Decompresses rb into wb; see arch_deflate() for the token layout.
+// Pass final != 0 once rb holds the end of the input. Returns RES_RETRY when
+// wb has to be flushed or more input is needed, RES_DONE at the end of the
+// final input, RES_FAIL on a malformed stream.
+int arch_inflate(Arch *a, int final) {
+    if (a->wb.bits) return RES_FAIL;
+    while(1) {
+        Byte *bp = a->rb.cur;
+        Bits bb = a->rb.bits;
+
+        Word code = 0, b = 0;
+        int hasbyte = 0;
+        int complete = buf_read_bits(&a->rb, &code, a->codebits);
+        if (complete) {
+            if (code >= a->nextcode) {
+                // marker: a bare code follows, without an extension byte
+                complete = buf_read_bits(&a->rb, &code, a->codebits);
+                if (complete && code >= a->nextcode) return RES_FAIL;
+            } else {
+                complete = hasbyte = buf_read_bits(&a->rb, &b, 8);
+            }
+        }
+        if (!complete) {
+            // truncated token: give back what was consumed, so it can be
+            // read again once more input has arrived
+            a->rb.cur = bp;
+            a->rb.bits = bb;
+            break;
+        }
+
+        Entry *e = &a->entries[code];
+        // a valid stream never adds a string that is already known
+        if (hasbyte && e->children[b] != NO_CODE) return RES_FAIL;
+        if (!arch_entry_write(a, e, hasbyte)) {
+            a->rb.cur = bp;
+            a->rb.bits = bb;
+            return RES_RETRY;
+        }
+
+        if (hasbyte) {
+            if (!buf_write_byte(&a->wb, b)) return RES_FAIL;
+            arch_entry_add(a, e, b);
+        }
+    }
+
+    if (final) { buf_read_pad(&a->rb); return RES_DONE; }
+    return RES_RETRY;
+}
+
diff --git a/arch.h b/arch.h
new file mode 100644
index 0000000..132e4b6
--- /dev/null
+++ b/arch.h
@@ -0,0 +1,42 @@
+#ifndef ARCH_H
+#define ARCH_H
+
+
+#include "buffer.h"
+
+
+// result of arch_deflate() / arch_inflate()
+enum {
+    RES_RETRY, // call again after flushing wb and/or refilling rb
+    RES_DONE,  // the whole stream has been processed
+    RES_FAIL   // unrecoverable error
+};
+
+
+// index of an Entry in Arch::entries
+typedef unsigned short Code;
+
+
+// dictionary string: the string of `parent` followed by the byte `value`
+typedef struct Entry {
+    struct Entry *parent, *prev, *next; // prev/next: usage-ordered list
+    Code children[256];                 // code of each one-byte extension
+    Byte value;
+} Entry;
+
+
+typedef struct Arch {
+    Buffer rb, wb;                       // input and output, set by the caller
+    Entry root, *entries, *first, *last; // first/last: ends of the usage list
+    Code nextcode;                       // next code to assign
+    unsigned int codebits;               // current width of a code, in bits
+} Arch;
+
+
+int arch_init(Arch *a);
+void arch_deinit(Arch *a);
+int arch_deflate(Arch *a, int final);
+int arch_inflate(Arch *a, int final);
+
+
+#endif
\ No newline at end of file
diff --git a/buffer.c b/buffer.c
new file mode 100644
index 0000000..25bdd49
--- /dev/null
+++ b/buffer.c
@@ -0,0 +1,137 @@
+
+#include "buffer.h"
+
+#include <assert.h>
+
+
+// read
+
+// Reads the next `bits` (0..32) bits, most significant bit first, into the
+// low bits of *word. Returns 1 on success; returns 0 and consumes nothing
+// when word is NULL, bits > 32 or fewer than `bits` bits are left.
+int buf_read_bits(Buffer *b, Word *word, Bits bits) {
+    assert(b);
+    if (!bits) return 1;
+    if (!word || bits > 32 || b->cur >= b->end) return 0;
+    // unread bits = whole bytes left minus the consumed bits of the first one
+    TinySize remain = b->end - b->cur;
+    if (remain*8 - b->bits < bits) return 0;
+
+    // gather up to 32 unread bits, left-aligned in w
+    Word w;
+    Byte *c = b->cur;
+    Bits bb = b->bits;
+    if (bb) {
+        assert(bb < 8);
+        w = ( ((Word)c[0]) << (bb+24) );
+        if (remain > 4) {
+            w |= ( ((Word)c[1]) << (bb+16) );
+            w |= ( ((Word)c[2]) << (bb+8) );
+            w |= ( ((Word)c[3]) << bb );
+            w |= ( ((Word)c[4]) >> (8-bb) );
+        } else {
+            if (remain > 1) w |= ( ((Word)c[1]) << (bb+16) );
+            if (remain > 2) w |= ( ((Word)c[2]) << (bb+8) );
+            if (remain > 3) w |= ( ((Word)c[3]) << bb );
+        }
+    } else {
+        w = ( ((Word)c[0]) << 24 );
+        if (remain > 3) {
+            w |= ( ((Word)c[1]) << 16 );
+            w |= ( ((Word)c[2]) << 8 );
+            w |= ( ((Word)c[3]) );
+        } else {
+            if (remain > 1) w |= ( ((Word)c[1]) << 16 );
+            if (remain > 2) w |= ( ((Word)c[2]) << 8 );
+        }
+    }
+
+    bb += bits;
+    b->cur += bb >> 3;
+    b->bits = bb & 0x7;
+
+    *word = w >> (32 - bits);
+    return 1;
+}
+
+
+// Skips the unread rest of a partially consumed byte, if any.
+void buf_read_pad(Buffer *b) {
+    if (b->bits) {
+        assert(b->cur < b->end);
+        assert(b->bits < 8);
+        ++b->cur; // advance the position, not the byte it points to
+        b->bits = 0;
+    }
+}
+
+
+// Reads one byte from the next byte boundary into *byte.
+// Returns 1 on success, 0 when byte is NULL or no data is left.
+int buf_read_byte(Buffer *b, Byte *byte) {
+    if (b->bits) buf_read_pad(b);
+    if (b->cur >= b->end || !byte) return 0;
+    *byte = *b->cur++;
+    return 1;
+}
+
+
+
+// write
+
+// Appends the low `bits` (0..32) bits of word, most significant bit first.
+// The bits of an unfinished byte are kept in its high end. Returns 1 on
+// success, 0 when bits > 32 or fewer than 5 free bytes are left.
+int buf_write_bits(Buffer *b, Word word, Bits bits) {
+    assert(b);
+    if (!bits) return 1;
+    if (b->end - b->cur < 5 || bits > 32) return 0;
+
+    word <<= 32 - bits;
+
+    Byte *c = b->cur;
+    Bits bb = b->bits;
+    if (bb) {
+        assert(bb < 8);
+        // keep the bb bits already stored in the high end of c[0]
+        c[0] = (c[0] & (0xff00 >> bb)) | (word >> (bb+24));
+        c[1] = word >> (bb+16);
+        c[2] = word >> (bb+8);
+        c[3] = word >> bb;
+        c[4] = word << (8-bb); // the bb low bits that did not fit into c[3]
+    } else {
+        c[0] = word >> 24;
+        c[1] = word >> 16;
+        c[2] = word >> 8;
+        c[3] = word;
+    }
+
+    bb += bits;
+    b->cur += bb >> 3;
+    b->bits = bb & 0x7;
+
+    return 1;
+}
+
+
+// Completes an unfinished byte with zero bits.
+void buf_write_pad(Buffer *b) {
+    if (b->bits) {
+        assert(b->cur < b->end);
+        assert(b->bits < 8);
+        *b->cur &= 0xff00 >> b->bits; // stored bits stay in the high end
+        ++b->cur;
+        b->bits = 0;
+    }
+}
+
+
+// Writes one byte at the next byte boundary (an unfinished byte is padded).
+// Returns 1 on success, 0 when the buffer is full.
+int buf_write_byte(Buffer *b, Byte byte) {
+    if (b->bits) buf_write_pad(b);
+    if (b->cur >= b->end) return 0;
+    *b->cur++ = byte;
+    return 1;
+}
+
diff --git a/buffer.h b/buffer.h
new file mode 100644
index 0000000..9cc794f
--- /dev/null
+++ b/buffer.h
@@ -0,0 +1,27 @@
+#ifndef BUFFER_H
+#define BUFFER_H
+
+
+typedef unsigned int Word;
+typedef unsigned char Byte;
+typedef unsigned int Bits;
+typedef Bits TinySize;
+
+
+// Window over caller-owned memory with a bit-granular position.
+typedef struct Buffer {
+    Byte *begin, *end, *cur; // data is [begin, end), cur is the position
+    Bits bits;               // bits of *cur already read or written (0..7)
+} Buffer;
+
+
+int buf_read_bits(Buffer *b, Word *word, Bits bits);
+void buf_read_pad(Buffer *b);
+int buf_read_byte(Buffer *b, Byte *byte);
+
+int buf_write_bits(Buffer *b, Word word, Bits bits);
+void buf_write_pad(Buffer *b);
+int buf_write_byte(Buffer *b, Byte byte);
+
+
+#endif
\ No newline at end of file
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..3c9b337
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+cc -Wall main.c -o arch
+
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..cda0bd2
--- /dev/null
+++ b/main.c
@@ -0,0 +1,85 @@
+
+#include "arch.h"
+
+#include "buffer.c"
+#include "arch.c"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+// Prints msg to stderr and terminates the program with exit code 1.
+void die(const char *msg) {
+    fputs(msg, stderr);
+    fflush(stderr);
+    exit(1);
+}
+
+
+void usage()
+    { die( "Usage: arch deflate|inflate <input> <output>\n" ); }
+
+
+// Compresses ("deflate") or decompresses ("inflate") argv[2] into argv[3].
+int main(int argc, char **argv) {
+    if (argc != 4) usage();
+
+    int mode = 0;
+    if (!strcmp(argv[1], "deflate")) mode = 0; else
+    if (!strcmp(argv[1], "inflate")) mode = 1; else usage();
+
+    FILE *fi = fopen(argv[2], "rb");
+    if (!fi) die("cannot open input file");
+    FILE *fo = fopen(argv[3], "wb"); // "rb" could neither create nor write it
+    if (!fo) die("cannot open output file");
+
+    Arch a;
+    if (RES_DONE != arch_init(&a)) die("cannot init arch");
+
+    // one allocation: the first half is the input, the second the output
+    size_t size = 1024*1024;
+    Byte *buf0 = malloc(2*size);
+    if (!buf0) die("not enough memory");
+    Byte *buf1 = buf0 + size;
+    a.rb.begin = a.rb.cur = a.rb.end = buf0; // no input buffered yet
+    a.wb.begin = a.wb.cur = buf1;
+    a.wb.end = a.wb.begin + size;
+
+    int res = RES_RETRY;
+    while(res == RES_RETRY) {
+        // move the unprocessed input to the beginning of the buffer
+        assert(a.rb.cur <= a.rb.end);
+        size_t s = a.rb.end - a.rb.cur;
+        memmove(a.rb.begin, a.rb.cur, s);
+        a.rb.cur = a.rb.begin;
+        a.rb.end = a.rb.begin + s;
+
+        // read new data into the remaining part of the buffer
+        assert(a.rb.begin + size >= a.rb.end);
+        s = a.rb.begin + size - a.rb.end;
+        s = fread(a.rb.end, 1, s, fi);
+        if (ferror(fi)) die("cannot read from the input file");
+        a.rb.end += s;
+
+        // process data; the input is final once the end of file was reached
+        // (a zero-length read into a full buffer does not mean that)
+        int final = feof(fi) != 0;
+        res = mode ? arch_inflate(&a, final) : arch_deflate(&a, final);
+        if (res == RES_FAIL) die("arch failed");
+
+        // write results, keeping an unfinished byte for the next round
+        if (a.wb.cur > a.wb.begin) {
+            if (!fwrite(a.wb.begin, a.wb.cur - a.wb.begin, 1, fo)) die("cannot write into the output file");
+            if (a.wb.bits) *a.wb.begin = *a.wb.cur;
+            a.wb.cur = a.wb.begin;
+        }
+    }
+
+    arch_deinit(&a);
+    free(buf0);
+    if (fclose(fo)) die("cannot write into the output file");
+    fclose(fi);
+    return 0;
+}
+