diff --git a/arch.c b/arch.c index e4b1947..e7e384f 100644 --- a/arch.c +++ b/arch.c @@ -4,6 +4,7 @@ #include #include #include +//#include enum { @@ -13,7 +14,7 @@ enum { int arch_init(Arch *a) { memset(a, 0, sizeof(*a)); - a->entries = (Entry*)calloc(sizeof(Entry*), DICT_SIZE); + a->entries = (Entry*)calloc(sizeof(Entry), DICT_SIZE); if (!a->entries) return RES_FAIL; for(int i = 0; i < 256; ++i) { Entry *e = a->entries + i; @@ -33,14 +34,17 @@ void arch_deinit(Arch *a) { } -static Entry* arch_entry_use(Arch *a, Entry *e) { - if (!e->next) return e; - if (e->prev) e->prev->next = e->next; - else a->first = e->next; - e->next->prev = e->prev; - e->prev = a->last; - e->next = NULL; - return a->last = a->last->next = e; +static void arch_entry_use(Arch *a, Entry *e) { + if (!e->parent) return; + if (e->next) { + if (e->prev) e->prev->next = e->next; + else a->first = e->next; + e->next->prev = e->prev; + e->prev = a->last; + e->next = NULL; + a->last = a->last->next = e; + } + arch_entry_use(a, e->parent); } @@ -51,31 +55,39 @@ static Entry* arch_entry_add(Arch *a, Entry *parent, Byte value) { ++a->nextcode; if (!(nextcode & a->nextcode)) ++a->codebits; assert(a->codebits <= 16); - e = &a->entries[nextcode]; + assert(nextcode < DICT_SIZE); + e = a->entries + nextcode; memset(e->children, 0xff, sizeof(e->children)); + e->prev = a->last; e->next = NULL; + if (a->last) a->last->next = e; + else a->first = e; + a->last = e; } else { + arch_entry_use(a, parent); + e = a->first; assert(e); - assert(!e->prev); - assert(e->next); - a->first = e->next; - e->next->prev = NULL; - + assert(e != parent); assert(e->parent); assert(e->parent->children[e->value] == e - a->entries); e->parent->children[e->value] = NO_CODE; + + //assert(!e->prev); + //assert(e->next); + //a->first = e->next; e->prev = a->last; + //e->next = e->next->prev = NULL; + //a->last = a->last->next = e; } assert(parent); assert(parent->children[value] == NO_CODE); + e->value = value; e->parent = parent; parent->children[value] = e - a->entries; + //printf("add code: %d\n", (int)(e - a->entries)); + //for(int i = 0; i < 256; ++i) assert(e->children[i] == NO_CODE); - e->prev = a->last; e->next = NULL; - if (a->last) a->last->next = e; - else a->first = e; - a->last = e; - + arch_entry_use(a, e); return e; } @@ -83,27 +95,30 @@ static Entry* arch_entry_add(Arch *a, Entry *parent, Byte value) { int arch_deflate(Arch *a, int final) { if (a->rb.bits) return RES_FAIL; while(1) { - if (a->wb.end - a->wb.cur < 4) return RES_RETRY; + if (a->wb.end - a->wb.cur < 5) return RES_RETRY; Entry *e = &a->root; Byte *bp = a->rb.cur; + Bits codebits = a->codebits; while(1) { Byte b; if (!buf_read_byte(&a->rb, &b)) { - if (final) break; + if (final) { arch_entry_use(a, e); break; } a->rb.cur = bp; break; } if (e->children[b] == NO_CODE) { e = arch_entry_add(a, e, b); break; } - e = arch_entry_use(a, a->entries + e->children[b]); + e = a->entries + e->children[b]; } if (bp == a->rb.cur) break; assert(e); assert(e != &a->root); - Code code = e - a->entries; - if (code < 256) code = a->nextcode; else code = e->parent - a->entries; - Word w = (code << 8) | e->value; - if ( !buf_write_bits(&a->wb, w, a->codebits + 8) ) return RES_FAIL; + + Code code; Byte b = e->value; + if (e->parent) { code = e->parent - a->entries; } + else { code = a->nextcode; assert(final); } + //printf("pos: %d/%d, code %d/%d, byte %d (%c)\n", (int)(a->rb.cur - a->rb.begin), (int)(a->rb.end - a->rb.begin), (int)code, (int)a->nextcode, (int)b, (char)b); + if ( !buf_write_bits(&a->wb, (((Word)code) << 8) | b, codebits + 8) ) return RES_FAIL; } if (final && a->rb.cur >= a->rb.end) { buf_write_pad(&a->wb); return RES_DONE; } @@ -117,6 +132,7 @@ static int arch_entry_write(Arch *a, Entry *e, TinySize count) { } else { if (!arch_entry_write(a, e->parent, count+1)) return 0; } + //fputc(e->value, stdout); buf_write_byte(&a->wb, e->value); return 1; } @@ -128,28 +144,29 @@ int arch_inflate(Arch *a, int final) { Byte *bp = a->rb.cur; Bits bb = a->rb.bits; - Word code, b; - if (!buf_read_bits(&a->rb, &code, a->codebits)) break; - int hasbyte = 0; - if (code >= a->nextcode) { - if (!buf_read_bits(&a->rb, &code, a->codebits)) break; - if (code >= a->nextcode) return RES_FAIL; - } else { - if (!buf_read_bits(&a->rb, &b, 8)) break; - hasbyte = 1; - } + Word w; + if (!buf_read_bits(&a->rb, &w, a->codebits + 8)) break; + Code code = w >> 8; + Byte b = w; - Entry *e = &a->entries[code]; - if (!arch_entry_write(a, e, hasbyte)) { - a->rb.cur = bp; - a->rb.bits = bb; - return RES_RETRY; + //printf("pos: %d/%d + %d, code %d/%d, byte %d (%c)\n", (int)(a->rb.cur - a->rb.begin), (int)(a->rb.end - a->rb.begin), (int)a->rb.bits, (int)code, (int)a->nextcode, (int)b, (char)b); + if (code < a->nextcode) { + Entry *e = &a->entries[code]; + if (!arch_entry_write(a, e, 1)) { + a->rb.cur = bp; + a->rb.bits = bb; + return RES_RETRY; + } + //fputc(b, stdout); fputc('\n', stdout); + if (e->children[b] == NO_CODE) e = arch_entry_add(a, e, b); else arch_entry_use(a, e); + } else + if (code != a->nextcode) { + return RES_FAIL; + } else { + //printf("pos: %d/%d + %d, code %d/%d, byte %d (%c)\n", (int)(a->rb.cur - a->rb.begin), (int)(a->rb.end - a->rb.begin), (int)a->rb.bits, (int)code, (int)a->nextcode, (int)b, (char)b); } - if (hasbyte) { - if (!buf_write_byte(&a->wb, b)) return RES_FAIL; - arch_entry_add(a, e, b); - } + if (!buf_write_byte(&a->wb, b)) return RES_FAIL; } if (final) { buf_read_pad(&a->rb); return RES_DONE; } diff --git a/buffer.c b/buffer.c index 25bdd49..ce211c0 100644 --- a/buffer.c +++ b/buffer.c @@ -2,6 +2,7 @@ #include "buffer.h" #include +#include // read @@ -9,8 +10,8 @@ int buf_read_bits(Buffer *b, Word *word, Bits bits) { assert(b); if (!bits) return 1; - TinySize remain = b->end - b->cur; - if (remain*8 - bits < bits || bits > 32 || !*word) return 0; + size_t remain = b->end - b->cur; + if ((remain < 16 && remain*8 - b->bits < bits) || bits > 32 || !word) return 0; Word w; Byte *c = b->cur; @@ -53,7 +54,7 @@ void buf_read_pad(Buffer *b) { if (b->bits) { assert(b->cur < b->end); assert(b->bits < 8); - ++*b->cur; + ++b->cur; b->bits = 0; } } @@ -61,7 +62,7 @@ void buf_read_pad(Buffer *b) { int buf_read_byte(Buffer *b, Byte *byte) { if (b->bits) buf_read_pad(b); - if (b->cur >= b->end || !*byte) return 0; + if (b->cur >= b->end || !byte) return 0; *byte = *b->cur++; return 1; } @@ -81,11 +82,12 @@ int buf_write_bits(Buffer *b, Word word, Bits bits) { Bits bb = b->bits; if (bb) { assert(bb < 8); - c[0] = (c[0] << (8-bb)) | (word >> (bb+24)); + Bits bbi = 8 - bb; + c[0] = ((c[0] >> bbi) << bbi) | (word >> (bb+24)); c[1] = word >> (bb+16); c[2] = word >> (bb+8); c[3] = word >> bb; - c[4] = word; + c[4] = word << bbi; } else { c[0] = word >> 24; c[1] = word >> 16; @@ -105,8 +107,9 @@ void buf_write_pad(Buffer *b) { if (b->bits) { assert(b->cur < b->end); assert(b->bits < 8); - *b->cur <<= 8 - b->bits; - ++*b->cur; + Bits bb = 8 - b->bits; + *b->cur = (*b->cur >> bb) << bb; + ++b->cur; b->bits = 0; } } diff --git a/build.sh b/build.sh index 3c9b337..d2da42a 100755 --- a/build.sh +++ b/build.sh @@ -1,4 +1,7 @@ #!/bin/bash +set -e + cc -Wall main.c -o arch +echo "built" \ No newline at end of file diff --git a/main.c b/main.c index cda0bd2..981bf80 100644 --- a/main.c +++ b/main.c @@ -11,13 +11,25 @@ void die(const char *msg) { fputs(msg, stderr); + fputc('\n', stderr); fflush(stderr); exit(1); } void usage() - { die( "Usage: arch deflate|inflate \n" ); } + { die( "Usage: arch deflate|inflate " ); } + + +void print_buffer(const char *name, const Buffer *b) { + printf("%s: %p, size: %d, processed: %d, bits: %d, remain: %d\n", + name, + b->begin, + (int)(b->end - b->begin), + (int)(b->cur - b->begin), + (int)b->bits, + (int)(b->end - b->cur) ); +} int main(int argc, char **argv) { @@ -29,7 +41,7 @@ int main(int argc, char **argv) { FILE *fi = fopen(argv[2], "rb"); if (!fi) die("cannot open input file"); - FILE *fo = fopen(argv[3], "rb"); + FILE *fo = fopen(argv[3], "wb"); if (!fo) die("cannot open output file"); Arch a; @@ -38,9 +50,8 @@ int main(int argc, char **argv) { size_t size = 1024*1024; Byte *buf0 = malloc(2*size), *buf1 = buf0 + size; if (!buf0) die("not enough memory"); - a.rb.begin = a.rb.cur = buf0; - a.wb.begin = a.wb.cur = buf1; - a.wb.end = a.wb.begin + size; + a.rb.begin = a.rb.cur = a.rb.end = buf0; + a.wb.begin = a.wb.cur = buf1; a.wb.end = a.wb.begin + size; int res = RES_RETRY; while(res == RES_RETRY) { @@ -49,7 +60,7 @@ int main(int argc, char **argv) { size_t s = a.rb.end - a.rb.cur; memmove(a.rb.begin, a.rb.cur, s); a.rb.cur = a.rb.begin; - a.rb.end -= s; + a.rb.end = a.rb.cur + s; // read new data into a remaining part of the buffer assert(a.rb.begin + size >= a.rb.end); @@ -58,15 +69,20 @@ int main(int argc, char **argv) { if (!s && ferror(fi)) die("cannot read from the input file"); a.rb.end += s; + //print_buffer("read buffer", &a.rb); + // process data res = mode ? arch_inflate(&a, !s) : arch_deflate(&a, !s); if (res == RES_FAIL) die("arch failed"); + //printf("res: %d\n", res); + //print_buffer("write buffer", &a.wb); + // write results - if (a.wb.cur > a.rb.begin) { - if (!fwrite(a.rb.begin, a.wb.cur - a.rb.begin, 1, fo)) die("cannot write into the output file"); - if (a.wb.bits) *a.rb.begin = *a.wb.cur; - a.wb.cur = a.rb.begin; + if (a.wb.cur > a.wb.begin) { + if (!fwrite(a.wb.begin, a.wb.cur - a.wb.begin, 1, fo)) die("cannot write into the output file"); + if (a.wb.bits) *a.wb.begin = *a.wb.cur; + a.wb.cur = a.wb.begin; } } diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..4bf3e24 --- /dev/null +++ b/test.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e + +[ -n "$1" ] || ( echo "file arg required"; false ) + +./build.sh + + +echo "arch $1 -> $1.ar" +./arch deflate "$1" "$1.ar" +echo "archived" + +echo "extract $1.ar -> $1.copy" +./arch inflate "$1.ar" "$1.copy" +echo "extracted" + + +echo "compare with original" +cmp "$1" "$1.copy" || ( echo "ERROR: files differ"; false ) +echo "files are equal" + +echo "done" \ No newline at end of file