kusano fc6ab3
/* gzjoin -- command to join gzip files into one gzip file
kusano fc6ab3
kusano fc6ab3
  Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
kusano fc6ab3
  version 1.2, 14 Aug 2012
kusano fc6ab3
kusano fc6ab3
  This software is provided 'as-is', without any express or implied
kusano fc6ab3
  warranty.  In no event will the author be held liable for any damages
kusano fc6ab3
  arising from the use of this software.
kusano fc6ab3
kusano fc6ab3
  Permission is granted to anyone to use this software for any purpose,
kusano fc6ab3
  including commercial applications, and to alter it and redistribute it
kusano fc6ab3
  freely, subject to the following restrictions:
kusano fc6ab3
kusano fc6ab3
  1. The origin of this software must not be misrepresented; you must not
kusano fc6ab3
     claim that you wrote the original software. If you use this software
kusano fc6ab3
     in a product, an acknowledgment in the product documentation would be
kusano fc6ab3
     appreciated but is not required.
kusano fc6ab3
  2. Altered source versions must be plainly marked as such, and must not be
kusano fc6ab3
     misrepresented as being the original software.
kusano fc6ab3
  3. This notice may not be removed or altered from any source distribution.
kusano fc6ab3
kusano fc6ab3
  Mark Adler    madler@alumni.caltech.edu
kusano fc6ab3
 */
kusano fc6ab3
kusano fc6ab3
/*
kusano fc6ab3
 * Change history:
kusano fc6ab3
 *
kusano fc6ab3
 * 1.0  11 Dec 2004     - First version
kusano fc6ab3
 * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
kusano fc6ab3
 * 1.2  14 Aug 2012     - Clean up for z_const usage
kusano fc6ab3
 */
kusano fc6ab3
kusano fc6ab3
/*
kusano fc6ab3
   gzjoin takes one or more gzip files on the command line and writes out a
kusano fc6ab3
   single gzip file that will uncompress to the concatenation of the
kusano fc6ab3
   uncompressed data from the individual gzip files.  gzjoin does this without
kusano fc6ab3
   having to recompress any of the data and without having to calculate a new
kusano fc6ab3
   crc32 for the concatenated uncompressed data.  gzjoin does however have to
kusano fc6ab3
   decompress all of the input data in order to find the bits in the compressed
kusano fc6ab3
   data that need to be modified to concatenate the streams.
kusano fc6ab3
kusano fc6ab3
   gzjoin does not do an integrity check on the input gzip files other than
kusano fc6ab3
   checking the gzip header and decompressing the compressed data.  They are
kusano fc6ab3
   otherwise assumed to be complete and correct.
kusano fc6ab3
kusano fc6ab3
   Each joint between gzip files removes at least 18 bytes of previous trailer
kusano fc6ab3
   and subsequent header, and inserts an average of about three bytes to the
kusano fc6ab3
   compressed data in order to connect the streams.  The output gzip file
kusano fc6ab3
   has a minimal ten-byte gzip header with no file name or modification time.
kusano fc6ab3
kusano fc6ab3
   This program was written to illustrate the use of the Z_BLOCK option of
kusano fc6ab3
   inflate() and the crc32_combine() function.  gzjoin will not compile with
kusano fc6ab3
   versions of zlib earlier than 1.2.3.
kusano fc6ab3
 */
kusano fc6ab3
kusano fc6ab3
#include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
kusano fc6ab3
#include <stdlib.h>     /* exit(), malloc(), free() */
kusano fc6ab3
#include <fcntl.h>      /* open() */
kusano fc6ab3
#include <unistd.h>     /* close(), read(), lseek() */
kusano fc6ab3
#include "zlib.h"
kusano fc6ab3
    /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
kusano fc6ab3
kusano fc6ab3
#define local static

/* Report a fatal error and terminate.  The two message fragments are printed
   back to back on stderr between the fixed "gzjoin error: " prefix and the
   ", output incomplete" suffix, and the process then exits with status 1.
   The int return type exists only so that a call can be used as an operand in
   an expression; the return statement is never reached. */
local int bail(char *why1, char *why2)
{
    (void)fprintf(stderr, "gzjoin error: %s%s, output incomplete\n",
                  why1, why2);
    exit(1);
    return 0;       /* not reached */
}
kusano fc6ab3
kusano fc6ab3
/* -- simple buffered file input with access to the buffer -- */
kusano fc6ab3
kusano fc6ab3
#define CHUNK 32768         /* buffer size: power of two, fits in unsigned */

/* State of one buffered input file.  buf is the start of the CHUNK-byte
   buffer, next points at the first unread byte inside it, and left counts the
   unread bytes from next onward. */
typedef struct {
    char *name;             /* file name, kept for error messages */
    int fd;                 /* descriptor returned by open() */
    unsigned left;          /* number of unread bytes starting at next */
    unsigned char *next;    /* first unread byte in buf */
    unsigned char *buf;     /* malloc()'d buffer of CHUNK bytes */
} bin;
kusano fc6ab3
kusano fc6ab3
/* close a buffered file and free allocated memory */
kusano fc6ab3
local void bclose(bin *in)
kusano fc6ab3
{
kusano fc6ab3
    if (in != NULL) {
kusano fc6ab3
        if (in->fd != -1)
kusano fc6ab3
            close(in->fd);
kusano fc6ab3
        if (in->buf != NULL)
kusano fc6ab3
            free(in->buf);
kusano fc6ab3
        free(in);
kusano fc6ab3
    }
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* open a buffered file for input, return a pointer to type bin, or NULL on
kusano fc6ab3
   failure */
kusano fc6ab3
local bin *bopen(char *name)
kusano fc6ab3
{
kusano fc6ab3
    bin *in;
kusano fc6ab3
kusano fc6ab3
    in = malloc(sizeof(bin));
kusano fc6ab3
    if (in == NULL)
kusano fc6ab3
        return NULL;
kusano fc6ab3
    in->buf = malloc(CHUNK);
kusano fc6ab3
    in->fd = open(name, O_RDONLY, 0);
kusano fc6ab3
    if (in->buf == NULL || in->fd == -1) {
kusano fc6ab3
        bclose(in);
kusano fc6ab3
        return NULL;
kusano fc6ab3
    }
kusano fc6ab3
    in->left = 0;
kusano fc6ab3
    in->next = in->buf;
kusano fc6ab3
    in->name = name;
kusano fc6ab3
    return in;
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* load buffer from file, return -1 on read error, 0 or 1 on success, with
kusano fc6ab3
   1 indicating that end-of-file was reached */
kusano fc6ab3
local int bload(bin *in)
kusano fc6ab3
{
kusano fc6ab3
    long len;
kusano fc6ab3
kusano fc6ab3
    if (in == NULL)
kusano fc6ab3
        return -1;
kusano fc6ab3
    if (in->left != 0)
kusano fc6ab3
        return 0;
kusano fc6ab3
    in->next = in->buf;
kusano fc6ab3
    do {
kusano fc6ab3
        len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
kusano fc6ab3
        if (len < 0)
kusano fc6ab3
            return -1;
kusano fc6ab3
        in->left += (unsigned)len;
kusano fc6ab3
    } while (len != 0 && in->left < CHUNK);
kusano fc6ab3
    return len == 0 ? 1 : 0;
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* Get the next byte from the file as an int in 0..255, refilling the buffer
   with bload() when it is empty.  If no byte is available after the refill
   attempt (end of file, or nothing could be read), bail() is called and the
   program exits.  The argument is parenthesized at every use so that any
   pointer expression can be passed, but it is evaluated more than once, so it
   must not have side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))
kusano fc6ab3
kusano fc6ab3
/* get a four-byte little-endian unsigned integer from file */
kusano fc6ab3
local unsigned long bget4(bin *in)
kusano fc6ab3
{
kusano fc6ab3
    unsigned long val;
kusano fc6ab3
kusano fc6ab3
    val = bget(in);
kusano fc6ab3
    val += (unsigned long)(bget(in)) << 8;
kusano fc6ab3
    val += (unsigned long)(bget(in)) << 16;
kusano fc6ab3
    val += (unsigned long)(bget(in)) << 24;
kusano fc6ab3
    return val;
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* skip bytes in file */
kusano fc6ab3
local void bskip(bin *in, unsigned skip)
kusano fc6ab3
{
kusano fc6ab3
    /* check pointer */
kusano fc6ab3
    if (in == NULL)
kusano fc6ab3
        return;
kusano fc6ab3
kusano fc6ab3
    /* easy case -- skip bytes in buffer */
kusano fc6ab3
    if (skip <= in->left) {
kusano fc6ab3
        in->left -= skip;
kusano fc6ab3
        in->next += skip;
kusano fc6ab3
        return;
kusano fc6ab3
    }
kusano fc6ab3
kusano fc6ab3
    /* skip what's in buffer, discard buffer contents */
kusano fc6ab3
    skip -= in->left;
kusano fc6ab3
    in->left = 0;
kusano fc6ab3
kusano fc6ab3
    /* seek past multiples of CHUNK bytes */
kusano fc6ab3
    if (skip > CHUNK) {
kusano fc6ab3
        unsigned left;
kusano fc6ab3
kusano fc6ab3
        left = skip & (CHUNK - 1);
kusano fc6ab3
        if (left == 0) {
kusano fc6ab3
            /* exact number of chunks: seek all the way minus one byte to check
kusano fc6ab3
               for end-of-file with a read */
kusano fc6ab3
            lseek(in->fd, skip - 1, SEEK_CUR);
kusano fc6ab3
            if (read(in->fd, in->buf, 1) != 1)
kusano fc6ab3
                bail("unexpected end of file on ", in->name);
kusano fc6ab3
            return;
kusano fc6ab3
        }
kusano fc6ab3
kusano fc6ab3
        /* skip the integral chunks, update skip with remainder */
kusano fc6ab3
        lseek(in->fd, skip - left, SEEK_CUR);
kusano fc6ab3
        skip = left;
kusano fc6ab3
    }
kusano fc6ab3
kusano fc6ab3
    /* read more input and skip remainder */
kusano fc6ab3
    bload(in);
kusano fc6ab3
    if (skip > in->left)
kusano fc6ab3
        bail("unexpected end of file on ", in->name);
kusano fc6ab3
    in->left -= skip;
kusano fc6ab3
    in->next += skip;
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* -- end of buffered input functions -- */
kusano fc6ab3
kusano fc6ab3
/* skip the gzip header from file in */
kusano fc6ab3
local void gzhead(bin *in)
kusano fc6ab3
{
kusano fc6ab3
    int flags;
kusano fc6ab3
kusano fc6ab3
    /* verify gzip magic header and compression method */
kusano fc6ab3
    if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
kusano fc6ab3
        bail(in->name, " is not a valid gzip file");
kusano fc6ab3
kusano fc6ab3
    /* get and verify flags */
kusano fc6ab3
    flags = bget(in);
kusano fc6ab3
    if ((flags & 0xe0) != 0)
kusano fc6ab3
        bail("unknown reserved bits set in ", in->name);
kusano fc6ab3
kusano fc6ab3
    /* skip modification time, extra flags, and os */
kusano fc6ab3
    bskip(in, 6);
kusano fc6ab3
kusano fc6ab3
    /* skip extra field if present */
kusano fc6ab3
    if (flags & 4) {
kusano fc6ab3
        unsigned len;
kusano fc6ab3
kusano fc6ab3
        len = bget(in);
kusano fc6ab3
        len += (unsigned)(bget(in)) << 8;
kusano fc6ab3
        bskip(in, len);
kusano fc6ab3
    }
kusano fc6ab3
kusano fc6ab3
    /* skip file name if present */
kusano fc6ab3
    if (flags & 8)
kusano fc6ab3
        while (bget(in) != 0)
kusano fc6ab3
            ;
kusano fc6ab3
kusano fc6ab3
    /* skip comment if present */
kusano fc6ab3
    if (flags & 16)
kusano fc6ab3
        while (bget(in) != 0)
kusano fc6ab3
            ;
kusano fc6ab3
kusano fc6ab3
    /* skip header crc if present */
kusano fc6ab3
    if (flags & 2)
kusano fc6ab3
        bskip(in, 2);
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* write a four-byte little-endian unsigned integer to out */
kusano fc6ab3
local void put4(unsigned long val, FILE *out)
kusano fc6ab3
{
kusano fc6ab3
    putc(val & 0xff, out);
kusano fc6ab3
    putc((val >> 8) & 0xff, out);
kusano fc6ab3
    putc((val >> 16) & 0xff, out);
kusano fc6ab3
    putc((val >> 24) & 0xff, out);
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* Load up zlib stream from buffered input, bail if end of file */
kusano fc6ab3
local void zpull(z_streamp strm, bin *in)
kusano fc6ab3
{
kusano fc6ab3
    if (in->left == 0)
kusano fc6ab3
        bload(in);
kusano fc6ab3
    if (in->left == 0)
kusano fc6ab3
        bail("unexpected end of file on ", in->name);
kusano fc6ab3
    strm->avail_in = in->left;
kusano fc6ab3
    strm->next_in = in->next;
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* Write header for gzip file to out and initialize trailer. */
kusano fc6ab3
local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
kusano fc6ab3
{
kusano fc6ab3
    fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
kusano fc6ab3
    *crc = crc32(0L, Z_NULL, 0);
kusano fc6ab3
    *tot = 0;
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* Copy the compressed data from name, zeroing the last block bit of the last
kusano fc6ab3
   block if clr is true, and adding empty blocks as needed to get to a byte
kusano fc6ab3
   boundary.  If clr is false, then the last block becomes the last block of
kusano fc6ab3
   the output, and the gzip trailer is written.  crc and tot maintains the
kusano fc6ab3
   crc and length (modulo 2^32) of the output for the trailer.  The resulting
kusano fc6ab3
   gzip file is written to out.  gzinit() must be called before the first call
kusano fc6ab3
   of gzcopy() to write the gzip header and to initialize crc and tot. */
kusano fc6ab3
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
kusano fc6ab3
                  FILE *out)
kusano fc6ab3
{
kusano fc6ab3
    int ret;                /* return value from zlib functions */
kusano fc6ab3
    int pos;                /* where the "last block" bit is in byte */
kusano fc6ab3
    int last;               /* true if processing the last block */
kusano fc6ab3
    bin *in;                /* buffered input file */
kusano fc6ab3
    unsigned char *start;   /* start of compressed data in buffer */
kusano fc6ab3
    unsigned char *junk;    /* buffer for uncompressed data -- discarded */
kusano fc6ab3
    z_off_t len;            /* length of uncompressed data (support > 4 GB) */
kusano fc6ab3
    z_stream strm;          /* zlib inflate stream */
kusano fc6ab3
kusano fc6ab3
    /* open gzip file and skip header */
kusano fc6ab3
    in = bopen(name);
kusano fc6ab3
    if (in == NULL)
kusano fc6ab3
        bail("could not open ", name);
kusano fc6ab3
    gzhead(in);
kusano fc6ab3
kusano fc6ab3
    /* allocate buffer for uncompressed data and initialize raw inflate
kusano fc6ab3
       stream */
kusano fc6ab3
    junk = malloc(CHUNK);
kusano fc6ab3
    strm.zalloc = Z_NULL;
kusano fc6ab3
    strm.zfree = Z_NULL;
kusano fc6ab3
    strm.opaque = Z_NULL;
kusano fc6ab3
    strm.avail_in = 0;
kusano fc6ab3
    strm.next_in = Z_NULL;
kusano fc6ab3
    ret = inflateInit2(&strm, -15);
kusano fc6ab3
    if (junk == NULL || ret != Z_OK)
kusano fc6ab3
        bail("out of memory", "");
kusano fc6ab3
kusano fc6ab3
    /* inflate and copy compressed data, clear last-block bit if requested */
kusano fc6ab3
    len = 0;
kusano fc6ab3
    zpull(&strm, in);
kusano fc6ab3
    start = in->next;
kusano fc6ab3
    last = start[0] & 1;
kusano fc6ab3
    if (last && clr)
kusano fc6ab3
        start[0] &= ~1;
kusano fc6ab3
    strm.avail_out = 0;
kusano fc6ab3
    for (;;) {
kusano fc6ab3
        /* if input used and output done, write used input and get more */
kusano fc6ab3
        if (strm.avail_in == 0 && strm.avail_out != 0) {
kusano fc6ab3
            fwrite(start, 1, strm.next_in - start, out);
kusano fc6ab3
            start = in->buf;
kusano fc6ab3
            in->left = 0;
kusano fc6ab3
            zpull(&strm, in);
kusano fc6ab3
        }
kusano fc6ab3
kusano fc6ab3
        /* decompress -- return early when end-of-block reached */
kusano fc6ab3
        strm.avail_out = CHUNK;
kusano fc6ab3
        strm.next_out = junk;
kusano fc6ab3
        ret = inflate(&strm, Z_BLOCK);
kusano fc6ab3
        switch (ret) {
kusano fc6ab3
        case Z_MEM_ERROR:
kusano fc6ab3
            bail("out of memory", "");
kusano fc6ab3
        case Z_DATA_ERROR:
kusano fc6ab3
            bail("invalid compressed data in ", in->name);
kusano fc6ab3
        }
kusano fc6ab3
kusano fc6ab3
        /* update length of uncompressed data */
kusano fc6ab3
        len += CHUNK - strm.avail_out;
kusano fc6ab3
kusano fc6ab3
        /* check for block boundary (only get this when block copied out) */
kusano fc6ab3
        if (strm.data_type & 128) {
kusano fc6ab3
            /* if that was the last block, then done */
kusano fc6ab3
            if (last)
kusano fc6ab3
                break;
kusano fc6ab3
kusano fc6ab3
            /* number of unused bits in last byte */
kusano fc6ab3
            pos = strm.data_type & 7;
kusano fc6ab3
kusano fc6ab3
            /* find the next last-block bit */
kusano fc6ab3
            if (pos != 0) {
kusano fc6ab3
                /* next last-block bit is in last used byte */
kusano fc6ab3
                pos = 0x100 >> pos;
kusano fc6ab3
                last = strm.next_in[-1] & pos;
kusano fc6ab3
                if (last && clr)
kusano fc6ab3
                    in->buf[strm.next_in - in->buf - 1] &= ~pos;
kusano fc6ab3
            }
kusano fc6ab3
            else {
kusano fc6ab3
                /* next last-block bit is in next unused byte */
kusano fc6ab3
                if (strm.avail_in == 0) {
kusano fc6ab3
                    /* don't have that byte yet -- get it */
kusano fc6ab3
                    fwrite(start, 1, strm.next_in - start, out);
kusano fc6ab3
                    start = in->buf;
kusano fc6ab3
                    in->left = 0;
kusano fc6ab3
                    zpull(&strm, in);
kusano fc6ab3
                }
kusano fc6ab3
                last = strm.next_in[0] & 1;
kusano fc6ab3
                if (last && clr)
kusano fc6ab3
                    in->buf[strm.next_in - in->buf] &= ~1;
kusano fc6ab3
            }
kusano fc6ab3
        }
kusano fc6ab3
    }
kusano fc6ab3
kusano fc6ab3
    /* update buffer with unused input */
kusano fc6ab3
    in->left = strm.avail_in;
kusano fc6ab3
    in->next = in->buf + (strm.next_in - in->buf);
kusano fc6ab3
kusano fc6ab3
    /* copy used input, write empty blocks to get to byte boundary */
kusano fc6ab3
    pos = strm.data_type & 7;
kusano fc6ab3
    fwrite(start, 1, in->next - start - 1, out);
kusano fc6ab3
    last = in->next[-1];
kusano fc6ab3
    if (pos == 0 || !clr)
kusano fc6ab3
        /* already at byte boundary, or last file: write last byte */
kusano fc6ab3
        putc(last, out);
kusano fc6ab3
    else {
kusano fc6ab3
        /* append empty blocks to last byte */
kusano fc6ab3
        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
kusano fc6ab3
        if (pos & 1) {
kusano fc6ab3
            /* odd -- append an empty stored block */
kusano fc6ab3
            putc(last, out);
kusano fc6ab3
            if (pos == 1)
kusano fc6ab3
                putc(0, out);               /* two more bits in block header */
kusano fc6ab3
            fwrite("\0\0\xff\xff", 1, 4, out);
kusano fc6ab3
        }
kusano fc6ab3
        else {
kusano fc6ab3
            /* even -- append 1, 2, or 3 empty fixed blocks */
kusano fc6ab3
            switch (pos) {
kusano fc6ab3
            case 6:
kusano fc6ab3
                putc(last | 8, out);
kusano fc6ab3
                last = 0;
kusano fc6ab3
            case 4:
kusano fc6ab3
                putc(last | 0x20, out);
kusano fc6ab3
                last = 0;
kusano fc6ab3
            case 2:
kusano fc6ab3
                putc(last | 0x80, out);
kusano fc6ab3
                putc(0, out);
kusano fc6ab3
            }
kusano fc6ab3
        }
kusano fc6ab3
    }
kusano fc6ab3
kusano fc6ab3
    /* update crc and tot */
kusano fc6ab3
    *crc = crc32_combine(*crc, bget4(in), len);
kusano fc6ab3
    *tot += (unsigned long)len;
kusano fc6ab3
kusano fc6ab3
    /* clean up */
kusano fc6ab3
    inflateEnd(&strm);
kusano fc6ab3
    free(junk);
kusano fc6ab3
    bclose(in);
kusano fc6ab3
kusano fc6ab3
    /* write trailer if this is the last gzip file */
kusano fc6ab3
    if (!clr) {
kusano fc6ab3
        put4(*crc, out);
kusano fc6ab3
        put4(*tot, out);
kusano fc6ab3
    }
kusano fc6ab3
}
kusano fc6ab3
kusano fc6ab3
/* Join the gzip files named on the command line and write the resulting
   single gzip stream to stdout.  With no arguments, a usage message is
   printed on stderr and 0 is returned.  Returns 0 on success; every failure,
   including a failed write to stdout, exits with status 1 through bail(). */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout; argc, already
       decremented by the loop test, is zero only for the last file, which
       makes it the "more files follow" flag for gzcopy() */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* none of the individual writes are checked, so check the stream once
       here: the error indicator stays set after any failed write, and
       fflush() reports a failure to deliver what is still buffered */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}