/* gzjoin -- command to join gzip files into one gzip file

  Copyright (C) 2004 Mark Adler, all rights reserved
  version 1.0, 11 Dec 2004

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the author be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Mark Adler    madler@alumni.caltech.edu
 */

/*
 * Change history:
 *
 * 1.0  11 Dec 2004     - First version
 * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
 */

/*
   gzjoin takes one or more gzip files on the command line and writes out a
   single gzip file that will uncompress to the concatenation of the
   uncompressed data from the individual gzip files.  gzjoin does this without
   having to recompress any of the data and without having to calculate a new
   crc32 for the concatenated uncompressed data.  gzjoin does however have to
   decompress all of the input data in order to find the bits in the compressed
   data that need to be modified to concatenate the streams.

   gzjoin does not do an integrity check on the input gzip files other than
   checking the gzip header and decompressing the compressed data.  They are
   otherwise assumed to be complete and correct.

   Each joint between gzip files removes at least 18 bytes of previous trailer
   and subsequent header, and inserts an average of about three bytes to the
   compressed data in order to connect the streams.  The output gzip file
   has a minimal ten-byte gzip header with no file name or modification time.

   This program was written to illustrate the use of the Z_BLOCK option of
   inflate() and the crc32_combine() function.  gzjoin will not compile with
   versions of zlib earlier than 1.2.3.
 */

#include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
#include <stdlib.h>     /* exit(), malloc(), free() */
#include <errno.h>      /* errno, EINTR */
#include <fcntl.h>      /* open() */
#include <unistd.h>     /* close(), read(), lseek() */
#include "zlib.h"
    /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */

/* shorthand for internal linkage, used on the file-scope helper functions */
#define local static

/* exit with an error (return a value to allow use in an expression) */
kusano 7d535a
local int bail(char *why1, char *why2)
kusano 7d535a
{
kusano 7d535a
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
kusano 7d535a
    exit(1);
kusano 7d535a
    return 0;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* -- simple buffered file input with access to the buffer -- */

/* Size in bytes of each input buffer, and of the buffer that receives the
   discarded uncompressed data.  Must be a power of two, since bskip() takes
   the remainder of a skip count with "& (CHUNK - 1)", and must fit in an
   unsigned. */
#define CHUNK 32768

/* State of one buffered input file.  bload() refills buf from fd; the unread
   portion of the buffer is the left bytes beginning at next. */
typedef struct {
    char *name;             /* file name, kept only for error messages */
    int fd;                 /* descriptor from open(), -1 if the open failed */
    unsigned left;          /* count of unread bytes available at next */
    unsigned char *next;    /* first unread byte in buf */
    unsigned char *buf;     /* malloc'ed buffer of CHUNK bytes */
} bin;

/* close a buffered file and free allocated memory */
kusano 7d535a
local void bclose(bin *in)
kusano 7d535a
{
kusano 7d535a
    if (in != NULL) {
kusano 7d535a
        if (in->fd != -1)
kusano 7d535a
            close(in->fd);
kusano 7d535a
        if (in->buf != NULL)
kusano 7d535a
            free(in->buf);
kusano 7d535a
        free(in);
kusano 7d535a
    }
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* open a buffered file for input, return a pointer to type bin, or NULL on
kusano 7d535a
   failure */
kusano 7d535a
local bin *bopen(char *name)
kusano 7d535a
{
kusano 7d535a
    bin *in;
kusano 7d535a
kusano 7d535a
    in = malloc(sizeof(bin));
kusano 7d535a
    if (in == NULL)
kusano 7d535a
        return NULL;
kusano 7d535a
    in->buf = malloc(CHUNK);
kusano 7d535a
    in->fd = open(name, O_RDONLY, 0);
kusano 7d535a
    if (in->buf == NULL || in->fd == -1) {
kusano 7d535a
        bclose(in);
kusano 7d535a
        return NULL;
kusano 7d535a
    }
kusano 7d535a
    in->left = 0;
kusano 7d535a
    in->next = in->buf;
kusano 7d535a
    in->name = name;
kusano 7d535a
    return in;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* load buffer from file, return -1 on read error, 0 or 1 on success, with
kusano 7d535a
   1 indicating that end-of-file was reached */
kusano 7d535a
local int bload(bin *in)
kusano 7d535a
{
kusano 7d535a
    long len;
kusano 7d535a
kusano 7d535a
    if (in == NULL)
kusano 7d535a
        return -1;
kusano 7d535a
    if (in->left != 0)
kusano 7d535a
        return 0;
kusano 7d535a
    in->next = in->buf;
kusano 7d535a
    do {
kusano 7d535a
        len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
kusano 7d535a
        if (len < 0)
kusano 7d535a
            return -1;
kusano 7d535a
        in->left += (unsigned)len;
kusano 7d535a
    } while (len != 0 && in->left < CHUNK);
kusano 7d535a
    return len == 0 ? 1 : 0;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* get a byte from the file, bail if end of file */
kusano 7d535a
#define bget(in) (in->left ? 0 : bload(in), \
kusano 7d535a
                  in->left ? (in->left--, *(in->next)++) : \
kusano 7d535a
                    bail("unexpected end of file on ", in->name))
kusano 7d535a
kusano 7d535a
/* get a four-byte little-endian unsigned integer from file */
kusano 7d535a
local unsigned long bget4(bin *in)
kusano 7d535a
{
kusano 7d535a
    unsigned long val;
kusano 7d535a
kusano 7d535a
    val = bget(in);
kusano 7d535a
    val += (unsigned long)(bget(in)) << 8;
kusano 7d535a
    val += (unsigned long)(bget(in)) << 16;
kusano 7d535a
    val += (unsigned long)(bget(in)) << 24;
kusano 7d535a
    return val;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* skip bytes in file */
kusano 7d535a
local void bskip(bin *in, unsigned skip)
kusano 7d535a
{
kusano 7d535a
    /* check pointer */
kusano 7d535a
    if (in == NULL)
kusano 7d535a
        return;
kusano 7d535a
kusano 7d535a
    /* easy case -- skip bytes in buffer */
kusano 7d535a
    if (skip <= in->left) {
kusano 7d535a
        in->left -= skip;
kusano 7d535a
        in->next += skip;
kusano 7d535a
        return;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* skip what's in buffer, discard buffer contents */
kusano 7d535a
    skip -= in->left;
kusano 7d535a
    in->left = 0;
kusano 7d535a
kusano 7d535a
    /* seek past multiples of CHUNK bytes */
kusano 7d535a
    if (skip > CHUNK) {
kusano 7d535a
        unsigned left;
kusano 7d535a
kusano 7d535a
        left = skip & (CHUNK - 1);
kusano 7d535a
        if (left == 0) {
kusano 7d535a
            /* exact number of chunks: seek all the way minus one byte to check
kusano 7d535a
               for end-of-file with a read */
kusano 7d535a
            lseek(in->fd, skip - 1, SEEK_CUR);
kusano 7d535a
            if (read(in->fd, in->buf, 1) != 1)
kusano 7d535a
                bail("unexpected end of file on ", in->name);
kusano 7d535a
            return;
kusano 7d535a
        }
kusano 7d535a
kusano 7d535a
        /* skip the integral chunks, update skip with remainder */
kusano 7d535a
        lseek(in->fd, skip - left, SEEK_CUR);
kusano 7d535a
        skip = left;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* read more input and skip remainder */
kusano 7d535a
    bload(in);
kusano 7d535a
    if (skip > in->left)
kusano 7d535a
        bail("unexpected end of file on ", in->name);
kusano 7d535a
    in->left -= skip;
kusano 7d535a
    in->next += skip;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* -- end of buffered input functions -- */

/* skip the gzip header from file in */
kusano 7d535a
local void gzhead(bin *in)
kusano 7d535a
{
kusano 7d535a
    int flags;
kusano 7d535a
kusano 7d535a
    /* verify gzip magic header and compression method */
kusano 7d535a
    if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
kusano 7d535a
        bail(in->name, " is not a valid gzip file");
kusano 7d535a
kusano 7d535a
    /* get and verify flags */
kusano 7d535a
    flags = bget(in);
kusano 7d535a
    if ((flags & 0xe0) != 0)
kusano 7d535a
        bail("unknown reserved bits set in ", in->name);
kusano 7d535a
kusano 7d535a
    /* skip modification time, extra flags, and os */
kusano 7d535a
    bskip(in, 6);
kusano 7d535a
kusano 7d535a
    /* skip extra field if present */
kusano 7d535a
    if (flags & 4) {
kusano 7d535a
        unsigned len;
kusano 7d535a
kusano 7d535a
        len = bget(in);
kusano 7d535a
        len += (unsigned)(bget(in)) << 8;
kusano 7d535a
        bskip(in, len);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* skip file name if present */
kusano 7d535a
    if (flags & 8)
kusano 7d535a
        while (bget(in) != 0)
kusano 7d535a
            ;
kusano 7d535a
kusano 7d535a
    /* skip comment if present */
kusano 7d535a
    if (flags & 16)
kusano 7d535a
        while (bget(in) != 0)
kusano 7d535a
            ;
kusano 7d535a
kusano 7d535a
    /* skip header crc if present */
kusano 7d535a
    if (flags & 2)
kusano 7d535a
        bskip(in, 2);
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* write a four-byte little-endian unsigned integer to out */
kusano 7d535a
local void put4(unsigned long val, FILE *out)
kusano 7d535a
{
kusano 7d535a
    putc(val & 0xff, out);
kusano 7d535a
    putc((val >> 8) & 0xff, out);
kusano 7d535a
    putc((val >> 16) & 0xff, out);
kusano 7d535a
    putc((val >> 24) & 0xff, out);
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* Load up zlib stream from buffered input, bail if end of file */
kusano 7d535a
local void zpull(z_streamp strm, bin *in)
kusano 7d535a
{
kusano 7d535a
    if (in->left == 0)
kusano 7d535a
        bload(in);
kusano 7d535a
    if (in->left == 0)
kusano 7d535a
        bail("unexpected end of file on ", in->name);
kusano 7d535a
    strm->avail_in = in->left;
kusano 7d535a
    strm->next_in = in->next;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* Write header for gzip file to out and initialize trailer. */
kusano 7d535a
local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
kusano 7d535a
{
kusano 7d535a
    fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
kusano 7d535a
    *crc = crc32(0L, Z_NULL, 0);
kusano 7d535a
    *tot = 0;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* Copy the compressed data from name, zeroing the last block bit of the last
kusano 7d535a
   block if clr is true, and adding empty blocks as needed to get to a byte
kusano 7d535a
   boundary.  If clr is false, then the last block becomes the last block of
kusano 7d535a
   the output, and the gzip trailer is written.  crc and tot maintains the
kusano 7d535a
   crc and length (modulo 2^32) of the output for the trailer.  The resulting
kusano 7d535a
   gzip file is written to out.  gzinit() must be called before the first call
kusano 7d535a
   of gzcopy() to write the gzip header and to initialize crc and tot. */
kusano 7d535a
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
kusano 7d535a
                  FILE *out)
kusano 7d535a
{
kusano 7d535a
    int ret;                /* return value from zlib functions */
kusano 7d535a
    int pos;                /* where the "last block" bit is in byte */
kusano 7d535a
    int last;               /* true if processing the last block */
kusano 7d535a
    bin *in;                /* buffered input file */
kusano 7d535a
    unsigned char *start;   /* start of compressed data in buffer */
kusano 7d535a
    unsigned char *junk;    /* buffer for uncompressed data -- discarded */
kusano 7d535a
    z_off_t len;            /* length of uncompressed data (support > 4 GB) */
kusano 7d535a
    z_stream strm;          /* zlib inflate stream */
kusano 7d535a
kusano 7d535a
    /* open gzip file and skip header */
kusano 7d535a
    in = bopen(name);
kusano 7d535a
    if (in == NULL)
kusano 7d535a
        bail("could not open ", name);
kusano 7d535a
    gzhead(in);
kusano 7d535a
kusano 7d535a
    /* allocate buffer for uncompressed data and initialize raw inflate
kusano 7d535a
       stream */
kusano 7d535a
    junk = malloc(CHUNK);
kusano 7d535a
    strm.zalloc = Z_NULL;
kusano 7d535a
    strm.zfree = Z_NULL;
kusano 7d535a
    strm.opaque = Z_NULL;
kusano 7d535a
    strm.avail_in = 0;
kusano 7d535a
    strm.next_in = Z_NULL;
kusano 7d535a
    ret = inflateInit2(&strm, -15);
kusano 7d535a
    if (junk == NULL || ret != Z_OK)
kusano 7d535a
        bail("out of memory", "");
kusano 7d535a
kusano 7d535a
    /* inflate and copy compressed data, clear last-block bit if requested */
kusano 7d535a
    len = 0;
kusano 7d535a
    zpull(&strm, in);
kusano 7d535a
    start = strm.next_in;
kusano 7d535a
    last = start[0] & 1;
kusano 7d535a
    if (last && clr)
kusano 7d535a
        start[0] &= ~1;
kusano 7d535a
    strm.avail_out = 0;
kusano 7d535a
    for (;;) {
kusano 7d535a
        /* if input used and output done, write used input and get more */
kusano 7d535a
        if (strm.avail_in == 0 && strm.avail_out != 0) {
kusano 7d535a
            fwrite(start, 1, strm.next_in - start, out);
kusano 7d535a
            start = in->buf;
kusano 7d535a
            in->left = 0;
kusano 7d535a
            zpull(&strm, in);
kusano 7d535a
        }
kusano 7d535a
kusano 7d535a
        /* decompress -- return early when end-of-block reached */
kusano 7d535a
        strm.avail_out = CHUNK;
kusano 7d535a
        strm.next_out = junk;
kusano 7d535a
        ret = inflate(&strm, Z_BLOCK);
kusano 7d535a
        switch (ret) {
kusano 7d535a
        case Z_MEM_ERROR:
kusano 7d535a
            bail("out of memory", "");
kusano 7d535a
        case Z_DATA_ERROR:
kusano 7d535a
            bail("invalid compressed data in ", in->name);
kusano 7d535a
        }
kusano 7d535a
kusano 7d535a
        /* update length of uncompressed data */
kusano 7d535a
        len += CHUNK - strm.avail_out;
kusano 7d535a
kusano 7d535a
        /* check for block boundary (only get this when block copied out) */
kusano 7d535a
        if (strm.data_type & 128) {
kusano 7d535a
            /* if that was the last block, then done */
kusano 7d535a
            if (last)
kusano 7d535a
                break;
kusano 7d535a
kusano 7d535a
            /* number of unused bits in last byte */
kusano 7d535a
            pos = strm.data_type & 7;
kusano 7d535a
kusano 7d535a
            /* find the next last-block bit */
kusano 7d535a
            if (pos != 0) {
kusano 7d535a
                /* next last-block bit is in last used byte */
kusano 7d535a
                pos = 0x100 >> pos;
kusano 7d535a
                last = strm.next_in[-1] & pos;
kusano 7d535a
                if (last && clr)
kusano 7d535a
                    strm.next_in[-1] &= ~pos;
kusano 7d535a
            }
kusano 7d535a
            else {
kusano 7d535a
                /* next last-block bit is in next unused byte */
kusano 7d535a
                if (strm.avail_in == 0) {
kusano 7d535a
                    /* don't have that byte yet -- get it */
kusano 7d535a
                    fwrite(start, 1, strm.next_in - start, out);
kusano 7d535a
                    start = in->buf;
kusano 7d535a
                    in->left = 0;
kusano 7d535a
                    zpull(&strm, in);
kusano 7d535a
                }
kusano 7d535a
                last = strm.next_in[0] & 1;
kusano 7d535a
                if (last && clr)
kusano 7d535a
                    strm.next_in[0] &= ~1;
kusano 7d535a
            }
kusano 7d535a
        }
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* update buffer with unused input */
kusano 7d535a
    in->left = strm.avail_in;
kusano 7d535a
    in->next = strm.next_in;
kusano 7d535a
kusano 7d535a
    /* copy used input, write empty blocks to get to byte boundary */
kusano 7d535a
    pos = strm.data_type & 7;
kusano 7d535a
    fwrite(start, 1, in->next - start - 1, out);
kusano 7d535a
    last = in->next[-1];
kusano 7d535a
    if (pos == 0 || !clr)
kusano 7d535a
        /* already at byte boundary, or last file: write last byte */
kusano 7d535a
        putc(last, out);
kusano 7d535a
    else {
kusano 7d535a
        /* append empty blocks to last byte */
kusano 7d535a
        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
kusano 7d535a
        if (pos & 1) {
kusano 7d535a
            /* odd -- append an empty stored block */
kusano 7d535a
            putc(last, out);
kusano 7d535a
            if (pos == 1)
kusano 7d535a
                putc(0, out);               /* two more bits in block header */
kusano 7d535a
            fwrite("\0\0\xff\xff", 1, 4, out);
kusano 7d535a
        }
kusano 7d535a
        else {
kusano 7d535a
            /* even -- append 1, 2, or 3 empty fixed blocks */
kusano 7d535a
            switch (pos) {
kusano 7d535a
            case 6:
kusano 7d535a
                putc(last | 8, out);
kusano 7d535a
                last = 0;
kusano 7d535a
            case 4:
kusano 7d535a
                putc(last | 0x20, out);
kusano 7d535a
                last = 0;
kusano 7d535a
            case 2:
kusano 7d535a
                putc(last | 0x80, out);
kusano 7d535a
                putc(0, out);
kusano 7d535a
            }
kusano 7d535a
        }
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* update crc and tot */
kusano 7d535a
    *crc = crc32_combine(*crc, bget4(in), len);
kusano 7d535a
    *tot += (unsigned long)len;
kusano 7d535a
kusano 7d535a
    /* clean up */
kusano 7d535a
    inflateEnd(&strm);
kusano 7d535a
    free(junk);
kusano 7d535a
    bclose(in);
kusano 7d535a
kusano 7d535a
    /* write trailer if this is the last gzip file */
kusano 7d535a
    if (!clr) {
kusano 7d535a
        put4(*crc, out);
kusano 7d535a
        put4(*tot, out);
kusano 7d535a
    }
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.  With no arguments, print a usage message to
   stderr and return 0.  Returns 0 on success; any failure, including a
   failure to flush the output, ends the program through bail() with exit
   status 1. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout -- argc has
       already been decremented when it is passed as clr, so clr is zero only
       for the last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* push out buffered data now so that a write failure is reported instead
       of being lost when the stream is flushed at exit */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("could not write to stdout", "");

    /* done */
    return 0;
}