/* $Id: iptcutil.c,v 1.8 2011-05-08 00:44:18 fwarmerdam Exp $ */

#include "tif_config.h"

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#ifdef HAVE_IO_H
# include <io.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

/* Case-insensitive, length-limited string compare under one name. */
#ifdef WIN32
#define STRNICMP strnicmp
#else
#define STRNICMP strncasecmp
#endif

/* One IPTC record number paired with the label printed for it. */
typedef struct _tag_spec
{
  short
    id;

  char
    *name;
} tag_spec;

/* Record numbers that formatIPTC() can label in its text output. */
static tag_spec tags[] = {
    { 5,"Image Name" },
    { 7,"Edit Status" },
    { 10,"Priority" },
    { 15,"Category" },
    { 20,"Supplemental Category" },
    { 22,"Fixture Identifier" },
    { 25,"Keyword" },
    { 30,"Release Date" },
    { 35,"Release Time" },
    { 40,"Special Instructions" },
    { 45,"Reference Service" },
    { 47,"Reference Date" },
    { 50,"Reference Number" },
    { 55,"Created Date" },
    { 60,"Created Time" },
    { 65,"Originating Program" },
    { 70,"Program Version" },
    { 75,"Object Cycle" },
    { 80,"Byline" },
    { 85,"Byline Title" },
    { 90,"City" },
    { 95,"Province State" },
    { 100,"Country Code" },
    { 101,"Country" },
    { 103,"Original Transmission Reference" },
    { 105,"Headline" },
    { 110,"Credit" },
    { 115,"Source" },
    { 116,"Copyright String" },
    { 120,"Caption" },
    { 121,"Local Caption" },
    { 122,"Caption Writer" },
    { 200,"Custom Field 1" },
    { 201,"Custom Field 2" },
    { 202,"Custom Field 3" },
    { 203,"Custom Field 4" },
    { 204,"Custom Field 5" },
    { 205,"Custom Field 6" },
    { 206,"Custom Field 7" },
    { 207,"Custom Field 8" },
    { 208,"Custom Field 9" },
    { 209,"Custom Field 10" },
    { 210,"Custom Field 11" },
    { 211,"Custom Field 12" },
    { 212,"Custom Field 13" },
    { 213,"Custom Field 14" },
    { 214,"Custom Field 15" },
    { 215,"Custom Field 16" },
    { 216,"Custom Field 17" },
    { 217,"Custom Field 18" },
    { 218,"Custom Field 19" },
    { 219,"Custom Field 20" }
};

/*
 * Write the first len bytes of s to ofile as one double-quoted line.
 * We format the output using HTML conventions to preserve control
 * characters and such:
 *
 *   '&'                 is written as "&amp;"
 *   '"'                 is written as "&quot;"
 *   '<' and '>'         are written as "&lt;" and "&gt;", but only when
 *                       HANDLE_GT_LT is defined
 *   control characters  are written as "&#N;", N being the decimal
 *                       value of the byte
 *
 * Every other byte is copied through unchanged.  When len <= 0 only the
 * enclosing quotes and the newline are written.
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /* Convert through unsigned char: iscntrl() is undefined for
       negative values other than EOF. */
    int c = (unsigned char) *s;
    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}

/* One named HTML entity and the single character it stands for. */
typedef struct _html_code
{
  short
    len;      /* number of characters in code */
  const char
    *code,    /* entity text, starting with '&' and ending with ';' */
    val;      /* character that replaces the entity */
} html_code;

/* Named entities understood by convertHTMLcodes(). */
static html_code html_codes[] = {
#ifdef HANDLE_GT_LT
    { 4,"&lt;",'<' },
    { 4,"&gt;",'>' },
#endif
    { 5,"&amp;",'&' },
    { 6,"&quot;",'"' }
};

/*
roentgen b75cab
 * This routine converts HTML escape sequence
roentgen b75cab
 * back to the original ASCII representation.
roentgen b75cab
 * - returns the number of characters dropped.
roentgen b75cab
 */
roentgen b75cab
int convertHTMLcodes(char *s, int len)
roentgen b75cab
{
roentgen b75cab
  if (len <=0 || s==(char*)NULL || *s=='\0')
roentgen b75cab
    return 0;
roentgen b75cab
roentgen b75cab
  if (s[1] == '#')
roentgen b75cab
    {
roentgen b75cab
      int val, o;
roentgen b75cab
roentgen b75cab
      if (sscanf(s,"&#%d;",&val) == 1)
roentgen b75cab
      {
roentgen b75cab
        o = 3;
roentgen b75cab
        while (s[o] != ';')
roentgen b75cab
        {
roentgen b75cab
          o++;
roentgen b75cab
          if (o > 5)
roentgen b75cab
            break;
roentgen b75cab
        }
roentgen b75cab
        if (o < 5)
roentgen b75cab
          strcpy(s+1, s+1+o);
roentgen b75cab
        *s = val;
roentgen b75cab
        return o;
roentgen b75cab
      }
roentgen b75cab
    }
roentgen b75cab
  else
roentgen b75cab
    {
roentgen b75cab
      int
roentgen b75cab
        i,
roentgen b75cab
        codes = sizeof(html_codes) / sizeof(html_code);
roentgen b75cab
roentgen b75cab
      for (i=0; i < codes; i++)
roentgen b75cab
      {
roentgen b75cab
        if (html_codes[i].len <= len)
roentgen b75cab
          if (STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
roentgen b75cab
            {
roentgen b75cab
              strcpy(s+1, s+html_codes[i].len);
roentgen b75cab
              *s = html_codes[i].val;
roentgen b75cab
              return html_codes[i].len-1;
roentgen b75cab
            }
roentgen b75cab
      }
roentgen b75cab
    }
roentgen b75cab
roentgen b75cab
  return 0;
roentgen b75cab
}
roentgen b75cab
roentgen b75cab
int formatIPTC(FILE *ifile, FILE *ofile)
roentgen b75cab
{
roentgen b75cab
  unsigned int
roentgen b75cab
    foundiptc,
roentgen b75cab
    tagsfound;
roentgen b75cab
roentgen b75cab
  unsigned char
roentgen b75cab
    recnum,
roentgen b75cab
    dataset;
roentgen b75cab
roentgen b75cab
  char
roentgen b75cab
    *readable,
roentgen b75cab
    *str;
roentgen b75cab
roentgen b75cab
  long
roentgen b75cab
    tagindx,
roentgen b75cab
    taglen;
roentgen b75cab
roentgen b75cab
  int
roentgen b75cab
    i,
roentgen b75cab
    tagcount = sizeof(tags) / sizeof(tag_spec);
roentgen b75cab
roentgen b75cab
  char
roentgen b75cab
    c;
roentgen b75cab
roentgen b75cab
  foundiptc = 0; /* found the IPTC-Header */
roentgen b75cab
  tagsfound = 0; /* number of tags found */
roentgen b75cab
roentgen b75cab
  c = getc(ifile);
roentgen b75cab
  while (c != EOF)
roentgen b75cab
  {
roentgen b75cab
	  if (c == 0x1c)
roentgen b75cab
	    foundiptc = 1;
roentgen b75cab
	  else
roentgen b75cab
      {
roentgen b75cab
        if (foundiptc)
roentgen b75cab
	        return -1;
roentgen b75cab
        else
roentgen b75cab
	        continue;
roentgen b75cab
	    }
roentgen b75cab
roentgen b75cab
    /* we found the 0x1c tag and now grab the dataset and record number tags */
roentgen b75cab
    dataset = getc(ifile);
roentgen b75cab
	  if ((char) dataset == EOF)
roentgen b75cab
	    return -1;
roentgen b75cab
    recnum = getc(ifile);
roentgen b75cab
	  if ((char) recnum == EOF)
roentgen b75cab
	    return -1;
roentgen b75cab
    /* try to match this record to one of the ones in our named table */
roentgen b75cab
    for (i=0; i< tagcount; i++)
roentgen b75cab
    {
roentgen b75cab
      if (tags[i].id == recnum)
roentgen b75cab
          break;
roentgen b75cab
    }
roentgen b75cab
    if (i < tagcount)
roentgen b75cab
      readable = tags[i].name;
roentgen b75cab
    else
roentgen b75cab
      readable = "";
roentgen b75cab
roentgen b75cab
    /* then we decode the length of the block that follows - long or short fmt */
roentgen b75cab
    c = getc(ifile);
roentgen b75cab
	  if (c == EOF)
roentgen b75cab
	    return 0;
roentgen b75cab
	  if (c & (unsigned char) 0x80)
roentgen b75cab
      {
roentgen b75cab
        unsigned char
roentgen b75cab
          buffer[4];
roentgen b75cab
roentgen b75cab
        for (i=0; i<4; i++)
roentgen b75cab
        {
roentgen b75cab
          c = buffer[i] = getc(ifile);
roentgen b75cab
          if (c == EOF)
roentgen b75cab
            return -1;
roentgen b75cab
        }
roentgen b75cab
        taglen = (((long) buffer[ 0 ]) << 24) |
roentgen b75cab
                 (((long) buffer[ 1 ]) << 16) | 
roentgen b75cab
	               (((long) buffer[ 2 ]) <<  8) |
roentgen b75cab
                 (((long) buffer[ 3 ]));
roentgen b75cab
	    }
roentgen b75cab
    else
roentgen b75cab
      {
roentgen b75cab
        unsigned char
roentgen b75cab
          x = c;
roentgen b75cab
roentgen b75cab
        taglen = ((long) x) << 8;
roentgen b75cab
        x = getc(ifile);
roentgen b75cab
        if ((char)x == EOF)
roentgen b75cab
          return -1;
roentgen b75cab
        taglen |= (long) x;
roentgen b75cab
	    }
roentgen b75cab
    /* make a buffer to hold the tag data and snag it from the input stream */
roentgen b75cab
    str = (char *) malloc((unsigned int) (taglen+1));
roentgen b75cab
    if (str == (char *) NULL)
roentgen b75cab
      {
roentgen b75cab
        printf("Memory allocation failed");
roentgen b75cab
        return 0;
roentgen b75cab
      }
roentgen b75cab
    for (tagindx=0; tagindx
roentgen b75cab
    {
roentgen b75cab
      c = str[tagindx] = getc(ifile);
roentgen b75cab
      if (c == EOF)
roentgen b75cab
      {
roentgen b75cab
          free(str);
roentgen b75cab
          return -1;
roentgen b75cab
      }
roentgen b75cab
    }
roentgen b75cab
    str[ taglen ] = 0;
roentgen b75cab
roentgen b75cab
    /* now finish up by formatting this binary data into ASCII equivalent */
roentgen b75cab
    if (strlen(readable) > 0)
roentgen b75cab
	    fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
roentgen b75cab
    else
roentgen b75cab
	    fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
roentgen b75cab
    formatString( ofile, str, taglen );
roentgen b75cab
    free(str);
roentgen b75cab
roentgen b75cab
	  tagsfound++;
roentgen b75cab
roentgen b75cab
    c = getc(ifile);
roentgen b75cab
  }
roentgen b75cab
  return tagsfound;
roentgen b75cab
}
roentgen b75cab
roentgen b75cab
/* Forward declaration; the definition appears further down in this file. */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
char *white,char *brkchar,char *quote,char eschar,char *brkused,
int *next,char *quoted);

/*
 * Read one line from file into the heap buffer b, enlarging the buffer
 * as needed.
 *
 *   b     buffer obtained from malloc()/realloc(), or NULL; this function
 *         takes it over, so the caller must carry on with the returned
 *         pointer only
 *   blen  in:  usable size of b
 *         out: length of the line plus one for the terminating NUL, or 0
 *              when NULL is returned
 *   file  stream to read from
 *
 * The newline is consumed but not stored.  Returns the (possibly moved)
 * buffer holding the NUL-terminated line.  Returns NULL, after releasing
 * the buffer, when no character was read before a newline or end of
 * file - so a blank line is reported like end of input - or when the
 * buffer cannot be enlarged.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int
    c,
    len;

  size_t
    used = 0;   /* characters stored so far */

  /* Without a buffer, or with a nonsensical size, start from nothing and
     let the first character trigger an allocation. */
  len = (b != (char *) NULL && *blen > 0) ? *blen : 0;

  for (;;)
  {
    c=fgetc(file);
    if (c == EOF || c == '\n')
      break;
    if (used + 1 >= (size_t) len)
      {
        char
          *grown;

        /* Double the size; refuse once that no longer fits in an int. */
        if (len > (INT_MAX - 2) / 2)
          {
            free(b);
            *blen=0;
            return (char *) NULL;
          }
        len = (len > 0) ? len * 2 : 2;
        grown=(char *) realloc(b,(size_t) len + 2);
        if (grown == (char *) NULL)
          {
            /* realloc() left the old block allocated; let go of it */
            free(b);
            *blen=0;
            return (char *) NULL;
          }
        b=grown;
      }
    b[used++]=(char) c;
  }

  *blen=0;
  if (used == 0)
    {
      free(b);
      return (char *) NULL;
    }
  b[used] = '\0';
  *blen=(int) used + 1;
  return b;
}

#define BUFFER_SZ 4096

/*
 * iptcutil: convert IPTC data between its binary form and a line-based
 * text form.
 *
 *   -b       binary input, text output (the default)
 *   -t       text input, binary output
 *   -i file  read from file instead of stdin
 *   -o file  write to file instead of stdout
 *
 * Options may also be introduced with '/'.  -i and -o pick their fopen()
 * mode from the direction selected so far, so -t / -b are best given
 * first.
 *
 * Returns 0 on completion, and 1 on a usage error, a file that cannot be
 * opened or an allocation failure.
 */
int main(int argc, char *argv[])
{
  int
    i,
    mode; /* iptc binary, or iptc text */

  FILE
    *ifile = stdin,
    *ofile = stdout;

  char
    c,
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";

  if( argc < 2 )
    {
      puts(usage);
      return 1;
    }

  mode = 0;

  for (i=1; i<argc; i++)
  {
    c = argv[i][0];
    if (c == '-' || c == '/')
      {
        c = argv[i][1];
        switch( c )
        {
        case 't':
          mode = 1;
#ifdef WIN32
          /* Set "stdout" to binary mode: */
          _setmode( _fileno( ofile ), _O_BINARY );
#endif
          break;
        case 'b':
          mode = 0;
#ifdef WIN32
          /* Set "stdin" to binary mode: */
          _setmode( _fileno( ifile ), _O_BINARY );
#endif
          break;
        case 'i':
          /* the file name has to follow as the next argument */
          if (i + 1 >= argc)
            {
              puts(usage);
              return 1;
            }
          if (mode == 0)
            ifile = fopen(argv[++i], "rb");
          else
            ifile = fopen(argv[++i], "rt");
          if (ifile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        case 'o':
          if (i + 1 >= argc)
            {
              puts(usage);
              return 1;
            }
          if (mode == 0)
            ofile = fopen(argv[++i], "wt");
          else
            ofile = fopen(argv[++i], "wb");
          if (ofile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        default:
          printf("Unknown option: %s\n", argv[i]);
          return 1;
        }
      }
    else
      {
        puts(usage);
        return 1;
      }
  }

  if (mode == 0) /* handle binary iptc info */
    formatIPTC(ifile, ofile);

  if (mode == 1) /* handle text form of iptc info */
    {
      char
        brkused,
        quoted,
        *line,
        *token,
        *newstr;

      int
        state,
        next;

      unsigned char
        recnum = 0,
        dataset = 0;

      int
        inputlen = BUFFER_SZ;

      line = (char *) malloc(inputlen);
      if (line == (char *) NULL)
        {
          printf("Memory allocation failed");
          return 1;
        }

      /* super_fgets() hands back the buffer to continue with and sets
         inputlen to the length of the line just read, plus one */
      while((line = super_fgets(line,&inputlen,ifile))!=NULL)
      {
        state=0;
        next=0;

        token = (char *) malloc(inputlen);
        newstr = (char *) malloc(inputlen);
        if (token == (char *) NULL || newstr == (char *) NULL)
          {
            printf("Memory allocation failed");
            free(token);
            free(newstr);
            free(line);
            return 1;
          }

        /* split the line at '=': the key first, then the quoted value */
        while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
          &brkused,&next,&quoted)==0)
        {
          if (state == 0)
            {
              /* key: '#'-separated fields, dataset first, record number
                 second; any further field is ignored */
              int
                field = 0,
                keypos = 0;

              char
                keybrk,
                keyquoted;

              while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
                &keybrk, &keypos, &keyquoted)==0)
              {
                if (field == 0)
                  dataset = (unsigned char) atoi(newstr);
                else
                   if (field == 1)
                     recnum = (unsigned char) atoi(newstr);
                field++;
              }
            }
          else
            if (state == 1)
              {
                int
                  valpos = 0;

                unsigned long
                  len;

                char
                  valbrk,
                  valquoted;

                /* value: step from '&' to '&' and decode each escape in
                   place, shortening len by what was dropped */
                len = strlen(token);
                while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
                  &valbrk, &valpos, &valquoted)==0)
                {
                  if (valbrk && valpos > 0)
                    {
                      char
                        *s = &token[valpos-1];

                      len -= (unsigned long) convertHTMLcodes(s, (int) strlen(s));
                    }
                }

                fputc(0x1c, ofile);
                fputc(dataset, ofile);
                fputc(recnum, ofile);
                /* NOTE(review): formatIPTC() treats a set top bit in the
                   first length byte as the long-form flag and then reads
                   four further bytes; lengths of 0x8000 and above written
                   here do not match that - confirm the intended layout. */
                if (len < 0x10000)
                  {
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                else
                  {
                    fputc(((len >> 24) & 255) | 0x80, ofile);
                    fputc((len >> 16) & 255, ofile);
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                fwrite(token, 1, len, ofile);
              }
          state++;
        }
        free(token);
        token = (char *)NULL;
        free(newstr);
        newstr = (char *)NULL;
      }
      free(line);
    }

  fclose( ifile );
  fclose( ofile );

  return 0;
}

/*
	This routine is a generalized, finite state token parser. It allows
	you to extract tokens one at a time from a string of characters.  The
	characters used for white space, for break characters, and for quotes
	can be specified. Also, characters in the string can be preceded by
	a specifiable escape character which removes any special meaning the
	character may have.

	There are a lot of formal parameters in this subroutine call, but
	once you get familiar with them, this routine is fairly easy to use.
	"#define" macros can be used to generate simpler looking calls for
	commonly used applications of this routine.

	First, some terminology:

	token:		used here, a single unit of information in
				the form of a group of characters.

	white space:	space that gets ignored (except within quotes
				or when escaped), like blanks and tabs.  in
				addition, white space terminates a non-quoted
				token.

	break character: a character that separates non-quoted tokens.
				commas are a common break character.  the
				usage of break characters to signal the end
				of a token is the same as that of white space,
				except multiple break characters with nothing
				or only white space between generate a null
				token for each two break characters together.

				for example, if blank is set to be the white
				space and comma is set to be the break
				character, the line ...

				A, B, C ,  , DEF

				... consists of 5 tokens:

				1)	"A"
				2)	"B"
				3)	"C"
				4)	""      (the null string)
				5)	"DEF"

	quote character: 	a character that, when surrounding a group
				of other characters, causes the group of
				characters to be treated as a single token,
				no matter how many white spaces or break
				characters exist in the group.	also, a
				token always terminates after the closing
				quote.	for example, if ' is the quote
				character, blank is white space, and comma
				is the break character, the following
				string ...

				A, ' B, CD'EF GHI

				... consists of 4 tokens:

				1)	"A"
				2)	" B, CD" (note the blanks & comma)
				3)	"EF"
				4)	"GHI"

				the quote characters themselves do
				not appear in the resultant tokens.  the
				double quotes are delimiters I use here for
				documentation purposes only.

	escape character:	a character which itself is ignored but
				which causes the next character to be
				used as is.  ^ and \ are often used as
				escape characters.  an escape in the last
				position of the string gets treated as a
				"normal" (i.e., non-quote, non-white,
				non-break, and non-escape) character.
				for example, assume white space, break
				character, and quote are the same as in the
				above examples, and further, assume that
				^ is the escape character.  then, in the
				string ...

				ABC, ' DEF ^' GH' I ^ J K^ L ^

				... there are 7 tokens:

				1)	"ABC"
				2)	" DEF ' GH"
				3)	"I"
				4)	" "     (a lone blank)
				5)	"J"
				6)	"K L"
				7)	"^"     (passed as is at end of line)


	OK, now that you have this background, here's how to call "tokenizer":

	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
		      brkused,next,quoted)

	result: 	0 if we haven't reached EOS (end of string), and
			1 if we have (this is an "int").

	flag:		right now, only the low order 2 bits are used.
			1 => convert non-quoted tokens to upper case
			2 => convert non-quoted tokens to lower case
			0 => do not convert non-quoted tokens
			(this is an "unsigned").

	token:		a character string containing the returned next token
			(this is a "char[]").

	maxtok: 	the maximum size of "token".  characters beyond
			"maxtok" are truncated (this is an "int").

	string: 	the string to be parsed (this is a "char[]").

	white:		a string of the valid white spaces.  example:

			char whitesp[]={" \t"};

			blank and tab will be valid white space (this is
			a "char[]").

	break:		a string of the valid break characters.  example:

			char breakch[]={";,"};

			semicolon and comma will be valid break characters
			(this is a "char[]").

			IMPORTANT:  do not use the name "break" as a C
			variable, as this is a reserved word in C.

	quote:		a string of the valid quote characters.  an example
			would be

			char quotech[]={"'\""};

			(this causes single and double quotes to be valid)
			note that a token starting with one of these characters
			needs the same quote character to terminate it.

			for example,

			"ABC '

			is unterminated, but

			"DEF" and 'GHI'

			are properly terminated.  note that different quote
			characters can appear on the same line; only for
			a given token do the quote characters have to be
			the same (this is a "char[]").

	escape: 	the escape character (NOT a string ... only one
			allowed).  use zero if none is desired (this is
			a "char").

	brkused:	the break character used to terminate the current
			token.	if the token was quoted, this will be the
			quote used.  if the token is the last one on the
			line, this will be zero (this is a pointer to a
			"char").

	next:		this variable points to the first character of the
			next token.  it gets reset by "tokenizer" as it steps
			through the string.  set it to 0 upon initialization,
			and leave it alone after that.	you can change it
			if you want to jump around in the string or re-parse
			from the beginning, but be careful (this is a
			pointer to an "int").

	quoted: 	set to 1 (true) if the token was quoted and 0 (false)
			if not.  you may need this information (for example:
			in C, a string with quotes around it is a character
			string, while one without is an identifier).

			(this is a pointer to a "char").
*/

/* states */

#define IN_WHITE 0
#define IN_TOKEN 1
#define IN_QUOTE 2
#define IN_OZONE 3

/* Scratch state of "tokenizer", shared with its helper "chstore". */
int _p_state;	   /* current state	 */
unsigned _p_flag;  /* option flag	 */
char _p_curquote;  /* current quote char */
int _p_tokpos;	   /* current token pos  */

/*
 * routine to find character in string ... used only by "tokenizer"
 *
 * Returns the position of the first occurrence of ch in string, or -1
 * when it does not occur.  The terminating NUL is never matched, so
 * looking for '\0' gives -1.
 */

int sindex(char ch,char *string)
{
  char *cp;
  for(cp=string;*cp;++cp)
    if(ch==*cp)
      return (int)(cp-string);	/* return position of character */
  return -1;			/* eol ... no match found */
}

/* routine to store a character in a string ... used only by "tokenizer" */
roentgen b75cab
roentgen b75cab
void chstore(char *string,int max,char ch)
roentgen b75cab
{
roentgen b75cab
  char c;
roentgen b75cab
  if(_p_tokpos>=0&&_p_tokpos
roentgen b75cab
  {
roentgen b75cab
    if(_p_state==IN_QUOTE)
roentgen b75cab
      c=ch;
roentgen b75cab
    else
roentgen b75cab
      switch(_p_flag&3)
roentgen b75cab
      {
roentgen b75cab
	    case 1: 	    /* convert to upper */
roentgen b75cab
	      c=toupper(ch);
roentgen b75cab
	      break;
roentgen b75cab
roentgen b75cab
	    case 2: 	    /* convert to lower */
roentgen b75cab
	      c=tolower(ch);
roentgen b75cab
	      break;
roentgen b75cab
roentgen b75cab
	    default:	    /* use as is */
roentgen b75cab
	      c=ch;
roentgen b75cab
	      break;
roentgen b75cab
      }
roentgen b75cab
    string[_p_tokpos++]=c;
roentgen b75cab
  }
roentgen b75cab
  return;
roentgen b75cab
}
roentgen b75cab
roentgen b75cab
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
roentgen b75cab
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
roentgen b75cab
    int *next,char *quoted)
roentgen b75cab
{
roentgen b75cab
  int qp;
roentgen b75cab
  char c,nc;
roentgen b75cab
roentgen b75cab
  *brkused=0;		/* initialize to null */
roentgen b75cab
  *quoted=0;		/* assume not quoted  */
roentgen b75cab
roentgen b75cab
  if(!line[*next])	/* if we're at end of line, indicate such */
roentgen b75cab
    return 1;
roentgen b75cab
roentgen b75cab
  _p_state=IN_WHITE;   /* initialize state */
roentgen b75cab
  _p_curquote=0;	   /* initialize previous quote char */
roentgen b75cab
  _p_flag=inflag;	   /* set option flag */
roentgen b75cab
roentgen b75cab
  for(_p_tokpos=0;(c=line[*next]);++(*next))	/* main loop */
roentgen b75cab
  {
roentgen b75cab
    if((qp=sindex(c,brkchar))>=0)  /* break */
roentgen b75cab
    {
roentgen b75cab
      switch(_p_state)
roentgen b75cab
      {
roentgen b75cab
	    case IN_WHITE:		/* these are the same here ...	*/
roentgen b75cab
	    case IN_TOKEN:		/* ... just get out		*/
roentgen b75cab
	    case IN_OZONE:		/* ditto			*/
roentgen b75cab
	      ++(*next);
roentgen b75cab
	      *brkused=brkchar[qp];
roentgen b75cab
	      goto byebye;
roentgen b75cab
roentgen b75cab
	    case IN_QUOTE:		 /* just keep going */
roentgen b75cab
	      chstore(token,tokmax,c);
roentgen b75cab
	      break;
roentgen b75cab
      }
roentgen b75cab
    }
roentgen b75cab
    else if((qp=sindex(c,quote))>=0)  /* quote */
roentgen b75cab
    {
roentgen b75cab
      switch(_p_state)
roentgen b75cab
      {
roentgen b75cab
	    case IN_WHITE:	 /* these are identical, */
roentgen b75cab
	      _p_state=IN_QUOTE; /* change states   */
roentgen b75cab
	      _p_curquote=quote[qp]; /* save quote char */
roentgen b75cab
	      *quoted=1;	/* set to true as long as something is in quotes */
roentgen b75cab
	      break;
roentgen b75cab
roentgen b75cab
	    case IN_QUOTE:
roentgen b75cab
	      if(quote[qp]==_p_curquote) /* same as the beginning quote? */
roentgen b75cab
	      {
roentgen b75cab
	        _p_state=IN_OZONE;
roentgen b75cab
	        _p_curquote=0;
roentgen b75cab
	      }
roentgen b75cab
	      else
roentgen b75cab
	        chstore(token,tokmax,c); /* treat as regular char */
roentgen b75cab
	      break;
roentgen b75cab
roentgen b75cab
	    case IN_TOKEN:
roentgen b75cab
	    case IN_OZONE:
roentgen b75cab
	      *brkused=c; /* uses quote as break char */
roentgen b75cab
	      goto byebye;
roentgen b75cab
      }
roentgen b75cab
    }
roentgen b75cab
    else if((qp=sindex(c,white))>=0) /* white */
roentgen b75cab
    {
roentgen b75cab
      switch(_p_state)
roentgen b75cab
      {
roentgen b75cab
	    case IN_WHITE:
roentgen b75cab
	    case IN_OZONE:
roentgen b75cab
	      break;		/* keep going */
roentgen b75cab
roentgen b75cab
	    case IN_TOKEN:
roentgen b75cab
	      _p_state=IN_OZONE;
roentgen b75cab
	      break;
roentgen b75cab
roentgen b75cab
	    case IN_QUOTE:
roentgen b75cab
	      chstore(token,tokmax,c); /* it's valid here */
roentgen b75cab
	      break;
roentgen b75cab
      }
roentgen b75cab
    }
roentgen b75cab
    else if(c==eschar)  /* escape */
roentgen b75cab
    {
roentgen b75cab
      nc=line[(*next)+1];
roentgen b75cab
      if(nc==0) 		/* end of line */
roentgen b75cab
      {
roentgen b75cab
	    *brkused=0;
roentgen b75cab
	    chstore(token,tokmax,c);
roentgen b75cab
	    ++(*next);
roentgen b75cab
	    goto byebye;
roentgen b75cab
      }
roentgen b75cab
      switch(_p_state)
roentgen b75cab
      {
roentgen b75cab
	    case IN_WHITE:
roentgen b75cab
	      --(*next);
roentgen b75cab
	      _p_state=IN_TOKEN;
roentgen b75cab
	      break;
roentgen b75cab
roentgen b75cab
	    case IN_TOKEN:
roentgen b75cab
	    case IN_QUOTE:
roentgen b75cab
	      ++(*next);
roentgen b75cab
	      chstore(token,tokmax,nc);
roentgen b75cab
	      break;
roentgen b75cab
roentgen b75cab
	    case IN_OZONE:
roentgen b75cab
	      goto byebye;
roentgen b75cab
      }
roentgen b75cab
    }
roentgen b75cab
    else	/* anything else is just a real character */
roentgen b75cab
    {
roentgen b75cab
      switch(_p_state)
roentgen b75cab
      {
roentgen b75cab
	    case IN_WHITE:
roentgen b75cab
	      _p_state=IN_TOKEN; /* switch states */
roentgen b75cab
roentgen b75cab
	    case IN_TOKEN:		 /* these 2 are     */
roentgen b75cab
	    case IN_QUOTE:		 /*  identical here */
roentgen b75cab
	      chstore(token,tokmax,c);
roentgen b75cab
	      break;
roentgen b75cab
roentgen b75cab
	    case IN_OZONE:
roentgen b75cab
	      goto byebye;
roentgen b75cab
      }
roentgen b75cab
    }
roentgen b75cab
  }		/* end of main loop */
roentgen b75cab
roentgen b75cab
byebye:
roentgen b75cab
  token[_p_tokpos]=0;	/* make sure token ends with EOS */
roentgen b75cab
roentgen b75cab
  return 0;
roentgen b75cab
}
roentgen b75cab
/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 8
 * fill-column: 78
 * End:
 */