/* $Id: iptcutil.c,v 1.8 2011-05-08 00:44:18 fwarmerdam Exp $ */

#include "tif_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#ifdef HAVE_IO_H
# include <io.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

/* Length-bounded, case-insensitive string compare under one name. */
#ifdef WIN32
#define STRNICMP strnicmp
#else
#define STRNICMP strncasecmp
#endif

/* Associates a numeric record id with a human-readable label. */
typedef struct _tag_spec
{
  short
    id;     /* record number compared against the value read from the stream */

  char
    *name;  /* label written to the text output */
} tag_spec;

/* Record ids that formatIPTC() knows a readable name for. */
static tag_spec tags[] = {
    { 5,"Image Name" },
    { 7,"Edit Status" },
    { 10,"Priority" },
    { 15,"Category" },
    { 20,"Supplemental Category" },
    { 22,"Fixture Identifier" },
    { 25,"Keyword" },
    { 30,"Release Date" },
    { 35,"Release Time" },
    { 40,"Special Instructions" },
    { 45,"Reference Service" },
    { 47,"Reference Date" },
    { 50,"Reference Number" },
    { 55,"Created Date" },
    { 60,"Created Time" },
    { 65,"Originating Program" },
    { 70,"Program Version" },
    { 75,"Object Cycle" },
    { 80,"Byline" },
    { 85,"Byline Title" },
    { 90,"City" },
    { 95,"Province State" },
    { 100,"Country Code" },
    { 101,"Country" },
    { 103,"Original Transmission Reference" },
    { 105,"Headline" },
    { 110,"Credit" },
    { 115,"Source" },
    { 116,"Copyright String" },
    { 120,"Caption" },
    { 121,"Local Caption" },
    { 122,"Caption Writer" },
    { 200,"Custom Field 1" },
    { 201,"Custom Field 2" },
    { 202,"Custom Field 3" },
    { 203,"Custom Field 4" },
    { 204,"Custom Field 5" },
    { 205,"Custom Field 6" },
    { 206,"Custom Field 7" },
    { 207,"Custom Field 8" },
    { 208,"Custom Field 9" },
    { 209,"Custom Field 10" },
    { 210,"Custom Field 11" },
    { 211,"Custom Field 12" },
    { 212,"Custom Field 13" },
    { 213,"Custom Field 14" },
    { 214,"Custom Field 15" },
    { 215,"Custom Field 16" },
    { 216,"Custom Field 17" },
    { 217,"Custom Field 18" },
    { 218,"Custom Field 19" },
    { 219,"Custom Field 20" }
};

/*
 * Write the first `len` bytes of `s` to `ofile` as a double-quoted string
 * followed by a newline.
 *
 * We format the output using HTML conventions to preserve control
 * characters and such: '&' and '"' (plus '<' and '>' when HANDLE_GT_LT is
 * defined) are written as named entities, control characters are written
 * as decimal character references ("&#N;"), and every other byte is
 * copied through unchanged.
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /* Go through unsigned char: plain char may be signed, and handing a
       negative value to iscntrl() is undefined. */
    int c = (unsigned char) *s;
    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}

/* One named escape sequence and the single character it decodes to. */
typedef struct _html_code
{
  short
    len;    /* number of characters of `code` that are compared */
  const char
    *code,  /* escape sequence text as it appears in the input */
    val;    /* replacement character */
} html_code;

/* Named entities recognized when decoding; each `len` counts the whole
   entity, from the leading '&' through the trailing ';'. */
static html_code html_codes[] = {
#ifdef HANDLE_GT_LT
    { 4,"&lt;",'<' },
    { 4,"&gt;",'>' },
#endif
    { 5,"&amp;",'&' },
    { 6,"&quot;",'"' }
};

/*
kusano 7d535a
 * This routine converts HTML escape sequence
kusano 7d535a
 * back to the original ASCII representation.
kusano 7d535a
 * - returns the number of characters dropped.
kusano 7d535a
 */
kusano 7d535a
int convertHTMLcodes(char *s, int len)
kusano 7d535a
{
kusano 7d535a
  if (len <=0 || s==(char*)NULL || *s=='\0')
kusano 7d535a
    return 0;
kusano 7d535a
kusano 7d535a
  if (s[1] == '#')
kusano 7d535a
    {
kusano 7d535a
      int val, o;
kusano 7d535a
kusano 7d535a
      if (sscanf(s,"&#%d;",&val) == 1)
kusano 7d535a
      {
kusano 7d535a
        o = 3;
kusano 7d535a
        while (s[o] != ';')
kusano 7d535a
        {
kusano 7d535a
          o++;
kusano 7d535a
          if (o > 5)
kusano 7d535a
            break;
kusano 7d535a
        }
kusano 7d535a
        if (o < 5)
kusano 7d535a
          strcpy(s+1, s+1+o);
kusano 7d535a
        *s = val;
kusano 7d535a
        return o;
kusano 7d535a
      }
kusano 7d535a
    }
kusano 7d535a
  else
kusano 7d535a
    {
kusano 7d535a
      int
kusano 7d535a
        i,
kusano 7d535a
        codes = sizeof(html_codes) / sizeof(html_code);
kusano 7d535a
kusano 7d535a
      for (i=0; i < codes; i++)
kusano 7d535a
      {
kusano 7d535a
        if (html_codes[i].len <= len)
kusano 7d535a
          if (STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
kusano 7d535a
            {
kusano 7d535a
              strcpy(s+1, s+html_codes[i].len);
kusano 7d535a
              *s = html_codes[i].val;
kusano 7d535a
              return html_codes[i].len-1;
kusano 7d535a
            }
kusano 7d535a
      }
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
  return 0;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
int formatIPTC(FILE *ifile, FILE *ofile)
kusano 7d535a
{
kusano 7d535a
  unsigned int
kusano 7d535a
    foundiptc,
kusano 7d535a
    tagsfound;
kusano 7d535a
kusano 7d535a
  unsigned char
kusano 7d535a
    recnum,
kusano 7d535a
    dataset;
kusano 7d535a
kusano 7d535a
  char
kusano 7d535a
    *readable,
kusano 7d535a
    *str;
kusano 7d535a
kusano 7d535a
  long
kusano 7d535a
    tagindx,
kusano 7d535a
    taglen;
kusano 7d535a
kusano 7d535a
  int
kusano 7d535a
    i,
kusano 7d535a
    tagcount = sizeof(tags) / sizeof(tag_spec);
kusano 7d535a
kusano 7d535a
  char
kusano 7d535a
    c;
kusano 7d535a
kusano 7d535a
  foundiptc = 0; /* found the IPTC-Header */
kusano 7d535a
  tagsfound = 0; /* number of tags found */
kusano 7d535a
kusano 7d535a
  c = getc(ifile);
kusano 7d535a
  while (c != EOF)
kusano 7d535a
  {
kusano 7d535a
	  if (c == 0x1c)
kusano 7d535a
	    foundiptc = 1;
kusano 7d535a
	  else
kusano 7d535a
      {
kusano 7d535a
        if (foundiptc)
kusano 7d535a
	        return -1;
kusano 7d535a
        else
kusano 7d535a
	        continue;
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
    /* we found the 0x1c tag and now grab the dataset and record number tags */
kusano 7d535a
    dataset = getc(ifile);
kusano 7d535a
	  if ((char) dataset == EOF)
kusano 7d535a
	    return -1;
kusano 7d535a
    recnum = getc(ifile);
kusano 7d535a
	  if ((char) recnum == EOF)
kusano 7d535a
	    return -1;
kusano 7d535a
    /* try to match this record to one of the ones in our named table */
kusano 7d535a
    for (i=0; i< tagcount; i++)
kusano 7d535a
    {
kusano 7d535a
      if (tags[i].id == recnum)
kusano 7d535a
          break;
kusano 7d535a
    }
kusano 7d535a
    if (i < tagcount)
kusano 7d535a
      readable = tags[i].name;
kusano 7d535a
    else
kusano 7d535a
      readable = "";
kusano 7d535a
kusano 7d535a
    /* then we decode the length of the block that follows - long or short fmt */
kusano 7d535a
    c = getc(ifile);
kusano 7d535a
	  if (c == EOF)
kusano 7d535a
	    return 0;
kusano 7d535a
	  if (c & (unsigned char) 0x80)
kusano 7d535a
      {
kusano 7d535a
        unsigned char
kusano 7d535a
          buffer[4];
kusano 7d535a
kusano 7d535a
        for (i=0; i<4; i++)
kusano 7d535a
        {
kusano 7d535a
          c = buffer[i] = getc(ifile);
kusano 7d535a
          if (c == EOF)
kusano 7d535a
            return -1;
kusano 7d535a
        }
kusano 7d535a
        taglen = (((long) buffer[ 0 ]) << 24) |
kusano 7d535a
                 (((long) buffer[ 1 ]) << 16) | 
kusano 7d535a
	               (((long) buffer[ 2 ]) <<  8) |
kusano 7d535a
                 (((long) buffer[ 3 ]));
kusano 7d535a
	    }
kusano 7d535a
    else
kusano 7d535a
      {
kusano 7d535a
        unsigned char
kusano 7d535a
          x = c;
kusano 7d535a
kusano 7d535a
        taglen = ((long) x) << 8;
kusano 7d535a
        x = getc(ifile);
kusano 7d535a
        if ((char)x == EOF)
kusano 7d535a
          return -1;
kusano 7d535a
        taglen |= (long) x;
kusano 7d535a
	    }
kusano 7d535a
    /* make a buffer to hold the tag data and snag it from the input stream */
kusano 7d535a
    str = (char *) malloc((unsigned int) (taglen+1));
kusano 7d535a
    if (str == (char *) NULL)
kusano 7d535a
      {
kusano 7d535a
        printf("Memory allocation failed");
kusano 7d535a
        return 0;
kusano 7d535a
      }
kusano 7d535a
    for (tagindx=0; tagindx
kusano 7d535a
    {
kusano 7d535a
      c = str[tagindx] = getc(ifile);
kusano 7d535a
      if (c == EOF)
kusano 7d535a
      {
kusano 7d535a
          free(str);
kusano 7d535a
          return -1;
kusano 7d535a
      }
kusano 7d535a
    }
kusano 7d535a
    str[ taglen ] = 0;
kusano 7d535a
kusano 7d535a
    /* now finish up by formatting this binary data into ASCII equivalent */
kusano 7d535a
    if (strlen(readable) > 0)
kusano 7d535a
	    fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
kusano 7d535a
    else
kusano 7d535a
	    fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
kusano 7d535a
    formatString( ofile, str, taglen );
kusano 7d535a
    free(str);
kusano 7d535a
kusano 7d535a
	  tagsfound++;
kusano 7d535a
kusano 7d535a
    c = getc(ifile);
kusano 7d535a
  }
kusano 7d535a
  return tagsfound;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/* Forward declaration; the definition and the description of every
   parameter appear further down in this file. */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
char *white,char *brkchar,char *quote,char eschar,char *brkused,
int *next,char *quoted);

/*
 * Read one line of any length from `file` into the heap buffer `b`,
 * growing the buffer with realloc() as needed.
 *
 * b:    malloc'ed buffer; ownership passes to this function.
 * blen: in: capacity the function may assume for `b`;
 *       out: length of the line plus one for the terminator, or 0.
 * file: stream to read from.
 *
 * The newline is consumed but not stored.  Returns the (possibly moved)
 * buffer holding the NUL-terminated line.  Returns NULL, with *blen set
 * to 0 and the buffer released, when no characters were read before
 * end-of-file or newline, or when memory runs out.
 *
 * NOTE(review): an empty line is reported exactly like end-of-file, so a
 * caller looping until NULL stops at the first blank line.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int
    c,
    len;

  char
    *q;

  if (b == (char *) NULL)
    {
      *blen=0;
      return (char *) NULL;
    }

  len=*blen;
  if (len < 1)
    len=1;  /* forces a realloc before the first store */
  for (q=b; ; q++)
  {
    c=fgetc(file);
    if (c == EOF || c == '\n')
      break;
    if ((long) (q - b) + 1 >= (long) len)
      {
        long
          tlen;

        char
          *grown;

        tlen=(long) (q - b);
        len<<=1;
        /* Keep the old pointer until realloc() succeeds so that it can
           be released on failure instead of being leaked. */
        grown=(char *) realloc(b,(size_t) len+2);
        if (grown == (char *) NULL)
          {
            free(b);
            b=(char *) NULL;
            break;
          }
        b=grown;
        q=b+tlen;
      }
    *q=(char) c;
  }
  *blen=0;
  if (b != (char *) NULL)
    {
      int
        tlen;

      tlen=(int) (q - b);
      if (tlen == 0)
        {
          /* The caller only ever sees NULL from here on, so the buffer
             has to be released now. */
          free(b);
          return (char *) NULL;
        }
      b[tlen] = '\0';
      *blen=++tlen;
    }
  return b;
}

#define BUFFER_SZ 4096

/*
 * Command-line driver.
 *
 *   -b       input is binary; write the text form (this is the default)
 *   -t       input is the text form; write binary records
 *   -i file  read from `file` instead of stdin
 *   -o file  write to `file` instead of stdout
 *
 * The open mode used for -i/-o depends on the mode selected so far, so
 * -t or -b has to come first on the command line.  '/' is accepted as an
 * option prefix as well as '-'.
 *
 * Returns 0 on completion and 1 on a usage error, a file that cannot be
 * opened, or an allocation failure.
 */
int main(int argc, char *argv[])
{
  int
    i,
    mode; /* iptc binary, or iptc text */

  FILE
    *ifile = stdin,
    *ofile = stdout;

  char
    c,
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";

  if( argc < 2 )
    {
      puts(usage);
      return 1;
    }

  mode = 0;

  for (i=1; i<argc; i++)
  {
    c = argv[i][0];
    if (c == '-' || c == '/')
      {
        c = argv[i][1];
        switch( c )
        {
        case 't':
          mode = 1;
#ifdef WIN32
          /* Set "stdout" to binary mode: */
          _setmode( _fileno( ofile ), _O_BINARY );
#endif
          break;
        case 'b':
          mode = 0;
#ifdef WIN32
          /* Set "stdin" to binary mode: */
          _setmode( _fileno( ifile ), _O_BINARY );
#endif
          break;
        case 'i':
          /* The file name is the next argument; make sure there is one. */
          if (i+1 >= argc)
            {
              puts(usage);
              return 1;
            }
          if (mode == 0)
            ifile = fopen(argv[++i], "rb");
          else
            ifile = fopen(argv[++i], "rt");
          if (ifile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        case 'o':
          if (i+1 >= argc)
            {
              puts(usage);
              return 1;
            }
          if (mode == 0)
            ofile = fopen(argv[++i], "wt");
          else
            ofile = fopen(argv[++i], "wb");
          if (ofile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        default:
          printf("Unknown option: %s\n", argv[i]);
          return 1;
        }
      }
    else
      {
        puts(usage);
        return 1;
      }
  }

  if (mode == 0) /* handle binary iptc info */
    formatIPTC(ifile, ofile);

  if (mode == 1) /* handle text form of iptc info */
    {
      char
        brkused,
        quoted,
        *line,
        *token,
        *newstr;

      int
        state,
        next;

      unsigned char
        recnum = 0,
        dataset = 0;

      int
        inputlen = BUFFER_SZ;

      line = (char *) malloc(inputlen);
      if (line == (char *) NULL)
        {
          fputs("Memory allocation failed\n", stderr);
          return 1;
        }
      /* super_fgets() stores the line length plus one in inputlen, which
         is why it can size the per-line scratch buffers below. */
      while((line = super_fgets(line,&inputlen,ifile))!=NULL)
      {
        state=0;
        next=0;

        token = (char *) malloc(inputlen);
        newstr = (char *) malloc(inputlen);
        if (token == (char *) NULL || newstr == (char *) NULL)
          {
            free(token);
            free(newstr);
            free(line);
            fputs("Memory allocation failed\n", stderr);
            return 1;
          }

        /* Split the line at '='; the right-hand side is a quoted string. */
        while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
          &brkused,&next,&quoted)==0)
        {
          if (state == 0)
            {
              /* Left-hand side: '#'-separated fields, of which the first
                 is the dataset and the second the record number. */
              int
                field,
                pos;

              char
                brk,
                wasquoted;

              field=0;
              pos=0;
              while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
                &brk, &pos, &wasquoted)==0)
              {
                if (field == 0)
                  dataset = (unsigned char) atoi(newstr);
                else
                   if (field == 1)
                     recnum = (unsigned char) atoi(newstr);
                field++;
              }
            }
          else
            if (state == 1)
              {
                int
                  pos;

                unsigned long
                  len;

                char
                  brk,
                  wasquoted;

                /* Right-hand side: walk it '&' by '&' and decode each
                   escape sequence in place, shrinking len as we go. */
                pos=0;
                len = strlen(token);
                while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
                  &brk, &pos, &wasquoted)==0)
                {
                  if (brk && pos > 0)
                    {
                      char
                        *s = &token[pos-1];  /* the '&' just stepped over */

                      len -= convertHTMLcodes(s, (int) strlen(s));
                    }
                }

                fputc(0x1c, ofile);
                fputc(dataset, ofile);
                fputc(recnum, ofile);
                if (len < 0x10000)
                  {
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                else
                  {
                    fputc(((len >> 24) & 255) | 0x80, ofile);
                    fputc((len >> 16) & 255, ofile);
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                pos=0;
                while (len--)
                  fputc(token[pos++], ofile);
              }
          state++;
        }
        free(token);
        token = (char *)NULL;
        free(newstr);
        newstr = (char *)NULL;
      }
    }

  fclose( ifile );
  fclose( ofile );

  return 0;
}

/*
    This routine is a generalized, finite state token parser. It allows
    you to extract tokens one at a time from a string of characters.  The
    characters used for white space, for break characters, and for quotes
    can be specified. Also, characters in the string can be preceded by
    a specifiable escape character which removes any special meaning the
    character may have.

    There are a lot of formal parameters in this subroutine call, but
    once you get familiar with them, this routine is fairly easy to use.
    "#define" macros can be used to generate simpler looking calls for
    commonly used applications of this routine.

    First, some terminology:

    token:              used here, a single unit of information in
                        the form of a group of characters.

    white space:        space that gets ignored (except within quotes
                        or when escaped), like blanks and tabs.  in
                        addition, white space terminates a non-quoted
                        token.

    break character:    a character that separates non-quoted tokens.
                        commas are a common break character.  the
                        usage of break characters to signal the end
                        of a token is the same as that of white space,
                        except multiple break characters with nothing
                        or only white space between generate a null
                        token for each two break characters together.

                        for example, if blank is set to be the white
                        space and comma is set to be the break
                        character, the line ...

                        A, B, C ,  , DEF

                        ... consists of 5 tokens:

                        1)      "A"
                        2)      "B"
                        3)      "C"
                        4)      ""      (the null string)
                        5)      "DEF"

    quote character:    a character that, when surrounding a group
                        of other characters, causes the group of
                        characters to be treated as a single token,
                        no matter how many white spaces or break
                        characters exist in the group.  also, a
                        token always terminates after the closing
                        quote.  for example, if ' is the quote
                        character, blank is white space, and comma
                        is the break character, the following
                        string ...

                        A, ' B, CD'EF GHI

                        ... consists of 4 tokens:

                        1)      "A"
                        2)      " B, CD" (note the blanks & comma)
                        3)      "EF"
                        4)      "GHI"

                        the quote characters themselves do
                        not appear in the resultant tokens.  the
                        double quotes are delimiters i use here for
                        documentation purposes only.

    escape character:   a character which itself is ignored but
                        which causes the next character to be
                        used as is.  ^ and \ are often used as
                        escape characters.  an escape in the last
                        position of the string gets treated as a
                        "normal" (i.e., non-quote, non-white,
                        non-break, and non-escape) character.
                        for example, assume white space, break
                        character, and quote are the same as in the
                        above examples, and further, assume that
                        ^ is the escape character.  then, in the
                        string ...

                        ABC, ' DEF ^' GH' I ^ J K^ L ^

                        ... there are 7 tokens:

                        1)      "ABC"
                        2)      " DEF ' GH"
                        3)      "I"
                        4)      " "     (a lone blank)
                        5)      "J"
                        6)      "K L"
                        7)      "^"     (passed as is at end of line)


    OK, now that you have this background, here's how to call "tokenizer":

    result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
                     brkused,next,quoted)

    result:     0 if we haven't reached EOS (end of string), and
                1 if we have (this is an "int").

    flag:       right now, only the low order 2 bits are used.
                1 => convert non-quoted tokens to upper case
                2 => convert non-quoted tokens to lower case
                0 => do not convert non-quoted tokens
                (this is an "unsigned").

    token:      a character string containing the returned next token
                (this is a "char[]").

    maxtok:     the maximum size of "token".  characters beyond
                "maxtok" are truncated (this is an "int").

    string:     the string to be parsed (this is a "char[]").

    white:      a string of the valid white spaces.  example:

                char whitesp[]={" \t"};

                blank and tab will be valid white space (this is
                a "char[]").

    break:      a string of the valid break characters.  example:

                char breakch[]={";,"};

                semicolon and comma will be valid break characters
                (this is a "char[]").

                IMPORTANT:  do not use the name "break" as a C
                variable, as this is a reserved word in C.

    quote:      a string of the valid quote characters.  an example
                would be

                char quotech[]={"'\""};

                (this causes single and double quotes to be valid)
                note that a token starting with one of these characters
                needs the same quote character to terminate it.

                for example,

                "ABC '

                is unterminated, but

                "DEF" and 'GHI'

                are properly terminated.  note that different quote
                characters can appear on the same line; only for
                a given token do the quote characters have to be
                the same (this is a "char[]").

    escape:     the escape character (NOT a string ... only one
                allowed).  use zero if none is desired (this is
                a "char").

    brkused:    the break character used to terminate the current
                token.  if the token was quoted, this will be the
                quote used.  if the token is the last one on the
                line, this will be zero (this is a pointer to a
                "char").

    next:       this variable points to the first character of the
                next token.  it gets reset by "tokenizer" as it steps
                through the string.  set it to 0 upon initialization,
                and leave it alone after that.  you can change it
                if you want to jump around in the string or re-parse
                from the beginning, but be careful (this is a
                pointer to an "int").

    quoted:     set to 1 (true) if the token was quoted and 0 (false)
                if not.  you may need this information (for example:
                in C, a string with quotes around it is a character
                string, while one without is an identifier).

                (this is a pointer to a "char").
*/

/* states */

#define IN_WHITE 0  /* nothing stored yet for the current token */
#define IN_TOKEN 1  /* inside an unquoted token */
#define IN_QUOTE 2  /* inside a quoted token */
#define IN_OZONE 3  /* token finished; waiting for a break or the next token */

/* Parser state shared between "tokenizer" and "chstore". */
int _p_state;	   /* current state	 */
unsigned _p_flag;  /* option flag	 */
char _p_curquote;  /* current quote char */
int _p_tokpos;	   /* current token pos  */

/* routine to find character in string ... used only by "tokenizer"
 *
 * Returns the zero-based index of the first occurrence of `ch` in the
 * NUL-terminated `string`, or -1 when it does not occur.  The terminator
 * itself is never matched, so ch == 0 always yields -1.
 */

int sindex(char ch,char *string)
{
  char *cp;
  for(cp=string;*cp;++cp)
    if(ch==*cp)
      return (int)(cp-string);	/* return position of character */
  return -1;			/* eol ... no match found */
}

/* routine to store a character in a string ... used only by "tokenizer" */
kusano 7d535a
kusano 7d535a
void chstore(char *string,int max,char ch)
kusano 7d535a
{
kusano 7d535a
  char c;
kusano 7d535a
  if(_p_tokpos>=0&&_p_tokpos
kusano 7d535a
  {
kusano 7d535a
    if(_p_state==IN_QUOTE)
kusano 7d535a
      c=ch;
kusano 7d535a
    else
kusano 7d535a
      switch(_p_flag&3)
kusano 7d535a
      {
kusano 7d535a
	    case 1: 	    /* convert to upper */
kusano 7d535a
	      c=toupper(ch);
kusano 7d535a
	      break;
kusano 7d535a
kusano 7d535a
	    case 2: 	    /* convert to lower */
kusano 7d535a
	      c=tolower(ch);
kusano 7d535a
	      break;
kusano 7d535a
kusano 7d535a
	    default:	    /* use as is */
kusano 7d535a
	      c=ch;
kusano 7d535a
	      break;
kusano 7d535a
      }
kusano 7d535a
    string[_p_tokpos++]=c;
kusano 7d535a
  }
kusano 7d535a
  return;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
kusano 7d535a
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
kusano 7d535a
    int *next,char *quoted)
kusano 7d535a
{
kusano 7d535a
  int qp;
kusano 7d535a
  char c,nc;
kusano 7d535a
kusano 7d535a
  *brkused=0;		/* initialize to null */
kusano 7d535a
  *quoted=0;		/* assume not quoted  */
kusano 7d535a
kusano 7d535a
  if(!line[*next])	/* if we're at end of line, indicate such */
kusano 7d535a
    return 1;
kusano 7d535a
kusano 7d535a
  _p_state=IN_WHITE;   /* initialize state */
kusano 7d535a
  _p_curquote=0;	   /* initialize previous quote char */
kusano 7d535a
  _p_flag=inflag;	   /* set option flag */
kusano 7d535a
kusano 7d535a
  for(_p_tokpos=0;(c=line[*next]);++(*next))	/* main loop */
kusano 7d535a
  {
kusano 7d535a
    if((qp=sindex(c,brkchar))>=0)  /* break */
kusano 7d535a
    {
kusano 7d535a
      switch(_p_state)
kusano 7d535a
      {
kusano 7d535a
	    case IN_WHITE:		/* these are the same here ...	*/
kusano 7d535a
	    case IN_TOKEN:		/* ... just get out		*/
kusano 7d535a
	    case IN_OZONE:		/* ditto			*/
kusano 7d535a
	      ++(*next);
kusano 7d535a
	      *brkused=brkchar[qp];
kusano 7d535a
	      goto byebye;
kusano 7d535a
kusano 7d535a
	    case IN_QUOTE:		 /* just keep going */
kusano 7d535a
	      chstore(token,tokmax,c);
kusano 7d535a
	      break;
kusano 7d535a
      }
kusano 7d535a
    }
kusano 7d535a
    else if((qp=sindex(c,quote))>=0)  /* quote */
kusano 7d535a
    {
kusano 7d535a
      switch(_p_state)
kusano 7d535a
      {
kusano 7d535a
	    case IN_WHITE:	 /* these are identical, */
kusano 7d535a
	      _p_state=IN_QUOTE; /* change states   */
kusano 7d535a
	      _p_curquote=quote[qp]; /* save quote char */
kusano 7d535a
	      *quoted=1;	/* set to true as long as something is in quotes */
kusano 7d535a
	      break;
kusano 7d535a
kusano 7d535a
	    case IN_QUOTE:
kusano 7d535a
	      if(quote[qp]==_p_curquote) /* same as the beginning quote? */
kusano 7d535a
	      {
kusano 7d535a
	        _p_state=IN_OZONE;
kusano 7d535a
	        _p_curquote=0;
kusano 7d535a
	      }
kusano 7d535a
	      else
kusano 7d535a
	        chstore(token,tokmax,c); /* treat as regular char */
kusano 7d535a
	      break;
kusano 7d535a
kusano 7d535a
	    case IN_TOKEN:
kusano 7d535a
	    case IN_OZONE:
kusano 7d535a
	      *brkused=c; /* uses quote as break char */
kusano 7d535a
	      goto byebye;
kusano 7d535a
      }
kusano 7d535a
    }
kusano 7d535a
    else if((qp=sindex(c,white))>=0) /* white */
kusano 7d535a
    {
kusano 7d535a
      switch(_p_state)
kusano 7d535a
      {
kusano 7d535a
	    case IN_WHITE:
kusano 7d535a
	    case IN_OZONE:
kusano 7d535a
	      break;		/* keep going */
kusano 7d535a
kusano 7d535a
	    case IN_TOKEN:
kusano 7d535a
	      _p_state=IN_OZONE;
kusano 7d535a
	      break;
kusano 7d535a
kusano 7d535a
	    case IN_QUOTE:
kusano 7d535a
	      chstore(token,tokmax,c); /* it's valid here */
kusano 7d535a
	      break;
kusano 7d535a
      }
kusano 7d535a
    }
kusano 7d535a
    else if(c==eschar)  /* escape */
kusano 7d535a
    {
kusano 7d535a
      nc=line[(*next)+1];
kusano 7d535a
      if(nc==0) 		/* end of line */
kusano 7d535a
      {
kusano 7d535a
	    *brkused=0;
kusano 7d535a
	    chstore(token,tokmax,c);
kusano 7d535a
	    ++(*next);
kusano 7d535a
	    goto byebye;
kusano 7d535a
      }
kusano 7d535a
      switch(_p_state)
kusano 7d535a
      {
kusano 7d535a
	    case IN_WHITE:
kusano 7d535a
	      --(*next);
kusano 7d535a
	      _p_state=IN_TOKEN;
kusano 7d535a
	      break;
kusano 7d535a
kusano 7d535a
	    case IN_TOKEN:
kusano 7d535a
	    case IN_QUOTE:
kusano 7d535a
	      ++(*next);
kusano 7d535a
	      chstore(token,tokmax,nc);
kusano 7d535a
	      break;
kusano 7d535a
kusano 7d535a
	    case IN_OZONE:
kusano 7d535a
	      goto byebye;
kusano 7d535a
      }
kusano 7d535a
    }
kusano 7d535a
    else	/* anything else is just a real character */
kusano 7d535a
    {
kusano 7d535a
      switch(_p_state)
kusano 7d535a
      {
kusano 7d535a
	    case IN_WHITE:
kusano 7d535a
	      _p_state=IN_TOKEN; /* switch states */
kusano 7d535a
kusano 7d535a
	    case IN_TOKEN:		 /* these 2 are     */
kusano 7d535a
	    case IN_QUOTE:		 /*  identical here */
kusano 7d535a
	      chstore(token,tokmax,c);
kusano 7d535a
	      break;
kusano 7d535a
kusano 7d535a
	    case IN_OZONE:
kusano 7d535a
	      goto byebye;
kusano 7d535a
      }
kusano 7d535a
    }
kusano 7d535a
  }		/* end of main loop */
kusano 7d535a
kusano 7d535a
byebye:
kusano 7d535a
  token[_p_tokpos]=0;	/* make sure token ends with EOS */
kusano 7d535a
kusano 7d535a
  return 0;
kusano 7d535a
}
kusano 7d535a
/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 8
 * fill-column: 78
 * End:
 */