/*----------------------------------------------------------------------*/
/* convert1.c --- Convert a tech file from version 7.2 to version 7.3	*/
/* with stacked contacts.						*/
/*									*/
/* This file does the following:					*/
/*   1) Adds the "format 29" line to the "tech" section at the top.	*/
/*   2) Removes the stacked contact types from all sections.		*/
/*   3) Adds the stacked contact types as "stackable" types in the	*/
/*	"contact" section, and adds the original type names as aliases	*/
/*	for painting these contact types.				*/
/*   4) Removes all "cifinput" and "drc" statements that depend		*/
/*	exclusively on stacked contact types.				*/
/*   5) Removes all "styles" section lines pertaining to the stacked	*/
/*	contact types.							*/
/*   6) Compresses the "styles" section by writing multiple style types	*/
/*	per tile type on one line as allowed by format 29.		*/
/*									*/
/* This file does NOT do the following:					*/
/*   1) Clean up the "cifinput" section to manage the standard contacts	*/
/*	(this is a difficult task that would require quite a bit of	*/
/*	finesse).							*/
/*   2) Attempt to make any format-29 improvements other than those	*/
/*	related to stacked contacts and the compressed "styles" section	*/
/*	notation.  Would like to cover the "*" notation, in particular.	*/
/*									*/
/* simple compilation:  gcc convert1.c -o convert			*/
/* simple usage: convert <filename.tech27> [<filename.tech>]		*/
/*----------------------------------------------------------------------*/

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* One entry of the "types" section: a plane name plus the	*/
/* comma-separated list of names given for a tile type.		*/
typedef struct {
   char plane[30];	/* plane name (first word of the line) */
   char names[10][40];	/* type names; the list ends at the first empty name */
   int stacking;	/* -1, or index into mcontacts[] once the type has */
			/* been matched to a stacked contact		    */
} tiletype;

/* One entry of the "contact" section: a contact type name	*/
/* followed by two or three residue type names.			*/
typedef struct {
   char contact[40];	   /* contact type name (first word of the line) */
   char residues[3][40];   /* residue names; residues[2] is empty when the */
			   /* line listed only two residues		   */
   int  stacking;	   /* -1, or index into mtiles[] of the tile type  */
			   /* carrying this name when three residues given */
} stack;

/* Global data structures */

/* Tile types collected from the "types" section.  The entry after the	*/
/* last one read has an empty plane name and serves as a terminator.	*/
tiletype mtiles[256];
/* Contacts collected from the "contact" section.  The entry after the	*/
/* last one read has an empty contact name.				*/
stack mcontacts[128];

/* Compare string against stacking type names	*/
/* Return 1 or 0, depending on whether there	*/
/* was a match or not.				*/

int compare_stack(char *strptr)
{
   int i, j, n;

   while (isspace(*strptr)) strptr++;

   for (i = 0; mtiles[i].plane[0] != '\0'; i++)
      if (mtiles[i].stacking >= 0)
	 for (j = 0; mtiles[i].names[j][0] != '\0'; j++) {
	    n = strlen(mtiles[i].names[j]);
	    if (!strncmp(strptr, mtiles[i].names[j], n))
	       return n;
	 }

   return 0;
}

/* Parse a token, removing any stacking contact type names.		*/
/*									*/
/* The token is edited in place (the result is never longer than the	*/
/* input).  Each name recognized by compare_stack() is deleted along	*/
/* with a "/modifier" suffix and one following comma.  Parentheses,	*/
/* newlines and whitespace are passed through.  A comma between two	*/
/* surviving items is held back and only written out just before the	*/
/* next surviving text, so removing an item does not leave a doubled	*/
/* or leading comma behind.						*/
/*									*/
/* Returns 1 if some text other than whitespace, parentheses, commas	*/
/* and "/modifier" runs survives; returns 0 otherwise.			*/

int parse_token(char *tptr)
{
   int has_something = 0;
   int n, cpending;
   char *sptr = tptr;	 /* Source position */
   char *dptr = tptr;	 /* Destination position */

   /* Pass on any leading whitespace (line indentation).  The cast is	*/
   /* required:  <ctype.h> functions are undefined for negative chars.	*/
   while (isspace((unsigned char)*sptr))
      *dptr++ = *sptr++;

   cpending = 0;	 /* nonzero while a comma is being held back */
   while (*sptr != '\0') {

      if (*sptr == '(' || *sptr == ')' || *sptr == '\n') {
	 *dptr++ = *sptr++;
	 continue;
      }

      n = compare_stack(sptr);
      if (n > 0) {
	 /* Stacked contact name:  drop it */
	 sptr += n;
	 /* Skip over any modifiers */
	 if (*sptr == '/') {
	    while (*sptr != ',' && *sptr != '\n' && *sptr != '\0'
			&& *sptr != ')' && !isspace((unsigned char)*sptr))
	       sptr++;
	 }
	 if (*sptr == ',')
	    sptr++;		/* the comma goes with the removed name */
	 else {
	    while (isspace((unsigned char)*sptr))
	       *dptr++ = *sptr++;
	 }
      }
      else {
	 /* Here *sptr is none of '(', ')', '\n', '\0', so only the	*/
	 /* comma and modifier characters remain to be excluded.		*/
	 if (*sptr != ',' && *sptr != '/') {
	    if (cpending) *dptr++ = ',';
	    if (!isspace((unsigned char)*sptr))
	       has_something = 1;
	 }
	 /* Copy everything up to the next delimiter */
	 while (*sptr != '(' && *sptr != ')' && *sptr != ','
			&& *sptr != '\n' && *sptr != '\0') {
	    *dptr++ = *sptr++;
	 }
	 if (*sptr == ',') {
	    cpending = 1;
	    sptr++;
	 }
	 else
	    cpending = 0;
      }
   }
   *dptr = '\0';
   return has_something;
}

/* Grab the next space-delimited token from the input.			*/
/*									*/
/* Copies into destptr, as one NUL-terminated string:  any leading	*/
/* whitespace up to (but not including) a newline, the run of		*/
/* non-whitespace characters that follows, and all whitespace after	*/
/* that, newlines included.  destptr must have room for		*/
/* strlen(tptr) + 1 bytes.						*/
/*									*/
/* Returns a pointer to the first character after the copied text.	*/
/* On an empty string nothing is consumed and destptr is set to "".	*/

char *grab_token(char *tptr, char *destptr)
{
   char *start = tptr;
   size_t len;

   /* Leading blanks; a newline here is left for the token body pass	*/
   while (*tptr != '\n' && isspace((unsigned char)*tptr)) tptr++;

   /* Token body.  The end-of-string test is essential:  isspace('\0')	*/
   /* is false, so without it the scan would run off the buffer when	*/
   /* the input does not end in whitespace.				*/
   while (*tptr != '\0' && !isspace((unsigned char)*tptr)) tptr++;

   /* Trailing whitespace, including any newline */
   while (isspace((unsigned char)*tptr)) tptr++;

   len = (size_t)(tptr - start);
   memcpy(destptr, start, len);
   destptr[len] = '\0';
   return tptr;
}

/* Main routine */

int main(int argc, char *argv[])
{
   FILE *fi, *fo;
   char *filein, *fileout, *extend;
   char fname[128], linein[1024];
   int state, i, j, n;
   int c, t;		/* total number of contacts and tiles. */
   char cname[40], r1[40], r2[40], r3[40], typelist[400], token[1024];
   char *tptr, *tptr2, *res1, *res2, *res3, *res4, sc;

   if (argc < 2) {
      fprintf(stderr, "Usage: convert <file_in> [<file_out>]\n");
      return 1;
   }
   filein = argv[1];
   if (argc == 3)
      fileout = argv[2];
   else {
      strcpy(fname, filein);
      if ((extend = strstr(fname, "tech27")) != NULL) {
	 *(extend + 4) = '\0';
	 fileout = fname;
      }
      else
	 fileout = NULL;
   }

   fi = fopen(filein, "r");
   if (fi == NULL) {
      fprintf(stderr, "Unable to open %s for input\n", filein);
      return 1;
   }

   if (fileout == NULL)
      fo = stdout;
   else {
      fo = fopen(fileout, "w");
      if (fo == NULL) {
	 fprintf(stderr, "Unable to open %s for output\n", fileout);
	 return 1;
      }
   }

   state = -1;
   t = 0;
   c = 0;
   while (fgets(linein, 1023, fi) != NULL) {

      switch(state) {

	 case -1:
	    /* Step 0:  Find the "tech" line and insert "format 29" */
	    if (!strncmp(linein, "tech", 4))
	       state = 0;
	    fprintf(fo, "%s", linein);
	    fprintf(fo, "  format 29\n");
	    break;
		
	 case 0:
	    /* Step 1:  Find the "types" section */

	    if (!strncmp(linein, "types", 5))
	       state = 1;
	    fprintf(fo, "%s", linein);
	    break;

	 case 1:

	    /* Step 2:  Save the names of all the tile types; don't */
	    /* write out yet.					    */

	    if (sscanf(linein, "%s %s", mtiles[t].plane, typelist) == 2) {
	       tptr = typelist;
	       j = 0;
	       while ((tptr2 = strchr(tptr, ',')) != NULL) {
		  *tptr2 = '\0';
		  strcpy(mtiles[t].names[j], tptr);
		  tptr = tptr2 + 1;
		  j++;
	       }
	       strcpy(mtiles[t].names[j], tptr);
	       mtiles[t].names[j + 1][0] = '\0';
	       mtiles[t].stacking = -1;
	       t++;
	       mtiles[t].plane[0] = '\0';
	    }
	    else if (!strncmp(linein, "end", 3))
	       state = 2;
	    break;

	 case 2:

	    /* Step 3:  Find the "contact" section */
	    if (!strncmp(linein, "contact", 7))
	       state = 3;
	    break;

	 case 3:

	    /* Step 4:  Look for stacked contact types */
	    if ((n = sscanf(linein, "%s %s %s %s", cname, r1, r2, r3)) >= 3) {
	       strcpy(mcontacts[c].contact, cname);
	       strcpy(mcontacts[c].residues[0], r1);
	       strcpy(mcontacts[c].residues[1], r2);
	       if (n == 4) {
	          strcpy(mcontacts[c].residues[2], r3);
		  mcontacts[c].stacking = -1;

		  /* This is a stacking type.  Mark the tile type accordingly */
		  for (i = 0; i < t; i++) {
		     for (j = 0; mtiles[i].names[j][0] != '\0'; j++) {
			if (!strcmp(cname, mtiles[i].names[j])) {
			   mtiles[i].stacking = c;
			   mcontacts[c].stacking = i;
			   break;
			}
		     }
		     if (mcontacts[c].stacking >= 0) break;
		  }
	       }
	       else {
		  mcontacts[c].residues[2][0] = '\0';
		  mcontacts[c].stacking = -1;
	       }
	       c++;
	       mcontacts[c].contact[0] = '\0';
	    }
	    if (!strncmp(linein, "end", 3)) {
	       state = 4;
	       /* Now, we write out all of the modified lines */
	       for (i = 0; i < t; i++) {
		  if (mtiles[i].stacking < 0) {
		     fprintf(fo, "  %s ", mtiles[i].plane);
		     for (j = 0; mtiles[i].names[j][0] != '\0'; j++) {
		        if (j != 0) fprintf(fo, ",");
			fprintf(fo, "%s", mtiles[i].names[j]);
		     }
		     fprintf(fo, "\n");
		  }
	       }
	       fprintf(fo, "end\n\ncontact\n");
	       for (i = 0; i < c; i++) {
		  if (mcontacts[i].stacking < 0) {
		     fprintf(fo, "  %s %s %s\n", mcontacts[i].contact,
				mcontacts[i].residues[0], mcontacts[i].residues[1]);
		  }
	       }
	       for (i = 0; i < c; i++) {
		  if (mcontacts[i].stacking >= 0) {
		     fprintf(fo, "  stackable ");
		     /* Find the contact matching the 1st 2 residues */
		     for (j = 0; j < c; j++) {
			if (mcontacts[j].stacking < 0) {
			   res1 = mcontacts[j].residues[0];
			   res2 = mcontacts[j].residues[1];
			   res3 = mcontacts[i].residues[0];
			   res4 = mcontacts[i].residues[1];
			   if ((!strcmp(res1, res3) && !strcmp(res2, res4)) ||
			       (!strcmp(res1, res4) && !strcmp(res2, res3))) {
			      fprintf(fo, "%s ", mcontacts[j].contact);
			      break;
			   }
			}
		     }
		     /* Find the contact matching the 2nd 2 residues */
		     for (j = 0; j < c; j++) {
			if (mcontacts[j].stacking < 0) {
			   res1 = mcontacts[j].residues[0];
			   res2 = mcontacts[j].residues[1];
			   res3 = mcontacts[i].residues[1];
			   res4 = mcontacts[i].residues[2];
			   if ((!strcmp(res1, res3) && !strcmp(res2, res4)) ||
			       (!strcmp(res1, res4) && !strcmp(res2, res3))) {
			      fprintf(fo, "%s ", mcontacts[j].contact);
			      break;
			   }
			}
		     }
		     for (j = 0; mtiles[mcontacts[i].stacking].names[j][0] != '\0';
				j++) {
			if (j != 0) fprintf(fo, ",");
		        fprintf(fo, "%s", mtiles[mcontacts[i].stacking].names[j]);
		     }
		     fprintf(fo, "\n");
		  }
	       }
	       fprintf(fo, "%s", linein);
	    }
	    break;

	 /* Step 5: Look for stacking type names in each	*/
	 /* line of input, and delete them.			*/

	 case 4:
	    /* Find the "styles" section */
	    if (!strncmp(linein, "styles", 6)) {
	       typelist[0] = '\0';
	       state = 5;
	    }
	    fprintf(fo, "%s", linein);
	    break;

	 case 5:
	
	    /* Handle "styles" section */

	    if (!strncmp(linein, "end", 3)) {
	       state = 6;
	       fprintf(fo, "\n%s", linein);
	    }
	    else {
	       tptr = grab_token(linein, token);
	       if (!compare_stack(token)) {
		  tptr2 = token;
		  while (isspace(*tptr2)) tptr2++;
		  if (!strncmp(tptr2, "styletype", 9)) {
		     fprintf(fo, "%s", linein);
		  }
		  else if ((typelist[0] != '\0') &&
			!strncmp(tptr2, typelist, strlen(typelist))) {
		     grab_token(tptr, token);
		     sscanf(token, "%d", &j);
		     fprintf(fo, " %d", j);
		  }		  
		  else {
		     if (typelist[0] != '\0')
		        fprintf(fo, "\n");
		     sscanf(tptr2, "%s", typelist);
		     tptr2 = linein;
		     while (*tptr2 != '\n') tptr2++;
		     *tptr2 = '\0';
		     fprintf(fo, "%s", linein);
		  }
	       }
	    }
	    break;

	 case 6:
	    /* Find the "compose" section */
	    if (!strncmp(linein, "compose", 7))
	       state = 7;
	    fprintf(fo, "%s", linein);
	    break;

	 case 7:
	    if (!strncmp(linein, "end", 3)) {
	       state = 8;
	       fprintf(fo, "%s", linein);
	    }
	    else {
	       tptr = grab_token(linein, token);
	       n = 0;
	       while (*tptr != '\0') {
		  tptr = grab_token(tptr, token);
	          if (compare_stack(token)) {
		     n = 1;
		     break;
		  }
	       }
	       if (n == 0) {  
	          fprintf(fo, "%s", linein);
		  break;
	       }
	    }
	    break;

	 case 8:
	    /* Find the "cifinput" section */
	    if (!strncmp(linein, "cifinput", 8)) {
	       state = 9;
	       fprintf(fo, "%s", linein);
	       break;
	    }
	    if (!strncmp(linein, "drc", 3)) {
	       state = 10;
	       fprintf(fo, "%s", linein);
	       break;
	    }

	    /* For all other sections, delete any */
	    /* type names that are stacked types.  */
	    tptr = linein;
	    while (*tptr != '\n' && *tptr != '\0') {
	       tptr = grab_token(tptr, token);
	       parse_token(token);
	       fprintf(fo, "%s", token);
	    }
	    if (*tptr == '\n') fprintf(fo, "\n");
	    break;

	 case 9:  /* "cifinput" section handling */
	    if (!strncmp(linein, "end", 3)) {
	       state = 8;
	       fprintf(fo, "%s", linein);
	    }
	    else {
	       tptr = grab_token(linein, cname);
	       tptr2 = linein;
	       while (isspace(*tptr2)) tptr2++;
	       if (*tptr2 == '\n') {
		  fprintf(fo, "%s", *tptr);
		  break;
	       }
	       else if (*tptr2 == '\0') {
		  fprintf(fo, "\n");
		  break;
	       }
	       while (*tptr != '\n' && *tptr != '\0') {
	          tptr = grab_token(tptr, token);
	          n = parse_token(token);
		  if (n == 0) {
		     if (!strncmp(tptr2, "layer", 5)) {
		        while (fgets(linein, 1023, fi) != NULL) {
			   tptr = grab_token(linein, cname);
			   tptr2 = linein;
	       		   while (isspace(*tptr2)) tptr2++;
			   if (!strncmp(tptr2, "calma", 5)) {
	 		      while (*tptr != '\n' && *tptr != '\0')
				 tptr = grab_token(tptr, token);
			      break;
			   }
			}
		     }
		  }
		  else {
	             fprintf(fo, "%s%s", cname, token);
		     cname[0] = '\0';
		  }
	       }
	    }
	    break;

	 case 10:  /* "drc" section handling */
	    if (!strncmp(linein, "end", 3)) {
	       state = 8;
	       fprintf(fo, "%s", linein);
	    }
	    else {
	       tptr = grab_token(linein, cname);
	       tptr2 = linein;
	       while (isspace(*tptr2)) tptr2++;
	       if (*tptr2 == '\n') {
		  fprintf(fo, "%s", *tptr);
		  break;
	       }
	       else if (*tptr2 == '\0') {
		  fprintf(fo, "\n");
		  break;
	       }
	       while (*tptr != '\n' && *tptr != '\0') {
	          tptr = grab_token(tptr, token);
	          n = parse_token(token);
		  if (n == 0) {
		     while (*tptr != '\n') {
			while (*tptr != '\n' && *tptr != '\\') tptr++;
			if (*tptr == '\\') {
			   if (fgets(linein, 1023, fi) == NULL) {
			      fprintf(stderr, "Premature end-of-file in drc section\n");
			      return 1;
			   }
			   tptr = linein;
			}
		     }
		  }
		  else {
	             fprintf(fo, "%s%s", cname, token);
		     cname[0] = '\0';
		  }
	       }
	    }
	    break;
      }
   }

   fclose(fi);
   fclose(fo);
   return 0;
}
