/*****************************************************************************
******************************************************************************
******************************************************************************
*****                                                                    *****
*****                       PROGRAM 01_TO_RS v1.0                        *****
*****    A program for converting 0/1 matrix data to RESTSITE format.    *****
*****    Copyright (c) 1991 by Joyce C. Miller.  All Rights Reserved.    *****
*****                                                                    *****
***** This  program  converts  data  from  a  0/1  matrix (used by PAUP, *****
***** McClade, and Principal Component  Analysis programs) into a format *****
***** usable by RESTSITE.  From the  command  line, it reads in the name *****
***** of an  ASCII text file containing the square 0/1 data  matrix, the *****
***** name of the  output  file (for  the RESTSITE data), the name of an *****
***** ASCII text file with the names of the OTUs and characters found in *****
***** the 0/1 matrix, and the  matrix type.  For more information on the *****
***** formats of these files, see the program documentation.             *****
*****                                                                    *****
***** PAUP by  David  Swofford,  Copyright (c) 1989  by Illinois Natural *****
***** History Survey.  All Rights Reserved.                              *****
***** McCLADE Copyright (c) 1989 by  Wayne  Maddison and David Maddison. *****
***** All Rights Reserved.                                               *****
*****                                                                    *****
***** List of C functions used in this program:                          *****
*****                                                                    *****
*****     FUNCTION         LIBRARY          FUNCTION         LIBRARY     *****
*****     fclose           stdio.h          feof             stdio.h     *****
*****     fgetc            stdio.h          fgets            stdio.h     *****
*****     fopen            stdio.h          fprintf          stdio.h     *****
*****     fread            stdio.h          fseek            stdio.h     *****
*****     fwrite           stdio.h          printf           stdio.h     *****
*****     rewind           stdio.h          sscanf           stdio.h     *****
*****     strcpy           string.h         strcmp           string.h    *****
*****     system           stdlib.h                                      *****
*****                                                                    *****
******************************************************************************
******************************************************************************
*****************************************************************************/

/*****************************************************************************
**                             INCLUDE FILES                                **
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <rstypes.h>        /* file containing type definitions & constants */
#include <rserrors.h>                     /* file containing error messages */
#include <rsfuncs.h>        /* file containing some commonly-used functions */
/*****************************************************************************
**                  TYPE DEFINITIONS & SYMBOLIC CONSTANTS                   **
*****************************************************************************/
#define VNUM 1.0                          /* version number of this program */
#define YEAR 1991                /* calendar year this version was released */
/* goodx is true when the variable named x in the enclosing scope holds one */
/* of the characters 0 1 - X x ?  (x is captured by name; it is not a macro */
/* parameter, so a variable called x must exist where goodx is expanded).   */
#define goodx ((x=='0')||(x=='1')||(x=='-')||(x=='X')||(x=='x')||(x=='?'))
/* The diff* macros compare two variables named p1 and p2, captured by name */
/* from the enclosing scope, through their prb, enz and rv members.         */
#define diffprb (strcmp(p1.prb,p2.prb) != 0) /* are probe names different?  */
#define diffenz (strcmp(p1.enz,p2.enz) != 0)      /* are enzymes different? */
#define diffrv  (p1.rv != p2.rv)                 /* are r-values different? */
#define diffprobe (diffprb || diffenz || diffrv)     /* are PECs different? */
typedef struct {         /* structure to hold information on one individual */
  otuname idnum;                                   /* identification number */
  otuname K1;                                                     /* key #1 */
  otuname K2;                                                     /* key #2 */
  otuname K3;                                                     /* key #3 */
  otuname K4;                                                     /* key #4 */
} indinfo;
typedef struct {       /* holds information on one probe/enzyme combination */
  char prb[PRBLEN];                                           /* probe name */
  char enz[ENZLEN];                                          /* enzyme name */
  float rv;                                     /* size of restriction site */
  float fs;                                                /* fragment/site */
} prbinfo;
/*****************************************************************************
**                          FUNCTION PROTOTYPES                             **
*****************************************************************************/
/* ipfl: name of the ASCII file listing the OTUs and characters.            */
void makelists(char ipfl[]);     /* converts ind&prb info to 2 binary files */
/* mtfl: name of the 0/1 matrix file; mattype: '1' or '2' (matrix layout).  */
void flipmat(char mtfl[], char mattype);        /* flips matrix type 2 to 1 */
/* rsfl: name of the RESTSITE-format text file to be written.               */
void convert(char rsfl[]);     /* converts type 1 matrix to RESTSITE format */
/*****************************************************************************
**                           GLOBAL VARIABLES                               **
*****************************************************************************/
/* Both counts are filled in by makelists from the first line of the OTU    */
/* file and are then used as loop bounds by flipmat and convert.            */
int ni = 0;                                               /* number of OTUs */
int nc = 0;                                         /* number of characters */
/*****************************************************************************
******************************************************************************
******************************************************************************
******                                                                  ******
******                          MAIN PROGRAM                            ******
******                                                                  ******
******************************************************************************
******************************************************************************
******************************************************************************
***** This  section of the program  prints  out the  program name  and a *****
***** copyright  message, then  error-checks the command line.  Then the *****
***** rest of the program is executed.                                   *****
*****                                                                    *****
***** Functions called:                                                  *****
***** makelists      -- takes the  ASCII file with the names of the OTUs *****
*****                   and the probes, and  converts the information to *****
*****                   two binary files.                                *****
***** flipmat        -- flips 0/1 matrix from type 2 to type 1 if needed.*****
***** convert        -- converts type 1 matrix to RESTSITE format.       *****
*****************************************************************************/
/*
** Program entry point.
**
** argv[1]  name of the ASCII file holding the 0/1 matrix
** argv[2]  name of the RESTSITE output file
** argv[3]  name of the ASCII file listing OTUs and characters
** argv[4]  matrix type; only its first character is used
**
** Returns 0 after the three conversion steps have run.  Terminates with
** EXIT_FAILURE (after printing Error 101) when fewer than four arguments
** are supplied.
*/
int main(int argc,char *argv[])
{
  printf("\r\nProgram 01_TO_RS v%3.1f\r\n",VNUM);        /* message to user */
  printf("A program for converting data in 0/1 matrices to RESTSITE format.");
  printf("\r\nCopyright (c) %d by Joyce C. Miller.  ",YEAR);
  printf("All Rights Reserved.\r\n");

  if (argc < 5) {                       /* error if command line incomplete */
    printf("\a\r\nError 101:  The proper format of the command line should ");
    printf("be:\r\n\n            01_TO_RS 01FILE RSFILE OTUFILE ");
    printf("MATRIXTYPE\r\n\n            All of these items are necessary. ");
    printf("  Refer to the\r\n            documentation if you require ");
    printf("more information.\r\n");
    exit(EXIT_FAILURE);              /* report the failure to the caller    */
  }

  makelists(argv[3]);                /* converts OTU & probe info to binary */
  flipmat(argv[1],argv[4][0]);             /* flips type 2 matrix to type 1 */
  convert(argv[2]);                 /* converts 0/1 data to RESTSITE format */

  /* Delete the temporary files.  remove() is used instead of a shell DEL   */
  /* command so the cleanup does not depend on the command interpreter; a   */
  /* failure here is deliberately ignored (the conversion is already done). */
  remove("!@!.0");
  remove("!@!.1");
  remove("!@!.2");

  return 0;
}                                                   /* END OF FUNCTION MAIN */
/*****************************************************************************
**                                                                          **
**                           FUNCTION MAKELISTS                             **
**                                                                          **
** This function is called from FUNCTION MAIN, and reads in the list of OTU **
** and probe names.  First, the file is opened, and the first line is read, **
** and  the number of individuals and the number of characters are read in. **
** Then, information on that  many individuals is  read, and  written  to a **
** temporary file.  The  character  information is sent to a separate file. **
** Control then passes back to FUNCTION MAIN.                               **
**                                                                          **
** Functions called:                                                        **
** opntrdfl       --  opens a text file for reading.                        **
** opnbwtfl       --  opens a binary file for writing.                      **
** rserror311     --  error message if error reading datafile.              **
** rserror323     --  error message if error writing to temp file.          **
*****************************************************************************/
/*
** makelists -- split the OTU/character list into two binary temp files.
**
** ipfl  name of the ASCII file to read.  Its first line supplies the two
**       counts stored in the globals ni and nc; the next ni lines are read
**       as OTU records and the following nc lines as probe records.
**
** Writes ni indinfo records to "!@!.1" and nc prbinfo records to "!@!.2".
** Read problems are reported through rserror311(ipfl), write problems
** through rserror323() (both declared elsewhere; presumably they do not
** return -- confirm against rserrors.h).
**
** NOTE(review): the %s conversions below are unbounded, so a name longer
** than its destination field overruns it.  The field sizes come from
** rstypes.h, which is not visible here; bound them once they are known.
*/
void makelists(char ipfl[])
{
  char tempstr[MAXSTRLEN];                        /* one line of input text */
  FILE *fpi = NULL, *fpd = NULL;                           /* file pointers */
  register int i;                                  /* loop control variable */
  indinfo in;            /* structure to hold information on one individual */
  prbinfo pi;                 /* structure to hold information on one probe */

  printf("\r\nRecovering list of OTUs and characters");

  fpi = opntrdfl(fpi,ipfl);                  /* open ASCII file for reading */

  /* first line: number of individuals and number of probes; both needed   */
  if ((fgets(tempstr,MAXSTRLEN,fpi)) == NULL) rserror311(ipfl);
  if (sscanf(tempstr,"%d %d",&ni,&nc) != 2) rserror311(ipfl);

  fpd = opnbwtfl(fpd,"!@!.1");      /* open binary file for individual data */
  for (i=0; i<ni; ++i) {                          /* write the info to file */
    /* zero the whole record so that missing keys are empty strings and no */
    /* indeterminate bytes are written to the file                         */
    memset(&in,0,sizeof(indinfo));
    if ((fgets(tempstr,MAXSTRLEN,fpi)) == NULL) rserror311(ipfl);
    sscanf(tempstr,"%s %s %s %s %s ",in.idnum,in.K1,in.K2,in.K3,in.K4);
    /* fwrite returns the number of items written, so compare with 1       */
    if (fwrite(&in,sizeof(indinfo),1,fpd) != 1) rserror323();
  }
  if (fclose(fpd) != 0) rserror323();  /* flush failure is a write failure */
  fpd = NULL;

  fpd = opnbwtfl(fpd,"!@!.2");           /* open binary file for probe data */
  for (i=0; i<nc; ++i) {                            /* write info to file  */
    memset(&pi,0,sizeof(prbinfo));       /* defined contents on short lines */
    pi.rv = 0;
    pi.fs = 0;
    if ((fgets(tempstr,MAXSTRLEN,fpi)) == NULL) rserror311(ipfl);
    sscanf(tempstr,"%s %s %f %f",pi.prb,pi.enz,&pi.rv,&pi.fs);
    if (fwrite(&pi,sizeof(prbinfo),1,fpd) != 1) rserror323();
  }
  if (fclose(fpd) != 0) rserror323();                        /* close files */
  fclose(fpi);
}                                              /* END OF FUNCTION MAKELISTS */
/*****************************************************************************
**                                                                          **
**                            FUNCTION FLIPMAT                              **
**                                                                          **
** This function is  called  from  FUNCTION  MAIN, and  flips  the 0/1 data **
** matrix  from  type 2 to type 1, if  necessary.  In a type 1 matrix, each **
** row  represents one individual, and the columns are the characters found **
** in   that  individual.  In  a  type  2  matrix,  the   columns  are  the **
** individuals,  and the rows are the characters.  First, the 0/1 matrix is **
** read into a binary  file.  This  removes any spaces between the numbers, **
** and any  newline or carriage return  characters.  Then, if the matrix is **
** type 2, it  is  read  (during  which  it is "flipped") into a new binary **
** file.  Control then passes back to FUNCTION MAIN.                        **
**                                                                          **
** Functions called:                                                        **
** opntrdfl       --  opens a text file for reading.                        **
** opnbwtfl       --  opens a binary file for writing.                      **
** opnbrdfl       --  opens a binary file for reading.                      **
** rserror303     --  error message if FSEEK error.                         **
** rserror311     --  error message if error reading datafile.              **
** rserror313     --  error message if error reading temp file.             **
** rserror323     --  error message if error writing to temp file.          **
*****************************************************************************/
/*
** flipmat -- strip the 0/1 matrix to its data characters and, for a type 2
**            matrix, transpose it.
**
** mtfl     name of the ASCII 0/1 matrix file
** mattype  '1' : the stripped characters are written straight to "!@!.0"
**          '2' : they are written to "!@!.3", which is then re-read in
**                transposed order into "!@!.0" and deleted
**
** Only the characters accepted by the goodx macro are kept; everything
** else in the input is skipped.  The transposition uses the globals ni
** and nc as the matrix dimensions.
**
** NOTE(review): any other mattype value leaves the data in "!@!.3" and
** creates no "!@!.0"; this behaviour is unchanged from the original.
*/
void flipmat(char mtfl[], char mattype)
{
  register int i,c;                               /* loop control variables */
  FILE *fp0 = NULL,*fpd = NULL;                            /* file pointers */
  long p;                                          /* file offset for fseek */
  int x;              /* fgetc result; int so EOF differs from every byte   */
  char b;                                     /* one matrix character       */

  fp0 = opntrdfl(fp0,mtfl);                         /* open 0/1 matrix file */
  if (mattype == '1') fpd = opnbwtfl(fpd,"!@!.0");    /* create binary file */
  else fpd = opnbwtfl(fpd,"!@!.3");

  while ((x=fgetc(fp0)) != EOF) {           /* convert 0/1 matrix to binary */
    if (goodx) {
      b = (char)x;
      if (fwrite(&b,sizeof(char),1,fpd) != 1) rserror323();
    }
  }
  if (!feof(fp0)) rserror311(mtfl);                   /* error reading file */

  fclose(fp0);                                       /* close up both files */
  if (fclose(fpd) != 0) rserror323();      /* flush failure = write failure */
  fp0 = NULL;
  fpd = NULL;

  if (mattype == '2') {                     /* if matrix is type 2, flip it */
    fpd = opnbrdfl(fpd,"!@!.3");                        /* open binary file */
    fp0 = opnbwtfl(fp0,"!@!.0");          /* create file for flipped matrix */

    for (i=0; i<ni; ++i) {       /* copy chars in correct order to new file */
      for (c=0; c<nc; ++c) {
        /* source holds nc rows of ni characters; compute in long so the   */
        /* product cannot overflow a narrow int                            */
        p = (long)c*(long)ni + (long)i;
        if (fseek(fpd,p,SEEK_SET) != 0) rserror303(mtfl);
        if (fread(&b,sizeof(char),1,fpd) != 1) rserror313();
        if (fwrite(&b,sizeof(char),1,fp0) != 1) rserror323();
      }
    }

    fclose(fpd);                                          /* close up files */
    if (fclose(fp0) != 0) rserror323();
    remove("!@!.3");                                    /* delete temp file */
  }
}                                                /* END OF FUNCTION FLIPMAT */
/*****************************************************************************
**                                                                          **
**                            FUNCTION CONVERT                              **
**                                                                          **
** This function is called from FUNCTION MAIN, and reads a 0/1 data matrix, **
** and writes the  data  in  RESTSITE  form to an ASCII text file.  Control **
** then passes back to FUNCTION MAIN.                                       **
**                                                                          **
** Functions called:                                                        **
** opnbrdfl       --  opens a binary file for reading.                      **
** opntwtfl       --  opens a text file for writing.                        **
** rserror303     --  error message if FSEEK error.                         **
** rserror313     --  error message if error reading temp file.             **
*****************************************************************************/
/*
** convert -- write the RESTSITE text file from the three temporary files.
**
** rsfl  name of the text file to create
**
** Reads "!@!.1" (one indinfo per OTU), "!@!.2" (one prbinfo per character)
** and "!@!.0" (one character per OTU/character pair, OTU-major order).
** For every OTU the probe file is scanned from the start.  Each run of
** consecutive characters that share probe name, enzyme and r-value and
** contains at least one '1' produces one output line: the five OTU fields,
** the probe name, enzyme and r-value, followed by the fragment/site value
** of every character in the run whose matrix entry is '1'.  Output lines
** are separated by "\n" and the file ends with "\n".  A dot is printed to
** stdout for every matrix entry processed.
**
** Read failures on the temporary files are reported via rserror313().
** The diffprobe macro used below reads the locals p1 and p2 by name.
*/
void convert(char rsfl[])
{
  FILE *fpi = NULL,*fpp = NULL,*fp0 = NULL,*fpr = NULL;    /* file pointers */
  register int i,c;                               /* loop control variables */
  prbinfo p1,p2;                        /* previous and current probe info  */
  indinfo i1;                               /* holds info on one individual */
  int ip;                /* 1 while a line for the current probe/OTU is open */
  int first = 1;                   /* 1 until the first line has been begun */
  char x;                                                         /* 0 or 1 */

  printf("\r\n\nConverting zeros and ones to site/fragment data");

  fpi = opnbrdfl(fpi,"!@!.1");                   /* open file with OTU info */
  fpp = opnbrdfl(fpp,"!@!.2");             /* open file with character info */
  fp0 = opnbrdfl(fp0,"!@!.0");                 /* open file with 0/1 matrix */
  fpr = opntwtfl(fpr,rsfl);                             /* open output file */

  for (i=0; i<ni; ++i) {                              /* print for each OTU */
    if (fread(&i1,sizeof(indinfo),1,fpi) != 1) rserror313();     /* get OTU */
    p1.prb[0] = '\0';                             /* initialize "old" probe */
    p1.enz[0] = '\0';
    p1.rv     =   0;
    p1.fs     =   0;
    ip = 0;                    /* a new OTU always starts a new output line */
    rewind(fpp);                           /* go to beginning of probe file */
    for (c=0; c<nc; ++c) {                             /* go through probes */
      printf(".");
      if (fread(&p2,sizeof(prbinfo),1,fpp) != 1) rserror313();   /* get PEC */
      if (fread(&x,sizeof(char),1,fp0) != 1) rserror313();       /* get 0/1 */
      if (diffprobe) ip = 0;               /* probe changed: close the line */
      if (x == '1') {                                          /* if it's 1 */
        if (ip == 0) {                        /* begin a line for this PEC  */
          if (!first) fprintf(fpr,"\n");   /* separator, never a blank lead */
          first = 0;
          fprintf(fpr,"%s %s %s %s %s ",i1.idnum,i1.K1,i1.K2,i1.K3,i1.K4);
          fprintf(fpr,"%s %s %.2f ",p2.prb,p2.enz,p2.rv);
          ip = 1;                            /* switch off printing for ind */
        }
        fprintf(fpr,"%.2f ",p2.fs);                  /* print out frag/site */
      }
      p1 = p2;                           /* copy "new" probe to "old" probe */
    }
  }
  fprintf(fpr,"\n");

  fclose(fp0);                                        /* close up all files */
  fclose(fpi);
  fclose(fpp);
  fclose(fpr);
}                                                /* END OF FUNCTION CONVERT */
/****************************************************************************/
