/*****************************************************************************
******************************************************************************
******************************************************************************
*****                                                                    *****
*****                         PROGRAM COMBINE.C                          *****
*****    Copyright (c) 1990 by Joyce C. Miller.  All Rights Reserved.    *****
*****                                                                    *****
***** This  program will combine  the data  from two  pooled  data files *****
***** produced by program RESTSITE into a single pooled data file.  From *****
***** the command line, it reads (1) and (2): the names of the two files *****
***** that  are to be combined, (3): the  new name  for the new OTU, and *****
***** optionally, the letter "A",  which tells  the program to re-number *****
***** all of the subsequent "#.$$$" files.                               *****
*****                                                                    *****
***** List of C functions used in this program:                          *****
*****                                                                    *****
*****     FUNCTION         LIBRARY          FUNCTION         LIBRARY     *****
*****     atoi             stdlib.h         fclose           stdio.h     *****
*****     feof             stdio.h          fread            stdio.h     *****
*****     fseek            stdio.h          fwrite           stdio.h     *****
*****     printf           stdio.h          qsort            stdlib.h    *****
*****     rewind           stdio.h          strcat           string.h    *****
*****     strcpy           string.h         strlen           string.h    *****
*****     strncpy          string.h         system           stdlib.h    *****
*****     toupper          ctype.h                                       *****
*****                                                                    *****
******************************************************************************
******************************************************************************
*****************************************************************************/

/*****************************************************************************
**                             INCLUDE FILES                                **
*****************************************************************************/
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <rstypes.h>         /* file with type definitions for this program */
#include <rserrors.h>                           /* file with error messages */
#include <rsfuncs.h>                   /* file with commonly-used functions */
/*****************************************************************************
**                            SYMBOLIC CONSTANTS                            **
*****************************************************************************/
#define VNUM 1.0                          /* version number of this program */
#define YEAR 1990                                         /* copyright year */
/*****************************************************************************
**                           FUNCTION PROTOTYPES                            **
*****************************************************************************/
void combinefiles(char fl1[], char fl2[], char newOTU[]); /* combines files */
int  fs_sort();           /* sorts the fragments/sites into numerical order */
void redolists(char fl1[], char fl2[], char newOTU[]);   /* renumbers files */
/*****************************************************************************
******************************************************************************
******************************************************************************
******                                                                  ******
******                          MAIN PROGRAM                            ******
******                                                                  ******
******************************************************************************
******************************************************************************
******************************************************************************
**                                                                          **
** The  main  part of  this  program  prints out  the program  name  and  a **
** copyright message, then reads the command line.  From  the command line, **
** the program reads in the  names of the two pooled data files that are to **
** be  combined  into  one  file, the  new name  of the  combined  OTU, and **
** optionally, "A", which  tells  it  to  re-number  all of the pooled data **
** files it can find.                                                       **
**                                                                          **
** Functions called:                                                        **
** combinefiles   --  combines two pooled data files into one.              **
** redolists      --  renumbers all of the pooled data files.               **
*****************************************************************************/
int main(int argc, char *argv[])
{
  char fl1[20];            /* name of file that data is to be combined INTO */
  char fl2[20];               /* name of file that data is to be taken from */
  char newOTU[KEYLEN];                    /* name for the new, combined OTU */

  printf("\r\nProgram COMBINE v%3.1f\r\n",VNUM);       /* copyright message */
  printf("A program for combining two pooled data files into one.\r\n");
  printf("Copyright (c) %d by Joyce C. Miller.  ",YEAR);
  printf("All Rights Reserved.\r\n");

  if (argc < 4) {              /* error if not enough items on command line */
    printf("\a\r\nError 101:  Two pooled data files must be specified.  ");
    printf("The proper format of\r\n            the command line should be:");
    printf("\r\n\n            COMBINE FILENAME#1 FILENAME#2 NEWOTUNAME\r\n\n");
    printf("            All of these items are necessary.\r\n");
    exit(EXIT_FAILURE);       /* a usage error must not report success */
  }

  /* The three names are copied into fixed-size buffers, so refuse any     */
  /* argument that would not fit (including its terminating '\0') rather   */
  /* than overrunning the buffer.                                           */
  if ((strlen(argv[1]) >= sizeof(fl1)) ||
      (strlen(argv[2]) >= sizeof(fl2)) ||
      (strlen(argv[3]) >= sizeof(newOTU))) {
    printf("\a\r\nError 101:  An item on the command line is too long.  ");
    printf("File names may have\r\n            at most %d characters ",
           (int)(sizeof(fl1) - 1));
    printf("and the new OTU name at most %d.\r\n",
           (int)(sizeof(newOTU) - 1));
    exit(EXIT_FAILURE);
  }

  strcpy(fl1,argv[1]);                           /* read in name of file #1 */
  strcpy(fl2,argv[2]);                           /* read in name of file #2 */
  strcpy(newOTU,argv[3]);                           /* read in new OTU name */

  combinefiles(fl1,fl2,newOTU);                /* combine file #1 & file #2 */

  /* renumber all subsequent files only when a fifth item starting with    */
  /* "A" (either case) is present; the cast keeps toupper's argument in    */
  /* the range the <ctype.h> functions accept.                              */
  if (argc == 5)
    if (toupper((unsigned char)argv[4][0]) == 'A') redolists(fl1,fl2,newOTU);

  return 0;
}                                                    /* END OF MAIN PROGRAM */
/*****************************************************************************
**                                                                          **
**                         FUNCTION COMBINEFILES                            **
**                                                                          **
** This function is called  from FUNCTION MAIN, and  adds the data found in **
** the second  pooled data file into  the first pooled data file.  It first **
** opens  the two  files, and  reads one  record from  each.  The number of **
** individuals  in record  #2 is added  into  that same field in record #1, **
** then  all  of  the  different  fragments  (or sites) are compared.  Each **
** fragment in  record #2  is  compared to every fragment in record #1.  If **
** the  same fragment is found in both records, then the number of times it **
** occurred in record #2 is  added  into the number of times it occurred in **
** #1.  If the fragment did not occur in #1 at all, then it is added to the **
** list of fragments in record #1, and the  number of occurrences is copied **
** as well.  For example:                                                   **
**                             Record #1    Record #2    New Record #1      **
**          # Individuals:         5          5                10           **
**          Fragments &       2000bp   5   2000bp   5      2000bp   10      **
**              number of     1570bp   2                   1570bp    2      **
**             times each     1200bp   3                   1200bp    3      **
**               occurred:    1150bp   5   1150bp   5      1150bp   10      **
**                                                                          **
** After  all  of the  fragments/sites  have  been  combined  together, the **
** fragments/sites in the new  record #1 (now  holding  the data  from both **
** records) are  re-sorted  into ascending order.  Then this  new record is **
** written  over the old version of record #1.  Then, the function reads in **
** the second record in  files #1 and #2, and the whole process is repeated **
** until all data from file #2 has been added to file #1.                   **
**                                                                          **
** Functions called:                                                        **
** opnbrdfl       --  opens a binary file for reading.                      **
** opnbrwfl       --  opens a binary file for reading and writing.          **
** fs_sort        --  sorts all of the sites/fragments in a data record.    **
** rserror303     --  error message if fseek error.                         **
** rserror311     --  error message if error reading datafile.              **
** rserror321     --  error message if error writing to datafile.           **
*****************************************************************************/
void combinefiles(char fl1[], char fl2[], char newOTU[])
{
  FILE *fp1, *fp2;                    /* file pointer for file #1 & file #2 */
  pooldat p1, p2;                   /* data record from file #1 and file #2 */
  char found = 'F';                         /* boolean if sites/frags match */
  long ps = -sizeof(pooldat);        /* used by FSEEK to back up one record */
  register int i,j;                               /* loop control variables */

  printf("\nCombining files %s and %s ",fl1,fl2);
  printf("into the new OTU %s\n",newOTU);                /* message to user */

  fp1 = opnbrwfl(fp1,fl1);
  fp2 = opnbrdfl(fp2,fl2);

  rewind(fp1);
  rewind(fp2);
  while ((!feof(fp1)) && (!feof(fp2))) {           /* go through both files */
    if ((fseek(fp1,(long)0,SEEK_CUR)) != NULL) rserror303(fl1);
    if ((fseek(fp2,(long)0,SEEK_CUR)) != NULL) rserror303(fl2);
    if (fread(&p1,sizeof(pooldat),1,fp1) != NULL) {        /* get record #1 */
      if (fread(&p2,sizeof(pooldat),1,fp2) != NULL) {      /* get record #2 */
        strcpy(p1.otu,newOTU);             /* replace old OTU name with new */
        p1.ni += p2.ni;                    /* combine number of individuals */
        for (i=0; ((i<MAXFS) && (p2.fs[i].f!=-5)); ++i) {  /* read #2 sites */
          found = 'F';                                /* set match to false */
          for (j=0; ((j<MAXFS) && (p1.fs[j].f!=-5)); ++j) {    /* read #1's */
            if (p1.fs[j].f == p2.fs[i].f) {        /* if the sites match... */
              p1.fs[j].no += p2.fs[i].no;  /* combine number of individuals */
              found = 'T';                                 /* match is true */
              break;                                     /* get out of loop */
            }
          }                                /* done going through #1's sites */
          if (found == 'F') {                          /* if no match found */
            p1.fs[j].f  = p2.fs[i].f;                 /* copy site/fragment */
            p1.fs[j].no = p2.fs[i].no;        /* copy number of occurrences */
          }
        }                                  /* done going through #2's sites */
        qsort(&p1.fs[0],MAXFS,sizeof(fragsite),fs_sort);    /* resort sites */
        if ((fseek(fp1,ps,SEEK_CUR)) != NULL) rserror303(fl1);   /* back up */
        if (fwrite(&p1,sizeof(pooldat),1,fp1) == NULL) rserror321(fl1);
      }                                              /* done with record #2 */
    }                                                /* done with record #1 */
  }                                                 /* done with both files */
  rewind(fp1);                                       /* close up both files */
  rewind(fp2);
  fclose(fp1);
  fclose(fp2);
}                                           /* END OF FUNCTION COMBINEFILES */
/*****************************************************************************
**                                                                          **
**                           FUNCTION REDOLISTS                             **
**                                                                          **
**   This  function is  called from FUNCTION MAIN.  It does two things: (1) **
** it alters the file "00.$$$" to  reflect the  new  number of taxa and the **
** taxa names,  and  (2) it  re-numbers  all of the  pooled  data  files to **
** reflect the fact that one of the taxa has been eliminated.               **
**   To change "00.$$$", it  receives the names of files #1 and #2, and the **
** new  OTU name.  The first thing it does is open the file "00.$$$", which **
** holds all of the memory lists, and creates an empty file "00.BK!".  Then **
** it reads the number of taxa out of 00.$$$, subtracts one, then writes it **
** to "00.BK!".  It  then  copies the number of treatments and r-classes to **
** "00.BK!".  The  list of OTUs is  then copied, during  which the OTU name **
** corresponding to  file #1 is  changed to the  new OTU name, and  the OTU **
** name corresponding to  file #2 is  eliminated.  All others are copied as **
** they appear.  After  the OTUlist  is copied  to "00.BK!", the  treatment **
** list and  r-class list are  copied as is.  "00.$$$" is then deleted, and **
** "00.BK!" is renamed to "00.$$$".                                         **
**   After this is done, the program renumbers  the pooled  data files.  It **
** starts at file #2, and deletes it.  It then takes the next file, renames **
** it to the file #2 name, then  renames the next file to the previous file **
** name, etc., until  all of  the files have been renamed.  It then returns **
** to FUNCTION MAIN.                                                        **
**                                                                          **
** Functions called:                                                        **
** opnbrdfl       --  opens a binary file for reading.                      **
** opnbwtfl       --  opens a binary file for writing.                      **
** rserror311     --  error message if error reading datafile.              **
** rserror321     --  error message if error writing to datafile.           **
*****************************************************************************/
/* Deletes one file, telling the user if the deletion fails. */
static void cmb_delete(const char *name)
{
  if (remove(name) != 0)
    printf("\aWarning:  could not delete file %s\n",name);
}

/* Renames one file, telling the user if the rename fails. */
static void cmb_rename(const char *from, const char *to)
{
  if (rename(from,to) != 0)
    printf("\aWarning:  could not rename file %s to %s\n",from,to);
}

void redolists(char fl1[], char fl2[], char newOTU[])
{
  FILE *fp1 = NULL, *fp2 = NULL;      /* file pointer for 00.$$$ and 00.BK! */
  char flnow[FILELEN];                              /* name of current file */
  char flprev[FILELEN];                            /* name of previous file */
  char *fnow;                            /* pointer to name of current file */
  int ntx = 0;                                            /* number of taxa */
  int ntr = 0;                                      /* number of treatments */
  int nrv = 0;                                       /* number of r-classes */
  int newntx;                          /* number of taxa after the combine */
  otuname on;                                                /* name of OTU */
  treatment trt;                                             /* a treatment */
  float rv;                                                   /* an r-class */
  int n = 0;                               /* file #1's slot in the OTULIST */
  int p = 0;                               /* file #2's slot in the OTULIST */
  size_t len;                        /* length of number part of file name */
  int i;                                           /* loop control variable */

  printf("Also renaming all subsequent files to fill the space left ");
  printf("by %s,\n",fl2);                                /* message to user */
  printf("and altering 00.$$$ to reflect the loss of one OTU.\n");

  fp1 = opnbrdfl(fp1,"00.$$$");                  /* open 00.$$$ for reading */
  fp2 = opnbwtfl(fp2,"00.BK!");                  /* open 00.BK! for writing */

  /********** REWRITE FILE "00.$$$", WHICH HOLDS THE MEMORY LISTS ***********/
  /* read in number of taxa, treatments, and r-classes */
  if (fread(&ntx,sizeof(int),1,fp1) != 1) rserror311("00.$$$");
  if (fread(&ntr,sizeof(int),1,fp1) != 1) rserror311("00.$$$");
  if (fread(&nrv,sizeof(int),1,fp1) != 1) rserror311("00.$$$");
  newntx = ntx - 1;                         /* one taxon is being eliminated */

  /* write amended number of taxa, treatments, and r-classes */
  if (fwrite(&newntx,sizeof(int),1,fp2) != 1) rserror321("00.BK!");
  if (fwrite(&ntr,sizeof(int),1,fp2) != 1) rserror321("00.BK!");
  if (fwrite(&nrv,sizeof(int),1,fp2) != 1) rserror321("00.BK!");

  /* GET SLOT NUMBER IN OTULIST OF FILE #1: everything except the last     */
  /* four characters of the file name (the ".$$$" extension) is taken as   */
  /* the number.  The length is clamped so that a short name cannot        */
  /* underflow the count and a long one cannot overrun flnow.              */
  len = strlen(fl1);
  len = (len > 4) ? (len - 4) : 0;
  if (len >= sizeof(flnow)) len = sizeof(flnow) - 1;
  strncpy(flnow,fl1,len);                  /* copy number part of file name */
  flnow[len] = '\0';
  n = atoi(flnow);                              /* convert it to an integer */

  /* GET SLOT NUMBER IN OTULIST OF FILE #2, in the same way */
  len = strlen(fl2);
  len = (len > 4) ? (len - 4) : 0;
  if (len >= sizeof(flprev)) len = sizeof(flprev) - 1;
  strncpy(flprev,fl2,len);                 /* copy number part of file name */
  flprev[len] = '\0';
  p = atoi(flprev);                             /* convert it to an integer */

  /* flnow starts out as file #2's name: it is the first name to be freed  */
  /* and so the first rename target in the loop at the end.                */
  strncpy(flnow,fl2,sizeof(flnow) - 1);
  flnow[sizeof(flnow) - 1] = '\0';

  for (i=0; i<ntx; ++i) {               /* go through the original OTU list */
    if (fread(on,sizeof(otuname),1,fp1) != 1) rserror311("00.$$$");
    if (i == n) strcpy(on,newOTU); /* replace old OTU name with newOTU name */
    if (i != p)      /* copy all other OTU names except the one for file #2 */
      if (fwrite(on,sizeof(otuname),1,fp2) != 1) rserror321("00.BK!");
  }
  for (i=0; i<ntr; ++i) {             /* write list of treatments to 00.BK! */
    if (fread(&trt,sizeof(treatment),1,fp1) != 1) rserror311("00.$$$");
    if (fwrite(&trt,sizeof(treatment),1,fp2) != 1) rserror321("00.BK!");
  }
  for (i=0; i<nrv; ++i) {              /* write list of r-classes to 00.BK! */
    if (fread(&rv,sizeof(float),1,fp1) != 1) rserror311("00.$$$");
    if (fwrite(&rv,sizeof(float),1,fp2) != 1) rserror321("00.BK!");
  }
  fclose(fp1);
  if (fclose(fp2) != 0) rserror321("00.BK!");  /* data may not be flushed  */

  /* The files are deleted and renamed with the C library rather than by   */
  /* handing "DEL"/"RENAME" command lines to system(): the names come from */
  /* the program's command line and must not be interpreted by a shell.    */
  cmb_delete("00.$$$");                     /* delete old version of 00.$$$ */
  cmb_rename("00.BK!","00.$$$");              /* rename new one to old name */

  /****************** RENAME ALL OF THE POOLED DATA FILES *******************/
  cmb_delete(fl2);                                        /* delete file #2 */

  /* change all other pooled files, starting at the one after file #2 */
  for (i=p+1; i<ntx; ++i) {                /* go through the remaining OTUs */
    strcpy(flprev,flnow);        /* file from last run is now previous file */
    flnow[0] = '\0';                                    /* initialize flnow */
    fnow = inttoalph(i,flnow);           /* convert i to a character string */
    /* copy only if the result is somewhere other than flnow itself;       */
    /* strcpy must not be given the same buffer as source and destination  */
    if (fnow != flnow) strcpy(flnow,fnow);                /* put into flnow */
    strcat(flnow,".$$$");                          /* add on file extension */
    cmb_rename(flnow,flprev);          /* move file i down into the free name */
  }                          /* have now renamed all of the remaining files */
}                                              /* END OF FUNCTION REDOLISTS */
/*****************************************************************************
**                                                                          **
**                             FUNCTION FS_SORT                             **
**                                                                          **
** This  function  is  called  from  FUNCTION  COMBINEFILES.  It is used in **
** conjunction with the QSORT routine to sort the  sites/fragments within a **
** pooldat data record.  All of initialization  numbers, -5, end  up at the **
** end.  Nulls  (-1),  zeros, and positive numbers are sorted to the front, **
** smallest to largest.                                                     **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
int fs_sort(const void *x, const void *y)
{
  /* qsort calls its comparison function with two const void pointers to  */
  /* array elements, so the parameters are declared that way and          */
  /* converted back to the element type here.                              */
  const fragsite *a = (const fragsite *)x;
  const fragsite *b = (const fragsite *)y;

  if (a->f == b->f) return(0);
  if (b->f == -5) return(-1);       /* unused (-5) slots sort to the end   */
  if (a->f == -5) return(1);
  /* Compare instead of subtracting: the result is -1 or 1 with the same  */
  /* sign the difference would have, but it cannot overflow or be         */
  /* truncated to zero when converted to int.                              */
  return((a->f > b->f) - (a->f < b->f));
}                                                /* END OF FUNCTION FS_SORT */
/****************************************************************************/

