/*****************************************************************************
******************************************************************************
******************************************************************************
*****                                                                    *****
*****                         PROGRAM REMDATA.C                          *****
***** A  program  for  removing  all of the  sites/fragments  found in a *****
*****          pooled data file from another pooled data file.           *****
*****    Copyright (c) 1990 by Joyce C. Miller.  All Rights Reserved.    *****
*****                                                                    *****
***** This  program  uses  pooled data  files  created  by the  RESTSITE *****
***** program (these files can  have any  kind of name; they do not have *****
***** to be of the "#.$$$" format, but they must  have been  created  by *****
***** RESTSITE).  REMDATA  reads  in  the names of two  such files, then *****
***** removes all of the  sites/fragments  found  in the first file from *****
***** the  second file.  The  first file is not  altered, but the second *****
***** one is.  Whenever a  site/fragment is  found  in a data  record in *****
***** file #1,  ALL  occurrences of that same  site/fragment are removed *****
***** from  that same  record  in file #2.  For this  reason, the second *****
***** file cannot  be used in subsequent  runs of  RESTSITE.EXE, and the *****
***** number of individuals is set to zero to ensure that it isn't.      *****
*****                                                                    *****
***** List of C functions used in this program:                          *****
*****                                                                    *****
*****     FUNCTION         LIBRARY          FUNCTION         LIBRARY     *****
*****     fclose           stdio.h          feof             stdio.h     *****
*****     fread            stdio.h          fseek            stdio.h     *****
*****     fwrite           stdio.h          printf           stdio.h     *****
*****     qsort            stdlib.h         rewind           stdio.h     *****
*****     strcpy           string.h         exit             stdlib.h    *****
*****                                                                    *****
******************************************************************************
******************************************************************************
*****************************************************************************/

/*****************************************************************************
**                             INCLUDE FILES                                **
*****************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <rstypes.h>         /* file with type definitions for this program */
#include <rserrors.h>                           /* file with error messages */
#include <rsfuncs.h>                   /* file with commonly-used functions */
/*****************************************************************************
**                            SYMBOLIC CONSTANTS                            **
*****************************************************************************/
#define VNUM 1.0         /* program version, printed in the start-up banner */
#define YEAR 1990         /* copyright year, printed in the start-up banner */
/*****************************************************************************
**                           FUNCTION PROTOTYPES                            **
*****************************************************************************/
void removedata(char fl1[], char fl2[]);    /* removes data in fl1 from fl2 */
int fs_sort(); /* qsort comparison callback for the s/f entries of a record */
/*****************************************************************************
******************************************************************************
******************************************************************************
******                                                                  ******
******                          MAIN PROGRAM                            ******
******                                                                  ******
******************************************************************************
******************************************************************************
******************************************************************************
**                                                                          **
** This function  checks the  command line, and if two  file  names are not **
** found, it prints an  error message, and exits.  If two files are listed, **
** then it calls FUNCTION REMOVEDATA to remove file #1's data from file #2. **
**                                                                          **
** Functions called:                                                        **
** removedata     --  removes  the  data found in one pooled data file from **
**                    another pooled data file.                             **
*****************************************************************************/
int main(int argc, char *argv[])
{
  /* argc, argv:  the standard command line.  argv[1] names file #1 (the    */
  /* "template") and argv[2] names file #2, the file handed to removedata() */
  /* to be rewritten.  Returns 0 once removedata() has returned; exits with */
  /* EXIT_FAILURE if the command line is unusable.                          */
  char fl1[FILELEN];            /* name of file that serves as a "template" */
  char fl2[FILELEN];        /* name of file from which data will be removed */
  int n;                             /* index of the argument being checked */

  printf("\r\nProgram REMDATA v%3.1f\r\n",VNUM);       /* copyright message */
  printf("A program for removing the data found in one pooled data file\r\n");
  printf("from another pooled data file.\r\n");
  printf("Copyright (c) %d by Joyce C. Miller.  ",YEAR);
  printf("All Rights Reserved.\r\n");

  if (argc < 3) {              /* error if not enough items on command line */
    printf("\a\r\nError 101:  Two pooled data files must be specified.  ");
    printf("The proper format of\r\n            the command line should be:");
    printf("\r\n\n            REMDATA FILENAME#1 FILENAME#2\r\n\n");
    printf("            All of these items are necessary.\r\n");
    exit(EXIT_FAILURE);               /* report the failure to the caller */
  }

  /* Both names are copied into FILELEN-byte buffers below, so refuse any   */
  /* name that would not fit together with its terminating '\0'.            */
  for (n = 1; n <= 2; ++n) {
    if (strlen(argv[n]) >= sizeof(fl1)) {
      printf("\a\r\nError:  The file name \"%s\" is too long ",argv[n]);
      printf("(the maximum is %d characters).\r\n",(int)(sizeof(fl1) - 1));
      exit(EXIT_FAILURE);
    }
  }

  strcpy(fl1,argv[1]);                           /* read in name of file #1 */
  strcpy(fl2,argv[2]);                           /* read in name of file #2 */
  removedata(fl1,fl2);                       /* remove data in fl1 from fl2 */
  return 0;
}                                                    /* END OF MAIN PROGRAM */
/*****************************************************************************
**                                                                          **
**                          FUNCTION REMOVEDATA                             **
**                                                                          **
** This function is  called  from  FUNCTION MAIN.  It receives the names of **
** file #1 and file #2, and opens them.  It  then  reads a data record from **
** each one.  The  number of individuals is set to zero to ensure that this **
** pooled  data file  cannot be  used by the RESTSITE  program later.  Each **
** site/fragment from  the  record  from  file  #1  is  compared  to  every **
** site/fragment in  the record  from  file #2.  If  the same site/fragment **
** exists in both records, it is  completely  eliminated  from  the  second **
** record.  That way, every  site/fragment found in the record from file #1 **
** is removed from the file #2 record.  The  sites/fragments in the file #2 **
** record  are re-sorted, and the record is written over the old version of **
** that same record.  Then the second record is read from each file, and so **
** on.  In this way, all of the sites/fragments that occurred in OTU #1 are **
** removed from OTU #2.                                                     **
**                                                                          **
** Functions called:                                                        **
** opnbrdfl       --  opens a binary file for reading.                      **
** opnbrwfl       --  opens a binary file for reading and writing.          **
** rserror303     --  error message if fseek error.                         **
** rserror310     --  error message if error opening a file for reading.    **
** rserror320     --  error message if error opening a file for writing.    **
** rserror321     --  error message if error writing to datafile.           **
*****************************************************************************/
void removedata(char fl1[], char fl2[])
{
  /* fl1:  name of the "template" file; it is opened for reading only.      */
  /* fl2:  name of the file whose records are rewritten in place.           */
  /* Records are processed in pairs (record k of fl1 with record k of fl2)  */
  /* until either file has no further complete record.                      */
  FILE *fp1 = NULL, *fp2 = NULL;   /* file pointers for file #1 and file #2 */
  pooldat p1,p2;                         /* record from file #1 and file #2 */
  /* sizeof yields an unsigned value, so convert to long BEFORE negating;   */
  /* negating first gives a huge positive offset where size_t is narrower.  */
  long ps = -(long)sizeof(pooldat);  /* used by FSEEK to back up one record */
  int i,j;                                        /* loop control variables */

  /* message to user */
  printf("\nRemoving data found in file %s from file %s\r\n",fl1,fl2);

  fp1 = opnbrdfl(fp1,fl1);                      /* open file #1 for reading */
  fp2 = opnbrwfl(fp2,fl2);          /* open file #2 for reading and writing */

  /* The loop is driven by the fread results rather than by feof, so that a */
  /* read error (which never sets the end-of-file flag) also ends the loop. */
  while ((fread(&p1,sizeof(pooldat),1,fp1) == 1) &&       /* read record #1 */
         (fread(&p2,sizeof(pooldat),1,fp2) == 1)) {       /* read record #2 */
    p2.ni = 0;                         /* set number of individuals to zero */
    for (i=0; ((i<MAXFS) && (p1.fs[i].f!=-5)); ++i) {      /* go through #1 */
      for (j=0; j<MAXFS; ++j) {                            /* go through #2 */
        if (p1.fs[i].f == p2.fs[j].f) {      /* if s/f #1 matches s/f #2... */
          p2.fs[j].f  = -5;                             /* delete site/frag */
          p2.fs[j].no =  0;                    /* set # occurrences to zero */
          /* NOTE(review): only the first matching entry is cleared; this   */
          /* presumes a site/fragment appears once per record -- confirm.   */
          break;                                         /* get out of loop */
        }
      }
    }
    qsort(&p2.fs[0],MAXFS,sizeof(fragsite),fs_sort);         /* re-sort s/f */
    if (fseek(fp2,ps,SEEK_CUR) != 0) rserror303(fl2);            /* back up */
    if (fwrite(&p2,sizeof(pooldat),1,fp2) != 1) rserror321(fl2);
    /* an update stream needs a positioning call between a write and the    */
    /* next read, hence this zero-distance seek                             */
    if (fseek(fp2,0L,SEEK_CUR) != 0) rserror303(fl2);
  }                                            /* go on to next two records */
  rewind(fp1);                                            /* close up files */
  rewind(fp2);
  fclose(fp1);
  if (fclose(fp2) != 0) rserror321(fl2);   /* closing flushes pending output */
}                                             /* END OF FUNCTION REMOVEDATA */
/*****************************************************************************
**                                                                          **
**                             FUNCTION FS_SORT                             **
**                                                                          **
** This  function  is  called  from  FUNCTION  REMOVEDATA.  It  is  used in **
** conjunction with the QSORT routine to sort the  sites/fragments within a **
** pooldat data record.  All the initialization numbers, -5, end  up at the **
** end.  Nulls  (-1),  zeros, and positive numbers are sorted to the front, **
** smallest to largest.                                                     **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
int fs_sort(const void *x, const void *y)
{
  /* Comparison callback in the form qsort requires:  x and y each point to */
  /* one fragsite element of the array being sorted.  Returns a negative    */
  /* value if *x sorts before *y, zero if their f fields are equal, and a   */
  /* positive value if *x sorts after *y.  An f of -5 sorts after all other */
  /* values; the remaining values sort in ascending order.                  */
  const fragsite *a = (const fragsite *)x;
  const fragsite *b = (const fragsite *)y;

  if (a->f == b->f) return(0);
  if (b->f == -5) return(-1);                     /* -5 goes to the far end */
  if (a->f == -5) return(1);
  /* compare instead of subtracting, so that no overflow can occur          */
  return ((a->f > b->f) - (a->f < b->f));
}                                                /* END OF FUNCTION FS_SORT */
/****************************************************************************/
