/*****************************************************************************
******************************************************************************
******************************************************************************
*****                                                                    *****
*****                        PROGRAM RESTSITE v1.1                       *****
*****    Copyright (c) 1990 by Joyce C. Miller.  All Rights Reserved.    *****
*****                                                                    *****
*****    This  program  takes data  file(s) created by program READTEXT, *****
***** and  performs  phylogenetic  analyses.   If  the data  consists of *****
***** mapped  restriction  sites,  then the  distance and standard error *****
***** between  OTUs are calculated by the Jukes and Cantor (1969) method *****
***** and  the  Nei  and  Li  (1979)  method.  If  the data  consists of *****
***** unmapped restriction fragments, then equations  5.53  through 5.55 *****
***** of Nei (1987) are used.  The  analyses  are  described in  greater *****
***** detail throughout the program.                                     *****
*****                                                                    *****
*****                                                                    *****
***** List of C functions used in this program:                          *****
*****                                                                    *****
*****     FUNCTION         LIBRARY          FUNCTION         LIBRARY     *****
*****     asctime          time.h           fabs             math.h      *****
*****     fclose           stdio.h          feof             stdio.h     *****
*****     fgets            stdio.h          floor            math.h      *****
*****     fopen            stdio.h          fprintf          stdio.h     *****
*****     fread            stdio.h          free             stdlib.h    *****
*****     fseek            stdio.h          fwrite           stdio.h     *****
*****     gets             stdio.h          localtime        time.h      *****
*****     log              math.h           malloc           stdlib.h    *****
*****     pow              math.h           printf           stdio.h     *****
*****     qsort            stdlib.h         rand             stdlib.h    *****
*****     rewind           stdio.h          sqrt             math.h      *****
*****     srand            stdlib.h         sscanf           stdio.h     *****
*****     strcat           string.h         strcmp           string.h    *****
*****     strcpy           string.h         strlen           string.h    *****
*****     time             time.h           toupper          ctype.h     *****
*****                                                                    *****
******************************************************************************
******************************************************************************
*****************************************************************************/

/*****************************************************************************
**                             INCLUDE FILES                                **
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <rstypes.h>             /* file with most TYPDEFs for this program */
#include <rserrors.h>          /* file with error messages for this program */
#include <rsfuncs.h>         /* a file with some functions for this program */
/*****************************************************************************
**                            TYPE DEFINITIONS                              **
*****************************************************************************/
typedef struct {              /* structure to hold the user-entered options */
  char infl[20];                                              /* input file */
  char outfl[20];                                            /* output file */
  char fllist;                       /* is input file a LIST of data files? */
  char tmpfls;                 /* use existing temporary pooled data files? */
  char dattyp;                            /* type of data: Site or Fragment */
  char setyp;   /* type of standard error: Jackknife (default) or Bootstrap */
  char matfl;                          /* if matrix files should be printed */
  char pool;                      /* pool data only -- don't do comparisons */
  char key;                                    /* key on which to sort OTUs */
} p_arms;                                          /* TOTAL SIZE = 47 BYTES */
/*****************************************************************************
**                            SYMBOLIC CONSTANTS                            **
*****************************************************************************/
#define VNUM 1.1                          /* version number of this program */
#define YEAR 1990                                 /* current copyright year */
/*****************************************************************************
**                          FUNCTION PROTOTYPES                             **
*****************************************************************************/
/************ ERROR-CHECK AND REPAIR COMMAND LINE, IF NECESSARY *************/
p_arms *check(int argc, char *argv[]);                /* reads command line */
char  getkey();                                 /* gets key if none entered */
char  getdattyp(char infl[]);             /* gets data type if none entered */
/**************************** MAKE GLOBAL LISTS *****************************/
void  readlists(char infl[], char fllist, char key); /* read previous lists */
void  makelists(char infl[], char fllist, char key);  /* makes memory lists */
void  fill_lists(char rdfl[], char key);              /* fills memory lists */
int   otusort();                /* sort the otulist into alphabetical order */
int   pecsort();           /* sort the peclist by enzymes within each probe */
int   rvsort();                    /* sort the rvarray into numerical order */
void  printlists();            /* prints contents of memory lists to screen */
void  writelists();            /* write memory lists to file for future use */
/********************** POOL DATA INTO TEMPORARY FILES **********************/
void  pooldata(char infl[], char tmpfls, char fllist, char key);
void  createpoolfl();                /* creates empty files to receive data */
void  fill_poolfl(char rdfl[], char key);   /* fills files with pooled data */
void  sortfrags(char flnm[]);     /* sorts the fragments/sites in the files */
int   fs_sort();                /* sorts the frags/sites within each record */
/**********************  SET UP DISTANCE MATRIX FILES ***********************/
void  setupmatrix();                  /* create the distance matrix file(s) */
/********************** MAKE COMPARISONS BETWEEN OTUs ***********************/
void  compareotus(char dattyp,char setyp,char matfl,char infl[],char outfl[]);
void  printheader(char infl[],char outfl[],char dattyp,char setyp);
void  tgetdata(char flnmx[], char flnmy[]);  /* gets data for all p/e combs */
/************************* ANALYZE RESTRICTION DATA *************************/
void  computestats(char dattyp);   /* conducts analysis of restriction data */
void  computefrag(int q);  /* does fragment analysis within an enzyme class */
void  computesite(int q);      /* does site analysis within an enzyme class */
void  allenzclasses(char dattyp); /* computes stats over all enzyme classes */
float eqnNL26(float x, float y, float z);          /* gets corrected F or S */
float eqnNei10_21(float x, float y, float z);           /* gets corrected d */
/************* COMPUTE JACKKNIFE AND BOOTSTRAP STANDARD ERRORS **************/
void  jackse(char dattyp);                     /* jackknife standard errors */
void  bootse(char dattyp);                     /* bootstrap standard errors */
void  initseslots(int limit, char dattyp);  /* initialize s.e. slots in RES */
void  increasesumsq(int limit, char dattyp);    /* increase sums of squares */
/*********************** PRINT RESULTS TO OUTPUT FILE ***********************/
void  printres(char dattyp);                  /* prints results of analyses */
/********************** INITIALIZATION OF STRUCTURES ************************/
void  initrvarray();                                 /* initializes rvarray */
void  initbj();                       /* initializes working version of RES */
void  initnt(char dattyp); /* initializes array of # treatments for each se */
void  copybjtores();               /* copies working results array into RES */
void  copyrvtobjdat();                        /* makes working copy of data */
/*****************************************************************************
**                           GLOBAL VARIABLES                               **
*****************************************************************************/
treatment *peclist;        /* list of all of the treatments in the data set */
otuname   otulist[MAXOTU];               /* list of the OTUs to be compared */
rclass    rvarray[MAXRVS];  /* data from all enzyme classes in the data set */
rclass    bjdat[MAXRVS];        /* "working version" of rvarray; for s.e.'s */
bjtrt     *bjlist;             /* a list of M-hat and Mxy found in each PEC */
results   res[MAXRVS+1];           /* structure for the results of analysis */
results   bj[MAXRVS+1];      /* "working version" of RES -- used for s.e.'s */
bjnt      nt[MAXRVS+1];          /* array to hold # treatments in each s.e. */
pooldat pdx, pdy;   /* have to be global -- take up too much space in stack */
int ntx = 0;                         /* total number of OTUs to be compared */
int ntr = 0;                      /* total number of treatments in data set */
int nrv = 0;                  /* total number of enzyme classes in data set */
FILE *fpw;                         /* file pointer for printing out results */
/***************************** MEMORY REQUIRED: ******************************
** NAME:        SIZE x MAX#  TOTAL BYTES                                    **
** OTULIST     =  21 x 200 =  4,200                                         **
** PECLIST     =  22 x 500 = 11,000 (freed early in program)                **
** RVARRAY     =  42 x  10 =    420                                         **
** BJLIST      =  16 x 500 =  8,000                                         **
** BJDAT       =  42 x  10 =    420                                         **
** RES         = 144 x  11 =  1,584                                         **
** BJ          = 144 x  11 =  1,584                                         **
** NT          =  64 x  11 =    704                                         **
** PDX,PDY     = 350 x   2 =    700                                         **
** NTX,NTR,NRV =   2 x   3 =      6                                         **
** TOTAL                   = 28,618  BYTES (includes peclist)               **
** TOTAL                   = 17,618  BYTES (without peclist)                */
/*****************************************************************************
******************************************************************************
******************************************************************************
******                                                                  ******
******                          MAIN PROGRAM                            ******
******                                                                  ******
******************************************************************************
******************************************************************************
******************************************************************************
**                                                                          **
** The  main part  of this  program acts as an organizational function.  It **
** prints out the title of the program, the version number, and a copyright **
** message.  It  then  passes  the command  line to  FUNCTION CHECK,  which **
** checks  it for  errors and  parses the  user-entered  options  into  the **
** structure PARMS.  It  then  calls  on functions  that construct lists of **
** OTUs, probe/enzyme combinations, and  enzyme classes  found in the data, **
** functions  that pool the data, and  that compare each OTU to every other **
** OTU.                                                                     **
**                                                                          **
** Functions called:                                                        **
** check          --  error-checks  the command line,  extracts the options **
**                    entered by the user, and places them in PARMS.        **
** readlists      --  reads file "00.$$$" for previous lists.               **
** makelists      --  reads  the  data  file(s)  and  makes  lists  of  the **
**                    different OTUs, probe/enzyme combinations, and enzyme **
**                    classes.                                              **
** pooldata       --  pools  data from  data  file(s) into temporary pooled **
**                    data files.                                           **
** setupmatrix    --  creates distance matrix  file(s) (one for each enzyme **
**                    class, and one for all classes combined).             **
** compareotus    --  compares OTUs and does appropriate analyses.          **
** rserror401     --  error message if not enough memory to hold peclist.   **
*****************************************************************************/
void main(int argc, char *argv[])
{
  p_arms parms, *ps;              /* structure to hold user-entered options */

  printf("\r\nProgram RESTSITE v%3.1f\r\n",VNUM);
  printf("A program for analyzing restriction site or fragment data.\r\n");
  printf("Copyright (c) %4.0d by Joyce C. Miller.  ",YEAR);
  printf("All Rights Reserved.\r\n");

  ps = check(argc,argv);     /* error-check command line & get user options */
  parms = *ps;                                  /* put into PARMS structure */
  printf("\r\n");

  /* allocate memory to hold list of different treatments */
  if ((peclist=(treatment *)malloc(MAXPEC*sizeof(treatment))) == NULL)
    rserror401();

  /* get lists of different OTUs, probe/enzyme combinations, enzyme classes */
  if (parms.tmpfls == 'Y') readlists(parms.infl, parms.fllist, parms.key);
  else {
    system("DEL *.$$$");  /* delete any previous lists or pooled data files */
    makelists(parms.infl, parms.fllist, parms.key);
  }

  /* create temporary files if needed, and pool data into them */
  pooldata(parms.infl, parms.tmpfls, parms.fllist, parms.key);
  free(peclist);              /* peclist no longer needed -- release memory */

  if (parms.pool != 'P') {
    /* create and set up distance matrix file(s) */
    if ((parms.matfl == 'D') || (parms.matfl == 'U')) setupmatrix();
    /* compare the different OTUs and write the results to output file */
    compareotus(parms.dattyp,parms.setyp,parms.matfl,parms.infl,parms.outfl);
  }
}                                                    /* END OF MAIN PROGRAM */
/*****************************************************************************
******************************************************************************
*****                                                                    *****
*****                             FUNCTIONS                              *****
*****                                                                    *****
******************************************************************************
*****************************************************************************/

/*****************************************************************************
**                                                                          **
**                             FUNCTION CHECK                               **
**                                                                          **
** This function is called from  FUNCTION MAIN, and checks the command line **
** for  errors,  and  parses  the  components  of the command line into the **
** structure  PARMS, which holds the various user options.  It then returns **
** this structure to FUNCTION MAIN.                                         **
**                                                                          **
** Functions called:                                                        **
** getkey         --  gets the user-entered sort key if none was entered.   **
** getdattyp      --  gets the data type (S or F) if non was entered.       **
** rserror101     --  error message if not enough items on command line.    **
*****************************************************************************/
p_arms *check(int argc, char *argv[])
{
  p_arms parms;           /* structure to hold list of user-entered options */
  int tlen = 0;      /* length of option string = number of options invoked */
  int i;                                           /* loop control variable */

  /* initialize parm structure */
  parms.infl[0]  = '\0';                                /* name of datafile */
  parms.outfl[0] = '\0';                             /* name of output file */
  parms.fllist   =  'N';          /* is INFL is really a list of datafiles? */
  parms.tmpfls   =  'N';                  /* use existing pooled datafiles? */
  parms.dattyp   =  'A';                   /* Data type -- Site or Fragment */
  parms.setyp    =  'J';   /* Standard error type -- Jackknife or Bootstrap */
  parms.matfl    =  'A';            /* are distance matrices to be printed? */
  parms.pool     =  'A';                 /* only pool data? no comparisons? */
  parms.key      =  '9';            /* Key to pool data on -- 1, 2, 3, or 4 */

  /* error-check the command line */
  if (argc < 3) rserror101();    /* if input or output file name is missing */

  /* convert input file name to uppercase */
  for (i=0; i<strlen(argv[1]); ++i) {
    if (i<FILELEN) parms.infl[i] = toupper(argv[1][i]);
    else break;
  }
  parms.infl[i] = '\0';

  /* convert output file name to uppercase */
  for (i=0; i<strlen(argv[2]); ++i) {
    if (i<FILELEN) parms.outfl[i] = toupper(argv[2][i]);
    else break;
  }
  parms.outfl[i] = '\0';

  if (argc == 3) {    /* if no options were entered, get the necessary ones */
    parms.key = getkey();                      /* get key (what to sort on) */
    parms.dattyp = getdattyp(argv[1]);         /* get site or fragment data */
  }
  if (argc == 4) tlen = strlen(argv[3]);  /* how many options were entered? */

  /* sort the options into their respective slots in PARMS structure */
  for (i=0; i<tlen; ++i) {
    if ((argv[3][i] == 'L') || (argv[3][i] == 'l')) parms.fllist = 'Y';
    if ((argv[3][i] == 'T') || (argv[3][i] == 't')) parms.tmpfls = 'Y';
    if ((argv[3][i] == 'S') || (argv[3][i] == 's')) parms.dattyp = 'S';
    if ((argv[3][i] == 'F') || (argv[3][i] == 'f')) parms.dattyp = 'F';
    if ((argv[3][i] == 'J') || (argv[3][i] == 'j')) parms.setyp  = 'J';
    if ((argv[3][i] == 'B') || (argv[3][i] == 'b')) parms.setyp  = 'B';
    if ((argv[3][i] == 'D') || (argv[3][i] == 'd')) parms.matfl  = 'D';
    if ((argv[3][i] == 'U') || (argv[3][i] == 'u')) parms.matfl  = 'U';
    if ((argv[3][i] == 'P') || (argv[3][i] == 'p')) parms.pool   = 'P';
    if (argv[3][i] == '0') parms.key = '0';
    if (argv[3][i] == '1') parms.key = '1';
    if (argv[3][i] == '2') parms.key = '2';
    if (argv[3][i] == '3') parms.key = '3';
    if (argv[3][i] == '4') parms.key = '4';
  }

  /* if still no dattyp or key, user gets one last chance to input them */
  if (parms.dattyp == 'A') parms.dattyp = getdattyp(parms.infl);
  if ((parms.key == '9') && (parms.tmpfls == 'N')) parms.key = getkey();

  return(&parms);               /* return address of PARMS to FUNCTION MAIN */
}                                                  /* END OF FUNCTION CHECK */
/*****************************************************************************
**                                                                          **
**                              FUNCTION GETKEY                             **
**                                                                          **
** This  function is  called from  FUNCTION CHECK,  and gets a key from the **
** user  if none  was entered  on the  command  line.   The key is used for **
** deciding what the OTUs are.                                              **
**                                                                          **
** Functions called:                                                        **
** rserror200     --  error message if character entered is out of range.   **
*****************************************************************************/
char getkey()
{
  char answer[10];                   /* temporary string to hold user input */
  char *a;                                     /* pointer for GETS function */
  char k;                                        /* key to be input by user */

  printf("\r\nWhat key number do you wish to base the OTU comparisons ");
  printf("on?\r\nChoices are 1, 2, 3, or 4:   ");
  a = gets(answer);
  k = toupper(answer[0]);
  if ((k!='0') && (k!='1') && (k!='2') && (k!='3') && (k!='4')) rserror200(k);
  else return(k);                           /* return KEY to FUNCTION CHECK */
}                                                 /* END OF FUNCTION GETKEY */
/*****************************************************************************
**                                                                          **
**                            FUNCTION GETDATTYP                            **
**                                                                          **
** This function is called from FUNCTION CHECK, and gets the data type from **
** the user if it was not entered on the command line.  The choices are "S" **
** for site (mapped) data, and "F" for fragment (unmapped) data.            **
**                                                                          **
** Functions called:                                                        **
** rserror200     --  error message if character entered is out of range.   **
*****************************************************************************/
char getdattyp(char infl[])
{
  char answer[10];                   /* temporary string to hold user input */
  char *a;                                     /* pointer for GETS function */
  char d;                                  /* data type to be input by user */

  printf("\r\n\nThe method of analysis to be used depends on the data to ");
  printf("be analyzed.  When\r\nthe locations of the restriction sites ");
  printf("are known (i.e. a restriction map has\r\nbeen constructed), the ");
  printf("data is best analyzed via the Jukes-Cantor (1969)\r\nmethod or ");
  printf("the Nei and Li (1979) method.  If the comparisons between \r\n");
  printf("OTUs are to be based on the number of fragments in common (i.e.,");
  printf(" restriction sites\r\nhave not been mapped), then the method of ");
  printf("Nei (1987) should be used.\r\n\n");
  printf("What kind of data is in %s?\r\n",infl);
  printf("     S : Site data (mapped restriction sites).\r\n");
  printf("     F : Fragment data (location of restriction sites ");
  printf("not known).\r\n");
  printf("Choices are S or F:   ");
  a = gets(answer);
  d = toupper(answer[0]);
  if (d != 'S' && d != 's' && d != 'F' && d != 'f') rserror200(d);
  else return(d);                        /* return DATTYP to FUNCTION CHECK */
}                                              /* END OF FUNCTION GETDATTYP */
/*****************************************************************************
**                                                                          **
**                            FUNCTION READLISTS                            **
**                                                                          **
** This function is called from FUNCTION MAIN.  It  is used if the  "T"emp- **
** files option is on.  It  opens the file  "00.$$$"  (if it  exists),  and **
** retrieves  the  old  ntx,  ntr, nrv,  otulist, peclist, and rvarray.  If **
** "00.$$$"  does  not exist  (as  in the case of pooled data files made by **
** version 1.0 of this program), then FUNCTION MAKELISTS is called, and new **
** lists are made, and written to "00.$$$".                                 **
**                                                                          **
** Functions called:                                                        **
** makelists      --  reads  the  data  file(s)  and  makes  lists  of  the **
**                    different OTUs, probe/enzyme combinations, and enzyme **
**                    classes.                                              **
** printlists     --  prints the contents of the three lists to the screen. **
** rserror311     --  error message if error reading datafile.              **
*****************************************************************************/
void readlists(char infl[], char fllist, char key)
{
  FILE *fp;
  int i;

  if ((fp = fopen("00.$$$","rb")) == NULL) {        /* if no "00.$$$" file, */
    makelists(infl,fllist,key);                               /* make lists */
    return;
  }
  if (fread(&ntx,sizeof(int),1,fp) == NULL) rserror311("00.$$$");
  if (fread(&ntr,sizeof(int),1,fp) == NULL) rserror311("00.$$$");
  if (fread(&nrv,sizeof(int),1,fp) == NULL) rserror311("00.$$$");
  for (i=0; i<ntx; ++i)
    if (fread(otulist[i],sizeof(otuname),1,fp) == NULL) rserror311("00.$$$");
  for (i=0; i<ntr; ++i)
    if (fread(&peclist[i],sizeof(treatment),1,fp) == NULL)
      rserror311("00.$$$");
  for (i=0; i<nrv; ++i)
    if (fread(&rvarray[i],sizeof(float),1,fp) == NULL)
      rserror311("00.$$$");
  rewind(fp);
  fclose(fp);
  printlists();
}                                              /* END OF FUNCTION READLISTS */
/*****************************************************************************
**                                                                          **
**                            FUNCTION MAKELISTS                            **
**                                                                          **
** This  function  is called  from  FUNCTION MAIN.  It initializes OTULIST, **
** PECLIST, and  RVARRAY.  It  then reads  the INFL, and determines whether **
** or not it is an actual data file, or a list of such files.  It then goes **
** through the file(s) and determines how many different OTUs, probe/enzyme **
** combinations, and  enzyme classes  there are, and makes lists of each of **
** them  (the  globals OTULIST, PECLIST, and  RVARRAY).  If  the  INFL is a **
** datafile, then it is passed to FUNCTION FILL_LISTS, which reads the data **
** and fills the  three global lists.  If INFL is a list of datafiles, then **
** each datafile name in turn is passed to  FUNCTION  FILL_LISTS.  When the **
** last  datafile name  (or the  name of INFL itself) is passed to FUNCTION **
** FILL_LISTS,  the  boolean  DONE is  changed  to 'Y' to signal the end of **
** operations.                                                              **
**                                                                          **
** Functions called:                                                        **
** opntrdfl       --  opens  text  file  (file  of list of data files)  for **
**                    reading.                                              **
** fill_lists     --  goes  through  the  data  file,  filling in the three **
**                    global lists.                                         **
** pecsort        --  used with QSORT to sort the treatments in PECLIST.    **
** otusort        --  used with QSORT to sort the otunames in the OTULIST.  **
** printlists     --  prints the contents of the three lists to the screen. **
** writelists     --  writes the lists to file "00.$$$".                    **
*****************************************************************************/
void makelists(char infl[], char fllist, char key)
{
  char tempstr[MAXSTRLEN];                              /* temporary string */
  char done = 'N';                                     /* are we there yet? */
  char flnm[20];                               /* string for temp file name */
  FILE *fpl;             /* file pointer for reading file list & temp files */
  int i;                                           /* loop control variable */

  printf("Making lists of the different OTUs and probe/enzyme ");
  printf("combinations:\r\n\nReading file...\r\n");

  for (i=0; i<MAXOTU; ++i) otulist[i][0] = '\0';      /* initialize OTULIST */
  for (i=0; i<MAXPEC; ++i) {                          /* initialize PECLIST */
    peclist[i].prb[0] = '\0';
    peclist[i].enz[0] = '\0';
    peclist[i].rv     =   0;
  }
  for (i=0; i<MAXRVS; ++i) {              /* initialize elements in RVARRAY */
    rvarray[i].rv  = 0;
    rvarray[i].nt  = 0;
    rvarray[i].xmx = 0;
    rvarray[i].xmy = 0;
    rvarray[i].xmz = 0;
    rvarray[i].ymx = 0;
    rvarray[i].ymy = 0;
    rvarray[i].ymz = 0;
    rvarray[i].mx  = 0;
    rvarray[i].my  = 0;
    rvarray[i].mz  = 0;
  }

  strcpy(flnm,infl);
  if (fllist == 'Y') fpl = opntrdfl(fpl,flnm);       /* open if a file list */
  while (done == 'N') {                           /* go through datafile(s) */
    if (fllist == 'Y') {                              /* get next file name */
      if ((fgets(tempstr,MAXSTRLEN,fpl)) != NULL) sscanf(tempstr,"%s",flnm);
      else done = 'Y';                       /* if a blank line, we're done */
    }
    if (done == 'N') fill_lists(flnm,key);             /* fill memory lists */
    if (fllist == 'N') done = 'Y';  /* if infl was the datafile, we're done */
  }                                            /* done with all datafile(s) */
  qsort(otulist,ntx,sizeof(otuname),otusort);               /* sort OTULIST */
  qsort(peclist,ntr,sizeof(treatment),pecsort);             /* sort PECLIST */
  qsort(rvarray,nrv,sizeof(rclass),rvsort);                 /* sort RVARRAY */
  printlists();                  /* print contents of three lists to screen */
  writelists();                 /* write contents of lists to file "00.$$$" */
}                                              /* END OF FUNCTION MAKELISTS */
/*****************************************************************************
**                                                                          **
**                          FUNCTION FILL_LISTS                             **
**                                                                          **
** This function is called from  FUNCTION MAKELISTS.  It  goes  through the **
** data files, and determines how many different  OTUs, how many treatments **
** (probe/enzyme combinations), and how many different enzyme classes there **
** are.  It does this by taking a record from the data file, and reading it **
** into temporary record  d.   The appropriate key (choices  1, 2, 3, or 4, **
** chosen by user) in d is then compared to every OTU name in  OTULIST.  If **
** it  isn't  on the list,  it is added.  The treatment  (d.prb, d.enz, and **
** d.rv)  is  then  compared  to every treatment element in PECLIST.  If it **
** is not in  this list, it is added.  The same is then done for the enzyme **
** class  (d.rv) list  RVARRAY.   Before  comparing and adding an item to a **
** list, the program checks to see if the last slot on the list had already **
** been  filled.  If it  has,  then an error message is called, and it asks **
** the user if he/she  wants to continue,  since  any  OTUs, treatments, or **
** enzyme classes beyond the limits of  MAXOTU,  MAXPEC, or  MAXRVS will be **
** ignored.                                                                 **
**                                                                          **
** Functions called:                                                        **
** opnbrdfl       --  opens the binary data file for reading.               **
** rserror211     --  error if too many OTUs are found.                     **
** rserror212     --  error if too many probe/enzyme combinations found.    **
** rserror213     --  error if too many enzyme classes are found.           **
** rserror311     --  error message if error reading datafile.              **
*****************************************************************************/
void fill_lists(char rdfl[], char key)
{
  FILE *fpd;                                  /* file pointer for data file */
  fragdat d;                               /* current record being examined */
  treatment t;                                         /* current treatment */
  int i;                                           /* loop control variable */

  printf(":%s:\r\n",rdfl);                  /* print name of file to screen */
  fpd = opnbrdfl(fpd,rdfl);                                /* open datafile */

  while (!feof(fpd)) {                               /* go through datafile */
    if (fread(&d,sizeof(fragdat),1,fpd) != NULL) {          /* get a record */

      /* see if the otuname for this record is already in the otulist */
      if (ntx < MAXOTU) {              /* don't go outside memory allocated */
        for (i=0; i<ntx; ++i) {          /* locate this OTU name in OTULIST */
          if (key == '0') if ((strcmp(otulist[i],d.id))   == NULL) break;
          if (key == '1') if ((strcmp(otulist[i],d.key1)) == NULL) break;
          if (key == '2') if ((strcmp(otulist[i],d.key2)) == NULL) break;
          if (key == '3') if ((strcmp(otulist[i],d.key3)) == NULL) break;
          if (key == '4') if ((strcmp(otulist[i],d.key4)) == NULL) break;
        }
        if (i >= ntx) {                    /* if not found on list, add it */
          if (key == '0') strcpy(otulist[ntx++],d.id);
          if (key == '1') strcpy(otulist[ntx++],d.key1);
          if (key == '2') strcpy(otulist[ntx++],d.key2);
          if (key == '3') strcpy(otulist[ntx++],d.key3);
          if (key == '4') strcpy(otulist[ntx++],d.key4);
        }
      }
      else rserror211(key);                       /* if too many OTUs found */

      /* see if the treatment from this record is already in peclist */
      if (ntr < MAXPEC) {              /* don't go outside memory allocated */
        for (i=0; i<ntr; ++i) {         /* locate this treatment in PECLIST */
          t = peclist[i];
          if treatmatch break;
        }
        if (i >= ntr) {                             /* if not found, add it */
          strcpy(peclist[ntr].prb,d.prb);
          strcpy(peclist[ntr].enz,d.enz);
          peclist[ntr++].rv = d.rv;
        }
      }
      else rserror212();           /* if too many probe/enzyme combinations */

      /* see if the enzyme class for this record is already in rvarray */
      if (nrv < MAXRVS) {              /* don't go outside memory allocated */
        for (i=0; i<nrv; ++i)        /* locate this enzyme class in RVARRAY */
        if (d.rv == rvarray[i].rv) break;
        if (i >= nrv) rvarray[nrv++].rv = d.rv;     /* if not found, add it */
      }
      else rserror213();                /* if too many enzyme classes found */
    }
    else if (!feof(fpd)) rserror311(rdfl);     /* error in reading datafile */
  }
  rewind(fpd);
  fclose(fpd);                                            /* close datafile */
}                                             /* END OF FUNCTION FILL_LISTS */
/*****************************************************************************
**                                                                          **
**                             FUNCTION OTUSORT                             **
**                                                                          **
** This  function  is  called  from  FUNCTION  MAKELISTS.  It  is  used  in **
** conjunction with the QSORT routine to sort the OTULIST into alphabetical **
** order.                                                                   **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
int otusort(x, y)
otuname *x, *y;
{
  int i;
  if ((i = strcmp(*x,*y)) != NULL) return(i);           /* sort on otu name */
}                                                /* END OF FUNCTION PECSORT */
/*****************************************************************************
**                                                                          **
**                             FUNCTION PECSORT                             **
**                                                                          **
** This  function  is  called  from  FUNCTION  MAKELISTS.  It  is  used  in **
** conjunction with the QSORT routine to sort the PECLIST into order, first **
** by probe, then enzyme, then by rv.  The final order of the list would be **
** something like:  PRB1/ENZ1/RV, PRB1/ENZ2/RV, PRB1/ENZ3/RV, PRB2/ENZ1/RV, **
** PRB2/ENZ2/RV, PRB2/ENZ3/RV, PRB3/ENZ1/RV, etc.                           **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
int pecsort(x, y)
treatment *x, *y;
{
  int i;

  if ((i = strcmp(x->prb,y->prb)) != NULL) return(i); /* sort on probe name */
  if ((i = strcmp(x->enz,y->enz)) != NULL) return(i);     /* sort on enzyme */
  if (x->rv > y->rv) return(1);                               /* sort on rv */
  if (x->rv == y->rv) return(0);
  return(-1);
}                                                /* END OF FUNCTION PECSORT */
/*****************************************************************************
**                                                                          **
**                             FUNCTION RVSORT                              **
**                                                                          **
** This  function  is  called  from  FUNCTION  MAKELISTS.  It  is  used  in **
** conjunction  with the QSORT routine to  sort the  RVARRAY into numerical **
** order.                                                                   **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
int rvsort(x, y)
rclass *x, *y;
{
  if (x->rv > y->rv) return(1);                               /* sort on rv */
  return(-1);
}                                                 /* END OF FUNCTION RVSORT */
/*****************************************************************************
**                                                                          **
**                           FUNCTION PRINTLISTS                            **
**                                                                          **
** This  function  is  called from  FUNCTION  MAKELISTS.  It prints out the **
** contents of  OTULIST,  PECLIST, and  RVARRAY to the  screen, so that the **
** user   can  be  sure  that  the  OTUs,  probe/enzyme  combinations,  and **
** restriction-site sizes are correct.                                      **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
void printlists()
{
  int i,j,k,l;                                    /* loop control variables */

  /* print out the different OTUs found in the data file */
  printf("\r\n%d different OTUs were found.  They are:\r\n",ntx);
  for (i=0; i<ntx; ++i)                                   /* print all OTUs */
    for (j=0; j<3; ++j)                            /* print 3 OTUs per line */
      if (i < ntx) {
         printf("     %s",otulist[i]);                    /* print OTU name */
        if (strlen(otulist[i]) < KEYLEN)             /* pad out with blanks */
          for (k=strlen(otulist[i]); k<KEYLEN; ++k) printf(" ");
        if (j == 2) printf("\r\n");                            /* line feed */
        else i++;
      }

  /* print out the different probe/enzyme combinations found */
  printf("\r\n%d different probe/enzyme combinations were found.  ",ntr);
  printf("They are:\r\n");
  for (i=0; i<ntr; ++i)                                   /* print all OTUs */
    for (j=0; j<3; ++j)                            /* print 3 OTUs per line */
      if (i < ntr) {
        printf("     %s / %s",peclist[i].prb,peclist[i].enz);
        k = strlen(peclist[i].prb)+strlen(peclist[i].enz);
        if (k < PRBLEN+ENZLEN)
          for (l=k; l<PRBLEN+ENZLEN; ++l) printf(" ");   /* pad with blanks */
        if (j == 2) printf("\r\n");                            /* line feed */
        else i++;
      }

  /* print out the different enzyme classes found in the data file */
  if (nrv == 1)
    printf("\r\n1 restriction enzyme class was found.  It is:\r\n");
  else {
    printf("\r\n%d different restriction enzyme classes were found.  ",nrv);
    printf("They are:\r\n");
  }
  for (i=0; i<nrv; ++i) printf("    %3.1f",rvarray[i].rv);
  printf("\r\n\n");
}                                             /* END OF FUNCTION PRINTLISTS */
/*****************************************************************************
**                                                                          **
**                            FUNCTION WRITELISTS                           **
**                                                                          **
** This function is called from  FUNCTION MAKELISTS.  Once that function is **
** done getting  otulist, peclist, rvarray,  ntx,  ntr, and  nrv, it writes **
** them all to  binary  file "00.$$$".  First, it writes the three integers **
** ntx, ntr, and nrv.  Then it writes ntx otunames, ntr treatments, and nrv **
** floats.                                                                  **
**                                                                          **
** Functions called:                                                        **
** opnbwtfl       --  opens a binary file for writing.                      **
** rserror321     --  error message if error writing to datafile.           **
*****************************************************************************/
void writelists()
{
  FILE *fp;
  int i;

  fp = opnbwtfl(fp,"00.$$$");
  if (fwrite(&ntx,sizeof(int),1,fp) == NULL) rserror321("00.$$$");
  if (fwrite(&ntr,sizeof(int),1,fp) == NULL) rserror321("00.$$$");
  if (fwrite(&nrv,sizeof(int),1,fp) == NULL) rserror321("00.$$$");
  for (i=0; i<ntx; ++i)
    if (fwrite(otulist[i],sizeof(otuname),1,fp) == NULL)
      rserror321("00.$$$");
  for (i=0; i<ntr; ++i)
    if (fwrite(&peclist[i],sizeof(treatment),1,fp) == NULL)
      rserror321("00.$$$");
  for (i=0; i<nrv; ++i)
    if (fwrite(&rvarray[i],sizeof(float),1,fp) == NULL)
      rserror321("00.$$$");
  rewind(fp);
  fclose(fp);
}                                             /* END OF FUNCTION WRITELISTS */
/*****************************************************************************
**                                                                          **
**                            FUNCTION POOLDATA                             **
**                                                                          **
** This  function is  called from FUNCTION MAIN.  It organizes and controls **
** the pooling  of the data  into temporary files.  If  pooled  data  files **
** already  exist,  it makes  sure that  there are enough of them, and that **
** the  OTUs in  them correspond  to the OTUs  in OTULIST.  If there are no **
** pooled data files, it calls on FUNCTION CREATEPOOLFL to create them, and **
** FUNCTION FILL_POOLFL to fill them.  It then returns to FUNCTION MAIN.    **
**                                                                          **
** Functions called:                                                        **
** inttoalph      --  turns an integer into a character string.             **
** opnbrdfl       --  open binary file for reading.                         **
** createpoolfile --  creates empty files to receive pooled data.           **
** opntrdfl       --  opens text file for reading.                          **
** fill_poolfl    --  reads contents of data file, and pools raw data into  **
**                    the pooled data files.                                **
** sortfrags      --  sorts the sites/fragments in the pooled data files.   **
** rserror214     --  error  message if  OTUs  in  pooled  data files don't **
**                    match OTUs in OTULIST.                                **
** rserror311     --  error message if error reading file.                  **
*****************************************************************************/
void pooldata(char infl[], char tmpfls, char fllist, char key)
{
  char tempstr[MAXSTRLEN];                              /* temporary string */
  char done = 'N';                                     /* are we there yet? */
  char flnm[20];                               /* string for temp file name */
  FILE *fpl;             /* file pointer for reading file list & temp files */
  char *f;                                    /* pointer for temp file name */
  int i;                                           /* loop control variable */

  strcpy(flnm,infl);
  if (tmpfls == 'Y') {             /* if pooled data files already exist... */
    printf("Checking temporary files...     ");
    for (i=0; i<ntx; ++i) {       /* check to see if the files really exist */
      flnm[0] = '\0';                       /* create temp file name from i */
      f = inttoalph(i,flnm);
      strcpy(flnm,f);
      f = strcat(flnm,".$$$");
      fpl = opnbrdfl(fpl,flnm);                      /* open that temp file */
      /* read one data record, if OTUs match = ok, if not = error */
      if (fread(&pdx,sizeof(pooldat),1,fpl) == NULL) rserror311(flnm);
      else if ((strcmp(pdx.otu,otulist[i])) != NULL) rserror214(key);
      rewind(fpl);
      fclose(fpl);                                        /* close the file */
    }
    printf("Temporary files okay.\r\n\n");
  }
  else {                               /* create and fill pooled data files */
    createpoolfl(key);                   /* create a temp file for each OTU */
    printf("Now pooling data from data records...\r\n");
    if (fllist == 'Y') fpl = opntrdfl(fpl,flnm);   /* open if list of files */
    while (done == 'N') {                         /* go through datafile(s) */
      if (fllist == 'Y') {           /* if it's a list, get a datafile name */
        if ((fgets(tempstr,MAXSTRLEN,fpl)) != NULL) sscanf(tempstr,"%s",flnm);
        else done = 'Y';                 /* if end of file list, we're done */
      }
      if (done == 'N') fill_poolfl(flnm,key);   /* pool data from this file */
      if (fllist == 'N') done = 'Y';          /* if only 1 file, we're done */
    }
    printf("Sorting sites/fragments in pooled data files...\r\n");
    for (i=0; i<ntx; ++i) {   /* sort the fragments/sites in the temp files */
      flnm[0] = '\0';                       /* create temp file name from i */
      f = inttoalph(i,flnm);
      strcpy(flnm,f);
      f = strcat(flnm,".$$$");
      sortfrags(flnm);
    }
  }
}                                               /* END OF FUNCTION POOLDATA */
/*****************************************************************************
**                                                                          **
**                         FUNCTION CREATEPOOLFL                            **
**                                                                          **
** This function is called from  FUNCTION POOLDATA.  It creates a temporary **
** file  for each  OTU in the OTULIST.  First,  it creates a  file with the **
** name "#.$$$",  where "#" is the OTU's number in OTULIST  (e.g. the first **
** OTU's  file is  "0.$$$", the second's  is  "1.$$$", etc.).  It then runs **
** through  PECLIST, and  fills the  temporary  OTU file with as many blank **
** POOLDAT-type records as there are treatments in  PECLIST.  As each blank **
** record is added, the  treatment  (from  PECLIST) is copied into it.  The **
** OTU files are then ready to receive the pooled data.                     **
**                                                                          **
** Functions called:                                                        **
** inttoalph      --  turns an integer into a character string.             **
** opnbwtfl       --  opens a binary file for writing.                      **
** rserror321     --  error message if error writing to the temp file.      **
*****************************************************************************/
void createpoolfl()
{
  char flnm[20];                               /* string for temp file name */
  FILE *fp;                             /* file pointer for temporary files */
  int i,j;                                        /* loop control variables */
  char *f;                                    /* pointer for temp file name */

  printf("Creating temporary OTU files...\r\n");
  for (i=0; i<MAXFS; ++i) {                        /* initialize data slots */
    pdx.fs[i].f = -5;
    pdx.fs[i].no = 0;
  }
  pdx.tn = 0;                            /* initialize total number of data */
  pdx.ni = 0;                           /* initialize number of individuals */

  for (i=0; i<ntx; ++i) {                                /* for each OTU... */
    flnm[0] = '\0';                                /* create temp file name */
    f = inttoalph(i,flnm);
    strcpy(flnm,f);
    f = strcat(flnm,".$$$");
    fp = opnbwtfl(fp,flnm);                                 /* create tmpfl */
    strcpy(pdx.otu,otulist[i]);                            /* copy OTU name */
    for (j=0; j<ntr; ++j) {                        /* for each treatment... */
      strcpy(pdx.prb,peclist[j].prb);                         /* copy probe */
      strcpy(pdx.enz,peclist[j].enz);                        /* copy enzyme */
      pdx.rv = peclist[j].rv;                          /* copy enzyme class */
      if (fwrite(&pdx,sizeof(pooldat),1,fp) == NULL)   /* write pdx to file */
        rserror321(flnm);
    }
    rewind(fp);
    fclose(fp);                                           /* close OTU file */
  }
}                                           /* END OF FUNCTION CREATEPOOLFL */
/*****************************************************************************
**                                                                          **
**                         FUNCTION FILL_POOLFL                             **
**                                                                          **
** This function is called from  FUNCTION POOLDATA.  It pools the data from **
** the  data  file into the various  OTU  temporary files.  It goes through **
** each record in the datafile, gets the OTU name, and locates that name in **
** the  OTULIST.  It  then  opens that  OTU's temporary file.  Then it goes **
** through  PECLIST, locates the  appropriate  treatment, and  then goes to **
** that  same treatment on the  OTU temporary file.  The data is then added **
** to  that  temporary  file record.  The  net result is a list of lists of **
** lists: there is a  file for each  OTU,  and within  each of these files, **
** there  is a  record  for each  probe/enzyme combination.  Within each of **
** these records is a list of all of the fragments/sites found in that  OTU **
** for that  probe/enzyme combination, and how many times they  were found. **
** This  information on the  "frequency"  of  occurrence  of the bands will **
** enable rapid calculation of the within-OTU diversity.                    **
**                                                                          **
** Functions called:                                                        **
** inttoalph      --  turns an integer into a character string.             **
** opnbrdfl       --  opens the data file for reading.                      **
** rserror215     --  error if MAXFS is reached.                            **
** rserror303     --  error message if fseek error.                         **
** rserror310     --  error message if error opening for reading.           **
** rserror311     --  error message if read error in temporary file.        **
** rserror321     --  error message if error writing to file.               **
*****************************************************************************/
void fill_poolfl(char rdfl[], char key)
{
  FILE *fpd, *fp;              /* file pointer for datafile and for otufile */
  char flnm[20], *f;           /* string & pointer for making otufile names */
  treatment t;                               /* temporary treatment records */
  fragdat d;                                        /* temporary datarecord */
  int i;                                                /* looping variable */
  register int j,k;                          /* most-used looping variables */
  long recno = 1;                       /* current record number (for user) */
  long a = 0;                             /* negative record size for FSEEK */
  long b = 0;                             /* positive record size for FSEEK */
  long c = 0;                                    /* record number for FSEEK */
  long e = 0;                           /* (record number x size) for FSEEK */
  a   = -sizeof(pooldat);                               /* for use in FSEEK */
  b   =  sizeof(pooldat);                               /* for use in FSEEK */
  printf(":%s:\r\n",rdfl);                 /* print datafile name to screen */
  fpd =  opnbrdfl(fpd,rdfl);                               /* open datafile */

  while (fread(&d,sizeof(fragdat),1,fpd) != NULL) {    /* read a datarecord */
    printf("\r%d",recno++);                     /* print data record number */

    for (i=0; i<ntx; ++i) {      /* use KEY to find matching OTU in otulist */
      if (key == '0') if ((strcmp(otulist[i],d.id))   == NULL) break;
      if (key == '1') if ((strcmp(otulist[i],d.key1)) == NULL) break;
      if (key == '2') if ((strcmp(otulist[i],d.key2)) == NULL) break;
      if (key == '3') if ((strcmp(otulist[i],d.key3)) == NULL) break;
      if (key == '4') if ((strcmp(otulist[i],d.key4)) == NULL) break;
    }

    if (i < ntx) {              /* pool data only if otu name is in OTULIST */
      flnm[0] = '\0';
      f = inttoalph(i,flnm);                       /* create temp file name */
      strcpy(flnm,f);
      f = strcat(flnm,".$$$");
      if ((fp = fopen(flnm,"rb+")) == NULL) rserror310(flnm);  /* open file */
      rewind(fp);

      for (i=0; i<ntr; ++i) {         /* find matching treatment in peclist */
        t = peclist[i];
        if treatmatch break;
      }

      if (i < ntr) {           /* pool data only if treatment is in PECLIST */
        c = i;                     /* convert treatment number to type LONG */
        e = b * c;                                     /* go to that record */
        if ((fseek(fp,e,SEEK_SET)) != NULL) rserror303(flnm);

        /* begin pooling data */
        if (fread(&pdx,sizeof(pooldat),1,fp) != NULL) { /* get that record # */
          pdx.ni += 1;                   /* increment number of individuals */
          for (j=0; (j<NUMFS) && (d.f[j] != -5); ++j)  /* go thru data in d */
          for (k=0; k<MAXFS; ++k) {                   /* go thru data in pd */
            if (d.f[j] == pdx.fs[k].f) {       /* if site already on list.. */
              if (d.f[j] >= 0) {                   /* if site is not a null */
                pdx.tn += 1;
                pdx.fs[k].no += 1;
              }
              break;
            }
            else if (pdx.fs[k].f == -5) {                 /* if not, add it */
              pdx.fs[k].f  = d.f[j];                           /* copy site */
              if (d.f[j] >= 0) {                           /* if not a null */
                pdx.fs[k].no = 1;
                pdx.tn += 1;
              }
              break;
            }
          }                                              /* end of "k" loop */
          /* error if MAXFS is reached */
          if (k == MAXFS) rserror215(pdx.otu,pdx.prb,pdx.enz);
        }                                              /* end of "if fread" */
        else rserror311(flnm);            /* if error reading temp OTU file */

        if ((fseek(fp,a,SEEK_CUR)) != NULL)   /* back up to start of record */
          rserror303(flnm);
        if (fwrite(&pdx,sizeof(pooldat),1,fp) == NULL)      /* write record */
          rserror321(flnm);
        rewind(fp);
        fclose(fp);                             /* close temporary OTU file */
      }                                            /* end of "if (i < ntr)" */
    }                                              /* end of "if (i < ntx)" */
  }                                                    /* end of while loop */
  if (!feof(fpd)) rserror311(rdfl);            /* if error reading datafile */

  printf("\r\n");
  rewind(fpd);
  fclose(fpd);                                        /* close up data file */
}                                            /* END OF FUNCTION FILL_POOLFL */
/*****************************************************************************
**                                                                          **
**                            FUNCTION SORTFRAGS                            **
**                                                                          **
** This  function is  called from FUNCTION POOLDATA.  Once  all of the data **
** files have been read, and the data  has been placed into the pooled data **
** files, the  sites/fragments  within each record of each file are sorted. **
**                                                                          **
** Functions called:                                                        **
** fs_sort        --  used with QSORT to sort the sites/fragments.          **
** rserror303     --  error message if fseek error.                         **
** rserror310     --  error message if error opening for reading.           **
** rserror321     --  error message if error writing to file.               **
*****************************************************************************/
void sortfrags(char flnm[])
{
  FILE *fp;
  long ps = -sizeof(pooldat);

  if ((fp = fopen(flnm,"rb+")) == NULL) rserror310(flnm);
  rewind(fp);
  while (!feof(fp)) {
    if ((fseek(fp,(long)0,SEEK_CUR)) != NULL) rserror303(flnm);
    if (fread(&pdx,sizeof(pooldat),1,fp) != NULL) {
      qsort(&pdx.fs[0],MAXFS,sizeof(fragsite),fs_sort);
      if ((fseek(fp,ps,SEEK_CUR)) != NULL) rserror303(flnm);
      if (fwrite(&pdx,sizeof(pooldat),1,fp) == NULL) rserror321(flnm);
    }
  }
  rewind(fp);
  fclose(fp);
}                                              /* END OF FUNCTION SORTFRAGS */
/*****************************************************************************
**                                                                          **
**                             FUNCTION FS_SORT                             **
**                                                                          **
** This  function  is  called  from  FUNCTION  SORTFRAGS.  It  is  used  in **
** conjunction with the QSORT routine to sort the  sites/fragments within a **
** pooldat data record.  All of initialization  numbers, -5, end  up at the **
** end.  Nulls  (-1),  zeros, and positive numbers are sorted to the front, **
** smallest to largest.                                                     **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
int fs_sort(x,y)
fragsite *x, *y;
{
  if ((x->f) == (y->f)) return(0);
  if (y->f == -5) return(-1);
  if (x->f == -5) return(1);
  return ((x->f) - (y->f));
}                                                /* END OF FUNCTION FS_SORT */
/*****************************************************************************
**                                                                          **
**                           FUNCTION SETUPMATRIX                           **
**                                                                          **
** This  function is  called from  FUNCTION  MAIN if the user has requested **
** that the Jukes-Cantor  distances or d-hats be printed to distance matrix **
** files.  It creates a file for each enzyme class, and one for  all enzyme **
** classes combined (if only one class, it  creates only one file).  All of **
** these files have names of type  "#.MAT", where  "#" is the number of the **
** enzyme class.  The OTUs are listed, one per line. A blank line  follows. **
** The file(s) are then closed, and control passed back to FUNCTION MAIN.   **
** NOTE: This  function only creates the files, and prints the OTU list and **
**       blank line.  The  distances  and the restriction class is added to **
**       each file by FUNCTION COMPAREOTUS.  Two  kinds of  matrix  formats **
**       are allowed, "D", lower left, and "U"pper right:                   **
**                                                                          **
**           OTU1NAME                               OTU1NAME                **
**           OTU2NAME                               OTU2NAME                **
**           OTU3NAME                               OTU3NAME                **
**           OTU4NAME                               OTU4NAME                **
**           etc.                                   etc.                    **
**                              <--  OR  -->                                **
**           D1x2                                   D1x2 D1x3 D1x4          **
**           D1x3 D2x3                                   D2x3 D2x4          **
**           D1x4 D2x4 D3x4                                   D3x4          **
**           etc.                                             etc.          **
**                                                                          **
**           Number of nucleotides in restriction site = 6.0                **
**                                                                          **
** Functions called:                                                        **
** inttoalph      --  turns an integer into a character string.             **
** opntwtfl       --  opens/creates a text file for writing.                **
*****************************************************************************/
void setupmatrix()
{
  char flnmd[20];             /* string for name of distance matrix file(s) */
  char *fd;                                  /* pointer to matrix file name */
  FILE *fpd;                                  /* pointer for matrix file(s) */
  int i,j,k;                                      /* loop control variables */

  if (nrv == 1) k = 1;                  /* if 1 r-class, make 1 matrix file */
  else k = nrv + 1;                  /* 1 file per r-class, & 1 for overall */
  for (i=0; i<k; ++i) {         /* make 1 file per r-class, & 1 for overall */
    flnmd[0] = '\0';                  /* initialize name of distance matrix */
    fd = inttoalph(i,flnmd);                     /* convert "i" into string */
    strcpy(flnmd,fd);            /* copy string into name for distance file */
    fd = strcat(flnmd,".MAT");              /* add ending for distance file */
    fpd = opntwtfl(fpd,flnmd);                   /* open that distance file */
    for (j=0; j<ntx; ++j) fprintf(fpd,"%s\n",otulist[j]);     /* print OTUs */
    fprintf(fpd,"\n");                              /* print one blank line */
    rewind(fpd);                                       /* close matrix file */
    fclose(fpd);
  }
}                                            /* END OF FUNCTION SETUPMATRIX */
/*****************************************************************************
**                                                                          **
**                          FUNCTION COMPAREOTUS                            **
**                                                                          **
** This function  is called from  FUNCTION MAIN.  It compares the  data for **
** every OTU to the data for  every other  OTU,  and calls the  appropriate **
** mathematics  function which actually computes the distances and standard **
** errors.  Each  pairwise  comparison is  executed once  (e.g.  OTU  A  is **
** compared to B, but not B to A).  The temporary OTU files are opened, and **
** the  first  pooled data  record is  retrieved  from each.  The number of **
** sites/fragments in each is counted (mx and my), and the number in common **
** (mxy).  This is added to a running total of Mx, My, and Mxy.  There is a **
** running total  for  each enzyme  class.  When all of the treatments have **
** been  examined, the running  totals for each  r-class  for that pairwise **
** comparison are  passed to  functions that compute  relatedness, distance **
** measures, and standard error.  OTU A is then compared to C, etc.  If the **
** user has specified that distance matrix files are to be printed, then it **
** is FUNCTION COMPAREOTUS that actually prints the distances.              **
**                                                                          **
** Functions called:                                                        **
** inttoalph      --  converts an integers into a character string.         **
** opntwtfl       --  opens a text file for writing.                        **
** opntapfl       --  opens a text file for appending.                      **
** printheader    --  prints  a  short  description to outfile,  explaining **
**                    what kind of statistics were performed.               **
** tgetdata       --  opens  pooled  data  files  for  the  two  OTUs being **
**                    compared, and feeds data into rvarray.                **
** computestats   --  computes statistics for site or fragment data.        **
** jackse         --  calculates jackknife standard errors.                 **
** bootse         --  calculates bootstrap standard errors.                 **
** printres       --  prints results from data analyses.                    **
** rserror402     --  error message if not enough memory for bjlist.        **
*****************************************************************************/
void compareotus(char dattyp,char setyp,char matfl,char infl[],char outfl[])
{
  char flnmx[20], flnmy[20];                  /* names for OTUs X & Y files */
  char *fx, *fy;                  /* pointers to names for OTUs X & Y files */
  char flnmd[20];                    /* file name for distance matrix files */
  char *fd;               /* pointer to file name for distance matrix files */
  FILE *fpd;                      /* file pointer for distance matrix files */
  int i,j;              /* loop control variables for OTU - OTU comparisons */
  int k,l,m;           /* loop control variables for accessing matrix files */
  int Is,If,Js,Jf;             /* start and finish points for i and j loops */

  if (nrv == 1) l = 1;       /* loop endpoint when making distance matrices */
  else l = nrv + 1;

  if ((bjlist = (bjtrt *)malloc(ntr*sizeof(bjtrt))) == NULL) rserror402();
  fpw = opntwtfl(fpw,outfl);              /* open file for output (results) */

  printf("Comparing OTUs and writing results to %s\r\n",outfl);
  if (matfl != 'A') printf("Writing distance matrices to #.MAT files.\r\n");
  printheader(infl,outfl,dattyp,setyp);
  printf("\n");

           /* BEGIN COMPARING EACH OTU TO EVERY OTHER OTU... */
  if (matfl == 'D') {
    Is = 1;
    If = ntx;
    Js = 0;
  }
  else {
    Is = 0;
    If = ntx-1;
    Jf = ntx;
  }
  for (i=Is; i<If; ++i) {                            /* compare each OTU... */
    printf("Comparing %s\r\n",otulist[i]);                /* screen message */
    flnmx[0] = '\0';                          /* get OTU X file name from i */
    fx = inttoalph(i,flnmx);
    strcpy(flnmx,fx);
    fx = strcat(flnmx,".$$$");
    if (matfl == 'D') Jf = i;
    else Js = i+1;
    for (j=Js; j<Jf; ++j) {                        /* to every other OTU... */
      printf("     to %s...\r\n",otulist[j]);             /* screen message */
      fprintf(fpw,"\n\n%s vs. %s\n",otulist[i],otulist[j]);

      flnmy[0] = '\0';                        /* get OTU Y file name from j */
      fy = inttoalph(j,flnmy);
      strcpy(flnmy,fy);
      fy = strcat(flnmy,".$$$");

      /* get Mx's, My's, and Mxy's for this OTU X vs. OTU Y comparison */
      tgetdata(flnmx,flnmy);                       /* put data into rvarray */
      computestats(dattyp);       /* choose appropriate mathematical method */
      if (setyp == 'B') bootse(dattyp);                 /* bootstrap s.e.'s */
      else jackse(dattyp);                              /* jackknife s.e.'s */
      printres(dattyp);                        /* print results of analysis */

      if (matfl != 'A')                 /* print results to distance matrix */
        for (k=0; k<l; ++k) {
          flnmd[0] = '\0';               /* get distance matrix name from k */
          fd = inttoalph(k,flnmd);
          strcpy(flnmd,fd);
          fd = strcat(flnmd,".MAT");
          fpd = opntapfl(fpd,flnmd);      /* open matrix file for appending */
          if (dattyp == 'F') fprintf(fpd,"%8.6f ",res[k].Ap);
          else if (dattyp == 'S') fprintf(fpd,"%8.6f ",res[k].AJC);
          if (j == (Jf-1)) {
            fprintf(fpd,"\n");
            if (matfl == 'U') for (m=0; m<=i; ++m) fprintf(fpd,"         ");
          }
          rewind(fpd);                                 /* close matrix file */
          fclose(fpd);
        }
    }                                                    /* end of "j" loop */
  }                                                      /* end of "i" loop */
  rewind(fpw);
  fclose(fpw);                                         /* close output file */

  free(bjlist);

  if (matfl != 'A')                     /* finish off distance matrix files */
    for (k=0; k<l; ++k) {
      flnmd[0] = '\0';                   /* get distance matrix name from k */
      fd = inttoalph(k,flnmd);
      strcpy(flnmd,fd);
      fd = strcat(flnmd,".MAT");
      fpd = opntapfl(fpd,flnmd);          /* open matrix file for appending */
      if (k<nrv) {                                    /* print to each file */
        fprintf(fpd,"\nNumber of nucleotides in restriction site = ");
        fprintf(fpd,"%8.6f\n",rvarray[k].rv);
      }
      else fprintf(fpd,"\nAll enzyme classes combined.\n");
    }
}                                            /* END OF FUNCTION COMPAREOTUS */
/*****************************************************************************
**                                                                          **
**                          FUNCTION PRINTHEADER                            **
**                                                                          **
** This  function  is  called  from  FUNCTION  COMPAREOTUS.   It  prints  a **
** descriptive  header to both the screen and the output file, stating what **
** kind  of  data  is  being analyzed  (Site or Fragment), and what kind of **
** analyses are being done.                                                 **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
void printheader(char infl[], char outfl[], char dattyp, char setyp)
{
  struct tm *nowtime;     /* time and clock structures to get date and time */
  time_t aclock;
  time(&aclock);
  nowtime = localtime(&aclock);
  fprintf(fpw,"%s",asctime(nowtime));
  fprintf(fpw,"Program RESTSITE v%3.1f\n",VNUM);
  fprintf(fpw,"A program for analyzing restriction site or fragment data.\n");
  fprintf(fpw,"Copyright (c) %4.0d by Joyce C. Miller.  ",YEAR);
  fprintf(fpw,"All Rights Reserved.");
  fprintf(fpw,"\n\nThis file, %s, contains output from ",outfl);
  fprintf(fpw,"analysis of file %s:\n",infl);

  if (dattyp == 'S') {              /* if it's site data we're analyzing... */
    printf("\nAnalyzing site data via:\r\n");
    printf("   Jukes-Cantor method (Equations 5.38, 5.41, 5.3, 5.44");
    printf(" of Nei (1987)).\r\n   Nei & Li (1979) method ");
    printf("(Equations 5.38, 5.42, 5.45 of Nei (1987)).\r\n");
    fprintf(fpw,"\nAnalyzing site data via:\n");
    fprintf(fpw,"   Jukes-Cantor method (Equations 5.38, 5.41, 5.3, 5.44");
    fprintf(fpw," of Nei (1987)).\n   Nei & Li (1979) method ");
    fprintf(fpw,"(Equations 5.38, 5.42, 5.45 of Nei (1987)).\n");
  }
  else if (dattyp == 'F') {     /* if it's fragment data we're analyzing... */
    printf("\r\nAnalyzing fragment data via:\r\n   Nei (1987) method ");
    printf("(Equations 5.53, 5.54, 5.55 of Nei (1987)).\r\n");
    fprintf(fpw,"\nAnalyzing fragment data via:\n   Nei (1987) method ");
    fprintf(fpw,"(Equations 5.53, 5.54, 5.55 of Nei (1987)).\n");
  }
  if (setyp == 'B') {
    printf("   Standard errors derived via bootstrapping.\r\n");
    fprintf(fpw,"   Standard errors derived via bootstrapping.\r\n");
  }
  else {
    printf("   Standard errors derived via jackknifing.\r\n");
    fprintf(fpw,"   Standard errors derived via jackknifing.\r\n");
  }
}                                            /* END OF FUNCTION PRINTHEADER */
/*****************************************************************************
**                                                                          **
**                            FUNCTION TGETDATA                             **
**                                                                          **
** This function is called from  FUNCTION COMPAREOTUS.  It opens the pooled **
** data  files   for  the  two  OTUs  being  compared,   and  examines  the **
** fragments/sites found in each.  From this, the number of fragments/sites **
** within each OTU  (Mx and My) are found, and the number the two OTUs have **
** in common  (Mxy).   Mx,  My, and  Mxy are also found within each OTU, so **
** that the within-population variation can be calculated.                  **
**                                                                          **
** Functions called:                                                        **
** initrvarray    --  initializes RVARRAY for next comparison.              **
** opnbrdfl       --  opens the binary data file for reading.               **
** rserror311     --  error message if error reading datafile.              **
*****************************************************************************/
void tgetdata(char flnmx[], char flnmy[])
{
  FILE *fpx, *fpy;                /* file pointers for OTU X and OTU Y file */
  float xni,yni;       /* necessary to convert int (# individuals) to float */
  rclass tf;                           /* structure for data of OTU X vs. Y */
  int i,j,k;                                      /* loop control variables */

  initrvarray();                                      /* initialize rvarray */
  fpx = opnbrdfl(fpx,flnmx);                 /* open OTU X pooled data file */
  fpy = opnbrdfl(fpy,flnmy);                 /* open OTU Y pooled data file */

  for (i=0; i<ntr; ++i) {        /* examine each treatment (record) in turn */
    /* get a record for OTU X and for OTU Y */
    if (fread(&pdx,sizeof(pooldat),1,fpx) == NULL) rserror311(flnmx);
    if (fread(&pdy,sizeof(pooldat),1,fpy) == NULL) rserror311(flnmy);

    tf.rv  = pdx.rv;                 /* initialize temporary data structure */
    tf.nt  = 0;
    tf.xmx = 0;
    tf.xmy = 0;
    tf.xmz = 0;
    tf.ymx = 0;
    tf.ymy = 0;
    tf.ymz = 0;
    tf.mx  = 0;
    tf.my  = 0;
    tf.mz  = 0;

    xni = pdx.ni;      /* convert number of individuals from ints to floats */
    yni = pdy.ni;

    if ((xni > 0) && (yni > 0)) {       /* if this is a valid comparison... */

      /* get Mx, My, and Mxy for within-OTU X and Mx for OTU X vs. OTU Y */
      for (j=0; (j<MAXFS) && (pdx.fs[j].f != -5); ++j)
        if (pdx.fs[j].f >= 0) {                /* if the site is not a null */
          tf.mx  += (pdx.fs[j].no * yni);
          tf.xmz += (pdx.fs[j].no*(pdx.fs[j].no-1)/2);
        }
      tf.xmx += (pdx.tn/xni)*((xni*(xni-1))/2);
      tf.xmy  = tf.xmx;

      /* get Mx, My, and Mxy for within-OTU X and My for OTU X vs. OTU Y */
      for (j=0; (j<MAXFS) && (pdy.fs[j].f != -5); ++j)
        if (pdy.fs[j].f >= 0) {                /* if the site is not a null */
          tf.my  += (pdy.fs[j].no * xni);
          tf.ymz += (pdy.fs[j].no*(pdy.fs[j].no-1)/2);
        }
      tf.ymx += (pdy.tn/yni)*((yni*(yni-1))/2);
      tf.ymy  = tf.ymx;

      /* get Mxy OTU X vs. Y */
      for (j=0; (j<MAXFS) && (pdx.fs[j].f != -5); ++j)
        if (pdx.fs[j].f >= 0)
          for (k=0; (k<MAXFS) && (pdy.fs[k].f != -5); ++k)
            if (pdy.fs[k].f >= 0)
              if (pdx.fs[j].f == pdy.fs[k].f) {
                tf.mz += (pdx.fs[j].no * pdy.fs[k].no);
                break;
              }
    }               /* end of getting Mx, My, Mxy for OTU X, Y, and X vs. Y */

    /* put data from this PEC into correct r-class in rvarray */
    for (j=0; j<nrv; ++j)                 /* locate same r-class in rvarray */
      if (rvarray[j].rv == tf.rv) {                     /* if this is it... */
        rvarray[j].nt  += 1;                       /* increase # treatments */
        rvarray[j].xmx += tf.xmx;                               /* add data */
        rvarray[j].xmy += tf.xmy;
        rvarray[j].xmz += tf.xmz;
        rvarray[j].ymx += tf.ymx;
        rvarray[j].ymy += tf.ymy;
        rvarray[j].ymz += tf.ymz;
        rvarray[j].mx  += tf.mx;
        rvarray[j].my  += tf.my;
        rvarray[j].mz  += tf.mz;
        break;
      }

    /* also put data from each treatment into array in memory */
    bjlist[i].rv   = tf.rv;
    bjlist[i].xmxy = tf.xmx + tf.xmy;
    bjlist[i].xmz  = tf.xmz;
    bjlist[i].ymxy = tf.ymx + tf.ymy;
    bjlist[i].ymz  = tf.ymz;
    bjlist[i].mxy  = tf.mx  + tf.my;
    bjlist[i].mz   = tf.mz;
  }                                          /* end of "i" (treatment) loop */
  rewind(fpx);                               /* close OTU pooled data files */
  fclose(fpx);
  rewind(fpy);
  fclose(fpy);
}                                               /* END OF FUNCTION TGETDATA */
/*****************************************************************************
**                                                                          **
**                          FUNCTION COMPUTESTATS                           **
**                                                                          **
** This function is called from FUNCTION COMPAREOTUS.  For restriction site **
** data, it  computes  S-hat, p-hat and  its  standard  error, Jukes-Cantor **
** (1969) distance  and its s.e., and  Nei-Li distance  (1979) and its s.e. **
** For restriction fragment data, it computes F-hat and d-hat.  The results **
** of the analyses are put into the global array RES.                       ** 
**                                                                          **
** Functions called:                                                        **
** initbj         --  initializes the "working copy" of RES.                **
** copybjtores    --  copies contents of BJ into RES.                       **
** copyrvtobjdat  --  makes working copy of data set.                       **
** computefrag    --  computes stats on frag data in a single enzyme class. **
** computesite    --  computes stats on site data in a single enzyme class. **
** allenzclasses  --  computes stats across all enzyme classes.             **
*****************************************************************************/
void computestats(char dattyp)
{
  int i;                                           /* loop control variable */

  initbj(dattyp);          /* initialize "working version" of results array */
  copybjtores();                  /* use it to initialize the results array */
  copyrvtobjdat();                        /* make a working copy of RVARRAY */

  for (i=0; i<nrv; ++i)                   /* compute stats for each r-class */
    if (dattyp == 'F') computefrag(i);                 /* for fragment data */
    else computesite(i);                                   /* for site data */
  if (nrv > 1) allenzclasses(dattyp);      /* across all r-classes combined */
  copybjtores();                                   /* copy results into RES */
}                                           /* END OF FUNCTION COMPUTESTATS */
/*****************************************************************************
**                                                                          **
**                           FUNCTION COMPUTEFRAG                           **
**                                                                          **
** This function is called from several different functions. It receives an **
** RCLASS  structure  with  data for  the current  X  vs. Y comparison, and **
** computes statistics within OTU X, within OTU Y, between OTU X and Y, and **
** the  corrected  statistics for  OTU X  by  Y.   For  the first three, it **
** computes F-hat from equation 5.53 of Nei (1987).  It then computes d-hat **
** via equations 5.54 and 5.55.  The corrected values for F-hat is computed **
** from equation 26 of Nei and Li (1979).  Corrected d-hat is calculated by **
** equation  10.21 of Nei (1987).  The standard errors of all of the F-hats **
** and d-hats are computed either by jackknifing or by bootstrapping, which **
** is  done  by  other  functions.                                          **
**                                                                          **
** Functions called:                                                        **
** eqnNL26        --  computes corrected F-hat.                             **
** eqnNei10_21    --  computes corrected d-hat.                             **
*****************************************************************************/
void computefrag(int q)
{
  float F  = 0;                                                    /* F-hat */
  float G1 = 0;                                                   /* G1-hat */
  float G2 = 0;                                                   /* G2-hat */
  int i;                                           /* loop control variable */

  bj[q].rv = bjdat[q].rv;                          /* copy size of cut site */
  /* COMPUTE STATISTICS WITHIN OTU X */
  if ((bjdat[q].xmx > 0) && (bjdat[q].xmy > 0)) {
    bj[q].xm = (bjdat[q].xmx + bjdat[q].xmy) / 2;          /* compute m-hat */
    bj[q].xS = bjdat[q].xmz / bj[q].xm;              /* F-hat via eqn. 5.53 */
    if ((F=bj[q].xS) > 0) {                     /* compute G2 via eqn. 5.54 */
      G1 = sqrt(sqrt(F));                                 /* G1 start value */
      for (i=0; ((fabs((G2=CALCG)-G1)) > (TEST*G2)); ++i) G1 = G2;
      bj[q].xp = -2 / bj[q].rv*log(G2);              /* d-hat via eqn. 5.55 */
    }
  }

  /* COMPUTE STATISTICS WITHIN OTU Y */
  if ((bjdat[q].ymx > 0) && (bjdat[q].ymy > 0)) {
    bj[q].ym = (bjdat[q].ymx + bjdat[q].ymy) / 2;          /* compute m-hat */
    bj[q].yS = bjdat[q].ymz / bj[q].ym;              /* F-hat via eqn. 5.53 */
    if ((F=bj[q].yS) > 0) {                     /* compute G2 via eqn. 5.54 */
      G1 = sqrt(sqrt(F));                                 /* G1 start value */
      for (i=0; ((fabs((G2=CALCG)-G1)) > (TEST*G2)); ++i) G1 = G2;
      bj[q].yp = -2 / bj[q].rv*log(G2);              /* d-hat via eqn. 5.55 */
    }
  }

  /* COMPUTE STATISTICS FOR OTU X vs. Y */
  if ((bjdat[q].mx > 0) && (bjdat[q].my > 0)) {
    bj[q].zm = (bjdat[q].mx + bjdat[q].my) / 2;            /* compute m-hat */
    bj[q].zS = bjdat[q].mz / bj[q].zm;               /* F-hat via eqn. 5.53 */
    if ((F=bj[q].zS) > 0) {                     /* compute G2 via eqn. 5.54 */
      G1 = sqrt(sqrt(F));                                 /* G1 start value */
      for (i=0; ((fabs((G2=CALCG)-G1)) > (TEST*G2)); ++i) G1 = G2;
      bj[q].zp = -2 / bj[q].rv*log(G2);              /* d-hat via eqn. 5.55 */
    }
  }
  bj[q].AS = eqnNL26(bj[q].xS,bj[q].yS,bj[q].zS);        /* corrected F-hat */
  bj[q].Ap = eqnNei10_21(bj[q].xp,bj[q].yp,bj[q].zp);    /* corrected d-hat */
}                                            /* END OF FUNCTION COMPUTEFRAG */
/*****************************************************************************
**                                                                          **
**                           FUNCTION COMPUTESITE                           **
**                                                                          **
** This  function is called  from several different functions.  It takes an **
** rclass structure, with  data for  the current  X  vs.  Y comparison, and **
** computes statistics within OTU X, within OTU Y, for OTU X vs. OTU Y, and **
** corrected statistics for OTU X  vs. Y.  For the first three, it computes **
** m-hat from the equation (Mx + My)/2  from Nei (1987), page 100.  It then **
** computes  S-hat  via  equation  5.38 of Nei (1987),  p-hat from equation **
** 5.41, that standard error of  p-hat from 5.43, the Jukes-Cantor distance **
** via 5.3, its standard error from 5.44, and the Nei-Li distance (equation **
** 5.42) and its  standard error  (equation 5.45).  The corrected value for **
** S-hat  is  computed from equation  26  of  Nei and Li (1979).  Corrected **
** p-hat is computed  directly from this corrected  S-hat via equation 5.41 **
** of  Nei  (1987).   Corrected   Jukes-Cantor  and  Nei-Li  distances  are **
** calculated by removing the average within-population distance (the OTU X **
** and  OTU Y distances) as per equation 10.21 of Nei (1987).  The standard **
** errors  of the corrected  p-hat,  the corrected  Jukes-Cantor and Nei-Li **
** distances,  and all of the  S-hats are computed by either jackknifing or **
** bootstrapping, which is done by another function.                        **
**                                                                          **
** Functions called:                                                        **
** eqnNL26        --  computes corrected S-hat.                             **
** eqnNei10_21    --  computes corrected Jukes-Cantor or Nei-Li distance.   **
*****************************************************************************/
void computesite(int q)
{
  float num = 0;                         /* numerator for complex equations */
  float den = 0;                       /* denominator for complex equations */

  bj[q].rv = bjdat[q].rv;                  /* copy size of restriction site */

  /* COMPUTE RESULTS FOR WITHIN OTU X */
  if ((bjdat[q].xmx > 0) && (bjdat[q].xmy > 0)) { /* if comparison is valid */
    bj[q].xm = (bjdat[q].xmx + bjdat[q].xmy) / 2;          /* compute m-hat */
    bj[q].xS = bjdat[q].xmz / bj[q].xm;              /* S-hat via eqn. 5.38 */
    bj[q].xp = 1-(pow(bj[q].xS,(1/bj[q].rv)));       /* p-hat via eqn. 5.41 */
    if ((bj[q].xS > 0) && (bj[q].xp < 0.75)) {  /* if S=0 dist's are undef. */
      bj[q].xJC = -0.75 * log(1-(4*bj[q].xp/3));    /* JC dist via eqn. 5.3 */
      bj[q].xNL = -log(bj[q].xS) / bj[q].rv;   /* NL distance via eqn. 5.42 */
    }
  }

  /* COMPUTE RESULTS FOR WITHIN OTU Y */
  if ((bjdat[q].ymx > 0) && (bjdat[q].ymy > 0)) { /* if comparison is valid */
    bj[q].ym = (bjdat[q].ymx + bjdat[q].ymy) / 2;          /* compute m-hat */
    bj[q].yS = bjdat[q].ymz / bj[q].ym;              /* S-hat via eqn. 5.38 */
    bj[q].yp = 1-(pow(bj[q].yS,(1/bj[q].rv)));       /* p-hat via eqn. 5.41 */
    if ((bj[q].yS > 0) && (bj[q].yp < 0.75)) {  /* if S=0 dist's are undef. */
      bj[q].yJC = -0.75 * log(1-(4*bj[q].yp/3));    /* JC dist via eqn. 5.3 */
      bj[q].yNL = -log(bj[q].yS) / bj[q].rv;   /* NL distance via eqn. 5.42 */
    }
  }

  /* COMPUTE RESULTS FOR OTU X vs. OTU Y */
  if ((bjdat[q].mx > 0) && (bjdat[q].my > 0)) {   /* if comparison is valid */
    bj[q].zm = (bjdat[q].mx + bjdat[q].my) / 2;            /* compute m-hat */
    bj[q].zS = bjdat[q].mz / bj[q].zm;               /* S-hat via eqn. 5.38 */
    bj[q].zp = 1-(pow(bj[q].zS,(1/bj[q].rv)));       /* p-hat via eqn. 5.41 */
    if ((bj[q].zS > 0) && (bj[q].zp < 0.75)) {  /* if S=0 dist's are undef. */
      bj[q].zJC = -0.75 * log(1-(4*bj[q].zp/3));    /* JC dist via eqn. 5.3 */
      bj[q].zNL = -log(bj[q].zS) / bj[q].rv;   /* NL distance via eqn. 5.42 */
    }
  }

  /* COMPUTE CORRECTED VALUES FOR S-HAT, p-HAT, AND JC AND NL DISTANCE */
  bj[q].AS = eqnNL26(bj[q].xS,bj[q].yS,bj[q].zS);            /* corrected S */
  if ((bj[q].AS < 0) || (bj[q].rv <= 0)) bj[q].Ap = -1;      /* corrected p */
  else if ((bj[q].AS >= 0) && (bj[q].rv > 0)) 
    bj[q].Ap = 1-(pow(bj[q].AS,(1/bj[q].rv)));                 /* eqn. 5.41 */
  bj[q].AJC = eqnNei10_21(bj[q].xJC, bj[q].yJC, bj[q].zJC);   /* corr'd JCd */
  bj[q].ANL = eqnNei10_21(bj[q].xNL, bj[q].yNL, bj[q].zNL);   /* corr'd NLd */
}                                            /* END OF FUNCTION COMPUTESITE */
/*****************************************************************************
**                                                                          **
**                          FUNCTION ALLENZCLASSES                          **
**                                                                          **
** This function is called by several  other  functions.  It will compute a **
** statistic over all of the enzyme classes by using equation 17 of Nei and **
** Miller (1990).  It does this by summing:                                 **
**                          (stat  x  m-hat  x  r)                          **
** over all of the enzyme classes, and dividing it by:                      **
**                              (m-hat  x  r)                               **
** summed over all  of the  enzyme  classes.  For  site data, the following **
** numbers are computed this way:                                           **
**          Within OTU X  : S-hat, p-hat, JC d-hat, NL d-hat.               **
**          Within OTU Y  : S-hat, p-hat, JC d-hat, NL d-hat.               **
**          Within OTU XY : S-hat, p-hat, JC d-hat, NL d-hat.               **
** The  corrected X-Y values  (S-hat, p-hat, JC d-hat & NL d-hat) cannot be **
** computed in  this  manner, since  "m"  is  no longer available.  For the **
** corrected  X-Y, S-hat is computed via eqn. 26 of Nei & Li (1979) and the **
** Jukes-Cantor and Nei-Li distances must be computed via equation 10.21 of **
** Nei (1987)  from  the  corrected  d-hats of  OTU X,  OTU Y, and OTU X-Y. **
** p-hat, unfortunately, cannot be computed at all: computing it from S-hat **
** would  require an  r-value  (and  we're  trying to  do this  across  all **
** r-classes here), and we  cannot use  equation  17, either (no "m" values **
** are available).  To my knowledge, no "corrected p-hat" equation exists.  **
**                                                                          **
** For fragment data, the following numbers are computed via eqn. 17:       **
**          Within OTU X  : F-hat, d-hat.                                   **
**          Within OTU Y  : F-hat, d-hat.                                   **
**          Within OTU XY : F-hat, d-hat.                                   **
** The corrected X-Y values  (F-hat, and  d-hat) also cannot be derived via **
** eqn. 17, for the same reasons listed above.  The corrected F-hat for OTU **
** X vs. OTU Y is computed  via equation 26 of Nei and Li (1979),  and  the **
** corrected d-hat by equation 10.21 of Nei (1987).  The standard errors of **
** all of these values are computed by either jackknifing or bootstrapping, **
** both of which are performed by other functions.                          **
**                                                                          **
** Functions called:                                                        **
** eqnNL26        --  computes corrected F-hat.                             **
** eqnNei10_21    --  computes corrected d-hat.                             **
*****************************************************************************/
void allenzclasses(char dattyp)
{
  float xb = 0;                  /* bottom term for wgt'd F and d for OTU X */
  float yb = 0;                  /* bottom term for wgt'd F and d for OTU X */
  float zb = 0;             /* bottom term for wgt'd F and d for OTU X by Y */
  int i;                                           /* loop control variable */

  bj[nrv].xS = 0;         /* initialize slots for S-hats (for site data) or */
  bj[nrv].yS = 0;                            /* F-hats (for fragment data ) */
  bj[nrv].zS = 0;
  bj[nrv].AS = 0;
  bj[nrv].xp = 0;            /* initialize slots for p-hats (for site data) */
  bj[nrv].yp = 0;                          /* or d-hats (for fragment data) */
  bj[nrv].zp = 0;
  bj[nrv].Ap = 0;
  if (dattyp == 'S') {                                /* for site data .... */
    bj[nrv].xJC = 0;         /* initialize slots for Jukes-Cantor distances */
    bj[nrv].yJC = 0;
    bj[nrv].zJC = 0;
    bj[nrv].AJC = 0;
    bj[nrv].xNL = 0;               /* initialize slots for Nei-Li distances */
    bj[nrv].yNL = 0;
    bj[nrv].zNL = 0;
    bj[nrv].ANL = 0;
  }
  bjdat[nrv].nt = 0;                     /* initialize number of treatments */

  /* sum numerators and denominators over all enzyme classes */
  for (i=0; i<nrv; ++i) {
    if (bj[i].rv > 0) {
      bjdat[nrv].nt += bjdat[i].nt;       /* increment number of treatments */
      if (bj[i].xm >= 0) {          /* numerator of S (F), p (d), JCd & NLd */
        if (bj[i].xS >= 0) bj[nrv].xS += (bj[i].xS * bj[i].xm * bj[i].rv);
        if (bj[i].xp >= 0) bj[nrv].xp += (bj[i].xp * bj[i].xm * bj[i].rv);                /* OTU Y */
        if (dattyp == 'S') {
          if (bj[i].xJC >= 0) bj[nrv].xJC += (bj[i].xJC * bj[i].xm * bj[i].rv);
          if (bj[i].xNL >= 0) bj[nrv].xNL += (bj[i].xNL * bj[i].xm * bj[i].rv);
        }
        xb += (bj[i].xm * bj[i].rv);     /* denominator of the above values */
      }
      if (bj[i].ym >= 0) {          /* numerator of S (F), p (d), JCd & NLd */
        if (bj[i].yS >= 0) bj[nrv].yS += (bj[i].yS * bj[i].ym * bj[i].rv);
        if (bj[i].yp >= 0) bj[nrv].yp += (bj[i].yp * bj[i].ym * bj[i].rv);                /* OTU Y */
        if (dattyp == 'S') {
          if (bj[i].yJC >= 0) bj[nrv].yJC += (bj[i].yJC * bj[i].ym * bj[i].rv);
          if (bj[i].yNL >= 0) bj[nrv].yNL += (bj[i].yNL * bj[i].ym * bj[i].rv);
        }
        yb += (bj[i].ym * bj[i].rv);     /* denominator of the above values */
      }
      if (bj[i].zm >= 0) {          /* numerator of S (F), p (d), JCd & NLd */
        if (bj[i].zS >= 0) bj[nrv].zS += (bj[i].zS * bj[i].zm * bj[i].rv);
        if (bj[i].zp >= 0) bj[nrv].zp += (bj[i].zp * bj[i].zm * bj[i].rv);                /* OTU Y */
        if (dattyp == 'S') {
          if (bj[i].zJC >= 0) bj[nrv].zJC += (bj[i].zJC * bj[i].zm * bj[i].rv);
          if (bj[i].zJC >= 0) bj[nrv].zNL += (bj[i].zNL * bj[i].zm * bj[i].rv);
        }
        zb += (bj[i].zm * bj[i].rv);     /* denominator of the above values */
      }
    }
  }
  if (xb > 0) {                                             /* within OTU X */
    bj[nrv].xS = bj[nrv].xS / xb;                            /* S- or F-hat */
    bj[nrv].xp = bj[nrv].xp / xb;                            /* p- or d-hat */
    if (dattyp == 'S') {                                  /* for site data: */
      bj[nrv].xJC = bj[nrv].xJC / xb;                           /* JC d-hat */
      bj[nrv].xNL = bj[nrv].xNL / xb;                           /* NL d-hat */
    }
  }
  else {
    bj[nrv].xS = -1;                                         /* S- or F-hat */
    bj[nrv].xp = -1;                                         /* p- or d-hat */
    if (dattyp == 'S') {                                  /* for site data: */
      bj[nrv].xJC = -1;                                         /* JC d-hat */
      bj[nrv].xNL = -1;                                         /* NL d-hat */
    }
  }
  if (yb > 0) {                                             /* within OTU Y */
    bj[nrv].yS = bj[nrv].yS / yb;                            /* S- or F-hat */
    bj[nrv].yp = bj[nrv].yp / yb;                            /* p- or d-hat */
    if (dattyp == 'S') {                                  /* for site data: */
      bj[nrv].yJC = bj[nrv].yJC / yb;                           /* JC d-hat */
      bj[nrv].yNL = bj[nrv].yNL / yb;                           /* NL d-hat */
    }
  }
  else {
    bj[nrv].yS = -1;                                         /* S- or F-hat */
    bj[nrv].yp = -1;                                         /* p- or d-hat */
    if (dattyp == 'S') {                                  /* for site data: */
      bj[nrv].yJC = -1;                                         /* JC d-hat */
      bj[nrv].yNL = -1;                                         /* NL d-hat */
    }
  }
  if (zb > 0) {                                            /* within OTU XY */
    bj[nrv].zS = bj[nrv].zS / zb;                            /* S- or F-hat */
    bj[nrv].zp = bj[nrv].zp / zb;                            /* p- or d-hat */
    if (dattyp == 'S') {                                  /* for site data: */
      bj[nrv].zJC = bj[nrv].zJC / zb;                           /* JC d-hat */
      bj[nrv].zNL = bj[nrv].zNL / zb;                           /* NL d-hat */
    }
  }
  else {
    bj[nrv].zS = -1;                                         /* S- or F-hat */
    bj[nrv].zp = -1;                                         /* p- or d-hat */
    if (dattyp == 'S') {                                  /* for site data: */
      bj[nrv].zJC = -1;                                         /* JC d-hat */
      bj[nrv].zNL = -1;                                         /* NL d-hat */
    }
  }

  bj[nrv].AS = eqnNL26(bj[nrv].xS,bj[nrv].yS,bj[nrv].zS);  /* corr'd S or F */
  if (dattyp == 'F')
    bj[nrv].Ap = eqnNei10_21(bj[nrv].xp,bj[nrv].yp,bj[nrv].zp);    /* d-hat */
  if (dattyp == 'S') {
    bj[nrv].Ap = -1;   /* no eqn exists for corr'd p-hat over all r-classes */
    bj[nrv].AJC = eqnNei10_21(bj[nrv].xJC,bj[nrv].yJC,bj[nrv].zJC);  /* JCd */
    bj[nrv].ANL = eqnNei10_21(bj[nrv].xNL,bj[nrv].yNL,bj[nrv].zNL);  /* NLd */
  }
}                                          /* END OF FUNCTION ALLENZCLASSES */
/*****************************************************************************
**                                                                          **
**                             FUNCTION EQNNL26                             **
**                                                                          **
** This function is called from several other functions.  It receives F-hat **
** or  S-hat  for within  OTU X, within  OTU Y, and for OTU X  vs. Y.  From **
** these three values, it computes the corrected version of  F-hat or S-hat **
** for OTU X vs. OTU Y via equation 26 of Nei and Li (1979):                **
**                F(A) = F(xy)/(square root of (F(x) x F(y)))               **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
float eqnNL26(float x, float y, float z)
{
  float A = 0;                        /* corrected value for F-hat or S-hat */
  if (z <= 0) A = z;                        /* if Fxy is undefined, so is A */
  else {
    if (x > 0) {
      if (y <= 0) A = z/sqrt(x);
      else A = z/sqrt(x * y);
    }
    else {
      if (y > 0) A = z/sqrt(y);
      else A = z;
    }
  }
  return(A);
}                                                /* END OF FUNCTION EQNNL26 */
/*****************************************************************************
**                                                                          **
**                           FUNCTION EQNNEI10_21                           **
**                                                                          **
** This function is called from several other functions.  It receives d-hat **
** for within OTU X, within  OTU Y, and for OTU X  vs. Y.  From these three **
** values, it computes the corrected version of d-hat for  OTU X  vs. OTU Y **
** via equation 10.21 of Nei (1987):                                        **
**                     d(A) = d(xy) - [(d(x) + d(y))/2]                     **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
float eqnNei10_21(float x, float y, float z)
{
  float A = 0;                                 /* corrected value for d-hat */

  if (z <= 0) A = z;                        /* if dxy is undefined, so is A */
  else {
    if (x > 0) {
      if (y <= 0) A = z - x/2;
      else A = z - (x + y)/2;
    }
    else {
      if (y > 0) A = z - y/2;
      else A = z;
    }
  }
  return(A);
}                                            /* END OF FUNCTION EQNNEI10_21 */
/*****************************************************************************
**                                                                          **
**                             FUNCTION JACKSE                              **
**                                                                          **
** This  function  is  called  from  FUNCTION COMPAREOTUS.  It computes the **
** jackknife  standard  error for  all of  the values  from both  the  site **
** analysis  and  the  fragment  analysis.   Jackknifing  is  done  in  the **
** following  way.  The  values  (ex.:  d-hat)  that  were computed for the **
** complete data  set are stored in RES.  A working  copy  of  the complete **
** data  set  is made  (BJDAT),  and  the data  that  came  from the  first **
** probe/enzyme  combination is  subtracted  from it.  The  values are then **
** computed  for this first  "jackknife"  data  set (d(1)).  The difference **
** between the  d(1) value  and  the "true" d value is squared, and stored. **
** Then  the process is  repeated  with  the  second p/e combination's data **
** being subtracted from the  complete data set, and so on.  The sum of the **
** squared differences between the "complete" data values and the jackknife **
** data values are used to compute the jackknife standard error.  It should **
** be noted that we do not jackknife over enzymes, but rather over probe/-  **
** enzyme combinations.  The jackknife formula itself is:                   **
**       square root of:  {[(n-1)/n] x (sum of the squared differences)}    **
** where n is the total number of probe/enzyme combinations.                **
**                                                                          **
** Functions called:                                                        **
** initseslots    --  initializes the standard error slots in RES.          **
** initnt         --  initializes  NT, an  array  that holds the numbers of **
**                    probe/enzyme combinations that go into each s.e.      **
** initbj         --  initializes the "working version" of RES.             **
** copyrvtobjdat  --  makes a "working copy" of RVARRAY.                    **
** computesite    --  computes stats on site data in a single enzyme class. **
** computefrag    --  computes stats on frag data in a single enzyme class. **
** allenzclasses  --  computes stats across all enzyme classes.             **
** increasesumsq  --  computes  and   increases  the  sum  of  the  squared **
**                    differences for each value requiring an s.e.          **
*****************************************************************************/
void jackse(char dattyp)
{
  int i,j,k,m;                                    /* loop control variables */

  if (nrv == 1) k = 1;
  else k = nrv + 1;

  initseslots(k,dattyp);        /* initialize the s.e.'s for the statistics */
  initnt(dattyp);     /* initialize array to hold # of jackknife treatments */

  m = 0;                                     /* used to tell user of status */
  for (i=0; i<ntr; ++i) {                /* jackknife across all treatments */
    if (m == 20) {        /* provides a message to the user every 20 cycles */
      printf("%d treatments jackknifed...\r",i);
      m = 0;
    }
    m++;
    initbj();   /* initialize temporary array to hold results of this cycle */
    copyrvtobjdat();            /* copy "real" data into working data array */

    for (j=0; j<nrv; ++j) {    /* subtract out the data from this treatment */
      if (bjdat[j].rv == bjlist[i].rv) {
        bjdat[j].xmx -= (bjlist[i].xmxy/2);
        bjdat[j].xmy -= (bjlist[i].xmxy/2);
        bjdat[j].xmz -=  bjlist[i].xmz;
        bjdat[j].ymx -= (bjlist[i].ymxy/2);
        bjdat[j].ymy -= (bjlist[i].ymxy/2);
        bjdat[j].ymz -=  bjlist[i].ymz;
        bjdat[j].mx   = (bjdat[j].mx + bjdat[j].my) / 2;
        bjdat[j].my   =  bjdat[j].mx;
        bjdat[j].mx  -= (bjlist[i].mxy/2);
        bjdat[j].my  -= (bjlist[i].mxy/2);
        bjdat[j].mz  -=  bjlist[i].mz;
      }
      if (dattyp == 'F') computefrag(j);   /* get jack stats for this class */
      else computesite(j);
    }                                                    /* end of "j" loop */
    if (nrv > 1) allenzclasses(dattyp);     /* get jackstats for each class */

    /* increase sum of the squared differences for each standard error */
    increasesumsq(k,dattyp); 
  }                                            /* end of treatment (i) loop */

  /* now have the sum of the squared differences over all the treatments */
  /* compute the jackknife standard error */
  for (i=0; i<k; ++i) {    /* for each enzyme class and for all combined... */
    /* S-hats (site data) or F-hats (fragment data) */
    if ((res[i].xSse < 0) || (nt[i].xSnt <= 1)) res[i].xSse = -1;  /* OTU X */
    else res[i].xSse = sqrt(res[i].xSse * (nt[i].xSnt-1) / nt[i].xSnt);
    if ((res[i].ySse < 0) || (nt[i].ySnt <= 1)) res[i].ySse = -1;  /* OTU Y */
    else res[i].ySse = sqrt(res[i].ySse * (nt[i].ySnt-1) / nt[i].ySnt);
    if ((res[i].zSse < 0) || (nt[i].zSnt <= 1)) res[i].zSse = -1; /* OTU XY */
    else res[i].zSse = sqrt(res[i].zSse * (nt[i].zSnt-1) / nt[i].zSnt);
    if ((res[i].ASse < 0) || (nt[i].ASnt <= 1)) res[i].ASse = -1; /* corr'd */
    else res[i].ASse = sqrt(res[i].ASse * (nt[i].ASnt-1) / nt[i].ASnt);

    /* p-hats (site data) or d-hats (fragment data) */
    if ((res[i].xpse < 0) || (nt[i].xpnt <= 1)) res[i].xpse = -1;
    else res[i].xpse = sqrt(res[i].xpse * (nt[i].xpnt-1) / nt[i].xpnt);
    if ((res[i].ypse < 0) || (nt[i].ypnt <= 1)) res[i].ypse = -1;
    else res[i].ypse = sqrt(res[i].ypse * (nt[i].ypnt-1) / nt[i].ypnt);
    if ((res[i].zpse < 0) || (nt[i].zpnt <= 1)) res[i].zpse = -1;
    else res[i].zpse = sqrt(res[i].zpse * (nt[i].zpnt-1) / nt[i].zpnt);
    if ((res[i].Apse < 0) || (nt[i].Apnt <= 1)) res[i].Apse = -1;
    else res[i].Apse = sqrt(res[i].Apse * (nt[i].Apnt-1) / nt[i].Apnt);

    /* Jukes-Cantor distances and Nei-Li distances (site data) */
    if (dattyp == 'S') {
      if ((res[i].xJCse < 0) || (nt[i].xJCnt <= 1)) res[i].xJCse = -1;
      else res[i].xJCse = sqrt(res[i].xJCse * (nt[i].xJCnt-1)/nt[i].xJCnt);
      if ((res[i].yJCse < 0) || (nt[i].yJCnt <= 1)) res[i].yJCse = -1;
      else res[i].yJCse = sqrt(res[i].yJCse * (nt[i].yJCnt-1)/nt[i].yJCnt);
      if ((res[i].zJCse < 0) || (nt[i].zJCnt <= 1)) res[i].zJCse = -1;
      else res[i].zJCse = sqrt(res[i].zJCse * (nt[i].zJCnt-1)/nt[i].zJCnt);
      if ((res[i].AJCse < 0) || (nt[i].AJCnt <= 1)) res[i].AJCse = -1;
      else res[i].AJCse = sqrt(res[i].AJCse * (nt[i].AJCnt-1)/nt[i].AJCnt);

      if ((res[i].xNLse < 0) || (nt[i].xNLnt <= 1)) res[i].xNLse = -1;
      else res[i].xNLse = sqrt(res[i].xNLse * (nt[i].xNLnt-1)/nt[i].xNLnt);
      if ((res[i].yNLse < 0) || (nt[i].yNLnt <= 1)) res[i].yNLse = -1;
      else res[i].yNLse = sqrt(res[i].yNLse * (nt[i].yNLnt-1)/nt[i].yNLnt);
      if ((res[i].zNLse < 0) || (nt[i].zNLnt <= 1)) res[i].zNLse = -1;
      else res[i].zNLse = sqrt(res[i].zNLse * (nt[i].zNLnt-1)/nt[i].zNLnt);
      if ((res[i].ANLse < 0) || (nt[i].ANLnt <= 1)) res[i].ANLse = -1;
      else res[i].ANLse = sqrt(res[i].ANLse * (nt[i].ANLnt-1)/nt[i].ANLnt);
    }
  }                        /* done with computing jackknife standard errors */
  printf("                              \r",i);            /* clear up line */
}                                                /* END OF PROCEDURE JACKSE */
/*****************************************************************************
**                                                                          **
**                             FUNCTION BOOTSE                              **
**                                                                          **
** This  function  is  called  from  FUNCTION COMPAREOTUS.  It computes the **
** bootstrap  standard  error for all of the values  from both the site and **
** the fragment analysis.  Bootstrapping is done in the following way.  The **
** statistics are computed from the  whole data set, and are stored in RES. **
** Then a  fake  data  set is  made up  by  randomly  selecting  treatments **
** (Probe/Enzyme Combinations) from the real data set, replacing them after **
** they have  been selected.  Since the selection is made with replacement, **
** data  from a  single  PEC can be in the bootstrap ("fake") data set more **
** than once, or not at all.  Once  we  have  the  bootstrap  data set, the **
** statistics  are  computed  from it.  Then, the  differences  between the **
** bootstrap values and  the "real" values are computed  and squared.  This **
** procedure (getting a bootstrap data set, computing  the bootstrap values **
** from it, and  computing the  squared difference) is done 200 times.  The **
** squared differences  are summed  over  these 200 cycles, and this sum is **
** divided by  (200-1).  The  square  root of  this number is the bootstrap **
** s.e. for a particular value.  This  program is  presently  set  for  200 **
** bootstrap  cycles,  and can be changed by altering the symbolic constant **
** BOOTSTRAP in  the file  RSTYPES.H.  Most  statisticians,  however, agree **
** that 200 cycles of bootstrapping is generally sufficient.                **
**    The bootstrap  data sets are created by selecting random numbers from **
** 1 to ntr (the number of PECs).  It selects ntr of these numbers, thereby **
** creating a bootstrap  data set as large as the original, with treatments **
** randomly selected from the original. The  bootstrap values are computed, **
** and a running sum is  kept of the sum of the squared differences.  After **
** 200  cycles, the  actual  bootstrap s.e. of  each of the above values is **
** computed from its running sum.                                           **
**                                                                          **
** Functions called:                                                        **
** initseslots    --  initializes the standard error slots in RES.          **
** copyrvtobjdat  --  makes a "working copy" of RVARRAY.                    **
** computefrag    --  computes stats on frag data in a single enzyme class. **
** computesite    --  computes stats on site data in a single enzyme class. **
** allenzclasses  --  computes stats across all enzyme classes.             **
** increasesumsq  --  computes  and   increases  the  sum  of  the  squared **
**                    differences for each value requiring an s.e.          **
*****************************************************************************/
void bootse(char dattyp)
{
  int i,j,k,l,m;
  float n = 0;
  time_t tyme;
  int numlist[MAXPEC];

  if (nrv == 1) l = 1;
  else l = nrv + 1;
  initseslots(l,dattyp);
  for (i=0; i<l; ++i) {   /* initialize array to hold # of bootstrap cycles */
    nt[i].xSnt  = BOOTSTRAP;
    nt[i].ySnt  = BOOTSTRAP;
    nt[i].zSnt  = BOOTSTRAP;
    nt[i].ASnt  = BOOTSTRAP;
    nt[i].xpnt  = BOOTSTRAP;
    nt[i].ypnt  = BOOTSTRAP;
    nt[i].zpnt  = BOOTSTRAP;
    nt[i].Apnt  = BOOTSTRAP;
    nt[i].xJCnt = BOOTSTRAP;
    nt[i].yJCnt = BOOTSTRAP;
    nt[i].zJCnt = BOOTSTRAP;
    nt[i].AJCnt = BOOTSTRAP;
    nt[i].xNLnt = BOOTSTRAP;
    nt[i].yNLnt = BOOTSTRAP;
    nt[i].zNLnt = BOOTSTRAP;
    nt[i].ANLnt = BOOTSTRAP;
  }

  n = (RAND_MAX/(float)ntr) + (1/(float)ntr);
  srand(time(&tyme));
  m = 20;                              /* used to print out status messages */
  for (i=0; i<BOOTSTRAP; ++i) {
    if (m == 20) {        /* provides a message to the user every 20 cycles */
      printf("%d bootstrap cycles done...\r",i);
      m = 0;
    }
    m++;
    for (j=0; j<nrv; ++j) {
      bjdat[j].rv  = rvarray[j].rv;
      bjdat[j].nt  = 0;
      bjdat[j].xmx = 0;
      bjdat[j].xmy = 0;
      bjdat[j].xmz = 0;
      bjdat[j].ymx = 0;
      bjdat[j].ymy = 0;
      bjdat[j].ymz = 0;
      bjdat[j].mx  = 0;
      bjdat[j].my  = 0;
      bjdat[j].mz  = 0;
    }
    for (j=0; j<ntr; ++j) numlist[j] = 0;
    for (j=0; j<ntr; ++j) numlist[(int)floor(rand()/n)]++;
    for (j=0; j<ntr; ++j) {
      if (numlist[j] > 0)
        for (k=0; k<nrv; ++k)
          if (bjdat[k].rv == bjlist[j].rv) {
            bjdat[k].nt  +=  numlist[j];
            bjdat[k].xmx += (bjlist[j].xmxy/2 * numlist[j]);
            bjdat[k].xmy += (bjlist[j].xmxy/2 * numlist[j]);
            bjdat[k].xmz +=  bjlist[j].xmz    * numlist[j];
            bjdat[k].ymx += (bjlist[j].ymxy/2 * numlist[j]);
            bjdat[k].ymy += (bjlist[j].ymxy/2 * numlist[j]);
            bjdat[k].ymz +=  bjlist[j].ymz    * numlist[j];
            bjdat[k].mx  += (bjlist[j].mxy/2  * numlist[j]);
            bjdat[k].my  += (bjlist[j].mxy/2  * numlist[j]);
            bjdat[k].mz  +=  bjlist[j].mz     * numlist[j];
          }
    }                                        /* now have bootstrap data set */
    for (j=0; j<nrv; ++j) {    /* compute bootstrap values for each r-class */
      if (dattyp == 'F') computefrag(j);
      else computesite(j);
    }
    if (nrv > 1) allenzclasses(dattyp);     /* get values for all r-classes */

    /* increase sum of the squared differences for each enzyme class */
    increasesumsq(l,dattyp);
  }                                            /* end of bootstrap (i) loop */

  /* now have the sum of the squared differences over all the treatments */
  /* compute the bootstrap standard error */
  for (i=0; i<l; ++i) {
    /* compute bootstrap s.e. for all S-hats or F-hats */
    if ((res[i].xSse < 0) || (nt[i].xSnt <= 1)) res[i].xSse = -1;
    else res[i].xSse = sqrt(res[i].xSse / (nt[i].xSnt-1) );
    if ((res[i].ySse < 0) || (nt[i].ySnt <= 1)) res[i].ySse = -1;
    else res[i].ySse = sqrt(res[i].ySse / (nt[i].ySnt-1) );
    if ((res[i].zSse < 0) || (nt[i].zSnt <= 1)) res[i].zSse = -1;
    else res[i].zSse = sqrt(res[i].zSse / (nt[i].zSnt-1) );
    if ((res[i].ASse < 0) || (nt[i].ASnt <= 1)) res[i].ASse = -1;
    else res[i].ASse = sqrt(res[i].ASse / (nt[i].ASnt-1) );

    /* compute bootstrap s.e. for p-hat (site data) or d-hat (frag data) */
    if ((res[i].xpse < 0) || (nt[i].xpnt <= 1)) res[i].xpse = -1;
    else res[i].xpse = sqrt(res[i].xpse / (nt[i].xpnt-1) );
    if ((res[i].ypse < 0) || (nt[i].ypnt <= 1)) res[i].ypse = -1;
    else res[i].ypse = sqrt(res[i].ypse / (nt[i].ypnt-1) );
    if ((res[i].zpse < 0) || (nt[i].zpnt <= 1)) res[i].zpse = -1;
    else res[i].zpse = sqrt(res[i].zpse / (nt[i].zpnt-1) );
    if ((res[i].Apse < 0) || (nt[i].Apnt <= 1)) res[i].Apse = -1;
    else res[i].Apse = sqrt(res[i].Apse / (nt[i].Apnt-1) );

    if (dattyp == 'S') {
      /* compute bootstrap s.e. for Jukes-Cantor distance */
      if ((res[i].xJCse < 0) || (nt[i].xJCnt <= 1)) res[i].xJCse = -1;
      else res[i].xJCse = sqrt(res[i].xJCse / (nt[i].xJCnt-1) );
      if ((res[i].yJCse < 0) || (nt[i].yJCnt <= 1)) res[i].yJCse = -1;
      else res[i].yJCse = sqrt(res[i].yJCse / (nt[i].yJCnt-1) );
      if ((res[i].zJCse < 0) || (nt[i].zJCnt <= 1)) res[i].zJCse = -1;
      else res[i].zJCse = sqrt(res[i].zJCse / (nt[i].zJCnt-1) );
      if ((res[i].AJCse < 0) || (nt[i].AJCnt <= 1)) res[i].AJCse = -1;
      else res[i].AJCse = sqrt(res[i].AJCse / (nt[i].AJCnt-1) );

      /* compute bootstrap s.e. for Nei-Li distance */
      if ((res[i].xNLse < 0) || (nt[i].xNLnt <= 1)) res[i].xNLse = -1;
      else res[i].xNLse = sqrt(res[i].xNLse / (nt[i].xNLnt-1) );
      if ((res[i].yNLse < 0) || (nt[i].yNLnt <= 1)) res[i].yNLse = -1;
      else res[i].yNLse = sqrt(res[i].yNLse / (nt[i].yNLnt-1) );
      if ((res[i].zNLse < 0) || (nt[i].zNLnt <= 1)) res[i].zNLse = -1;
      else res[i].zNLse = sqrt(res[i].zNLse / (nt[i].zNLnt-1) );
      if ((res[i].ANLse < 0) || (nt[i].ANLnt <= 1)) res[i].ANLse = -1;
      else res[i].ANLse = sqrt(res[i].ANLse / (nt[i].ANLnt-1) );
    }
  }                        /* done with computing bootstrap standard errors */
  printf("                                \r",i);             /* clear line */
}                                                 /* END OF FUNCTION BOOTSE */
/*****************************************************************************
**                                                                          **
**                           FUNCTION INITSESLOTS                           **
**                                                                          **
** This  function  initializes the standard error slots in RES that will be **
** filled with either jackknifed or bootstrapped standard errors.           **
**                                                                          **
** Functions called:  none.                                                 **
*****************************************************************************/
void initseslots(int limit, char dattyp)
{
  int i;

  for (i=0; i<limit; ++i) {
    res[i].xSse = 0;
    res[i].ySse = 0;
    res[i].zSse = 0;
    res[i].ASse = 0;
    res[i].xpse = 0;
    res[i].ypse = 0;
    res[i].zpse = 0;
    res[i].Apse = 0;
    if (dattyp == 'S') {
      res[i].xJCse = 0;
      res[i].yJCse = 0;
      res[i].zJCse = 0;
      res[i].AJCse = 0;
      res[i].xNLse = 0;
      res[i].yNLse = 0;
      res[i].zNLse = 0;
      res[i].ANLse = 0;
    }
  }
}                                            /* END OF FUNCTION INITSESLOTS */
/*****************************************************************************
**                                                                          **
**                          FUNCTION INCREASESUMSQ                          **
**                                                                          **
** This function takes  the current jackknife or bootstrap values, computes **
** the squared  difference between  each  and  the "real"  values, and adds **
** these squared differences to the running sums stored in RES.             **
**                                                                          **
** Functions called:  none.                                                 **
*****************************************************************************/
void increasesumsq(int limit, char dattyp)
{
  int i;
  for (i=0; i<limit; ++i) {
    /* increase the sum of the squared differences for all S-hats or F-hats */
    if ((res[i].xS < 0) || (bj[i].xS < 0)) nt[i].xSnt -= 1;
    else res[i].xSse += pow((res[i].xS - bj[i].xS),2);
    if ((res[i].yS < 0) || (bj[i].yS < 0)) nt[i].ySnt -= 1;
    else res[i].ySse += pow((res[i].yS - bj[i].yS),2);
    if ((res[i].zS < 0) || (bj[i].zS < 0)) nt[i].zSnt -= 1;
    else res[i].zSse += pow((res[i].zS - bj[i].zS),2);
    if ((res[i].AS < 0) || (bj[i].AS < 0)) nt[i].ASnt -= 1;
    else res[i].ASse += pow((res[i].AS - bj[i].AS),2);

    /* increase the sum of the squared differences for all p-hats or d-hats */
    if ((res[i].xp < 0) || (bj[i].xp < 0)) nt[i].xpnt -= 1;
    else res[i].xpse += pow((res[i].xp - bj[i].xp),2);
    if ((res[i].yp < 0) || (bj[i].yp < 0)) nt[i].ypnt -= 1;
    else res[i].ypse += pow((res[i].yp - bj[i].yp),2);
    if ((res[i].zp < 0) || (bj[i].zp < 0)) nt[i].zpnt -= 1;
    else res[i].zpse += pow((res[i].zp - bj[i].zp),2);
    if ((res[i].Ap < 0) || (bj[i].Ap < 0)) nt[i].Apnt -= 1;
    else res[i].Apse += pow((res[i].Ap - bj[i].Ap),2);

    if (dattyp == 'S') {
      /* increase the sum of the squared differences for JC distances */
      if ((res[i].xJC < 0) || (bj[i].xJC < 0)) nt[i].xJCnt -= 1;
      else res[i].xJCse += pow((res[i].xJC - bj[i].xJC),2);
      if ((res[i].yJC < 0) || (bj[i].yJC < 0)) nt[i].yJCnt -= 1;
      else res[i].yJCse += pow((res[i].yJC - bj[i].yJC),2);
      if ((res[i].zJC < 0) || (bj[i].zJC < 0)) nt[i].zJCnt -= 1;
      else res[i].zJCse += pow((res[i].zJC - bj[i].zJC),2);
      if ((res[i].AJC < 0) || (bj[i].AJC < 0)) nt[i].AJCnt -= 1;
      else res[i].AJCse += pow((res[i].AJC - bj[i].AJC),2);

      /* increase the sum of the squared differences for corrected NL dist. */
      if ((res[i].xNL < 0) || (bj[i].xNL < 0)) nt[i].xNLnt -= 1;
      else res[i].xNLse += pow((res[i].xNL - bj[i].xNL),2);
      if ((res[i].yNL < 0) || (bj[i].yNL < 0)) nt[i].yNLnt -= 1;
      else res[i].yNLse += pow((res[i].yNL - bj[i].yNL),2);
      if ((res[i].zNL < 0) || (bj[i].zNL < 0)) nt[i].zNLnt -= 1;
      else res[i].zNLse += pow((res[i].zNL - bj[i].zNL),2);
      if ((res[i].ANL < 0) || (bj[i].ANL < 0)) nt[i].ANLnt -= 1;
      else res[i].ANLse += pow((res[i].ANL - bj[i].ANL),2);
    }
  }
}                                          /* END OF FUNCTION INCREASESUMSQ */
/*****************************************************************************
**                                                                          **
**                            FUNCTION PRINTRES                             **
**                                                                          **
** This  function  is called from  FUNCTION COMPAREOTUS, and prints out the **
** results of the restriction data analysis for the currect OTU X  by OTU Y **
** comparison.                                                              **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
void printres(char dattyp)
{
  int i,j;                                        /* loop control variables */
  if (nrv == 1) j = 1;       /* decide how many iterations -- once for each */
  else j = nrv+1;             /* enzyme class, and one for overall combined */


  for (i=0; i<j; ++i) {
    /* print out headings for enzyme class or for all classes combined */
    if (i < nrv) fprintf(fpw,"\nTOTAL FOR R = %3.1f:",res[i].rv);
    else fprintf(fpw,"\nALL ENZYME CLASSES:");

    /* print out results for within OTU X and within OTU Y */
    fprintf(fpw,"          OTU X                    OTU Y\n");

    /* print number of frags/sites found within OTU X and within OTU Y */
    if (i < nrv) {
      fprintf(fpw,"     Mx, My, Mxy = ");
      if ((rvarray[i].xmx - floor(rvarray[i].xmx)) == 0)
        fprintf(fpw,"%7.0f %7.0f",rvarray[i].xmx,rvarray[i].xmy);
      else {
        fprintf(fpw,"%7.0f ",floor(rvarray[i].xmx)+1);
        fprintf(fpw,"%7.0f",floor(rvarray[i].xmy));
      }
      fprintf(fpw," %7.0f  ",rvarray[i].xmz);
      if ((rvarray[i].ymx - floor(rvarray[i].ymx)) == 0)
        fprintf(fpw,"%7.0f %7.0f",rvarray[i].ymx,rvarray[i].ymy);
      else {
        fprintf(fpw,"%7.0f ",floor(rvarray[i].ymx)+1);
        fprintf(fpw,"%7.0f",floor(rvarray[i].ymy));
      }
      fprintf(fpw," %7.0f\n",rvarray[i].ymz);
    }

    /* FOR THIS ENZYME CLASS: OTU X AND OTU Y: PRINT OUT S-HAT or F-HAT */
    if (dattyp == 'F') fprintf(fpw,"               F =  ");
    else fprintf(fpw,"               S =  ");
    if (res[i].xS >= 0) fprintf(fpw,"%8.6f",res[i].xS);
    else fprintf(fpw," undef. ");
    if (res[i].xSse >= 0) fprintf(fpw,"  +\b_  %8.6f    ",res[i].xSse);
    else fprintf(fpw,"  +\b_   undef.     ");
    if (res[i].yS >= 0) fprintf(fpw,"%8.6f  +\b_  ",res[i].yS);
    else fprintf(fpw," undef.   +\b_  ");
    if (res[i].ySse >= 0) fprintf(fpw,"%8.6f\n",res[i].ySse);
    else fprintf(fpw," undef.\n");

    /* FOR THIS ENZYME CLASS: OTU X AND OTU Y: PRINT OUT p-HAT or d-HAT */
    if (dattyp == 'F') fprintf(fpw,"               d =  ");
    else fprintf(fpw,"               p =  ");
    if (res[i].xp >= 0) fprintf(fpw,"%8.6f",res[i].xp);
    else fprintf(fpw," undef. ");
    if (res[i].xpse >= 0) fprintf(fpw,"  +\b_  %8.6f    ",res[i].xpse);
    else fprintf(fpw,"  +\b_   undef.     ");
    if (res[i].yp >= 0) fprintf(fpw,"%8.6f  +\b_  ",res[i].yp);
    else fprintf(fpw," undef.   +\b_  ");
    if (res[i].ypse >= 0) fprintf(fpw,"%8.6f\n",res[i].ypse);
    else fprintf(fpw," undef.\n");

    if (dattyp == 'S') {
      /* OTU X & OTU Y: PRINT OUT JUKES-CANTOR DISTANCE AND STANDARD ERROR */
      if (res[i].xJC >= 0) fprintf(fpw,"  Jukes-Cantor d =  %8.6f",res[i].xJC);
      else fprintf(fpw,"  Jukes-Cantor d =   undef. ");
      if (res[i].xJCse >= 0) fprintf(fpw,"  +\b_  %8.6f    ",res[i].xJCse);
      else fprintf(fpw,"  +\b_   undef.     ");
      if (res[i].yJC >= 0) fprintf(fpw,"%8.6f  +\b_  ",res[i].yJC);
      else fprintf(fpw," undef.   +\b_  ");
      if (res[i].yJCse >= 0) fprintf(fpw,"%8.6f\n",res[i].yJCse);
      else fprintf(fpw," undef.\n");

      /* OTU X AND OTU Y: PRINT OUT NEI-LI DISTANCE AND STANDARD ERROR */
      if (res[i].xNL >= 0) fprintf(fpw,"        Nei-Li d =  %8.6f",res[i].xNL);
      else fprintf(fpw,"        Nei-Li d =   undef. ");
      if (res[i].xNLse >= 0) fprintf(fpw,"  +\b_  %8.6f    ",res[i].xNLse);
      else fprintf(fpw,"  +\b_   undef.     ");
      if (res[i].yNL >= 0) fprintf(fpw,"%8.6f  +\b_  ",res[i].yNL);
      else fprintf(fpw," undef.   +\b_  ");
      if (res[i].yNLse >= 0) fprintf(fpw,"%8.6f\n",res[i].yNLse);
      else fprintf(fpw," undef.\n");
    }

    fprintf(fpw,"                          X-Y (Raw)             X-Y ");
    fprintf(fpw,"(Corrected)\n");
    if (i < nrv) {
      /* OTU X BY OTU Y, UNCORRECTED AND CORRECTED: PRINT OUT Mx, My, Mxy */
      fprintf(fpw,"     Mx, My, Mxy = ");
      if ((rvarray[i].mx - floor(rvarray[i].mx)) == 0)
     fprintf(fpw,"%7.0f %7.0f",rvarray[i].mx,rvarray[i].my);
      else
     fprintf(fpw,"%7.0f %7.0f",floor(rvarray[i].mx)+1,floor(rvarray[i].my));
      fprintf(fpw," %7.0f\n",rvarray[i].mz);
    }

    /* OTU X BY OTU Y UNCORRECTED AND CORRECTED: PRINT OUT S-HAT or F-HAT */
    if (dattyp == 'F') fprintf(fpw,"               F =  ");
    else fprintf(fpw,"               S =  ");
    if (res[i].zS >= 0) fprintf(fp