/*
 *	File: spectfit.c
 *
 *      (C) IWTS
 *          KU Nijmegen
 *          The Netherlands
 *
 *      Author: R. Harald Baayen
 *
 *      History:
 *
 *	- march 1999, version 1.0 
 *
 *      Description: 
 *      ===> Calculates Gale-Sampson proxy values for large m
 *      (Gale and Sampson, JQL 2.3., 1995);
 *      ===> fits the frequency spectrum to the
 *      implicit parametric Zipfian model V(m) = Ce^{-\mu/m}m^{-b}
 *      (S. Naranan and V.K. Balahsubrahmanyan, JQL 5.1-2,1998,35--61).
 *      This is not a full-fledged LNRE model: V(m) is obtained by
 *      summation of all ranks, and all three parameters may vary
 *      with sample size.  The model is fitted to the data by requiring
 *      EV(1) = V(1) and EV(2) = V(2), and by searching for the optimal
 *      m such that EV(m) = V(m) and EV = V.
 *      ===> calculates raw Good-Turing estimates and estimates based
 *      on the Naranan and Balahsubrahmanyan Zipfian smoother.
 *      Input file: observed spectrum file (text.spc)
 *      Output files: text_N.sum: summary of model statistics and parameters;
 *      text_N.spc:  m, Vm, EVm, GTraw, GTzipf
 *  
 *
 */

#include <stdio.h>
#include <stdlib.h>        
#include <string.h>
#include <errno.h>          
#include <malloc.h>
#include <math.h>          
#include "lex_cons.h"


/* EXTERN FUNCTIONS */

/* functions for numerical procedures */

extern double	gammln ();
extern double   factln();
extern double	power ();

extern void	amoeba ();

extern double   spectfit();
extern void     getparam();
extern double   getVmN();
extern double   getVZZ();
extern double   getV2N();
extern double   getmse2();
extern double   getmse();
extern double   getVn();
extern void     GTproxies();
extern double   GTstdev();
extern void     NVproxies();

extern double	*vector ();
extern double	**matrix ();
extern void	free_vector ();
extern void	free_matrix ();

extern double	sim_functie ();
extern double	sim_functie_mse ();

/* argument reading, file manipulation, and help function */

extern int	leesgetal ();
extern void	change_extension ();
extern void	getGaussParam ();
extern void	help ();


/* GLOBAL VARIABLES */

double   N, V, n1, n2, n3, n4, n5,      /* number of tokens, types, hapaxes, disleg */
         E,                             /* extrapolation sample size */
         SPECTRUM[MAXM3][7],            /* frequency spectrum m Vm */
         PREPOST[MAXM3][2],
         alpha, Gamma, theta, b, c, Z,  /* parameters of the model */
         ALPHA1N[MAXM3],                /* array for relative spectrum V(m,N)/V(N) */
         ALPHA2N[MAXM3],                /* array for relative spectrum at 2N */
         Nzero, Vzero,                  /* original sample size N0, idem voc. */
         Nfit,
         Nproxy, Vproxy, gdiff,         /* N and V given raw proxies */
         eVNzero,
         eV, eV2N, S, eV1, eV2,         /* E[V(N)], E[V(2N)], S */
         eV3, eV4, eV5, eVMax, nMax,
         x, x1, y, y2, y3, y4, y5, z,
         m1, m2, m3, Vm1, Vm2, Vm3, m1orig, m2orig, m3orig,
         xmin1, xmin2, alphamin1, alphamin2,
         w1, w2, w3, w4, w5,            /* weights for similarity function */
         CHUNKS[MAXCHUNKS6],            /* the chunk sizes */
         chunksize, remainDer,          /* chunk variables */
         **sim_mat,                     /* for simplex minimization */
         *sim_vec,
         *sim_yy,
         miny, ZZ,
         minMSE, mse,
         tolerance;

FILE     *fpspectrum,                   /* fpspectrum: datafile: m, Vm */
         *fpexpspect,                   /* expected spectrum */
         *fpexpspect2N,                 /* spectrum at 2N */
         *fpVN,                         /* file with E[V(N)] and E[V(2N)] */
         *fpsum,                        /* file with summary of model */
         *fpint,                        /* interpolation results */
         *fpext,                        /* extrapolation results */
         *fpE,                          /* spectrum at sample size E */
 	 *fullspc,		        /* spectrum with m EVM for m=1..skip */
         *fpKvalues;                    /* list N_k for k = 1..K, K+1,..,2K */

int      nranks,                        /* number of different ranks in spectrum */
         maxrank,                       /* largest rank for fit, default 15 */
         i, j, k,        
         header,                        /* boolean for presence of header */
         nchunks,                       /* number of chunks for interpolation */
         enchunks,                      /* number of chunks for extrapolation */
         token, type,                   /* auxiliary variables for scanf */
         ndimensions,                   /* for simplex downhill method */
         ndimensions1,                  /* for simplex downhill method */
         niterations,                   /* for simplex downhill method */
         simplex_flunked,               /* idem */
         again,                         /* for manual parameter estimation */
         metV,                          /* boolean for amoeba fnc */
         forceN,                        /* force reading N and V from stdin */
         forceV,                        /* force reading N and V from stdin */
	 skip,			        /* only print spectrum for m=1..skip */
         ownparams,                     /* boolean, option for specifying
                                           own fixed parameters */
         optnranks,                     /* number of ranks in optimization */
         kfile,                         /* print fpKvalue file */
         freemem,                       /* free allocated memory */
         firstthree,                    /* boolean for param fitting */
         nMaxRank,                      /* max m: m <= 100 */
         choosem1m2m3,                  /* boolean for choosing m1, m2, m3 */
         optimize,                      /* boolean for optimization */
         besti,                         /* tmp variable */
         withNfitInMSE,                 /* include N - Nfit in getmse() */
         msemethod,                     /* use mse over msenranks ranks for
                                           parameter estimation */
         msenranks,                     /* number of ranks for mse param.
                                           estimation */
         aantal;                        /* for command line options */

char     woord[MAXWORDLENGTH],          /* variable for skipping header in fscanf */
         new_name[MAXWORDLENGTH],       /* variables for extension handling    */
         base_name[MAXWORDLENGTH],
         cc,                            /* for scanf() */
         *fs;                           /* variable for scanning options */


/* MAIN () */

int main (argc, argv)
int     argc;
char    *argv[];

{
   /* DEFAULTS */

   maxrank = DEF_MAXRANK;
   nchunks = DEF_CHUNKS;
   enchunks = DEF_CHUNKS;

   freemem = 0;
   E = NULL_F;
   w1 = 1.0; w2 = 1.0; w3 = 1.0; w4 = NULL_F; w5 = NULL_F;
   skip = 0;
   kfile = 0;
   forceN = 0;
   forceV = 0;
   optimize = 0;
   Vm1 = 0.0; Vm2 = 0.0; Vm3 = 0.0;
   ownparams = 0;
   msemethod = 0;
   choosem1m2m3 = 0;

   header = 1;
   m1 = 1.0; m2 = 2.0; m3 = 3.0; 
   m1orig = m1, m2orig = m2, m3orig = m3;
   msenranks = maxrank;
   optnranks = maxrank;
   firstthree = 1;
   withNfitInMSE = 0;
   metV = 0;

   /* COMMAND LINE OPTIONS */

   while ((--argc > 0) && ((*++argv)[0] == '-')) {
        for (fs = argv[0] + 1; *fs != '\0'; fs++) {
            switch (*fs) {
            case 'h':
                help();
                break;
            case 'm':
                maxrank = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'o':   /* number of ranks for optimization */
                optnranks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'W':   /* specify similarity weights */
                fprintf(stdout, "specify w1 w2 w3 w4 w5 ");
                scanf("%lf %lf %lf %lf %lf", &w1, &w2, &w3, &w4, &w5);
                fprintf(stdout, "\n");
                break;
            case 'n':      
                withNfitInMSE = 1;
                break;
            case 'H':      /* input files without headers! */
                header = 0;
                break;
            case 'p':     
                ownparams = 1;
                break;
            case 'v':      /* add V-eV to amoeba metric function */
                metV = 1;
                break;
            case 'O':      /* do optimization */
                optimize = 1;
                break;
            case 'F':      /* use simplex method */
                firstthree = 0;
                break;
            case 'C':      /* choose m1, m2, m3 for parameter calculation */
                choosem1m2m3 = 1;
                firstthree = 1; /* and don't use simplex method */
                fprintf(stdout, "specify x1 x2 x3 ");
                scanf("%lf %lf %lf", &m1, &m2, &m3);
                m1orig = m1, m2orig = m2, m3orig = m3;
                fprintf(stdout, "\n");
                break;
            case 'e':
                msenranks = leesgetal (fs, &aantal);
                if (msenranks == 0) msenranks = maxrank;
                optnranks = msenranks;
                msemethod = 1;
                optimize = 0;
                ownparams = 0;
                firstthree = 0;
                choosem1m2m3 = 0;
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            default:
                fprintf(stderr, "spectfit: illegal option %c\n", *fs);
                exit(1);
                break;
            }
        }
   } /* of while */

   /* FILE HANDLING */

   if (argc == 0) {
     help ();
   }

   /* load input spectrum, should have .spect extension */

   if ((fpspectrum = fopen(*argv, "r")) == NULL) {
       fprintf(stderr, "spectfit: can't open %s\n", *argv);
       exit(1);
   }

   /*
         fpexpspect     text.S.spc
         fpexpspect2N   text.S.sp2
         fpVN           text.S.ev2
   */

   /* file name handling output files */
 
   strncpy(base_name, *argv, strlen(*argv) - 4);

   change_extension (base_name, new_name, "_N.spc");
   if ((fpexpspect = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "spectfit: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_N.sum");
   if ((fpsum = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "spectfit: can't open output file %s\n", new_name);
      exit(1);
   }

   /* LOAD SPECTRUM FILE */

   nranks = 0; n1 = 0; n2 = 0;
   if (header){
      fscanf(fpspectrum, "%s ", woord);  /* m */
      fscanf(fpspectrum, "%s ", woord);  /* Vm */
   }
   while (fscanf(fpspectrum, "%d %d", &token, &type) != EOF)  {
        nranks++;
        SPECTRUM[nranks][0] = (double) token;
        SPECTRUM[nranks][1] = (double) type;
        if (token == 1) n1 = (double) type;
        if (token == 2) n2 = (double) type;
        if (token == 3) n3 = (double) type;
        if (token == 4) n4 = (double) type;
        if (token == 5) n5 = (double) type;
        if (token == (int) m1) Vm1 = (double) type;
        if (token == (int) m2) Vm2 = (double) type;
        if (token == (int) m3) Vm3 = (double) type;
        if (token <= optnranks) {
               nMax = type;
               nMaxRank = token;
        }
        N+= (double) token * (double) type;
        V+= (double) type;
   }

   if (Vm1 == 0.0) {
        fprintf(stderr, "error, V(%d) = 0\n", (int) m1);
   }
   if (Vm2 == 0.0) {
        fprintf(stderr, "error, V(%d) = 0\n", (int) m2);
   }
   if (Vm3 == 0.0) {
        fprintf(stderr, "error, V(%d) = 0\n", (int) m3);
   }

   if (forceN > 0) {
       Nzero = (double) forceN;
       N = (double) forceN;
   }
   if (forceV > 0){
       Vzero = (double) forceV;
       V = (double) forceV;
   }

   ZZ = N;


if ((optimize == 1) && (ownparams==0)) { 
   fprintf(stderr, "optimizing mse for m = %d onwards\n", (int) m3);
   minMSE = 100000000000000000000000000000000.0;
   for (i = (int) m3; i <= nranks; i++) {
       m3 = SPECTRUM[i][0];
       Vm3 = SPECTRUM[i][1];
       getparam(m1, m2, m3, Vm1, Vm2, Vm3);
       /* eV = getVZZ(); */
       mse = getmse();
       if (mse < minMSE) {
          minMSE = mse;
          besti = i;
          fprintf(stderr, "optimal m3 = %d, mse = %10.4f\n", \
             (int) SPECTRUM[besti][0],minMSE);
       }
   }
   m3 = SPECTRUM[besti][0];
   Vm3 = SPECTRUM[besti][1];
}

if (ownparams==0) {
   getparam(m1, m2, m3, Vm1, Vm2, Vm3); 
}


if ((firstthree == 0) && (ownparams==0)) {

   /* use downhill simplex method, NumRecC, p. 305 */

   ndimensions = 3;  /* C, mu, gamma */
              /* for ease of programming, I use the following mapping:
                  b -> mu
                  Z -> C
                  gamma -> gamma
              */
   ndimensions1 = ndimensions + 1;
   tolerance = 0.0001;
   niterations = 0;

   fprintf(stdout, "Z = %10.2f b = %10.4f Gamma = %10.4f\n", Z, b, Gamma);
   fprintf(stdout, "accept as starting point? ");
   scanf("%1s", &cc);
   if (cc!='y') {
      fprintf(stdout, "specify C, mu, and gamma: ");
      scanf("%lf %lf %lf", &Z, &b, &Gamma);
   }
   fprintf(stdout, "tolerance = %f, change? (y/n) ", tolerance);
   scanf("%1s", &cc);
   if (cc=='y') {
        fprintf(stdout, "specify tolerance: ");
        scanf("%lf", &tolerance);
   }
   fflush(stdout);

   freemem = 1;
   sim_mat = matrix (1, ndimensions + 1, 1, ndimensions);
   sim_vec = vector (1, ndimensions + 1);
   sim_yy = vector (1, ndimensions);

   sim_mat[1][1] = Z;    sim_mat[1][2] = b;         sim_mat[1][3] = Gamma;
   sim_mat[2][1] = 0.7*Z;sim_mat[2][2] = (0.2*b); sim_mat[2][3] = 0.9*Gamma;
   sim_mat[3][1] = 1.7*Z;sim_mat[3][2] = (1.2*b); sim_mat[3][3] = 1.3*Gamma;
   sim_mat[4][1] = 1.3*Z;sim_mat[4][2] = (1.7*b); sim_mat[4][3] = 1.7*Gamma;

   for (i = 1; i <= ndimensions+1; i++){
      sim_yy[1] = sim_mat[i][1];
      sim_yy[2] = sim_mat[i][2];
      sim_yy[3] = sim_mat[i][3];
      /* sim_vec[i] = sim_functie(sim_yy); */
      if (msemethod == 0) {
          sim_vec[i] = sim_functie(sim_yy);
      } else {
          sim_vec[i] = sim_functie_mse(sim_yy);
      }

   }

    /* print_sim_matrix(sim_mat,ndimensions); */

   fprintf (stdout, "\nStarting simplex method for parameter estimation\n");
   fflush (stdout);
   /*
   amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie, &niterations,0);
   */
   if (msemethod == 0) {
       amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie, \
               &niterations,0);
   } else {
       amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie_mse, \
               &niterations,0);
   }

   /* find minimum for which values are less than tolerance */

   if (simplex_flunked != 2) {
      miny = MAXX;
      for (i = 1; i <= ndimensions+1; i++){
        if (sim_vec[i] < miny){
          j = i;
          miny = sim_vec[i];
        }
      }
      Z = sim_mat[j][1]; b = sim_mat[j][2]; Gamma = sim_mat[j][3];
   }
   else{
    simplex_flunked = 1;
   }
}

if (ownparams==1) {
   fprintf(stdout, "specify C, mu, and gamma: ");
   scanf("%lf %lf %lf", &Z, &b, &Gamma);
   fprintf(stdout, "\n");
}

   fprintf(stdout, "\n   C  = %10.4f  mu    = %14.12f  gamma     = %10.4f\n", \
           Z, b, Gamma);
   eV1 = spectfit(1);
   eV2 = spectfit(2);
   eV3 = spectfit(3);
   eV4 = spectfit(4);
   eV5 = spectfit(5);
   eVMax = spectfit(nMaxRank);
   fprintf(stdout, "   V1   = %10.0f   E[V1]   = %15.2f\n", n1, eV1);
   fprintf(stdout, "   V2   = %10.0f   E[V2]   = %15.2f\n", n2, eV2);
   fprintf(stdout, "   V3   = %10.0f   E[V3]   = %15.2f\n", n3, eV3);
   fprintf(stdout, "   V4   = %10.0f   E[V4]   = %15.2f\n", n4, eV4);
   fprintf(stdout, "   V5   = %10.0f   E[V5]   = %15.2f\n", n5, eV5);
   fprintf(stdout, "   V%d = %10.0f   E[V%d]  = %15.2f\n", nMaxRank, nMax, \
             nMaxRank, eVMax);
   eV = getVZZ();  
   fprintf(stdout, "   V    = %10.2f   E[V]    = %15.2f\n", V, eV);
   
   if (ownparams==0) {
   if (firstthree == 0) {
      fprintf(stdout, "parameters estimated with simplex method\n");
   } else {
      fprintf(stdout, "parameters estimated with ranks %d, %d, and %d\n",\
         (int) m1, (int) m2, (int) m3);
   }
   } else {
      fprintf(stdout, "parameter values not optimized but enforced\n");
   }
   fflush(stdout);

   /* calculate EVm for all attested ranks */
   for (i = 1; i <= nranks; i++) {
      SPECTRUM[i][2] = spectfit((int)SPECTRUM[i][0]);
   }


   /* calculate Gale-Church-Sampson proxies */

   for (j = 1; j <= nranks; j++) {
     if (j > 1) {
       i = (int) SPECTRUM[j - 1][0]; 
     } else {
       i = 0;
     }
     if (j != nranks) { 
       k = (int) SPECTRUM[j + 1][0]; 
     } else {
       k = (int) ((2.0*SPECTRUM[j][0])-(1.0*SPECTRUM[j-1][0])); 
     }
     SPECTRUM[j][3] = (2.0*SPECTRUM[j][1])/((double) (k-i));
   }

   /* calculate Good-Turing estimates for smoothed values */

   for (j = 1; j <= nranks; j++) {
     SPECTRUM[j][4] = \
      (SPECTRUM[j][0]+1.0)*spectfit((int)(SPECTRUM[j][0]+1.0))/SPECTRUM[j][2];
   }

   /* calculate Good-Turing estimates for proxies without parametric smoothing*/

   /* calculate the Vm values for the unattested ranks */
   for (j = 2; j <= nranks; j++) {
      GTproxies(j);
   }
   /* calculate the GT estimates */
   for (j = 1; j < nranks; j++) {
      k = j+1;
      if (SPECTRUM[k][0]==(SPECTRUM[j][0]+1)){
         SPECTRUM[j][5] = (SPECTRUM[k][0]*SPECTRUM[k][3])/SPECTRUM[j][3];
      } else {
         SPECTRUM[j][5] = (SPECTRUM[j][0]+1.0)*PREPOST[j][1]/SPECTRUM[j][3];
      }
   }
   SPECTRUM[j][5] = (SPECTRUM[j][0]+1.0)*PREPOST[j][1]/SPECTRUM[j][3];

   /* calculate proxy values of N and V */
   NVproxies();
   

   /* SHOW OUTPUT IN THE text_N.spc FILE */
 
   fprintf(fpexpspect, \
   "     m     Vm        EVm        SVm     mStar  mStarRaw    StdevMstar\n");

   SPECTRUM[nranks][1]=SPECTRUM[nranks-1][1];
   for (i = 1; i <= nranks; i++ ) {
     fprintf(fpexpspect, "%6d %6.0f %10.5f %10.5f %9.3f %9.3f %9.4f\n", \
         (int) SPECTRUM[i][0], SPECTRUM[i][1], SPECTRUM[i][2], \
         SPECTRUM[i][3], SPECTRUM[i][4], SPECTRUM[i][5],\
         GTstdev(i));
   }


   

   if (freemem == 1) {
     free_matrix (sim_mat, 1, ndimensions + 1, 1, ndimensions);
     free_vector (sim_vec, 1, ndimensions + 1);
     free_vector (sim_yy, 1, ndimensions);
   }

   fprintf(fpsum, "Naranan and Balasubrahmanyan's spectrum fitter for %s\n", \
           *argv);
   fprintf(fpsum, "N          = %12d\n", (int) N);
   fprintf(fpsum, "V(N)       = %12d\n", (int) V);
   fprintf(fpsum, "E[V(N)]    = %12.2f\n", eV);
   fprintf(fpsum, "V(1,N)     = %12d\n", (int) n1);
   fprintf(fpsum, "E[V(1,N)]  = %12.2f\n", eV1);
   fprintf(fpsum, "V(2,N)     = %12d\n", (int) n2);
   fprintf(fpsum, "E[V(2,N)]  = %12.2f\n", eV2);
   fprintf(fpsum, "V(3,N)     = %12d\n", (int) n3);
   fprintf(fpsum, "E[V(3,N)]  = %12.2f\n", eV3);
   fprintf(fpsum, "V(4,N)     = %12d\n", (int) n4);
   fprintf(fpsum, "E[V(4,N)]  = %12.2f\n", eV4);
   fprintf(fpsum, "V(5,N)     = %12d\n", (int) n5);
   fprintf(fpsum, "E[V(5,N)]  = %12.2f\n", eV5);
   fprintf(fpsum, "S          = not available\n");
   fprintf(fpsum, "C          = %12.4f\n",  Z);
   fprintf(fpsum, "mu         = %12.4f\n",  b);
   fprintf(fpsum, "gamma      = %12.4f\n",  Gamma);
   /* calculate N for all ranks 1 up to mmax */
   z = 0.0;
   for (i = 1; i <= SPECTRUM[nranks][0]; i++) {
       z+= ((double) i) * spectfit(i);
   }
   fprintf(fpsum, "Nfit       = %12.4f (for m = 1 .. mmax)\n", z);
   fprintf(stdout, "N (for m = 1 .. mmax) in fit = %10.2f; N = %d\n", \
                   z, (int)N);
   if (ownparams==0){
    if (firstthree==1) {
     if (optimize==1) {
        fprintf(fpsum, "optimized with starting ranks %d, %d, and %d\n",\
                (int) m1orig, (int) m2orig, (int) m3orig);
     }
     fprintf(fpsum, "Parameters estimated using ranks %d, %d, and %d\n", \
                (int) m1, (int) m2, (int) m3);
    } else {
     fprintf(fpsum, "simplex method used for parameter estimation\n");
     if (msemethod == 1) {
        fprintf(fpsum, "mse over first %d ranks used in cost function\n",\
                msenranks);
        if (metV == 0) {
           fprintf(fpsum, "|V(N)-E[V(N)]| was not included in cost function\n");
        } else {
           fprintf(fpsum, "|V(N)-E[V(N)]| was included in cost function\n");
        }
        if (withNfitInMSE == 0) {
           fprintf(fpsum, "|N-Nfit| was not included in cost function\n");
        } else {
           fprintf(fpsum, "|N-Nfit| was included in cost function\n");
        }
     } else {
        fprintf(fpsum, "w1--5     = %3.2f %3.2f %3.2f %3.2f %3.2f\n", \
                w1, w2, w3, w4, w5);
     }
    }
   } else {
      fprintf(fpsum, "user-specified parameters\n");
   }

   withNfitInMSE = 0; /* MSE will now be calculated as for LNRE programs,
                         excluding N-Nfit */
   fprintf(fpsum,  "MSE (%d+2) = %12.2f\n", optnranks, getmse());
   fprintf(stdout, "MSE(%d+2) = %12.2f\n", optnranks, getmse());

   fprintf(fpsum,  "Nproxy     = %12.4f\n", Nproxy);
   fprintf(fpsum,  "Vproxy     = %12.4f\n", Vproxy);
   fclose(fpsum);

   /*
   fprintf(stdout, "V2N = %20.10f\n", getV2N());
   */

   
   return (0);
}

double spectfit(m)
int m;
{
  double x;

  x = (double) m;
  return( Z * exp(-1.0*b/x) * power(x, -1.0*Gamma) );
}

double GTstdev(j) 
int j;
{
  double nr, nr1, var, x;
  int r, r1;
  extern double spectfit();

  r = SPECTRUM[j][0];
  r1 = r+1;
  nr = SPECTRUM[j][2];  /* smoothed Naranan-Bra.. value */
  nr1 = spectfit(r1);

  x = (2.0*log((double) r1)+log(nr1))-(2.0*log(nr))+log(1.0+(nr1/nr));
  /* var = (r1*r1*nr1/(nr*nr))*(1.0+(nr1/nr));*/
  var = exp(x);
  return(sqrt(var));
}


void help ()
{
  fprintf (stderr,"spectfit text.spc\n");
  fprintf (stderr,"OPTIONS:\n");
  fprintf (stderr,"     -h: display help\n");
  fprintf (stderr,"     -m: number of ranks in fit (default: 15)\n");
  fprintf (stderr,"     -H: input files lack header (default: with header)\n");
  /* fprintf (stderr,"     -C: cost function C1 for parameter calculation\n");*/
  /* fprintf (stderr,"     -W: modify weights for similarity function\n");*/
  /* fprintf (stderr,"     -O: don't optimize\n"); */
  /* fprintf (stderr,"     -o: optimize with first -o ranks (default -m)\n");*/
  /* fprintf (stderr,"     -F: force simplex parameter estimation\n"); */
  /* fprintf (stderr,"     -p: use parameter values as given by user\n"); */
  fprintf (stderr,"     -e: use -e ranks for mse in simplex cost function\n");
  fprintf (stderr,"     -n: include |N-Nfit| in cost function\n");
  fprintf (stderr,"     -v: include |V-eV| in cost function\n");
  fprintf (stderr,"INPUT:\n");
  fprintf (stderr,"     text.spc:  m Vm\n");
  fprintf (stderr,"OUTPUT:\n");
  fprintf (stderr,"     text_N.spc:  expected spectrum\n");
  fprintf (stderr,"     text_N.sum:  summary, fitted parameters\n");
  exit (1);
}


double sim_functie_mse(x)
double *x;
{
   extern double getmse();

   Z = x[1]; b = x[2]; Gamma = x[3];
   if (Z <= 0) Z = 0.1;
   if (Gamma <= 0) Gamma = 0.1;

   return(getmse());
}


double sim_functie(x)
double *x;
{
   extern double spectfit();
   extern double getVZZ();

   Z = x[1]; b = x[2]; Gamma = x[3];
   if (Z <= 0) Z = 0.1;
   if (Gamma <= 0) Gamma = 0.1;
   eV1 = spectfit(1);
   eV2 = spectfit(2);
   eV3 = spectfit(3);
   eV4 = spectfit(4);
   eV5 = spectfit(5);
   eVMax = spectfit(nMaxRank);
   eV = getVZZ();

   if (metV == 1) {
   return( w1*fabs(V-eV) + w2*fabs(n1-eV1) + w3*fabs(n2-eV2)+w4*fabs(n3-eV3)\
        +w5*fabs(nMax-eVMax) );
   } else {
   return( w1*fabs(n1-eV1) + w2*fabs(n2-eV2) + w3*fabs(n3-eV3)+w4*fabs(n4-eV4)\
        +w5*fabs(nMax-eVMax) );
   }
}



void getparam(x1, x2, x3, Vx1, Vx2, Vx3)
double x1, x2, x3, Vx1, Vx2, Vx3;
{

  /* for ease of programming, I use the following mapping:
       b -> mu
       Z -> C
       Gamma -> gamma
  */
  double L;

  /*
  L = log(3.0)/log(2.0);
  b = (log(n3/n1)+(L*log(n1/n2)))/( (2.0/3.0) - (L/2.0));
  Z = exp(log(n1)+b);
  Gamma = (log(Z/n2) - (b/2.0))/log(2.0);
  */
  
  L = log(x1/x3)/log(x1/x2);
  b = (log(Vx3/Vx1) - (L*log(Vx2/Vx1))) /
      ((1.0/x1)-(1.0/x3) - (L*((1.0/x1)-(1.0/x2))));
  Gamma = (log(Vx2/Vx1) - (b*((1.0/x1)-(1.0/x2))))/log(x1/x2);
  Z = exp(log(Vx1)+(b/x1)+(Gamma*log(x1)));
} 


double getmse (){
   extern double getVn ();
   extern double getVZZ();
   
   double som, som2, esom2, EVn, Vn, x, y, EV; 
   int i;
   
   som = 0.0; som2 = 0.0; esom2 = 0.0;
   x = 0; y = 0;
   for (i = 1; i <= msenranks; i++) {
        EVn = spectfit(i);
        Vn = getVn(i);
        som += (EVn-Vn)*(EVn-Vn);
        som2 += Vn; esom2 += EVn;
   }
   EV = getVZZ();
   x = EV - esom2; y = V - som2;
   som += (x-y)*(x-y);
   som += (EV-V)*(EV-V);
   if (withNfitInMSE==1) {
      som += (N-Nfit)*(N-Nfit);
      return(som/((double)(msenranks+3)));
   } else {
      return(som/((double)(msenranks+2)));
   }
}


double getmse2()
{
  int i;
  double som, x;
  extern double spectfit();

  som = 0.0;
  for (i = 1; i <= optnranks; i++) {
       x = spectfit(i);
       som += (x-SPECTRUM[i][1])*(x-SPECTRUM[i][1]);
  }
  som += (V-eV)*(V-eV);
  return(som/(optnranks+1));
}

double getV2N()
{
  int i;
  double som, teken;
  extern double spectfit();
  som = eV;
  for (i = 1; i <= SPECTRUM[nranks][0]; i++) {
       if (i%2==1) {
            teken = 1.0;
       } else {
            teken = -1.0;
       }
       som += (teken * spectfit(i));
  }
  return(som);
}

double getVZZ()
{
  int i;
  double som;
  extern double spectfit();
  Nfit = 0.0;
  som = 0.0;
  for (i = 1; i <= SPECTRUM[nranks][0]; i++) {
       som += spectfit(i);
       Nfit += spectfit(i)*((double)i);
  }
  return(som);
}

void NVproxies()
{

   int i, j, n;
   double x;

   Nproxy = SPECTRUM[1][1]; 
   Vproxy = Nproxy;  /* V(1,N) */

   
   for (j = 2; j <= nranks; j++) {
      Nproxy += (SPECTRUM[j][0]*SPECTRUM[j][3]);
      Vproxy += SPECTRUM[j][3];
      i = j-1; 
      if (SPECTRUM[i][0]!=(SPECTRUM[j][0]-1)) {
        for (n = SPECTRUM[i][0]+1; n < SPECTRUM[j][0]; n++) {
           x = (SPECTRUM[i][3]+SPECTRUM[j][3])/2.0;
           Nproxy += n*x;
           Vproxy += x;
        }
      }
   }
   for (n = SPECTRUM[nranks][0]+1; \
        n <= (2*SPECTRUM[nranks][0])-SPECTRUM[nranks-1][0]; n++) {
      x = (SPECTRUM[nranks][3])/2.0;
      Vproxy += x;
      Nproxy += n*x;
   }
}

void GTproxies(j)
int j;
{
  int i, k, diff;
  double addval;
 
  if (j < 2) {
     fprintf(stderr, "cannot work with a spectrum without hapax legomena\n");
     exit(1);
  }

  i = j-1; 
  k = j+1;
  if (j == nranks) {
     k = j;
     diff = (2*SPECTRUM[k][0])-SPECTRUM[i][0];
     gdiff = diff;
  } else {
     diff = SPECTRUM[k][0]-SPECTRUM[i][0];
  }
  addval = SPECTRUM[j][1]/((double)diff);

  if (SPECTRUM[i][0]!=(SPECTRUM[j][0]-1)) {
      /* there is at least one zero entry preceding rank SPECTRUM[j][0] */
      /* add addval to the POST position of rank SPECTRUM[i][0] */
      PREPOST[i][1] += addval;
      /* add addval to the PRE position of rank SPECTRUM[j][0] */
      PREPOST[j][0] += addval;
  } 

  if (SPECTRUM[k][0]!=(SPECTRUM[j][0]+1)) {
      /* there is at least one zero entry following rank SPECTRUM[j][0] */
      /* add addval to the POST position of rank SPECTRUM[j][0] */
      PREPOST[j][1] += addval;
      /* add addval to the PRE position of rank SPECTRUM[k][0] */
      PREPOST[k][0] += addval;
      if (j==nranks) PREPOST[k][1] = addval;
  } 
  
}

double getVn (r) 
int r;
{
   int i;
   for (i = r; (int) SPECTRUM[i][0] > r; i--) ;
   if ((int) SPECTRUM[i][0] == r) return(SPECTRUM[i][1]);
   else return(0.0);
}
