/*
 *	File: lnreSgam.c
 *
 *      (C) IWTS
 *          KU Nijmegen
 *          The Netherlands
 *
 *      Author: R. Harald Baayen
 *		Fiona J. Tweedie
 *
 *      History:
 *
 *      - jul 1997, version 1.0 (rhb)
 *	- nov 1998, version 1.1 (rhb, fjt)
 *  - dec 1999, version 1.2 (rhb): addition of -z option
 *        
 *
 *
 */

#include <stdio.h>
#include <stdlib.h>        
#include <string.h>
#include <errno.h>          
#include <malloc.h>
#include <math.h>          
#include "lex_cons.h"


/* EXTERN FUNCTIONS */

/* functions for numerical procedures */

extern double	gammln ();
extern void	    amoeba ();
extern double	alpha1 ();
extern double	alpha2 ();
extern double	alphaRecur ();
extern double	expV ();
extern double	getEV0 ();
extern double	getS ();
extern double   getModelN ();
extern double	IF ();
extern double	bessel2 ();
extern void	    triplet ();
extern double   getmse ();

extern double	*vector ();
extern double	**matrix ();
extern void	free_vector ();
extern void	free_matrix ();

extern double	sim_functie ();
extern double	sim_functie_mse ();
extern void	print_sim_matrix ();

/* argument reading, file manipulation, and help function */

extern int	leesgetal ();
extern void	change_extension ();
extern void	getGaussParam ();
extern void	help ();


/* GLOBAL VARIABLES */

double   N, V, n1, n2, n3, n4, n0,      /* number of tokens, types, hapaxes, disleg */
         E,                             /* extrapolation sample size */
         SPECTRUM[MAXM3][2],            /* frequency spectrum m Vm */
         alpha, Gamma, theta, b, c, Z,  /* parameters of the model */
         steinAlpha, steinXi,           /* Stein's alpha and xi */
         ALPHA1N[MAXM3],                /* array for relative spectrum V(m,N)/V(N) */
         ALPHA2N[MAXM3],                /* array for relative spectrum at 2N */
         Nzero, Vzero,                  /* original sample size N0, idem voc. */
         eVNzero,
         eV, eV2N, S, eV1, eV2,         /* E[V(N)], E[V(2N)], S */
         eV3, eV4, eVn, eV0, eV0x,
         x, x1, y, y2, y3, y4, y5, z,
         xmin1, xmin2, alphamin1, alphamin2,
         w1, w2, w3, w4, w5,            /* weights for similarity function */
         CHUNKS[MAXCHUNKS6],            /* the chunk sizes */
         HACKVM[10001][2],              /* hacked Vm values */
         chunksize, remainDer,          /* chunk variables */
         **sim_mat,                     /* for simplex minimization */
         *sim_vec,
         *sim_yy,
         miny,
         tolerance;

FILE     *fpspectrum,                   /* fpspectrum: datafile: m, Vm */
         *fpexpspect,                   /* expected spectrum */
         *fpexpspect2N,                 /* spectrum at 2N */
         *fpVN,                         /* file with E[V(N)] and E[V(2N)] */
         *fpsum,                        /* file with summary of model */
         *fpint,                        /* interpolation results */
         *fpext,                        /* extrapolation results */
         *fpE,                          /* spectrum at sample size E */
 	     *fullspc,		                /* spectrum with m EVM for m=1..skip */
         *fpmstar,                      /* m mstar output file */
         *fpKvalues;                    /* list N_k for k = 1..K, K+1,..,2K */

int      nranks,                        /* number of different ranks in spectrum */
         maxrank,                       /* largest rank for fit, default 15 */
         i, j, r,           
         header,                        /* boolean for presence of header */
         k,                             /* variable for chunks */
         nchunks,                       /* number of chunks for interpolation */
         hackm,                         /* hack to allow the hackm'th
                                           interpolated sample size to
                                           generate the number of
                                           ranks specified with the -s option,
                                           results in the .fsp file */
         enchunks,                      /* number of chunks for extrapolation */
         token, type,                   /* auxiliary variables for scanf */
         ndimensions,                   /* for simplex downhill method */
         ndimensions1,                  /* for simplex downhill method */
         niterations,                   /* for simplex downhill method */
         simplex_flunked,               /* idem */
         again,                         /* for manual parameter estimation */
         forceN,                        /* force reading N and V from stdin */
         forceV,                        /* force reading N and V from stdin */
         steinparams,                   /* use xi and alpha in sim_functie */
	     skip,			                /* only print spectrum for m=1..skip */
         kfile,                         /* print fpKvalue file */
         freemem,                       /* free allocated memory */
         msemethod,                     /* use mse over msenranks ranks for
                                           parameter estimation */
         msenranks,                     /* number of ranks for mse param.
                                           estimation */
         mstar,                         /* boolean for printing of mstar file */
		 zero_truncated,                /* 0 iff V(0,N) is known */
         aantal;                        /* for command line options */

char     woord[MAXWORDLENGTH],          /* variable for skipping header in fscanf */
         new_name[MAXWORDLENGTH],       /* variables for extension handling    */
         base_name[MAXWORDLENGTH],
         cc,                            /* for scanf() */
         *fs;                           /* variable for scanning options */


/* MAIN () */

int main (argc, argv)
int     argc;
char    *argv[];

{
   /* DEFAULTS */

   maxrank = DEF_MAXRANK;
   nchunks = DEF_CHUNKS;
   enchunks = DEF_CHUNKS;
   header = 1;
   freemem = 0;
   E = NULL_F;
   w1 = 1.0; w2 = 1.0; w3 = 1.0; w4 = NULL_F; w5 = NULL_F;
   skip = 0;
   kfile = 0;
   forceN = 0;
   forceV = 0;
   hackm = 0;
   msenranks = maxrank;
   msemethod = 0;
   mstar = 0;
   steinparams = 0;
   zero_truncated = 1;

   /* COMMAND LINE OPTIONS */

   while ((--argc > 0) && ((*++argv)[0] == '-')) {
        for (fs = argv[0] + 1; *fs != '\0'; fs++) {
            switch (*fs) {
            case 'h':
                help();
                break;
            case 'X':
                steinparams = 1;
                break;
            case 'S':
                mstar = 1;
                break;
            case 'x':
                hackm = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'E':
                i =  leesgetal (fs, &aantal);
                E = (double) i;
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'k':
                nchunks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'K':
                enchunks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'e':
                msenranks = leesgetal (fs, &aantal);
                msemethod = 1;
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'm':
                maxrank = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'z':
				zero_truncated = 0;  /* V(0,N) is known! */
				break;
            case 'W':   /* specify similarity weights */
                fprintf(stdout, "specify w1 w2 w3 w4 w5 ");
                scanf("%lf %lf %lf %lf %lf", &w1, &w2, &w3, &w4, &w5);
                fprintf(stdout, "\n");
                break;
            case 'H':      /* input files without headers! */
                header = 0;
                break;
            case 'F':
                kfile = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                if (kfile == 0) {
                  fprintf(stderr, "lnreSgam: cannot int/ext with zero N\n");
                  exit(1);
                }
                break;
            case 's':      /* don't interpolate or extrapolate */
                           /* show m and Vm and EVm for m=1..s */
                skip = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                if (skip == 0) {
                   fprintf(stderr, "lnreSgam: cannot skip with zero rank\n");
                   exit(1);
                }
                if (skip > 10000) {
                   fprintf(stderr,"lnreSgam: calculation limited to m=10000\n");
                   skip = 10000;
                }
                break;
            case 'V':
                forceV = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'N':
                forceN = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            default:
                fprintf(stderr, "lnreSgam: illegal option %c\n", *fs);
                exit(1);
                break;
            }
        }
   } /* of while */

   /* FILE HANDLING */

   if (argc == 0) {
     help ();
   }
   if ((zero_truncated == 0) && (msemethod==0)) {
	    fprintf(stderr, "using msemethod with m=15\n");
		msenranks = 15;
        msemethod = 1;
   }

   if ((hackm>0) && (skip==0)) {
        fprintf(stderr, "the interpolation hack can only be carried\n");
        fprintf(stderr, "out in combination with the -s option\n");
        exit(1);
   }

   /* load input spectrum, should have .spect extension */

   if ((fpspectrum = fopen(*argv, "r")) == NULL) {
       fprintf(stderr, "lnreSgam: can't open %s\n", *argv);
       exit(1);
   }

   /*
         fpexpspect     text.S.spc
         fpexpspect2N   text.S.sp2
         fpVN           text.S.ev2
   */

   /* file name handling output files */
 
   strncpy(base_name, *argv, strlen(*argv) - 4);

   if ((skip == 0) && (kfile == 0)) {

   change_extension (base_name, new_name, "_G.spc");
   if ((fpexpspect = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_G.sp2");
   if ((fpexpspect2N = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_G.ev2");
   if ((fpVN = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_G.sum");
   if ((fpsum = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_G.int");
   if ((fpint = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_G.ext");
   if ((fpext = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
      exit(1);
   }
   if (mstar == 1) {
     change_extension (base_name, new_name, "_G.str");
     if ((fpmstar = fopen(new_name, "w")) == NULL){
        fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
        exit(1);
     }
   }
   if (E > NULL_F){
     change_extension (base_name, new_name, "_G.sp3");
     if ((fpE = fopen(new_name, "w")) == NULL){
        fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
        exit(1);
     }
   }
  } else {
    if (skip > 0 ) {
        change_extension (base_name, new_name, "_G.fsp");
        if ((fullspc = fopen(new_name, "w")) == NULL){
           fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
           exit(1);
        }
	/* CHANGE HERE - THIS BIT ADDED */

       change_extension (base_name, new_name, "_G.sum");
       if ((fpsum = fopen(new_name, "w")) == NULL){
          fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
          exit(1);
       }
    } else {
        change_extension (base_name, new_name, "_G.iex");
        if ((fpKvalues = fopen(new_name, "w")) == NULL){
              fprintf(stderr,"lnreSgam: can't open output file %s\n", new_name);
              exit(1);
        }
    }
  }

   /* LOAD SPECTRUM FILE */

   nranks = 0; n1 = 0; n2 = 0;
   if (header){
      fscanf(fpspectrum, "%s ", woord);  /* m */
      fscanf(fpspectrum, "%s ", woord);  /* Vm */
   }
   while (fscanf(fpspectrum, "%d %d", &token, &type) != EOF)  {
        nranks++;
        SPECTRUM[nranks][0] = (double) token;
        SPECTRUM[nranks][1] = (double) type;
		if (token == 0) {
			if (zero_truncated==1) {
				fprintf(stderr, "lnreSgam: error - spectrum contains m=0\n");
				fprintf(stderr, "          use z-option\n");
				exit(1);
			} else {
				n0 = type;
				fprintf(stdout, "lnreSgam: distribution including V(0,N)\n");
				type = 0;
				nranks--;
			}
		} 
        if (token == 1) n1 = type;
        if (token == 2) n2 = type;
        if (token == 3) n3 = type;
        if (token == 4) n4 = type;
        N+= (double) token * (double) type;
        V+= (double) type;
   }

   if (forceN > 0) {
       Nzero = (double) forceN;
       N = (double) forceN;
   }
   if (forceV > 0){
       Vzero = (double) forceV;
       V = (double) forceV;
   }

   /* DETERMINE THE PARAMETERS OF THE MODEL */

   getGaussParam (N, V, n1, &alpha, &theta, &b, &c); 

   if ((c == 0) && (b==0)){
      fprintf (stdout, "lnreSgam: no solution for Gamma=-0.5\n");
      Z = 0;
   }
   else{
      Z = 1.0 / c;
      Gamma = -0.5;
   }
   Nzero = N;
   Vzero = V;

   triplet ();
   
   /* PARAMETER ESTIMATION */

   /* use downhill simplex method, NumRecC, p. 305 */

   ndimensions = 3;  /* specific for full Sichel model */
   ndimensions1 = ndimensions + 1;
   tolerance = 0.0001;
   niterations = 0;


   fprintf(stdout, "run downhill simplex minimization? (y/n) \n");
   scanf("%1s", &cc);
   if (cc=='y'){
    fprintf(stdout, "Initial values for minimization:\n");
    fprintf(stdout, "   Z  = %10.4f   b     = %14.12f   gamma     = %10.4f\n", \
            Z, b, Gamma);
    fprintf(stdout, "   V  = %10.0f   E[V]  = %15.2f\n", V, eV);
	if (zero_truncated == 0) {
    fprintf(stdout, "   V0 = %10.0f   E[V0] = %15.2f\n", n0, eV0);
	}
    fprintf(stdout, "   V1 = %10.0f   E[V1] = %15.2f\n", n1, eV1);
    fprintf(stdout, "   V2 = %10.0f   E[V2] = %15.2f\n", n2, eV2);
    fprintf(stdout, "   V3 = %10.0f   E[V3] = %15.2f\n", n3, eV3);
    fflush(stdout);
 
    fprintf(stdout,"proceed (y), specify own starting point (o), or quit (q) ");
    scanf("%1s", &cc);
    if (cc == 'q') exit(1);
    if (cc=='o'){
     fprintf(stdout, "specify Z, b, and gamma: ");
     scanf("%lf %lf %lf", &Z, &b, &Gamma);
    }
    fprintf(stdout, "tolerance = %f, change? (y/n) ", tolerance);
    scanf("%1s", &cc);
    if (cc=='y') {
        fprintf(stdout, "specify tolerance: ");
        scanf("%lf", &tolerance);
    }
    fflush(stdout);

    freemem = 1;
    sim_mat = matrix (1, ndimensions + 1, 1, ndimensions);
    sim_vec = vector (1, ndimensions + 1);
    sim_yy = vector (1, ndimensions);

    if (steinparams == 0) {
    sim_mat[1][1] = Z;    sim_mat[1][2] = b;         sim_mat[1][3] = Gamma;
    sim_mat[2][1] = 0.7*Z;sim_mat[2][2] = b-(0.2*b); sim_mat[2][3] = Gamma-0.1;
    sim_mat[3][1] = 1.7*Z;sim_mat[3][2] = b+(0.2*b); sim_mat[3][3] = Gamma+0.1;
    sim_mat[4][1] = 1.3*Z;sim_mat[4][2] = b+(0.1*b); sim_mat[4][3] = Gamma+0.02;
    } else {
      steinAlpha = b*sqrt(1.0+(c*N));
      steinXi = (b*c*N)/2.0;

      sim_mat[1][1] = steinAlpha;    
      sim_mat[1][2] = steinXi;         
      sim_mat[1][3] = Gamma;

      sim_mat[2][1] = 0.7*steinAlpha;
      sim_mat[2][2] = steinXi-(0.2*steinXi); 
      sim_mat[2][3] = Gamma-0.1;

      sim_mat[3][1] = 1.7*steinAlpha;
      sim_mat[3][2] = steinXi+(0.2*steinXi); 
      sim_mat[3][3] = Gamma+0.1;

      sim_mat[4][1] = 1.3*steinAlpha;
      sim_mat[4][2] = steinXi+(0.1*steinXi); 
      sim_mat[4][3] = Gamma+0.02;
    }


    for (i = 1; i <= ndimensions+1; i++){
      sim_yy[1] = sim_mat[i][1];
      sim_yy[2] = sim_mat[i][2];
      sim_yy[3] = sim_mat[i][3];
      if (msemethod == 0) {
          sim_vec[i] = sim_functie(sim_yy);
      } else {
          sim_vec[i] = sim_functie_mse(sim_yy);
      }
    }

    /* print_sim_matrix(sim_mat,ndimensions); */

    fprintf (stdout, "\nStarting simplex method for parameter estimation\n");
    fflush (stdout);
    if (msemethod == 0) {
       amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie, \
               &niterations,0);
    } else {
       amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie_mse, \
               &niterations,0);
    }

    /* find minimum for which values are less than tolerance */

    if (simplex_flunked != 2) {
      miny = MAXX;
      for (i = 1; i <= ndimensions+1; i++){
        if (sim_vec[i] < miny){
          j = i;
          miny = sim_vec[i];
        }
      }
      if (steinparams==0) {
         Z = sim_mat[j][1]; b = sim_mat[j][2]; Gamma = sim_mat[j][3];
      } else {
         steinAlpha = sim_mat[j][1];
	 steinXi = sim_mat[j][2];
         Gamma = sim_mat[j][3];
         b = sqrt((steinAlpha*steinAlpha)+(steinXi*steinXi))-steinXi;
         c = (2.0*steinXi)/(N*b); Z = 1.0/c;
      }
    }
   }
   else{
    simplex_flunked = 1;
   }

   triplet();
   if (steinparams==1) {
        fprintf(stderr, "xi = %10.4f alpha = %10.4f\n", steinXi, steinAlpha);
   }
   fprintf(stdout, "\n   Z  = %10.4f  b     = %14.12f  gamma     = %10.4f\n", \
           Z, b, Gamma);
   fprintf(stdout, "   V  = %10.0f   E[V]  = %15.2f\n", V, eV);
   if (zero_truncated == 0) {
   fprintf(stdout, "   V0 = %10.0f   E[V0] = %15.2f\n", n0, eV0);
   }
   fprintf(stdout, "   V1 = %10.0f   E[V1] = %15.2f\n", n1, eV1);
   fprintf(stdout, "   V2 = %10.0f   E[V2] = %15.2f\n", n2, eV2);
   fprintf(stdout, "   V3 = %10.0f   E[V3] = %15.2f\n", n3, eV3);
   fflush(stdout);

   if (simplex_flunked==1){   /* TRY MANUALLY */
     getGaussParam (N, V, n1, &alpha, &theta, &b, &c);
     if ((c == 0)&&(b==0)){
         fprintf(stdout, "lnreSgam: no solution for Gamma=-0.5\n");
         fflush(stdout);
         Z = 0;
     }
     else{
      if (simplex_flunked != 2) {
         Z = 1.0/c;
         Gamma = -0.5;
         triplet();
         fprintf(stdout, \
                 "   Z  = %10.4f  b     = %14.12f  gamma     = %10.2f\n", \
                 Z, b, -0.5);
         fprintf(stdout, "V    = %15.2f  V1    = %15.2f  V2    = %15.2f\n", \
                         V, n1, n2);
         fprintf(stdout, "E[V] = %15.2f  E[V1] = %15.2f  E[V2] = %15.2f\n", \
                         eV, eV1, eV2);
         fflush(stdout);
      }
     }

     again = 1;
     if (simplex_flunked==2) {
       again = 0;
     }
     while (again){
        fprintf(stdout, "specify Z, b, and gamma\n ");
        scanf("%lf %lf %lf", &Z, &b, &Gamma);
        triplet();
        fprintf(stdout, "Summary of main accuracy statistics:\n");
        fprintf(stdout, "V    = %15.2f  V1    = %15.2f  V2    = %15.2f\n", \
                         V, n1, n2);
        fprintf(stdout, "E[V] = %15.2f  E[V1] = %15.2f  E[V2] = %15.2f\n", \
                         eV, eV1, eV2);
        fprintf(stdout, \
                 "   Z  = %10.4f  b     = %14.12f  gamma     = %10.8f\n", \
                 Z, b, Gamma);
        fflush(stdout);
        fprintf(stdout, "reestimate (r), continue (c), or quit (q)? ");
        scanf("%s", &cc);
        if (cc=='q') exit(1);
        if (cc!='r') again=0;
     }
   }

   /* CONSTRUCT RELATIVE FREQUENCY SPECTRUM FOR THE FIRST maxrank RANKS */
 
   triplet();

   ALPHA1N[1] = alpha1();
   ALPHA1N[2] = alpha2(ALPHA1N[1]);
   for (i = 3; i <= maxrank; i++){
      ALPHA1N[i] = alphaRecur( (double) i, ALPHA1N[i-1], ALPHA1N[i-2]);
   }

   /* AND CALCULATE E[V(N)] and S */

   Nzero = N;    /* take present sample size as Nzero */

   if (skip > 0) {
    fprintf(stdout, "estimating EVm for m=1..%d\n", skip);
    fflush(stdout);
	eV0 = getEV0();
    HACKVM[1][0] = eV1;
    HACKVM[2][0] = eV2;
    alphamin1 = eV2/eV;
    alphamin2 = eV1/eV;
    for (i = 3; i <= skip; i++) {
      x = alphaRecur( (double) i, alphamin1, alphamin2);
      HACKVM[i][0] = eV*x;
      /* fprintf(fullspc, "%10d %10.4f\n", i, eV*x); */
      alphamin2 = alphamin1; alphamin1 = x;
    }
    fprintf(stdout, "V = %10.2f  V0 = %10.2f   S      = %12.5f\n", eV, eV0, S);
    fflush(stdout);

    /* 
     reset N to the required sample size, and recalculate EVm
    */
    if (hackm > 0) {
      chunksize = floor(Nzero/(nchunks*1.0));
      remainDer = Nzero - ((nchunks*1.0) * chunksize);
      for (k = 1; k <= nchunks; k++)   CHUNKS[k] = chunksize;
      for (k = 1; k <= remainDer; k++) CHUNKS[k]++;
      for (k = 2; k <= nchunks; k++)   CHUNKS[k] += CHUNKS[k-1];
      N = CHUNKS[hackm];
      fprintf(stdout, "interpolation for chunk %d, N = %f\n", hackm, N);

      x = expV();
      eV = x;
	  eV0x = getEV0();
      y = alpha1();
      y2 = alpha2(y);
      eV1 = x*y; HACKVM[1][1] = eV1;
      eV2 = x*y2; HACKVM[2][1] = eV2;
      alphamin1 = eV2/eV;
      alphamin2 = eV1/eV;
      for (i = 3; i <= skip; i++) {
         x = alphaRecur( (double) i, alphamin1, alphamin2);
         HACKVM[i][1] = eV*x;
         alphamin2 = alphamin1; alphamin1 = x;
      }
    }
    
    if (hackm==0) {
	   if (zero_truncated) {
           fprintf(fullspc, "m EVm\n");
	   } else {
           fprintf(fullspc, "m EVm\n%10d %20.10f\n", 0, eV0);
	   }
       for (i=1; i <= skip; i++) {
          fprintf(fullspc, "%10d %20.10f\n", i, HACKVM[i][0]);
       }
    } else {
	   if (zero_truncated) {
           fprintf(fullspc, "m EVm EVmInt1\n");
	   } else {
           fprintf(fullspc, "m EVm EVmInt1\n%10d %20.10f %20.10f\n", 0, eV0, eV0x);
	   }
       for (i=1; i <= skip; i++) {
          fprintf(fullspc, "%10d %20.10f %20.10f\n",\
                  i,HACKVM[i][0],HACKVM[i][1]);
       }
    }
    fflush(fullspc);


    /* I THINK I ALSO CHANGED THE FOLLOWING LINE - CHECK ORIGINAL CODE
    fclose(fullspc);
       CHANGE HERE - THIS BIT ADDED */

   fprintf(fpsum, "Full Inverse Gauss-Poisson model for %s\n", *argv);
   fprintf(fpsum, "N         = %12d\n", (int) N);
   fprintf(fpsum, "V(N)      = %12d\n", (int) V);
   fprintf(fpsum, "E[V(N)]   = %12.2f\n", eV);
   if (zero_truncated == 0) {
   fprintf(fpsum, "V(0,N)    = %12d\n", (int) n0);
   fprintf(fpsum, "E[V(0,N)] = %15.2f\n", eV0);
   }
   fprintf(fpsum, "V(1,N)    = %12d\n", (int) n1);
   fprintf(fpsum, "E[V(1,N)] = %12.2f\n", eV1);
   fprintf(fpsum, "V(2,N)    = %12d\n", (int) n2);
   fprintf(fpsum, "E[V(2,N)] = %12.2f\n", eV2);
   fprintf(fpsum, "S         = %12.2f\n", S);
   fprintf(fpsum, "Z         = %12.10f (c = %20.15f)\n",  Z, 1.0 / Z);
   fprintf(fpsum, "b         = %22.20f\n",  b);
   fprintf(fpsum, "gamma     = %12.10f\n",  Gamma);
   if (w1 != 1.0){
   fprintf(fpsum, "w1--5     = %3.2f %3.2f %3.2f %3.2f %3.2f\n", w1, w2, w3, w4, w5);
   }
   fclose(fpsum);
   exit(1);
   }

   if (kfile > 0) {
     Nzero = (double) kfile;
     chunksize = floor(Nzero/(nchunks*1.0));
     remainDer = Nzero - ((nchunks*1.0) * chunksize);
     for (k = 1; k <= nchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++) CHUNKS[k]++;
     for (k = 2; k <= nchunks; k++)   CHUNKS[k] += CHUNKS[k-1];

     fprintf(stdout, "\nComputing interpolation+extrapolation statistics\n");
     fflush(stdout);
     fprintf(fpKvalues, "       N       EV      EV1      EV2      EV3      EV4      EV5       GV\n");
     for (k = 1; k <= nchunks; k++){
        /* fprintf(stdout, "[%d]", k);
        fflush(stdout); */
        N = CHUNKS[k]+1; x1 = expV();
        N = CHUNKS[k];
        x = expV();
        eV = x;
        y = alpha1();
        fprintf(fpKvalues, "%15.2f %15.2f %15.2f", N,  x, x*y);
        y2 = alpha2(y);
        y3 = alphaRecur((double) 3, y2, y);
        y4 = alphaRecur((double) 4, y3, y2);
        y5 = alphaRecur((double) 5, y4, y3);
        fprintf(fpKvalues, "%15.2f %15.2f %15.2f %15.2f %15.4f\n", 
                y2*x, y3*x, y4*x, y5*x, x1-x);
     }
     fprintf(stdout, "\n");
     fflush(stdout);
     fclose(fpKvalues);

     exit(1);
   }


   /* PRINT SUMMARY */

   /* fprintf(fpsum, "Full Inverse Gauss-Poisson model for %s", *argv); */
   fprintf(fpsum, "Full GIGP model for %s", *argv);
    if (msemethod == 0) {
     fprintf(fpsum, " (parameter estimation using E[V(N)] and E[V(1,N)])\n");
   } else {
     fprintf(fpsum, " (parameter estimation using mse method with %d ranks)\n",\
                msenranks);
   }
   fprintf(fpsum, "N         = %12d\n", (int) N);
   fprintf(fpsum, "V(N)      = %12d\n", (int) V);
   fprintf(fpsum, "E[V(N)]   = %12.2f\n", eV);
   if (zero_truncated == 0) {
   fprintf(fpsum, "V(0,N)    = %12d\n", (int) n0);
   fprintf(fpsum, "E[V(0,N)] = %15.2f\n", eV0);
   }
   fprintf(fpsum, "V(1,N)    = %12d\n", (int) n1);
   fprintf(fpsum, "E[V(1,N)] = %12.2f\n", eV1);
   fprintf(fpsum, "V(2,N)    = %12d\n", (int) n2);
   fprintf(fpsum, "E[V(2,N)] = %12.2f\n", eV2);
   fprintf(fpsum, "S         = %12.2f\n", S);
   fprintf(fpsum, "Z         = %12.10f (c = %12.10f)\n",  Z, 1.0 / Z);
   fprintf(fpsum, "b         = %22.20f\n",  b);
   fprintf(fpsum, "gamma     = %12.10f\n",  Gamma);
   if (msemethod == 0) {
     if (w1 != 1.0){
         fprintf(fpsum, "w1--5     = %3.2f %3.2f %3.2f %3.2f %3.2f\n", \
         w1, w2, w3, w4, w5);
     }
   } 
   fprintf(fpsum, "Nfit      = %12.4f (for m = 1 .. mmax)\n",getModelN()); 
   fclose(fpsum);
   fprintf(stdout, "N (for m = 1 .. mmax) in fit = %10.2f; N = %d\n", \
                    getModelN(), (int) N);

   /* PRINT SPECTRUM */

   fprintf(fpexpspect, "         m         Vm        EVm     alphaM    EalphaM\n");
   if (zero_truncated==0) {
	   fprintf(fpexpspect, "%10d %10d %10.4f %10.4f %10.4f\n", 0, (int) n0, \
		eV0, n0/V, eV0/eV);
   }
   for (i = 1; i <= maxrank; i++) {
     fprintf(fpexpspect, "%10d %10d ",  (int) SPECTRUM[i][0], \
           (int) SPECTRUM[i][1]);
     fprintf(fpexpspect, "%10.4f %10.4f %10.4f\n", \
           ALPHA1N[i]*eV,  SPECTRUM[i][1]/V, ALPHA1N[i]);
   }
   fclose(fpexpspect);

   fprintf(stdout, "MSE(%d+2) = %10.4f\n", msenranks, getmse());

   if (mstar==1) {
      /* initialize for m =1 and m=2 */
      x = expV(); eV = x;
      y = alpha1(); y2 = alpha2(y);
      eV1 = x*y; eV2 = x*y2; 
      alphamin1 = eV2/eV; alphamin2 = eV1/eV;
      z = eV2;
      fprintf(fpmstar, "m mstar\n%10d %20.10f\n", 1, (2.0 * eV2/eV1));
      r = 2;
      for (i = 3; i <= SPECTRUM[nranks][0]; i++) {
         x = alphaRecur( (double) i, alphamin1, alphamin2);
         eVn = x*eV;
         if ((int) SPECTRUM[r][0] == i-1) {
           fprintf(fpmstar, "%10d %20.10f\n", i-1, ((double) i) * (eVn/z));
           r++;
         }
         alphamin2 = alphamin1; alphamin1 = x; z = eVn;
      }
      x = alphaRecur( (double) i, alphamin1, alphamin2);
      eVn = x*eV;
      fprintf(fpmstar, "%10d %20.10f\n", i-1, ((double) i) * (eVn/z));
      fclose(fpmstar);
   }

   /* PRINT SPECTRUM AT 2N */

   N = 2 * Nzero;
   eV2N = expV();

   /* BUT FIRST PRINT VOCABULARY SIZES */

   fprintf(fpVN, "       V       EV     EV2N\n");
   fprintf(fpVN, "%15.2f %15.2f %15.2f\n", V, eV, eV2N);
   fclose(fpVN);

   /* NOW CONTINUE */

   eV = eV2N;
   if (!zero_truncated) {
   		eV0 = getEV0();
   }
   ALPHA2N[1] = alpha1();
   ALPHA2N[2] = alpha2(ALPHA2N[1]);
   for (i = 3; i <= 2*maxrank; i++){
      	ALPHA2N[i] = alphaRecur( (double) i, ALPHA2N[i-1], ALPHA2N[i-2]);
   }

   fprintf(fpexpspect2N, "         m      EVm2N\n");
   if (!zero_truncated) {
	   fprintf(fpexpspect2N, "%10d %10.2f\n", 0, eV0);
   }
   for (i = 1; i <= 2 * maxrank; i++){
       fprintf(fpexpspect2N, "%10d %10.2f\n", i, ALPHA2N[i]*eV2N);
   }
   fclose(fpexpspect2N);

   /* INTERPOLATION */

   if (nchunks > 0){ 

     /* CALCULATE THE TEXT CHUNKS */

     chunksize = floor(Nzero/(nchunks*1.0));
     remainDer = Nzero - ((nchunks*1.0) * chunksize);
     for (k = 1; k <= nchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++) CHUNKS[k]++;
     for (k = 2; k <= nchunks; k++)   CHUNKS[k] += CHUNKS[k-1];
    
     /* AND PRINT THE CORRESPONDING STATISTICS */

	 if (zero_truncated) {
     fprintf(fpint, "       N       EV      EV1     EV2      EV3      EV4      EV5       GV\n");
	 } else {
     fprintf(fpint, "       N       EV      EV1     EV2      EV3      EV4      EV5       GV      EV0\n");
	 }

     for (k = 1; k <= nchunks; k++){
        N = CHUNKS[k]+1; x1 = expV();
        N = CHUNKS[k];
		if (!zero_truncated) eV0 = getEV0();
        x = expV();
        eV = x;
        y = alpha1();
        fprintf(fpint, "%15.2f %15.2f %15.2f", N,  x, x*y);
        y2 = alpha2(y);
        y3 = alphaRecur((double) 3, y2, y);
        y4 = alphaRecur((double) 4, y3, y2);
        y5 = alphaRecur((double) 5, y4, y3);
        fprintf(fpint, "%15.2f %15.2f %15.2f %15.2f %15.4f", 
                y2*x, y3*x, y4*x, y5*x, x1-x);
		if (zero_truncated) {
			fprintf(fpint, "\n");
		} else {
			fprintf(fpint, " %15.4f\n", eV0);
		}
     }
     fclose(fpint);
   }

   /* EXTRAPOLATION */
   
   if (E == NULL_F) {  /* extrapolate to 2N */
	 if (zero_truncated) {
     fprintf(fpext, "       N       EV      EV1     EV2      EV3      EV4      EV5\n");
	 } else {
     fprintf(fpext, "       N       EV      EV1     EV2      EV3      EV4      EV5       EV0\n");
	 }

     for (k = 1; k <= nchunks; k++){
        N = Nzero + CHUNKS[k];
		if (!zero_truncated) eV0 = getEV0();
        x = expV();
        eV = x;
        y = alpha1();
        fprintf(fpext, "%15.2f %15.2f %15.4f ", N,  x, y*x);
        y2 = alpha2(y);
        y3 = alphaRecur((double) 3, y2, y);
        y4 = alphaRecur((double) 4, y3, y2);
        y5 = alphaRecur((double) 5, y4, y3);
        fprintf(fpext, "%15.2f %15.2f %15.2f %15.2f", y2*x, y3*x, y4*x, y5*x);
		if (zero_truncated) {
			fprintf(fpext, "\n");
		} else {
			fprintf(fpext, " %15.4f\n", eV0);
		}
     }
   }
   else{ 
     
     /* FIND NEW CHUNKSIZES */
     chunksize = floor((E-Nzero)/(enchunks*1.0));
     remainDer = (E-Nzero) - ((enchunks*1.0) * chunksize);
     for (k = 1; k <= enchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++)  CHUNKS[k]++;
     for (k = 2; k <= enchunks; k++)   CHUNKS[k] += CHUNKS[k-1];

     /* PRINT THE GROWTH CURVE */
 
	 if (zero_truncated) {
     fprintf(fpext, \
"       N       EV      EV1     EV2      EV3      EV4      EV5\n");
	 } else {
     fprintf(fpext, \
"       N       EV      EV1     EV2      EV3      EV4      EV5       EV0\n");
	 }

     for (k = 1; k <= enchunks; k++){
        N = Nzero + CHUNKS[k];
		if (!zero_truncated) eV0 = getEV0();
        x = expV();
		eV = x;
        y = alpha1();
        fprintf(fpext, "%15.2f %15.2f %15.4f ", N,  x, y*x);
        y2 = alpha2(y);
        y3 = alphaRecur((double) 3, y2, y);
        y4 = alphaRecur((double) 4, y3, y2);
        y5 = alphaRecur((double) 5, y4, y3);
        fprintf(fpext, "%15.2f %15.2f %15.2f %15.2f", y2*x, y3*x, y4*x, y5*x);
		if (zero_truncated) {
			fprintf(fpext, "\n");
		} else {
			fprintf(fpext, " %15.4f\n", eV0);
		}
     }


     /* AND SHOW THE SPECTRUM AT E */

     N = E;
     eV2N = expV ();
     eV = eV2N;
     ALPHA2N[1] = alpha1 ();
     ALPHA2N[2] = alpha2 (ALPHA2N[1]);
     for (i = 3; i <= maxrank; i++){
         ALPHA2N[i] = alphaRecur ((double) i, ALPHA2N[i-1], ALPHA2N[i-2]);
     }
     fprintf(fpE, "         m      EVmXN\n");
	 if (!zero_truncated) {
		 eV0 = getEV0();
         fprintf(fpE, "%10d %10.2f\n", 0, eV0);
	 }
     for (i = 1; i <= maxrank; i++){
       fprintf(fpE, "%10d %10.2f\n", i, ALPHA2N[i]*eV2N);
     }
   }


   if (freemem == 1) {
     free_matrix (sim_mat, 1, ndimensions + 1, 1, ndimensions);
     free_vector (sim_vec, 1, ndimensions + 1);
     free_vector (sim_yy, 1, ndimensions);
   }

   
   return (0);
}


double getS ()
{
  return( ((2.0*Z)/b) *(bessel2(Gamma, b)/bessel2(Gamma+1.0, b)) );
}


double expV()
{
  S = getS();
  return(
   S * (1.0 -
              (
                bessel2(Gamma, b*sqrt(1.0+(N/Z))) /
                ( 
                  exp((Gamma/2.0) * log(1.0+(N/Z))) * bessel2(Gamma,b)
                )
              )
       )
 );
}


double alpha1()
{
 return(
   ((N/ exp((Gamma+1.0)* log( sqrt(1.0+(N/Z)) )) )
   *
   ( bessel2(Gamma+1.0, b*sqrt(1.0+(N/Z))) / bessel2(Gamma+1.0, b) ) /
   eV)
 );
}


double alpha2(a1)   /* a1 = alpha1() */
double a1;
{
 return(
   a1 * 0.5 *  (1.0/((Z/N) + 1.0))  *
   ( 
       (
         (
            b*sqrt(1+(N/Z))*bessel2(Gamma, b*sqrt(1+(N/Z)))
         )   
         /
         (
            2.0 * bessel2(Gamma+1.0, b*sqrt(1.0+(N/Z)))
         )
       )
       + Gamma + 1
   )
 );
}


double alphaRecur(m, amin1, amin2)
double m, amin1, amin2;
{
  double x;
  x = (b*N)/(2*Z*sqrt(1+(N/Z)));
  return(
    (((Gamma+m-1.0)/m)*(1.0/( (Z/N)+1.0)) * amin1) +
    ((x * x)/(m*(m-1.0))*amin2)
  );
}


void help ()
{
  fprintf (stderr,"lnreSgam text.spc\n");
  fprintf (stderr,"OPTIONS:\n");
  fprintf (stderr,"     -h: display help\n");
  fprintf (stderr,"     -m: number of ranks in fit (default: 15)\n");
  fprintf (stderr,"     -k: number of chunks for interpolation (default: 20)\n");
  fprintf (stderr,"     -K: number of chunks for extrapolation (default: 20)\n");
  fprintf (stderr,"     -E: extrapolation sample size (default: 2N)\n");
  fprintf (stderr,"     -W: modify weights for similarity function\n");
  fprintf (stderr,"     -H: input files lack header (default: with header)\n");
  fprintf (stderr,"     -s: number of ranks to be calculated, excluding int/ext\n");
  fprintf (stderr,"     -e: use -e ranks for mse in simplex cost function\n");
  fprintf (stderr,"     -z: non-truncated distribution with V(0,N), uses -e15 as default\n");
  fprintf (stderr,"     -x: interpolation chunk to be included with -s option\n");
  fprintf (stderr,"     -N: force number of tokens to be N\n");
  fprintf (stderr,"     -V: force number of tokens to be V\n");
  fprintf (stderr,"     -S: make m mstar output file (extension: .str)\n");
  fprintf (stderr,"INPUT:\n");
  fprintf (stderr,"     text.spc:  m Vm\n");
  fprintf (stderr,"OUTPUT:\n");
  fprintf (stderr,"     text_G.spc:  expected spectrum\n");
  fprintf (stderr,"     text_G.sp2:  expected spectrum at 2N\n");
  fprintf (stderr,"     text_G.ev2:  E[V(N)] and E[V(2N)]\n");
  fprintf (stderr,"     text_G.sum:  summary, fitted parameters\n");
  exit (1);
}


double bessel2 (nu,z)
double nu,z;
{
 double x, y, q;

 x = PI_2;
 y = sin (nu * PI);
 q = IF((-1.0*nu),z) - IF(nu,z);
 return((x*q)/y);
}

double IF (nu, z)
double nu, z;

{
  double	teller, noemer;
  double	som, term, delta, lastterm;
  double	n;
  double	OPSLAG[1000];
  int		i, j;

  term = 1.0;
  delta = 1.0;
  n = NULL_F;
  som = NULL_F;
  i = 0;
  lastterm = 0;

  while (delta > EPSILON) { 
    teller = exp( (nu + (2.0*n)) * log( z/2.0 ) );
    if (n > NULL_F) {
        noemer = exp(gammln(n+1.0))* exp(gammln(nu+n+1.0));
    }
    else{
        delta = 1.0;
        noemer = exp(gammln(nu+n+1.0));
    }
    term = teller / noemer;
    if (i > 999) {
        fprintf(stderr, "lnreSgam: no convergence within 1000 steps for Bessel function\n");
        fflush(stdout);
        fclose(stdout);
        exit(1);
    }
    OPSLAG[i] = term;
    if (i > 0) {
        delta = term - lastterm;
        if (delta < NULL_F) {
            delta *= -1.0;
        }
    }

    n += 1.0;
    i++;
    lastterm = term;
 }
 for (j = i-1; j >= 0; j--) {
    som += OPSLAG[j];
 }
 return(som);
}


void print_sim_matrix (m, ndim)
double **m;
int ndim;
{
 int i, j;
 for (i = 1; i <= ndim+1; i++){
   for (j = 1; j <= ndim; j++){
       fprintf(stderr,"%10.2f ", m[i][j]);
   }
   fprintf(stderr, "%4.2f ", sim_vec[i]);
   Z = m[i][1]; b = m[i][2]; Gamma = m[i][3];
   triplet();
   fprintf(stderr, "V : %6.2f/%6.2f  V1 : %6.2f/%6.2f V2: %6.2f/%6.2f\n", 
      eV, V, eV1, n1, eV2, n2);
 }
}

double sim_functie_mse(x)
double *x;
{
   extern double getmse();
   double simMSE;

   /* calculate MSE for the first msenranks given the parameter values in x */

   if (steinparams==0) {
         Z = x[1]; b = x[2]; Gamma = x[3];
         if (Z <= 0) Z = 0.1;
         if (b <= 0) b = NULL_F;
   } else {
         steinAlpha = x[1];
	     steinXi = x[2];
         Gamma = x[3];
         b = sqrt((steinAlpha*steinAlpha)+(steinXi*steinXi))-steinXi;
         c = (2.0*steinXi)/(N*b); Z = 1.0/c;
   }
   if (Gamma <= -1) Gamma = -0.99;
   if (Gamma >= 0) Gamma = -0.01;
   triplet();
   /* calculate simMSE */
   simMSE = getmse();
   return(simMSE);
}

double sim_functie(x)
double *x;
{
   Z = x[1]; b = x[2]; Gamma = x[3];
   if (Z <= 0) Z = 0.1;
   if (b <= 0) b = NULL_F;
   if (Gamma <= -1) Gamma = -0.99;
   if (Gamma >= 0) Gamma = -0.01;
   triplet();
   return((w1*fabs(V-eV))+(w2*fabs(n1-eV1))+(w3*fabs(n2-eV2))+(w4*fabs(n3-eV3))+(w5*fabs(n4-eV4)));
}


double getEV0() {
  return( (2*Z*bessel2(Gamma,b*sqrt(1.0+(N/Z)))) / 
          (b* bessel2(Gamma+1.0, b) * exp( (Gamma/2.0) * log(1.0+(N/Z)) ))
	 );
}

void triplet(){
   eV = expV ();
   ALPHA1N[1] = alpha1();
   ALPHA1N[2] = alpha2(ALPHA1N[1]);
   ALPHA1N[3] = alphaRecur( (double) 3, ALPHA1N[2], ALPHA1N[1]);
   ALPHA1N[4] = alphaRecur( (double) 4, ALPHA1N[3], ALPHA1N[2]);
   if (zero_truncated==0) {
	   eV0 = getEV0();
   }
   eV1 = ALPHA1N[1]*eV;
   eV2 = ALPHA1N[2]*eV;
   eV3 = ALPHA1N[3]*eV;
   eV4 = ALPHA1N[4]*eV;
}

double getVn (r) 
int r;
{
   int i;
   for (i = r; (int) SPECTRUM[i][0] > r; i--) ;
   if ((int) SPECTRUM[i][0] == r) return(SPECTRUM[i][1]);
   else return(0.0);
}

double getmse (){
   extern void triplet ();
   extern double getVn ();
   
   double som, som2, esom2, EVn, Vn, x, y, alphamin1, alphamin2;
   int i;
   
   triplet ();
   som = 0.0; som2 = 0.0; esom2 = 0.0;
   if (zero_truncated==0) {
   		som += (eV0-n0)*(eV0-n0);
		som2 = n0; esom2 = eV0;
   }
   som += (eV1-n1)*(eV1-n1);
   som += (eV2-n2)*(eV2-n2);

   som2 += (n1 + n2); esom2 += eV1+eV2;
   x = 0; y = 0;

   alphamin1 = eV2/eV;
   alphamin2 = eV1/eV;

   for (i = 3; i <= msenranks; i++) {
        x = alphaRecur( (double) i, alphamin1, alphamin2);
        EVn = eV*x;
        alphamin2 = alphamin1; alphamin1 = x;

        Vn = getVn(i);

        som2 += Vn; esom2 += EVn;
        som += (EVn-Vn)*(EVn-Vn);
   }
   x = eV - esom2; y = V - som2;
   som += (x-y)*(x-y);
   if (zero_truncated) som += (eV-V)*(eV-V);
   return(som/((double)(msenranks+2)));
}

double getModelN (){
   extern void triplet ();
   
   double som, alphamin1, alphamin2, x, EVn;
   int i;
   
   triplet ();
   som = (eV1 + (2.0*eV2));

   alphamin1 = eV2/eV;
   alphamin2 = eV1/eV;

   for (i = 3; i <= SPECTRUM[nranks][0]; i++) {
        x = alphaRecur( (double) i, alphamin1, alphamin2);
        EVn = eV*x;
        alphamin2 = alphamin1; alphamin1 = x;
        som += EVn*((double)i);
   }
   return(som);
}