/*
    gaussian_sim_marginal

    Copyright (C) 2010 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/
/******************************************************************************
 *
 *  File:           gaussian_sim_marginal.c
 *
 *  Function:       
 *
 *  Author(s):      Douglas L. Theobald
 *
 *  Copyright:      Copyright (c) 2011 Douglas L. Theobald
 *                  All Rights Reserved.
 *
 *  Source:         Started anew.
 *
 *  Notes:          
 *
 *  Change History:
 *          2011_04_15_nnn    Started source
 *  
 *****************************************************************************/
// gcc -O3 -ffast-math -Wall -Werror -std=c99 -pedantic -o gaussian_sim_marginal -lgsl -lgslcblas gaussian_sim_marginal.c

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#ifdef   __linux__
  #include <getopt.h>
#endif
#include <ctype.h>
#include <math.h>
#include <float.h>
#include <pthread.h>
#include <assert.h>
#include <sys/resource.h>
#include <time.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_integration.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_eigen.h>


double             burnin = 0.0;
int                iters = 1000000;
double             nu = 1.0;
int                pnum = 1;
double            *pave = NULL;
double            *lnpost = NULL;
double            *lnlike = NULL;
double            *lnprior = NULL;
double           **x = NULL;
double           **cov = NULL;
//int                col = 3; // starts at 0


void
MatPrintLowerDiag(double **matrix, const int size)
{
    int             i, j;

    printf("\n\n");
    for (i = 0; i < size; ++i)
    {
        printf("%-2d: [", i);
        for (j = 0; j <= i; ++j)
            printf(" % 10.2f", matrix[i][j]);
        printf(" ]\n");
    }
    
    printf("     ");
    for (i = 0; i < size; ++i)
        printf(" % 10d", i);
    printf("\n");

    fflush(NULL);
}


void
MatDestroy(double ***matrix_ptr)
{
    double       **matrix = *matrix_ptr;

    if (matrix != NULL)
    {
        if (matrix[0] != NULL)
        {
            free(matrix[0]);
            matrix[0] = NULL;
        }

        free(matrix);
        *matrix_ptr = NULL;
    }
}


double
**MatAlloc(const int rows, const int cols)
{
    int            i;
    double       **matrix = NULL;
    double        *matspace = NULL;

    matspace = (double *) calloc((rows * cols), sizeof(double));
    if (matspace == NULL)
    {
        perror("\n ERROR");
        printf("\n ERROR: Failure to allocate matrix space in MatAlloc(): (%d x %d)\n", rows, cols);
        exit(EXIT_FAILURE);
    }

    /* allocate room for the pointers to the rows */
    matrix = (double **) malloc(rows * sizeof(double *));
    if (matrix == NULL)
    {
        perror("\n ERROR");
        printf("\n ERROR: Failure to allocate room for row pointers in MatAlloc(): (%d)\n", rows);
        exit(EXIT_FAILURE);
    }

    /*  now 'point' the pointers */
    for (i = 0; i < rows; i++)
        matrix[i] = matspace + (i * cols);

    return(matrix);
}


/*
Reads a column of floating point numbers from a data file.
Columns are free-form, separated by white space.
Allows comments (midline OK) using '#'.
Also allows blank internal lines. 
*/
double
*ReadVals(const char *listfile_name, int *len, const int column)
{
    int            i, numscanned, numvals, cnt, filelen;
    FILE          *listfile = NULL;
    double        *array = NULL;
    char           columns[16][512], line[512], *comment = NULL;
    int            ch;


    listfile = fopen(listfile_name, "r");
    if (listfile == NULL)
    {
        fprintf(stderr,
                "\n ERROR_069: cannot open first file \"%s\" \n",
                listfile_name);
        exit(EXIT_FAILURE);
    }

    filelen = 0;
    while(!feof(listfile))
    {
        ch = getc(listfile);

        if (ch == EOF || ch == '\n')
            ++filelen;

        if (ch == EOF)
            break;
    }

    array = calloc(filelen, sizeof(double));

    rewind(listfile);

    cnt = 0;
    for(i = 0; i < filelen; ++i)
    {
        if (fgets(line, 512, listfile) == NULL)
            break;

        if (line[0] == '#')
            continue;

        comment = strchr(line, '#');

        if (comment != NULL)
            *comment = '\0';

        numscanned = sscanf(line, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                            &columns[0][0], &columns[1][0], &columns[2][0],  &columns[3][0],
                            &columns[4][0], &columns[5][0], &columns[6][0],  &columns[7][0],
                            &columns[8][0], &columns[9][0], &columns[10][0], &columns[11][0],
                            &columns[12][0], &columns[13][0], &columns[14][0], &columns[15][0]);

        numvals = sscanf(&columns[column][0], "%le", &array[i]);

        /* printf("\n %d \n", numscanned); */

        if(numvals > 0 && isfinite(array[i]) && fabs(array[i]) < DBL_MAX && numscanned > column)
            ++cnt;
    }

    *len = cnt;

    fclose(listfile);

    printf("\nNumber of data points read: %d", *len);

    return(array);
}


/* 
Calculate harmonic mean estimator, which should never be used, but we determine it for fun
and to see how bad it actually is. 
As boni, we get the log arithmetic mean likelihood and log geometric mean likelihood. 
*/
double
CalcHarmonicMean(const double *x, const int len)
{
    double         blik, mlik, hmlik, amlik, diff, ediff, liksi, harm_mean;
    int            i;

    /* first center the log-likelihoods, as the likelihoods are probably too small to represent. */
    blik = 0.0;
    for (i = 0; i < len; ++i)
        blik += x[i];

    blik /= len;

    mlik = hmlik = amlik = 0.0;
    for (i = 0; i < len; ++i)
    {
        liksi = x[i];
        diff = liksi - blik;
        ediff = exp(diff);

        if (isfinite(ediff))
        {
            mlik  += ediff;
            hmlik += 1.0 / ediff;
            amlik += liksi;
        }
    }

/* 
    if (badsamp > 0)
        printf("\nWARNING: %d samples excluded, not finite\n", badsamp);
 */
    harm_mean = blik - log(hmlik) + log(len);
    printf("\n%-20s% 14d", "n:", len);

    printf("\n%-20s% 14.4f", "log arithmetic mean:", log(mlik / len) + blik);
    printf("\n%-20s% 14.4f", "log harmonic mean:", harm_mean);
    printf("\n%-20s% 14.4f", "log geometric mean:", amlik / len);
    printf("\n\n");
    fflush(NULL);

    return(harm_mean);
}


/* 
Calculate eigenvalues of a square, symmetric, real matrix, using GSL.
Eigenvalues are returned in descending order, largest first. 
Pointer *eval must be allocated. 
Input matrix **cov is NOT perturbed. 
*/
void
EigenvalsGSL(double **cov, const int dim, double *eval)
{
    double        *cov_cpy = NULL;

    cov_cpy = malloc(dim * dim * sizeof(double));
    memcpy(cov_cpy, &cov[0][0], dim * dim * sizeof(double));
    gsl_matrix_view m = gsl_matrix_view_array(cov_cpy, dim, dim);
    gsl_vector_view evalv = gsl_vector_view_array(eval, dim);
    gsl_eigen_symm_workspace *w = gsl_eigen_symm_alloc(dim);

    gsl_eigen_symm(&m.matrix, &evalv.vector, w);

    gsl_eigen_symm_free(w);
    free(cov_cpy);
}


/* This one destroys half of the input matrix **cov */
void
EigenvalsGSLDest(double **cov, const int dim, double *eval)
{
    gsl_matrix_view m = gsl_matrix_view_array(cov[0], dim, dim);
    gsl_vector_view evalv = gsl_vector_view_array(eval, dim);
    gsl_eigen_symm_workspace *w = gsl_eigen_symm_alloc(dim);
    gsl_eigen_symm(&m.matrix, &evalv.vector, w);
    gsl_eigen_symm_free(w);
}


void
CalcPAve(void)
{
    int            i, j;

    for (i = 0; i < pnum; ++i)
        pave[i] = 0.0;

    for (i = 0; i < iters; ++i)
        for (j = 0; j < pnum; ++j)
            pave[j] += x[i][j];

    for (i = 0; i < pnum; ++i)
        pave[i] /= iters;
}


void
CalcPCov(void)
{
    int            i, j, k;
    double         tmpi, tmpj, sum;

    for (i = 0; i < pnum; ++i)
    {
        for (j = 0; j <= i; ++j)
        {
            sum = 0.0;
            for (k = 0; k < iters; ++k)
            {
                tmpi = x[k][i] - pave[i];
                tmpj = x[k][j] - pave[j];
                sum += tmpi * tmpj;
            }

            cov[i][j] = cov[j][i] = sum / iters;
        }
    }
}


/* 
The Laplace-Metropolis estimator for calculating the marginal likelihood
from metropolis samples from the posterior distribution.

Steven M. Lewis, Adrian E. Raftery (1997)
"Estimating Bayes Factors Via Posterior Stimulation with the Laplace-Metropolis Estimator."
Journal of the American Statistical Association, 92(438):648-655

Using equation 4, esp. see pp 649-650, first method to estimate \theta*.

IME, this is extremely accurate (using Gold Standard as a reference).

NB: REQUIRES CalcPAve() and CalcPCov() to have already calculated ave and cov.
*/
double
CalcLaplaceMet(void)
{
    int            i, j;
    int            maxind, runind;
    double         maxpost, lndet, lapmet, lnh;
    double        *eval = NULL;

    CalcPAve();
    CalcPCov();

    for (i = 0; i < pnum; ++i)
        printf("\nave p[%d]:% 16.4f (+/- %14.4f)", i, pave[i], sqrt(cov[i][i]));

    printf ("\n\nParameter covariance matrix (estimate of minus inverse Hessian):");
    MatPrintLowerDiag(cov, pnum);

////////////////////////////////////////////////////////////////////////////////////////////////////

    eval = calloc(pnum, sizeof(double));

    EigenvalsGSL(cov, pnum, eval);

    lndet = 0.0;
    for (i = 0; i < pnum; i++)
    {
        if (isgreater(eval[i], FLT_EPSILON))
        {
            lndet += log(eval[i]);
        }
        else
        {
            printf("\n WARNING: excluding eigenvalue %d from determinant calculation", i);
            printf("\n WARNING: eigenvalue[%d] = %g < %g", i, eval[i], FLT_EPSILON);
        }
    }

    printf ("\nln(det): %g", lndet);
    printf ("\ndet: %g\n", exp(lndet));
    for (i = 0; i < pnum; i++)
        printf ("\neigenvalue[%d] = %g", i, eval[i]);
    printf ("\n");
    fflush(NULL);

    free(eval);

////////////////////////////////////////////////////////////////////////////////////////////////////

    for (i = 0; i < pnum; ++i)
    {
        for (j = 0; j < i; ++j)
        {
            if (cov[i][j] == 0.0)
                cov[i][j] = cov[j][i] = 0.0;
            else
                cov[i][j] = cov[j][i] = cov[i][j] / sqrt(cov[i][i] * cov[j][j]);
        }
    }

    for (i = 0; i < pnum; ++i)
        cov[i][i] = 1.0;

    printf ("\nParameter correlation matrix:");
    MatPrintLowerDiag(cov, pnum);

////////////////////////////////////////////////////////////////////////////////////////////////////

    /* Find the parameters with the maximum posterior prob */
    maxpost = -DBL_MAX;
    maxind = 0;
    runind = 0;
    for (i = 0; i < iters; ++i)
    {
        //printf("\nlnpost[%6d]: %g %g", i, lnpost[i], maxpost);
        lnh = lnprior[i] + lnlike[i];
        if (maxpost < lnh)
        {
            maxpost = lnh;
            maxind = i;
        }
    }

    printf("\n%-20s% 16.4f", "Maximum log posterior * ln p(x):", maxpost);
    for (i = 0; i < pnum; ++i)
        printf("\nmax logPost p[%d]:   % 16.4f", i, x[maxind][i]);

    printf("\n%.4f", x[maxind][0]);
    for (i = 1; i < pnum; ++i)
        printf(":%.4f", x[maxind][i]);
    printf("\n");

    lapmet = maxpost + 0.5 * lndet + 0.5 * pnum * log(2.0 * M_PI);

    printf("\n\nLog marginal likelihood ln p(x):");
    printf("\n%-20s% 16.4f\n\n", "Laplace-Metropolis:", lapmet);

    fflush(NULL);

    return(lapmet);
}



void
SimGauss(const double nu, const gsl_rng *r2)
{
    int             i, j;
    double          sigma = sqrt(nu / (1.0 + nu));
    double          sqrtnu = sqrt(nu);
    
    for (i = 0; i < iters; ++i)
    {
        lnprior[i] = lnpost[i] = lnlike[i] = 0.0;
        for (j = 0; j < pnum; ++j)
        {
            x[i][j] = gsl_ran_gaussian(r2, sigma);
            lnprior[i] += log(gsl_ran_gaussian_pdf(x[i][j], 1.0));
            lnlike[i]  += log(gsl_ran_gaussian_pdf(x[i][j], sqrtnu));
            lnpost[i]  += log(gsl_ran_gaussian_pdf(x[i][j], sigma));
        }
        //printf("\nlnpost[%6d]: %g", i, lnpost[i]);
    }
}


void
GetOpts(int argc, char *argv[])
{
    int            option;

    /* get the options */
    while ((option = getopt(argc, argv, "b:d:i:n:")) != -1)
    {
        switch (option)
        {
/* 
            case 'h':
                thermo = 1;
                break;


            case 'P':
                sscanf(optarg, "%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf",
                       &prior[0], &prior[1], &prior[2], &prior[3],
                       &prior[4], &prior[5], &prior[6], &prior[7],
                       &prior[8], &prior[9], &prior[10], &prior[11]);
                for (i = 0; i < pnum; ++i)
                    prior[i] *= 0.5;
                break;
 */

            case 'b':
                burnin = (double) strtod(optarg, NULL);

                if (burnin > 0.0 && burnin < 1.0)
                    burnin = 1.0 - burnin;
                else
                    burnin = 0.5;
                break;

            case 'd':
                pnum = (int) strtol(optarg, NULL, 10);
                break;

            case 'i':
                iters = (int) strtol(optarg, NULL, 10);
                break;

            case 'n':
                nu = (double) strtod(optarg, NULL);
                break;

            default:
                perror("\n\n  ERROR");
                fprintf(stderr, "\nBad option '-%c' \n", optopt);
                exit(EXIT_FAILURE);
                break;
        }
    }
}


int
main(int argc, char *argv[])
{
    int            i, narguments;
    double         hme, marglik;


    const gsl_rng_type     *T = NULL;
    gsl_rng                *r2 = NULL;

    gsl_rng_env_setup();
    gsl_rng_default_seed = time(NULL);
    T = gsl_rng_ranlxd2;
    r2 = gsl_rng_alloc(T);

    //gsl_rng_set (r2, 1);

//    if (argc == 1)
//        exit(EXIT_FAILURE);

    GetOpts(argc, argv);

    narguments = argc - optind; /* number of nonoption args */
    argv += optind; /* now argv is set with first arg = argv[0] */

//    printf("\nReading data ...\n");
//    fflush(NULL);

    cov = MatAlloc(pnum, pnum);
    lnpost = calloc(iters, sizeof(double));
    lnlike = calloc(iters, sizeof(double));
    lnprior = calloc(iters, sizeof(double));
    pave = calloc(pnum, sizeof(double));
    x = calloc(iters, sizeof(double *));
    for (i = 0; i < iters; ++i)
        x[i] = calloc(pnum, sizeof(double));

    SimGauss(nu, r2);

    CalcLaplaceMet();

    hme = CalcHarmonicMean(lnlike, iters);
    printf("\nhme: %g\n", hme);

    hme = 0.0;
    for (i = 0; i < iters; ++i)
        hme += 1.0 / exp(lnlike[i]);
    hme = iters / hme;
    printf("\nhme: %g", hme);
    printf("\nlog(hme): %g\n", log(hme));

    marglik = - 0.5 * pnum * log(2.0 * M_PI) - 0.5 * pnum * log(1.0 + nu);
    printf("\nexact marginal likelihood: %g\n\n", marglik);

    for (i = 0; i < iters; ++i)
        free(x[i]);
    free(x);
    free(lnpost);
    free(lnprior);
    free(pave);
    MatDestroy(&cov);

    exit(EXIT_SUCCESS);
}

