// Compute likelihood of a given network using an ensamble of random graphs
// Stefano Allesina allesina@nceas.ucsb.edu
// v 1.0 Apr 2009

// Compile 
// gcc -lgsl -lgslcblas GA-Groups.c -o Groups -Wall -O3 -DHAVE_INLINE

// Launch
// GSL_RNG_SEED=1 ./Groups 26 benguela 5 1000 1000 0

// The max population size for GA
#define MAXPOP 5000

#include <stdio.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <string.h>
#include <math.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_permutation.h>
#include <gsl/gsl_sort_vector_double.h>

// Random generator
const gsl_rng_type * TRND;
gsl_rng * RND;

// global variables
int TotLinks;
int TOR;

// Initialize Random Number Generator
static int InitializeRandom(){
  // setup the environment
  // The generator must be seeded by command line
  gsl_rng_env_setup();
  TRND = gsl_rng_default;
  RND = gsl_rng_alloc(TRND);
  return 0;
}

// Free Random Number Generator
static int FreeRandom(){
  gsl_rng_free (RND);
  return 0;
}

// UTILITIES ///////////////////////////////////////////////////////
// Print a matrix ToP to stream f (m rows and cols)
static int MyIntMatPrint(FILE * f, gsl_matrix * ToP,int m) {
  int i,j;
  for (i=0;i<m;i++)   { 
    for (j=0;j<m;j++)       {
      fprintf(f,"%1.0f ",gsl_matrix_get(ToP,i,j));
    }
    fprintf(f,"\n");
  }
  fprintf(f,"\n");
  return 0;
}

// Print a matrix ToP to stream f (m rows and cols)
static int MyDblMatPrint(FILE * f, gsl_matrix * ToP,int m) {
  int i,j;
  for (i=0;i<m;i++)   { 
    for (j=0;j<m;j++)       {
      fprintf(f,"%1.0f ",gsl_matrix_get(ToP,i,j));
    }
    fprintf(f,"\n");
  }
  fprintf(f,"\n");
  return 0;
}

int MyDblVecPrint(FILE * f, gsl_vector * ToP,int m) {
  int j;
  for (j=0;j<m;j++){
    fprintf(f,"%1.4f ",gsl_vector_get(ToP,j));
  }
  fprintf(f,"\n");
  return 0;
}
// END UTILITIES ///////////////////////////////////////////////////

static double AIC(gsl_matrix * A, int N, gsl_matrix * MK, int KK, gsl_vector * G, gsl_vector * Gcount){
  int i,j,Gi,Gj;
  double Results=0.0;
  gsl_matrix_set_zero(MK);
  gsl_vector_set_zero(Gcount);
  // Version 2: A is an adjacency list, TotLinks is the total number of links
  for (i=0;i<TotLinks;i++){
    if (i<N){
      Gi=(int)gsl_vector_get(G,i);
      gsl_vector_set(Gcount,Gi,gsl_vector_get(Gcount,Gi)+1);
    }
    Gi=(int)gsl_vector_get(G,gsl_matrix_get(A,i,0));
    Gj=(int)gsl_vector_get(G,gsl_matrix_get(A,i,1));
    gsl_matrix_set(MK,Gi,Gj,gsl_matrix_get(MK,Gi,Gj)+1);
  }
  // Compute log likelihood
  double p=0.0;
  double Lin=0.0;
  double MLin=0.0;
  for (i=0;i<KK;i++){
    for (j=0;j<KK;j++){
      Lin=(double)gsl_matrix_get(MK,i,j);
      MLin=(double)(gsl_vector_get(Gcount,i)*gsl_vector_get(Gcount,j));
      p=Lin/MLin;
      if (p>0.0){
	if (p<1.0){
	  Results+=Lin*log(p)+(MLin-Lin)*log(1.-p);
	}
      }
    }
  }
  // compute AIC
  Results=2.*N+2.*KK*KK-2.*Results;
  return 1./Results;
}

// Specific functions for Genetic Algorithm
static int InitializePop(gsl_vector *G1[MAXPOP],gsl_vector *G2[MAXPOP],int N,int Pop, int KK){
  int i,j;
  for (i=0;i<Pop;i++){
    G1[i]=gsl_vector_calloc(N);
    G2[i]=gsl_vector_calloc(N);
  }
  // initialize genes at random
  for (i=0;i<Pop;i++){
    for (j=0;j<N;j++){
      gsl_vector_set(G1[i],j,gsl_rng_uniform_int(RND,KK));
      gsl_vector_set(G2[i],j,0);
    } 
  } 
  return 0;
}

static int FreePop(gsl_vector * G1[MAXPOP],gsl_vector *G2[MAXPOP],int N,int Pop){
  int i;
  for (i=0;i<Pop;i++){
    gsl_vector_free(G1[i]);
    gsl_vector_free(G2[i]);
  }
  return 0;
}

static double ClimbHill(gsl_matrix * A, int N, gsl_matrix * MK, int KK, gsl_vector * G, gsl_vector * Gcount){
  // search for all solutions one mutation away and hillclimb
  int i,j,oldi;
  double OldFit,NewFit,CurFit;
  NewFit=-2;
  OldFit=-1;
  gsl_vector * Hill=gsl_vector_calloc(N);
  gsl_vector_memcpy(Hill,G);
  while(OldFit!=NewFit){
    OldFit=NewFit;
    gsl_vector_memcpy(G,Hill);
    for (i=0;i<N;i++){
      oldi=(int) gsl_vector_get(G,i);
      for (j=0;j<KK;j++){
	gsl_vector_set(G,i,j);
	// now compute AIC
	CurFit=AIC(A,N,MK,KK,G,Gcount);
	if (CurFit>NewFit){
	  gsl_vector_memcpy(Hill,G);
	  NewFit=CurFit;
	}
      }
      // restore vector
      gsl_vector_set(G,i,oldi);
    }
  }
  gsl_vector_memcpy(G,Hill);
  gsl_vector_free(Hill);
  return NewFit;
}

static int Redeploy(gsl_vector *A1,gsl_vector *A2, int N, int KK, int MutationsAway){
  gsl_vector_memcpy(A2,A1);
  int k,j;
  for (j=0;j<MutationsAway;j++){
    k=gsl_rng_uniform_int(RND,N);
    gsl_vector_set(A2,k,gsl_rng_uniform_int(RND,KK));
  }
  return 0;
}

static int GA_Generation(gsl_vector *G1[MAXPOP], gsl_vector *G2[MAXPOP], int N, int Pop, 
			 gsl_matrix *A,gsl_vector *Fits,gsl_vector *BestSol, double *BestLik, int KK,
			 gsl_matrix *MK, gsl_vector *Gcount){
  int i;
  double SumFit, Fit;
  SumFit=0.0;
  // Assign Fitness to each vector in G1
  for (i=0;i<Pop;i++){
    // reorder the matrix
    Fit=ClimbHill(A,N,MK,KK,G1[i],Gcount);
    SumFit+=Fit;
    gsl_vector_set(Fits,i,Fit);
    if (Fit>*BestLik){
      *BestLik=Fit;
      gsl_vector_memcpy(BestSol,G1[i]);
    }
  }
  // Now Reproduce
  double RandN,TmpSum;
  int k,j;
  for (i=0;i<Pop;i++){
    RandN=gsl_rng_uniform(RND)*SumFit;
    TmpSum=0.0;
    for (j=0;j<Pop;j++){
      TmpSum+=gsl_vector_get(Fits,j);
      if (TmpSum>=RandN) break;
    }
    Redeploy(G1[j],G2[i],N,KK,gsl_ran_binomial(RND,3./(double)N,N)); // number of mutations for redeploy. On average, 3 mutations away
  }
  // copy new generation
  for (k=0;k<Pop;k++){
    gsl_vector_memcpy(G1[k],G2[k]);
  }
  return 0;
}

int main (int argc, char *argv[]){
  // first argument: Number of species in the food web
  int N=atoi(argv[1]);
  // second argument: File containing food web
  char *InFile=argv[2];
  int KK=atoi(argv[3]);
  // fourth argument: Population size
  int Pop=atoi(argv[4]);
  // fifth argument: Number of generations
  int Gen=atoi(argv[5]);
  fprintf(stderr,"N=%d\nFile=%s\nGroups=%d\nPop=%d\nGen=%d\n",N,InFile,KK,Pop,Gen);
  int i,j,k;
  // Read the matrix and assign to A
  FILE * F;
  gsl_matrix * A=gsl_matrix_calloc(N,N);
  gsl_matrix * MK=gsl_matrix_calloc(KK,KK);
  gsl_vector *Gcount=gsl_vector_calloc(KK);
  F=fopen(InFile,"rb");
  gsl_matrix_fscanf(F,A);
  fclose(F);
  // Create Population vectors
  gsl_vector *G1[MAXPOP];
  gsl_vector *G2[MAXPOP];
  TotLinks=0;
  for (i=0;i<N;i++){
    for (j=0;j<N;j++){
      if (gsl_matrix_get(A,i,j)>0) TotLinks++;
    }
  }
  gsl_matrix * Adj=gsl_matrix_calloc(TotLinks,2);
  k=0;
  for (i=0;i<N;i++){
    for (j=0;j<N;j++){
      if (gsl_matrix_get(A,i,j)>0) {
	gsl_matrix_set(Adj,k,0,i);
	gsl_matrix_set(Adj,k,1,j);
	k++;
      }
    }
  }

  // Create a vector for best solution
  gsl_vector *BestSol=gsl_vector_calloc(N);
  double BestLik=-1.0;
  // Create a vector for Fitness values
  gsl_vector *Fitness=gsl_vector_calloc(Pop);
  // Initialize Random Number Generator
  i=InitializeRandom();
  // Initialize Populations
  i=InitializePop(G1,G2,N,Pop,KK);
  // Now run the GA
  for(i=0;i<Gen;i++){
    // Assign Fitnesses and Reproduce
    fprintf(stderr,"-");
    GA_Generation(G1,G2,N,Pop,Adj,Fitness,BestSol,&BestLik,KK,MK,Gcount);
    //fprintf(stderr,"+");
    if (i%10==0)
      fprintf(stderr,"Gen %d\tMinAIC %f\t MinAIC-Gen %f\tGroups %d\n",i,1./BestLik,1./gsl_vector_max(Fitness),KK);
  }
  // SAVE OUTPUT
  FILE * OU;
  char NameOU[1000];
  sprintf(NameOU,"Out-%s-%d-%f.txt",InFile,KK,1./BestLik);
  OU=fopen(NameOU,"w");
  //printf(OU,"%d\t%f\n",KK,1./BestLik);
  MyDblVecPrint(OU,BestSol,N);
  fclose(OU);

  // Free random number gen
  FreeRandom();
  // Free populations
  FreePop(G1,G2,N,Pop);
  // Free matrix A
  gsl_matrix_free(A);
  gsl_matrix_free(Adj);
  gsl_matrix_free(MK);
  gsl_vector_free(Fitness);
  gsl_vector_free(BestSol);
  gsl_vector_free(Gcount);
  return 0;
}
