/*  HMM_combined   11 Aug 2008
I've written this program to run Baum-Welch algorithm for HMM,
based on the class notes for ECE 534. The alphas and betas
are normalized at each step to avoid really small
numbers.  The log likekihood is tracked.
For more than about 1000 observations there were
overflow problems, so in this version the alphas
and betas are renormaized after each step.  The log10
normalizations are saved so that the log10 likelihood
can still be reported.   -B. Hajek
    */


#include <stdio.h>
#include <math.h>

double pi[10],a[10][10],b[10][10];    /* Parameters of the HMM  */
double old;
int init_state(),next_state(),observation();
int print_parameter();
char input[100001]="0110100000000110010010000010100000001000001101111110001100000000010000111111111000001000100111001000";
int y[100001];
double alpha[10][100001], beta[10][100001], Gamma[10][100001], xi[10][10][100001];   /* Sorry--lower case gammma is a math function */
int N_s,N_o,T;

double drand48(void);

/*    MAIN PROGRAM  */

int main(void)  {
int i,j, l,t;
int iter,num_iter;
double numerator,denominator,temp;
double alpha_log_norm, beta_log_norm, factor;
T=100000;
printf("T=%i  data= ", T);


N_s=2;
N_o=2;

/* Following statement enters the observations from the string input[] */
/* for (t=1;t<=T;t++)  {sscanf(input+t-1, "%1i", &y[t]);   printf("%1i",y[t]); } */


/* The following enters observations by random generation, first the
values of the parameters pi, A, B are read in   */

printf("\n Parameters used to generate the observations:");
pi[0]=0.5;  pi[1]=0.5;

a[0][0]=0.8;   a[0][1]=0.2;
a[1][0]=0.4;   a[1][1]=0.6;

b[0][0]=0.8;   b[0][1]=0.2;
b[1][0]=0.3;   b[1][1]=0.7;

print_parameter();

i=init_state();
for (t=1;t<=T;t++)  {
	y[t]=observation(i);
	/* printf("%1i", i);     */
	/* printf("%i %i\n", i,y[t]); */
	i=next_state(i);
}


printf("\nFirst min(T,100) observations:");
for (t=1;(t<=100)&(t<=T);t++) printf("%1i",y[t]);
printf("\n");


/*  Now we are ready to run the EM algorithm.   We enter
the intial parmater values (which may not be the same
as the parmeters used to generated the observations  */

printf("\n Initial value of parameters for EM algorithm:");
pi[0]=0.5;  pi[1]=0.5;

a[0][0]=0.8;   a[0][1]=0.2;
a[1][0]=0.1;   a[1][1]=0.9;

b[0][0]=0.6;   b[0][1]=0.4;
b[1][0]=0.4;   b[1][1]=0.6;

/* a[0][0]=0.8;   a[0][1]=0.2;
a[1][0]=0.4;   a[1][1]=0.6;

b[0][0]=0.9;   b[0][1]=0.1;
b[1][0]=0.1;   b[1][1]=0.9;  */



num_iter=500;

for (iter=1;iter<=num_iter;iter++)  {
old=a[0][0];
print_parameter();


/* initialize alpha and beta  */
for (i=0;i<N_s;i++)  {
	alpha[i][1]=pi[i]*b[i][y[1]];
	beta[i][T]=1;
}
alpha_log_norm=0;
beta_log_norm=0;

/* Compute remaining alphas  */
for (t=1;t<T;t++)  {
	for (j=0;j<N_s;j++)	{
		alpha[j][t+1]=0;
		for (i=0;i<N_s;i++)  alpha[j][t+1] +=  alpha[i][t]*a[i][j]*b[j][y[t+1]];
		/* printf("alpha[%2i][%2i]=%e  ",j,t+1,alpha[j][t+1]);  */
	}
	factor=0;
	for (j=0;j<N_s;j++)	factor+= alpha[j][t+1];
	for (j=0;j<N_s;j++)	alpha[j][t+1]=alpha[j][t+1]/factor;
	alpha_log_norm+=log10(factor);
		/* printf("\n"); */
}

/* Compute remaining betas  */
for (t=T;t>1;t--)  {
	for (i=0;i<N_s; i++)	{
		beta[i][t-1]=0;
		for (j=0;j<N_s;j++)  beta[i][t-1] +=  a[i][j]*b[j][y[t]]*beta[j][t];
		/*printf("beta[%2i][%2i]=%e  ",i,t+1,beta[i][t-1]); */
	}
	factor=0;
	for (i=0;i<N_s;i++)	factor+= beta[i][t-1];
	for (i=0;i<N_s;i++)	beta[i][t-1]=beta[i][t-1]/factor;
	beta_log_norm+=log10(factor);
		/* printf("\n"); */
}
/* Compute Gammas */
for (t=1;t<=T;t++)  {
	denominator=0;
	for (j=0; j<N_s; j++)  denominator+= alpha[j][t]*beta[j][t];
	  /* printf("denominator=%e \n",denominator); */
	for (i=0; i<N_s; i++) Gamma[i][t]=  alpha[i][t]*beta[i][t]/denominator;
}

/* Compute likelihood of observed sequence for given theta.  */ 
/*printf("\nFOLLOWING WORKS IF NO NORMALIZATION\n");
for (t=1; t<T;t++)  for (i=0;i<N_s;i++)  printf("prod=%e\n",  alpha[i][t]*beta[i][t]/Gamma[i][t]); */
temp=0;
for (i=0; i<N_s; i++) temp+=beta[i][1]*alpha[i][1];   /* Note: alpha's at t=1 are not scaled */
printf("\nLog likelihood=%f or %f or %f \n", log10(alpha[0][1]*beta[0][1]/Gamma[0][1])+beta_log_norm, alpha_log_norm, beta_log_norm+log10(temp));

/* Compute xi's  */
for (t=1;t<T;t++)  {
	denominator=0;
	for (i=0; i<N_s; i++)  for (j=0; j<N_s; j++)   denominator+=alpha[i][t]*a[i][j]*b[j][y[t+1]]*beta[j][t+1];
	for (i=0; i<N_s; i++)  for (j=0; j<N_s; j++)  xi[i][j][t]= alpha[i][t]*a[i][j]*b[j][y[t+1]]*beta[j][t+1]/denominator;
}

/*  Following prints the values of observation sequence, Gamma, and xi's 
for (t=1;t<=T; t++)  printf("%1i   %6.4f  %6.4f     %6.4f   %6.4f\n                       %6.4f   %6.4f\n", y[t],Gamma[0][t],Gamma[1][t],xi[0][0][t],xi[0][1][t],xi[1][0][t],xi[1][1][t]);  */

/*  Now that Gamma and Xi is computed, compute next theta */


printf("Update pi, A, B per EM algorithm *******************************");
for (i=0;i<N_s;i++)  pi[i]=Gamma[i][1];

for (i=0;i<N_s;i++)  {
	denominator=0;
	for (t=1;t<T;t++)  denominator+=Gamma[i][t];  
	for (j=0;j<N_s;j++) {
		numerator=0;
		for (t=1;t<T;t++) numerator+=xi[i][j][t];
		a[i][j]=numerator/denominator; 
	}   /* eo for j */
}  /* eo for i */

for (i=0;i<N_s;i++)  {
	denominator=0;
	for (l=0; l<N_o; l++)  b[i][l]=0;   /* will compute b[i][.] on fly, then normalize at end */
	for (t=1;t<=T;t++)  {
		denominator+= Gamma[i][t];
		b[i][y[t]] += Gamma[i][t];
	}  /* eo for t */
	for (l=0;l<N_o;l++)  b[i][l] = b[i][l]/denominator;   /*normalize b[i][.] */
}  /* eo for i */


printf("\nCHANGE IN a[0][0] is %e \n", a[0][0]-old);

}  /* eo for iter */

return(0);
}   /* eo main */


/* print_parameter() prints the values of pi[],a[][], and b[][] */
int print_parameter()
{int i,j,l;
 printf("\npi:");
 for (i=0;i<N_s;i++)  printf(" %f  ",pi[i]);
 printf("\n A:");
 for (i=0;i<N_s;i++)  {
	for (j=0;j<N_s;j++)  printf(" %f  ",a[i][j]);
	printf("\n   ");
 }
 printf("\n B:");
 for (i=0;i<N_s;i++)  {
	for (l=0;l<N_o;l++)  printf(" %f  ",b[i][l]);
	printf("\n   ");
 }
 return(0);
}  /* eo print_parameter
 
 
/*  init_state() generates a (pseudo) random
initial state distributed according to pi[] */
int init_state()  
{int i;
 double u,sum;
 sum=pi[0];
 i=0;
 u=drand48();
 while (u > sum )  {
	i++;
	sum+=pi[i];
 }
 return(i);
}    /* eo init_state */

/*  next_state(i) for a state i returns a
(pseudo) random next state distributed
according to the ith row of A */
int next_state(i)
int i;  
{int j;
 double u,sum;
          /*   printf("Hello\n");  */
 sum=a[i][0];
 j=0;
 u=drand48();
 while (u > sum )  {
	j++;
	sum+=a[i][j];
 }
 return(j);
}    /* eo next_state */


/* observation(i) for a state i returns a
(pseudo) random observation distributed
according to the ith row of B */
int observation(i)
int i;  
{int l;
 double u,sum;
 sum=b[i][0];
 l=0;
 u=drand48();
 while (u > sum )  {
	l++;
	sum+=b[i][l];
 }
 return(l);
}    /* eo observation */