/* HMM_combined 11 Aug 2008 I've written this program to run Baum-Welch algorithm for HMM, based on the class notes for ECE 534. The alphas and betas are normalized at each step to avoid really small numbers. The log likekihood is tracked. For more than about 1000 observations there were overflow problems, so in this version the alphas and betas are renormaized after each step. The log10 normalizations are saved so that the log10 likelihood can still be reported. -B. Hajek */ #include <stdio.h> #include <math.h> double pi[10],a[10][10],b[10][10]; /* Parameters of the HMM */ double old; int init_state(),next_state(),observation(); int print_parameter(); char input[100001]="0110100000000110010010000010100000001000001101111110001100000000010000111111111000001000100111001000"; int y[100001]; double alpha[10][100001], beta[10][100001], Gamma[10][100001], xi[10][10][100001]; /* Sorry--lower case gammma is a math function */ int N_s,N_o,T; double drand48(void); /* MAIN PROGRAM */ int main(void) { int i,j, l,t; int iter,num_iter; double numerator,denominator,temp; double alpha_log_norm, beta_log_norm, factor; T=100000; printf("T=%i data= ", T); N_s=2; N_o=2; /* Following statement enters the observations from the string input[] */ /* for (t=1;t<=T;t++) {sscanf(input+t-1, "%1i", &y[t]); printf("%1i",y[t]); } */ /* The following enters observations by random generation, first the values of the parameters pi, A, B are read in */ printf("\n Parameters used to generate the observations:"); pi[0]=0.5; pi[1]=0.5; a[0][0]=0.8; a[0][1]=0.2; a[1][0]=0.4; a[1][1]=0.6; b[0][0]=0.8; b[0][1]=0.2; b[1][0]=0.3; b[1][1]=0.7; print_parameter(); i=init_state(); for (t=1;t<=T;t++) { y[t]=observation(i); /* printf("%1i", i); */ /* printf("%i %i\n", i,y[t]); */ i=next_state(i); } printf("\nFirst min(T,100) observations:"); for (t=1;(t<=100)&(t<=T);t++) printf("%1i",y[t]); printf("\n"); /* Now we are ready to run the EM algorithm. We enter the intial parmater values (which may not be the same as the parmeters used to generated the observations */ printf("\n Initial value of parameters for EM algorithm:"); pi[0]=0.5; pi[1]=0.5; a[0][0]=0.8; a[0][1]=0.2; a[1][0]=0.1; a[1][1]=0.9; b[0][0]=0.6; b[0][1]=0.4; b[1][0]=0.4; b[1][1]=0.6; /* a[0][0]=0.8; a[0][1]=0.2; a[1][0]=0.4; a[1][1]=0.6; b[0][0]=0.9; b[0][1]=0.1; b[1][0]=0.1; b[1][1]=0.9; */ num_iter=500; for (iter=1;iter<=num_iter;iter++) { old=a[0][0]; print_parameter(); /* initialize alpha and beta */ for (i=0;i<N_s;i++) { alpha[i][1]=pi[i]*b[i][y[1]]; beta[i][T]=1; } alpha_log_norm=0; beta_log_norm=0; /* Compute remaining alphas */ for (t=1;t<T;t++) { for (j=0;j<N_s;j++) { alpha[j][t+1]=0; for (i=0;i<N_s;i++) alpha[j][t+1] += alpha[i][t]*a[i][j]*b[j][y[t+1]]; /* printf("alpha[%2i][%2i]=%e ",j,t+1,alpha[j][t+1]); */ } factor=0; for (j=0;j<N_s;j++) factor+= alpha[j][t+1]; for (j=0;j<N_s;j++) alpha[j][t+1]=alpha[j][t+1]/factor; alpha_log_norm+=log10(factor); /* printf("\n"); */ } /* Compute remaining betas */ for (t=T;t>1;t--) { for (i=0;i<N_s; i++) { beta[i][t-1]=0; for (j=0;j<N_s;j++) beta[i][t-1] += a[i][j]*b[j][y[t]]*beta[j][t]; /*printf("beta[%2i][%2i]=%e ",i,t+1,beta[i][t-1]); */ } factor=0; for (i=0;i<N_s;i++) factor+= beta[i][t-1]; for (i=0;i<N_s;i++) beta[i][t-1]=beta[i][t-1]/factor; beta_log_norm+=log10(factor); /* printf("\n"); */ } /* Compute Gammas */ for (t=1;t<=T;t++) { denominator=0; for (j=0; j<N_s; j++) denominator+= alpha[j][t]*beta[j][t]; /* printf("denominator=%e \n",denominator); */ for (i=0; i<N_s; i++) Gamma[i][t]= alpha[i][t]*beta[i][t]/denominator; } /* Compute likelihood of observed sequence for given theta. */ /*printf("\nFOLLOWING WORKS IF NO NORMALIZATION\n"); for (t=1; t<T;t++) for (i=0;i<N_s;i++) printf("prod=%e\n", alpha[i][t]*beta[i][t]/Gamma[i][t]); */ temp=0; for (i=0; i<N_s; i++) temp+=beta[i][1]*alpha[i][1]; /* Note: alpha's at t=1 are not scaled */ printf("\nLog likelihood=%f or %f or %f \n", log10(alpha[0][1]*beta[0][1]/Gamma[0][1])+beta_log_norm, alpha_log_norm, beta_log_norm+log10(temp)); /* Compute xi's */ for (t=1;t<T;t++) { denominator=0; for (i=0; i<N_s; i++) for (j=0; j<N_s; j++) denominator+=alpha[i][t]*a[i][j]*b[j][y[t+1]]*beta[j][t+1]; for (i=0; i<N_s; i++) for (j=0; j<N_s; j++) xi[i][j][t]= alpha[i][t]*a[i][j]*b[j][y[t+1]]*beta[j][t+1]/denominator; } /* Following prints the values of observation sequence, Gamma, and xi's for (t=1;t<=T; t++) printf("%1i %6.4f %6.4f %6.4f %6.4f\n %6.4f %6.4f\n", y[t],Gamma[0][t],Gamma[1][t],xi[0][0][t],xi[0][1][t],xi[1][0][t],xi[1][1][t]); */ /* Now that Gamma and Xi is computed, compute next theta */ printf("Update pi, A, B per EM algorithm *******************************"); for (i=0;i<N_s;i++) pi[i]=Gamma[i][1]; for (i=0;i<N_s;i++) { denominator=0; for (t=1;t<T;t++) denominator+=Gamma[i][t]; for (j=0;j<N_s;j++) { numerator=0; for (t=1;t<T;t++) numerator+=xi[i][j][t]; a[i][j]=numerator/denominator; } /* eo for j */ } /* eo for i */ for (i=0;i<N_s;i++) { denominator=0; for (l=0; l<N_o; l++) b[i][l]=0; /* will compute b[i][.] on fly, then normalize at end */ for (t=1;t<=T;t++) { denominator+= Gamma[i][t]; b[i][y[t]] += Gamma[i][t]; } /* eo for t */ for (l=0;l<N_o;l++) b[i][l] = b[i][l]/denominator; /*normalize b[i][.] */ } /* eo for i */ printf("\nCHANGE IN a[0][0] is %e \n", a[0][0]-old); } /* eo for iter */ return(0); } /* eo main */ /* print_parameter() prints the values of pi[],a[][], and b[][] */ int print_parameter() {int i,j,l; printf("\npi:"); for (i=0;i<N_s;i++) printf(" %f ",pi[i]); printf("\n A:"); for (i=0;i<N_s;i++) { for (j=0;j<N_s;j++) printf(" %f ",a[i][j]); printf("\n "); } printf("\n B:"); for (i=0;i<N_s;i++) { for (l=0;l<N_o;l++) printf(" %f ",b[i][l]); printf("\n "); } return(0); } /* eo print_parameter /* init_state() generates a (pseudo) random initial state distributed according to pi[] */ int init_state() {int i; double u,sum; sum=pi[0]; i=0; u=drand48(); while (u > sum ) { i++; sum+=pi[i]; } return(i); } /* eo init_state */ /* next_state(i) for a state i returns a (pseudo) random next state distributed according to the ith row of A */ int next_state(i) int i; {int j; double u,sum; /* printf("Hello\n"); */ sum=a[i][0]; j=0; u=drand48(); while (u > sum ) { j++; sum+=a[i][j]; } return(j); } /* eo next_state */ /* observation(i) for a state i returns a (pseudo) random observation distributed according to the ith row of B */ int observation(i) int i; {int l; double u,sum; sum=b[i][0]; l=0; u=drand48(); while (u > sum ) { l++; sum+=b[i][l]; } return(l); } /* eo observation */