alpha <- function(seq, Pi, A, E){ a <- matrix(0, ncol = length(seq), nrow = length(pi)) K <- length(pi) for (k in 1:K){ a[k,1] = pi[k] * E[seq[1],k]; } a[,1] = a[, 1] / sum(a[,1]) for (t in 2:length(seq)){ # going through the length of the sequence for (k in 1:K){ # ready to fill each of cells of the t-th coloumn for (j in 1:K){ # look back to each of the K cells at t-1 a[k, t] = a[k, t] + a[j, t-1] * A[j, k] * E[seq[t], k] } } # normalize a a[,t] = a[,t] / sum(a[,t]) } return (a) } beta <- function(seq, A, E){ K <- dim(A)[1] T <- length(seq) b <- matrix(0, ncol = T, nrow = K) #init the last position first for (k in 1:K){ b[L] = A[k, 1] * E[seq[L], k] } # backward propagation for (t = (L-1): - 1: 1){ # go through the sequence for (k in 1:K){ # filling each cell at position t for (j in 1:K){ #look up each b value at poition t+1 b[k, t] = b[k, t] + b[j, t+1] * E[seq[t], k] * A[k, j] } } #normalize b[,t] = b[, t] / sum(b[,t]) } } HMMEM <- function (seqs, K, V){
maxIter = 100 # initially creat radom matrix to hold the parameters A = matrix (runif(K*K), ncol = K, nrow = K) for (k in 1:K){ A[k, ] = A[k, ] / sum(A[k,]) } E = matrix (runif(K * V), nrow = V, ncol = K) for (k in 1:K){ E[,k] = E[,k] / sum(E[,k]) } Pi = runif(k) Pi = Pi / sum(Pi) # creat matrices of same size of parameters to hold the EXPECTED sufficient statistics Pisuf = rep(0, K) Asuf = matrix(0, nrow = K, ncol = K) Esuf = matrix(0, nrow = V, ncol = K) # start EM iteration
for (iter in 1:maxIter){ # clear sufficiet statistics from previous iteration Asuf[,] = 0 Esuf[,] = 0 Pisuf = 0 # loop through data to cacluate the EXPECTED sufficient statistics for (n in 1:length(seqs)){ seq = seqs[n] a <- alpha(seqs, Pi, A, E) b <- beta(seqs, A, E) s = matrix(0, nrow = K, ncol = length(seqs)) # to fill in the posterior probability for the states along the path for (t in 1:lenth(seq)){ for (k in 1:K){ s[k,t] = a[k, t] * b[k, t] # note s is already normalized #now that we have the EXPECTED value for each state, collect the sufficient statics for E and Pi Esuf[seq[t], k] = Esuf[seq[t], k] + s[k,t] if ( t == 1) Pisuf[k] = Pisuf[k] + s[k, t] } # now let's collect sufficient statistics for A if (t > 1){ for (i in 1:K){ for (j in 1:K){ Asuf[i, j] = Asuf[i, j] + a[i, t-1] * E[seq[t], j] * b[j, t] * A[i, j] } } } }#end of loop through seq length } # end of iteration through seqs
for (k in 1:K){ A[k,] = Asuf[k,] / sum(Asuf[k,]) E[,k] = Esuf[,k] / sum(Esuf[,]) } Pi = Pi / sum(Pi)
} # end of EM iteration return (list(Pi, A, E)) }
