{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} module Math.HiddenMarkovModel.Private where import qualified Math.HiddenMarkovModel.Distribution as Distr import qualified Math.HiddenMarkovModel.CSV as HMMCSV import Math.HiddenMarkovModel.Utility (SquareMatrix, diagonal) import qualified Numeric.LAPACK.Matrix as Matrix import qualified Numeric.LAPACK.Vector as Vector import qualified Numeric.LAPACK.Format as Format import Numeric.LAPACK.Matrix ((<#), (<#>), (#>)) import Numeric.LAPACK.Vector (Vector) import qualified Numeric.Netlib.Class as Class import Control.DeepSeq (NFData, rnf) import Control.Applicative ((<$>)) import Foreign.Storable (Storable) import qualified Data.Array.Comfort.Storable as StorableArray import qualified Data.Array.Comfort.Boxed as Array import qualified Data.Array.Comfort.Shape as Shape import qualified Data.NonEmpty.Class as NonEmptyC import qualified Data.NonEmpty as NonEmpty import qualified Data.Semigroup as Sg import qualified Data.List as List import Data.Traversable (Traversable, mapAccumL) import Data.Tuple.HT (mapPair, mapFst, mapSnd, swap) {- | A Hidden Markov model consists of a number of (hidden) states and a set of emissions. There is a vector for the initial probability of each state and a matrix containing the probability for switching from one state to another one. The 'distribution' field points to probability distributions that associate every state with emissions of different probability. Famous distribution instances are discrete and Gaussian distributions. See "Math.HiddenMarkovModel.Distribution" for details. The transition matrix is transposed with respect to popular HMM descriptions. But I think this is the natural orientation, because this way you can write \"transition matrix times probability column vector\". The type has two type parameters, although the one for the distribution would be enough. However, replacing @prob@ by @Distr.Probability distr@ would prohibit the derived Show and Read instances. -} data T distr sh prob = Cons { initial :: Vector sh prob, transition :: SquareMatrix sh prob, distribution :: distr } deriving (Show) instance (NFData distr, NFData sh, NFData prob, Storable prob) => NFData (T distr sh prob) where rnf (Cons initial_ transition_ distribution_) = rnf (initial_, transition_, distribution_) instance (Class.Real prob, Format.FormatArray sh, Format.Format distr) => Format.Format (T distr sh prob) where format fmt (Cons initial_ transition_ distribution_) = Format.format fmt (initial_, transition_, distribution_) emission :: (Shape.C sh, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission) => T distr sh prob -> emission -> Vector sh prob emission = Distr.emissionProb . distribution forward :: (Shape.C sh, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission, Traversable f) => T distr sh prob -> NonEmpty.T f emission -> prob forward hmm = Vector.sum . NonEmpty.last . alpha hmm alpha :: (Shape.C sh, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission, Traversable f) => T distr sh prob -> NonEmpty.T f emission -> NonEmpty.T f (Vector sh prob) alpha hmm (NonEmpty.Cons x xs) = NonEmpty.scanl (\alphai xi -> Vector.mul (emission hmm xi) (transition hmm #> alphai)) (Vector.mul (emission hmm x) (initial hmm)) xs backward :: (Shape.C sh, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission, Traversable f) => T distr sh prob -> NonEmpty.T f emission -> prob backward hmm (NonEmpty.Cons x xs) = Vector.sum $ Vector.mul (initial hmm) $ Vector.mul (emission hmm x) $ NonEmpty.head $ beta hmm xs beta :: (Shape.C sh, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission, Traversable f) => T distr sh prob -> f emission -> NonEmpty.T f (Vector sh prob) beta hmm = NonEmpty.scanr (\xi betai -> Vector.mul (emission hmm xi) betai <# transition hmm) (Vector.constant (StorableArray.shape $ initial hmm) 1) alphaBeta :: (Shape.C sh, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission, Traversable f) => T distr sh prob -> NonEmpty.T f emission -> (prob, NonEmpty.T f (Vector sh prob), NonEmpty.T f (Vector sh prob)) alphaBeta hmm xs = let alphas = alpha hmm xs betas = beta hmm $ NonEmpty.tail xs recipLikelihood = recip $ Vector.sum $ NonEmpty.last alphas in (recipLikelihood, alphas, betas) biscaleTransition :: (Shape.C sh, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob) => T distr sh prob -> Distr.Emission distr -> Vector sh prob -> Vector sh prob -> SquareMatrix sh prob biscaleTransition hmm x alpha0 beta1 = diagonal (Vector.mul (emission hmm x) beta1) <#> transition hmm <#> diagonal alpha0 xiFromAlphaBeta :: (Shape.C sh, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission) => T distr sh prob -> prob -> NonEmpty.T [] emission -> NonEmpty.T [] (Vector sh prob) -> NonEmpty.T [] (Vector sh prob) -> [SquareMatrix sh prob] xiFromAlphaBeta hmm recipLikelihood xs alphas betas = zipWith3 (\x alpha0 beta1 -> Vector.scale recipLikelihood $ biscaleTransition hmm x alpha0 beta1) (NonEmpty.tail xs) (NonEmpty.init alphas) (NonEmpty.tail betas) zetaFromXi :: (Shape.C sh, Eq sh, Class.Real prob) => [SquareMatrix sh prob] -> [Vector sh prob] zetaFromXi = map Matrix.columnSums zetaFromAlphaBeta :: (Shape.C sh, Eq sh, Class.Real prob) => prob -> NonEmpty.T [] (Vector sh prob) -> NonEmpty.T [] (Vector sh prob) -> NonEmpty.T [] (Vector sh prob) zetaFromAlphaBeta recipLikelihood alphas betas = fmap (Vector.scale recipLikelihood) $ NonEmptyC.zipWith Vector.mul alphas betas {- | In constrast to Math.HiddenMarkovModel.reveal this does not normalize the vector. This is slightly simpler but for long sequences the product of probabilities might be smaller than the smallest representable number. -} reveal :: (Shape.InvIndexed sh, Shape.Index sh ~ state, Eq sh, sh ~ Distr.StateShape distr, Distr.EmissionProb distr, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission, Traversable f) => T distr sh prob -> NonEmpty.T f emission -> NonEmpty.T f state reveal hmm (NonEmpty.Cons x xs) = fmap (Shape.revealIndex (StorableArray.shape $ initial hmm)) $ uncurry (NonEmpty.scanr (StorableArray.!)) $ mapFst (fst . Vector.argAbsMaximum . StorableArray.mapShape Shape.Deferred) $ mapAccumL (\alphai xi -> swap $ mapSnd (Vector.mul (emission hmm xi)) $ matrixMaxMul (transition hmm) alphai) (Vector.mul (emission hmm x) (initial hmm)) xs matrixMaxMul :: (Shape.Indexed sh, Eq sh, Shape.Index sh ~ ix, Class.Real a) => SquareMatrix sh a -> Vector sh a -> (Vector (Shape.Deferred sh) (Shape.DeferredIndex ix), Vector sh a) matrixMaxMul m v = let sh = StorableArray.shape v in mapPair (Vector.fromList (Shape.Deferred sh), Vector.fromList sh) $ unzip $ map (Vector.argAbsMaximum . StorableArray.mapShape Shape.Deferred . Vector.mul v) $ Matrix.toRows m {- | A trained model is a temporary form of a Hidden Markov model that we need during the training on multiple training sequences. It allows to collect knowledge over many sequences with 'mergeTrained', even with mixed supervised and unsupervised training. You finish the training by converting the trained model back to a plain modul using 'finishTraining'. You can create a trained model in three ways: * supervised training using an emission sequence with associated states, * unsupervised training using an emission sequence and an existing Hidden Markov Model, * derive it from state sequence patterns, cf. "Math.HiddenMarkovModel.Pattern". -} data Trained distr sh prob = Trained { trainedInitial :: Vector sh prob, trainedTransition :: SquareMatrix sh prob, trainedDistribution :: distr } deriving (Show) instance (NFData distr, NFData sh, NFData prob, Storable prob) => NFData (Trained distr sh prob) where rnf hmm = rnf (trainedInitial hmm, trainedTransition hmm, trainedDistribution hmm) sumTransitions :: (Shape.C sh, Eq sh, Class.Real e) => T distr sh e -> [SquareMatrix sh e] -> SquareMatrix sh e sumTransitions hmm = List.foldl' Vector.add (Vector.constant (StorableArray.shape $ transition hmm) 0) {- | Baum-Welch algorithm -} trainUnsupervised :: (Distr.Estimate tdistr distr, Distr.StateShape distr ~ sh, Eq sh, Distr.Probability distr ~ prob, Distr.Emission distr ~ emission) => T distr sh prob -> NonEmpty.T [] emission -> Trained tdistr sh prob trainUnsupervised hmm xs = let (recipLikelihood, alphas, betas) = alphaBeta hmm xs zetas = zetaFromAlphaBeta recipLikelihood alphas betas zeta0 = NonEmpty.head zetas in Trained { trainedInitial = zeta0, trainedTransition = sumTransitions hmm $ xiFromAlphaBeta hmm recipLikelihood xs alphas betas, trainedDistribution = Distr.accumulateEmissions $ Array.fromList (StorableArray.shape zeta0) $ map (zip (NonEmpty.flatten xs)) $ List.transpose $ map Vector.toList $ NonEmpty.flatten zetas } mergeTrained :: (Shape.C sh, Eq sh, Distr.Estimate tdistr distr, Distr.Probability distr ~ prob) => Trained tdistr sh prob -> Trained tdistr sh prob -> Trained tdistr sh prob mergeTrained hmm0 hmm1 = Trained { trainedInitial = Vector.add (trainedInitial hmm0) (trainedInitial hmm1), trainedTransition = Vector.add (trainedTransition hmm0) (trainedTransition hmm1), trainedDistribution = Distr.combine (trainedDistribution hmm0) (trainedDistribution hmm1) } instance (Shape.C sh, Eq sh, Distr.Estimate tdistr distr, Distr.Probability distr ~ prob) => Sg.Semigroup (Trained tdistr sh prob) where (<>) = mergeTrained toCells :: (Distr.ToCSV distr, Shape.Indexed sh, Class.Real prob, Show prob) => T distr sh prob -> [[String]] toCells hmm = (HMMCSV.cellsFromVector $ initial hmm) : (HMMCSV.cellsFromSquare $ transition hmm) ++ [] : (Distr.toCells $ distribution hmm) parseCSV :: (Distr.FromCSV distr, Distr.StateShape distr ~ stateSh, Shape.C stateSh, Class.Real prob, Read prob) => (Int -> stateSh) -> HMMCSV.CSVParser (T distr stateSh prob) parseCSV makeShape = do v <- StorableArray.mapShape (makeShape . Shape.zeroBasedSize) <$> HMMCSV.parseNonEmptyVectorCells let sh = StorableArray.shape v m <- HMMCSV.parseSquareMatrixCells sh HMMCSV.skipEmptyRow distr <- Distr.parseCells sh return $ Cons { initial = v, transition = m, distribution = distr }