{-# LANGUAGE RebindableSyntax #-} module HiddenMarkovModel where import qualified LabelChain import qualified Label import qualified Named import qualified Math.HiddenMarkovModel.Named as HMMNamed import qualified Math.HiddenMarkovModel as HMM import qualified Numeric.LAPACK.Matrix as Matrix import qualified Numeric.LAPACK.Vector as Vector import Numeric.LAPACK.Matrix (ZeroInt) import Numeric.LAPACK.Vector (Vector) import qualified Data.Array.Comfort.Storable as ComfortArray import qualified Data.Array.Comfort.Boxed as Array import qualified Data.Array.Comfort.Shape as Shape import Data.Array.Comfort.Boxed (Array, (!)) import qualified Data.StorableVector.Lazy as SVL import Foreign.Storable (Storable) import Text.Printf (printf, ) import qualified Options.Applicative as OP import qualified System.Path.PartClass as PathClass import qualified System.Path as Path import qualified Control.Monad.Exception.Synchronous as ME import qualified Control.Parallel.Strategies as Par import qualified Control.DeepSeq as DeepSeq import qualified Data.NonEmpty.Class as NonEmptyC import qualified Data.NonEmpty as NonEmpty import qualified Data.Monoid.HT as Mn import qualified Data.List.HT as ListHT import qualified Data.List as List import qualified Data.Map as Map; import Data.Map (Map) import qualified Data.Set as Set; import Data.Set (Set) import Data.Traversable (Traversable) import Data.Foldable (foldMap) import Data.Monoid ((<>)) import Data.NonEmpty ((!:)) import Data.Tuple.HT (swap) import Data.Ix (Ix) import NumericPrelude.Numeric import NumericPrelude.Base newtype State = State Int deriving (Eq, Ord, Ix, Show) instance Enum State where fromEnum (State k) = k toEnum = State instance DeepSeq.NFData State where rnf (State k) = DeepSeq.rnf k state :: Int -> State state = State statesShape :: Int -> ShapeState statesShape n = Shape.Range (state 0) (state (n-1)) type ShapeInt = Shape.ZeroBased Int type ShapeState = Shape.Range State type Gaussian = HMM.Gaussian ShapeInt ShapeState Double type GaussianTrained = HMM.GaussianTrained ShapeInt ShapeState Double type NamedGaussian = HMMNamed.Gaussian ShapeInt ShapeState Double allStates :: [String] allStates = List.sort [Label.clickBegin, Label.clickEnd, Label.chirpingMain, Label.chirpingPause, Label.growling, Label.pause] admissibleTransitions :: [(String, [String])] admissibleTransitions = (Label.pause, [Label.pause, Label.chirpingMain, Label.clickBegin, Label.growlingClickBegin]) : (Label.clickBegin, [Label.clickBegin, Label.clickEnd]) : (Label.clickEnd, [Label.clickBegin, Label.clickEnd, Label.chirpingMain, Label.growlingClickBegin, Label.pause]) : (Label.chirpingMain, [Label.chirpingMain, Label.chirpingPause]) : (Label.chirpingPause, [Label.chirpingMain, Label.chirpingPause, Label.clickBegin, Label.growlingClickBegin, Label.pause]) : (Label.growlingClickBegin, [Label.growlingClickBegin, Label.growlingClickEnd]) : (Label.growlingClickEnd, [Label.growlingClickBegin, Label.growlingClickEnd, Label.chirpingMain, Label.clickBegin, Label.pause]) : [] admissibleTransitionSet :: Set (String, String) admissibleTransitionSet = foldMap (\(from, tos) -> Set.fromList $ map ((,) from) tos) admissibleTransitions forbiddenTransitions :: Set (String, String) -> Array ShapeState String -> GaussianTrained -> Set (String, String) forbiddenTransitions admissible dict = flip Set.difference admissible . foldMap (foldMap (\(row, (col, x)) -> Mn.when (x > 0) $ Set.singleton (dict ! state col, dict ! state row))) . zipWith (\k -> map ((,) k) . zip [0..]) [0..] . map Vector.toList . Matrix.toRows . HMM.trainedTransition -- cf. Math.HiddenMarkovModel.Named.inverseMap inverseMap :: Array ShapeState String -> Map String State inverseMap = Map.fromListWith (error "duplicate label") . map swap . Array.toAssociations checkedLookup :: (Ord k, Show k) => Map k a -> k -> a checkedLookup m k = Map.findWithDefault (error $ "checkedLookup: unknown key " ++ show k) k m mapsFromLabels :: [String] -> (Map String State, Array ShapeState String) mapsFromLabels ss = let m = Array.fromList (statesShape $ length ss) ss in (inverseMap m, m) checkNonEmpty :: (PathClass.AbsRel ar) => Path.File ar -> Named.Signal -> ME.Exceptional String Named.NonEmptySignal checkNonEmpty path (Named.Cons name sig) = case SVL.viewL sig of Nothing -> ME.throw $ printf "%s: %s: empty feature signal" (Path.toString path) name Just (x,xs) -> return $ Named.Cons name $ x !: xs flattenStorableVectorLazy :: (Storable a) => NonEmpty.T SVL.Vector a -> SVL.Vector a flattenStorableVectorLazy (NonEmpty.Cons x xs) = SVL.cons x xs prepare :: [Named.NonEmptySignal] -> NonEmpty.T [] (Vector ZeroInt Double) prepare nxs = let xs = map Named.body nxs vecFromList = ComfortArray.map realToFrac . Vector.autoFromList in (vecFromList $ map NonEmpty.head xs) !: (map vecFromList $ List.transpose $ map (SVL.unpack . NonEmpty.tail) xs) label :: Gaussian -> [Named.NonEmptySignal] -> [State] label model = NonEmpty.flatten . HMM.reveal model . prepare analyze :: NamedGaussian -> [Named.NonEmptySignal] -> LabelChain.T Int String analyze model = fmap (HMMNamed.nameFromStateMap model !) . LabelChain.segment . label (HMMNamed.model model) flattenIntervals :: Map String State -> LabelChain.T Int String -> [State] flattenIntervals dict = LabelChain.flattenLabels . fmap (checkedLookup dict) trainSupervised :: (PathClass.AbsRel ar) => Map String State -> Path.File ar -> [Named.NonEmptySignal] -> LabelChain.T Int String -> ME.Exceptional String GaussianTrained trainSupervised dict input sig labels = do labelSig <- ME.fromMaybe (printf "%s: no labels for supervised training" $ Path.toString input) $ NonEmpty.fetch $ flattenIntervals dict labels return $ HMM.trainSupervised (statesShape $ Map.size dict) $ NonEmptyC.zip labelSig (prepare sig) trainMany :: (Traversable f) => (trainingData -> GaussianTrained) -> NonEmpty.T f trainingData -> Gaussian trainMany train = HMM.finishTraining . NonEmpty.foldl1 HMM.mergeTrained . Par.withStrategy (Par.parTraversable Par.rdeepseq) . fmap train data Convergence = Convergence { cvgMaxIter, cvgSubIter :: Int, cvgTolerance :: Double } convergenceOptions :: OP.Parser Convergence convergenceOptions = OP.liftA3 Convergence (OP.option OP.auto $ OP.value 100 <> OP.long "max-iterations" <> OP.metavar "NUMBER" <> OP.help "maximal number of iterations for unsupervised training") (OP.option OP.auto $ OP.value 10 <> OP.long "sub-iterations" <> OP.metavar "NUMBER" <> OP.help "number of sub-iterations per iteration") (OP.option OP.auto $ OP.value 1e-5 <> OP.long "tolerance" <> OP.metavar "PROB" <> OP.help "convergence tolerance for unsupervised training") takeUntilConvergence :: Convergence -> [Gaussian] -> [Gaussian] takeUntilConvergence opt = (\(hmm:hmms) -> (hmm :) $ map snd . take (cvgMaxIter opt) . takeWhile fst $ ListHT.mapAdjacent (\hmm0 hmm1 -> (HMM.deviation hmm0 hmm1 > cvgTolerance opt, hmm1)) hmms) . ListHT.sieve (cvgSubIter opt)