{-# LANGUAGE TypeFamilies #-}
module Math.HiddenMarkovModel.Named (
   T(..),
   Discrete,
   Gaussian,
   fromModelAndNames,
   toCSV,
   fromCSV,
   ) where

import qualified Math.HiddenMarkovModel.Distribution as Distr
import qualified Math.HiddenMarkovModel.Private as HMM
import qualified Math.HiddenMarkovModel.CSV as HMMCSV
import Math.HiddenMarkovModel.Utility (attachOnes, vectorDim)

import qualified Numeric.Netlib.Class as Class

import qualified Data.Array.Comfort.Storable as StorableArray
import qualified Data.Array.Comfort.Boxed as Array
import qualified Data.Array.Comfort.Shape as Shape
import Data.Array.Comfort.Boxed (Array)

import qualified Text.CSV.Lazy.String as CSV
import Text.Printf (printf)

import qualified Control.Monad.Exception.Synchronous as ME
import qualified Control.Monad.Trans.State as MS
import Control.DeepSeq (NFData, rnf)
import Foreign.Storable (Storable)

import qualified Data.Map as Map
import qualified Data.List as List
import Data.Tuple.HT (swap)
import Data.Map (Map)


{- |
A Hidden Markov Model with names for each state.

Although 'nameFromStateMap' and 'stateFromNameMap' are exported
you must be careful to keep them consistent when you alter them.
-}
data T typ sh ix prob =
   Cons {
      T typ sh ix prob -> T typ sh prob
model :: HMM.T typ sh prob,
      T typ sh ix prob -> Array sh String
nameFromStateMap :: Array sh String,
      T typ sh ix prob -> Map String ix
stateFromNameMap :: Map String ix
   }
   deriving (Int -> T typ sh ix prob -> ShowS
[T typ sh ix prob] -> ShowS
T typ sh ix prob -> String
(Int -> T typ sh ix prob -> ShowS)
-> (T typ sh ix prob -> String)
-> ([T typ sh ix prob] -> ShowS)
-> Show (T typ sh ix prob)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall typ sh ix prob.
(C sh, Storable prob, Show typ, Show sh, Show prob, Show ix) =>
Int -> T typ sh ix prob -> ShowS
forall typ sh ix prob.
(C sh, Storable prob, Show typ, Show sh, Show prob, Show ix) =>
[T typ sh ix prob] -> ShowS
forall typ sh ix prob.
(C sh, Storable prob, Show typ, Show sh, Show prob, Show ix) =>
T typ sh ix prob -> String
showList :: [T typ sh ix prob] -> ShowS
$cshowList :: forall typ sh ix prob.
(C sh, Storable prob, Show typ, Show sh, Show prob, Show ix) =>
[T typ sh ix prob] -> ShowS
show :: T typ sh ix prob -> String
$cshow :: forall typ sh ix prob.
(C sh, Storable prob, Show typ, Show sh, Show prob, Show ix) =>
T typ sh ix prob -> String
showsPrec :: Int -> T typ sh ix prob -> ShowS
$cshowsPrec :: forall typ sh ix prob.
(C sh, Storable prob, Show typ, Show sh, Show prob, Show ix) =>
Int -> T typ sh ix prob -> ShowS
Show)

type Simple typ sh prob = T typ sh (Shape.Index sh) prob
type Discrete symbol stateSh prob =
      Simple (Distr.Discrete symbol) stateSh prob
type Gaussian emiSh stateSh a =
      Simple (Distr.Gaussian emiSh) stateSh a


instance
   (Distr.NFData typ, NFData sh, NFData ix, NFData prob,
    Shape.C sh, Storable prob) =>
      NFData (T typ sh ix prob) where
   rnf :: T typ sh ix prob -> ()
rnf T typ sh ix prob
hmm = (T typ sh prob, Array sh String, Map String ix) -> ()
forall a. NFData a => a -> ()
rnf (T typ sh ix prob -> T typ sh prob
forall typ sh ix prob. T typ sh ix prob -> T typ sh prob
model T typ sh ix prob
hmm, T typ sh ix prob -> Array sh String
forall typ sh ix prob. T typ sh ix prob -> Array sh String
nameFromStateMap T typ sh ix prob
hmm, T typ sh ix prob -> Map String ix
forall typ sh ix prob. T typ sh ix prob -> Map String ix
stateFromNameMap T typ sh ix prob
hmm)


fromModelAndNames ::
   (Shape.Indexed sh) =>
   HMM.T typ sh prob -> [String] -> Simple typ sh prob
fromModelAndNames :: T typ sh prob -> [String] -> Simple typ sh prob
fromModelAndNames T typ sh prob
md [String]
names =
   let m :: Array sh String
m = sh -> [String] -> Array sh String
forall sh a. C sh => sh -> [a] -> Array sh a
Array.fromList (Array sh prob -> sh
forall sh a. Array sh a -> sh
StorableArray.shape (Array sh prob -> sh) -> Array sh prob -> sh
forall a b. (a -> b) -> a -> b
$ T typ sh prob -> Array sh prob
forall typ sh prob. T typ sh prob -> Vector sh prob
HMM.initial T typ sh prob
md) [String]
names
   in  Cons :: forall typ sh ix prob.
T typ sh prob
-> Array sh String -> Map String ix -> T typ sh ix prob
Cons {
          model :: T typ sh prob
model = T typ sh prob
md,
          nameFromStateMap :: Array sh String
nameFromStateMap = Array sh String
m,
          stateFromNameMap :: Map String (Index sh)
stateFromNameMap = Array sh String -> Map String (Index sh)
forall sh ix.
(Indexed sh, Index sh ~ ix) =>
Array sh String -> Map String ix
inverseMap Array sh String
m
       }

inverseMap ::
   (Shape.Indexed sh, Shape.Index sh ~ ix) => Array sh String -> Map String ix
inverseMap :: Array sh String -> Map String ix
inverseMap =
   (ix -> ix -> ix) -> [(String, ix)] -> Map String ix
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith (String -> ix -> ix -> ix
forall a. HasCallStack => String -> a
error String
"duplicate label") ([(String, ix)] -> Map String ix)
-> (Array sh String -> [(String, ix)])
-> Array sh String
-> Map String ix
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   ((ix, String) -> (String, ix)) -> [(ix, String)] -> [(String, ix)]
forall a b. (a -> b) -> [a] -> [b]
map (ix, String) -> (String, ix)
forall a b. (a, b) -> (b, a)
swap ([(ix, String)] -> [(String, ix)])
-> (Array sh String -> [(ix, String)])
-> Array sh String
-> [(String, ix)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Array sh String -> [(ix, String)]
forall sh a. Indexed sh => Array sh a -> [(Index sh, a)]
Array.toAssociations


toCSV ::
   (Distr.ToCSV typ, Shape.Indexed sh, Class.Real prob, Show prob) =>
   Simple typ sh prob -> String
toCSV :: Simple typ sh prob -> String
toCSV Simple typ sh prob
hmm =
   CSVTable -> String
CSV.ppCSVTable (CSVTable -> String) -> CSVTable -> String
forall a b. (a -> b) -> a -> b
$ ([CSVError], CSVTable) -> CSVTable
forall a b. (a, b) -> b
snd (([CSVError], CSVTable) -> CSVTable)
-> ([CSVError], CSVTable) -> CSVTable
forall a b. (a -> b) -> a -> b
$ [[String]] -> ([CSVError], CSVTable)
CSV.toCSVTable ([[String]] -> ([CSVError], CSVTable))
-> [[String]] -> ([CSVError], CSVTable)
forall a b. (a -> b) -> a -> b
$ String -> [[String]] -> [[String]]
forall a. a -> [[a]] -> [[a]]
HMMCSV.padTable String
"" ([[String]] -> [[String]]) -> [[String]] -> [[String]]
forall a b. (a -> b) -> a -> b
$
      Array sh String -> [String]
forall sh a. C sh => Array sh a -> [a]
Array.toList (Simple typ sh prob -> Array sh String
forall typ sh ix prob. T typ sh ix prob -> Array sh String
nameFromStateMap Simple typ sh prob
hmm) [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: T typ sh prob -> [[String]]
forall typ sh prob.
(ToCSV typ, Indexed sh, Real prob, Show prob) =>
T typ sh prob -> [[String]]
HMM.toCells (Simple typ sh prob -> T typ sh prob
forall typ sh ix prob. T typ sh ix prob -> T typ sh prob
model Simple typ sh prob
hmm)

fromCSV ::
   (Distr.FromCSV typ, Shape.Indexed stateSh, Eq stateSh,
    Class.Real prob, Read prob) =>
   (Int -> stateSh) ->
   String -> ME.Exceptional String (Simple typ stateSh prob)
fromCSV :: (Int -> stateSh)
-> String -> Exceptional String (Simple typ stateSh prob)
fromCSV Int -> stateSh
makeShape =
   StateT CSVResult (Exceptional String) (Simple typ stateSh prob)
-> CSVResult -> Exceptional String (Simple typ stateSh prob)
forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
MS.evalStateT ((Int -> stateSh)
-> StateT CSVResult (Exceptional String) (Simple typ stateSh prob)
forall typ stateSh prob.
(FromCSV typ, Indexed stateSh, Eq stateSh, Real prob, Read prob) =>
(Int -> stateSh) -> CSVParser (Simple typ stateSh prob)
parseCSV Int -> stateSh
makeShape) (CSVResult -> Exceptional String (Simple typ stateSh prob))
-> (String -> CSVResult)
-> String
-> Exceptional String (Simple typ stateSh prob)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Either [CSVError] CSVRow -> Either [CSVError] CSVRow)
-> CSVResult -> CSVResult
forall a b. (a -> b) -> [a] -> [b]
map Either [CSVError] CSVRow -> Either [CSVError] CSVRow
HMMCSV.fixShortRow (CSVResult -> CSVResult)
-> (String -> CSVResult) -> String -> CSVResult
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> CSVResult
CSV.parseCSV

parseCSV ::
   (Distr.FromCSV typ, Shape.Indexed stateSh, Eq stateSh,
    Class.Real prob, Read prob) =>
   (Int -> stateSh) -> HMMCSV.CSVParser (Simple typ stateSh prob)
parseCSV :: (Int -> stateSh) -> CSVParser (Simple typ stateSh prob)
parseCSV Int -> stateSh
makeShape = do
   [String]
names <- CSVRow -> CSVParser [String]
HMMCSV.parseStringList (CSVRow -> CSVParser [String])
-> StateT CSVResult (Exceptional String) CSVRow
-> CSVParser [String]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< StateT CSVResult (Exceptional String) CSVRow
HMMCSV.getRow
   let duplicateNames :: [String]
duplicateNames =
         Map String Int -> [String]
forall k a. Map k a -> [k]
Map.keys (Map String Int -> [String]) -> Map String Int -> [String]
forall a b. (a -> b) -> a -> b
$ (Int -> Bool) -> Map String Int -> Map String Int
forall a k. (a -> Bool) -> Map k a -> Map k a
Map.filter (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> (Int
1::Int)) (Map String Int -> Map String Int)
-> Map String Int -> Map String Int
forall a b. (a -> b) -> a -> b
$
         (Int -> Int -> Int) -> [(String, Int)] -> Map String Int
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) ([(String, Int)] -> Map String Int)
-> [(String, Int)] -> Map String Int
forall a b. (a -> b) -> a -> b
$ [String] -> [(String, Int)]
forall b a. Num b => [a] -> [(a, b)]
attachOnes [String]
names
    in Bool -> String -> CSVParser ()
HMMCSV.assert ([String] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
duplicateNames) (String -> CSVParser ()) -> String -> CSVParser ()
forall a b. (a -> b) -> a -> b
$
          String
"duplicate names: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
List.intercalate String
", " [String]
duplicateNames
   T typ stateSh prob
md <- (Int -> stateSh) -> CSVParser (T typ stateSh prob)
forall typ stateSh prob.
(FromCSV typ, C stateSh, Eq stateSh, Real prob, Read prob) =>
(Int -> stateSh) -> CSVParser (T typ stateSh prob)
HMM.parseCSV Int -> stateSh
makeShape
   let n :: Int
n = [String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
names
       m :: Int
m = Vector stateSh prob -> Int
forall sh a. C sh => Vector sh a -> Int
vectorDim (T typ stateSh prob -> Vector stateSh prob
forall typ sh prob. T typ sh prob -> Vector sh prob
HMM.initial T typ stateSh prob
md)
    in Bool -> String -> CSVParser ()
HMMCSV.assert (Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
m) (String -> CSVParser ()) -> String -> CSVParser ()
forall a b. (a -> b) -> a -> b
$
          String -> Int -> Int -> String
forall r. PrintfType r => String -> r
printf String
"got %d state names for %d states" Int
n Int
m
   Simple typ stateSh prob -> CSVParser (Simple typ stateSh prob)
forall (m :: * -> *) a. Monad m => a -> m a
return (Simple typ stateSh prob -> CSVParser (Simple typ stateSh prob))
-> Simple typ stateSh prob -> CSVParser (Simple typ stateSh prob)
forall a b. (a -> b) -> a -> b
$ T typ stateSh prob -> [String] -> Simple typ stateSh prob
forall sh typ prob.
Indexed sh =>
T typ sh prob -> [String] -> Simple typ sh prob
fromModelAndNames T typ stateSh prob
md [String]
names