-- | Dataset types built from plain containers: words as sets, sentences as
-- lists, and distributions as maps from elements to 'Double' values.
module Data.CRF.Chain1.Dataset.External
-- * Words and sentences
( Word
, Sent
-- * Distributions
-- | Only the 'unDist' accessor is exported for 'Dist'; values are built
-- with 'mkDist' or 'annotate'.
, Dist (unDist)
, mkDist
-- * Words and sentences paired with a distribution
, WordL
, annotate
, SentL
) where
import Prelude hiding (Word)

import qualified Data.Map as M
import qualified Data.Set as S
-- | A word, represented as a set of values of type @a@.
--
-- NOTE(review): the elements are presumably the observations attached to the
-- word -- confirm against callers; the alias itself puts no constraint on @a@.
--
-- The name coincides with the Prelude type @Word@ (exported by the Prelude
-- since base 4.8), so code that has both in scope has to hide or qualify one
-- of them.
type Word a = S.Set a
-- | A sentence: a list of words ('Word').
type Sent a = [Word a]
-- | A distribution over elements of type @a@, stored as a map from each
-- element to a 'Double' value.
--
-- The module's export list exposes only the 'unDist' accessor, not the
-- constructor, so outside this module values are obtained through 'mkDist'
-- (which normalises the given weights) or 'annotate'.
newtype Dist a = Dist { unDist :: M.Map a Double }
-- | Construct a distribution from a list of weighted elements.
--
-- Weights given for the same element are added together, and every weight is
-- then divided by the total so that the stored values sum to 1.
--
-- An empty list yields an empty distribution.  When the total weight is
-- exactly 0 (for instance, every weight is zero) the weights are returned
-- unchanged rather than divided by zero, which would fill the map with @NaN@
-- or infinite values.  Weights are not otherwise validated; in particular
-- negative weights are accepted as given.
mkDist :: Ord a => [(a, Double)] -> Dist a
mkDist =
    Dist . normalize . M.fromListWith (+)
  where
    normalize dist
        -- Nothing sensible to scale by: keep the weights as they are.
        | z == 0    = dist
        | otherwise = fmap (/ z) dist
      where
        -- Total weight after merging duplicate elements.
        z = sum (M.elems dist)
-- | A word ('Word') paired with a distribution ('Dist') over values of
-- type @b@.
--
-- NOTE(review): @b@ is presumably the label type (the @L@ suffix suggests
-- \"labeled\") -- confirm against callers.
type WordL a b = (Word a, Dist b)
-- | Pair a word with a single, known value of type @b@.
--
-- The attached distribution contains exactly one entry, mapping the given
-- value to 1.
annotate :: Word a -> b -> WordL a b
annotate observations label = (observations, certain)
  where
    -- Point distribution concentrated on the given value.
    certain = Dist $ M.singleton label 1
-- | A sentence whose words each carry a distribution: a list of 'WordL'.
type SentL a b = [WordL a b]