-- | Sentence- and dataset-level encoding functions whose inputs and results
-- are 'DAG'-shaped.  Everything else from
-- "Data.CRF.Chain1.Constrained.Dataset.Codec" is re-exported unchanged; only
-- the functions listed explicitly below are replaced by the versions defined
-- in this module.
module Data.CRF.Chain1.Constrained.DAG.Dataset.Codec
  ( module Data.CRF.Chain1.Constrained.Dataset.Codec
  , Xs
  , XYs
  , encodeSent'Cu
  , encodeSent'Cn
  , encodeSent
  , encodeSentL'Cu
  , encodeSentL'Cn
  , encodeSentL
  , encodeData
  , encodeDataL
  , mkCodec
  ) where


import Prelude hiding (Word)
-- import qualified Data.Foldable as F
import qualified Data.Traversable as T

import Data.DAG (DAG)
-- import Data.CRF.Chain1.Constrained.DAG.Dataset.Internal
import qualified Data.CRF.Chain1.Constrained.Dataset.Internal as I
import Data.CRF.Chain1.Constrained.DAG.Dataset.External
import qualified Data.CRF.Chain1.Constrained.Dataset.Codec as C
import Data.CRF.Chain1.Constrained.Dataset.Codec hiding
  ( encodeSent'Cu
  , encodeSent'Cn
  , encodeSent
  , encodeSentL'Cu
  , encodeSentL'Cn
  , encodeSentL
  , encodeData
  , encodeDataL
  , mkCodec
  )
import Control.Monad.Codec (evalCodec, execCodec)


-- | Encoded sentence: a 'DAG' of 'I.X' elements.
type Xs = DAG () I.X
-- type Ys = DAG () I.Y

-- | Encoded labeled sentence: a 'DAG' of @('I.X', 'I.Y')@ pairs.
type XYs = DAG () (I.X, I.Y)


-------------------------------------
-- Normal sentences
-------------------------------------


-- | Encode every word of the sentence with 'C.encodeWord'Cu', i.e. the
-- codec is updated along the way.
encodeSent'Cu :: (Ord a, Ord b) => Sent a b -> C.CodecM a b Xs
encodeSent'Cu sent = T.mapM C.encodeWord'Cu sent


-- | Encode every word of the sentence with 'C.encodeWord'Cn', i.e. the
-- codec is *not* updated.
encodeSent'Cn :: (Ord a, Ord b) => Sent a b -> C.CodecM a b Xs
encodeSent'Cn sent = T.mapM C.encodeWord'Cn sent


-- | Encode the sentence with respect to the given codec.  The codec is only
-- consulted: this runs the non-updating 'encodeSent'Cn' and returns just
-- the encoded sentence.
encodeSent :: (Ord a, Ord b) => C.Codec a b -> Sent a b -> Xs
encodeSent codec sent = evalCodec codec (encodeSent'Cn sent)


-------------------------------------
-- Labeled sentences
-------------------------------------


-- | Encode every word of the labeled sentence with 'C.encodeWordL'Cu',
-- i.e. the codec is updated along the way.
encodeSentL'Cu :: (Ord a, Ord b) => SentL a b -> C.CodecM a b XYs
encodeSentL'Cu sent = T.mapM C.encodeWordL'Cu sent


-- | Encode every word of the labeled sentence with 'C.encodeWordL'Cn',
-- i.e. the codec is *not* updated.  Any label not present in the codec is
-- replaced by the default label.
encodeSentL'Cn :: (Ord a, Ord b) => SentL a b -> C.CodecM a b XYs
encodeSentL'Cn sent = T.mapM C.encodeWordL'Cn sent


-- | Encode the labeled sentence with respect to the given codec.  Any label
-- not present in the codec is replaced by the default label.
encodeSentL :: (Ord a, Ord b) => C.Codec a b -> SentL a b -> XYs
encodeSentL codec sent = evalCodec codec (encodeSentL'Cn sent)


-------------------------------------
-- Datasets
-------------------------------------


-- | Encode each sentence of the labeled dataset with 'encodeSentL', using
-- the same codec for all of them.  Any label not present in the codec is
-- replaced by the default label.
encodeDataL :: (Ord a, Ord b) => C.Codec a b -> [SentL a b] -> [XYs]
encodeDataL codec sents = map (encodeSentL codec) sents


-- | Encode each sentence of the dataset with 'encodeSent', using the same
-- codec for all of them.
encodeData :: (Ord a, Ord b) => C.Codec a b -> [Sent a b] -> [Xs]
encodeData codec sents = map (encodeSent codec) sents


-------------------------------------
-- Creation
-------------------------------------


-- | Build a codec from the labeled dataset: starting from the 'empty' codec,
-- run the updating encoder 'encodeSentL'Cu' over every sentence and keep the
-- resulting codec (the encoded sentences themselves are discarded).
mkCodec :: (Ord a, Ord b) => [SentL a b] -> Codec a b
mkCodec sents = execCodec empty (mapM_ encodeSentL'Cu sents)