module Data.CRF.Chain2.Generic.Codec
( CodecM
, Codec (..)
, encodeWord'Cu
, encodeWord'Cn
, encodeSent'Cu
, encodeSent'Cn
, encodeSent
, encodeWordL'Cu
, encodeWordL'Cn
, encodeSentL'Cu
, encodeSentL'Cn
, encodeSentL
, decodeLabel
, decodeLabels
, unJust
, mkCodec
, encodeData
, encodeDataL
) where
import Control.Applicative (pure, (<$>), (<*>))
import Data.Maybe (catMaybes)
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Vector as V
import qualified Control.Monad.Codec as C
import Data.CRF.Chain2.Generic.Internal
import Data.CRF.Chain2.Generic.External
type CodecM c a = C.Codec c a
data Codec a b c o e = Codec {
empty :: c
, encodeObU :: a -> CodecM c o
, encodeObN :: a -> CodecM c (Maybe o)
, encodeLbU :: b -> CodecM c e
, encodeLbN :: b -> CodecM c e
, decodeLbC :: e -> CodecM c (Maybe b)
, hasLabel :: c -> b -> Bool }
encodeWordL'Cu
:: (Ord e, Ord o) => Codec a b c o e
-> WordL a b -> CodecM c (X o e, Y e)
encodeWordL'Cu Codec{..} (word, choice) = do
x' <- mapM encodeObU (S.toList (obs word))
r' <- mapM encodeLbU (S.toList (lbs word))
let x = mkX x' r'
y <- mkY <$> sequence
[ (,) <$> encodeLbU lb <*> pure pr
| (lb, pr) <- (M.toList . unDist) choice ]
return (x, y)
encodeWordL'Cn
:: (Ord e, Ord o) => Codec a b c o e
-> WordL a b -> CodecM c (X o e, Y e)
encodeWordL'Cn Codec{..} (word, choice) = do
x' <- catMaybes <$> mapM encodeObN (S.toList (obs word))
r' <- mapM encodeLbN (S.toList (lbs word))
let x = mkX x' r'
y <- mkY <$> sequence
[ (,) <$> encodeLbN lb <*> pure pr
| (lb, pr) <- (M.toList . unDist) choice ]
return (x, y)
encodeWord'Cu
:: (Ord e, Ord o) => Codec a b c o e
-> Word a b -> CodecM c (X o e)
encodeWord'Cu Codec{..} word = do
x' <- mapM encodeObU (S.toList (obs word))
r' <- mapM encodeLbU (S.toList (lbs word))
return $ mkX x' r'
encodeWord'Cn
:: (Ord e, Ord o) => Codec a b c o e
-> Word a b -> CodecM c (X o e)
encodeWord'Cn Codec{..} word = do
x' <- catMaybes <$> mapM encodeObN (S.toList (obs word))
r' <- mapM encodeLbN (S.toList (lbs word))
return $ mkX x' r'
encodeSentL'Cu
:: (Ord e, Ord o) => Codec a b c o e
-> SentL a b -> CodecM c (Xs o e, Ys e)
encodeSentL'Cu cdc sent = do
ps <- mapM (encodeWordL'Cu cdc) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL'Cn
:: (Ord e, Ord o) => Codec a b c o e
-> SentL a b -> CodecM c (Xs o e, Ys e)
encodeSentL'Cn cdc sent = do
ps <- mapM (encodeWordL'Cn cdc) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL
:: (Ord e, Ord o) => Codec a b c o e
-> c -> SentL a b -> (Xs o e, Ys e)
encodeSentL cdc cdcData = C.evalCodec cdcData . encodeSentL'Cn cdc
encodeSent'Cu
:: (Ord e, Ord o) => Codec a b c o e
-> Sent a b -> CodecM c (Xs o e)
encodeSent'Cu cdc = fmap V.fromList . mapM (encodeWord'Cu cdc)
encodeSent'Cn
:: (Ord e, Ord o) => Codec a b c o e
-> Sent a b -> CodecM c (Xs o e)
encodeSent'Cn cdc = fmap V.fromList . mapM (encodeWord'Cn cdc)
encodeSent
:: (Ord e, Ord o) => Codec a b c o e
-> c -> Sent a b -> Xs o e
encodeSent cdc cdcData = C.evalCodec cdcData . encodeSent'Cn cdc
mkCodec
:: (Ord e, Ord o) => Codec a b c o e
-> [SentL a b] -> (c, [(Xs o e, Ys e)])
mkCodec cdc
= swap
. C.runCodec (empty cdc)
. mapM (encodeSentL'Cu cdc)
where
swap (x, y) = (y, x)
encodeDataL
:: (Ord e, Ord o) => Codec a b c o e
-> c -> [SentL a b] -> [(Xs o e, Ys e)]
encodeDataL cdc cdcData = C.evalCodec cdcData . mapM (encodeSentL'Cn cdc)
encodeData
:: (Ord e, Ord o) => Codec a b c o e
-> c -> [Sent a b] -> [Xs o e]
encodeData cdc cdcData = map (encodeSent cdc cdcData)
decodeLabel :: Codec a b c o e -> c -> e -> Maybe b
decodeLabel cdc cdcData = C.evalCodec cdcData . decodeLbC cdc
decodeLabels :: Codec a b c o e -> c -> [e] -> [Maybe b]
decodeLabels cdc cdcData = C.evalCodec cdcData . mapM (decodeLbC cdc)
unJust :: Codec a b c o e -> c -> Word a b -> Maybe b -> b
unJust _ _ _ (Just x) = x
unJust cdc cdcData word Nothing = case allUnk of
(x:_) -> x
[] -> error "unJust: Nothing and all values known"
where
allUnk = filter (not . hasLabel cdc cdcData) (S.toList $ lbs word)