module Data.CRF.Chain1.Dataset.Codec
( Codec
, CodecM
, encodeWord'Cu
, encodeWord'Cn
, encodeSent'Cu
, encodeSent'Cn
, encodeSent
, encodeWordL'Cu
, encodeWordL'Cn
, encodeSentL'Cu
, encodeSentL'Cn
, encodeSentL
, decodeLabel
, decodeLabels
, mkCodec
, encodeData
, encodeDataL
) where
import Control.Applicative ((<$>), (<*>), pure)
import Data.Maybe (catMaybes)
import Data.Lens.Common (fstLens, sndLens)
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Vector as V
import qualified Control.Monad.Codec as C
import Data.CRF.Chain1.Dataset.Internal
import Data.CRF.Chain1.Dataset.External
type Codec a b = (C.AtomCodec a, C.AtomCodec b)
type CodecM a b c = C.Codec (Codec a b) c
encodeWordL'Cu :: (Ord a, Ord b) => WordL a b -> CodecM a b (X, Y)
encodeWordL'Cu word = do
x <- mkX . map Ob <$>
mapM (C.encode' fstLens) (S.toList $ fst word)
y <- mkY <$> sequence
[ (,) <$> (Lb <$> C.encode sndLens lb) <*> pure pr
| (lb, pr) <- (M.toList . unDist) (snd word) ]
return (x, y)
encodeWordL'Cn :: (Ord a, Ord b) => Int -> WordL a b -> CodecM a b (X, Y)
encodeWordL'Cn i word = do
x <- mkX . map Ob . catMaybes <$>
mapM (C.maybeEncode fstLens) (S.toList $ fst word)
y <- mkY <$> sequence
[ (,) <$> encodeL i lb <*> pure pr
| (lb, pr) <- (M.toList . unDist) (snd word) ]
return (x, y)
where
encodeL j y = Lb . maybe j id <$> C.maybeEncode sndLens y
encodeWord'Cu :: Ord a => Word a -> CodecM a b X
encodeWord'Cu word =
mkX . map Ob <$> mapM (C.encode' fstLens) (S.toList word)
encodeWord'Cn :: Ord a => Word a -> CodecM a b X
encodeWord'Cn word =
mkX . map Ob . catMaybes <$> mapM (C.maybeEncode fstLens) (S.toList word)
encodeSentL'Cu :: (Ord a, Ord b) => SentL a b -> CodecM a b (Xs, Ys)
encodeSentL'Cu sent = do
ps <- mapM encodeWordL'Cu sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL'Cn :: (Ord a, Ord b) => b -> SentL a b -> CodecM a b (Xs, Ys)
encodeSentL'Cn def sent = do
i <- C.maybeEncode sndLens def >>= \mi -> case mi of
Just _i -> return _i
Nothing -> error "encodeWordL'Cn: default label not in the codec"
ps <- mapM (encodeWordL'Cn i) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL :: (Ord a, Ord b) => b -> Codec a b -> SentL a b -> (Xs, Ys)
encodeSentL def codec = C.evalCodec codec . encodeSentL'Cn def
encodeSent'Cu :: Ord a => Sent a -> CodecM a b Xs
encodeSent'Cu = fmap V.fromList . mapM encodeWord'Cu
encodeSent'Cn :: Ord a => Sent a -> CodecM a b Xs
encodeSent'Cn = fmap V.fromList . mapM encodeWord'Cn
encodeSent :: Ord a => Codec a b -> Sent a -> Xs
encodeSent codec = C.evalCodec codec . encodeSent'Cn
mkCodec :: (Ord a, Ord b) => [SentL a b] -> (Codec a b, [(Xs, Ys)])
mkCodec =
let swap (x, y) = (y, x)
in swap . C.runCodec (C.empty, C.empty) . mapM encodeSentL'Cu
encodeDataL :: (Ord a, Ord b) => b -> Codec a b -> [SentL a b] -> [(Xs, Ys)]
encodeDataL def codec = C.evalCodec codec . mapM (encodeSentL'Cn def)
encodeData :: Ord a => Codec a b -> [Sent a] -> [Xs]
encodeData codec = map (encodeSent codec)
decodeLabel :: Ord b => Codec a b -> Lb -> b
decodeLabel codec x = C.evalCodec codec $ C.decode sndLens (unLb x)
decodeLabels :: Ord b => Codec a b -> [Lb] -> [b]
decodeLabels codec xs = C.evalCodec codec $
sequence [C.decode sndLens (unLb x) | x <- xs]