module Data.CRF.Chain1.Constrained.Dataset.Codec
( Codec
, CodecM
, obMax
, lbMax
, encodeWord'Cu
, encodeWord'Cn
, encodeSent'Cu
, encodeSent'Cn
, encodeSent
, encodeWordL'Cu
, encodeWordL'Cn
, encodeSentL'Cu
, encodeSentL'Cn
, encodeSentL
, encodeLabels
, decodeLabel
, decodeLabels
, mkCodec
, encodeData
, encodeDataL
, unJust
, unJusts
) where
import Control.Applicative ((<$>), (<*>), pure)
import Data.Maybe (catMaybes, fromJust)
import Data.Lens.Common (fstLens, sndLens)
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Vector as V
import qualified Control.Monad.Codec as C
import Data.CRF.Chain1.Constrained.Dataset.Internal
import Data.CRF.Chain1.Constrained.Dataset.External
type Codec a b = (C.AtomCodec a, C.AtomCodec (Maybe b))
obMax :: Codec a b -> Ob
obMax =
let idMax m = M.size m 1
in Ob . idMax . C.to . fst
lbMax :: Codec a b -> Lb
lbMax =
let idMax m = M.size m 1
in Lb . idMax . C.to . snd
empty :: Ord b => Codec a b
empty =
let withNo = C.execCodec C.empty (C.encode C.idLens Nothing)
in (C.empty, withNo)
type CodecM a b c = C.Codec (Codec a b) c
encodeObU :: Ord a => a -> CodecM a b Ob
encodeObU = fmap Ob . C.encode' fstLens
encodeObN :: Ord a => a -> CodecM a b (Maybe Ob)
encodeObN = fmap (fmap Ob) . C.maybeEncode fstLens
encodeLbU :: Ord b => b -> CodecM a b Lb
encodeLbU = fmap Lb . C.encode sndLens . Just
encodeLbN :: Ord b => b -> CodecM a b Lb
encodeLbN x = do
my <- C.maybeEncode sndLens (Just x)
Lb <$> ( case my of
Just y -> return y
Nothing -> fromJust <$> C.maybeEncode sndLens Nothing )
encodeWordL'Cu :: (Ord a, Ord b) => WordL a b -> CodecM a b (X, Y)
encodeWordL'Cu w = do
x' <- mapM encodeObU $ S.toList $ obs $ word w
r' <- mapM encodeLbU $ S.toList $ lbs $ word w
let x = mkX x' r'
y <- mkY <$> sequence
[ (,) <$> encodeLbU lb <*> pure pr
| (lb, pr) <- (M.toList . unProb) (choice w) ]
return (x, y)
encodeWordL'Cn :: (Ord a, Ord b) => WordL a b -> CodecM a b (X, Y)
encodeWordL'Cn w = do
x' <- fmap catMaybes . mapM encodeObN . S.toList . obs $ word w
r' <- mapM encodeLbN . S.toList . lbs $ word w
let x = mkX x' r'
y <- mkY <$> sequence
[ (,) <$> encodeLbN lb <*> pure pr
| (lb, pr) <- (M.toList . unProb) (choice w) ]
return (x, y)
encodeWord'Cu :: (Ord a, Ord b) => Word a b -> CodecM a b X
encodeWord'Cu word = do
x' <- mapM encodeObU (S.toList (obs word))
r' <- mapM encodeLbU (S.toList (lbs word))
return $ mkX x' r'
encodeWord'Cn :: (Ord a, Ord b) => Word a b -> CodecM a b X
encodeWord'Cn word = do
x' <- catMaybes <$> mapM encodeObN (S.toList (obs word))
r' <- mapM encodeLbN (S.toList (lbs word))
return $ mkX x' r'
encodeSentL'Cu :: (Ord a, Ord b) => SentL a b -> CodecM a b (Xs, Ys)
encodeSentL'Cu sent = do
ps <- mapM (encodeWordL'Cu) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL'Cn :: (Ord a, Ord b) => SentL a b -> CodecM a b (Xs, Ys)
encodeSentL'Cn sent = do
ps <- mapM (encodeWordL'Cn) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeLabels :: Ord b => Codec a b -> [b] -> AVec Lb
encodeLabels codec = fromList . C.evalCodec codec . mapM encodeLbN
encodeSentL :: (Ord a, Ord b) => Codec a b -> SentL a b -> (Xs, Ys)
encodeSentL codec = C.evalCodec codec . encodeSentL'Cn
encodeSent'Cu :: (Ord a, Ord b) => Sent a b -> CodecM a b Xs
encodeSent'Cu = fmap V.fromList . mapM encodeWord'Cu
encodeSent'Cn :: (Ord a, Ord b) => Sent a b -> CodecM a b Xs
encodeSent'Cn = fmap V.fromList . mapM encodeWord'Cn
encodeSent :: (Ord a, Ord b) => Codec a b -> Sent a b -> Xs
encodeSent codec = C.evalCodec codec . encodeSent'Cn
mkCodec :: (Ord a, Ord b) => [SentL a b] -> Codec a b
mkCodec = C.execCodec empty . mapM_ encodeSentL'Cu
encodeDataL :: (Ord a, Ord b) => Codec a b -> [SentL a b] -> [(Xs, Ys)]
encodeDataL = map . encodeSentL
encodeData :: (Ord a, Ord b) => Codec a b -> [Sent a b] -> [Xs]
encodeData = map . encodeSent
decodeLabel :: Ord b => Codec a b -> Lb -> Maybe b
decodeLabel codec x = C.evalCodec codec $ C.decode sndLens (unLb x)
decodeLabels :: Ord b => Codec a b -> [Lb] -> [Maybe b]
decodeLabels codec xs = C.evalCodec codec $
sequence [C.decode sndLens (unLb x) | x <- xs]
hasLabel :: Ord b => Codec a b -> b -> Bool
hasLabel codec x = M.member (Just x) (C.to $ snd codec)
unJust :: Ord b => Codec a b -> Word a b -> Maybe b -> b
unJust _ _ (Just x) = x
unJust codec word Nothing = case allUnk of
(x:_) -> x
[] -> error "unJust: Nothing and all values known"
where
allUnk = filter (not . hasLabel codec) (S.toList $ lbs word)
unJusts :: Ord b => Codec a b -> Word a b -> [Maybe b] -> [b]
unJusts codec word xs =
concatMap deJust xs
where
allUnk = filter (not . hasLabel codec) (S.toList $ lbs word)
deJust (Just x) = [x]
deJust Nothing = allUnk