module Data.CRF.Chain2.Pair.Codec
( Codec
, CodecM
, obMax
, lb1Max
, lb2Max
, encodeWord'Cu
, encodeWord'Cn
, encodeSent'Cu
, encodeSent'Cn
, encodeSent
, encodeWordL'Cu
, encodeWordL'Cn
, encodeSentL'Cu
, encodeSentL'Cn
, encodeSentL
, decodeLabel
, decodeLabels
, unJust
, mkCodec
, encodeData
, encodeDataL
) where
import Control.Applicative (pure, (<$>), (<*>))
import Control.Comonad.Trans.Store (store)
import Data.Maybe (fromJust, catMaybes)
import Data.Lens.Common (Lens(..))
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Vector as V
import qualified Control.Monad.Codec as C
import Data.CRF.Chain2.Pair.Base
import Data.CRF.Chain2.Generic.Base
import Data.CRF.Chain2.Generic.External
type Codec a b c =
( C.AtomCodec a
, C.AtomCodec (Maybe b)
, C.AtomCodec (Maybe c) )
_1 :: (a, b, c) -> a
_1 (x, _, _) = x
_2 :: (a, b, c) -> b
_2 (_, x, _) = x
_3 :: (a, b, c) -> c
_3 (_, _, x) = x
_1Lens :: Lens (a, b, c) a
_1Lens = Lens $ \(a, b, c) -> store (\a' -> (a', b, c)) a
_2Lens :: Lens (a, b, c) b
_2Lens = Lens $ \(a, b, c) -> store (\b' -> (a, b', c)) b
_3Lens :: Lens (a, b, c) c
_3Lens = Lens $ \(a, b, c) -> store (\c' -> (a, b, c')) c
obMax :: Codec a b c -> Ob
obMax =
let idMax m = M.size m 1
in Ob . idMax . C.to . _1
lb1Max :: Codec a b c -> Lb1
lb1Max =
let idMax m = M.size m 1
in Lb1 . idMax . C.to . _2
lb2Max :: Codec a b c -> Lb2
lb2Max =
let idMax m = M.size m 1
in Lb2 . idMax . C.to . _3
empty :: (Ord b, Ord c) => Codec a b c
empty =
( C.empty
, C.execCodec C.empty (C.encode C.idLens Nothing)
, C.execCodec C.empty (C.encode C.idLens Nothing) )
type CodecM a b c d = C.Codec (Codec a b c) d
encodeObU :: Ord a => a -> CodecM a b c Ob
encodeObU = fmap Ob . C.encode' _1Lens
encodeObN :: Ord a => a -> CodecM a b c (Maybe Ob)
encodeObN = fmap (fmap Ob) . C.maybeEncode _1Lens
encodeLbU :: (Ord b, Ord c) => (b, c) -> CodecM a b c Lb
encodeLbU (x, y) = do
x' <- C.encode _2Lens (Just x)
y' <- C.encode _3Lens (Just y)
return (Lb1 x', Lb2 y')
encodeLbN :: (Ord b, Ord c) => (b, c) -> CodecM a b c Lb
encodeLbN (x, y) = do
x' <- C.maybeEncode _2Lens (Just x) >>= \mx -> case mx of
Just x' -> return x'
Nothing -> fromJust <$> C.maybeEncode _2Lens Nothing
y' <- C.maybeEncode _3Lens (Just y) >>= \my -> case my of
Just y' -> return y'
Nothing -> fromJust <$> C.maybeEncode _3Lens Nothing
return (Lb1 x', Lb2 y')
encodeWordL'Cu
:: (Ord a, Ord b, Ord c)
=> WordL a (b, c)
-> CodecM a b c (X Ob Lb, Y Lb)
encodeWordL'Cu (word, choice) = do
x' <- mapM encodeObU (S.toList (obs word))
r' <- mapM encodeLbU (S.toList (lbs word))
let x = mkX x' r'
y <- mkY <$> sequence
[ (,) <$> encodeLbU lb <*> pure pr
| (lb, pr) <- (M.toList . unDist) choice ]
return (x, y)
encodeWordL'Cn
:: (Ord a, Ord b, Ord c)
=> WordL a (b, c)
-> CodecM a b c (X Ob Lb, Y Lb)
encodeWordL'Cn (word, choice) = do
x' <- catMaybes <$> mapM encodeObN (S.toList (obs word))
r' <- mapM encodeLbN (S.toList (lbs word))
let x = mkX x' r'
y <- mkY <$> sequence
[ (,) <$> encodeLbN lb <*> pure pr
| (lb, pr) <- (M.toList . unDist) choice ]
return (x, y)
encodeWord'Cu
:: (Ord a, Ord b, Ord c)
=> Word a (b, c)
-> CodecM a b c (X Ob Lb)
encodeWord'Cu word = do
x' <- mapM encodeObU (S.toList (obs word))
r' <- mapM encodeLbU (S.toList (lbs word))
return $ mkX x' r'
encodeWord'Cn
:: (Ord a, Ord b, Ord c)
=> Word a (b, c)
-> CodecM a b c (X Ob Lb)
encodeWord'Cn word = do
x' <- catMaybes <$> mapM encodeObN (S.toList (obs word))
r' <- mapM encodeLbN (S.toList (lbs word))
return $ mkX x' r'
encodeSentL'Cu
:: (Ord a, Ord b, Ord c)
=> SentL a (b, c)
-> CodecM a b c (Xs Ob Lb, Ys Lb)
encodeSentL'Cu sent = do
ps <- mapM (encodeWordL'Cu) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL'Cn
:: (Ord a, Ord b, Ord c)
=> SentL a (b, c)
-> CodecM a b c (Xs Ob Lb, Ys Lb)
encodeSentL'Cn sent = do
ps <- mapM (encodeWordL'Cn) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL
:: (Ord a, Ord b, Ord c) => Codec a b c
-> SentL a (b, c) -> (Xs Ob Lb, Ys Lb)
encodeSentL codec = C.evalCodec codec . encodeSentL'Cn
encodeSent'Cu
:: (Ord a, Ord b, Ord c) => Sent a (b, c)
-> CodecM a b c (Xs Ob Lb)
encodeSent'Cu = fmap V.fromList . mapM encodeWord'Cu
encodeSent'Cn
:: (Ord a, Ord b, Ord c) => Sent a (b, c)
-> CodecM a b c (Xs Ob Lb)
encodeSent'Cn = fmap V.fromList . mapM encodeWord'Cn
encodeSent
:: (Ord a, Ord b, Ord c) => Codec a b c
-> Sent a (b, c) -> Xs Ob Lb
encodeSent codec = C.evalCodec codec . encodeSent'Cn
mkCodec
:: (Ord a, Ord b, Ord c) => [SentL a (b, c)]
-> (Codec a b c, [(Xs Ob Lb, Ys Lb)])
mkCodec
= swap
. C.runCodec empty
. mapM encodeSentL'Cu
where
swap (x, y) = (y, x)
encodeDataL
:: (Ord a, Ord b, Ord c) => Codec a b c
-> [SentL a (b, c)] -> [(Xs Ob Lb, Ys Lb)]
encodeDataL codec = C.evalCodec codec . mapM encodeSentL'Cn
encodeData
:: (Ord a, Ord b, Ord c) => Codec a b c
-> [Sent a (b, c)] -> [Xs Ob Lb]
encodeData codec = map (encodeSent codec)
decodeLabel'C
:: (Ord b, Ord c) => Lb
-> CodecM a b c (Maybe (b, c))
decodeLabel'C (x, y) = do
x' <- C.decode _2Lens (unLb1 x)
y' <- C.decode _3Lens (unLb2 y)
return $ (,) <$> x' <*> y'
decodeLabel :: (Ord b, Ord c) => Codec a b c -> Lb -> Maybe (b, c)
decodeLabel codec = C.evalCodec codec . decodeLabel'C
decodeLabels :: (Ord b, Ord c) => Codec a b c -> [Lb] -> [Maybe (b, c)]
decodeLabels codec = C.evalCodec codec . mapM decodeLabel'C
hasLabel :: (Ord b, Ord c) => Codec a b c -> (b, c) -> Bool
hasLabel codec (x, y)
= M.member (Just x) (C.to $ _2 codec)
&& M.member (Just y) (C.to $ _3 codec)
unJust
:: (Ord b, Ord c) => Codec a b c
-> Word a (b, c) -> Maybe (b, c)
-> (b, c)
unJust _ _ (Just x) = x
unJust codec word Nothing = case allUnk of
(x:_) -> x
[] -> error "unJust: Nothing and all values known"
where
allUnk = filter (not . hasLabel codec) (S.toList $ lbs word)