module NLP.Partage.Earley.AutoAP
(
recognize
, recognizeFrom
, parse
, earley
, recognizeAuto
, recognizeFromAuto
, parseAuto
, earleyAuto
, Hype
, parsedTrees
, hyperNodesNum
, hyperEdgesNum
, printHype
, Pos
) where
import Prelude hiding (span, (.))
import Control.Applicative ((<$>))
import Control.Monad (guard, void, (>=>), when, forM_)
import Control.Monad.Trans.Class (lift)
import qualified Control.Monad.RWS.Strict as RWS
import Control.Category ((>>>), (.))
import Data.Function (on)
import Data.Maybe ( isJust, isNothing, mapMaybe
, maybeToList )
import qualified Data.Map.Strict as M
import Data.Ord ( comparing )
import Data.List ( sortBy )
import qualified Data.Set as S
import qualified Data.PSQueue as Q
import Data.PSQueue (Binding(..))
import Data.Lens.Light
import qualified Data.Vector as V
import qualified Pipes as P
import Data.DAWG.Ord (ID)
import NLP.Partage.SOrd
import NLP.Partage.FactGram (FactGram)
import NLP.Partage.FactGram.Internal
( Lab(..), Rule(..), viewLab )
import qualified NLP.Partage.Auto as A
import qualified NLP.Partage.Auto.DAWG as D
import qualified NLP.Partage.Tree as T
type Pos = Int
data Span = Span {
_beg :: Pos
, _end :: Pos
, _gap :: Maybe (Pos, Pos)
} deriving (Show, Eq, Ord)
$( makeLenses [''Span] )
data Active = Active {
_state :: ID
, _spanA :: Span
} deriving (Show, Eq, Ord)
$( makeLenses [''Active] )
data Passive n t = Passive {
_label :: Lab n t
, _spanP :: Span
} deriving (Show, Eq, Ord)
$( makeLenses [''Passive] )
regular :: Span -> Bool
regular = isNothing . getL gap
auxiliary :: Span -> Bool
auxiliary = isJust . getL gap
printSpan :: Span -> IO ()
printSpan span = do
putStr . show $ getL beg span
putStr ", "
case getL gap span of
Nothing -> return ()
Just (p, q) -> do
putStr $ show p
putStr ", "
putStr $ show q
putStr ", "
putStr . show $ getL end span
printActive :: Active -> IO ()
printActive p = do
putStr "("
putStr . show $ getL state p
putStr ", "
printSpan $ getL spanA p
putStrLn ")"
printPassive :: (Show n, Show t) => Passive n t -> IO ()
printPassive p = do
putStr "("
putStr . viewLab $ getL label p
putStr ", "
printSpan $ getL spanP p
putStrLn ")"
data Trav n t
= Scan
{ _scanFrom :: Active
, _scanTerm :: t
}
| Subst
{ _passArg :: Passive n t
, _actArg :: Active
}
| Foot
{ _actArg :: Active
, _theFoot :: Passive n t
}
| Adjoin
{ _passAdj :: Passive n t
, _passMod :: Passive n t
}
deriving (Show, Eq, Ord)
printTrav :: (Show n, Show t) => Item n t -> Trav n t -> IO ()
printTrav q' (Scan p x) = do
putStr "# " >> printActive p
putStr "+ " >> print x
putStr "= " >> printItem q'
printTrav q' (Subst p q) = do
putStr "# " >> printActive q
putStr "+ " >> printPassive p
putStr "= " >> printItem q'
printTrav q' (Foot q p) = do
putStr "# " >> printActive q
putStr "+ " >> printPassive p
putStr "= " >> printItem q'
printTrav q' (Adjoin p s) = do
putStr "# " >> printPassive p
putStr "+ " >> printPassive s
putStr "= " >> printItem q'
type Prio = (Int, Int)
prioA :: Active -> Prio
prioA p =
let i = getL (beg . spanA) p
j = getL (end . spanA) p
in (j, j i)
prioP :: Passive n t -> Prio
prioP p =
let i = getL (beg . spanP) p
j = getL (end . spanP) p
in (j, j i)
data ExtPrio n t = ExtPrio
{ prioVal :: Prio
, prioTrav :: S.Set (Trav n t)
} deriving (Show)
instance (Eq n, Eq t) => Eq (ExtPrio n t) where
(==) = (==) `on` prioVal
instance (Ord n, Ord t) => Ord (ExtPrio n t) where
compare = compare `on` prioVal
extPrio :: Prio -> ExtPrio n t
extPrio p = ExtPrio p S.empty
joinPrio :: (Ord n, Ord t) => ExtPrio n t -> ExtPrio n t -> ExtPrio n t
joinPrio x y = ExtPrio
(min (prioVal x) (prioVal y))
(S.union (prioTrav x) (prioTrav y))
data Item n t
= ItemP (Passive n t)
| ItemA Active
deriving (Show, Eq, Ord)
printItem :: (Show n, Show t) => Item n t -> IO ()
printItem (ItemP p) = printPassive p
printItem (ItemA p) = printActive p
prio :: Item n t -> Prio
prio (ItemP p) = prioP p
prio (ItemA p) = prioA p
type EarRd t = V.Vector (S.Set t)
data Hype n t = Hype
{ automat :: A.GramAuto n t
, withBody :: M.Map (Lab n t) (S.Set ID)
, doneActive :: M.Map Pos (M.Map ID
(M.Map Active (S.Set (Trav n t))))
, donePassive :: M.Map (Pos, n, Pos)
(M.Map (Passive n t) (S.Set (Trav n t)))
, waiting :: Q.PSQ (Item n t) (ExtPrio n t)
}
mkHype
:: (Ord n, Ord t)
=> A.GramAuto n t
-> S.Set Active
-> Hype n t
mkHype dag s = Hype
{ automat = dag
, withBody = mkWithBody dag
, doneActive = M.empty
, donePassive = M.empty
, waiting = Q.fromList
[ ItemA p :-> extPrio (prioA p)
| p <- S.toList s ] }
mkWithBody
:: (Ord n, Ord t)
=> A.GramAuto n t
-> M.Map (Lab n t) (S.Set ID)
mkWithBody dag = M.fromListWith S.union
[ (x, S.singleton i)
| (i, A.Body x, _j) <- A.allEdges dag ]
type Earley n t = RWS.RWST (EarRd t) () (Hype n t) IO
readInput :: Pos -> P.ListT (Earley n t) t
readInput i = do
sent <- RWS.ask
xs <- some $ sent V.!? i
each $ S.toList xs
hyperNodesNum :: Hype n t -> Int
hyperNodesNum e
= length (listPassive e)
+ length (listActive e)
hyperEdgesNum :: forall n t. Hype n t -> Int
hyperEdgesNum earSt
= sumOver listPassive
+ sumOver listActive
where
sumOver :: (Hype n t -> [(a, S.Set (Trav n t))]) -> Int
sumOver listIt = sum
[ S.size travSet
| (_, travSet) <- listIt earSt ]
hyperEdges :: Hype n t -> [(Item n t, Trav n t)]
hyperEdges earSt =
passiveEdges ++ activeEdges
where
passiveEdges =
[ (ItemP p, trav)
| (p, travSet) <- listPassive earSt
, trav <- S.toList travSet ]
activeEdges =
[ (ItemA p, trav)
| (p, travSet) <- listActive earSt
, trav <- S.toList travSet ]
printHype :: (Show n, Show t) => Hype n t -> IO ()
printHype earSt =
forM_ edges $ \(p, trav) ->
printTrav p trav
where
edges = sortIt (hyperEdges earSt)
sortIt = sortBy (comparing $ prio.fst)
listActive :: Hype n t -> [(Active, S.Set (Trav n t))]
listActive = (M.elems >=> M.elems >=> M.toList) . doneActive
activeTrav
:: (Ord n, Ord t)
=> Active -> Hype n t
-> Maybe (S.Set (Trav n t))
activeTrav p
= ( M.lookup (p ^. spanA ^. end)
>=> M.lookup (p ^. state)
>=> M.lookup p )
. doneActive
_isProcessedA :: (Ord n, Ord t) => Active -> Hype n t -> Bool
_isProcessedA p =
check . activeTrav p
where
check (Just _) = True
check _ = False
isProcessedA :: (Ord n, Ord t) => Active -> Earley n t Bool
isProcessedA p = _isProcessedA p <$> RWS.get
saveActive
:: (Ord t, Ord n)
=> Active
-> S.Set (Trav n t)
-> Earley n t ()
saveActive p ts =
RWS.state $ \s -> ((), s {doneActive = newDone s})
where
newDone st =
M.insertWith
( M.unionWith
( M.unionWith S.union ) )
( p ^. spanA ^. end )
( M.singleton (p ^. state)
( M.singleton p ts ) )
( doneActive st )
listPassive :: Hype n t -> [(Passive n t, S.Set (Trav n t))]
listPassive = (M.elems >=> M.toList) . donePassive
passiveTrav
:: (Ord n, Ord t)
=> Passive n t -> Hype n t
-> Maybe (S.Set (Trav n t))
passiveTrav p
= ( M.lookup
( p ^. spanP ^. beg
, nonTerm $ p ^. label
, p ^. spanP ^. end ) >=> M.lookup p )
. donePassive
_isProcessedP :: (Ord n, Ord t) => Passive n t -> Hype n t -> Bool
_isProcessedP x =
check . passiveTrav x
where
check (Just _) = True
check _ = False
isProcessedP :: (Ord n, Ord t) => Passive n t -> Earley n t Bool
isProcessedP p = _isProcessedP p <$> RWS.get
savePassive
:: (Ord t, Ord n)
=> Passive n t
-> S.Set (Trav n t)
-> Earley n t ()
savePassive p ts =
RWS.state $ \s -> ((), s {donePassive = newDone s})
where
newDone st =
M.insertWith
( M.unionWith S.union )
( p ^. spanP ^. beg
, nonTerm $ p ^. label
, p ^. spanP ^. end )
( M.singleton p ts )
( donePassive st )
pushActive :: (Ord t, Ord n) => Active -> Trav n t -> Earley n t ()
pushActive p t = isProcessedA p >>= \b -> if b
then saveActive p $ S.singleton t
else modify' $ \s -> s {waiting = newWait (waiting s)}
where
newWait = Q.insertWith joinPrio (ItemA p) newPrio
newPrio = ExtPrio (prioA p) (S.singleton t)
pushPassive :: (Ord t, Ord n) => Passive n t -> Trav n t -> Earley n t ()
pushPassive p t = isProcessedP p >>= \b -> if b
then savePassive p $ S.singleton t
else modify' $ \s -> s {waiting = newWait (waiting s)}
where
newWait = Q.insertWith joinPrio (ItemP p) newPrio
newPrio = ExtPrio (prioP p) (S.singleton t)
pushInduced :: (Ord t, Ord n) => Active -> Trav n t -> Earley n t ()
pushInduced p t = do
hasElems (getL state p) >>= \b -> when b
(pushActive p t)
P.runListT $ do
x <- heads (getL state p)
lift . flip pushPassive t $
Passive x (getL spanA p)
popItem
:: (Ord t, Ord n)
=> Earley n t
(Maybe (Binding (Item n t) (ExtPrio n t)))
popItem = RWS.state $ \st -> case Q.minView (waiting st) of
Nothing -> (Nothing, st)
Just (b, s) -> (Just b, st {waiting = s})
expectEnd
:: (Ord n, Ord t) => Lab n t -> Pos
-> P.ListT (Earley n t) Active
expectEnd sym i = do
Hype{..} <- lift RWS.get
doneEnd <- some $ M.lookup i doneActive
stateSet <- some $ M.lookup sym withBody
stateID <- each $ S.toList stateSet
doneEndLab <- some $ M.lookup stateID doneEnd
each $ M.keys doneEndLab
rootSpan
:: Ord n => n -> (Pos, Pos)
-> P.ListT (Earley n t) (Passive n t)
rootSpan x (i, j) = do
Hype{..} <- lift RWS.get
each $ case M.lookup (i, x, j) donePassive of
Nothing -> []
Just m -> M.keys m
followTerm :: (Ord n, Ord t) => ID -> t -> P.ListT (Earley n t) ID
followTerm i c = do
auto <- RWS.gets automat
some $ A.follow auto i (A.Body $ Term c)
follow :: (Ord n, Ord t) => ID -> Lab n t -> P.ListT (Earley n t) ID
follow i x = do
auto <- RWS.gets automat
some $ A.follow auto i (A.Body x)
heads :: ID -> P.ListT (Earley n t) (Lab n t)
heads i = do
auto <- RWS.gets automat
let mayHead (x, _) = case x of
A.Body _ -> Nothing
A.Head y -> Just y
each $ mapMaybe mayHead $ A.edges auto i
hasElems :: ID -> Earley n t Bool
hasElems i = do
auto <- RWS.gets automat
let mayBody (x, _) = case x of
A.Body y -> Just y
A.Head _ -> Nothing
return
. not . null
. mapMaybe mayBody
$ A.edges auto i
tryScan :: (SOrd t, SOrd n) => Active -> Earley n t ()
tryScan p = void $ P.runListT $ do
c <- readInput $ getL (spanA >>> end) p
j <- followTerm (getL state p) c
let q = setL state j
. modL' (spanA >>> end) (+1)
$ p
#ifdef Debug
lift . lift $ do
putStr "[S] " >> printActive p
putStr " : " >> printActive q
#endif
lift $ pushInduced q $ Scan p c
trySubst :: (SOrd t, SOrd n) => Passive n t -> Earley n t ()
trySubst p = void $ P.runListT $ do
let pLab = getL label p
pSpan = getL spanP p
guard . regular $ pSpan
q <- expectEnd pLab (getL beg pSpan)
j <- follow (getL state q) pLab
let q' = setL state j
. setL (end.spanA) (getL end pSpan)
$ q
#ifdef Debug
lift . lift $ do
putStr "[U] " >> printPassive p
putStr " + " >> printActive q
putStr " : " >> printActive q'
#endif
lift $ pushInduced q' $ Subst p q
tryAdjoinInit :: (SOrd n, SOrd t) => Passive n t -> Earley n t ()
tryAdjoinInit p = void $ P.runListT $ do
let pLab = p ^. label
pSpan = p ^. spanP
guard $ auxiliary pSpan <= not (topLevel pLab)
let foot = AuxFoot $ nonTerm pLab
q <- expectEnd foot (getL beg pSpan)
j <- follow (getL state q) foot
let q' = setL state j
. setL (spanA >>> end) (pSpan ^. end)
. setL (spanA >>> gap) (Just
( pSpan ^. beg
, pSpan ^. end ))
$ q
#ifdef Debug
lift . lift $ do
putStr "[A] " >> printPassive p
putStr " + " >> printActive q
putStr " : " >> printActive q'
#endif
lift $ pushInduced q' $ Foot q p
tryAdjoinCont :: (SOrd n, SOrd t) => Passive n t -> Earley n t ()
tryAdjoinCont p = void $ P.runListT $ do
let pLab = p ^. label
pSpan = p ^. spanP
guard . not $ topLevel pLab
guard . auxiliary $ pSpan
q <- expectEnd pLab (pSpan ^. beg)
j <- follow (q ^. state) pLab
let q' = setL state j
. setL (spanA >>> end) (pSpan ^. end)
. setL (spanA >>> gap) (pSpan ^. gap)
$ q
#ifdef Debug
lift . lift $ do
putStr "[B] " >> printPassive p
putStr " + " >> printActive q
putStr " : " >> printActive q'
#endif
lift $ pushInduced q' $ Subst p q
tryAdjoinTerm :: (SOrd t, SOrd n) => Passive n t -> Earley n t ()
tryAdjoinTerm q = void $ P.runListT $ do
let qLab = q ^. label
qSpan = q ^. spanP
guard $ topLevel qLab
(gapBeg, gapEnd) <- each $ maybeToList $ qSpan ^. gap
p <- rootSpan (nonTerm qLab) (gapBeg, gapEnd)
let p' = setL (spanP >>> beg) (qSpan ^. beg)
. setL (spanP >>> end) (qSpan ^. end)
$ p
#ifdef Debug
lift . lift $ do
putStr "[C] " >> printPassive q
putStr " + " >> printPassive p
putStr " : " >> printPassive p'
#endif
lift $ pushPassive p' $ Adjoin q p
step
:: (SOrd t, SOrd n)
=> Binding (Item n t) (ExtPrio n t)
-> Earley n t ()
step (ItemP p :-> e) = do
mapM_ ($ p)
[ trySubst
, tryAdjoinInit
, tryAdjoinCont
, tryAdjoinTerm ]
savePassive p $ prioTrav e
step (ItemA p :-> e) = do
mapM_ ($ p)
[ tryScan ]
saveActive p $ prioTrav e
parsedTrees
:: forall n t. (Ord n, Ord t)
=> Hype n t
-> n
-> Int
-> S.Set (T.Tree n t)
parsedTrees earSt start n
= S.fromList
$ concatMap fromPassive
$ finalFrom start n earSt
where
fromPassive :: Passive n t -> [T.Tree n t]
fromPassive p = concat
[ fromPassiveTrav p trav
| travSet <- maybeToList $ passiveTrav p earSt
, trav <- S.toList travSet ]
fromPassiveTrav p (Scan q t) =
[ T.Branch
(nonTerm $ getL label p)
(reverse $ T.Leaf t : ts)
| ts <- fromActive q ]
fromPassiveTrav p (Foot q _p') =
[ T.Branch
(nonTerm $ getL label p)
(reverse $ T.Branch (nonTerm $ p ^. label) [] : ts)
| ts <- fromActive q ]
fromPassiveTrav p (Subst qp qa) =
[ T.Branch
(nonTerm $ getL label p)
(reverse $ t : ts)
| ts <- fromActive qa
, t <- fromPassive qp ]
fromPassiveTrav _p (Adjoin qa qm) =
[ replaceFoot ini aux
| aux <- fromPassive qa
, ini <- fromPassive qm ]
replaceFoot ini (T.Branch _ []) = ini
replaceFoot ini (T.Branch x ts) = T.Branch x $ map (replaceFoot ini) ts
replaceFoot _ t@(T.Leaf _) = t
fromActive :: Active -> [[T.Tree n t]]
fromActive p = case activeTrav p earSt of
Nothing -> error "fromActive: unknown active item"
Just travSet -> if S.null travSet
then [[]]
else concatMap
(fromActiveTrav p)
(S.toList travSet)
fromActiveTrav _p (Scan q t) =
[ T.Leaf t : ts
| ts <- fromActive q ]
fromActiveTrav _p (Foot q p) =
[ T.Branch (nonTerm $ p ^. label) [] : ts
| ts <- fromActive q ]
fromActiveTrav _p (Subst qp qa) =
[ t : ts
| ts <- fromActive qa
, t <- fromPassive qp ]
fromActiveTrav _p (Adjoin _ _) =
error "parsedTrees: fromActiveTrav called on a passive item"
recognize
#ifdef Debug
:: (SOrd t, SOrd n)
#else
:: (Ord t, Ord n)
#endif
=> FactGram n t
-> [S.Set t]
-> IO Bool
recognize gram =
recognizeAuto (D.fromGram gram)
recognizeFrom
#ifdef Debug
:: (SOrd t, SOrd n)
#else
:: (Ord t, Ord n)
#endif
=> FactGram n t
-> n
-> [S.Set t]
-> IO Bool
recognizeFrom gram =
recognizeFromAuto (D.fromGram gram)
parse
#ifdef Debug
:: (SOrd t, SOrd n)
#else
:: (Ord t, Ord n)
#endif
=> FactGram n t
-> n
-> [S.Set t]
-> IO (S.Set (T.Tree n t))
parse gram = parseAuto $ D.fromGram gram
earley
#ifdef Debug
:: (SOrd t, SOrd n)
#else
:: (Ord t, Ord n)
#endif
=> FactGram n t
-> [S.Set t]
-> IO (Hype n t)
earley gram = earleyAuto $ D.fromGram gram
recognizeAuto
#ifdef Debug
:: (SOrd t, SOrd n)
#else
:: (Ord t, Ord n)
#endif
=> A.GramAuto n t
-> [S.Set t]
-> IO Bool
recognizeAuto auto xs =
isRecognized xs <$> earleyAuto auto xs
recognizeFromAuto
#ifdef Debug
:: (SOrd t, SOrd n)
#else
:: (Ord t, Ord n)
#endif
=> A.GramAuto n t
-> n
-> [S.Set t]
-> IO Bool
recognizeFromAuto auto start xs = do
earSt <- earleyAuto auto xs
return $ (not.null) (finalFrom start (length xs) earSt)
parseAuto
#ifdef Debug
:: (SOrd t, SOrd n)
#else
:: (Ord t, Ord n)
#endif
=> A.GramAuto n t
-> n
-> [S.Set t]
-> IO (S.Set (T.Tree n t))
parseAuto auto start xs = do
earSt <- earleyAuto auto xs
return $ parsedTrees earSt start (length xs)
earleyAuto
#ifdef Debug
:: (SOrd t, SOrd n)
#else
:: (Ord t, Ord n)
#endif
=> A.GramAuto n t
-> [S.Set t]
-> IO (Hype n t)
earleyAuto dawg xs =
fst <$> RWS.execRWST loop (V.fromList xs) st0
where
st0 = mkHype dawg $ S.fromList
[ Active root Span
{ _beg = i
, _end = i
, _gap = Nothing }
| i <- [0 .. length xs 1]
, root <- S.toList (A.roots dawg) ]
loop = popItem >>= \mp -> case mp of
Nothing -> return ()
Just p -> step p >> loop
finalFrom
:: (Ord n, Eq t)
=> n
-> Int
-> Hype n t
-> [Passive n t]
finalFrom start n Hype{..} =
case M.lookup (0, start, n) donePassive of
Nothing -> []
Just m ->
[ p
| p <- M.keys m
, p ^. label == NonT start Nothing ]
isRecognized
:: (SOrd t, SOrd n)
=> [S.Set t]
-> Hype n t
-> Bool
isRecognized xs Hype{..} =
(not . null)
(complete
(agregate donePassive))
where
n = length xs
complete done =
[ True | item <- S.toList done
, item ^. spanP ^. beg == 0
, item ^. spanP ^. end == n
, isNothing (item ^. spanP ^. gap) ]
agregate = S.unions . map M.keysSet . M.elems
modify' :: RWS.MonadState s m => (s -> s) -> m ()
modify' f = do
x <- RWS.get
RWS.put $! f x
each :: Monad m => [a] -> P.ListT m a
each = P.Select . P.each
some :: Monad m => Maybe a -> P.ListT m a
some = each . maybeToList
topLevel :: Lab n t -> Bool
topLevel x = case x of
NonT{..} -> isNothing labID
AuxRoot{} -> True
_ -> False