module NLP.GenI.Builder (
TagDerivation, Builder(..), GenStatus(..),
lexicalSelection, FilterStatus(..),incrCounter, num_iterations,
(>-->),
num_comparisons, chart_size,
SemBitMap, defineSemanticBits, semToBitVector, bitVectorToSem, DispatchFilter, condFilter,
defaultStepAll,
BuilderState, UninflectedDisjunction(..), Input(..), unlessEmptySem,
initStats, Output, SentenceAut, run, queryCounter, defaultMetricNames, preInit
)
where
import Control.Monad.State.Strict
import Data.Bits (bit, (.&.), (.|.))
import Data.List (delete, nub, sort)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Tree (flatten)
import Prelude hiding (init)
import Control.DeepSeq
import Data.Generics (Data)
import Data.Typeable (Typeable)
import NLP.GenI.Automaton (NFA, automatonPathSets,
automatonPaths, numStates,
numTransitions)
import NLP.GenI.FeatureStructure (Flist, mkFeatStruct, sortFlist)
import NLP.GenI.Flag
import NLP.GenI.General (BitVector, geniBug, snd3, thd3)
import NLP.GenI.GeniVal (Collectable (collect),
DescendGeniVal (..), GeniVal,
finaliseVarsById)
import NLP.GenI.Lexicon (LexEntry)
import NLP.GenI.Morphology.Types
import NLP.GenI.Polarity (PolResult (..), buildAutomaton,
PolPathSet, detectPolPaths)
import NLP.GenI.Pretty
import NLP.GenI.Semantics (Literal, Sem, SemInput)
import NLP.GenI.Statistics (Metric (IntMetric), Statistics,
addMetric, emptyStats,
incrIntMetric, queryIntMetric,
queryMetrics, updateMetrics)
import NLP.GenI.Tag (TagDerivation,
TagElem (idname, tsemantics, ttree),
dsChild, dsParent, setTidnums)
import NLP.GenI.TreeSchema (GNode (..), GType (Subs, Foot))
data GenStatus = Finished
| Active
| Error Text
data Builder st it = Builder
{ init :: Input -> [Flag] -> (st, Statistics)
, step :: BuilderState st ()
, stepAll :: BuilderState st ()
, finished :: st -> GenStatus
, unpack :: st -> [Output]
, partial :: st -> [Output]
}
type Output = (Integer, LemmaPlusSentence, TagDerivation)
data Input = Input
{ inSemInput :: SemInput
, inLex :: [LexEntry]
, inCands :: [(TagElem, PolPathSet)]
}
type SentenceAut = NFA Int LemmaPlus
data UninflectedDisjunction = UninflectedDisjunction [Text] (Flist GeniVal) deriving (Data, Typeable)
instance DescendGeniVal UninflectedDisjunction where
descendGeniVal s (UninflectedDisjunction a v) =
UninflectedDisjunction a (descendGeniVal s v)
instance Collectable UninflectedDisjunction where
collect (UninflectedDisjunction _ b) = collect b
type BuilderState s a = StateT s (State Statistics) a
preInit :: Input -> [Flag] -> (Input, PolResult)
preInit input flags_ =
(input2, autstuff)
where
(cand,_) = unzip $ inCands input
seminput = inSemInput input
extraPol = Map.empty
polsToDetect = fromMaybe (error "there should be a default for --detect-pols")
$ getFlag DetectPolaritiesFlg flags_
rootFeat = mkFeatStruct $ getListFlag RootFeatureFlg flags_
isPol = hasOpt Polarised flags_
autstuff = buildAutomaton polsToDetect rootFeat extraPol seminput cand
autpaths = map concat . automatonPathSets . prFinal $ autstuff
combosPol = if isPol then autpaths else [considerHasSem cand]
considerHasSem = filter (not . null . tsemantics)
(cands2, pathIds) = unzip $ detectPolPaths combosPol
fixate ts ps = zip (map finaliseVarsById $ setTidnums ts) ps
input2 = input { inCands = fixate cands2 pathIds
, inSemInput = (prSem autstuff, snd3 seminput, thd3 seminput) }
unlessEmptySem :: Input -> [Flag] -> a -> a
unlessEmptySem input _
| null semanticsErr = id
| otherwise = error semanticsErr
where
(cands,_) = unzip $ inCands input
nullSemCands = [ idname t | t <- cands, (null.tsemantics) t ]
unInstSemCands = [ idname t | t <- cands, not $ Map.null $ collect (tsemantics t) Map.empty ]
nullSemErr =
"The following trees have a null semantics: " ++
T.unpack (T.unwords nullSemCands)
unInstSemErr =
"The following trees have an uninstantiated semantics: " ++
T.unpack (T.unwords unInstSemCands)
semanticsErr =
(if null nullSemCands then "" else nullSemErr ++ "\n") ++
(if null unInstSemCands then "" else unInstSemErr)
run :: Builder st it -> Input -> [Flag] -> (st, Statistics)
run builder input flgs_ =
runState (execStateT stepAll_ iSt) iStats
where
flgs = modifyFlag RootFeatureFlg sortFlist flgs_
(input2, autstuff) = preInit input flgs
auts = map snd3 (prIntermediate autstuff)
(iSt, iStats) = init builder input2 flgs
autpaths = map concat . automatonPathSets . prFinal $ autstuff
countsFor ts =
(length ts, length nodes, length sn, length an)
where
nodes = concatMap (flatten.ttree) ts
sn = [ n | n <- nodes, gtype n == Subs ]
an = [ n | n <- nodes, gtype n == Foot ]
(tsem,_,_) = inSemInput input
cands = nub . map fst $ inCands input
cands2 = nub . concatMap concat . automatonPathSets . prFinal $ autstuff
countUp = do
incrCounter "sem_literals" $ length tsem
incrCounter "lex_subst_nodes" snl
incrCounter "lex_foot_nodes" anl
incrCounter "lex_nodes" nl
incrCounter "lex_trees" tl
incrCounter "plex_subst_nodes" snl2
incrCounter "plex_foot_nodes" anl2
incrCounter "plex_nodes" nl2
incrCounter "plex_trees" tl2
where
(tl , nl , snl , anl ) = countsFor cands
(tl2, nl2, snl2, anl2) = countsFor cands2
stepAll_ = do
countUp
incrCounter "pol_used_bundles" $ length autpaths
incrCounter "pol_used_paths" $ length . automatonPaths . prFinal $ autstuff
incrCounter "pol_seed_paths" $ length . automatonPaths . prInitial $ autstuff
incrCounter "pol_total_states" $ sum $ map numStates auts
incrCounter "pol_total_trans" $ sum $ map numTransitions auts
incrCounter "pol_max_states" $ maximum $ map numStates auts
incrCounter "pol_max_trans" $ maximum $ map numTransitions auts
stepAll builder
type SemBitMap = Map.Map (Literal GeniVal) BitVector
defineSemanticBits :: Sem -> SemBitMap
defineSemanticBits sem =
Map.fromList $ zip sem bits
where
bits = map bit [0..]
semToBitVector :: SemBitMap -> Sem -> BitVector
semToBitVector bmap sem =
foldr (.|.) 0 $ map doLookup sem
where
doLookup p = fromMaybe (geniBug (oops p)) (Map.lookup p bmap)
oops p = "predicate " ++ prettyStr p ++ " not found in semanticBit map"
bitVectorToSem :: SemBitMap -> BitVector -> Sem
bitVectorToSem bmap vector =
mapMaybe tryKey $ Map.toList bmap
where
tryKey (p,k) = if (k .&. vector == k) then Just p else Nothing
defaultStepAll :: Builder st it -> BuilderState st ()
defaultStepAll b = do
s <- get
case finished b s of
Active -> step b >> defaultStepAll b
_ -> return ()
type DispatchFilter s a = a -> s (FilterStatus a)
data FilterStatus a = Filtered | NotFiltered a
(>-->) :: (Monad s) => DispatchFilter s a -> DispatchFilter s a -> DispatchFilter s a
f >--> f2 =
\x -> f x >>= next
where
next y@Filtered = return y
next (NotFiltered x2) = f2 x2
condFilter :: (Monad s) => (a -> Bool)
-> DispatchFilter s a -> DispatchFilter s a
-> DispatchFilter s a
condFilter cond f1 f2 = \x -> if cond x then f1 x else f2 x
modifyStats :: (Metric -> Metric) -> BuilderState st ()
modifyStats fn = lift $ modify $ updateMetrics fn
incrCounter :: String -> Int -> BuilderState st ()
incrCounter key n = modifyStats (incrIntMetric key n)
queryCounter :: String -> Statistics -> Maybe Int
queryCounter key s =
case queryMetrics (queryIntMetric key) s of
[] -> Nothing
[c] -> Just c
_ -> geniBug $ "More than one instance of the metric: " ++ key
initStats :: [Flag] -> Statistics
initStats flags_ =
execState (mapM_ addMetric metrics) emptyStats
where
mdefault ms = if "default" `elem` ms then defaultMetricNames else []
identifyMs :: [String] -> [Metric]
identifyMs ms = map namedMetric $ mdefault ms ++ delete "default" ms
metrics = identifyMs $ fromMaybe [] $ getFlag MetricsFlg flags_
namedMetric :: String -> Metric
namedMetric n = IntMetric n 0
defaultMetricNames :: [ String ]
defaultMetricNames = [ num_iterations, chart_size, num_comparisons, gen_time ]
num_iterations, chart_size, num_comparisons, gen_time :: String
num_iterations = "iterations"
chart_size = "chart_size"
num_comparisons = "comparisons"
gen_time = "gen_time"
instance Pretty GenStatus where
pretty Finished = "finished"
pretty Active = "in progress"
pretty (Error x) = "error:" <+> x
lexicalSelection :: TagDerivation -> [Text]
lexicalSelection = sort . nub
. concatMap (\d -> dsChild d : maybeToList (dsParent d))
instance NFData Input where
rnf (Input x1 x2 x3) = rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()