module NLP.GenI.Builder (
TagDerivation, Builder(..), GenStatus(..),
lexicalSelection, FilterStatus(..),incrCounter, num_iterations,
(>-->),
num_comparisons, chart_size,
SemBitMap, defineSemanticBits, semToBitVector, bitVectorToSem, DispatchFilter, condFilter,
defaultStepAll,
BuilderState, UninflectedDisjunction(..), Input(..), unlessEmptySem,
initStats, Output, SentenceAut, run, queryCounter, defaultMetricNames, preInit
)
where
import Control.Monad.State.Strict
import Data.Bits ( (.&.), (.|.), bit )
import Data.List ( delete, sort, nub )
import Data.Maybe ( mapMaybe, fromMaybe, maybeToList )
import Data.Tree ( flatten )
import Prelude hiding ( init )
import Data.Text ( Text )
import qualified Data.Map as Map
import qualified Data.Text as T
import Control.DeepSeq
import Data.Generics ( Data )
import Data.Typeable ( Typeable )
import NLP.GenI.Automaton (NFA, automatonPaths, automatonPathSets, numStates, numTransitions)
import NLP.GenI.Configuration
( getListFlagP, getFlagP, modifyFlagP, Params,
DetectPolaritiesFlg(..),
MetricsFlg(..),
RootFeatureFlg(..),
Optimisation(..), hasOpt,
)
import NLP.GenI.FeatureStructure ( Flist, sortFlist, mkFeatStruct )
import NLP.GenI.General ( BitVector, snd3, thd3, geniBug )
import NLP.GenI.GeniVal ( GeniVal, DescendGeniVal(..), Collectable(collect), finaliseVarsById )
import NLP.GenI.Lexicon ( LexEntry )
import NLP.GenI.Morphology.Types
import NLP.GenI.Polarity (PolResult(..), buildAutomaton, detectPolPaths)
import NLP.GenI.Pretty
import NLP.GenI.Semantics ( SemInput, Sem, Literal )
import NLP.GenI.Statistics (Statistics, incrIntMetric,
Metric(IntMetric), updateMetrics,
queryMetrics, queryIntMetric,
addMetric, emptyStats,
)
import NLP.GenI.Tag
( TagElem(idname,tsemantics,ttree), setTidnums
, TagDerivation, dsChild, dsParent
)
import NLP.GenI.TreeSchema ( GNode(..), GType(Subs, Foot) )
data GenStatus = Finished
| Active
| Error Text
data Builder st it pa = Builder
{ init :: Input -> pa -> (st, Statistics)
, step :: BuilderState st ()
, stepAll :: BuilderState st ()
, finished :: st -> GenStatus
, unpack :: st -> [Output]
, partial :: st -> [Output]
}
type Output = (Integer, LemmaPlusSentence, TagDerivation)
data Input =
Input { inSemInput :: SemInput
, inLex :: [LexEntry]
, inCands :: [(TagElem, BitVector)]
}
type SentenceAut = NFA Int LemmaPlus
data UninflectedDisjunction = UninflectedDisjunction [Text] (Flist GeniVal) deriving (Data, Typeable)
instance DescendGeniVal UninflectedDisjunction where
descendGeniVal s (UninflectedDisjunction a v) = UninflectedDisjunction a (descendGeniVal s v)
instance Collectable UninflectedDisjunction where
collect (UninflectedDisjunction _ b) = collect b
type BuilderState s a = StateT s (State Statistics) a
preInit :: Input -> Params -> (Input, PolResult)
preInit input config =
let (cand,_) = unzip $ inCands input
seminput = inSemInput input
extraPol = Map.empty
polsToDetect = fromMaybe (error "there should be a default for --detect-pols")
$ getFlagP DetectPolaritiesFlg config
rootFeat = mkFeatStruct $ getListFlagP RootFeatureFlg config
isPol = hasOpt Polarised config
autstuff = buildAutomaton polsToDetect rootFeat extraPol seminput cand
autpaths = map concat . automatonPathSets . prFinal $ autstuff
combosPol = if isPol then autpaths else [considerHasSem cand]
considerHasSem = filter (not . null . tsemantics)
(cands2, pathIds) = unzip $ detectPolPaths combosPol
fixate ts ps = zip (map finaliseVarsById $ setTidnums ts) ps
input2 = input { inCands = fixate cands2 pathIds
, inSemInput = (prSem autstuff, snd3 seminput, thd3 seminput) }
in (input2, autstuff)
unlessEmptySem :: Input -> Params -> a -> a
unlessEmptySem input _
| null semanticsErr = id
| otherwise = error semanticsErr
where
(cands,_) = unzip $ inCands input
nullSemCands = [ idname t | t <- cands, (null.tsemantics) t ]
unInstSemCands = [ idname t | t <- cands, not $ Map.null $ collect (tsemantics t) Map.empty ]
nullSemErr =
"The following trees have a null semantics: " ++
T.unpack (T.unwords nullSemCands)
unInstSemErr =
"The following trees have an uninstantiated semantics: " ++
T.unpack (T.unwords unInstSemCands)
semanticsErr =
(if null nullSemCands then "" else nullSemErr ++ "\n") ++
(if null unInstSemCands then "" else unInstSemErr)
run :: Builder st it Params -> Input -> Params -> (st, Statistics)
run builder input config_ =
let
config = modifyFlagP RootFeatureFlg sortFlist config_
(input2, autstuff) = preInit input config
auts = map snd3 (prIntermediate autstuff)
(iSt, iStats) = init builder input2 config
autpaths = map concat . automatonPathSets . prFinal $ autstuff
countsFor ts = (length ts, length nodes, length sn, length an)
where nodes = concatMap (flatten.ttree) ts
sn = [ n | n <- nodes, gtype n == Subs ]
an = [ n | n <- nodes, gtype n == Foot ]
(tsem,_,_) = inSemInput input
cands = nub . map fst $ inCands input
cands2 = nub . concatMap concat . automatonPathSets . prFinal $ autstuff
countUp = do incrCounter "sem_literals" $ length tsem
incrCounter "lex_subst_nodes" snl
incrCounter "lex_foot_nodes" anl
incrCounter "lex_nodes" nl
incrCounter "lex_trees" tl
incrCounter "plex_subst_nodes" snl2
incrCounter "plex_foot_nodes" anl2
incrCounter "plex_nodes" nl2
incrCounter "plex_trees" tl2
where (tl , nl , snl , anl ) = countsFor cands
(tl2, nl2, snl2, anl2) = countsFor cands2
stepAll_ = do countUp
incrCounter "pol_used_bundles" $ length autpaths
incrCounter "pol_used_paths" $ length . automatonPaths . prFinal $ autstuff
incrCounter "pol_seed_paths" $ length . automatonPaths . prInitial $ autstuff
incrCounter "pol_total_states" $ sum $ map numStates auts
incrCounter "pol_total_trans" $ sum $ map numTransitions auts
incrCounter "pol_max_states" $ maximum $ map numStates auts
incrCounter "pol_max_trans" $ maximum $ map numTransitions auts
stepAll builder
in runState (execStateT stepAll_ iSt) iStats
type SemBitMap = Map.Map (Literal GeniVal) BitVector
defineSemanticBits :: Sem -> SemBitMap
defineSemanticBits sem = Map.fromList $ zip sem bits
where
bits = map bit [0..]
semToBitVector :: SemBitMap -> Sem -> BitVector
semToBitVector bmap sem = foldr (.|.) 0 $ map doLookup sem
where doLookup p =
case Map.lookup p bmap of
Nothing -> geniBug $ "predicate " ++ prettyStr p ++ " not found in semanticBit map"
Just b -> b
bitVectorToSem :: SemBitMap -> BitVector -> Sem
bitVectorToSem bmap vector =
mapMaybe tryKey $ Map.toList bmap
where tryKey (p,k) = if (k .&. vector == k) then Just p else Nothing
defaultStepAll :: Builder st it pa -> BuilderState st ()
defaultStepAll b =
do s <- get
case finished b s of
Active -> step b >> defaultStepAll b
_ -> return ()
type DispatchFilter s a = a -> s (FilterStatus a)
data FilterStatus a = Filtered | NotFiltered a
(>-->) :: (Monad s) => DispatchFilter s a -> DispatchFilter s a -> DispatchFilter s a
f >--> f2 = \x -> f x >>= next
where
next y@Filtered = return y
next (NotFiltered x2) = f2 x2
condFilter :: (Monad s) => (a -> Bool)
-> DispatchFilter s a -> DispatchFilter s a
-> DispatchFilter s a
condFilter cond f1 f2 = \x -> if cond x then f1 x else f2 x
modifyStats :: (Metric -> Metric) -> BuilderState st ()
modifyStats fn = lift $ modify $ updateMetrics fn
incrCounter :: String -> Int -> BuilderState st ()
incrCounter key n = modifyStats (incrIntMetric key n)
queryCounter :: String -> Statistics -> Maybe Int
queryCounter key s =
case queryMetrics (queryIntMetric key) s of
[] -> Nothing
[c] -> Just c
_ -> geniBug $ "More than one instance of the metric: " ++ key
initStats :: Params -> Statistics
initStats pa =
let mdefault ms = if "default" `elem` ms then defaultMetricNames else []
identifyMs :: [String] -> [Metric]
identifyMs ms = map namedMetric $ mdefault ms ++ delete "default" ms
metrics = identifyMs $ fromMaybe [] $ getFlagP MetricsFlg pa
in execState (mapM_ addMetric metrics) emptyStats
namedMetric :: String -> Metric
namedMetric n = IntMetric n 0
defaultMetricNames :: [ String ]
defaultMetricNames = [ num_iterations, chart_size, num_comparisons, gen_time ]
num_iterations, chart_size, num_comparisons, gen_time :: String
num_iterations = "iterations"
chart_size = "chart_size"
num_comparisons = "comparisons"
gen_time = "gen_time"
lexicalSelection :: TagDerivation -> [Text]
lexicalSelection = sort . nub
. concatMap (\d -> dsChild d : maybeToList (dsParent d))
instance NFData Input where
rnf (Input x1 x2 x3) = rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()