module NLP.GenI.LexicalSelection
where
import Control.Applicative ((<$>))
import Control.Arrow ((***))
import Control.Monad.Trans.Maybe
import Control.Monad.Writer
import Data.Function (on)
import Data.List
import qualified Data.Map as Map
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Data.Tree (Tree (Node))
import Data.FullList hiding (head, tail, (++))
import qualified Data.FullList as FL
import NLP.GenI.FeatureStructure (AvPair (..), Flist, unifyFeat)
import NLP.GenI.General (filterTree, geniBug,
histogram, hush,
repAllNode,
repNodeByNode)
import NLP.GenI.GeniVal
import NLP.GenI.LexicalSelection.Types
import NLP.GenI.Lexicon (LexEntry (..), Lexicon)
import NLP.GenI.Semantics (LitConstr, Sem, SemInput,
subsumeSem, unifySem)
import NLP.GenI.Tag (TagElem (..), idname)
import NLP.GenI.TestSuite
import NLP.GenI.TreeSchema (GNode (..), GType (..),
AdjunctionConstraint(..),
Macros, SchemaTree,
Ttree (..), crushTreeGNode,
setAnchor, setLexeme, tree)
import NLP.GenI.Warning
data CustomSem sem = CustomSem
{
fromCustomSemInput :: sem -> Either Text SemInput
, customSelector :: LexicalSelector sem
, customSemParser :: Text -> Either Text (TestCase sem)
, customSuiteParser :: FilePath -> Text -> Either Text [TestCase sem]
, customRenderSem :: sem -> Text
}
type LexicalSelector sem = Macros -> Lexicon -> sem -> IO LexicalSelection
data LexicalSelection = LexicalSelection
{
lsAnchored :: [TagElem]
, lsLexEntries :: [LexEntry]
, lsWarnings :: GeniWarnings
}
defaultLexicalSelector :: Macros -> Lexicon -> SemInput -> IO LexicalSelection
defaultLexicalSelector g l t = return (defaultLexicalSelection g l t)
defaultLexicalSelection :: Macros -> Lexicon -> SemInput -> LexicalSelection
defaultLexicalSelection grammar lexicon sem =
defaultPostProcessing sem $
defaultAnchoring sem grammar $
defaultLexicalChoice lexicon sem
missingLexEntries :: [TagElem] -> [LexEntry] -> [LexEntry]
missingLexEntries cands =
filter treeless
where
treeless l = isNothing $ find (\t -> tsemantics t == isemantics l) cands
defaultLexicalChoice :: Lexicon -> SemInput -> [LexEntry]
defaultLexicalChoice slex (tsem,_,_) = chooseCandI tsem slex
chooseCandI :: Sem -> [LexEntry] -> [LexEntry]
chooseCandI tsem cand =
nub $ concatMap helper cand
where
replaceLex i (sem,sub) =
(replace sub i) { isemantics = sem }
helper :: LexEntry -> [LexEntry]
helper l =
if null sem
then [l]
else map (replaceLex l) psubsem
where
psubsem = sem `subsumeSem` tsem
sem = isemantics l
mergeSynonyms :: [LexEntry] -> [LexEntry]
mergeSynonyms lexEntry =
Map.elems synMap
where
mergeFn l1 l2 = l1 { iword = (FL.++) (iword l1) (iword l2) }
keyFn l = (ifamname l, isemantics l)
synMap =
foldr helper Map.empty lexEntry
where
helper x acc = Map.insertWith mergeFn (keyFn x) x acc
type LexCombine a = MaybeT (Writer [LexCombineError]) a
runLexCombine :: LexCombine a -> (Maybe a, [LexCombineError])
runLexCombine = runWriter . runMaybeT
lexTell :: LexCombineError -> LexCombine ()
lexTell x = lift (tell [x])
defaultAnchoring :: SemInput -> Macros -> [LexEntry] -> LexicalSelection
defaultAnchoring (tsem,_,_) grammar lexCands = LexicalSelection
{ lsAnchored = cands
, lsLexEntries = lexCands
, lsWarnings = mconcat [ lexWarnings, coanchorWarnings, errs ]
}
where
combinations = map (combineList tsem grammar) lexCands
cands = concatMap snd combinations
errs = mkGeniWarnings . concat $ zipWith mkWarnings lexCands (map fst combinations)
mkWarnings l = map (LexWarning [l] . LexCombineOneSchemaFailed)
coanchorWarnings = mkGeniWarnings $ do
l <- lexCands
let xs = filter (\p -> pfamily p == ifamname l) grammar
(c,n) <- Map.toList . histogram $ concatMap (missingCoanchors l) xs
return (LexWarning [l] (MissingCoanchors c n))
lexWarnings = mkGeniWarnings $
case missingLexEntries cands lexCands of
[] -> []
xs -> [LexWarning xs LexCombineAllSchemataFailed]
combineList :: Sem -> Macros -> LexEntry
-> ([LexCombineError],[TagElem])
combineList tsem gram lexitem =
case [ t | t <- gram, pfamily t == tn ] of
[] -> ([FamilyNotFoundError tn],[])
macs -> squish . swap . unzip $ map (\m -> runLexCombine $ combineOne tsem lexitem m) macs
where
tn = ifamname lexitem
swap (x,y) = (y,x)
squish = (compressLexCombineErrors . concat) *** (concat . catMaybes)
combineOne :: Sem -> LexEntry -> SchemaTree -> LexCombine [TagElem]
combineOne tsem lexRaw eRaw = do
let l1 = finaliseVars "-l" lexRaw
e1 = finaliseVars "-t" eRaw
(l,e) <- unifyParamsWithWarning (l1,e1)
>>= unifyInterfaceUsing iinterface
>>= unifyInterfaceUsing ifilters
>>= enrichWithWarning
tree2 <- case crushTreeGNode (tree e) of
Nothing -> do lexTell (SchemaError [pidname e]
(StringError "Could not flatten disjunction"))
fail ""
Just x -> return x
let name = T.intercalate ":" $ filter (not . T.null)
[ FL.head (iword l) , pfamily e , pidname e ]
template = TE
{ idname = name
, ttreename = pfamily e
, tidnum = 1
, ttype = ptype e
, ttree = setOrigin name . setLemAnchors . setAnchor (iword l) $ tree2
, tsemantics = []
, tsempols = isempols l
, tpolarities = Map.empty
, tinterface = pinterface e
, ttrace = ptrace e
}
semUnifications <- case unifySem (isemantics l) (fromMaybe [] $ psemantics e) of
[] -> croak e "could not unify lemma and schema semantics"
xs -> return xs
return $ concatMap (anonymiseSingletons . finaliseSemantics template) semUnifications
where
croak :: SchemaTree -> Text -> LexCombine a
croak t msg = do
lexTell (SchemaError [pidname t] (StringError msg))
mzero
finaliseSemantics template (sem,sub) = do
let template2 = replace sub template
(sem2,sub2) <- sem `subsumeSem` replace sub tsem
return $ replace sub2 $ template2 { tsemantics = sem2 }
unifyParamsWithWarning (l,t) =
if length lp /= length tp
then croak t "Parameter length mismatch"
else case unify lp tp of
Left msg ->
croak t ("Parameter unification error: " <> T.pack msg)
Right (ps2, subst) ->
let t2 = (replace subst t) { params = ps2 }
in return (replace subst l, t2)
where
lp = iparams l
tp = params t
unifyInterfaceUsing ifn (l,e) =
case unifyFeat (ifn l) (pinterface e) of
Left msg ->
croak e ("Interface unification error: " <> T.pack msg)
Right (int2, fsubst) ->
let e2 = (replace fsubst e) { pinterface = int2 }
in return (replace fsubst l, e2)
enrichWithWarning (l,e) = (l,) <$> enrich l e
enrich :: LexEntry -> SchemaTree -> LexCombine SchemaTree
enrich l t = do
(intE, namedE) <- lift $ lexEquations l
t2 <- foldM enrichInterface t intE
foldM enrichBy t2 namedE
where
enrichInterface tx en =
case unifyFeat [en] (pinterface tx) of
Left _ -> lexTell (ifaceEnrichErr en) >> fail ""
Right (i2, isubs) -> return $ (replace isubs tx) { pinterface = i2 }
ifaceEnrichErr (AvPair loc _) =
SchemaError [pidname t] (EnrichError (PeqInterface loc))
data EnrichmentResult = EnrSuccess SchemaTree Subst
| EnrNotFound
| EnrFailed
enrichBy :: SchemaTree
-> PathEqPair
-> LexCombine SchemaTree
enrichBy t eq@(eqLhs, _) =
case maybeEnrichBy t eq of
EnrNotFound -> return t
EnrFailed -> lexTell enrichErr >> mzero
EnrSuccess t2 _ -> return t2
where
enrichErr = SchemaError [pidname t] $
EnrichError (PeqJust eqLhs)
maybeEnrichBy :: SchemaTree
-> PathEqPair
-> EnrichmentResult
maybeEnrichBy t (eqLhs, eqVal) =
maybe EnrNotFound enrichNode $ seekCoanchor eqLhs t
where
fixNode n mt =
mt { tree = repNodeByNode (matchNodeName eqLhs) n (tree mt) }
enrichNode node = fromMaybe EnrFailed $
case eqLhs of
PeqFeat _ eqTop eqAtt -> do
let (get, set) = case eqTop of
Top -> (gup, \n x -> n { gup = x })
Bottom -> (gdown, \n x -> n { gdown = x})
(fs, sub) <- hush $ enrichFeat (AvPair eqAtt eqVal) (get node)
let t2 = fixNode (set node fs) (replace sub t)
return $ EnrSuccess t2 sub
PeqLex _ -> do
vs <- gConstraints eqVal
let node2 = node { glexeme = FL.fromFL vs }
t2 = fixNode node2 t
return $ EnrSuccess t2 Map.empty
enrichFeat :: MonadUnify m
=> AvPair GeniVal
-> Flist SchemaVal
-> m (Flist SchemaVal, Subst)
enrichFeat (AvPair a v) fs =
case span (\x -> avAtt x < a) fs of
(before,here:after) | avMatch here -> do
let (AvPair _ (SchemaVal fv)) = here
(v2,sub) <- unify fv (replicate (length fv) v)
let av2 = AvPair a (SchemaVal v2)
fs2 = replace sub before ++ (av2 : replace sub after)
return (fs2, sub)
(before,after) -> do
let av2 = AvPair a (SchemaVal [v])
fs2 = before ++ (av2 : after)
return (fs2, Map.empty)
where
avMatch (AvPair fa _) = fa == a
missingCoanchors :: LexEntry -> SchemaTree -> [Text]
missingCoanchors lexEntry t =
[ name eqLhs | eqLhs <- nubBy ((==) `on` name) equations, missing eqLhs ]
where
equations = map fst . snd . fst . runWriter $ lexEquations lexEntry
name (PeqFeat n _ _) = n
name (PeqLex n) = n
missing eqLhs = isNothing (seekCoanchor eqLhs t)
lexEquations :: LexEntry -> Writer [LexCombineError] ([AvPair GeniVal],[PathEqPair])
lexEquations =
fmap myPartition . mapM parseAv . iequations
where
myPartition xs = ( [ AvPair a v | (PeqInterface a, v) <- xs ]
, [ (n,v) | (PeqJust n, v) <- xs ] )
parseAv (AvPair a v) = fmap (\a2 -> (a2,v)) (parsePathEq a)
seekCoanchor :: NodePathEqLhs -> SchemaTree -> Maybe (GNode SchemaVal)
seekCoanchor eqLhs t =
case filterTree (matchNodeName eqLhs) (tree t) of
[a] -> Just a
[] -> Nothing
_ -> geniBug . T.unpack . T.intercalate "\n" $
[ "NLP.GenI.LexicalSelection.seekCoanchor:"
, "Did not expect to see a tree with multiple matches in enrichBy."
, "Tree: " `T.append` pidname t
, "Family: " `T.append` pfamily t
, "Matching on: " `T.append` showPathEqLhs (PeqJust eqLhs)
]
matchNodeName :: NodePathEqLhs -> GNode SchemaVal -> Bool
matchNodeName (PeqFeat n _ _) = matchNodeNameHelper n
matchNodeName (PeqLex n) = matchNodeNameHelper n
matchNodeNameHelper :: Text -> GNode SchemaVal -> Bool
matchNodeNameHelper "anchor" = ganchor
matchNodeNameHelper n = (== n) . gnname
setLemAnchors :: Tree (GNode GeniVal) -> Tree (GNode GeniVal)
setLemAnchors t =
repAllNode fn filt t
where
filt (Node a []) = gtype a == Subs && (isJust. lemAnchor) a
filt _ = False
fn (Node x k) = setLexeme (lemAnchorMaybeFake x) $
Node (x { gtype = Other, gaconstr = MaybeAdj }) k
lemAnchorMaybeFake :: GNode GeniVal -> [Text]
lemAnchorMaybeFake n =
fromMaybe ["ERR_UNSET_LEMMANCHOR"] (lemAnchor n)
lemAnchor :: GNode GeniVal -> Maybe [Text]
lemAnchor n =
case [ v | AvPair a v <- gup n, a == _lemanchor ] of
[l] -> fromFL <$> gConstraints l
_ -> Nothing
_lemanchor :: Text
_lemanchor = "lemanchor"
setOrigin :: Text -> Tree (GNode v) -> Tree (GNode v)
setOrigin t = fmap (\g -> g { gorigin = t })
defaultPostProcessing :: SemInput -> LexicalSelection -> LexicalSelection
defaultPostProcessing (_,_,lc) sel = sel
{ lsAnchored = preselectParaphrases lc (lsAnchored sel) }
preselectParaphrases :: [LitConstr] -> [TagElem] -> [TagElem]
preselectParaphrases litContrs = filter (respectsConstraints litContrs)
respectsConstraints :: [LitConstr] -> TagElem -> Bool
respectsConstraints lc t =
all (`elem` ttrace t) constrs
where
constrs = concat [ cs | (l,cs) <- lc, l `elem` tsemantics t ]