module NLP.GenI.LexicalSelection
where
import Control.Applicative ( (<$>) )
import Control.Arrow ((***))
import Control.Monad.Maybe
import Control.Monad.Writer
import Data.Function ( on )
import Data.List
import qualified Data.Map as Map
import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing)
import Data.Tree (Tree(Node))
import qualified Data.Text as T
import Data.Text ( Text )
import Data.FullList hiding ( head, tail, (++) )
import qualified Data.FullList as FL
import NLP.GenI.FeatureStructure (Flist, AvPair(..), unifyFeat)
import NLP.GenI.General
( filterTree, repAllNode, histogram, geniBug, repNodeByNode,
)
import NLP.GenI.GeniVal( unify, GeniVal(gConstraints), isConst, Subst, replace, finaliseVars )
import NLP.GenI.LexicalSelection.Types
import NLP.GenI.Lexicon ( LexEntry(..), Lexicon, )
import NLP.GenI.Semantics ( subsumeSem, unifySem, Sem )
import NLP.GenI.Tag ( TagElem(..), idname )
import NLP.GenI.TreeSchema ( Ttree(..), SchemaTree, SchemaNode, Macros
, crushTreeGNode
, setAnchor, setLexeme, tree
, GNode(..), GType(..)
)
import NLP.GenI.Warning
type LexicalSelector = Macros -> Lexicon -> Sem -> IO LexicalSelection
data LexicalSelection = LexicalSelection
{
lsAnchored :: [TagElem]
, lsLexEntries :: [LexEntry]
, lsWarnings :: GeniWarnings
}
defaultLexicalSelector :: Macros -> Lexicon -> Sem -> IO LexicalSelection
defaultLexicalSelector g l t = return (defaultLexicalSelection g l t)
defaultLexicalSelection :: Macros -> Lexicon -> Sem -> LexicalSelection
defaultLexicalSelection grammar lexicon tsem =
defaultAnchoring grammar (defaultLexicalChoice lexicon tsem) tsem
missingLexEntries :: [TagElem] -> [LexEntry] -> [LexEntry]
missingLexEntries cands = filter treeless
where
treeless l = isNothing $ find (\t -> tsemantics t == isemantics l) cands
defaultLexicalChoice :: Lexicon -> Sem -> [LexEntry]
defaultLexicalChoice slex tsem = chooseCandI tsem slex
chooseCandI :: Sem -> [LexEntry] -> [LexEntry]
chooseCandI tsem cand =
let replaceLex i (sem,sub) =
(replace sub i) { isemantics = sem }
helper :: LexEntry -> [LexEntry]
helper l = if null sem then [l]
else map (replaceLex l) psubsem
where psubsem = sem `subsumeSem` tsem
sem = isemantics l
in nub $ concatMap helper cand
mergeSynonyms :: [LexEntry] -> [LexEntry]
mergeSynonyms lexEntry =
let mergeFn l1 l2 = l1 { iword = (FL.++) (iword l1) (iword l2) }
keyFn l = (ifamname l, isemantics l)
synMap = foldr helper Map.empty lexEntry
where helper x acc = Map.insertWith mergeFn (keyFn x) x acc
in Map.elems synMap
type LexCombine a = MaybeT (Writer [LexCombineError]) a
lexTell :: LexCombineError -> LexCombine ()
lexTell x = lift (tell [x])
defaultAnchoring :: Macros -> [LexEntry] -> Sem -> LexicalSelection
defaultAnchoring grammar lexCands tsem =
LexicalSelection { lsAnchored = cands
, lsLexEntries = lexCands
, lsWarnings = mconcat [ lexWarnings, coanchorWarnings, errs ]
}
where
combinations = map (combineList tsem grammar) lexCands
cands = concatMap snd combinations
errs = mkGeniWarnings . concat $ zipWith mkWarnings lexCands (map fst combinations)
mkWarnings l = map (LexWarning [l] . LexCombineOneSchemaFailed)
coanchorWarnings = mkGeniWarnings $ do
l <- lexCands
let xs = filter (\p -> pfamily p == ifamname l) grammar
(c,n) <- Map.toList . histogram $ concatMap (missingCoanchors l) xs
return (LexWarning [l] (MissingCoanchors c n))
lexWarnings = mkGeniWarnings $ case missingLexEntries cands lexCands of
[] -> []
xs -> [LexWarning xs LexCombineAllSchemataFailed]
combineList :: Sem -> Macros -> LexEntry
-> ([LexCombineError],[TagElem])
combineList tsem gram lexitem =
case [ t | t <- gram, pfamily t == tn ] of
[] -> ([FamilyNotFoundError tn],[])
macs -> squish . swap . unzip $ map (\m -> runWriter . runMaybeT $ combineOne tsem lexitem m) macs
where
tn = ifamname lexitem
swap (x,y) = (y,x)
squish = (compressLexCombineErrors . concat) *** (concat . catMaybes)
combineOne :: Sem -> LexEntry -> SchemaTree -> LexCombine [TagElem]
combineOne tsem lexRaw eRaw =
do let l1 = finaliseVars "-l" lexRaw
e1 = finaliseVars "-t" eRaw
(l,e) <- unifyParamsWithWarning (l1,e1)
>>= unifyInterfaceUsing iinterface
>>= unifyInterfaceUsing ifilters
>>= enrichWithWarning
tree2 <- case crushTreeGNode (tree e) of
Nothing -> do lexTell (SchemaError [pidname e]
(StringError "Could not flatten disjunction"))
fail ""
Just x -> return x
let name = T.intercalate ":" $ filter (not . T.null)
[ FL.head (iword l) , pfamily e , pidname e ]
template = TE
{ idname = name
, ttreename = pfamily e
, tidnum = 1
, ttype = ptype e
, ttree = setOrigin name . setLemAnchors . setAnchor (iword l) $ tree2
, tsemantics = []
, tsempols = isempols l
, tpolarities = Map.empty
, tinterface = pinterface e
, ttrace = ptrace e
}
semUnifications <- case unifySem (isemantics l) (fromMaybe [] $ psemantics e) of
[] -> do lexTell (SchemaError [pidname e] (StringError "could not unify lemma and schema semantics"))
fail ""
xs -> return xs
return $ concatMap (finaliseSemantics template) semUnifications
where
croak t msg = do
lexTell (SchemaError [pidname t] (StringError msg))
fail ""
finaliseSemantics template (sem,sub) =
do (sem2,sub2) <- sem `subsumeSem` replace sub tsem
return $ replace sub2 $ template { tsemantics = sem2 }
unifyParamsWithWarning (l,t) =
let lp = iparams l
tp = params t
in if length lp /= length tp
then croak t "Parameter length mismatch"
else case unify lp tp of
Nothing -> croak t "Parameter unification error"
Just (ps2, subst) -> return (replace subst l, t2)
where t2 = (replace subst t) { params = ps2 }
unifyInterfaceUsing ifn (l,e) =
case unifyFeat (ifn l) (pinterface e) of
Nothing -> croak e "Interface unification error"
Just (int2, fsubst) -> return (replace fsubst l, e2)
where e2 = (replace fsubst e) { pinterface = int2 }
enrichWithWarning (l,e) =
do e2 <- enrich l e
return (l,e2)
enrich :: LexEntry -> SchemaTree -> LexCombine SchemaTree
enrich l t =
do
(intE, namedE) <- lift $ lexEquations l
t2 <- foldM enrichInterface t intE
foldM enrichBy t2 namedE
where
enrichInterface tx en =
case unifyFeat [en] (pinterface tx) of
Nothing -> lexTell (ifaceEnrichErr en) >> fail ""
Just (i2, isubs) -> return $ (replace isubs tx) { pinterface = i2 }
ifaceEnrichErr (AvPair loc _) =
SchemaError [pidname t] (EnrichError (PeqInterface loc))
enrichBy :: SchemaTree
-> PathEqPair
-> LexCombine SchemaTree
enrichBy t eq@(eqLhs, _) =
case maybeEnrichBy t eq of
Nothing -> lexTell enrichErr >> return t
Just (t2,_) -> return t2
where
enrichErr = SchemaError [pidname t] (EnrichError (PeqJust eqLhs))
maybeEnrichBy :: SchemaTree
-> PathEqPair
-> Maybe (SchemaTree, Subst)
maybeEnrichBy t (eqLhs, eqVal) = do
node <- seekCoanchor eqLhs t
case eqLhs of
PeqFeat _ eqTop eqAtt -> do
let (get, set) = case eqTop of
Top -> (gup, \n x -> n { gup = x })
Bottom -> (gdown, \n x -> n { gdown = x})
(fs, sub) <- enrichFeat (AvPair eqAtt eqVal) (get node)
let t2 = fixNode (set node fs) (replace sub t)
return (t2, sub)
PeqLex _ -> do
vs <- gConstraints eqVal
let node2 = node { glexeme = FL.fromFL vs }
t2 = fixNode node2 t
return (t2, Map.empty)
where
fixNode n mt = mt { tree = repNodeByNode (matchNodeName eqLhs) n (tree mt) }
enrichFeat :: AvPair GeniVal -> Flist [GeniVal] -> Maybe (Flist [GeniVal], Subst)
enrichFeat (AvPair a v) fs =
case span (\x -> avAtt x < a) fs of
(before,here:after) | avMatch here ->
do let (AvPair _ fv) = here
(v2,sub) <- unify fv (replicate (length fv) v)
let av2 = AvPair a v2
fs2 = replace sub before ++ (av2 : replace sub after)
return (fs2, sub)
(before,after) ->
let av2 = AvPair a [v]
fs2 = before ++ (av2 : after) in Just (fs2, Map.empty)
where
avMatch (AvPair fa _) = fa == a
missingCoanchors :: LexEntry -> SchemaTree -> [Text]
missingCoanchors lexEntry t =
[ name eqLhs | eqLhs <- nubBy ((==) `on` name) equations, missing eqLhs ]
where
equations = map fst . snd . fst . runWriter $ lexEquations lexEntry
name (PeqFeat n _ _) = n
name (PeqLex n) = n
missing eqLhs = isNothing (seekCoanchor eqLhs t)
lexEquations :: LexEntry -> Writer [LexCombineError] ([AvPair GeniVal],[PathEqPair])
lexEquations =
fmap myPartition . mapM parseAv . iequations
where
myPartition xs = ( [ AvPair a v | (PeqInterface a, v) <- xs ]
, [ (n,v) | (PeqJust n, v) <- xs ] )
parseAv (AvPair a v) = fmap (\a2 -> (a2,v)) (parsePathEq a)
seekCoanchor :: NodePathEqLhs -> SchemaTree -> Maybe SchemaNode
seekCoanchor eqLhs t =
case filterTree (matchNodeName eqLhs) (tree t) of
[a] -> Just a
[] -> Nothing
_ -> geniBug . T.unpack . T.intercalate "\n" $
[ "NLP.GenI.LexicalSelection.seekCoanchor:"
, "Did not expect to see a tree with multiple matches in enrichBy."
, "Tree: " `T.append` pidname t
, "Family: " `T.append` pfamily t
, "Matching on: " `T.append` showPathEqLhs (PeqJust eqLhs)
]
matchNodeName :: NodePathEqLhs -> SchemaNode -> Bool
matchNodeName (PeqFeat n _ _) = matchNodeNameHelper n
matchNodeName (PeqLex n) = matchNodeNameHelper n
matchNodeNameHelper :: Text -> SchemaNode -> Bool
matchNodeNameHelper "anchor" = ganchor
matchNodeNameHelper n = (== n) . gnname
setLemAnchors :: Tree (GNode GeniVal) -> Tree (GNode GeniVal)
setLemAnchors t =
repAllNode fn filt t
where
filt (Node a []) = gtype a == Subs && (isJust. lemAnchor) a
filt _ = False
fn (Node x k) = setLexeme (lemAnchorMaybeFake x) $
Node (x { gtype = Other, gaconstr = False }) k
lemAnchorMaybeFake :: GNode GeniVal -> [Text]
lemAnchorMaybeFake n =
fromMaybe ["ERR_UNSET_LEMMANCHOR"] (lemAnchor n)
lemAnchor :: GNode GeniVal -> Maybe [Text]
lemAnchor n =
case [ v | AvPair a v <- gup n, a == _lemanchor ] of
[l] | isConst l -> fromFL <$> gConstraints l
_ -> Nothing
_lemanchor :: Text
_lemanchor = "lemanchor"
setOrigin :: Text -> Tree (GNode v) -> Tree (GNode v)
setOrigin t = fmap (\g -> g { gorigin = t })