module NLP.GenI.Tag (
Tags, TagElem(..), TagItem(..), TagSite(..),
TagDerivation, DerivationStep(..), dsChild, dsParent, dsParentSite,
ts_synIncomplete, ts_semIncomplete, ts_tbUnificationFailure,
ts_rootFeatureMismatch,
addToTags, tagLeaves, getLexeme, toTagSite,
setTidnums, plugTree, spliceTree,
mapBySem,
collect, detectSites,
) where
import Control.Applicative ( (<$>), (<*>) )
import qualified Data.Map as Map
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe, catMaybes)
import Data.Tree
import Data.Text ( Text )
import qualified Data.Text as T
import Control.DeepSeq
import Data.Generics (Data)
import Data.Typeable (Typeable)
import Data.FullList hiding ( (++) )
import Text.JSON
import NLP.GenI.FeatureStructure ( AvPair(..), Flist )
import NLP.GenI.General (listRepNode, groupByFM, preTerminals, geniBug)
import NLP.GenI.GeniShow
import NLP.GenI.GeniVal ( GeniVal(..), DescendGeniVal(..), Collectable(..), Idable(..),
isConst,
)
import NLP.GenI.Polarity.Types ( PolarityKey(..), SemPols )
import NLP.GenI.Pretty
import NLP.GenI.Semantics ( Sem, Literal(..), emptyLiteral )
import NLP.GenI.TreeSchema
( Ptype(..), GNode(..), GType(..), NodeName, lexemeAttributes )
type Tags = Map.Map String [TagElem]
addToTags :: Tags -> String -> TagElem -> Tags
addToTags t k e = Map.insertWith (++) k [e] t
data TagSite = TagSite
{ tsName :: Text
, tsUp :: Flist GeniVal
, tsDown :: Flist GeniVal
, tsOrigin :: Text
}
deriving (Eq, Ord, Data, Typeable)
data TagElem = TE
{ idname :: Text
, ttreename :: Text
, tidnum :: Integer
, ttype :: Ptype
, ttree :: Tree (GNode GeniVal)
, tsemantics :: Sem
, tpolarities :: Map.Map PolarityKey (Int,Int)
, tinterface :: Flist GeniVal
, ttrace :: [Text]
, tsempols :: [SemPols]
}
deriving (Eq, Data, Typeable)
detectSites :: Tree (GNode GeniVal) -> ([NodeName], [NodeName], [NodeName])
detectSites t =
( sites isSub
, sites (not.gaconstr)
, sites constrButNotSub
)
where
ns = flatten t
sites match = map gnname . filter match $ ns
isSub n = gtype n == Subs
constrButNotSub n = gaconstr n && (not $ isSub n)
toTagSite :: GNode GeniVal -> TagSite
toTagSite n = TagSite (gnname n) (gup n) (gdown n) (gorigin n)
type TagDerivation = [ DerivationStep ]
data DerivationStep = SubstitutionStep Text Text Text
| AdjunctionStep Text Text Text
| InitStep Text
deriving (Show, Ord, Eq)
dsOp :: DerivationStep -> Char
dsOp (SubstitutionStep {}) = 's'
dsOp (AdjunctionStep {}) = 'a'
dsOp (InitStep {}) = 'i'
dsChild :: DerivationStep -> Text
dsChild (SubstitutionStep c _ _) = c
dsChild (AdjunctionStep c _ _ ) = c
dsChild (InitStep c) = c
dsParent :: DerivationStep -> Maybe Text
dsParent (SubstitutionStep _ p _) = Just p
dsParent (AdjunctionStep _ p _) = Just p
dsParent (InitStep _) = Nothing
dsParentSite :: DerivationStep -> Maybe Text
dsParentSite (SubstitutionStep _ _ s) = Just s
dsParentSite (AdjunctionStep _ _ s) = Just s
dsParentSite (InitStep _) = Nothing
instance JSON DerivationStep where
readJSON j = do
jo <- fromJSObject `fmap` readJSON j
let field x = maybe (fail $ "Could not find: " ++ x) readJSON
$ lookup x jo
op <- field "op"
child <- field "child"
case op of
"s" -> AdjunctionStep child <$> field "parent" <*> field "parent-node"
"a" -> SubstitutionStep child <$> field "parent" <*> field "parent-node"
"i" -> return (InitStep child)
x -> fail $ "Don't know about derivation operation '" ++ x ++ "'"
showJSON x =
JSObject . toJSObject $ [ ("op", showJSON $ dsOp x)
, ("child", showJSON $ dsChild x)
] ++ catMaybes
[ (\v -> ("parent", showJSON v)) <$> dsParent x
, (\v -> ("parent-node", showJSON v)) <$> dsParentSite x
]
instance Ord TagElem where
compare t1 t2 =
case (ttype t1, ttype t2) of
(Initial, Initial) -> compareId
(Initial, Auxiliar) -> LT
(Auxiliar, Initial) -> GT
(Auxiliar, Auxiliar) -> compareId
where compareId = compare (tidnum t1) (tidnum t2)
instance DescendGeniVal TagElem where
descendGeniVal s te =
te { tinterface = descendGeniVal s (tinterface te)
, ttree = descendGeniVal s (ttree te)
, tsemantics = descendGeniVal s (tsemantics te) }
instance DescendGeniVal TagSite where
descendGeniVal s (TagSite n fu fd o) = TagSite n (descendGeniVal s fu) (descendGeniVal s fd) o
instance Collectable TagElem where
collect t = (collect $ tinterface t) . (collect $ ttree t)
. (collect $ tsemantics t)
instance Idable TagElem where
idOf = tidnum
plugTree :: Tree NodeName
-> NodeName
-> Tree NodeName
-> Tree NodeName
plugTree male n female =
case listRepNode (const male) (nmatch n) [female] of
([r], True) -> r
_ -> geniBug oops
where
oops = "plugTree: unexpected plug failure at node " ++ T.unpack n
spliceTree :: NodeName
-> Tree NodeName
-> NodeName
-> Tree NodeName
-> Tree NodeName
spliceTree f auxT n top =
plugTree middle n top
where
bottom = fromMaybe (geniBug oops) (findSubTree n top)
middle = plugTree bottom f auxT
oops = unwords
[ "NLP.GenI.Tag.spliceTree:"
, "Unexpected adjunction failure."
, "Could not find node " ++ T.unpack n ++ " of target tree."
]
nmatch :: NodeName -> Tree NodeName -> Bool
nmatch n (Node a _) = a == n
findSubTree :: NodeName -> Tree NodeName -> Maybe (Tree NodeName)
findSubTree n n2@(Node x ks)
| x == n = Just n2
| otherwise = case mapMaybe (findSubTree n) ks of
[] -> Nothing
(h:_) -> Just h
setTidnums :: [TagElem] -> [TagElem]
setTidnums xs = zipWith (\c i -> setOrigin $ c {tidnum = i}) xs [1..]
setOrigin :: TagElem -> TagElem
setOrigin te =
te { ttree = fmap setLabel . ttree $ te }
where
setLabel g = g { gorigin = nameNumber }
nameNumber = idname te
`T.append` ":"
`T.append` (T.pack . show . tidnum) te
class TagItem t where
tgIdName :: t -> Text
tgIdNum :: t -> Integer
tgSemantics :: t -> Sem
tgTree :: t -> Tree (GNode GeniVal)
instance TagItem TagElem where
tgIdName = idname
tgIdNum = tidnum
tgSemantics = tsemantics
tgTree = ttree
mapBySem :: (TagItem t) => [t] -> Map.Map (Literal GeniVal) [t]
mapBySem ts =
let gfn t = case tgSemantics t of
[] -> emptyLiteral
(x:_) -> x
in groupByFM gfn ts
type UninflectedDisjunction = ([Text], Flist GeniVal)
tagLeaves :: TagElem -> [ (NodeName, UninflectedDisjunction) ]
tagLeaves te = [ (gnname pt, (getLexeme t, gup pt)) | (pt,t) <- preTerminals . ttree $ te ]
getLexeme :: GNode GeniVal -> [Text]
getLexeme node =
case glexeme node of
[] -> fromMaybe [gnname node] $ firstMaybe grab lexemeAttributes
lexs -> lexs
where
grab la = firstMaybe match guppy
where
match (AvPair a v) | isConst v && a == la =
fromFL <$> gConstraints v
match _ = Nothing
guppy = gup node
firstMaybe :: (a -> Maybe b) -> [a] -> Maybe b
firstMaybe fn = listToMaybe . mapMaybe fn
instance GeniShow TagElem where
geniShowText te = T.concat
[ "% ------------------------- ", idname te
, "\n", ttreename te, ":" , idname te
, " " , (geniShowText $ tinterface te)
, " " , (geniShowText $ ttype te)
, "\n", (geniShowText $ ttree te)
, "\n", geniKeyword "semantics" (geniShowText $ tsemantics te)
]
instance GeniShow [TagElem] where
geniShowText = T.intercalate "\n\n" . map geniShowText
instance Pretty [TagSite] where
pretty =
T.intercalate "\n " . map fn
where
fn (TagSite n t b o) = T.intercalate "/"
[ n, pretty t, pretty b, o ]
ts_synIncomplete, ts_tbUnificationFailure :: String
ts_synIncomplete = "syntactically incomplete"
ts_tbUnificationFailure = "top/bot unification failure"
ts_rootFeatureMismatch :: Flist GeniVal -> String
ts_rootFeatureMismatch good = "root feature does not unify with " ++ prettyStr good
ts_semIncomplete :: [Literal GeniVal] -> String
ts_semIncomplete sem = "semantically incomplete - missing: " ++ prettyStr sem
instance NFData TagElem where
rnf (TE x1 x2 x3 x4 x5 x6 x7 x8 x9 x10)
= rnf x1 `seq`
rnf x2 `seq`
rnf x3 `seq`
rnf x4 `seq`
rnf x5 `seq`
rnf x6 `seq`
rnf x7 `seq` rnf x8 `seq` rnf x9 `seq` rnf x10 `seq` ()
instance NFData DerivationStep where
rnf (SubstitutionStep x1 x2 x3)
= rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()
rnf (AdjunctionStep x1 x2 x3)
= rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()
rnf (InitStep x1) = rnf x1 `seq` ()