module NLP.GenI.Tag (
Tags, TagElem(..), TagItem(..), TagSite(..),
TagDerivation, DerivationStep(..), dsChild, dsParent, dsParentSite,
ts_synIncomplete, ts_semIncomplete, ts_tbUnificationFailure,
ts_rootFeatureMismatch,
addToTags, tagLeaves, getLexeme, toTagSite,
setTidnums, plugTree, spliceTree,
mapBySem,
collect, detectSites,
) where
import Control.Applicative ((<$>), (<*>))
import qualified Data.Map as Map
import Data.Maybe (catMaybes, fromMaybe, listToMaybe,
mapMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Tree
import Control.DeepSeq
import Data.FullList hiding ((++))
import Data.Generics (Data)
import Data.Typeable (Typeable)
import Text.JSON
import NLP.GenI.FeatureStructure (AvPair (..), Flist)
import NLP.GenI.General (geniBug, groupByFM, listRepNode,
preTerminals)
import NLP.GenI.GeniShow
import NLP.GenI.GeniVal (Collectable (..),
DescendGeniVal (..), GeniVal (..),
Idable (..))
import NLP.GenI.Polarity.Types (PolarityKey (..), SemPols)
import NLP.GenI.Pretty
import NLP.GenI.Semantics (Literal (..), Sem, emptyLiteral)
import NLP.GenI.TreeSchema (GNode (..), GType (..), NodeName,
isAdjConstrained,
Ptype (..), lexemeAttributes)
type Tags = Map.Map String [TagElem]
addToTags :: Tags -> String -> TagElem -> Tags
addToTags t k e = Map.insertWith (++) k [e] t
data TagSite = TagSite
{ tsName :: Text
, tsUp :: Flist GeniVal
, tsDown :: Flist GeniVal
, tsOrigin :: Text
}
deriving (Eq, Ord, Data, Typeable)
data TagElem = TE
{ idname :: Text
, ttreename :: Text
, tidnum :: Integer
, ttype :: Ptype
, ttree :: Tree (GNode GeniVal)
, tsemantics :: Sem
, tpolarities :: Map.Map PolarityKey (Int,Int)
, tinterface :: Flist GeniVal
, ttrace :: [Text]
, tsempols :: [SemPols]
}
deriving (Eq, Data, Typeable)
detectSites :: Tree (GNode GeniVal) -> ([NodeName], [NodeName], [NodeName])
detectSites t =
( sites isSub
, sites isAdj
, sites isNeither
)
where
ns = flatten t
sites match = map gnname (filter match ns)
isSub n = gtype n == Subs
isAdj n = not (isAdjConstrained n)
isNeither n = isAdjConstrained n && not (isSub n)
toTagSite :: GNode GeniVal -> TagSite
toTagSite n = TagSite (gnname n) (gup n) (gdown n) (gorigin n)
type TagDerivation = [ DerivationStep ]
data DerivationStep = SubstitutionStep Text Text Text
| AdjunctionStep Text Text Text
| InitStep Text
deriving (Show, Ord, Eq)
dsOp :: DerivationStep -> Char
dsOp (SubstitutionStep {}) = 's'
dsOp (AdjunctionStep {}) = 'a'
dsOp (InitStep {}) = 'i'
dsChild :: DerivationStep -> Text
dsChild (SubstitutionStep c _ _) = c
dsChild (AdjunctionStep c _ _ ) = c
dsChild (InitStep c) = c
dsParent :: DerivationStep -> Maybe Text
dsParent (SubstitutionStep _ p _) = Just p
dsParent (AdjunctionStep _ p _) = Just p
dsParent (InitStep _) = Nothing
dsParentSite :: DerivationStep -> Maybe Text
dsParentSite (SubstitutionStep _ _ s) = Just s
dsParentSite (AdjunctionStep _ _ s) = Just s
dsParentSite (InitStep _) = Nothing
instance JSON DerivationStep where
readJSON j = do
jo <- fromJSObject `fmap` readJSON j
let field x = maybe (fail $ "Could not find: " ++ x) readJSON
$ lookup x jo
op <- field "op"
child <- field "child"
case op of
"s" -> AdjunctionStep child <$> field "parent" <*> field "parent-node"
"a" -> SubstitutionStep child <$> field "parent" <*> field "parent-node"
"i" -> return (InitStep child)
x -> fail $ "Don't know about derivation operation '" ++ x ++ "'"
showJSON x =
JSObject . toJSObject $ [ ("op", showJSON $ dsOp x)
, ("child", showJSON $ dsChild x)
] ++ catMaybes
[ (\v -> ("parent", showJSON v)) <$> dsParent x
, (\v -> ("parent-node", showJSON v)) <$> dsParentSite x
]
instance Ord TagElem where
compare t1 t2 =
case (ttype t1, ttype t2) of
(Initial, Initial) -> compareId
(Initial, Auxiliar) -> LT
(Auxiliar, Initial) -> GT
(Auxiliar, Auxiliar) -> compareId
where compareId = compare (tidnum t1) (tidnum t2)
instance DescendGeniVal TagElem where
descendGeniVal s te =
te { tinterface = descendGeniVal s (tinterface te)
, ttree = descendGeniVal s (ttree te)
, tsemantics = descendGeniVal s (tsemantics te) }
instance DescendGeniVal TagSite where
descendGeniVal s (TagSite n fu fd o) = TagSite n (descendGeniVal s fu) (descendGeniVal s fd) o
instance Collectable TagElem where
collect t = (collect $ tinterface t) . (collect $ ttree t)
. (collect $ tsemantics t)
instance Idable TagElem where
idOf = tidnum
plugTree :: Tree NodeName
-> NodeName
-> Tree NodeName
-> Tree NodeName
plugTree male n female =
case listRepNode (const male) (nmatch n) [female] of
([r], True) -> r
_ -> geniBug oops
where
oops = "plugTree: unexpected plug failure at node " ++ T.unpack n
spliceTree :: NodeName
-> Tree NodeName
-> NodeName
-> Tree NodeName
-> Tree NodeName
spliceTree f auxT n top =
plugTree middle n top
where
bottom = fromMaybe (geniBug oops) (findSubTree n top)
middle = plugTree bottom f auxT
oops = unwords
[ "NLP.GenI.Tag.spliceTree:"
, "Unexpected adjunction failure."
, "Could not find node " ++ T.unpack n ++ " of target tree."
]
nmatch :: NodeName -> Tree NodeName -> Bool
nmatch n (Node a _) = a == n
findSubTree :: NodeName -> Tree NodeName -> Maybe (Tree NodeName)
findSubTree n n2@(Node x ks)
| x == n = Just n2
| otherwise = case mapMaybe (findSubTree n) ks of
[] -> Nothing
(h:_) -> Just h
setTidnums :: [TagElem] -> [TagElem]
setTidnums xs = zipWith (\c i -> setOrigin $ c {tidnum = i}) xs [1..]
setOrigin :: TagElem -> TagElem
setOrigin te =
te { ttree = fmap setLabel . ttree $ te }
where
setLabel g = g { gorigin = nameNumber }
nameNumber = idname te
`T.append` ":"
`T.append` (T.pack . show . tidnum) te
class TagItem t where
tgIdName :: t -> Text
tgIdNum :: t -> Integer
tgSemantics :: t -> Sem
tgTree :: t -> Tree (GNode GeniVal)
instance TagItem TagElem where
tgIdName = idname
tgIdNum = tidnum
tgSemantics = tsemantics
tgTree = ttree
mapBySem :: (TagItem t) => [t] -> Map.Map (Literal GeniVal) [t]
mapBySem ts =
let gfn t = case tgSemantics t of
[] -> emptyLiteral
(x:_) -> x
in groupByFM gfn ts
type UninflectedDisjunction = ([Text], Flist GeniVal)
tagLeaves :: TagElem -> [ (NodeName, UninflectedDisjunction) ]
tagLeaves te = [ (gnname pt, (getLexeme t, gup pt)) | (pt,t) <- preTerminals . ttree $ te ]
getLexeme :: GNode GeniVal -> [Text]
getLexeme node =
case glexeme node of
[] -> fromMaybe [gnname node] $ firstMaybe grab lexemeAttributes
lexs -> lexs
where
grab la = firstMaybe match guppy
where
match (AvPair a v) | a == la = fromFL <$> gConstraints v
match _ = Nothing
guppy = gup node
firstMaybe :: (a -> Maybe b) -> [a] -> Maybe b
firstMaybe fn = listToMaybe . mapMaybe fn
instance GeniShow TagElem where
geniShowText te = T.concat
[ "% ------------------------- ", idname te
, "\n", ttreename te, ":" , idname te
, " " , (geniShowText $ tinterface te)
, " " , (geniShowText $ ttype te)
, "\n", (geniShowText $ ttree te)
, "\n", geniKeyword "semantics" (geniShowText $ tsemantics te)
]
instance GeniShow [TagElem] where
geniShowText = T.intercalate "\n\n" . map geniShowText
instance Pretty [TagSite] where
pretty =
T.intercalate "\n " . map fn
where
fn (TagSite n t b o) = T.intercalate "/"
[ n, pretty t, pretty b, o ]
ts_synIncomplete :: Text
ts_synIncomplete = "syntactically incomplete"
ts_tbUnificationFailure :: Text -> Text
ts_tbUnificationFailure msg = "top/bot unification failure:" <+> msg
ts_rootFeatureMismatch :: Flist GeniVal -> Text
ts_rootFeatureMismatch good = "root feature does not unify with" <+> pretty good
ts_semIncomplete :: [Literal GeniVal] -> Text
ts_semIncomplete sem = "semantically incomplete - missing:" <+> pretty sem
instance NFData TagElem where
rnf (TE x1 x2 x3 x4 x5 x6 x7 x8 x9 x10)
= rnf x1 `seq`
rnf x2 `seq`
rnf x3 `seq`
rnf x4 `seq`
rnf x5 `seq`
rnf x6 `seq`
rnf x7 `seq` rnf x8 `seq` rnf x9 `seq` rnf x10 `seq` ()
instance NFData DerivationStep where
rnf (SubstitutionStep x1 x2 x3)
= rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()
rnf (AdjunctionStep x1 x2 x3)
= rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()
rnf (InitStep x1) = rnf x1 `seq` ()