{-# LANGUAGE DeriveDataTypeable, OverloadedStrings, FlexibleInstances #-} {-# LANGUAGE ViewPatterns, TupleSections #-} -- | Module : NLP.Antfarm.History -- Copyright : 2012 Eric Kow (Computational Linguistics Ltd.) -- License : BSD3 -- Maintainer : eric.kow@gmail.com -- Stability : experimental -- Portability : portable -- -- Discourse history tracking module NLP.Antfarm.History where import Control.Applicative ((<$>)) import Data.List ( foldl', elemIndex, delete ) import Data.Maybe ( isJust, mapMaybe ) import Data.Text ( Text ) import Data.Tree ( Tree(..), flatten ) import Prelude hiding ( lex ) import qualified Data.Set as Set import qualified Data.Map as Map import NLP.Antfarm.Refex -- | A 'RefGroup' is considered to refer exactly to its indices if it -- has no bounds information or examples associated with it. isExact :: RefGroup -> Bool isExact rg = rgBounds rg == emptyBounds -- | A discourse unit that would refer to just an element mkSingletonDu :: RefKey -> DiscourseUnit mkSingletonDu (c,i) = Node (RefGroup c (Set.singleton i) emptyBounds) [] {- Note [discourse tree] ~~~~~~~~~~~~~~~~~~~~~ Our notion of a discourse unit is a tree of a RefGroup and its examples (and their examples, etc). When we enter such a unit into the history, we also enter all of the examples as separate entries at the same time. This facilitates queries on the units, and also affects the counts. For example, if we say “two atoms (a carbon and an oxygen)” we can consider the oxygen to have already beeen referenced once so when we bring up another oxygen, we say “another oxygen” accounting for the fact that oxygen had already been mentioned once -} instance Ord (Tree RefGroup) where compare (Node r1 ks1) (Node r2 ks2) = case compare r1 r2 of EQ -> compare ks1 ks2 x -> x data RefHistory = RefHistory { -- | How many times a 'DiscourseUnit' has been mentioned rhCount :: Map.Map DiscourseUnit RefCount -- | For each class: an ordering of indices that reflects what -- ordinal expression should be used for them (if at all) -- -- So @[c8,c3,c4]@ means -- -- c8: the first -- c3: the second -- c4: the third , rhOrder :: Map.Map Text [Text] } type RefCount = Int plusRefCount :: RefCount -> RefCount -> RefCount plusRefCount = (+) -- | Discourse history without any objects emptyHistory :: RefHistory emptyHistory = RefHistory Map.empty Map.empty -- ---------------------------------------------------------------------- -- building the discourse history -- ---------------------------------------------------------------------- -- See note `discourse tree' -- -- | Take note of the fact that these discourse units have been mentioned -- (again) in the history. -- -- You probably want to realise the units first, then add them to the -- history. addToHistory :: [DiscourseUnit] -> RefHistory -> RefHistory addToHistory ks st = st { rhCount = foldl' plusOne (rhCount st) (concatMap subtrees ks) , rhOrder = foldl' addOrdinal (rhOrder st) (concatMap duSingletons ks) } where plusOne m k = Map.insertWith' plusRefCount k 1 m addOrdinal m (c,i) = Map.insertWith' append c [i] m where append new old = if all (`elem` old) new then old else old ++ new -- | Individuals mentioned in a discourse unit -- (see 'refSingleton') duSingletons :: DiscourseUnit -> [RefKey] duSingletons = mapMaybe refSingleton . flatten -- | A 'refSingleton' is an instance that appears by itself in a 'RefGroup' -- without other items or constraints that imply that there could be other -- items refSingleton :: RefGroup -> Maybe RefKey refSingleton (RefGroup c is bnds) = case Set.toList is of [i] -> if kosher bnds then Just (c,i) else Nothing _ -> Nothing where -- either no bounds information, or “exactly 1" are acceptable kosher (Bounds [] Nothing Nothing) = True kosher (Bounds [] (Just 1) (Just 1)) = True kosher _ = False -- | If a RefGroup has explicit constraints, augment them with the -- implicit constraints that arise from treating each item -- as evidence of an at least constraint -- -- It's a good idea to run this once when building 'RefGroup's, -- but you may also decide that this sort of behaviour is not -- desirable for your application, so it's off by default noteImplicitBounds :: RefGroup -> RefGroup noteImplicitBounds rg = if implicits > 0 then rg { rgBounds = bounds2 } else rg where bounds1 = rgBounds rg bounds2 = case bLower bounds1 of Just l -> bounds1 { bLower = Just (max l implicits) } Nothing -> if isJust (bUpper bounds1) then bounds1 { bLower = Just implicits } else bounds1 implicits = Set.size (rgIdxes rg) -- ---------------------------------------------------------------------- -- query -- ---------------------------------------------------------------------- -- | @hasDistractorGroup st k@ returns whether or not the discourse history -- @st@ contains a group with distractors to @k@. -- -- See 'distractorGroups' for more details hasDistractorGroup :: RefHistory -> RefKey -> Bool hasDistractorGroup st k = not . null $ distractorGroups st k -- | @distractorGroups st k@ returns all the distractor groups for @k@ -- in the discourse history. -- -- A distractor is defined (here) as something that has the the same class -- as @k@ but a different index. distractorGroups :: RefHistory -> RefKey -> [DiscourseUnit] distractorGroups st (c, i) = filter distracting (Map.keys (rhCount st)) where distracting = not . safe safe (Node rg2 _) = c /= rgClass rg2 || i `Set.member` rgIdxes rg2 || isClasswide rg2 -- | @hasSupersetMention st g@ returns whether or not the discourse history -- contains a group that includes all members of @g@ -- -- Note that if a group has already occured in the discourse history, this -- returns a True (ie. not a strict superset) hasSupersetMention :: RefHistory -> DiscourseUnit -> Bool hasSupersetMention st k = not . Map.null . rhCount $ supersetMentions k st -- | @supersetMentions g st@ returns the portion of discourse history @st@ -- in which all groups are supersets of @g@ (inclusive, not strict super) supersetMentions :: DiscourseUnit -> RefHistory -> RefHistory supersetMentions (Node g _) h = h { rhCount = Map.filterWithKey hasK (rhCount h) } where hasK (Node g2 _) _ = rgClass g == rgClass g2 && rgIdxes g `Set.isSubsetOf` rgIdxes g2 -- | @lastMention st k@ returns the number of times @k@ has been mentioned lastMention :: RefHistory -> RefKey -> Int lastMention st (c,i) = sum . Map.elems . rhCount $ supersetMentions (mkSingletonDu (c,i)) st -- | @lastMention st g@ returns the number of times the group @g@ has been -- mentioned lastMentions :: RefHistory -> DiscourseUnit -> Int lastMentions st k = Map.findWithDefault 0 k (rhCount st) isFirstMention :: RefHistory -> RefKey -> Bool isFirstMention st k = lastMention st k == 0 -- ---------------------------------------------------------------------- -- subtle queries -- ---------------------------------------------------------------------- -- | If it makes sense to refer to a key using an ordinal expression, -- the order we should assign it (Nothing if we either can't sensibly -- assign one, or the history does not give us enough information to -- do so) mentionOrder :: RefHistory -> RefKey -> Maybe Int mentionOrder rh (c,i) = if isOnlySingletons rh then case Map.lookup c (rhOrder rh) of Just is | length is > 1 -> (+ 1) <$> elemIndex i is _ -> Nothing else Nothing where -- the c's never appear with others of their own kind in the same -- 'RefGroup' isOnlySingletons = not . any isMultiMatch . Map.keys . rhCount isMultiMatch (Node g _) = c == rgClass g && Set.size (rgIdxes g) > 1 -- | Is a subset of a previously mentioned group @g@ where there are no -- distractors to @g@ in the discourse history hasTidyBackpointer :: RefHistory -> DiscourseUnit -> Bool hasTidyBackpointer st du@(Node rg _) = not (any (hasDistractorGroup st) keys) && lastMentions st du == 0 && hasSupersetMention st du where keys = [ (rgClass rg, idx) | idx <- Set.toList (rgIdxes rg) ] -- | @isTheOther st k@ returns whether or not there is a two-member group in -- the discourse history which @k@ is a member of such that the other -- member has already been mentioned as a part of a singleton group. -- -- The idea is that if you have said "one of the X", you will want to say -- "the other X" for the other member of that group isTheOther :: RefHistory -> RefKey -> Bool isTheOther st (c,i) = any isBuddy $ Map.keys (rhCount st) where isBuddy (Node g2 []) | isExact g2 && c == rgClass g2 = -- the other instance was mentioned at least once case getBuddy (rgIdxes g2) of Just b -> lastMentions st (mkSingletonDu (c,b)) >= 1 Nothing -> False isBuddy _ = False -- getBuddy (Set.toList -> idx) | length idx == 2 = case delete i idx of [b] -> Just b -- must be *exactly* one other item _ -> Nothing getBuddy _ = Nothing -- | Is the class itself, not any individual entity within that class -- ie. “ants” instead of “an ant” or “some ants” -- -- By convention, any group which containts no indices or constraints -- is considered to be classwide. isClasswide :: RefGroup -> Bool isClasswide rg = Set.null (rgIdxes rg) && isExact rg -- ---------------------------------------------------------------------- -- odds and ends -- ---------------------------------------------------------------------- -- | Like 'flatten', but returns whole subtrees instead of -- just nodes: -- -- > a(b c(d e(f g)) h) -- > b -- > c(d e(f g)) -- > d -- > e(f g) -- > f -- > g -- > h -- -- Invariant: @map rootLabel (subtrees x) == flatten x@ subtrees :: Tree a -> [Tree a] subtrees t = grab t [] where grab st@(Node _ ts) xs = st : foldr grab xs ts mkLeaf :: a -> Tree a mkLeaf x = Node x [] fst3 :: (a, b, c) -> a fst3 (x, _, _) = x snd3 :: (a, b, c) -> b snd3 (_, y, _) = y thd3 :: (a, b, c) -> c thd3 (_, _, z) = z