-- GenI surface realiser -- Copyright (C) 2005-2009 Carlos Areces and Eric Kow -- -- This program is free software; you can redistribute it and/or -- modify it under the terms of the GNU General Public License -- as published by the Free Software Foundation; either version 2 -- of the License, or (at your option) any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program; if not, write to the Free Software -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeSynonymInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | We use a flat semantics in GenI (bag of literals). module NLP.GenI.Semantics where import Control.Applicative ((<$>)) import Control.Arrow (first, (&&&), (***)) import Control.DeepSeq import Control.Monad.Except import Data.Binary import Data.Data import Data.Function (on) import Data.List (delete, insert, nub, sortBy) import qualified Data.Map as Map import Data.Maybe import Data.Text (Text) import qualified Data.Text as T import NLP.GenI.FeatureStructure import NLP.GenI.General (histogram, hush) import NLP.GenI.GeniShow import NLP.GenI.GeniVal import NLP.GenI.Pretty -- | A single semantic literal containing its handle, predicate, and arguments -- -- This can be paramaterised on the kinds of variables it uses, for example, -- 'GeniVal' for a semantics that you might still want to do unification on -- or 'Text' if it's supposed to be ground. data Literal gv = Literal { -- | the handle can be seen as a special kind of argument; stored separately lHandle :: gv -- | , lPredicate :: gv -- | , lArgs :: [gv] } deriving (Eq, Data, Typeable) instance Ord gv => Ord (Literal gv) where compare = compare `on` tucked where -- treat the handle as an argument tucked l = (lPredicate l, lHandle l : lArgs l) -- | A semantics is just a set of literals. type Sem = [Literal GeniVal] -- | A literal and any constraints associated with it (semantic input) type LitConstr = (Literal GeniVal, [Text]) -- | Semantics, index constraints, literal constraints -- -- The intention here is that for @(sem, icons, lcons)@ -- @all (`elem` sem) lcons@ type SemInput = (Sem,Flist GeniVal,[LitConstr]) instance Collectable a => Collectable (Literal a) where collect (Literal a b c) = collect a . collect b . collect c -- | An empty literal, not sure you should really be using this emptyLiteral :: Literal GeniVal emptyLiteral = Literal mkGAnon mkGAnon [] -- ---------------------------------------------------------------------- -- * Utility functions -- ---------------------------------------------------------------------- -- | Strip any index or literal constraints from an input. -- Use with care. removeConstraints :: SemInput -> SemInput removeConstraints (x, _, _) = (x, [], []) -- | Default sorting for a semantics sortSem :: Ord a => [Literal a] -> [Literal a] sortSem = sortBy compareOnLiteral -- | Default comparison for a literal compareOnLiteral :: Ord a => Literal a -> Literal a -> Ordering compareOnLiteral = compare -- | Sort primarily putting the ones with the most constants first -- and secondarily by the number of instances a predicate occurs -- (if plain string; atomic disjunction/vars treated as infinite) sortByAmbiguity :: Sem -> Sem sortByAmbiguity sem = sortBy (flip compare `on` criteria) sem where criteria = (constants &&& ambiguity) -- this is reverse sorting -- so high numbers come first ambiguity l = fromMaybe 0 $ do -- Maybe p <- boringLiteral l negate <$> Map.lookup p (literalCount sem) literalCount = histogram . mapMaybe boringLiteral boringLiteral = singletonVal . lPredicate -- | Anything that we would want to count the number constants in -- (as opposed to variables) class HasConstants a where -- | Number of constants constants :: a -> Int instance HasConstants GeniVal where constants g = if isConst2 g then 1 else 0 where isConst2 :: GeniVal -> Bool isConst2 x = isJust (gConstraints x) && isNothing (gLabel x) instance HasConstants a => HasConstants [a] where constants = sum . map constants instance HasConstants (Literal GeniVal) where constants (Literal h p args) = constants (h:p:args) -- ---------------------------------------------------------------------- -- Traversal -- ---------------------------------------------------------------------- instance {-# OVERLAPPING #-} DescendGeniVal a => DescendGeniVal (Literal a) where descendGeniVal s (Literal h n lp) = Literal (descendGeniVal s h) (descendGeniVal s n) (descendGeniVal s lp) -- ---------------------------------------------------------------------- -- Pretty printing -- ---------------------------------------------------------------------- instance Pretty Sem where pretty = geniShowText instance GeniShow Sem where geniShowText = squares . T.unwords . map geniShowText instance Pretty (Literal GeniVal) where pretty = geniShowText instance GeniShow (Literal GeniVal) where geniShowText (Literal h p l) = mh `T.append` geniShowText p `T.append` (parens . T.unwords . map geniShowText $ l) where mh = if hideh h then "" else geniShowText h `T.snoc` ':' hideh = maybe False isInternalHandle . singletonVal instance Pretty SemInput where pretty = geniShowText instance GeniShow SemInput where geniShowText = displaySemInput (T.unwords . map geniShowText) instance GeniShow LitConstr where geniShowText (sem, []) = geniShowText sem geniShowText (sem, cs) = geniShowText sem <> squares (T.unwords cs) -- | Helper for displaying or pretty printing a semantic input -- -- This gives you a bit of control over how each literal is -- displayed displaySemInput :: ([LitConstr] -> Text) -> SemInput -> Text displaySemInput dispLits (sem, icons, lcons) = -- CAREFUL: if you're modifying this, note that geniShowText -- can be affected T.intercalate "\n" . concat $ [ [semStuff] , [ idxStuff | not (null icons) ] ] where semStuff = geniKeyword "semantics" . squares . dispLits $ map withConstraints sem idxStuff = geniKeyword "idxconstraints" . squares $ geniShowText icons withConstraints lit = (lit, concat [ cs | (p,cs) <- lcons, p == lit ]) -- | Is a handle generated by GenI. GenI lets you write literals without -- a handle; in these cases a unique handle is generated and hidden -- from the UI. isInternalHandle :: Text -> Bool isInternalHandle = ("genihandle" `T.isPrefixOf`) -- ---------------------------------------------------------------------- -- * Subsumption -- ---------------------------------------------------------------------- -- | @x `subsumeSem` y@ returns all the possible ways to unify -- @x@ with some SUBSET of @y@ so that @x@ subsumes @y@. -- If @x@ does NOT subsume @y@, we return the empty list. subsumeSem :: Sem -> Sem -> [(Sem,Subst)] subsumeSem x y | length x > length y = [] subsumeSem x y = map (first sortSem) $ subsumeSemH x y -- | Helper for 'subsumeSem' traversal subsumeSemH :: Sem -> Sem -> [(Sem,Subst)] subsumeSemH [] [] = [ ([], Map.empty) ] subsumeSemH _ [] = error "subsumeSemH: got longer list in front" subsumeSemH [] _ = [ ([], Map.empty) ] subsumeSemH (x:xs) ys = nub $ do let attempts = zip ys $ map (hush . subsumeLiteral x) ys (y, Just (x2, subst)) <- attempts let next_xs = replace subst xs next_ys = replace subst $ delete y ys prepend = insert x2 *** appendSubst subst prepend `fmap` subsumeSemH next_xs next_ys -- | @p1 `subsumeLiteral` p2@ is the unification of @p1@ and @p2@ if -- both literals have the same arity, and the handles, predicates, -- and arguments in @p1@ all subsume their counterparts in @p2@ subsumeLiteral :: MonadUnify m => Literal GeniVal -> Literal GeniVal -> m (Literal GeniVal, Subst) subsumeLiteral l1@(Literal h1 p1 la1) l2@(Literal h2 p2 la2) = if length la1 == length la2 then do let hpla1 = h1:p1:la1 hpla2 = h2:p2:la2 (hpla, sub) <- hpla1 `allSubsume` hpla2 return (toLiteral hpla, sub) else throwError $ T.unpack $ pretty l1 <+> "does not subsume" <+> pretty l2 <+> "because they don't have the same arity" where toLiteral (h:p:xs) = Literal h p xs toLiteral _ = error "subsumeLiteral.toLiteral" -- ---------------------------------------------------------------------- -- * Unification -- ---------------------------------------------------------------------- -- | Return the list of minimal ways to unify two semantics, ie. -- where any literals that are not the product of a succesful unification -- really do not unify with anything else. unifySem :: Sem -> Sem -> [(Sem,Subst)] unifySem xs ys = map (first sortSem) $ if length xs < length ys then unifySemH xs ys else unifySemH ys xs -- | Helper traversal for 'unifySem' unifySemH :: Sem -> Sem -> [(Sem,Subst)] unifySemH [] [] = return ([], Map.empty) unifySemH [] xs = return (xs, Map.empty) unifySemH xs [] = error $ "unifySem: shorter list should always be in front: " ++ prettyStr xs unifySemH (x:xs) ys = nub $ do -- list monad for Prolog-style backtracking. let attempts = zip ys $ map (hush . unifyLiteral x) ys if all (isNothing . snd) attempts then first (x:) `fmap` unifySemH xs ys -- only include x unmolested if no unification succeeds else do (y, Just (x2, subst)) <- attempts let next_xs = replace subst xs next_ys = replace subst $ delete y ys prepend = insert x2 *** appendSubst subst prepend `fmap` unifySemH next_xs next_ys -- | Two literals unify if they have the same arity, and their -- handles, predicates, and arguments also unify unifyLiteral :: MonadUnify m => Literal GeniVal -> Literal GeniVal -> m (Literal GeniVal, Subst) unifyLiteral l1@(Literal h1 p1 la1) l2@(Literal h2 p2 la2) = if length la1 == length la2 then do let hpla1 = h1:p1:la1 hpla2 = h2:p2:la2 (hpla, sub) <- hpla1 `unify` hpla2 return (toLiteral hpla, sub) else throwError $ T.unpack $ pretty l1 <+> "does not unify with" <+> pretty l2 <+> "because they don't have the same arity" where toLiteral (h:p:xs) = Literal h p xs toLiteral _ = error "unifyLiteral.toLiteral" -- ---------------------------------------------------------------------- -- -- ---------------------------------------------------------------------- {-! deriving instance NFData Literal deriving instance Binary Literal !-} -- GENERATED START instance NFData g => NFData (Literal g) where rnf (Literal x1 x2 x3) = rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` () instance Binary g => Binary (Literal g) where put (Literal x1 x2 x3) = do put x1 put x2 put x3 get = do x1 <- get x2 <- get x3 <- get return (Literal x1 x2 x3) -- GENERATED STOP