------------------------------------------------------------------------ -- Precedence graphs ------------------------------------------------------------------------ {-# LANGUAGE FlexibleContexts, Rank2Types #-} module PrecedenceGraph -- * Precedence graphs. ( Name , Assoc(..) , Fixity(..) , Node , PrecedenceGraph -- * Constructing precedence graphs. , empty , bindsAs , bindsBetween , unrelated -- * Parsing expressions. , Token(..) , Expr(..) , expressionParser -- * Testing. , tests ) where import Parser import qualified Data.List as List import qualified Data.Set as Set import Data.Set (Set) import qualified Data.Map as Map import Data.Map (Map) import qualified Data.Graph.Inductive as G import Data.Graph.Inductive ((&)) import Control.Applicative hiding (empty) import Control.Monad import qualified Control.Applicative as A import qualified Control.Monad.State as S import qualified Control.Monad.Identity as I import Data.Function import Test.QuickCheck ------------------------------------------------------------------------ -- Some helper functions -- | Converts a set to a list and maps over it. mapM' :: Monad m => (a -> m b) -> Set a -> m [b] mapM' f = mapM f . Set.toList -- | An efficient variant of 'List.nub'. efficientNub :: Ord a => [a] -> [a] efficientNub = removeDups . List.sort where removeDups = map head . List.group -- Code used to test efficientNub. data IgnoreSnd a b = Pair a b deriving Show fst' :: IgnoreSnd a b -> a fst' (Pair x y) = x instance (Eq a, Eq b) => Eq (IgnoreSnd a b) where (==) = (==) `on` fst' instance (Ord a, Eq b) => Ord (IgnoreSnd a b) where compare = compare `on` fst' instance (Arbitrary a, Arbitrary b) => Arbitrary (IgnoreSnd a b) where arbitrary = liftM2 Pair arbitrary arbitrary -- | This property tests that 'efficientNub' is equivalent to 'nub', -- up to a permutation of the output. Note that the property checks -- that the two functions choose the same representative from every -- equivalence class. prop_efficientNub :: [IgnoreSnd Integer Int] -> Property prop_efficientNub xs = classify nonTriv "with non-trivial equivalence classes" $ efficientNub xs =*= List.sort (List.nub xs) where xs =*= ys = length xs == length ys && and (zipWith reallyEqual xs ys) reallyEqual (Pair x y) (Pair u v) = x == u && y == v nonTriv = any ((> 1) . length) $ map (List.nubBy reallyEqual) $ List.group $ List.sort xs ------------------------------------------------------------------------ -- Types -- | A name is a /non-empty/ list of /non-empty/ name parts. type Name = [String] -- | Associativity. data Assoc = Non | L | R deriving (Eq, Ord, Show) -- | Fixity. data Fixity = Prefix | Postfix | Infix Assoc deriving (Eq, Ord, Show) -- | Precedence graph node labels. type Node = Int -- | Precedence graphs. -- Invariant: Has to be acyclic. newtype PrecedenceGraph = PG { unPG :: G.Gr (Map Fixity (Set Name)) () } deriving Show ------------------------------------------------------------------------ -- Inspecting precedence graphs -- | The successors of a node. successors :: PrecedenceGraph -> Node -> Set Node successors g n = Set.fromList $ G.suc (unPG g) n -- | A node's annotation. annotation :: PrecedenceGraph -> Node -> Map Fixity (Set Name) annotation g n = case G.lab (unPG g) n of Nothing -> Map.empty Just ann -> ann -- | The nodes in the graph. nodes :: PrecedenceGraph -> Set Node nodes = Set.fromList . G.nodes . unPG -- | Is the graph acyclic? -- Check this by ensuring that the graph is simple (no loops) and all -- strongly connected components have size 1. acyclic :: PrecedenceGraph -> Bool acyclic (PG g) = G.isSimple g && all (\c -> length c == 1) (G.scc g) ------------------------------------------------------------------------ -- Constructing precedence graphs -- An empty precedence graph. empty :: PrecedenceGraph empty = PG G.empty -- @bindsAs op fix n g@ adds @op@ (with fixity @fix@) to node @n@. -- -- Precondition: @n@ has to exist in @g@, and @n@ should not already -- be annotated with @op@. (The second precondition is not checked.) bindsAs :: Name -> Fixity -> Node -> PrecedenceGraph -> PrecedenceGraph bindsAs op fix n (PG g) = case G.match n g of (Nothing, g') -> error "bindsAs: The node does not exist." (Just (pre, n, a, suc), g') -> PG ((pre, n, Map.adjust (Set.insert op) fix a, suc) & g') -- @bindsBetween op fix tighterThan looserThan n g@ adds a new node to -- @g@, annotated with @op@ (with fixity @fix@). Edges are added from -- all nodes in @tighterThan@, and their predecessors. Edges are also -- added to all nodes in @looserThan@, and their successors. The new -- node label is returned along with the new graph. -- -- Precondition: The resulting graph has to be acyclic. bindsBetween :: Name -> Fixity -> [Node] -> [Node] -> PrecedenceGraph -> (Node, PrecedenceGraph) bindsBetween op fixity tighterThan looserThan (PG g) -- The test for acyclicity is rather slow. | True {- acyclic g' -} = (new, g') | otherwise = error "bindsBetween: Cyclic result." where [new] = G.newNodes 1 g allLooserThan = looserThan : map (G.suc g) looserThan allTighterThan = tighterThan : map (G.pre g) tighterThan fix = map ((,) ()) . efficientNub . concat ctxt = ( fix allTighterThan , new , Map.singleton fixity (Set.singleton op) , fix allLooserThan ) g' = PG (ctxt & g) -- @unrelated op fix g@ add a fresh node to @g@, annotated with @op@ -- (with fixity @fix@). No new edges are added. unrelated :: Name -> Fixity -> PrecedenceGraph -> (Node, PrecedenceGraph) unrelated op fix = bindsBetween op fix [] [] ------------------------------------------------------------------------ -- Turning graphs into expression parsers -- | Tokens. data Token = Atom | Name String deriving (Eq, Ord, Show) -- | Expressions. data Expr = AtomE | Op String [Expr] deriving (Eq, Ord, Show) -- | The internal, mixfix part of an operator (the name, excluding outer -- '_', plus the internal expressions). type Op = (String, [Expr]) -- | The parser type used below. The state component is used to -- memoise the computation of node parsers. type P r = forall p. Parser p Node Expr Token => S.State (Map Node (p Expr)) (p r) -- | Expression parser. Parameterised on a graph describing the -- operators. expressionParser :: Parser p Node Expr Token => PrecedenceGraph -> p Expr expressionParser g = S.evalState (expr g (nodes g)) Map.empty -- | Parses a subset of the expressions. Only the nodes reachable from -- the given list of nodes are recognised by the parser. -- Note that Atom stands for applications of one or more identifiers, -- parenthesised expressions, or mixfix operators that are not prefix, -- postfix or infix. Hence the atom parser will probably be -- implemented using expressionParser. expr :: PrecedenceGraph -> Set Node -> P Expr expr g ns = do ns <- mapM' (node g) ns return $ (AtomE <$ sym Atom) <|> choice ns -- | Parser for one operator (just the internal, mixfix part). -- Note that this function uses the non-memoised (expressionParser g) -- instead of the memoised (expr g (nodes g)). The reason is that -- otherwise the memoisation is not sufficiently productive. This -- could be fixed by inserting the parsers into the memo table -- _before_ computing them, by using recursive do in "node" below. -- However, I think recursive do is too complicated. The current -- solution is easier to understand and gives roughly the same -- performance. opProd :: PrecedenceGraph -> Name -> P Op opProd g nameParts = return $ (,) (List.intercalate "_" nameParts) <$> (expressionParser g `between` map Name nameParts) -- | Parser for several operators. opProds :: PrecedenceGraph -> Set Name -> P Op opProds g ops = fmap choice (mapM' (opProd g) ops) appLeft :: Expr -> Op -> Expr appLeft e (n, es) = Op ('_' : n) (e : es) appRight :: Op -> Expr -> Expr appRight (n, es) e = Op (n ++ "_") (es ++ [e]) appBoth :: Op -> Expr -> Expr -> Expr appBoth (n, es) l r = Op ('_' : n ++ "_") (l : es ++ [r]) -- | Parser for a node. -- -- The graph typically has lots of sharing (many pointers to the same -- node), so this function is memoised. In two ways, actually: -- -- 1) The construction of the parsers is memoised. -- -- 2) If a memoising parser is used the results of parsing a given -- node at a specific position are also memoised. -- -- Note that the second memoisation is potentially unsafe, if this -- parser is combined with another memoised parser. The memoisation -- keys have to be unique. node :: PrecedenceGraph -> Node -> P Expr node g n = do memoisedP <- fmap (Map.lookup n) S.get case memoisedP of Just p -> return p Nothing -> do -- Parser for operators of higher precedence. h <- expr g (successors g n) p <- fmap (memoise n . choice) $ sequence (opParsers h) S.modify (Map.insert n p) return p where m ! k = case Map.lookup k m of Nothing -> Set.empty Just ns -> ns ops fixity f = fmap f (opProds g (annotation g n ! fixity)) -- Parser for the internal parts of operators -- of the given fixity (in this node). opParsers h = [ ops Prefix (\o -> flip (foldr appRight) <$> many1 o <*> h) , ops Postfix (\o -> foldl appLeft <$> h <*> many1 o) , ops (Infix Non) (\o -> flip appBoth <$> h <*> o <*> h) , ops (Infix L) (\o -> chainl3 h (appBoth <$> o)) , ops (Infix R) (\o -> chainr3 h (appBoth <$> o)) ] ------------------------------------------------------------------------ -- All test cases -- | All tests from this module. tests = do quickCheck prop_efficientNub