-- {-# LANGUAGE Haskell2010 #-} {-# LANGUAGE EmptyCase, PostfixOperators, TupleSections, NamedFieldPuns, BangPatterns, BinaryLiterals, HexFloatLiterals, NumericUnderscores, GADTSyntax, RankNTypes, TypeApplications, PolyKinds, ExistentialQuantification, TypeOperators, ConstraintKinds, ExplicitForAll, KindSignatures, NamedWildCards, ScopedTypeVariables, FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, ConstrainedClassMethods, InstanceSigs, TypeSynonymInstances, DeriveDataTypeable, DeriveFoldable, DeriveFunctor, DeriveTraversable, StandaloneDeriving, EmptyDataDeriving, DeriveLift, DeriveGeneric #-} -- Deleted GeneralisedNewtypeDeriving, because it is not compatible with Safe -- Deleted ImportQualifiedPost, StandaloneKindSignatures, because they are not supported in ghc 8.8.1 {-# LANGUAGE MonoLocalBinds #-} -- {-# LANGUAGE ApplicativeDo #-} module ParserUnbiasedChoiceMonadEmbedding ({- $Example -} ArrowProd(..), msym, sym, lift, loc, arrowRule, arrowParser, showLoc, positionToLoc) where import Prelude hiding (id, (.)) import Control.Category(Category(..)) import Control.Arrow(Arrow(..)) import Control.Applicative(Alternative(..)) import Text.Earley(Prod, Grammar, Parser, terminal, rule, parser) import Data.Loc(L(..), Loc(..), (<-->), Pos(..)) import Data.Traversable(for) newtype ArrowProd r e t m b c = ArrowProd (Prod r e (L t) (L (b -> m c))) instance Monad m => Category (ArrowProd r e t m) where id = ArrowProd $ pure $ L NoLoc pure ArrowProd x . ArrowProd y = ArrowProd $ do { ~(L yl ya) <- y; ~(L xl xa) <- x; pure $ L (yl <--> xl) $ \v1 -> do { v2 <- ya v1; xa v2; }; } instance Monad m => Arrow (ArrowProd r e t m) where arr f = ArrowProd $ pure $ L NoLoc (\b -> pure $ f b) first (ArrowProd x) = ArrowProd $ do { ~(L l arr1) <- x; pure $ L l $ \(b, d) -> do { c <- arr1 b; pure (c, d); }; } instance Functor m => Functor (ArrowProd r e t m b) where fmap f (ArrowProd x) = ArrowProd $ do { ~(L l arr1) <- x; pure $ L l $ \b -> f <$> arr1 b; } instance Applicative m => Applicative (ArrowProd r e t m b) where pure c = ArrowProd $ pure $ L NoLoc $ \_ -> pure c ArrowProd x <*> ArrowProd y = ArrowProd $ do { ~(L xl xa) <- x; ~(L yl ya) <- y; pure $ L (xl <--> yl) $ \b -> do { xv <- xa b; yv <- ya b; pure $ xv yv; }; } instance Applicative m => Alternative (ArrowProd r e t m b) where empty = ArrowProd empty ArrowProd x <|> ArrowProd y = ArrowProd $ x <|> y -- We really need to define "many" and "some" here. Because they are special-cased in "Earley", see https://github.com/ollef/Earley/issues/55 many (ArrowProd x) = ArrowProd $ do { list <- many x; pure $ L (foldl (<-->) NoLoc $ map (\(L l _) -> l) list) $ \b -> for list $ \(L _ arr1) -> arr1 b; } some x = (:) <$> x <*> many x msym :: Applicative m => (t -> Maybe c) -> ArrowProd r e t m () c msym f = ArrowProd $ terminal $ \(L l input) -> case f input of { Just result -> Just $ L l (\() -> pure result); Nothing -> Nothing; } sym :: (Applicative m, Eq t) => t -> ArrowProd r e t m () t sym tok = msym (\input -> if input == tok then Just input else Nothing) lift :: ArrowProd r e t m (m c) c lift = ArrowProd $ pure $ L NoLoc id loc :: Applicative m => ArrowProd r e t m b c -> ArrowProd r e t m b (L c) loc (ArrowProd x) = ArrowProd $ do { ~(L l a) <- x; pure $ L l $ \b -> (L l) <$> a b; } arrowRule :: ArrowProd r e t m b c -> Grammar r (ArrowProd r e t m b c) arrowRule (ArrowProd x) = ArrowProd <$> rule x arrowParser :: (forall r. Grammar r (ArrowProd r e t m () c)) -> Parser e [L t] (m c) arrowParser g = parser $ do { ArrowProd x <- g; pure $ do { ~(L _ a) <- x; pure $ a (); }; } showLoc :: Loc -> String showLoc NoLoc = "-:1:1-1:1" showLoc (Loc (Pos file line1 col1 _) (Pos _ line2 col2 _)) = file ++ ":" ++ show line1 ++ ":" ++ show col1 ++ "-" ++ show line2 ++ ":" ++ show col2 -- | This function is buggy: if a file contains no tokens, the file name will not be returned positionToLoc :: [L a] -> Int -> Loc positionToLoc list position = if position < length list then let L l _ = list !! position in l else if position == 0 then NoLoc else let L l _ = list !! (position - 1) in l {- $Example Example. Let's parse (and calculate) such expressions: "( 1 * 2 / 3 )". We will use tokenizer similar to "words", so we place spaces between all tokens. Parens () are not mandatory around every subterm, i. e. ( 1 * 2 ) is ok and 1 * 2 is ok, too. >>> :set -XHaskell2010 >>> :set -XRecursiveDo >>> :set -XArrows >>> import Control.Arrow(returnA) >>> import Text.Earley(fullParses, Report(..)) >>> import Control.Monad(when) >>> import Language.Lexer.Applicative(Lexer, token, longest, whitespace, streamToEitherList, runLexer, LexicalError(..)) >>> import Text.Regex.Applicative(psym, string) >>> import ParserUnbiasedChoiceMonadEmbedding >>> :{ grammar :: Grammar r (ArrowProd r () String (Either String) () Int) grammar = mdo { num <- arrowRule $ msym $ \tok -> if all (`elem` ['0'..'9']) tok then Just $ read tok else Nothing; term <- arrowRule $ num <|> (sym "(" *> expr <* sym ")"); expr <- arrowRule $ term <|> ((*) <$> (expr <* sym "*") <*> term) <|> (proc () -> do { L xl x <- loc expr -< (); sym "/" -< (); L yl y <- loc term -< (); lift -< when (y == 0) $ Left $ "Division by zero. Attempting to divide this expression: " ++ showLoc xl ++ " by this expression: " ++ showLoc yl; returnA -< div x y; }); return expr; } lexer :: Lexer String lexer = mconcat [ token $ longest $ many $ psym (/= ' '), whitespace $ longest $ string " " ] :} >>> :{ case streamToEitherList $ runLexer lexer "-" "6 / 3 * 21" of { Left (LexicalError (Pos _ line col _)) -> putStrLn $ "Lexer error at " ++ show line ++ " " ++ show col; Right tokens -> case fullParses (arrowParser grammar) tokens of { ([], Report pos _ _) -> putStrLn $ "Parser error: " ++ showLoc (positionToLoc tokens pos) ++ ": no parse"; ([m], _) -> case m of { Left e -> putStrLn $ "Semantic error: " ++ e; Right x -> putStrLn $ "Result: " ++ show x; }; (_, _) -> putStrLn "Ambiguity"; }; } :} Result: 42 -}