module CCO.Parsing (
Symbol (describe)
, Parser
, satisfy
, eof
, sourcePos
, lexeme
, (<!>)
, choice
, opt
, chainl
, chainr
, manySepBy
, someSepBy
, parse
, parse_
) where
import CCO.Feedback (Feedback, errorMessage)
import CCO.Lexing hiding (satisfy)
import CCO.SourcePos (Source (..), Pos (..), SourcePos (..))
import CCO.Printing hiding (empty)
import qualified CCO.Printing as P (empty)
import Control.Applicative
import Data.Function (on)
import Data.List (nub)
import Prelude hiding (lex)
class Symbol s where
describe :: s -> String -> String
instance Symbol Char where
describe _ = show
data Steps a
= Done a
| Fail SourcePos (Maybe String) ([String] -> [String])
| LexFail (Maybe String) SourcePos String
| Step (Steps a)
best :: Steps a -> Steps a -> Steps a
best steps@(LexFail _ _ _) _ = steps
best _ steps@(LexFail _ _ _) = steps
best (Fail srcpos unexp acc) (Fail _ _ acc')
= Fail srcpos unexp (acc . acc')
best (Fail _ _ _) steps = steps
best steps (Fail _ _ _) = steps
best steps@(Done _) _ = steps
best _ steps@(Done _) = steps
best (Step steps) (Step steps') = Step (best steps steps')
eval :: Steps a -> Feedback a
eval (Done x) = return x
eval (Fail srcpos unexp acc) = errorMessage . pp $
UnexpectedSymbol srcpos unexp (nub (acc []))
eval (LexFail mmsg srcpos lx) = errorMessage (pp (LexicalError mmsg srcpos lx))
eval (Step steps) = eval steps
infixl 2 <!>
newtype Parser s a
= P {unP :: forall h r.
((h, a) -> Maybe ([String] -> [String]) ->
Symbols s -> Steps r) ->
h ->
Maybe ([String] -> [String]) ->
Symbols s ->
Steps r
}
instance Functor (Parser s) where
fmap f (P p) = P (\k -> p (\(h, x) -> k (h, f x)))
instance Applicative (Parser s) where
pure x = P (\k h -> k (h, x))
P p <*> P q = P (\k -> p (q (\((h, f), x) -> k (h, f x))))
instance Symbol s => Alternative (Parser s) where
empty = P $ \_ _ macc (Symbols src units) -> case units of
[] -> Fail (SourcePos src EOF) Nothing
(maybe id id macc)
Token s pos lx _ : _ -> Fail (SourcePos src pos)
(Just (describe s lx)) (maybe id id macc)
Error pos lx _ : _ -> LexFail Nothing (SourcePos src pos) lx
Msg msg pos lx _ : _ -> LexFail (Just msg) (SourcePos src pos) lx
P p <|> P q = P (\k h macc syms -> p k h macc syms `best` q k h macc syms)
satisfy :: Symbol s => (s -> Bool) -> Parser s s
satisfy test = P p
where
p _ _ macc (Symbols src [])
= Fail (SourcePos src EOF) Nothing (maybe id id macc)
p k h macc (Symbols src (Token s pos lx _ : units))
| test s = Step (k (h, s) Nothing (Symbols src units))
| otherwise = Fail (SourcePos src pos) (Just (describe s lx))
(maybe id id macc)
p _ _ _ (Symbols src (Error pos lx _ : _))
= LexFail Nothing (SourcePos src pos) lx
p _ _ _ (Symbols src (Msg msg pos lx _ : _))
= LexFail (Just msg) (SourcePos src pos) lx
eof :: Symbol s => Parser s ()
eof = P p
where
p k h _ syms@(Symbols _ [])
= Step (k (h, ()) Nothing syms)
p _ _ _ (Symbols src (Token s pos lx _ : _))
= Fail (SourcePos src pos) (Just (describe s lx)) ("end of input" :)
p _ _ _ (Symbols src (Error pos lx _ : _))
= LexFail Nothing (SourcePos src pos) lx
p _ _ _ (Symbols src (Msg msg pos lx _ : _))
= LexFail (Just msg) (SourcePos src pos) lx
sourcePos :: Parser s SourcePos
sourcePos = P p
where
p k h macc syms@(Symbols src [])
= k (h, SourcePos src EOF) macc syms
p k h macc syms@(Symbols src (Token _ pos _ _ : _))
= k (h, SourcePos src pos) macc syms
p k h macc syms@(Symbols src (Error pos _ _ : _))
= k (h, SourcePos src pos) macc syms
p k h macc syms@(Symbols src (Msg _ pos _ _ : _))
= k (h, SourcePos src pos) macc syms
lexeme :: Parser s String
lexeme = P p
where
p k h _ (Symbols src [])
= Fail (SourcePos src EOF) Nothing ("any symbol" :)
p k h macc syms@(Symbols _ (Token _ _ lx _ : _)) = k (h, lx) macc syms
p k h macc syms@(Symbols _ (Error _ lx _ : _ )) = k (h, lx) macc syms
p k h macc syms@(Symbols _ (Msg _ _ lx _ : _)) = k (h, lx) macc syms
(<!>) :: Parser s a -> String -> Parser s a
P p <!> prod = P q where
q k h Nothing = p (\h macc' syms -> k h macc' syms) h (Just (prod :))
q k h macc = p (\h macc' syms -> k h macc' syms) h macc
infixl 3 `opt`
choice :: Symbol s => [Parser s a] -> Parser s a
choice [] = empty
choice [parser] = parser
choice parsers = foldr1 (<|>) parsers
opt :: Symbol s => Parser s a -> a -> Parser s a
opt parser x = parser <|> pure x
chainl :: Symbol s => Parser s (a -> a -> a) -> Parser s a -> Parser s a
chainl op elem = foldl (flip ($)) <$> elem <*> many (flip <$> op <*> elem)
chainr :: Symbol s => Parser s (a -> a -> a) -> Parser s a -> Parser s a
chainr op elem = parser
where
parser = elem <**> (flip <$> op <*> parser `opt` id)
manySepBy :: Symbol s => Parser s b -> Parser s a -> Parser s [a]
manySepBy sep elem = (:) <$> elem <*> many (sep *> elem) <|> pure []
someSepBy :: Symbol s => Parser s b -> Parser s a -> Parser s [a]
someSepBy sep elem = (:) <$> elem <*> many (sep *> elem)
parse :: Parser s a -> Symbols s -> Feedback (a, Symbols s)
parse (P p) = eval . p (\(_, x) _ syms -> Done (x, syms)) () Nothing
parse_ :: Lexer s -> Parser s a -> Source -> String -> Feedback a
parse_ lexer parser src = fmap fst . parse parser . lex lexer src
data Diagnosis
= LexicalError (Maybe String) SourcePos String
| UnexpectedSymbol SourcePos (Maybe String) [String]
instance Printable Diagnosis where pp = ppDiagnosis
ppDiagnosis :: Diagnosis -> Doc
ppDiagnosis (LexicalError Nothing srcpos lx)
= above [ppHeader, text " ", ppUnexpected]
where
ppHeader = wrapped $
describeSourcePos srcpos ++ ": Lexical error."
ppUnexpected = text "? unexpected : " >|< text lx
ppDiagnosis (LexicalError (Just msg) srcpos _)
= wrapped (describeSourcePos srcpos ++ ": Lexical error: " ++ msg ++ ".")
ppDiagnosis (UnexpectedSymbol srcpos unexp exp)
= above [ppHeader, text " ", ppUnexpected, ppExpected]
where
ppHeader = wrapped $
describeSourcePos srcpos ++ ": Syntax error."
ppUnexpected = text "? unexpected : " >|<
wrapped (describeUnexpected unexp)
ppExpected | null exp = P.empty
| otherwise = text "? expected : " >|<
wrapped (disjunction exp)
describeSourcePos :: SourcePos -> String
describeSourcePos (SourcePos (File file) (Pos ln col))
= file ++
":line " ++ show ln ++
":column " ++ show col
describeSourcePos (SourcePos (File file) EOF) = file ++
":<at end of file>"
describeSourcePos (SourcePos Stdin (Pos ln col)) = "line " ++ show ln ++
":column " ++ show col
describeSourcePos (SourcePos Stdin EOF) = "<at end of input>"
describeUnexpected :: Maybe String -> String
describeUnexpected Nothing = "end of input"
describeUnexpected (Just sym) = sym
disjunction :: [String] -> String
disjunction [x] = x
disjunction [x, y] = x ++ " or " ++ y
disjunction [x, y, z] = x ++ ", " ++ y ++ ", or " ++ z
disjunction (x : xs) = x ++ ", " ++ disjunction xs