{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} module Hledger.Utils.Parse ( -- * Some basic hledger parser flavours SimpleStringParser, SimpleTextParser, TextParser, -- * SourcePos SourcePos(..), mkPos, unPos, initialPos, sourcePosPretty, sourcePosPairPretty, -- * Parsers and helpers choice', choiceInState, surroundedBy, parsewith, runTextParser, rtp, parsewithString, parseWithState, parseWithState', fromparse, parseerror, showDateParseError, nonspace, isNewline, isNonNewlineSpace, restofline, eolof, spacenonewline, skipNonNewlineSpaces, skipNonNewlineSpaces1, skipNonNewlineSpaces', -- ** Trace the state of hledger parsers dbgparse, -- * More helpers, previously in Text.Megaparsec.Custom -- ** Custom parse error types HledgerParseErrorData, HledgerParseErrors, -- ** Failing with an arbitrary source position parseErrorAt, parseErrorAtRegion, -- ** Re-parsing SourceExcerpt, getExcerptText, excerpt_, reparseExcerpt, -- ** Pretty-printing custom parse errors customErrorBundlePretty, -- ** "Final" parse errors FinalParseError, FinalParseError', FinalParseErrorBundle, FinalParseErrorBundle', -- *** Constructing "final" parse errors finalError, finalFancyFailure, finalFail, finalCustomFailure, -- *** Pretty-printing "final" parse errors finalErrorBundlePretty, attachSource, -- *** Handling parse errors from include files with "final" parse errors parseIncludeFile, ) where import Control.Monad (when) import qualified Data.Text as T import Safe (tailErr) import Text.Megaparsec import Text.Printf import Control.Monad.State.Strict (StateT, evalStateT) import Data.Char import Data.Functor (void) import Data.Functor.Identity (Identity(..)) import Data.List import Data.Text (Text) import Text.Megaparsec.Char -- import Text.Megaparsec.Debug (dbg) import Control.Monad.Except (ExceptT, MonadError, catchError, throwError) import Control.Monad.Trans.Class (lift) import qualified Data.List.NonEmpty as NE import Data.Monoid (Alt(..)) import qualified Data.Set as S import Hledger.Utils.Debug (debugLevel, dbg0Msg) -- | A parser of string to some type. type SimpleStringParser a = Parsec HledgerParseErrorData String a -- | A parser of strict text to some type. type SimpleTextParser = Parsec HledgerParseErrorData Text -- XXX an "a" argument breaks the CsvRulesParser declaration somehow -- | A parser of text that runs in some monad. type TextParser m a = ParsecT HledgerParseErrorData Text m a -- class (Stream s, MonadPlus m) => MonadParsec e s m -- dbgparse :: (MonadPlus m, MonadParsec e String m) => Int -> String -> m () -- | Trace to stderr or log to debug log the provided label (if non-null) -- and current parser state (position and next input), -- if the global debug level is at or above the specified level. -- See also: Hledger.Utils.Debug, megaparsec's dbg. -- Uses unsafePerformIO. -- XXX Can be hard to make this evaluate. dbgparse :: Int -> String -> TextParser m () dbgparse level msg = when (level <= debugLevel) $ do pos <- getSourcePos next <- (T.take peeklength) `fmap` getInput let (l,c) = (sourceLine pos, sourceColumn pos) s = printf "at line %2d col %2d: %s" (unPos l) (unPos c) (show next) :: String s' = printf ("%-"++show (peeklength+30)++"s") s ++ " " ++ msg dbg0Msg s' $ return () where peeklength = 30 -- | Render a pair of source positions in human-readable form, only displaying the range of lines. sourcePosPairPretty :: (SourcePos, SourcePos) -> String sourcePosPairPretty (SourcePos fp l1 _, SourcePos _ l2 c2) = fp ++ ":" ++ show (unPos l1) ++ "-" ++ show l2' where l2' = if unPos c2 == 1 then unPos l2 - 1 else unPos l2 -- might be at end of file with a final new line -- | Backtracking choice, use this when alternatives share a prefix. -- Consumes no input if all choices fail. choice' :: [TextParser m a] -> TextParser m a choice' = choice . map try -- | Backtracking choice, use this when alternatives share a prefix. -- Consumes no input if all choices fail. choiceInState :: [StateT s (ParsecT HledgerParseErrorData Text m) a] -> StateT s (ParsecT HledgerParseErrorData Text m) a choiceInState = choice . map try surroundedBy :: Applicative m => m openclose -> m a -> m a surroundedBy p = between p p parsewith :: Parsec e Text a -> Text -> Either (ParseErrorBundle Text e) a parsewith p = runParser p "" -- | Run a text parser in the identity monad. See also: parseWithState. runTextParser, rtp :: TextParser Identity a -> Text -> Either HledgerParseErrors a runTextParser = parsewith rtp = runTextParser parsewithString :: Parsec e String a -> String -> Either (ParseErrorBundle String e) a parsewithString p = runParser p "" -- | Run a stateful parser with some initial state on a text. -- See also: runTextParser, runJournalParser. parseWithState :: Monad m => st -> StateT st (ParsecT HledgerParseErrorData Text m) a -> Text -> m (Either HledgerParseErrors a) parseWithState ctx p = runParserT (evalStateT p ctx) "" parseWithState' :: (Stream s) => st -> StateT st (ParsecT e s Identity) a -> s -> (Either (ParseErrorBundle s e) a) parseWithState' ctx p = runParser (evalStateT p ctx) "" fromparse :: (Show t, Show (Token t), Show e) => Either (ParseErrorBundle t e) a -> a fromparse = either parseerror id parseerror :: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> a parseerror e = errorWithoutStackTrace $ showParseError e -- PARTIAL: showParseError :: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> String showParseError e = "parse error at " ++ show e showDateParseError :: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> String showDateParseError e = printf "date parse error (%s)" (intercalate ", " $ tailErr $ lines $ show e) -- PARTIAL tailError won't be null because showing a parse error isNewline :: Char -> Bool isNewline '\n' = True isNewline _ = False nonspace :: TextParser m Char nonspace = satisfy (not . isSpace) isNonNewlineSpace :: Char -> Bool isNonNewlineSpace c = not (isNewline c) && isSpace c spacenonewline :: (Stream s, Char ~ Token s) => ParsecT HledgerParseErrorData s m Char spacenonewline = satisfy isNonNewlineSpace {-# INLINABLE spacenonewline #-} restofline :: TextParser m String restofline = anySingle `manyTill` eolof -- Skip many non-newline spaces. skipNonNewlineSpaces :: (Stream s, Token s ~ Char) => ParsecT HledgerParseErrorData s m () skipNonNewlineSpaces = void $ takeWhileP Nothing isNonNewlineSpace {-# INLINABLE skipNonNewlineSpaces #-} -- Skip many non-newline spaces, failing if there are none. skipNonNewlineSpaces1 :: (Stream s, Token s ~ Char) => ParsecT HledgerParseErrorData s m () skipNonNewlineSpaces1 = void $ takeWhile1P Nothing isNonNewlineSpace {-# INLINABLE skipNonNewlineSpaces1 #-} -- Skip many non-newline spaces, returning True if any have been skipped. skipNonNewlineSpaces' :: (Stream s, Token s ~ Char) => ParsecT HledgerParseErrorData s m Bool skipNonNewlineSpaces' = True <$ skipNonNewlineSpaces1 <|> pure False {-# INLINABLE skipNonNewlineSpaces' #-} eolof :: TextParser m () eolof = void newline <|> eof -- A bunch of megaparsec helpers, eg for re-parsing (formerly in Text.Megaparsec.Custom). -- I think these are generic apart from the HledgerParseError name. --- * Custom parse error types -- | Custom error data for hledger parsers. Specialised for a 'Text' parse stream. -- ReparseableTextParseErrorData ? data HledgerParseErrorData -- | Fail with a message at a specific source position interval. The -- interval must be contained within a single line. = ErrorFailAt Int -- Starting offset Int -- Ending offset String -- Error message -- | Re-throw parse errors obtained from the "re-parsing" of an excerpt -- of the source text. | ErrorReparsing (NE.NonEmpty (ParseError Text HledgerParseErrorData)) -- Source fragment parse errors deriving (Show, Eq, Ord) -- | A specialised version of ParseErrorBundle: -- a non-empty collection of hledger parse errors, -- equipped with PosState to help pretty-print them. -- Specialised for a 'Text' parse stream. type HledgerParseErrors = ParseErrorBundle Text HledgerParseErrorData -- We require an 'Ord' instance for 'CustomError' so that they may be -- stored in a 'Set'. The actual instance is inconsequential, so we just -- derive it, but the derived instance requires an (orphan) instance for -- 'ParseError'. Hopefully this does not cause any trouble. deriving instance Ord (ParseError Text HledgerParseErrorData) -- Note: the pretty-printing of our 'HledgerParseErrorData' type is only partally -- defined in its 'ShowErrorComponent' instance; we perform additional -- adjustments in 'customErrorBundlePretty'. instance ShowErrorComponent HledgerParseErrorData where showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg showErrorComponent (ErrorReparsing _) = "" -- dummy value errorComponentLen (ErrorFailAt startOffset endOffset _) = endOffset - startOffset errorComponentLen (ErrorReparsing _) = 1 -- dummy value --- * Failing with an arbitrary source position -- | Fail at a specific source position, given by the raw offset from the -- start of the input stream (the number of tokens processed at that -- point). parseErrorAt :: Int -> String -> HledgerParseErrorData parseErrorAt offset = ErrorFailAt offset (offset+1) -- | Fail at a specific source interval, given by the raw offsets of its -- endpoints from the start of the input stream (the numbers of tokens -- processed at those points). -- -- Note that care must be taken to ensure that the specified interval does -- not span multiple lines of the input source. This will not be checked. parseErrorAtRegion :: Int -- ^ Start offset -> Int -- ^ End end offset -> String -- ^ Error message -> HledgerParseErrorData parseErrorAtRegion startOffset endOffset msg = if startOffset < endOffset then ErrorFailAt startOffset endOffset msg' else ErrorFailAt startOffset (startOffset+1) msg' where msg' = "\n" ++ msg --- * Re-parsing -- | A fragment of source suitable for "re-parsing". The purpose of this -- data type is to preserve the content and source position of the excerpt -- so that parse errors raised during "re-parsing" may properly reference -- the original source. data SourceExcerpt = SourceExcerpt Int -- Offset of beginning of excerpt Text -- Fragment of source file -- | Get the raw text of a source excerpt. getExcerptText :: SourceExcerpt -> Text getExcerptText (SourceExcerpt _ txt) = txt -- | 'excerpt_ p' applies the given parser 'p' and extracts the portion of -- the source consumed by 'p', along with the source position of this -- portion. This is the only way to create a source excerpt suitable for -- "re-parsing" by 'reparseExcerpt'. -- This function could be extended to return the result of 'p', but we don't -- currently need this. excerpt_ :: MonadParsec HledgerParseErrorData Text m => m a -> m SourceExcerpt excerpt_ p = do offset <- getOffset (!txt, _) <- match p pure $ SourceExcerpt offset txt -- | 'reparseExcerpt s p' "re-parses" the source excerpt 's' using the -- parser 'p'. Parse errors raised by 'p' will be re-thrown at the source -- position of the source excerpt. -- -- In order for the correct source file to be displayed when re-throwing -- parse errors, we must ensure that the source file during the use of -- 'reparseExcerpt s p' is the same as that during the use of 'excerpt_' -- that generated the source excerpt 's'. However, we can usually expect -- this condition to be satisfied because, at the time of writing, the -- only changes of source file in the codebase take place through include -- files, and the parser for include files neither accepts nor returns -- 'SourceExcerpt's. reparseExcerpt :: Monad m => SourceExcerpt -> ParsecT HledgerParseErrorData Text m a -> ParsecT HledgerParseErrorData Text m a reparseExcerpt (SourceExcerpt offset txt) p = do (_, res) <- lift $ runParserT' p (offsetInitialState offset txt) case res of Right result -> pure result Left errBundle -> customFailure $ ErrorReparsing $ bundleErrors errBundle where offsetInitialState :: Int -> s -> #if MIN_VERSION_megaparsec(8,0,0) State s e #else State s #endif offsetInitialState initialOffset s = State { stateInput = s , stateOffset = initialOffset , statePosState = PosState { pstateInput = s , pstateOffset = initialOffset , pstateSourcePos = initialPos "" , pstateTabWidth = defaultTabWidth , pstateLinePrefix = "" } #if MIN_VERSION_megaparsec(8,0,0) , stateParseErrors = [] #endif } --- * Pretty-printing custom parse errors -- | Pretty-print our custom parse errors. It is necessary to use this -- instead of 'errorBundlePretty' when custom parse errors are thrown. -- -- This function intercepts our custom parse errors and applies final -- adjustments ('finalizeCustomError') before passing them to -- 'errorBundlePretty'. These adjustments are part of the implementation -- of the behaviour of our custom parse errors. -- -- Note: We must ensure that the offset of the 'PosState' of the provided -- 'ParseErrorBundle' is no larger than the offset specified by a -- 'ErrorFailAt' constructor. This is guaranteed if this offset is set to -- 0 (that is, the beginning of the source file), which is the -- case for 'ParseErrorBundle's returned from 'runParserT'. customErrorBundlePretty :: HledgerParseErrors -> String customErrorBundlePretty errBundle = let errBundle' = errBundle { bundleErrors = NE.sortWith errorOffset $ -- megaparsec requires that the list of errors be sorted by their offsets bundleErrors errBundle >>= finalizeCustomError } in errorBundlePretty errBundle' where finalizeCustomError :: ParseError Text HledgerParseErrorData -> NE.NonEmpty (ParseError Text HledgerParseErrorData) finalizeCustomError err = case findCustomError err of Nothing -> pure err Just errFailAt@(ErrorFailAt startOffset _ _) -> -- Adjust the offset pure $ FancyError startOffset $ S.singleton $ ErrorCustom errFailAt Just (ErrorReparsing errs) -> -- Extract and finalize the inner errors errs >>= finalizeCustomError -- If any custom errors are present, arbitrarily take the first one -- (since only one custom error should be used at a time). findCustomError :: ParseError Text HledgerParseErrorData -> Maybe HledgerParseErrorData findCustomError err = case err of FancyError _ errSet -> finds (\case {ErrorCustom e -> Just e; _ -> Nothing}) errSet _ -> Nothing finds :: (Foldable t) => (a -> Maybe b) -> t a -> Maybe b finds f = getAlt . foldMap (Alt . f) --- * "Final" parse errors -- -- | A type representing "final" parse errors that cannot be backtracked -- from and are guaranteed to halt parsing. The anti-backtracking -- behaviour is implemented by an 'ExceptT' layer in the parser's monad -- stack, using this type as the 'ExceptT' error type. -- -- We have three goals for this type: -- (1) it should be possible to convert any parse error into a "final" -- parse error, -- (2) it should be possible to take a parse error thrown from an include -- file and re-throw it in the context of the parent file, and -- (3) the pretty-printing of "final" parse errors should be consistent -- with that of ordinary parse errors, but should also report the stack of -- parent files when errors are thrown from included files. -- -- In order to pretty-print a "final" parse error (goal 3), it must be -- bundled with include filepaths and its full source text. When a "final" -- parse error is thrown from within a parser, we do not have access to -- the full source, so we must hold the parse error ('FinalParseError') -- until it can be combined with the full source (and any parent file paths) -- by the parser's caller ('FinalParseErrorBundle'). data FinalParseError' e -- a parse error thrown as a "final" parse error = FinalError (ParseError Text e) -- a parse error obtained from running a parser, e.g. using 'runParserT' | FinalBundle (ParseErrorBundle Text e) -- a parse error thrown from an include file | FinalBundleWithStack (FinalParseErrorBundle' e) deriving (Show) type FinalParseError = FinalParseError' HledgerParseErrorData -- We need a 'Monoid' instance for 'FinalParseError' so that 'ExceptT -- FinalParseError m' is an instance of Alternative and MonadPlus, which -- is needed to use some parser combinators, e.g. 'many'. -- -- This monoid instance simply takes the first (left-most) error. instance Semigroup (FinalParseError' e) where e <> _ = e instance Monoid (FinalParseError' e) where mempty = FinalError $ FancyError 0 $ S.singleton (ErrorFail "default parse error") mappend = (<>) -- | A type bundling a 'ParseError' with its full source text, filepath, -- and stack of include files. Suitable for pretty-printing. -- -- Megaparsec's 'ParseErrorBundle' type already bundles a parse error with -- its full source text and filepath, so we just add a stack of include -- files. data FinalParseErrorBundle' e = FinalParseErrorBundle' { finalErrorBundle :: ParseErrorBundle Text e , includeFileStack :: [FilePath] } deriving (Show) type FinalParseErrorBundle = FinalParseErrorBundle' HledgerParseErrorData --- * Constructing and throwing final parse errors -- | Convert a "regular" parse error into a "final" parse error. finalError :: ParseError Text e -> FinalParseError' e finalError = FinalError -- | Like megaparsec's 'fancyFailure', but as a "final" parse error. finalFancyFailure :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => S.Set (ErrorFancy e) -> m a finalFancyFailure errSet = do offset <- getOffset throwError $ FinalError $ FancyError offset errSet -- | Like 'fail', but as a "final" parse error. finalFail :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => String -> m a finalFail = finalFancyFailure . S.singleton . ErrorFail -- | Like megaparsec's 'customFailure', but as a "final" parse error. finalCustomFailure :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => e -> m a finalCustomFailure = finalFancyFailure . S.singleton . ErrorCustom --- * Pretty-printing "final" parse errors -- | Pretty-print a "final" parse error: print the stack of include files, -- then apply the pretty-printer for parse error bundles. -- Note that 'attachSource' must be used on a "final" parse error before it can be pretty-printed. finalErrorBundlePretty :: FinalParseErrorBundle' HledgerParseErrorData -> String finalErrorBundlePretty bundle = concatMap showIncludeFilepath (includeFileStack bundle) <> customErrorBundlePretty (finalErrorBundle bundle) where showIncludeFilepath path = "in file included from " <> path <> ",\n" -- | Attach a filepath and source text to a "final" parse error so that it can be pretty-printed. -- You must ensure that you provide the appropriate source text and filepath. attachSource :: FilePath -> Text -> FinalParseError' e -> FinalParseErrorBundle' e attachSource filePath sourceText finalParseError = case finalParseError of -- A parse error thrown directly with the 'FinalError' constructor -- requires both source and filepath. FinalError err -> let bundle = ParseErrorBundle { bundleErrors = err NE.:| [] , bundlePosState = initialPosState filePath sourceText } in FinalParseErrorBundle' { finalErrorBundle = bundle , includeFileStack = [] } -- A 'ParseErrorBundle' already has the appropriate source and filepath -- and so needs neither. FinalBundle peBundle -> FinalParseErrorBundle' { finalErrorBundle = peBundle , includeFileStack = [] } -- A parse error from a 'FinalParseErrorBundle' was thrown from an -- include file, so we add the filepath to the stack. FinalBundleWithStack fpeBundle -> fpeBundle { includeFileStack = filePath : includeFileStack fpeBundle } --- * Handling parse errors from include files with "final" parse errors -- | Parse an include file with the given parser and initial state, -- discarding the resulting state, -- and re-throwing any parse errors as final parse errors with the file's info attached. parseIncludeFile :: Monad m => StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a -> st -> FilePath -> Text -> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a parseIncludeFile parser initialState filepath text = catchError parser' handler where parser' = do eResult <- lift $ lift $ runParserT (evalStateT parser initialState) filepath text case eResult of Left parseErrorBundle -> throwError $ FinalBundle parseErrorBundle Right result -> pure result -- Attach source and filepath of the include file to its parse errors handler e = throwError $ FinalBundleWithStack $ attachSource filepath text e --- * Helpers -- | Like megaparsec's 'initialState', but instead for 'PosState'. -- Used when constructing 'ParseErrorBundle's. -- The values for "tab width" and "line prefix" are taken from 'initialState'. initialPosState :: FilePath -> Text -> PosState Text initialPosState filePath sourceText = PosState { pstateInput = sourceText , pstateOffset = 0 , pstateSourcePos = initialPos filePath , pstateTabWidth = defaultTabWidth , pstateLinePrefix = "" }