{- Haskell module for constructing indentation aware parser combinators. Copyright (C) 2007 Piyush P Kurur, This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {-| A module to construct indentation aware parsers. Many programming language have indentation based syntax rules e.g. python and Haskell. This module exports combinators to create such parsers. The input source can be thought of as a list of tokens. Abstractly each token occurs at a line and a column and has a width. The column number of a token measures is indentation. If t1 and t2 are two tokens then we say that indentation of t1 is more than t2 if the column number of occurrence of t1 is greater than that of t2. Currently this module supports two kind of indentation based syntactic structures which we now describe: [Block] A block of indentation /c/ is a sequence of tokens with indentation at least /c/. Examples for a block is a where clause of Haskell with no explicit braces. [Line fold] A line fold starting at line /l/ and indentation /c/ is a sequence of tokens that start at line /l/ and possibly continue to subsequent lines as long as the indentation is greater than /c/. Such a sequence of lines need to be /folded/ to a single line. An example is MIME headers. Line folding based binding separation is used in Haskell as well. Warning: Internally indentations are implemented using Parser states. If one wants to use parser states as well then use the 'getState' and 'setState' functions exported by this module instead of those exported from the parsec library. Also use the 'parseTest' and 'runParser' function exported from this module instead of the one exported from Parsec. -} module Text.ParserCombinators.Parsec.IndentParser ( -- * Parser type IndentParser, -- * Parser Combinators. -- $combinators indentParser, noIndent, block, lineFold, betweenOrBlock, betweenOrLineFold, -- * Primitive Parsers -- $prim IndentMode(..), IndentState, state, indentMode, saveIndentMode, getIndentMode, setIndentMode, -- * User state manipulation. -- $state getState, setState, -- * Testing and Running. -- $run runParser, parseTest, -- ) where -- The functions @getState@ @setState@ and parseTest for indentation -- Parsers are exported by this module. Hence they are hidden. import Text.ParserCombinators.Parsec hiding ( getState, setState, parseTest, runParser ) import Text.ParserCombinators.Parsec.Pos import qualified Text.ParserCombinators.Parsec.Prim as PP import Control.Monad(fmap) getStatePrim = PP.getState setStatePrim = PP.setState runParserPrim = PP.runParser {-| An indentation aware parser. The parser should be of this type to make it possible to parse indentation based grammatical structure. -} type IndentParser tok st a = GenParser tok (IndentState st) a {- $combinators The module exports three combinators are @'indentParser'@, @'block'@ and @'lineFold'@. To construct parsers for indentation based grammars one typically applies the @'indentParser'@ to all tokenisers. In conjunction with @'Text.ParserCombinators.Parsec.Token'@ module, one would want to apply indentParser to all the fields of the 'Text.ParserCombinators.Parsec.Token.TokenParser' record except @'Text.ParserCombinators.Parsec.Token.whiteSpace'@. A block can then be parsed using the combinator @'block'@ and a line fold using @'lineFold'@. To generate indentation aware tokeniser from the corresponding 'Text.ParserCombinators.Parsec.Language.LanguageDef' record see the module @'Text.ParserCombinators.Parsec.IndentToken'@. -} {-| The combinator indentParser makes its input parser indentation aware. Usually one would want to make all the tokenisers indentation aware. -} indentParser :: IndentParser tok st a -> IndentParser tok st a indentParser p = do indMode <- getIndentMode case indMode of NoIndent -> p -- Block c -> blockP c p LineFold l c -> lineP l c p where blockP c p = do col <- column if col >= c then p else pzero lineP l c p = do col <- column if col > c then p else do ln <- line if ln == l then p else pzero {-| The parser @`noIndent` p@ runs @p@ ignoring any indentation based structure. This can be used to parse for example an explicitly braced where clause in Haskell. -} noIndent :: IndentParser tok st a -> IndentParser tok st a noIndent p = saveIndentMode $ do {setIndentMode NoIndent; p} {-| The parser @'block' p@ parses a /block/ of @p@ with the block indentation being the current column number. -} block :: IndentParser tok st a -> IndentParser tok st a block p = saveIndentMode $ do col <- column setIndentMode $ Block col p {-| The parser @lineFold p@ parses a folded line of @p@. The current line is the starting line. The indentation of the line depends on where in the source we are. If we are in a block then the indentation is the indentation of the block. Otherwise the current column is the indentation. -} lineFold :: IndentParser tok st a -> IndentParser tok st a lineFold p = saveIndentMode $ do ln <- line indMode <- getIndentMode case indMode of NoIndent -> do col <- column setIndentMode $ LineFold ln col Block c -> setIndentMode $ LineFold ln c p {-| The parser @betweenOrBlock open close p@ parses @p@ between @open@ and @close@. If open is matched @p@ is parsed in 'NoIndent' mode otherwise a block @p@ is parsed in 'Block' mode. For eg. the parser for parsing haskell where clause would look like > whereClause = do reserved where; betweenOrBlock bindings -} betweenOrBlock :: IndentParser tok st open -> IndentParser tok st close -> IndentParser tok st a -> IndentParser tok st a betweenOrBlock left right p = do left; x <- noIndent p; right; return x <|> block p {-| Similar to betweenOrBlock but uses lineFold instead of block. -} betweenOrLineFold :: IndentParser tok st open -> IndentParser tok st close -> IndentParser tok st a -> IndentParser tok st a betweenOrLineFold left right p = do left; x <- noIndent p; right; return x <|> lineFold p -- | This returns the current line number line :: GenParser tok st Line line = fmap sourceLine getPosition -- | This returns the current column number column :: GenParser tok st Column column = fmap sourceColumn getPosition {- $prim We now describe the primitives that are used to build the combinators @block@, @noIndent@ and @lineFold@. An indentation parser can be in one of the following modes: [@NoIndent@] In this mode the parser ignores all indentation constraints. All tokens regardless of their indentation are accepted. [@Block c@] In this mode a parser accepts only tokens which have indentation at least @c@. A parser parsing a block that is indented more than @c@ columns will be this mode. [@LineFold l c@] In this mode a parser accepts tokens as long as it is in the current line or is indented more than @c@. When parsing a folded line starting at @l@ and indentation more than @c@ the parser will be in this mode. -} data IndentMode = NoIndent | Block Column | LineFold Line Column deriving (Show, Eq) -- | This parser returns the current indentation mode. getIndentMode :: IndentParser tok st IndentMode getIndentMode = fmap indentMode getStatePrim -- | This parser sets the current indentation mode setIndentMode :: IndentMode -> IndentParser tok st () setIndentMode ind = do indState <- getStatePrim PP.setState $ indState {indentMode = ind} {-| The parser @saveIndentMode p@ saves the current indentation mode and returns the result of running @p@. It restores back the old indentation once @p@ has finished executing. -} saveIndentMode :: IndentParser tok st a -> IndentParser tok st a saveIndentMode p = do indMode <- getIndentMode x <- p setIndentMode indMode return x {- $state Indentation awareness is built into indentation parser by using these parser states. To distinguish it from the actual user defined state we call the former the indentation state and the later the user state. -} -- | The parser state used by Indentation Parsers. data IndentState st = IndentState { state :: st, -- the actual state indentMode :: IndentMode -- the indentation mode } -- | Gets the current user state. Use this instead of the one exported -- from Parsec module getState :: IndentParser tok st st getState = fmap state getStatePrim {-| This parser sets the current state of the parser to the given input state. Use this function instead of the one exported by the parsec library. -} setState :: st -> IndentParser tok st () setState st = do indState <- getStatePrim PP.setState indState {state = st} {- $run The most generic way to run an IndentParser. Use @parseTest@ for testing your parser instead. -} runParser :: IndentParser tok st a -- ^ the parser to be run -> st -- ^ the initial state -> IndentMode -- ^ the indentation mode -> SourceName -- ^ the source file name -> [tok] -- ^ the list of tokens -> Either ParseError a -- ^ the result of parsing runParser p st imode = runParserPrim p (IndentState st imode) {-| This is the function analogues to parseTest of the Parsec module. Given an indent parser @p :: IndentParser tok () a@ and a list of tokens it runs the parser and prints the result. -} parseTest :: Show a => IndentParser tok () a -> [tok] -> IO () parseTest p input = case result of Left err -> do putStr "Error"; print err Right a -> do print a where result = runParser p () NoIndent "" input