{-# LANGUAGE FlexibleContexts #-} module Text.Parsec.Indent (block, lineFold, runIndent) where import Text.Parsec import Text.Parsec.Pos import Control.Monad.State import Control.Concatenative {-| A module to construct indentation aware parsers. Many programming language have indentation based syntax rules e.g. python and Haskell. This module exports combinators to create such parsers. The input source can be thought of as a list of tokens. Abstractly each token occurs at a line and a column and has a width. The column number of a token measures is indentation. If t1 and t2 are two tokens then we say that indentation of t1 is more than t2 if the column number of occurrence of t1 is greater than that of t2. Currently this module supports two kind of indentation based syntactic structures which we now describe: [Block] A block of indentation /c/ is a sequence of tokens with indentation at least /c/. Examples for a block is a where clause of Haskell with no explicit braces. [Line fold] A line fold starting at line /l/ and indentation /c/ is a sequence of tokens that start at line /l/ and possibly continue to subsequent lines as long as the indentation is greater than /c/. Such a sequence of lines need to be /folded/ to a single line. An example is MIME headers. Line folding based binding separation is used in Haskell as well. -} type IndentParserT s u m a = ParsecT s u (StateT SourcePos m) a -- | Parses a block of lines at the same indentation level block :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a] block p = do a <- setStart r <- many1 (checkIndent >> p) put a return r -- | Continues parsing indented below a line lineFold :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a] lineFold p = do a <- setStart r <- blockOrNewLine p put a return r blockOrNewLine :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a] blockOrNewLine p = do s <- get pos <- getPosition liftM2 (:) p $ if biAp sourceLine (==) pos s then blockOrNewLine p else liftM concat (block (manyLine p)) -- | Parses many occurances of p without using more than one line manyLine :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a] manyLine p = do s <- liftM ((+1) . sourceLine) get pos <- liftM sourceLine getPosition if pos == s then many (char ' ') >> return [] else liftM2 (:) p (manyLine p) setStart :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m SourcePos setStart = do a <- get p <- getPosition (put p) return a checkIndent :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m () checkIndent = do s <- get p <- getPosition if biAp sourceColumn (==) p s then return () else mzero -- | Run the result of an indentation sensative parse runIndent :: Monad m => SourceName -> StateT SourcePos m a -> m a runIndent s = flip evalStateT (initialPos s) -- for tests, assemble random indent-using functions, their inputs and outputs in tandem, then test to see they all fit as planned