Text/Parsec/Indent.hs

{-# LANGUAGE FlexibleContexts #-}
module Text.Parsec.Indent (block, lineFold, runIndent) where
import Text.Parsec
import Text.Parsec.Pos
import Control.Monad.State
import Control.Concatenative

{-| 

A module to construct indentation aware parsers. Many programming
language have indentation based syntax rules e.g. python and Haskell.
This module exports combinators to create such parsers. 

The input source can be thought of as a list of tokens. Abstractly
each token occurs at a line and a column and has a width. The column
number of a token measures is indentation. If t1 and t2 are two tokens
then we say that indentation of t1 is more than t2 if the column
number of occurrence of t1 is greater than that of t2.

Currently this module supports two kind of indentation based syntactic
structures which we now describe:

[Block] A block of indentation /c/ is a sequence of tokens with
indentation at least /c/.  Examples for a block is a where clause of
Haskell with no explicit braces.

[Line fold] A line fold starting at line /l/ and indentation /c/ is a
sequence of tokens that start at line /l/ and possibly continue to
subsequent lines as long as the indentation is greater than /c/. Such
a sequence of lines need to be /folded/ to a single line. An example
is MIME headers. Line folding based binding separation is used in
Haskell as well.

-}

type IndentParserT s u m a = ParsecT s u (StateT SourcePos m) a

-- | Parses a block of lines at the same indentation level
block :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a]
block p = do
    a <- setStart
    r <- many1 (checkIndent >> p)
    put a
    return r

-- | Continues parsing indented below a line
lineFold :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a]
lineFold p = do
    a <- setStart
    r <- blockOrNewLine p
    put a
    return r

blockOrNewLine :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a]
blockOrNewLine p = do
    s <- get
    pos <- getPosition
    liftM2 (:) p $ if biAp sourceLine (==) pos s then blockOrNewLine p else liftM concat (block (manyLine p))

-- | Parses many occurances of p without using more than one line
manyLine :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a]
manyLine p = do
    s <- liftM ((+1) . sourceLine) get
    pos <- liftM sourceLine getPosition
    if pos == s then many (char ' ') >> return [] else liftM2 (:) p (manyLine p)

setStart :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m SourcePos
setStart = do
    a <- get
    p <- getPosition
    (put p)
    return a

checkIndent :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m ()
checkIndent = do
    s <- get
    p <- getPosition
    if biAp sourceColumn (==) p s then return () else mzero

-- | Run the result of an indentation sensative parse
runIndent :: Monad m => SourceName -> StateT SourcePos m a -> m a
runIndent s = flip evalStateT (initialPos s)

-- for tests, assemble random indent-using functions, their inputs and outputs in tandem, then test to see they all fit as planned