{-# LANGUAGE FlexibleContexts #-}
module Text.Parsec.Indent (
    -- $doc
    
    -- * Types
    IndentParser, runIndent,
    -- * Blocks
    withBlock, withBlock', block,
    -- * Indentation Checking
    indented, same, sameOrIndented, checkIndent, withPos,
    -- * Paired characters
    indentBrackets, indentAngles, indentBraces, indentParens,
    -- * Line Fold Chaining
    -- | Any chain using these combinators must used with 'withPos'
    (<+/>), (<-/>), (<*/>), (<?/>), Optional(..)
    ) where
import Text.Parsec hiding (State)
import Text.Parsec.Pos
import Text.Parsec.Token
import Control.Monad.State
import Control.Concatenative

-- $doc
-- A module to construct indentation aware parsers. Many programming
-- language have indentation based syntax rules e.g. python and Haskell.
-- This module exports combinators to create such parsers. 
-- 
-- The input source can be thought of as a list of tokens. Abstractly
-- each token occurs at a line and a column and has a width. The column
-- number of a token measures is indentation. If t1 and t2 are two tokens
-- then we say that indentation of t1 is more than t2 if the column
-- number of occurrence of t1 is greater than that of t2.
-- 
-- Currently this module supports two kind of indentation based syntactic
-- structures which we now describe:
-- 
-- [Block] A block of indentation /c/ is a sequence of tokens with
-- indentation at least /c/.  Examples for a block is a where clause of
-- Haskell with no explicit braces.
-- 
-- [Line fold] A line fold starting at line /l/ and indentation /c/ is a
-- sequence of tokens that start at line /l/ and possibly continue to
-- subsequent lines as long as the indentation is greater than /c/. Such
-- a sequence of lines need to be /folded/ to a single line. An example
-- is MIME headers. Line folding based binding separation is used in
-- Haskell as well.

-- | Indentation sensitive parser type. Usually @ m @ will
--   be @ Identity @ as with any @ ParsecT @
type IndentParser s u a = ParsecT s u (State SourcePos) a
    
-- | @ 'withBlock' f a p @ parses @ a @
--   followed by an indented block of @ p @
--   combining them with @ f @
withBlock :: (Stream s (State SourcePos) Char) => (a -> [b] -> c) ->
    IndentParser s u a -> IndentParser s u b -> IndentParser s u c
withBlock f a p = withPos $ do
    r1 <- a
    r2 <- option [] (indented >> block p)
    return (f r1 r2)

-- | Like 'withBlock', but throws away initial parse result
withBlock' :: (Stream s (State SourcePos) Char) =>
    IndentParser s u a -> IndentParser s u b -> IndentParser s u [b]
withBlock' = withBlock (flip const)

-- | Parses only when indented past the level of the reference
indented :: (Stream s (State SourcePos) Char) => IndentParser s u ()
indented = do
    pos <- getPosition
    s <- get
    if biAp sourceColumn (<=) pos s then parserFail "not indented" else do
        put $ setSourceLine s (sourceLine pos)
        return ()

-- | Parses only when indented past the level of the reference or on the same line
sameOrIndented :: Stream s (State SourcePos) Char => IndentParser s u ()
sameOrIndented = same <|> indented

-- | Parses only on the same line as the reference
same :: (Stream s (State SourcePos) Char) => IndentParser s u ()
same = do
    pos <- getPosition
    s <- get
    if biAp sourceLine (==) pos s then return () else parserFail "over one line"
    
-- | Parses a block of lines at the same indentation level
block :: (Stream s (State SourcePos) Char) => IndentParser s u a -> IndentParser s u [a]
block p = withPos $ do
    r <- many1 (checkIndent >> p)
    return r

-- | Parses using the current location for indentation reference
withPos :: (Stream s (State SourcePos) Char) => IndentParser s u a -> IndentParser s u a
withPos x = do
    a <- get
    p <- getPosition
    r <- put p >> x
    put a >> return r

-- | Ensures the current indentation level matches that of the reference
checkIndent :: (Stream s (State SourcePos) Char) => IndentParser s u ()
checkIndent = do
    s <- get
    p <- getPosition
    if biAp sourceColumn (==) p s then return () else parserFail "indentation doesn't match"

-- | Run the result of an indentation sensitive parse
runIndent :: SourceName -> State SourcePos a -> a
runIndent s = flip evalState (initialPos s)

-- | '<+/>' is to indentation sensitive parsers what 'ap' is to monads
(<+/>) :: (Stream s (State SourcePos) Char) =>
    IndentParser s u (a -> b) -> IndentParser s u a -> IndentParser s u b
a <+/> b = ap a (sameOrIndented >> b)

-- | '<-/>' is like '<+/>', but doesn't apply the function to the parsed value
(<-/>) :: (Stream s (State SourcePos) Char) =>
    IndentParser s u a -> IndentParser s u b -> IndentParser s u a
a <-/> b = liftM2 const a (sameOrIndented >> b)

-- | Like '<+/>' but applies the second parser many times
(<*/>) :: (Stream s (State SourcePos) Char) =>
    IndentParser s u ([a] -> b) -> IndentParser s u a -> IndentParser s u b
a <*/> b = ap a (many (sameOrIndented >> b))

-- | Datatype used to optional parsing
data Optional s u a = Opt a (IndentParser s u a)

-- | Like '<+/>' but applies the second parser optionally using the 'Optional' datatype
(<?/>) :: (Stream s (State SourcePos) Char) =>
    IndentParser s u (a -> b) -> (Optional s u a) -> IndentParser s u b
(<?/>) a (Opt b c) = ap a (option b (sameOrIndented >> c))

-- | parses with surrounding brackets
indentBrackets :: (Stream s (State SourcePos) Char) => GenTokenParser s u (State SourcePos) -> IndentParser s u a -> IndentParser s u a
indentBrackets lexer p = withPos $ return id <-/> symbol lexer "[" <+/> p <-/> symbol lexer "]"

-- | parses with surrounding angle brackets
indentAngles :: (Stream s (State SourcePos) Char) => GenTokenParser s u (State SourcePos) -> IndentParser s u a -> IndentParser s u a
indentAngles lexer p = withPos $ return id <-/> symbol lexer "<" <+/> p <-/> symbol lexer ">"

-- | parses with surrounding braces
indentBraces :: (Stream s (State SourcePos) Char) => GenTokenParser s u (State SourcePos) -> IndentParser s u a -> IndentParser s u a
indentBraces lexer p = withPos $ return id <-/> symbol lexer "{" <+/> p <-/> symbol lexer "}"

-- | parses with surrounding parentheses 
indentParens :: (Stream s (State SourcePos) Char) => GenTokenParser s u (State SourcePos) -> IndentParser s u a -> IndentParser s u a
indentParens lexer p = withPos $ return id <-/> symbol lexer "(" <+/> p <-/> symbol lexer ")"