Text/ParserCombinators/Parsec/IndentParser/Token.hs

{-# OPTIONS_GHC -fglasgow-exts #-}

{-

Haskell module for constructing indentation aware parser combinators.
Copyright (C) 2007  Piyush P Kurur, <http://www.cse.iitk.ac.in/~ppk>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
  

-}

{-|

A module for constructing indentation aware tokeniser that can be used
in conjuction with @'Text.ParserCombinators.Parsec.Token'@. All the
combinator takes a @'Text.ParserCombinators.Parsec.Token.TokenParser'@
as its first argument. For every field @foo@ of
@'Text.ParserCombinators.Parsec.Token.TokenParser'@ this module
exports a combinator @foo@. To define a tokeniser for an indentation
based language a user first defines the appropriate
@'Text.ParserCombinators.Parsec.Language.LanguageDef'@ record, applies
the combinator @'Text.ParserCombinators.Parsec.Token.makeTokenParser'@
to get a @'Text.ParserCombinators.Parsec.Token.TokenParser'@ record
say @tokP@ and then, instead of selecting the field @foo@ of @tokP@,
applies the combinator @foo@ exported from this module to @tokP@. The
semantics of the combinator @foo@ is essentially same as that of the
field @foo@ of @'Text.ParserCombinators.Parsec.Token.TokenParser'@ but
the returned parsers are indentation aware. Apart from these there are
certain new combinators that are defined specifically for parsing
certain indentation based syntactic constructs. (We have not defined
squares use brackets instead)


There are two important classes of parser combinator exported by this
module:

[Grouping Parser Combinator] A grouping parser combinator takes as
input a parser say @p@ and returns a parser that parses @p@ between
two /grouping delimiters/.  There are three flavours of grouping
parsers: @foo@, @fooOrBlock@ and @fooOrLineFold@ where @foo@ can be
one of @angles@, @braces@, @parens@, @brackets@. To illustrate we take
@foo@ to be @braces@. The parser @'braces' tokP p@ parses @p@
delimited by '{' and '}'. In this case @p@ does not care about
indentation (i.e. the parser @p@ is run in
@'Text.ParserCombinators.Parsec.IndentParser.NoIndent'@ mode). The
parser @'bracesOrBlock' tokP p@ is like @braces tokP p@ but if no
explicit delimiting braces are given parses @p@ within an indented
block. Similarly @'bracesOrLineFold' tokP p@ parses @p@ between '{'
and '}' and uses line fold when no explicit braces are given. These
can be two varients can be defined as follows

> bracesOrBlock tokP p    = braces tokP p <|> block p
> bracesOrLineFold tokP p = braces tokP p <|> lineFold p


[Seperator Parser Combinator] A seperator parser combinator takes as
input a parser say @p@ and returns a parser that parses a list of @p@
seperated by a seperator. The module exports the combinators @fooSep@,
@fooSep1@, @fooOrNewLineSep@ and @fooOrNewLineSep1@, where @foo@ is
either @semi@ (in which case the seperator is a semicolon ';') or
@comma@ (in which case the seperator is a comma ',').


To illustrate the use of this module we now give, as an incomplete
example, a parser that parses a where clause in Haskell which
illustrates the use of this module.

 
>   import qualified Text.ParserCombinators.Parsec.Language as L
>   import qualified Text.ParserCombinators.Parsec.Toke as T
>   import qualified Text.ParserCombinator.Parsec.IndentToken as IT


>   tokP = T.makeTokenParser L.haskellDef
>   semiOrNewLineSep = IT.semiOrNewLineSep tokP
>   bracesOrBlock = IT.bracesOrBlock tokP
>   identifier = IT.identifier tokP
>   ....
>   symbol = IT.symbol tokP

>   binding = semiOrNewLineSep bind
>   bind    = do id <- identifier
>                symbol (char '=')
>                e <- expr
>                return (id,e)
>  whereClause = do reserved "where"; braceOrBlock binding


-}


module Text.ParserCombinators.Parsec.IndentParser.Token
    ( 
     -- * Types
     IndentCharParser,
     TokenParser,
     -- * Combinators
     identifier, reserved,
     operator, reservedOp,
     charLiteral, stringLiteral,
     natural, integer, float, naturalOrFloat,
     decimal, hexadecimal, octal,
     semi, colon, dot, comma,
     lexeme, symbol, whiteSpace,
     -- * Separator parser combinators
     semiSep, semiSep1, commaSep, commaSep1,
     semiOrNewLineSep, semiOrNewLineSep1,
     commaOrNewLineSep, commaOrNewLineSep1,
     -- * Grouping parser combinator
     parens,parensOrBlock, parensOrLineFold,
     braces, bracesOrBlock, bracesOrLineFold,
     angles, anglesOrBlock, anglesOrLineFold,
     brackets, bracketsOrBlock, bracketsOrLineFold
    ) where

import Text.ParserCombinators.Parsec hiding (
                                             getState,
                                             setState,
                                             parseTest,
                                             runParser,
                                             parse,
                                             parseFromFile
                                            )

import qualified Text.ParserCombinators.Parsec.Token as T
import qualified Text.ParserCombinators.Parsec.Language as L
import Text.ParserCombinators.Parsec.IndentParser
import Text.ParserCombinators.Parsec.IndentParser.Prim (IndentState)


type TokenParser st = T.TokenParser (st,IndentState)





{-| Indentation aware parser to match a valid identifier of the
language.  -}

identifier :: TokenParser st -> IndentCharParser st String
identifier = indentParser . T.identifier


{-| Indentation aware parser to match a reserved word of the language.
-}

reserved :: TokenParser st ->
            String  -- ^ The reserved word to be matched.
         -> IndentCharParser st ()
reserved tokP = indentParser . T.reserved tokP


{-| The parser @reserved tokP keyword@ parses  the reserved word keyword.
 The string keyword should have been declared as a reserved word in the 
 @'Text.ParserCombinator.Parserc.Language.LanguageDef'@
                    -record.  -}

operator :: TokenParser st -> IndentCharParser st String
operator = indentParser . T.operator

{-|
  Indentation aware parser to match a reserved operator of the language.
-}

reservedOp :: TokenParser st ->
              String
              -- ^ The reserved operator to be matched. Should have
              -- been declared as a reserved operator in the 
              -- @'Text.ParserCombinator.Parserc.Language.LanguageDef'@
              -- record.
              -> IndentCharParser st ()

reservedOp tokP = indentParser . T.reservedOp tokP





{-| Indentation aware parser to match a character literal (the syntax
is assumend to be that of Hasekell which matches that of most
programming language).
-}

charLiteral :: TokenParser st -> IndentCharParser st Char
charLiteral = indentParser . T.charLiteral



{-| Indentation aware parser to match a string literal (the syntax is
assumend to be that of Hasekell which matches that of most programming
language).
-}

stringLiteral :: TokenParser st -> IndentCharParser st String
stringLiteral = indentParser . T.stringLiteral


{-| Indentation aware parser to match a natural number.  -}

natural :: TokenParser st -> IndentCharParser st Integer
natural = indentParser . T.natural

{-|
  Indentation aware parser to match an integer.
-}

integer :: TokenParser st -> IndentCharParser st Integer
integer = indentParser . T.integer

{-| Indentation aware parser to match a floating point number.  -}

float :: TokenParser st -> IndentCharParser st Double 
float = indentParser . T.float

{-| Indentation aware parser to match either a natural number or
Floating point number.  -}

naturalOrFloat :: TokenParser st
               -> IndentCharParser st (Either Integer Double)
naturalOrFloat = indentParser . T.naturalOrFloat

-- | Indentation aware parser to match an integer in decimal.


decimal  :: TokenParser st -> IndentCharParser st Integer
decimal = indentParser . T.decimal

{-| Indentation aware parser to match an integer in hexadecimal.-}

hexadecimal :: TokenParser st -> IndentCharParser st Integer
hexadecimal = indentParser . T.hexadecimal 

{-| 

Indentation aware parser to match an integer in ocatal.

-}

octal  :: TokenParser st -> IndentCharParser st Integer
octal = indentParser . T.octal


{-| Matches a semicolon and returns ';'.  -}


semi :: TokenParser st -> IndentCharParser st String
semi = indentParser . T.semi

{-| Matches a comma and returns ",".  -}

comma :: TokenParser st -> IndentCharParser st String
comma = indentParser . T.comma

{-| Matches a colon and returns ":". -}
colon :: TokenParser st -> IndentCharParser st String
colon = indentParser . T.colon

{-| Matches a dot and returns "." -}
dot   :: TokenParser st -> IndentCharParser st String
dot = indentParser . T.dot


{-| Given an indentation aware parser @p@ as argument @semiSep tokP@
  returns a parser that parses zero or more occurances of @p@
  seperated by semicolon (';') -}

semiSep :: TokenParser st 
        -> IndentCharParser st a 
        -> IndentCharParser st [a]

semiSep tokP p = p `sepBy` semicolon
    where semicolon = semi tokP


{-| Given an indentation aware parser @p@ as argument @semiSep1 tokP@
  returns a parser that parses one or more occurances of @p@ seperated
  by semicolon (';') -}


semiSep1 :: TokenParser st 
         -> IndentCharParser st a -- ^ The input Parser
         -> IndentCharParser st [a]

semiSep1 tokP p = p `sepBy1` semicolon
    where semicolon = semi tokP



{-| Given an indentation aware parser @p@ as argument @commaSep tokP@
  returns a parser that parses zero or more occurances of @p@
  seperated by comma (',') -}

commaSep :: TokenParser st
         -> IndentCharParser st a -- ^ The input Parser
         -> IndentCharParser st [a]

commaSep tokP p = p `sepBy` virgule
    where virgule = comma tokP




{-| Given an indentation aware parser @p@ as argument @commaSep1 tokP@
  returns a parser that parses one or more occurances of @p@ seperated
  by comma (',') -}


commaSep1 :: TokenParser st
         -> IndentCharParser st a -- ^ The input Parser
         -> IndentCharParser st [a]

commaSep1 tokP p = p `sepBy1` virgule
    where virgule = comma tokP




{-| 

Given an indentation aware parser @p@ as argument @semiOrNewLineSep1
tokP@ returns a parser that parses one or more occurances of @p@
seperated by either semicolons (';') or newline. To seperate multiple
occurance of @p@ in the same line use an explicit semicolon (';').

-}

semiOrNewLineSep1 :: TokenParser st
                  -> IndentCharParser st a -- ^ The input Parser
                  -> IndentCharParser st [a]

semiOrNewLineSep1 tokP p = do xs <- lineFold $ semiSep1 tokP p
                              rest <- semiOrNewLineSep tokP p
                              return (xs++rest)



{-| 

Given an indentation aware parser @p@ as argument @semiOrNewLineSep
tokP@ returns a parser that parses zero or more occurances of @p@
seperated by either semicolons (';') or newlines. To seperate multiple
occurance of @p@ in the same line use an explicit semicolon (';').

-}

semiOrNewLineSep :: TokenParser st
                 -> IndentCharParser st a -- ^ The input Parser
                 -> IndentCharParser st [a]

semiOrNewLineSep tokP p =  do xs <- lineFold $ semiSep tokP p
                              case xs of
                                [] -> return []
                                _  -> do rest <- semiOrNewLineSep tokP p
                                         return (xs++rest)







{-| 

Given an indentation aware parser @p@ as argument @commaOrNewLineSep1
tokP@ returns a parser that parses one or more occurances of @p@
seperated by either comma (',') or newline. To seperate multiple
occurance of @p@ in the same line use an explicit comma (',').

-}

commaOrNewLineSep1 :: TokenParser st
                  -> IndentCharParser st a -- ^ The input Parser
                  -> IndentCharParser st [a]

commaOrNewLineSep1 tokP p = do xs <- lineFold $ commaSep1 tokP p
                               rest <- commaOrNewLineSep tokP p
                               return (xs++rest)



{-| 

Given an indentation aware parser @p@ as argument @commaOrNewLineSep
tokP@ returns a parser that parses zero or more occurances of @p@
seperated by either comma (',') or newlines. To seperate multiple
occurance of @p@ in the same line use an explicit comma (',').

-}

commaOrNewLineSep :: TokenParser st
                 -> IndentCharParser st a -- ^ The input Parser
                 -> IndentCharParser st [a]

commaOrNewLineSep tokP p = do xs <- lineFold $ commaSep tokP p
                              case xs of
                                [] -> return []
                                _  -> do rest <- commaOrNewLineSep tokP p
                                         return (xs++rest)





{-|

The parser @parens tokP p@ parses @p@ between '(' and ')'. The parser
@p@ does not care about indentation i.e. @p@ is run in
@'Text.ParserCombinators.Parsec.IndentParser.NoIndent'@ mode.

-}

parens     :: TokenParser st ->
              IndentCharParser st a -- ^ The input Parser
           -> IndentCharParser st a

parens tokP p = between (symbol tokP "(") (symbol tokP ")") (noIndent p)

{-| 

Similar to @'parens'@ but when no explicit '(' and ')' are given,
groups @p@ by block indentation.

-}

parensOrBlock :: TokenParser st -> 
                 IndentCharParser st a -- ^ The input parser
              -> IndentCharParser st a

parensOrBlock tokP p = parens tokP p <|> block p



{-| 

Similar to @'parens'@ but when no explicit '(' and ')' are given,
groups @p@ by a line fold.

-}
parensOrLineFold :: TokenParser st -> 
                    IndentCharParser st a -- ^ The input parser
                 -> IndentCharParser st a

parensOrLineFold tokP p = parens tokP p <|> lineFold p


{-|

The parser @braces tokP p@ parses @p@ between '{' and '}'. The parser
@p@ does not care about indentation i.e. @p@ is run in
@'Text.ParserCombinators.Parsec.IndentParser.NoIndent'@ mode.

-}

braces     :: TokenParser st ->
              IndentCharParser st a -- ^ The input Parser
           -> IndentCharParser st a

braces tokP p = between (symbol tokP "{") (symbol tokP "}") (noIndent p)



{-| 

Similar to @'braces'@ but when no explicit '{' and '}' are given,
groups @p@ by block indentation.

-}
bracesOrBlock :: TokenParser st -> 
                 IndentCharParser st a -- ^ The input parser
              -> IndentCharParser st a

bracesOrBlock tokP p = braces tokP p <|> block p


{-| 

Similar to @'braces'@ but when no explicit '{' and '}' are given,
groups @p@ by a line fold.

-}

bracesOrLineFold :: TokenParser st -> 
                    IndentCharParser st a -- ^ The input parser
                 -> IndentCharParser st a

bracesOrLineFold tokP p = braces tokP p <|> lineFold p


{-|

The parser @angles tokP p@ parses @p@ between angles. The parser @p@
does not care about indentation i.e. @p@ is run in
@'Text.ParserCombinators.Parsec.IndentParser.NoIndent'@ mode.

-}

angles     :: TokenParser st ->
              IndentCharParser st a -- ^ The input Parser
           -> IndentCharParser st a

angles tokP p = between (symbol tokP "<") (symbol tokP ">") (noIndent p)



{-| 

Similar to @'angles'@ but when no explicit angles are given, groups
@p@ by block indentation.

-}

anglesOrBlock :: TokenParser st -> 
                 IndentCharParser st a -- ^ The input parser
              -> IndentCharParser st a

anglesOrBlock tokP p = angles tokP p <|> block p


{-| 

Similar to @'angles'@ but when no explicit angles are given, groups
@p@ by a line fold.

-}

anglesOrLineFold :: TokenParser st -> 
                    IndentCharParser st a -- ^ The input parser
                 -> IndentCharParser st a

anglesOrLineFold tokP p = angles tokP p <|> lineFold p



{-|

The parser @brackets tokP p@ parses @p@ between '[' and ']'. The
parser @p@ does not care about indentation i.e. @p@ is run in
@'Text.ParserCombinators.Parsec.IndentParser.NoIndent'@ mode.

-}

brackets     :: TokenParser st ->
                IndentCharParser st a -- ^ The input Parser
             -> IndentCharParser st a

brackets tokP p = between (symbol tokP "[") (symbol tokP "]") (noIndent p)



{-| 

Similar to @'brackets'@ but when no explicit '[' and ']' are given,
groups @p@ by block indentation.

-}
bracketsOrBlock :: TokenParser st -> 
                   IndentCharParser st a -- ^ The input parser
                -> IndentCharParser st a

bracketsOrBlock tokP p = brackets tokP p <|> block p


{-| 

Similar to @'brackets'@ but when no explicit '[' and ']' are given,
groups @p@ by a line fold.

-}

bracketsOrLineFold :: TokenParser st -> 
                    IndentCharParser st a -- ^ The input parser
                 -> IndentCharParser st a

bracketsOrLineFold tokP p = brackets tokP p <|> lineFold p


{-|

Indentation aware parser that is equvalent to string str.

-}

symbol     :: TokenParser st 
           -> String 
           -> IndentCharParser st String
symbol tokP = indentParser . T.symbol tokP


{-| Creates a lexeme parser. The resultant parser skips trailing
spaces and is indentation aware.  -}

lexeme     :: TokenParser st 
           -> IndentCharParser st a  -- ^ The input parser.
           -> IndentCharParser st a

lexeme tokP = indentParser . T.lexeme tokP

{-| The parser whiteSpace skips spaces and comments. This does not
care about indentation as skipping spaces should be done irrespective
of the indentation.  -}

whiteSpace :: TokenParser st -> IndentCharParser st ()
whiteSpace = T.whiteSpace