-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Combinators for parsing indentation based syntactic structures -- -- This package exposes two modules, -- Text.ParserCombinators.Parsec.IndentParser and -- Text.ParserCombinators.Parsec.IndentParser.Token, for constructing -- parser combinators for indentation based syntactic structures. The -- former exports the basic indentation parser combinators and the latter -- together with Text.ParserCombinators.Parsec.Language and -- Text.ParserCombinators.Parsec.Token can be used to define -- tokenisers for programming languages. @package IndentParser @version 0.2 -- | A module to construct indentation aware parsers. Many programming -- languages have indentation based syntax rules e.g. Python and Haskell. -- This module exports combinators to create such parsers. -- -- The input source can be thought of as a list of tokens. Abstractly -- each token occurs at a line and a column and has a width. The column -- number of a token measures its indentation. If t1 and t2 are two tokens -- then we say that the indentation of t1 is more than that of t2 if the column -- number of occurrence of t1 is greater than that of t2. -- -- Currently this module supports two kinds of indentation based syntactic -- structures which we now describe: -- --

Block A block of indentation c is a sequence of -- tokens with indentation at least c. An example of a block is a -- where clause of Haskell with no explicit braces.
Line fold A line fold starting at line l and -- indentation c is a sequence of tokens that start at line -- l and possibly continue to subsequent lines as long as the -- indentation is greater than c. Such a sequence of lines needs to -- be folded to a single line. An example is MIME headers. Line -- folding based binding separation is used in Haskell as well.

-- -- The module exports three combinators: indentParser, -- block and lineFold. To construct -- parsers for indentation based grammars one typically applies the -- indentParser. A block can then be parsed using the -- combinator block and a line fold using -- lineFold. Generating indentation aware tokenisers -- could be tricky. Given a language description via the -- Text.ParserCombinators.Parsec.Language.LanguageDef record use -- module -- Text.ParserCombinators.Parsec.IndentParser.Token to -- generate its tokeniser (this will apply indentParser on -- all tokenisers and then the user can forget about the -- indentParser combinator). -- -- Warning: -- -- Internally indentations are implemented using Parser states. If one -- wants to use parser states as well then use the getState and -- setState functions exported by this module instead of those -- exported from the parsec library. Also use the parseTest and -- runParser functions exported from this module instead of the ones -- exported from Parsec. module Text.ParserCombinators.Parsec.IndentParser -- | An indentation aware parser. type IndentParser tok st a = GenParser tok (st, IndentState) a type IndentCharParser st a = IndentParser Char st a -- | The mode of the indentation parser. data IndentMode -- | Ignore indentation NoIndent :: IndentMode -- | In block mode Block :: IndentMode -- | In line fold mode LineFold :: IndentMode -- | The combinator indentParser makes its input parser indentation aware. -- Usually one would want to make all the tokenisers indentation aware. indentParser :: IndentParser tok st a -> IndentParser tok st a -- | The parser noIndent p runs p ignoring any -- indentation based structure. This can be used to parse for example an -- explicitly braced where clause in Haskell. noIndent :: IndentParser tok st a -> IndentParser tok st a -- | The parser block p parses a block of -- p. block :: IndentParser tok st a -> IndentParser tok st a -- | The parser lineFold p parses a folded line of p. 
lineFold :: IndentParser tok st a -> IndentParser tok st a -- | The parser betweenOrBlock open close p parses p -- between open and close. If open is matched -- p is parsed in NoIndent mode otherwise a block -- p is parsed in Block mode. For example, the parser for -- parsing a Haskell where clause would look like -- --

--   whereClause = do reserved "where"; betweenOrBlock (symbol "{") (symbol "}") bindings
--

betweenOrBlock :: IndentParser tok st open -> IndentParser tok st close -> IndentParser tok st a -> IndentParser tok st a -- | Similar to betweenOrBlock but uses lineFold instead of block. betweenOrLineFold :: IndentParser tok st open -> IndentParser tok st close -> IndentParser tok st a -> IndentParser tok st a -- | Gets the current user state. getState :: IndentParser tok st st -- | Sets the user state. setState :: st -> IndentParser tok st () runParser :: IndentParser tok st a -> st -> IndentMode -> SourceName -> [tok] -> Either ParseError a -- | This is the function analogous to parseTest of the Parsec module. -- Given an indent parser p :: IndentParser tok () a and a list -- of tokens it runs the parser and prints the result. parseTest :: (Show a) => IndentParser tok () a -> [tok] -> IO () -- | A module for constructing indentation aware tokenisers that can be used -- in conjunction with -- Text.ParserCombinators.Parsec.Token. All the -- combinators take a -- Text.ParserCombinators.Parsec.Token.TokenParser as -- their first argument. For every field foo of -- Text.ParserCombinators.Parsec.Token.TokenParser this -- module exports a combinator foo. To define a tokeniser for an -- indentation based language a user first defines the appropriate -- Text.ParserCombinators.Parsec.Language.LanguageDef -- record, applies the combinator -- Text.ParserCombinators.Parsec.Token.makeTokenParser -- to get a -- Text.ParserCombinators.Parsec.Token.TokenParser -- record say tokP and then, instead of selecting the field -- foo of tokP, applies the combinator foo -- exported from this module to tokP. The semantics of the -- combinator foo is essentially the same as that of the field -- foo of -- Text.ParserCombinators.Parsec.Token.TokenParser but -- the returned parsers are indentation aware. Apart from these there are -- certain new combinators that are defined specifically for parsing -- certain indentation based syntactic constructs. 
(We have not defined -- squares; use brackets instead.) -- -- There are two important classes of parser combinators exported by this -- module: -- --

Grouping Parser Combinator A grouping parser combinator -- takes as input a parser say p and returns a parser that -- parses p between two grouping delimiters. There are -- three flavours of grouping parsers: foo, fooOrBlock -- and fooOrLineFold where foo can be one of -- angles, braces, parens, brackets. -- To illustrate we take foo to be braces. The parser -- braces tokP p parses p delimited by '{' and -- '}'. In this case p does not care about indentation (i.e. the -- parser p is run in NoIndent mode). The parser -- bracesOrBlock tokP p is like braces tokP p -- but if no explicit delimiting braces are given parses p -- within an indented block. Similarly bracesOrLineFold tokP -- p parses p between '{' and '}' and uses line fold when -- no explicit braces are given. These two variants can be defined -- as follows

-- --

--   bracesOrBlock tokP p    = braces tokP p <|> block p
--   bracesOrLineFold tokP p = braces tokP p <|> lineFold p
--

-- --

Separator Parser Combinator A separator parser combinator -- takes as input a parser say p and returns a parser that -- parses a list of p separated by a separator. The module -- exports the combinators fooSep, fooSep1, -- fooOrNewLineSep and fooOrNewLineSep1, where -- foo is either semi (in which case the separator is a -- semicolon ';') or comma (in which case the separator is a -- comma ',').

-- -- To illustrate the use of this module we now give, as an incomplete -- example, a parser that parses a where clause in Haskell. -- --

--   import qualified Text.ParserCombinators.Parsec.Language as L
--   import qualified Text.ParserCombinators.Parsec.Token as T
--   import qualified Text.ParserCombinators.Parsec.IndentParser.Token as IT
--

-- --

--   tokP = T.makeTokenParser L.haskellDef
--   semiOrNewLineSep = IT.semiOrNewLineSep tokP
--   bracesOrBlock = IT.bracesOrBlock tokP
--   identifier = IT.identifier tokP
--   ....
--   symbol = IT.symbol tokP
--

-- --

--    binding = semiOrNewLineSep bind
--    bind    = do id <- identifier
--                 symbol "="
--                 e <- expr
--                 return (id,e)
--   whereClause = do reserved "where"; bracesOrBlock binding
--

module Text.ParserCombinators.Parsec.IndentParser.Token type IndentCharParser st a = IndentParser Char st a type TokenParser st = TokenParser (st, IndentState) -- | Indentation aware parser to match a valid identifier of the language. identifier :: TokenParser st -> IndentCharParser st String -- | Indentation aware parser to match a reserved word of the language. -- The parser reserved tokP keyword parses the reserved word -- keyword. The string keyword should have been declared as a reserved -- word in the -- Text.ParserCombinators.Parsec.Language.LanguageDef -- record. reserved :: TokenParser st -> String -> IndentCharParser st () -- | Indentation aware parser to match an operator of the language. operator :: TokenParser st -> IndentCharParser st String -- | Indentation aware parser to match a reserved operator of the language. reservedOp :: TokenParser st -> String -> IndentCharParser st () -- | Indentation aware parser to match a character literal (the syntax is -- assumed to be that of Haskell which matches that of most programming -- languages). charLiteral :: TokenParser st -> IndentCharParser st Char -- | Indentation aware parser to match a string literal (the syntax is -- assumed to be that of Haskell which matches that of most programming -- languages). stringLiteral :: TokenParser st -> IndentCharParser st String -- | Indentation aware parser to match a natural number. natural :: TokenParser st -> IndentCharParser st Integer -- | Indentation aware parser to match an integer. integer :: TokenParser st -> IndentCharParser st Integer -- | Indentation aware parser to match a floating point number. float :: TokenParser st -> IndentCharParser st Double -- | Indentation aware parser to match either a natural number or a floating -- point number. naturalOrFloat :: TokenParser st -> IndentCharParser st (Either Integer Double) -- | Indentation aware parser to match an integer in decimal. decimal :: TokenParser st -> IndentCharParser st Integer -- | Indentation aware parser to match an integer in hexadecimal. 
hexadecimal :: TokenParser st -> IndentCharParser st Integer -- | Indentation aware parser to match an integer in octal. octal :: TokenParser st -> IndentCharParser st Integer -- | Matches a semicolon and returns ';'. semi :: TokenParser st -> IndentCharParser st String -- | Matches a colon and returns ':'. colon :: TokenParser st -> IndentCharParser st String -- | Matches a dot and returns '.'. dot :: TokenParser st -> IndentCharParser st String -- | Matches a comma and returns ','. comma :: TokenParser st -> IndentCharParser st String -- | Creates a lexeme parser. The resultant parser skips trailing spaces -- and is indentation aware. lexeme :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Indentation aware parser that is equivalent to string str. symbol :: TokenParser st -> String -> IndentCharParser st String -- | The parser whiteSpace skips spaces and comments. This does not care -- about indentation as skipping spaces should be done irrespective of -- the indentation. 
whiteSpace :: TokenParser st -> IndentCharParser st () -- | Given an indentation aware parser p as argument semiSep -- tokP returns a parser that parses zero or more occurrences of -- p separated by semicolon (';'). semiSep :: TokenParser st -> IndentCharParser st a -> IndentCharParser st [a] -- | Given an indentation aware parser p as argument semiSep1 -- tokP returns a parser that parses one or more occurrences of -- p separated by semicolon (';'). semiSep1 :: TokenParser st -> IndentCharParser st a -> IndentCharParser st [a] -- | Given an indentation aware parser p as argument commaSep -- tokP returns a parser that parses zero or more occurrences of -- p separated by comma (','). commaSep :: TokenParser st -> IndentCharParser st a -> IndentCharParser st [a] -- | Given an indentation aware parser p as argument commaSep1 -- tokP returns a parser that parses one or more occurrences of -- p separated by comma (','). commaSep1 :: TokenParser st -> IndentCharParser st a -> IndentCharParser st [a] -- | Given an indentation aware parser p as argument -- semiOrNewLineSep tokP returns a parser that parses zero or -- more occurrences of p separated by either semicolons (';') or -- newlines. To separate multiple occurrences of p in the same -- line use an explicit semicolon (';'). semiOrNewLineSep :: TokenParser st -> IndentCharParser st a -> IndentCharParser st [a] -- | Given an indentation aware parser p as argument -- semiOrNewLineSep1 tokP returns a parser that parses one or -- more occurrences of p separated by either semicolons (';') or -- newlines. To separate multiple occurrences of p in the same line -- use an explicit semicolon (';'). semiOrNewLineSep1 :: TokenParser st -> IndentCharParser st a -> IndentCharParser st [a] -- | Given an indentation aware parser p as argument -- commaOrNewLineSep tokP returns a parser that parses zero or -- more occurrences of p separated by either commas (',') or -- newlines. 
To separate multiple occurrences of p in the same -- line use an explicit comma (','). commaOrNewLineSep :: TokenParser st -> IndentCharParser st a -> IndentCharParser st [a] -- | Given an indentation aware parser p as argument -- commaOrNewLineSep1 tokP returns a parser that parses one or -- more occurrences of p separated by either commas (',') or -- newlines. To separate multiple occurrences of p in the same line -- use an explicit comma (','). commaOrNewLineSep1 :: TokenParser st -> IndentCharParser st a -> IndentCharParser st [a] -- | The parser parens tokP p parses p between '(' and -- ')'. The parser p does not care about indentation i.e. -- p is run in NoIndent mode. parens :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Similar to parens but when no explicit '(' and ')' are -- given, groups p by block indentation. parensOrBlock :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Similar to parens but when no explicit '(' and ')' are -- given, groups p by a line fold. parensOrLineFold :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | The parser braces tokP p parses p between '{' and -- '}'. The parser p does not care about indentation i.e. -- p is run in NoIndent mode. braces :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Similar to braces but when no explicit '{' and '}' are -- given, groups p by block indentation. bracesOrBlock :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Similar to braces but when no explicit '{' and '}' are -- given, groups p by a line fold. bracesOrLineFold :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | The parser angles tokP p parses p between angles. -- The parser p does not care about indentation i.e. p -- is run in NoIndent mode. 
angles :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Similar to angles but when no explicit angles are -- given, groups p by block indentation. anglesOrBlock :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Similar to angles but when no explicit angles are -- given, groups p by a line fold. anglesOrLineFold :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | The parser brackets tokP p parses p between '[' and -- ']'. The parser p does not care about indentation i.e. -- p is run in NoIndent mode. brackets :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Similar to brackets but when no explicit '[' and ']' -- are given, groups p by block indentation. bracketsOrBlock :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a -- | Similar to brackets but when no explicit '[' and ']' -- are given, groups p by a line fold. bracketsOrLineFold :: TokenParser st -> IndentCharParser st a -> IndentCharParser st a