Text.Parsec.IndentParsec.Token
Contents
Description
Module to create indentation aware tokenisers. Despite the simplicity of parser combinators, getting tokenisers for common language contructs right is tricky. The parsec way of handling this involves the following steps.
- Define the description of the language via the
record.Text.Parsec.Language.LanguageDef
- Apply the
Text.Parsec.Token.makeTokenParser
combinator get hold of
. The actual tokenisers are the fields of this record.Text.Parsec.Token.TokenParser
This module provides a similar interfaces for generating indentation aware tokenisers. There are few specific things that an indentation aware tokeniser should be careful aboute
- All tokenisers should be indentation aware.
- Whitespaces and comments should be skipped irrespective on which indentation mode one is in
- The tokenisers should themselves be lexeme parsers and should skip trailing whitespace.
Getting all this working can often be tricky.
- type GenIndentTokenParser i s u m = GenTokenParser s u (IndentT i m)
- type IndentTokenParser s u m = GenIndentTokenParser HaskellLike s u m
- identifier :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String
- operator :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String
- reserved :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> String -> GenIndentParsecT i s u m ()
- reservedOp :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> String -> GenIndentParsecT i s u m ()
- charLiteral :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Char
- stringLiteral :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String
- natural :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer
- integer :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer
- float :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Double
- naturalOrFloat :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m (Either Integer Double)
- decimal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer
- hexadecimal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer
- octal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer
- symbol :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> String -> GenIndentParsecT i s u m String
- lexeme :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a
- whiteSpace :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m ()
- semi :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String
- comma :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String
- colon :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String
- dot :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String
- parens :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a
- parensBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m a
- braces :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a
- bracesBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m a
- angles :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a
- anglesBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m a
- brackets :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a
- bracketsBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m a
- semiSep :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a]
- semiSepOrFoldedLines :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a]
- semiSep1 :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a]
- semiSepOrFoldedLines1 :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a]
- commaSep :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a]
- commaSepOrFoldedLines :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a]
- commaSep1 :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a]
- commaSepOrFoldedLines1 :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a]
Usage.
For each combinator foo
for every field foo
of the
Text.Parsec.Token.TokenParser
with essentially the same semantics
but for the returned parser being indentation aware. There are certain
new combinators that are defined specifically for parsing indentation
based syntactic constructs:
- Grouping Parsers
- A grouping parser takes an input parser
p
and returns a parser that parsesp
between two grouping delimiters. There are three flavours of grouping parsers:foo
,fooBlock
wherefoo
can be one ofangles
,braces
,parens
,brackets
. For example, consider the parser
parsesbraces
tokP pp
delimited by '{' and '}'. In this casep
does not care about indentation. The parser
is likebracesBlock
tokP pbraces tokP p
but if no explicit delimiting braces are given parsesp
within an indented block.
bracesBlock tokP p = braces tokP p <|> blockOf p
- Seperator Parsers
- A seperator parser takes as input a parser say
p
and returns a parser that parses a list ofp
seperated by a seperator. The module exports the combinatorsfooSep
,fooSep1
,fooSepOrFoldedLines
andfooSepOrFoldedLines1
, wherefoo
is eithersemi
(in which case the seperator is a semicolon) orcomma
(in which case the seperator is a comma).
To illustrate the use of this module we now give, as an incomplete example, a parser that parses a where clause in Haskell which illustrates the use of this module.
import qualified Text.Parsec.Language as L import qualified Text.Parsec.Toke as T import qualified Text.Parsec.IndentToken as IT tokP = T.makeTokenParser L.haskellDef mySemiSep = IT.semiSepOrFoldedLines tokP myBraces = IT.bracesBlock tokP identifier = IT.identifier tokP .... symbol = IT.symbol tokP binding = mySemiSep bind bind = do id <- identifier symbol (char '=') e <- expr return (id,e) whereClause = do reserved "where"; braceBlock binding
type GenIndentTokenParser i s u m = GenTokenParser s u (IndentT i m)Source
type IndentTokenParser s u m = GenIndentTokenParser HaskellLike s u mSource
identifier :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m StringSource
Indentation aware tokeniser to match a valid identifier.
operator :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m StringSource
Indentation aware tokeniser matches an operator.
Arguments
:: (Indentation i, Monad m) | |
=> GenIndentTokenParser i s u m | |
-> String | The reserved word. |
-> GenIndentParsecT i s u m () |
Indentation aware tokeniser to match a reserved word.
Arguments
:: (Indentation i, Monad m) | |
=> GenIndentTokenParser i s u m | |
-> String | The reserved operator to be matched. |
-> GenIndentParsecT i s u m () |
Indentation aware parser to match a reserved operator of the language.
charLiteral :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m CharSource
Indentation aware parser to match a character literal (the syntax is assumend to be that of Hasekell which matches that of most programming language
stringLiteral :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m StringSource
Indentation aware parser to match a string literal (the syntax is assumend to be that of Hasekell which matches that of most programming language).
natural :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m IntegerSource
Indentation aware parser to match a natural number.
integer :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m IntegerSource
Indentation aware parser to match an integer.
float :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m DoubleSource
Indentation aware tokeniser to match a floating point number.
naturalOrFloat :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m (Either Integer Double)Source
Indentation aware tokensier to match either a natural number or Floating point number.
decimal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m IntegerSource
Indentation aware tokensier to match an integer in decimal.
hexadecimal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m IntegerSource
Indentation aware tokeniser to match an integer in hexadecimal.
octal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m IntegerSource
Indentation aware tokeniser to match an integer in ocatal.
symbol :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> String -> GenIndentParsecT i s u m StringSource
Indentation aware tokeniser that is equvalent to
.
string
lexeme :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m aSource
Creates a lexeme tokeniser. The resultant tokeniser indentation aware and skips trailing white spaces/comments.
whiteSpace :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m ()Source
The parser whiteSpace skips spaces and comments. This does not care about indentation as skipping spaces should be done irrespective of the indentation
semi :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m StringSource
Matches a semicolon and returns ';'.
comma :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m StringSource
Matches a comma and returns ,.
colon :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m StringSource
Matches a colon and returns :.
dot :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m StringSource
Matches a dot and returns ..
parens :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m aSource
Match the input parser p
within a pair of paranthesis.
parensBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m aSource
Same as parens
but if no explicit paranthesis are given, matches p
inside an indented block.
braces :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m aSource
Match the input parser p
within a pair of braces
bracesBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m aSource
Same as braces
but if no explicit braces are given, matches p
inside an indented block.
angles :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m aSource
anglesBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m aSource
Same as angles
but if no explicit anglular brackets are given,
matches p inside and indented block.
brackets :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m aSource
Match p within a angular brackets i.e. '[' and ']'.
bracketsBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m aSource
Same as brackets
but if no explicit brackets are given, matches p
inside and indented block.
semiSep :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a]Source
Parse zero or more p
seperated by by a semicolon
semiSepOrFoldedLines :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a]Source
Parse zero or more p
seperated by semicolon or new line. Long lines
are continued using line folding.
semiSep1 :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a]Source
Parse one or more p
seperated by a semicolon
semiSepOrFoldedLines1 :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a]Source
Parse one or more p
seperated by semicolon or new line. Long lines
are continued using line folding.
commaSep :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a]Source
Parse zero or more p
seperated by by a comma.
commaSepOrFoldedLines :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a]Source
Parse zero or more p
seperated by comma or new line. Long lines are
continued using line folding.
commaSep1 :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a]Source
Parse one or more p
seperated by a comma.
commaSepOrFoldedLines1 :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a]Source
Parse one or more p
seperated by comma or new line. Long lines are
continued using line folding.