{-# LANGUAGE FlexibleContexts #-} {-| Module to create indentation aware tokenisers. Despite the simplicity of parser combinators, getting tokenisers for common language contructs right is tricky. The parsec way of handling this involves the following steps. * Define the description of the language via the @`Text.Parsec.Language.LanguageDef`@ record. * Apply the 'Text.Parsec.Token.makeTokenParser' combinator get hold of @'Text.Parsec.Token.TokenParser'@. The actual tokenisers are the fields of this record. This module provides a similar interfaces for generating indentation aware tokenisers. There are few specific things that an indentation aware tokeniser should be careful aboute 1. All tokenisers should be indentation aware. 2. Whitespaces and comments should be skipped irrespective on which indentation mode one is in 3. The tokenisers should themselves be lexeme parsers and should skip trailing whitespace. Getting all this working can often be tricky. -} module Text.Parsec.IndentParsec.Token ( -- * Usage. -- $usage GenIndentTokenParser , IndentTokenParser , identifier , operator , reserved , reservedOp , charLiteral , stringLiteral , natural , integer , float , naturalOrFloat , decimal , hexadecimal , octal , symbol , lexeme , whiteSpace , semi , comma , colon , dot , parens, parensBlock , braces, bracesBlock , angles, anglesBlock , brackets, bracketsBlock , semiSep, semiSepOrFoldedLines , semiSep1, semiSepOrFoldedLines1 , commaSep, commaSepOrFoldedLines , commaSep1, commaSepOrFoldedLines1 ) where import Text.Parsec.IndentParsec.Prim import Text.Parsec.IndentParsec.Combinator import Text.Parsec(many) import qualified Text.Parsec.Token as T import Text.Parsec(Stream) import Text.Parsec.Combinator hiding (between) {- $usage For each combinator @foo@ for every field @foo@ of the 'Text.Parsec.Token.TokenParser' with essentially the same semantics but for the returned parser being indentation aware. There are certain new combinators that are defined specifically for parsing indentation based syntactic constructs: [Grouping Parsers] A grouping parser takes an input parser @p@ and returns a parser that parses @p@ between two /grouping delimiters/. There are three flavours of grouping parsers: @foo@, @fooBlock@ where @foo@ can be one of @angles@, @braces@, @parens@, @brackets@. For example, consider the parser @'braces' tokP p@ parses @p@ delimited by '{' and '}'. In this case @p@ does not care about indentation. The parser @'bracesBlock' tokP p@ is like @braces tokP p@ but if no explicit delimiting braces are given parses @p@ within an indented block. > bracesBlock tokP p = braces tokP p <|> blockOf p [Seperator Parsers] A seperator parser takes as input a parser say @p@ and returns a parser that parses a list of @p@ seperated by a seperator. The module exports the combinators @fooSep@, @fooSep1@, @fooSepOrFoldedLines@ and @fooSepOrFoldedLines1@, where @foo@ is either @semi@ (in which case the seperator is a semicolon) or @comma@ (in which case the seperator is a comma). To illustrate the use of this module we now give, as an incomplete example, a parser that parses a where clause in Haskell which illustrates the use of this module. > import qualified Text.Parsec.Language as L > import qualified Text.Parsec.Toke as T > import qualified Text.Parsec.IndentToken as IT > tokP = T.makeTokenParser L.haskellDef > mySemiSep = IT.semiSepOrFoldedLines tokP > myBraces = IT.bracesBlock tokP > identifier = IT.identifier tokP > .... > symbol = IT.symbol tokP > > binding = mySemiSep bind > bind = do id <- identifier > symbol (char '=') > e <- expr > return (id,e) > > whereClause = do reserved "where"; braceBlock binding -} type GenIndentTokenParser i s u m = T.GenTokenParser s u (IndentT i m) type IndentTokenParser s u m = GenIndentTokenParser HaskellLike s u m -- | Indentation aware tokeniser to match a valid identifier. identifier :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String identifier = tokeniser . T.identifier -- | Indentation aware tokeniser matches an operator. operator :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String operator = tokeniser . T.operator -- | Indentation aware tokeniser to match a reserved word. reserved :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> String -- ^ The reserved word. -> GenIndentParsecT i s u m () reserved tokP = tokeniser . T.reserved tokP -- | Indentation aware parser to match a reserved operator of the -- language. reservedOp :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> String -- ^ The reserved operator to be matched. -> GenIndentParsecT i s u m () reservedOp tokP = tokeniser . T.reservedOp tokP -- | Indentation aware parser to match a character literal (the syntax -- is assumend to be that of Hasekell which matches that of most -- programming language charLiteral :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Char charLiteral = tokeniser . T.charLiteral -- | Indentation aware parser to match a string literal (the syntax is -- assumend to be that of Hasekell which matches that of most -- programming language). stringLiteral :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String stringLiteral = tokeniser . T.stringLiteral -- | Indentation aware parser to match a natural number. natural :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer natural = tokeniser . T.natural -- | Indentation aware parser to match an integer. integer :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer integer = tokeniser . T.integer -- | Indentation aware tokeniser to match a floating point number. float :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Double float = tokeniser . T.float -- | Indentation aware tokensier to match either a natural number or -- Floating point number. naturalOrFloat :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m (Either Integer Double) naturalOrFloat = tokeniser . T.naturalOrFloat -- | Indentation aware tokensier to match an integer in decimal. decimal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer decimal = tokeniser . T.decimal -- | Indentation aware tokeniser to match an integer in hexadecimal. hexadecimal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer hexadecimal = tokeniser . T.hexadecimal -- | Indentation aware tokeniser to match an integer in ocatal. octal :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m Integer octal = tokeniser . T.octal -- | Indentation aware tokeniser that is equvalent to @`string`@. symbol :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> String -> GenIndentParsecT i s u m String symbol tokP = tokeniser . T.symbol tokP -- | Creates a lexeme tokeniser. The resultant tokeniser indentation -- aware and skips trailing white spaces/comments. lexeme :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a lexeme tokP = tokeniser . T.lexeme tokP -- | The parser whiteSpace skips spaces and comments. This does not -- care about indentation as skipping spaces should be done -- irrespective of the indentation whiteSpace :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m () whiteSpace = T.whiteSpace -- | Matches a semicolon and returns ';'. semi :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String semi = tokeniser . T.semi -- | Matches a comma and returns ",". comma :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String comma = tokeniser . T.comma -- | Matches a colon and returns ":". colon :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String colon = tokeniser . T.colon -- | Matches a dot and returns ".". dot :: (Indentation i, Monad m) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String dot = tokeniser . T.dot lparen :: (Monad m, Indentation i) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String rparen :: (Monad m, Indentation i) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String lbrace :: (Monad m, Indentation i) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String rbrace :: (Monad m, Indentation i) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String langle :: (Monad m, Indentation i) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String rangle :: (Monad m, Indentation i) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String lbracket :: (Monad m, Indentation i) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String rbracket :: (Monad m, Indentation i) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m String lparen tokP = symbol tokP "(" rparen tokP = symbol tokP ")" lbrace tokP = symbol tokP "{" rbrace tokP = symbol tokP "}" langle tokP = symbol tokP "<" rangle tokP = symbol tokP ">" lbracket tokP = symbol tokP "[" rbracket tokP = symbol tokP "]" -- | Match the input parser @p@ within a pair of paranthesis. parens :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a parens tokP = lparen tokP `between` rparen tokP {-| Same as `parens` but if no explicit paranthesis are given, matches @p@ inside an indented block. -} parensBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m a parensBlock tokP = lparen tokP `betweenBlock` rparen tokP -- | Match the input parser @p@ within a pair of braces braces :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a braces tokP = lbrace tokP `between` rbrace tokP {-| Same as `braces` but if no explicit braces are given, matches @p@ inside an indented block. -} bracesBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m a bracesBlock tokP = lbrace tokP `betweenBlock` rbrace tokP {-| Match the input parser @p@ within a pair of angular brackets, i.e. '<' and '>'. -} angles :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a angles tokP = langle tokP `between` rangle tokP {-| Same as `angles` but if no explicit anglular brackets are given, matches p inside and indented block. -} anglesBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m a anglesBlock tokP = langle tokP `betweenBlock` rangle tokP -- | Match p within a angular brackets i.e. '[' and ']'. brackets :: (Indentation i, Show i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m a brackets tokP = lbracket tokP `between` rbracket tokP {-| Same as `brackets` but if no explicit brackets are given, matches p inside and indented block. -} bracketsBlock :: (Monad m, Stream s (IndentT HaskellLike m) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m a bracketsBlock tokP = lbracket tokP `betweenBlock` rbracket tokP -- | Parse zero or more @p@ seperated by by a semicolon semiSep :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a] semiSep tokP p = sepBy p $ semi tokP -- | Parse one or more @p@ seperated by a semicolon semiSep1 :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a] semiSep1 tokP p = sepBy1 p $ semi tokP {-| Parse zero or more @p@ seperated by semicolon or new line. Long lines are continued using line folding. -} semiSepOrFoldedLines :: (Monad m, Stream s (IndentT HaskellLike m ) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a] semiSepOrFoldedLines tokP p = fmap concat . many . foldedLinesOf . sepEndBy1 p $ semi tokP {-| Parse one or more @p@ seperated by semicolon or new line. Long lines are continued using line folding. -} semiSepOrFoldedLines1 :: (Monad m, Stream s (IndentT HaskellLike m ) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a] semiSepOrFoldedLines1 tokP p = do first <- foldedLinesOf . sepEndBy1 p $ semi tokP rest <- semiSepOrFoldedLines tokP p return (first ++ rest) -- | Parse zero or more @p@ seperated by by a comma. commaSep :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a] commaSep tokP p = sepBy p $ comma tokP -- | Parse one or more @p@ seperated by a comma. commaSep1 :: (Indentation i, Monad m, Stream s (IndentT i m) t, Show t) => GenIndentTokenParser i s u m -> GenIndentParsecT i s u m a -> GenIndentParsecT i s u m [a] commaSep1 tokP p = sepBy1 p $ comma tokP {-| Parse zero or more @p@ seperated by comma or new line. Long lines are continued using line folding. -} commaSepOrFoldedLines :: (Monad m, Stream s (IndentT HaskellLike m ) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a] commaSepOrFoldedLines tokP p = fmap concat . many . foldedLinesOf . sepEndBy1 p $ comma tokP {-| Parse one or more @p@ seperated by comma or new line. Long lines are continued using line folding. -} commaSepOrFoldedLines1 :: (Monad m, Stream s (IndentT HaskellLike m ) t, Show t) => GenIndentTokenParser HaskellLike s u m -> GenIndentParsecT HaskellLike s u m a -> GenIndentParsecT HaskellLike s u m [a] commaSepOrFoldedLines1 tokP p = do first <- foldedLinesOf . sepEndBy1 p $ comma tokP rest <- commaSepOrFoldedLines tokP p return (first ++ rest)