-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/

-- | Inchworm Lexer Framework
--
-- Parser combinator framework specialized to lexical analysis. Tokens
-- can be specified via simple fold functions, and we include baked in
-- source location handling. If you want to parse expressions instead of
-- tokens then try the parsec or attoparsec
-- packages, which have more general purpose combinators. Comes with
-- matchers for standard lexemes like integers, comments, and Haskell
-- style strings with escape handling. No dependencies other than the
-- Haskell base library.
@package inchworm
@version 1.0.2.2


module Text.Lexer.Inchworm.Source

-- | An abstract source of input tokens that we want to perform lexical
-- analysis on.
--
-- Each token is associated with a source location loc. A
-- sequence of tokens has type input, and a single token has type
-- (Elem input).
data Source m loc input
Source :: ((Elem input -> Bool) -> m ()) -> (forall a. m (Maybe a) -> m (Maybe a)) -> ((Elem input -> Bool) -> m (Maybe (loc, Elem input))) -> (forall s. Maybe Int -> (Int -> Elem input -> s -> Maybe s) -> s -> m (Maybe (loc, input))) -> (Elem input -> loc -> loc) -> m (loc, input) -> Source m loc input

-- | Skip over values from the source that match the given predicate.
[sourceSkip] :: Source m loc input -> (Elem input -> Bool) -> m ()

-- | Try to evaluate the given computation that may pull values from the
-- source. If it returns Nothing then rewind the source to the original
-- position.
[sourceTry] :: Source m loc input -> forall a. m (Maybe a) -> m (Maybe a)

-- | Pull a value from the source, provided it matches the given predicate.
[sourcePull] :: Source m loc input -> (Elem input -> Bool) -> m (Maybe (loc, Elem input))

-- | Use a fold function to select some consecutive tokens from the
-- source that we want to process, also passing the current index to the
-- fold function.
--
-- The maximum number of tokens to select is set by the first argument,
-- which can be set to Nothing for no maximum.
[sourcePulls] :: Source m loc input -> forall s. Maybe Int -> (Int -> Elem input -> s -> Maybe s) -> s -> m (Maybe (loc, input))

-- | Bump the source location using the given element.
[sourceBumpLoc] :: Source m loc input -> Elem input -> loc -> loc

-- | Get the remaining input.
[sourceRemaining] :: Source m loc input -> m (loc, input)

-- | A location in a source file.
data Location
Location :: !Int -> !Int -> Location

-- | Class of sequences of things.
class Sequence is where { type family Elem is; }

-- | Yield the length of a sequence.
length :: Sequence is => is -> Int

-- | Make a source from a list of input tokens, maintaining the state in
-- the IO monad.
makeListSourceIO :: forall i loc. Eq i => loc -> (i -> loc -> loc) -> [i] -> IO (Source IO loc [i])

instance GHC.Show.Show Text.Lexer.Inchworm.Source.Location
instance Text.Lexer.Inchworm.Source.Sequence [a]


module Text.Lexer.Inchworm.Scanner

-- | Scanner of input tokens that produces a result value of type
-- a when successful.
data Scanner m loc input a
Scanner :: (Source m loc input -> m (Maybe a)) -> Scanner m loc input a
[runScanner] :: Scanner m loc input a -> Source m loc input -> m (Maybe a)

-- | Apply a scanner to a source of input tokens, where the tokens are
-- represented as a lazy list.
--
-- The result values are also produced in a lazy list.
scanSourceToList :: Monad m => Source m loc [i] -> Scanner m loc [i] a -> m ([a], loc, [i])

instance GHC.Base.Monad m => GHC.Base.Functor (Text.Lexer.Inchworm.Scanner.Scanner m loc input)
instance GHC.Base.Monad m => GHC.Base.Applicative (Text.Lexer.Inchworm.Scanner.Scanner m loc input)
instance GHC.Base.Monad m => GHC.Base.Monad (Text.Lexer.Inchworm.Scanner.Scanner m loc input)


-- | Parser combinator framework specialized to lexical analysis. Tokens
-- can be specified via simple fold functions, and we include baked in
-- source location handling.
--
-- If you want to parse expressions instead of performing lexical
-- analysis then try the parsec or attoparsec packages,
-- which have more general purpose combinators.
--
-- Matchers for standard tokens like comments and strings are in the
-- Text.Lexer.Inchworm.Char module.
--
-- No dependencies other than the Haskell base library.
--
-- Minimal example
--
-- The following code demonstrates how to perform lexical analysis of a
-- simple LISP-like language. We use two separate name classes, one for
-- variables that start with a lower-case letter, and one for
-- constructors that start with an upper case letter.
--
-- Integers are scanned using the scanInteger function from the
-- Text.Lexer.Inchworm.Char module.
--
-- The result of scanStringIO contains the list of leftover
-- input characters that could not be parsed. In a real lexer you should
-- check that this is empty to ensure there has not been a lexical error.
--
-- import Text.Lexer.Inchworm.Char
-- import qualified Data.Char as Char
--
-- -- | A source token.
-- data Token
--         = KBra | KKet | KVar String | KCon String | KInt Integer
--         deriving Show
--
-- -- | A thing with attached location information.
-- data Located a
--         = Located FilePath Location a
--         deriving Show
--
-- -- | Scanner for a lispy language.
-- scanner :: FilePath
--         -> Scanner IO Location [Char] (Located Token)
-- scanner fileName
--  = skip Char.isSpace
--  $ alts [ fmap (stamp id)   $ accept '(' KBra
--         , fmap (stamp id)   $ accept ')' KKet
--         , fmap (stamp KInt) $ scanInteger
--         , fmap (stamp KVar)
--           $ munchWord (\ix c -> if ix == 0 then Char.isLower c
--                                            else Char.isAlpha c)
--         , fmap (stamp KCon)
--           $ munchWord (\ix c -> if ix == 0 then Char.isUpper c
--                                            else Char.isAlpha c)
--         ]
--  where  -- Stamp a token with source location information.
--         stamp k (l, t)
--          = Located fileName l (k t)
--
-- main
--  = do   let fileName = "Source.lispy"
--         let source   = "(some (Lispy like) 26 Program 93 (for you))"
--         toks <- scanStringIO source (scanner fileName)
--         print toks
--
module Text.Lexer.Inchworm
-- | An abstract source of input tokens that we want to perform lexical
-- analysis on.
--
-- Each token is associated with a source location loc. A
-- sequence of tokens has type input, and a single token has type
-- (Elem input).
data Source m loc input
-- | Scanner of input tokens that produces a result value of type
-- a when successful.
data Scanner m loc input a
-- | Scan a list of generic input tokens in the IO monad, returning the
-- source location of the final input token, along with the remaining
-- input.
--
-- NOTE: If you just want to scan a String of characters use
-- scanStringIO from Text.Lexer.Inchworm.Char
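--
-- For example, a minimal sketch that scans a list of Ints, using an
-- Int counter as the location type. The names scanInts and scanPositive
-- are illustrative only:
--
-- scanInts :: [Int] -> IO ([Int], Int, [Int])
-- scanInts input
--  = scanListIO 0 (\_tok loc -> loc + 1) input scanPositive
--  where scanPositive = fmap snd $ satisfies (> 0)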
scanListIO :: Eq i => loc -> (i -> loc -> loc) -> [i] -> Scanner IO loc [i] a -> IO ([a], loc, [i])
-- | Make a source from a list of input tokens, maintaining the state in
-- the IO monad.
makeListSourceIO :: forall i loc. Eq i => loc -> (i -> loc -> loc) -> [i] -> IO (Source IO loc [i])
-- | Apply a scanner to a source of input tokens, where the tokens are
-- represented as a lazy list.
--
-- The result values are also produced in a lazy list.
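--
-- For example, a sketch that builds a Source with makeListSourceIO and
-- runs a scanner over a String, much like scanStringIO does. The name
-- scanAll and the starting location Location 1 1 are assumptions:
--
-- scanAll :: Scanner IO Location [Char] a -> String -> IO ([a], Location, String)
-- scanAll scanner str
--  = do src <- makeListSourceIO (Location 1 1) bumpLocationWithChar str
--       scanSourceToList src scanner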
scanSourceToList :: Monad m => Source m loc [i] -> Scanner m loc [i] a -> m ([a], loc, [i])
-- | Accept the next token if it matches the given predicate, returning
-- that token as the result.
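--
-- For example, a sketch that accepts a single decimal digit, assuming
-- Data.Char is imported qualified as Char. The name scanDigit is
-- illustrative only:
--
-- scanDigit :: Monad m => Scanner m loc [Char] (loc, Char)
-- scanDigit = satisfies Char.isDigit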
satisfies :: Monad m => (Elem input -> Bool) -> Scanner m loc input (loc, Elem input)
-- | Skip tokens that match the given predicate, before applying the given
-- argument scanner.
--
-- When lexing most source languages you can use this to skip whitespace.
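--
-- For example, a sketch that skips whitespace before running the wrapped
-- scanner, assuming Data.Char is imported qualified as Char:
--
-- skipSpace :: Monad m => Scanner m loc [Char] a -> Scanner m loc [Char] a
-- skipSpace = skip Char.isSpace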
skip :: Monad m => (Elem input -> Bool) -> Scanner m loc input a -> Scanner m loc input a
-- | Accept the next input token if it is equal to the given one, and
-- return a result of type a.
accept :: (Monad m, Eq (Elem input)) => Elem input -> a -> Scanner m loc input (loc, a)
-- | Accept a fixed length sequence of tokens that match the given
-- sequence, and return a result of type a.
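--
-- For example, a sketch that accepts the two-character arrow token,
-- where the result type Tok is illustrative only:
--
-- data Tok = KArrow deriving Show
--
-- scanArrow :: Monad m => Scanner m loc [Char] (loc, Tok)
-- scanArrow = accepts "->" KArrow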
accepts :: (Monad m, Sequence input, Eq input) => input -> a -> Scanner m loc input (loc, a)
-- | Use the given function to check whether to accept the next token,
-- returning the result it produces.
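--
-- For example, a sketch that accepts a single digit and converts it to
-- its numeric value, assuming Data.Char is imported qualified as Char:
--
-- scanDigitValue :: Monad m => Scanner m loc [Char] (loc, Int)
-- scanDigitValue
--  = from (\c -> if Char.isDigit c then Just (Char.digitToInt c) else Nothing)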
from :: Monad m => (Elem input -> Maybe a) -> Scanner m loc input (loc, a)
-- | Use the given function to check whether to accept a fixed length
-- sequence of tokens, returning the result it produces.
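--
-- For example, a sketch that recognises the two-character sequence "if".
-- The name scanIf is illustrative only:
--
-- scanIf :: Monad m => Scanner m loc [Char] (loc, String)
-- scanIf = froms (Just 2) (\cs -> if cs == "if" then Just cs else Nothing)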
froms :: Monad m => Maybe Int -> (input -> Maybe a) -> Scanner m loc input (loc, a)
-- | Combine two argument scanners into a result scanner, where the first
-- argument scanner is tried before the second.
alt :: Monad m => Scanner m loc input a -> Scanner m loc input a -> Scanner m loc input a
-- | Combine a list of argument scanners into a result scanner, where each
-- argument scanner is tried in turn until we find one that matches; if
-- none match, the result scanner fails.
alts :: Monad m => [Scanner m loc input a] -> Scanner m loc input a
-- | Munch input tokens, using a predicate to select the prefix to
-- consider.
--
-- Given munchPred (Just n) match accept, we select a contiguous
-- sequence of tokens up to length n using the predicate match,
-- then pass that sequence to accept to make a result value out
-- of it. If match selects no tokens, or accept returns
-- Nothing then the scanner fails and no tokens are consumed from
-- the source.
--
-- For example, to scan natural numbers use:
--
-- scanNat :: Monad m => Scanner m loc [Char] (loc, Integer)
-- scanNat = munchPred Nothing match accept
--  where match  _ c = isDigit c
--        accept cs  = Just (read cs)
--
-- To match Haskell style constructor names use:
--
-- scanCon :: Monad m => Scanner m loc [Char] (loc, String)
-- scanCon = munchPred Nothing match accept
--  where match  0 c = isUpper c
--        match  _ c = isAlphaNum c
--        accept cs  = Just cs
--
-- If you want to detect built-in constructor names like Int and
-- Float then you can do it in the accept function and
-- produce a different result constructor for each one.
munchPred :: Monad m => Maybe Int -> (Int -> Elem input -> Bool) -> (input -> Maybe a) -> Scanner m loc input (loc, a)

-- | Like munchPred, but we accept prefixes of any length, and
-- always accept the input tokens that match.
munchWord :: Monad m => (Int -> Elem input -> Bool) -> Scanner m loc input (loc, input)

-- | Like munchPred, but we can use a fold function to select the
-- prefix of tokens to consider. This is useful when lexing comments and
-- string literals, where consecutive tokens can have special meaning (i.e.
-- escaped quote characters).
--
-- See the source of scanHaskellChar in the
-- Text.Lexer.Inchworm.Char module for an example of its usage.
munchFold :: Monad m => Maybe Int -> (Int -> Elem input -> state -> Maybe state) -> state -> (input -> Maybe a) -> Scanner m loc input (loc, a)


-- | Character based scanners.
module Text.Lexer.Inchworm.Char

-- | Scan a string in the IO monad.
scanStringIO :: String -> Scanner IO Location String a -> IO ([a], Location, String)

-- | A location in a source file.
data Location
Location :: !Int -> !Int -> Location

-- | Bump a location using the given character, updating the line and
-- column number as appropriate.
bumpLocationWithChar :: Char -> Location -> Location

-- | Scan a decimal integer, with optional - and + sign
-- specifiers.
scanInteger :: Monad m => Scanner m loc [Char] (loc, Integer)

-- | Scan a literal character, enclosed in single quotes.
--
-- We handle the escape codes listed in Section 2.6 of the Haskell
-- Report.
scanHaskellChar :: Monad m => Scanner m loc [Char] (loc, Char)

-- | Scan a literal string, enclosed in double quotes.
--
-- We handle the escape codes listed in Section 2.6 of the Haskell
-- Report, but not string gaps or the & terminator.
scanHaskellString :: Monad m => Scanner m loc [Char] (loc, String)

-- | Scan a Haskell block comment.
scanHaskellCommentBlock :: Monad m => Scanner m loc [Char] (loc, String)

-- | Scan a Haskell line comment.
scanHaskellCommentLine :: Monad m => Scanner m loc [Char] (loc, String)
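
-- The scanners in this module can be combined with alts from
-- Text.Lexer.Inchworm to build a scanner for a whole token type. A
-- minimal sketch, where the Token type and its constructors are
-- illustrative only:
--
-- data Token
--         = TInt Integer | TString String | TChar Char | TComment String
--         deriving Show
--
-- scanToken :: Monad m => Scanner m Location [Char] (Location, Token)
-- scanToken
--  = alts [ fmap (stamp TInt)     scanInteger
--         , fmap (stamp TString)  scanHaskellString
--         , fmap (stamp TChar)    scanHaskellChar
--         , fmap (stamp TComment) scanHaskellCommentLine
--         ]
--  where stamp k (l, t) = (l, k t)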