-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Inchworm Lexer Framework -- -- Parser combinator framework specialized to lexical analysis. Tokens -- can be specified via simple fold functions, and we include baked in -- source location handling. If you want to parse expressions instead of -- tokens then try the parsec or attoparsec -- packages, which have more general purpose combinators. Comes with -- matchers for standard lexemes like integers, comments, and Haskell -- style strings with escape handling. No dependencies other than the -- Haskell base library. @package inchworm @version 1.0.2.2 module Text.Lexer.Inchworm.Source -- | An abstract source of input tokens that we want to perform lexical -- analysis on. -- -- Each token is associated with a source location loc. A -- sequence of tokens has type input, and a single token type -- (Elem input). data Source m loc input Source :: ((Elem input -> Bool) -> m ()) -> (forall a. m (Maybe a) -> m (Maybe a)) -> ((Elem input -> Bool) -> m (Maybe (loc, Elem input))) -> (forall s. Maybe Int -> (Int -> Elem input -> s -> Maybe s) -> s -> m (Maybe (loc, input))) -> (Elem input -> loc -> loc) -> m (loc, input) -> Source m loc input -- | Skip over values from the source that match the given predicate. [sourceSkip] :: Source m loc input -> (Elem input -> Bool) -> m () -- | Try to evaluate the given computation that may pull values from the -- source. If it returns Nothing then rewind the source to the original -- position. [sourceTry] :: Source m loc input -> forall a. m (Maybe a) -> m (Maybe a) -- | Pull a value from the source, provided it matches the given predicate. [sourcePull] :: Source m loc input -> (Elem input -> Bool) -> m (Maybe (loc, Elem input)) -- | Use a fold function to select some consecutive tokens from the -- source that we want to process, also passing the current index to the -- fold function. 
-- -- The maximum number of tokens to select is set by the first argument, -- which can be set to Nothing for no maximum. [sourcePulls] :: Source m loc input -> forall s. Maybe Int -> (Int -> Elem input -> s -> Maybe s) -> s -> m (Maybe (loc, input)) -- | Bump the source location using the given element. [sourceBumpLoc] :: Source m loc input -> Elem input -> loc -> loc -- | Get the remaining input. [sourceRemaining] :: Source m loc input -> m (loc, input) -- | A location in a source file. data Location Location :: !Int -> !Int -> Location -- | Class of sequences of things. class Sequence is where { type family Elem is; } -- | Yield the length of a sequence. length :: Sequence is => is -> Int -- | Make a source from a list of input tokens, maintaining the state in -- the IO monad. makeListSourceIO :: forall i loc. Eq i => loc -> (i -> loc -> loc) -> [i] -> IO (Source IO loc [i]) instance GHC.Show.Show Text.Lexer.Inchworm.Source.Location instance Text.Lexer.Inchworm.Source.Sequence [a] module Text.Lexer.Inchworm.Scanner -- | Scanner of input tokens that produces a result value of type -- a when successful. data Scanner m loc input a Scanner :: (Source m loc input -> m (Maybe a)) -> Scanner m loc input a [runScanner] :: Scanner m loc input a -> Source m loc input -> m (Maybe a) -- | Apply a scanner to a source of input tokens, where the tokens are -- represented as a lazy list. -- -- The result values are also produced in a lazy list. scanSourceToList :: Monad m => Source m loc [i] -> Scanner m loc [i] a -> m ([a], loc, [i]) instance GHC.Base.Monad m => GHC.Base.Functor (Text.Lexer.Inchworm.Scanner.Scanner m loc input) instance GHC.Base.Monad m => GHC.Base.Applicative (Text.Lexer.Inchworm.Scanner.Scanner m loc input) instance GHC.Base.Monad m => GHC.Base.Monad (Text.Lexer.Inchworm.Scanner.Scanner m loc input) -- | Parser combinator framework specialized to lexical analysis. 
Tokens -- can be specified via simple fold functions, and we include baked in -- source location handling. -- -- If you want to parse expressions instead of performing lexical -- analysis then try the parsec or attoparsec packages, -- which have more general purpose combinators. -- -- Matchers for standard tokens like comments and strings are in the -- Text.Lexer.Inchworm.Char module. -- -- No dependencies other than the Haskell base library. -- -- Minimal example -- -- The following code demonstrates how to perform lexical analysis of a -- simple LISP-like language. We use two separate name classes, one for -- variables that start with a lower-case letter, and one for -- constructors that start with an upper case letter. -- -- Integers are scanned using the scanInteger function from the -- Text.Lexer.Inchworm.Char module. -- -- The result of scanStringIO contains the list of leftover -- input characters that could not be parsed. In a real lexer you should -- check that this is empty to ensure there has not been a lexical error. -- --
--   import Text.Lexer.Inchworm.Char
--   import qualified Data.Char      as Char
--   
--   -- | A source token.
--   data Token 
--           = KBra | KKet | KVar String | KCon String | KInt Integer
--           deriving Show
--   
--   -- | A thing with attached location information.
--   data Located a
--           = Located FilePath Location a
--           deriving Show
--   
--   -- | Scanner for a lispy language.
--   scanner :: FilePath
--           -> Scanner IO Location [Char] (Located Token)
--   scanner fileName
--    = skip Char.isSpace
--    $ alts [ fmap (stamp id)   $ accept '(' KBra
--           , fmap (stamp id)   $ accept ')' KKet
--           , fmap (stamp KInt) $ scanInteger 
--           , fmap (stamp KVar)
--             $ munchWord (\ix c -> if ix == 0 then Char.isLower c
--                                              else Char.isAlpha c) 
--           , fmap (stamp KCon) 
--             $ munchWord (\ix c -> if ix == 0 then Char.isUpper c
--                                              else Char.isAlpha c)
--           ]
--    where  -- Stamp a token with source location information.
--           stamp k (l, t) 
--             = Located fileName l (k t)
--   
--   main 
--    = do   let fileName = "Source.lispy"
--           let source   = "(some (Lispy like) 26 Program 93 (for you))"
--           toks    <- scanStringIO source (scanner fileName)
--           print toks
--   
module Text.Lexer.Inchworm -- | An abstract source of input tokens that we want to perform lexical -- analysis on. -- -- Each token is associated with a source location loc. A -- sequence of tokens has type input, and a single token type -- (Elem input). data Source m loc input -- | Scanner of input tokens that produces a result value of type -- a when successful. data Scanner m loc input a -- | Scan a list of generic input tokens in the IO monad, returning the -- source location of the final input token, along with the remaining -- input. -- -- NOTE: If you just want to scan a String of characters use -- scanStringIO from Text.Lexer.Inchworm.Char scanListIO :: Eq i => loc -> (i -> loc -> loc) -> [i] -> Scanner IO loc [i] a -> IO ([a], loc, [i]) -- | Make a source from a list of input tokens, maintaining the state in -- the IO monad. makeListSourceIO :: forall i loc. Eq i => loc -> (i -> loc -> loc) -> [i] -> IO (Source IO loc [i]) -- | Apply a scanner to a source of input tokens, where the tokens are -- represented as a lazy list. -- -- The result values are also produced in a lazy list. scanSourceToList :: Monad m => Source m loc [i] -> Scanner m loc [i] a -> m ([a], loc, [i]) -- | Accept the next token if it matches the given predicate, returning -- that token as the result. satisfies :: Monad m => (Elem input -> Bool) -> Scanner m loc input (loc, Elem input) -- | Skip tokens that match the given predicate, before applying the given -- argument scanner. -- -- When lexing most source languages you can use this to skip whitespace. skip :: Monad m => (Elem input -> Bool) -> Scanner m loc input a -> Scanner m loc input a -- | Accept the next input token if it is equal to the given one, and -- return a result of type a. accept :: (Monad m, Eq (Elem input)) => Elem input -> a -> Scanner m loc input (loc, a) -- | Accept a fixed length sequence of tokens that match the given -- sequence, and return a result of type a. 
accepts :: (Monad m, Sequence input, Eq input) => input -> a -> Scanner m loc input (loc, a) -- | Use the given function to check whether to accept the next token, -- returning the result it produces. from :: Monad m => (Elem input -> Maybe a) -> Scanner m loc input (loc, a) -- | Use the given function to check whether to accept a fixed length -- sequence of tokens, returning the result it produces. froms :: Monad m => Maybe Int -> (input -> Maybe a) -> Scanner m loc input (loc, a) -- | Combine two argument scanners into a result scanner, where the first -- argument scanner is tried before the second. alt :: Monad m => Scanner m loc input a -> Scanner m loc input a -> Scanner m loc input a -- | Combine a list of argument scanners into a result scanner, where each -- argument scanner is tried in turn until we find one that matches (or -- not). alts :: Monad m => [Scanner m loc input a] -> Scanner m loc input a -- | Munch input tokens, using a predicate to select the prefix to -- consider. -- -- Given munchPred (Just n) match accept, we select a contiguous -- sequence of tokens up to length n using the predicate match, -- then pass that sequence to accept to make a result value out -- of it. If match selects no tokens, or accept returns -- Nothing then the scanner fails and no tokens are consumed from -- the source. -- -- For example, to scan natural numbers use: -- --
--   scanNat :: Monad m => Scanner m loc [Char] (loc, Integer)
--   scanNat = munchPred Nothing match accept
--           where match _ c = isDigit c
--                 accept cs = Just (read cs)
--   
-- -- To match Haskell style constructor names use: -- --
--   scanCon :: Monad m => Scanner m loc [Char] (loc, String)
--   scanCon = munchPred Nothing match accept
--           where  match 0 c = isUpper    c
--                  match _ c = isAlphaNum c
--                  accept cs = Just cs
--   
-- -- If you want to detect built-in constructor names like Int and -- Float then you can do it in the accept function and -- produce a different result constructor for each one. munchPred :: Monad m => Maybe Int -> (Int -> Elem input -> Bool) -> (input -> Maybe a) -> Scanner m loc input (loc, a) -- | Like munchPred, but we accept prefixes of any length, and -- always accept the input tokens that match. munchWord :: Monad m => (Int -> Elem input -> Bool) -> Scanner m loc input (loc, input) -- | Like munchPred, but we can use a fold function to select the -- prefix of tokens to consider. This is useful when lexing comments, and -- string literals where consecutive tokens can have special meaning (i.e. -- escaped quote characters). -- -- See the source of scanHaskellChar in the -- Text.Lexer.Inchworm.Char module for an example of its usage. munchFold :: Monad m => Maybe Int -> (Int -> Elem input -> state -> Maybe state) -> state -> (input -> Maybe a) -> Scanner m loc input (loc, a) -- | Character based scanners. module Text.Lexer.Inchworm.Char -- | Scan a string in the IO monad. scanStringIO :: String -> Scanner IO Location String a -> IO ([a], Location, String) -- | A location in a source file. data Location Location :: !Int -> !Int -> Location -- | Bump a location using the given character, updating the line and -- column number as appropriate. bumpLocationWithChar :: Char -> Location -> Location -- | Scan a decimal integer, with optional - and + sign -- specifiers. scanInteger :: Monad m => Scanner m loc [Char] (loc, Integer) -- | Scan a literal character, enclosed in single quotes. -- -- We handle the escape codes listed in Section 2.6 of the Haskell -- Report. scanHaskellChar :: Monad m => Scanner m loc [Char] (loc, Char) -- | Scan a literal string, enclosed in double quotes. -- -- We handle the escape codes listed in Section 2.6 of the Haskell -- Report, but not string gaps or the & terminator. 
scanHaskellString :: Monad m => Scanner m loc [Char] (loc, String) -- | Scan a Haskell block comment. scanHaskellCommentBlock :: Monad m => Scanner m loc [Char] (loc, String) -- | Scan a Haskell line comment. scanHaskellCommentLine :: Monad m => Scanner m loc [Char] (loc, String)