-- | Parser combinator framework specialized to lexical analysis.
-- Tokens can be specified via simple fold functions,
-- and we include baked-in source location handling.
--
-- If you want to parse expressions instead of performing lexical
-- analysis, then try the @parsec@ or @attoparsec@ packages, which
-- have more general-purpose combinators.
--
-- Matchers for standard tokens like comments and strings
-- are in the "Text.Lexer.Inchworm.Char" module.
--
-- No dependencies other than the Haskell 'base' library.
--
-- __Minimal example__
--
-- The following code demonstrates how to perform lexical analysis
-- of a simple LISP-like language. We use two separate name classes,
-- one for variables that start with a lower-case letter,
-- and one for constructors that start with an upper-case letter.
--
-- Integers are scanned using the `scanInteger` function from the
-- "Text.Lexer.Inchworm.Char" module.
--
-- The result of @scanStringIO@ contains the list of leftover input
-- characters that could not be parsed. In a real lexer you should
-- check that this is empty to ensure there has not been a lexical
-- error.
--
-- @
-- import Text.Lexer.Inchworm.Char
-- import qualified Data.Char as Char
--
-- -- | A source token.
-- data Token
--         = KBra | KKet | KVar String | KCon String | KInt Integer
--         deriving Show
--
-- -- | A thing with attached location information.
-- data Located a
--         = Located FilePath Location a
--         deriving Show
--
-- -- | Scanner for a lispy language.
-- scanner :: FilePath
--         -> Scanner IO Location [Char] (Located Token)
-- scanner fileName
--  = skip Char.isSpace
--  $ alts [ fmap (stamp id)   $ accept '(' KBra
--         , fmap (stamp id)   $ accept ')' KKet
--         , fmap (stamp KInt) $ scanInteger
--         , fmap (stamp KVar)
--           $ munchWord (\\ix c -> if ix == 0 then Char.isLower c
--                                            else Char.isAlpha c)
--         , fmap (stamp KCon)
--           $ munchWord (\\ix c -> if ix == 0 then Char.isUpper c
--                                            else Char.isAlpha c)
--         ]
--  where -- Stamp a token with source location information.
--        stamp k (l, t)
--          = Located fileName l (k t)
--
-- main
--  = do let fileName = "Source.lispy"
--       let source   = "(some (Lispy like) 26 Program 93 (for you))"
--       toks <- scanStringIO source (scanner fileName)
--       print toks
-- @
--
module Text.Lexer.Inchworm
  ( -- * Basic Types
    Source
  , Scanner

    -- * Generic Scanning
  , scanListIO

    -- ** Source Construction
  , makeListSourceIO

    -- ** Scanner Evaluation
  , scanSourceToList

    -- * Combinators

    -- ** Basic
  , satisfies, skip

    -- ** Accept
  , accept, accepts

    -- ** From
  , from, froms

    -- ** Alternation
  , alt, alts

    -- ** Munching
  , munchPred, munchWord, munchFold
  ) where

import Text.Lexer.Inchworm.Source
import Text.Lexer.Inchworm.Scanner
import Text.Lexer.Inchworm.Combinator


-- | Run a scanner over a list of generic input tokens in the IO monad.
--
--   The result is a triple holding the list of values produced by the
--   scanner, the source location of the final input token, and the
--   input that remains.
--
--   NOTE: To scan a plain `String` of characters, use @scanStringIO@
--   from "Text.Lexer.Inchworm.Char" instead.
--
scanListIO
  :: Eq i
  => loc
     -- ^ Starting source location.
  -> (i -> loc -> loc)
     -- ^ Function to bump the current location by one input token.
  -> [i]
     -- ^ List of input tokens.
  -> Scanner IO loc [i] a
     -- ^ Scanner to apply.
  -> IO ([a], loc, [i])
scanListIO loc bump input scanner
  -- Wrap the list in a source first, then run the scanner over that source.
  = makeListSourceIO loc bump input
      >>= \listSource -> scanSourceToList listSource scanner