{- gulcii -- graphical untyped lambda calculus interpreter Copyright (C) 2011, 2013 Claude Heiland-Allen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -} module Parse where import Control.Applicative(Applicative, Alternative, pure, empty, (<|>), (<$>), (<*>), (<*)) {- Strip comments -------------- A comment is everything from '#' to the end of the line. -} decomment :: String -> String decomment = concatMap (fst . break ('#'==)) . lines {- Tokenize -------- -} digits :: String digits = "0123456789" lowers :: String lowers = "abcdefghijklmnopqrstuvwxyz" uppers :: String uppers = "ABCDEFGHIJKLMNOPQRSTUVWXYZ@" alpha :: String alpha = lowers ++ uppers alphanum :: String alphanum = alpha ++ digits symbols :: String symbols = "\\.!?()[],=~:{}" spaces :: String spaces = " " {- Split a string into tokens (each itself a string), such that each token consists of either all digits, all letters, or a single symbol. Use whitespace to separate tokens. -} tokenize :: String -> Maybe [String] tokenize [] = Just [] tokenize (c:cs) | c `elem` digits = let (t,ts) = span (`elem` digits) cs in ((c:t):) <$> tokenize ts | c `elem` lowers = let (t,ts) = span (`elem` alphanum) cs in ((c:t):) <$> tokenize ts | c `elem` symbols = ([c]:) <$> tokenize cs | c `elem` spaces = tokenize cs | otherwise = Nothing {- Parsing primitives ------------------ A parser takes a list of tokens to a list of possible partial parses. -} newtype Parser s t = P{ unP :: [s] -> [(t, [s])] } instance Functor (Parser s) where fmap f (P p) = P (\q -> [ (f v, s) | (v,s) <- p q ] ) instance Applicative (Parser s) where pure f = P (\q -> [(f, q)]) P p1 <*> P p2 = P (\q -> [ (u v, t) | (u, s) <- p1 q, (v, t) <- p2 s ]) instance Alternative (Parser s) where empty = P (\_ -> []) P p1 <|> P p2 = P (\q -> p1 q ++ p2 q) {- Accept a token that satisfies a predicate. -} satisfy :: (s -> Bool) -> Parser s s satisfy p = P (\q -> case q of (x:xs) | p x -> [(x,xs)] _ -> []) {- Accept a specific token. -} sym :: Eq s => s -> Parser s s sym a = satisfy (== a) {- Accept some p's separated by s's. -} someSep :: Parser s a -> Parser s t -> Parser s [t] someSep s p = ((:[]) <$> p) <|> ((:) <$> p <* s <*> someSep s p) {- Accept some p's separated by s's, or nothing. -} manySep :: Parser s a -> Parser s t -> Parser s [t] manySep s p = pure [] <|> someSep s p {- Accept a name consisting of letters. -} name :: Parser String String name = P (\q -> case q of (p@(r:_):ps) | r `elem` lowers -> [(p, ps)] _ -> []) {- Accept an integer consisting of digits. -} integer :: Parser String Integer integer = P (\q -> case q of (p:ps) | all (`elem` digits) p -> [(read p, ps)] _ -> [])