{- Copyright 2008 Uwe Hollerbach Portions of this were derived from Jonathan Tang's haskell tutorial "Write yourself a scheme in 48 hours" and are thus Copyright Jonathan Tang (but I can't easily tell anymore who originally wrote what) This file is part of haskeem. haskeem is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. haskeem is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with haskeem; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA $Id: parser.hs,v 1.14 2009-06-27 20:31:52 uwe Exp $ -} module Parser (readExpr, readExprList, readNumber) where import Prelude import Data.Char import Data.Ratio import Text.ParserCombinators.Parsec as TPCP hiding (spaces) import Control.Monad.Error as CME import qualified Data.IntMap as DIM import LispData -- Parsers for the various kinds of LispVal -- "#!/some/path/to/executable" at the top of the file, to enable -- scheme "shell" scripts: for the rest of the program, it's a comment hashbang :: Parser Char hashbang = do char '#' char '!' many (noneOf "\r\n") return ' ' -- semicolon to end-of-line, the oldest style of lisp comment comment :: Parser Char comment = do char ';' many (noneOf "\r\n") return ' ' spaces :: Parser () spaces = skipMany1 (comment <|> space) -- This is not quite R6RS-compliant: R6RS allows '.' symbol :: Parser Char symbol = oneOf "!$%&*+-/:<=>?@^_~" -- This is a small extension to R6RS controlChar :: Parser Char controlChar = do char '^' c <- oneOf (['A' .. 'Z'] ++ "[\\]^_") return (chr (ord c + 1 - ord 'A')) namedChar :: Parser Char namedChar = do name <- (string "alarm" <|> string "backspace" <|> string "delete" <|> string "esc" <|> string "linefeed" <|> (TPCP.try (string "newline")) <|> string "nul" <|> string "page" <|> string "return" <|> string "space" <|> string "tab" <|> string "vtab") case name of "nul" -> return (chr 0) "alarm" -> return (chr 7) "backspace" -> return (chr 8) "tab" -> return '\t' "linefeed" -> return '\n' "newline" -> return '\n' "vtab" -> return (chr 11) "page" -> return (chr 12) "return" -> return '\r' "esc" -> return (chr 27) "space" -> return ' ' "delete" -> return (chr 127) parseChar :: Parser LispVal parseChar = do char '#' char '\\' c <- (TPCP.try controlChar) <|> (TPCP.try namedChar) <|> anyChar return (Char c) -- This is not quite R6RS-compliant: R6RS requires a hex escape spec, -- and it forbids the "otherwise" clause below. oh well... later maybe escChar :: Parser Char escChar = do char '\\' c <- anyChar return (case c of 'a' -> chr 7 'b' -> chr 8 't' -> '\t' 'n' -> '\n' 'v' -> chr 11 'f' -> chr 12 'r' -> '\r' _ -> c) parseBool :: Parser LispVal parseBool = do char '#' v <- oneOf "tTfF" return (case v of 't' -> lispTrue 'T' -> lispTrue 'f' -> lispFalse 'F' -> lispFalse) parseString :: Parser LispVal parseString = do char '"' x <- many (escChar <|> (noneOf "\"")) char '"' return (String x) parseSymbol :: Parser LispVal parseSymbol = do first <- letter <|> symbol rest <- many (letter <|> digit <|> symbol) return (Symbol (first:rest)) readBaseInt :: Integer -> String -> Integer readBaseInt b s = foldl ma 0 s where ma v1 v2 = b*v1 + (toInteger (digitToInt v2)) readBaseFrac :: Integer -> String -> Double readBaseFrac _ [] = 0.0 readBaseFrac b s = r * foldr ma 0 s where r = 1.0/(fromInteger b) ma v1 v2 = fromIntegral (digitToInt v1) + r*v2 parseHdr :: Parser (Char, Integer) parseHdr = do b <- option 'd' (char '#' >> oneOf "bodxBODX") s <- option '+' (oneOf "+-") let base = (case b of 'b' -> 2 'B' -> 2 'o' -> 8 'O' -> 8 'd' -> 10 'D' -> 10 'x' -> 16 'X' -> 16) return (s, base) baseDigits :: Integer -> String baseDigits 2 = "01" baseDigits 8 = "01234567" baseDigits 10 = "0123456789" baseDigits 16 = "0123456789abcdefABCDEF" int :: String int = "int" -- The fact that this parser can deal with floating-point numbers -- in bases 2, 8, and 16 as well as 10 is an extension of R6RS. -- Parse first alternative for floating-point numbers: \d+(\.\d*)? parseF1 :: Integer -> Parser (String,String) parseF1 b = do ip <- many1 (oneOf (baseDigits b)) fp <- option int (char '.' >> many (oneOf (baseDigits b))) return (ip,fp) -- Parse second alternative for floating-point numbers: \.\d+ parseF2 :: Integer -> Parser (String,String) parseF2 b = do char '.' fp <- many1 (oneOf (baseDigits b)) return ("0",fp) -- Parse the exponent parseExp :: Integer -> Parser Integer parseExp b = do if b == 16 then (oneOf "xX") else (oneOf "eExX") s <- option '+' (oneOf "+-") num <- many1 (oneOf (baseDigits b)) let e = readBaseInt b num return (if s == '-' then (-e) else e) powi :: Integer -> Integer -> Integer powi b e | e == 0 = 1 | e < 0 = error "negative exponent in powi" | even e = powi (b*b) (e `quot` 2) | otherwise = b * (powi b (e - 1)) pow :: Integer -> Integer -> Double pow b e = if e >= 0 then fromInteger (powi b e) else recip (fromInteger (powi b (-e))) -- Parse an integer or a floating-point number. This parser will return -- numbers written as aaaEbb (with no decimal point) as integers, if the -- exponent bb is non-negative. parseIntOrFlt :: Parser LispVal parseIntOrFlt = do (s, b) <- parseHdr (ip, fp) <- (parseF1 b) <|> (parseF2 b) e <- option 0 (parseExp b) let fpi = if fp == int then "0" else fp vf = (pow b e) * (fromInteger (readBaseInt b ip) + readBaseFrac b fpi) vi = (powi b e) * (readBaseInt b ip) if (fp == int && e >= 0) then return (IntNumber (if s == '-' then (-vi) else vi)) else return (FltNumber (if s == '-' then (-vf) else vf)) -- Parse a rational number written as numerator/denominator. This parser -- accepts and understands rational infinity, both positive and negative, -- and rational not-a-number: +infinity is written as 1/0, -infinity as -- -1/0, and not-a-number as 0/0. That's an incompatible extension of R6RS. parseRat :: Parser LispVal parseRat = do (s, b) <- parseHdr nstr <- many1 (oneOf (baseDigits b)) char '/' dstr <- many1 (oneOf (baseDigits b)) let num = readBaseInt b nstr den = readBaseInt b dstr ns = if s == '-' then (-num) else num val = if den /= 0 then ns % den else if ns > 0 then myRatPInf else if ns < 0 then myRatNInf else myRatNaN if (denominator val) == 1 then return (IntNumber (numerator val)) else return (RatNumber val) -- Parse a couple of special floating-point numbers mandated by R6RS parseNaNInf :: Parser LispVal parseNaNInf = do val <- (TPCP.try (string "+nan.0")) <|> (TPCP.try (string "-nan.0")) <|> (TPCP.try (string "+inf.0")) <|> (TPCP.try (string "-inf.0")) case val of "+nan.0" -> return (FltNumber myFltNaN) "-nan.0" -> return (FltNumber myFltNaN) "+inf.0" -> return (FltNumber myFltPInf) "-inf.0" -> return (FltNumber myFltNInf) parseNumber :: Parser LispVal parseNumber = (TPCP.try parseNaNInf) <|> (TPCP.try parseRat) <|> parseIntOrFlt -- Parsers for the abbreviations for the various kinds of quoting entities: -- ' => (quote ) -- ` => (quasiquote ) -- , => (unquote ) -- ,@ => (unquote-splicing ) parseQQ :: Parser LispVal parseQQ = do char '`' x <- parseExpr return (List [Symbol "quasiquote", x]) parseQ :: Parser LispVal parseQ = do char '\'' x <- parseExpr return (List [Symbol "quote", x]) parseUQ :: Parser LispVal parseUQ = do char ',' x <- parseExpr return (List [Symbol "unquote", x]) parseUQS :: Parser LispVal parseUQS = do char ',' char '@' x <- parseExpr return (List [Symbol "unquote-splicing", x]) parseQuoted :: Parser LispVal parseQuoted = (TPCP.try parseUQS) <|> (TPCP.try parseUQ) <|> (TPCP.try parseQQ) <|> parseQ -- Parser for a dotted-list or a regular list. Due to the representation of -- scheme lists as haskell lists rather than as dotted-pairs, it's slightly -- tricky to get the case of (a . (b . (c . ()))) and similar forms to come -- out right; however, that is explicitly described as exactly identical to -- the list (a b c) according to the RnRS standard, so it has to be treated -- correctly. parseDottedList :: Parser LispVal parseDottedList = do char '(' skipMany space hd <- sepEndBy parseExpr spaces tl <- option (List []) (TPCP.try (char '.' >> spaces >> parseExpr)) skipMany space char ')' if isl tl then return (List (hd ++ (unpl tl))) else if isdl tl then return (DottedList (hd ++ (unpdlh tl)) (unpdlt tl)) else return (DottedList hd tl) where isl (List ((Symbol sym):_)) = if sym == "unquote" || sym == "unquote-splicing" then False else True isl (List _) = True isl _ = False unpl (List l) = l isdl (DottedList _ _) = True isdl _ = False unpdlh (DottedList h _) = h unpdlt (DottedList _ t) = t -- Parser for a vector: this is similar to a list (but not a dotted-list), -- except that R6RS says access times are generally faster than for lists. -- It would seem that haskell Arrays would be the natural way to go, but -- the documentation for those is... well, crappy. Data.IntMap is much -- better documented, and pretty close to what we want. Access times aren't -- O(1), but they are O(min(n,W)), where n is the size of the vector and -- W is the size in bits of a machine word: either 32 or 64 usually. This -- is due to the implementation of Data.IntMap: internally, it's a PATRICIA -- tree. That should be fast enough for the moment; if it becomes an issue, -- I can always change later. Data.IntMaps are extensible, so I could in -- principle have extensible vectors, which would mean I'd not need to store -- the length, but bounds-checked arrays seem like a nice feature to have; -- I can add an explicit grow-vector routine, which as a result of the -- extensibility of Data.IntMaps will be very easy to write. parseVector :: Parser LispVal parseVector = do char '#' char '(' skipMany space vals <- sepBy parseExpr spaces skipMany space char ')' return (Vector (toInteger (length vals)) (DIM.fromAscList (addkey 0 vals))) where addkey _ [] = [] addkey n (v:vs) = (n, v):(addkey (n+1) vs) parseExpr :: Parser LispVal parseExpr = parseString <|> (TPCP.try parseBool) <|> (TPCP.try parseChar) <|> (TPCP.try parseNumber) <|> (TPCP.try parseVector) <|> (TPCP.try parseSymbol) <|> parseQuoted <|> parseDottedList readOrThrow :: Parser a -> String -> ThrowsError a readOrThrow parser input = case parse parser "lisp" input of Left err -> throwError (Parser err) Right val -> return val readExpr :: String -> ThrowsError LispVal readExpr = readOrThrow parseExpr readExprList :: String -> ThrowsError [LispVal] readExprList = readOrThrow ((optional hashbang) >> (skipMany spaces) >> endBy parseExpr (spaces <|> eof)) -- Parser for just numbers, for internally converting strings to numbers; -- it's just a little more lenient than only and exactly a number: allow -- whitespace on either side, that doesn't harm anything and seems polite parseJustNumber :: Parser LispVal parseJustNumber = do skipMany space num <- parseNumber skipMany space eof return num readNumber :: String -> ThrowsError LispVal readNumber input = case parse parseJustNumber "number" input of Left _ -> return lispFalse Right val -> return val