-- A data structure for S-expressions, the ones that are called -- proper lists. -- Copyright (c) 2009 The MITRE Corporation -- -- This program is free software: you can redistribute it and/or -- modify it under the terms of the BSD License as published by the -- University of California. module CPSA.Lib.SExpr (SExpr(..), showQuoted, annotation, Pos, PosHandle, posHandle, load) where import Data.Char (isSpace, isDigit, isAlphaNum, isPrint, isAscii) import Data.IORef (IORef, newIORef, readIORef, writeIORef) import System.IO (Handle, hIsEOF, hGetChar, hLookAhead, hClose) -- An S-expression--all of its constructors are strict. data SExpr a = S !a !String -- A symbol | Q !a !String -- A quoted string | N !a !Int -- An integer | L !a ![SExpr a] -- A proper list deriving Eq -- Printing support instance Show (SExpr a) where showsPrec _ (S _ s) = showString s showsPrec _ (Q _ s) = showChar '"' . showString s . showChar '"' showsPrec _ (N _ n) = shows n showsPrec _ (L _ []) = showString "()" showsPrec _ (L _ (x:xs)) = showChar '(' . shows x . showl xs . showChar ')' where showl [] = id showl (x:xs) = showChar ' ' . shows x . showl xs showQuoted :: String -> ShowS showQuoted s = showChar '"' . showString s . showChar '"' -- Extract an S-expression's annotation. annotation :: SExpr a -> a annotation (S a _) = a annotation (Q a _) = a annotation (N a _) = a annotation (L a _) = a -- S-expression Reader -- The reader returns objects of type SExpr Pos so that error messages -- can include a location. data Pos = Pos { file :: !String, line :: !Int, column :: !Int } -- Show a position in a form Emacs can read. instance Show Pos where showsPrec _ pos = showString (file pos) . showString ":" . shows (line pos) . showString ":" . shows (column pos) . showString ": " -- Bind a position to a handle data PosHandle = PosHandle { pHandle :: Handle, pFile :: String, pPosition :: IORef (Int, Int) } posHandle :: FilePath -> Handle -> IO PosHandle posHandle file handle = do pos <- newIORef (1, 1) -- Set initial position in a file return $ PosHandle { pHandle = handle, pFile = file, pPosition = pos } setPosHandle :: PosHandle -> Int -> Int -> IO () setPosHandle ph line column = writeIORef (pPosition ph) (line, column) -- The result of scanning is a token. data Token = Atom !(SExpr Pos) | Lparen !Pos | Rparen !Pos | Eof -- Read one S-expression or return Nothing on EOF load :: PosHandle -> IO (Maybe (SExpr Pos)) load p = do (l, c) <- readIORef $ pPosition p (l, c, t) <- scan p l c case t of Atom x -> do setPosHandle p l c return $ Just x Lparen pos -> do (l, c, x) <- list p pos l c [] setPosHandle p l c return $ Just x Rparen pos -> fail (shows pos "Close of unopened list") Eof -> do hClose $ pHandle p return Nothing -- A recursive decent parser list :: PosHandle -> Pos -> Int -> Int -> [SExpr Pos] -> IO (Int, Int, SExpr Pos) list p pos l c xs = do (l, c, t) <- scan p l c case t of Rparen _ -> return (l, c, L pos (seqrev xs)) Atom x -> list p pos l c (x : xs) Lparen pos' -> do (l, c, x) <- list p pos' l c [] list p pos l c (x : xs) Eof -> fail (shows pos "Unexpected end of input in list") -- Read the next character returning Nothing on EOF get :: PosHandle -> IO (Maybe Char) get p = do let h = pHandle p eof <- hIsEOF h case eof of True -> return Nothing False -> do ch <- hGetChar h return $ Just ch -- Peek at the next character returning Nothing on EOF peek :: PosHandle -> IO (Maybe Char) peek p = do let h = pHandle p eof <- hIsEOF h case eof of True -> return Nothing False -> do ch <- hLookAhead h return $ Just ch -- Return the next token and update line and column information scan :: PosHandle -> Int -> Int -> IO (Int, Int, Token) scan p l c = do ch <- get p case ch of Nothing -> return (l, c, Eof) Just ch -> skip p l c ch -- Skip whitespace and then handle first character of a token skip :: PosHandle -> Int -> Int -> Char -> IO (Int, Int, Token) skip p l _ '\n' = scan p (l + 1) 1 skip p l c ch | isSpace ch = scan p l (c + 1) skip p l c ';' = comment p l (c + 1) skip p l c '(' = return (l, c + 1, Lparen $ pos p l c) skip p l c ')' = return (l, c + 1, Rparen $ pos p l c) skip p l c ch = atom p l (c + 1) (pos p l c) ch pos :: PosHandle -> Int -> Int -> Pos pos p l c = Pos { file = pFile p, line = l, column = c } -- Consume a comment comment :: PosHandle -> Int -> Int -> IO (Int, Int, Token) comment p l c = do ch <- get p case ch of Nothing -> return (l, c, Eof) Just '\n' -> scan p (l + 1) 1 Just _ -> comment p l (c + 1) -- Scan a string, number, or a symbol atom :: PosHandle -> Int -> Int -> Pos -> Char -> IO (Int, Int, Token) atom p l c pos '"' = string p l c pos [] atom p l c pos ch | isDigit ch = number p l c pos [ch] atom p l c pos ch | ch == '+' || ch == '-' = numOrSym p l c pos [ch] atom p l c pos ch | isSym ch = symbol p l c pos [ch] atom _ _ _ pos _ = fail (shows pos "Bad char in atom") -- Scan a quoted string of characters string :: PosHandle -> Int -> Int -> Pos -> String -> IO (Int, Int, Token) string p l c pos s = do ch <- get p case ch of Nothing -> fail (shows pos "End of input in string") Just '"' -> return (l, c + 1, Atom (Q pos (seqrev s))) Just ch | isStr ch -> string p l (c + 1) pos (ch : s) Just _ -> fail (shows pos "Bad char for string") -- Scan a sequence of digits number :: PosHandle -> Int -> Int -> Pos -> String -> IO (Int, Int, Token) number p l c pos s = do ch <- peek p case ch of Nothing -> return (l, c, Atom (N pos (read (seqrev s)))) Just ch | isDigit ch -> do _ <- hGetChar $ pHandle p number p l (c + 1) pos (ch : s) Just ch | isSym ch -> fail (shows pos "Bad char after number") Just _ -> return (l, c, Atom (N pos (read (seqrev s)))) -- Scan a number that starts with a sign or a symbol numOrSym :: PosHandle -> Int -> Int -> Pos -> String -> IO (Int, Int, Token) numOrSym p l c pos s = do ch <- peek p case ch of Nothing -> symbol p l c pos s Just ch | isDigit ch -> number p l c pos s Just _ -> symbol p l c pos s -- Scan a symbol symbol :: PosHandle -> Int -> Int -> Pos -> String -> IO (Int, Int, Token) symbol p l c pos s = do ch <- peek p case ch of Nothing -> return (l, c, Atom (S pos (seqrev s))) Just ch | isSym ch -> do _ <- hGetChar $ pHandle p symbol p l (c + 1) pos (ch : s) Just _ -> return (l, c, Atom (S pos (seqrev s))) -- A reverse that evaluates the list elements. seqrev :: [a] -> [a] seqrev l = foldl (\xs x -> x `seq` xs `seq` (x:xs)) [] l -- A symbol is made from alphanumeric characters or special -- characters. A symbol may not start with a digit, or with a plus or -- minus sign followed by a digit. isSym :: Char -> Bool isSym c = isAlphaNum c || elem c specialChars specialChars :: String specialChars = "+-*/<=>!?:$%_&~^" -- A string is made from alphanumeric characters or an extension of the -- special characters. isStr :: Char -> Bool isStr c = isPrint c && isAscii c && c /= '"' && c /= '\\'