{- | Module : $Header$ Description : read and write ATerms to and from strings Copyright : (c) Klaus Luettich, C.Maeder, Uni Bremen 2002-2005 License : GPLv2 or higher, see LICENSE.txt Maintainer : Christian.Maeder@dfki.de Stability : provisional Portability : non-portable (imports ATerm.AbstractSyntax) convert 'ATermTable's (created by 'ATerm.Conversion.toATermTable') from 'String's and to 'SDoc's as shared (TAF format) or unshared (AT format). Indices (following hash marks) are base64 encoded. -} module ATerm.ReadWrite( -- * read shared or unshared ATerms readATerm , readATermFile -- * writing out shared ATerms (use these functions for serialization) , writeSharedATerm , writeSharedATermFile -- * writing out unshared ATerms (just for compatibility purposes) , writeATerm , writeATermFile -- * support different renderings via 'ATerm.SimpPretty.SDoc' , writeSharedATermSDoc , writeATermSDoc ) where {- Author: Joost Visser, CWI, 2002 Changed by: Klaus Luettich & Felix Reckers, 2002-2003, C. Maeder 2005 This module contains functions for reading and writing ATerms from and to Strings. Two ATerm formats are supported: AT: plain (non-shared) textual ATerms TAF: shared textual ATerms The binary ATerm format (BAF) is not supported. Current limitations: BLOBS and place-holders are not supported. accidental support for the empty aterm: ShAAppl "" [] [] -} import ATerm.AbstractSyntax import ATerm.SimpPretty import ATerm.Base64 import Data.Char import qualified Data.IntMap as IntMap readATermFile :: FilePath -> IO ATermTable readATermFile fp = do str <- readFile fp if null str then error $ "ATerm.readATermFile: empty input file: " ++ fp else return $ readATerm str --- From String to ATerm ------------------------------------------------------ data ReadTAFStruct = RTS ATermTable String -- remaing string (AbbrevTable Int) Int -- string length of last ATerm read {- | create an ATerm table from an input string. Shared or unshared ATerms can be read. A string for shared ATerms usually starts with an exclamation mark and contains many hash marks indicating references. Unshared ATerms are plain constructor terms. -} readATerm :: String -> ATermTable readATerm = readATerm' . dropWhile ( \ c -> case c of '!' -> True _ -> isSpace c) readATerm' :: String -> ATermTable readATerm' str = case readTAF emptyATermTable str emptyATab of (RTS at rt _ _, _) -> case dropSpaces rt of [] -> toReadonlyATT at s -> error $ "garbage following aterm: " ++ take 10 s -- non-shared is a special case of shared (but could be made more fool proof) readTAF :: ATermTable -> String -> AbbrevTable Int -> (ReadTAFStruct, Int) readTAF at str tbl = case readTAF' at str tbl of (rs@(RTS at' str' tbl' l'), eith) -> case eith of Left i -> (rs, i) Right a -> case addATerm a at' of (at'', i) -> (RTS at'' str' (insertATab l' (sizeATab tbl') i tbl') l', i) readTAF' :: ATermTable -> String -> AbbrevTable Int -> (ReadTAFStruct, Either Int ShATerm) readTAF' at str tbl = case str of '#' : xs -> case spanAbbrevChar xs of (i, str') -> case lookupATab (deAbbrev i) tbl of Nothing -> error $ "unknown aterm index " ++ '#' : i Just a -> (RTS at str' tbl $ length i + 1, Left a) '[' : _ -> case readParTAFs '[' ']' at str tbl 0 of (RTS at' str' tbl' l', kids) -> case readAnnTAF at' (dropSpaces str') tbl' l' of (rs, ann) -> (rs, Right $ ShAList kids ann) x : xs | isIntHead x -> case span isDigit xs of (i, str') -> let (intl, l') = readInteger $ x : i in case readAnnTAF at (dropSpaces str') tbl l' of (rs, ann) -> (rs, Right $ ShAInt intl ann) _ -> case readAFun str of (c, str') -> case readParenTAFs at (dropSpaces str') tbl $ length c of (RTS at' str'' tbl' l', kids) -> case readAnnTAF at' (dropSpaces str'') tbl' l' of (rs, ann) -> (rs, Right $ ShAAppl c kids ann) readParenTAFs :: ATermTable -> String -> AbbrevTable Int -> Int -> (ReadTAFStruct, [Int]) readParenTAFs = readParTAFs '(' ')' readParTAFs :: Char -> Char -> ATermTable -> String -> AbbrevTable Int -> Int -> (ReadTAFStruct, [Int]) readParTAFs op cp at str tbl l = case str of p : r | op == p -> readTAFs0 at cp (dropSpaces r) tbl $ l + 1 _ -> (RTS at str tbl l, []) readTAFs0 :: ATermTable -> Char -> String -> AbbrevTable Int -> Int -> (ReadTAFStruct, [Int]) readTAFs0 at par str tbl l = case str of p : r | par == p -> (RTS at r tbl $ l + 1, []) _ -> readTAFs1 at par str tbl l readTAFs1 :: ATermTable -> Char -> String -> AbbrevTable Int -> Int -> (ReadTAFStruct, [Int]) readTAFs1 at par str tbl l = case readTAF at (dropSpaces str) tbl of (RTS at' str' tbl' l', t) -> case readTAFs2 at' par (dropSpaces str') tbl' $ l + l' of (RTS at'' str'' tbl'' l'', ts) -> (RTS at'' str'' tbl'' l'', t : ts) readTAFs2 :: ATermTable -> Char -> String -> AbbrevTable Int -> Int -> (ReadTAFStruct, [Int]) readTAFs2 at par str tbl l = case str of ',' : r -> readTAFs1 at par (dropSpaces r) tbl $ l + 1 p : r | par == p -> (RTS at r tbl $ l + 1, []) _ -> error $ "expecting ',' or '" ++ [par] ++ "' in aterm but found: " ++ take 10 str readAnnTAF :: ATermTable -> String -> AbbrevTable Int -> Int -> (ReadTAFStruct, [Int]) readAnnTAF = readParTAFs '{' '}' -- helpers -- dropSpaces :: String -> String dropSpaces = dropWhile isSpace readAFun :: String -> (String, String) readAFun str = case str of q : r | q == '"' -> let (c, s) = spanNotQuote' r in (q : c, s) _ -> span isAFunChar str spanNotQuote' :: String -> (String, String) spanNotQuote' str = case str of q : r | q == '"' -> ([q], r) c : r@(d : s) -> let (e, l) = if c == '\\' then ([c, d], s) else ([c], r) (f, t) = spanNotQuote' l in (e ++ f, t) _ -> error $ "wrongly terminated aterm string: " ++ take 10 str isAFunChar :: Char -> Bool isAFunChar c = isAlphaNum c || c `elem` "_*+-." spanAbbrevChar :: String -> (String, String) spanAbbrevChar = span isBase64Char isIntHead :: Char -> Bool isIntHead c = isDigit c || c == '-' --- From (shared) ATerms to strings via simple documents with associated length data DocLen = DocLen SDoc Int data WriteStruct = WS (AbbrevTable DocLen) DocLen writeATermSDoc :: ATermTable -> SDoc writeATermSDoc = writeSharedATermSDoc' False writeATerm :: ATermTable -> String writeATerm = render . writeATermSDoc writeATermFile :: FilePath -> ATermTable -> IO () writeATermFile fp = writeFileSDoc fp . writeATermSDoc writeSharedATermSDoc :: ATermTable -> SDoc writeSharedATermSDoc = writeSharedATermSDoc' True writeSharedATerm :: ATermTable -> String writeSharedATerm = render . writeSharedATermSDoc writeSharedATermFile :: FilePath -> ATermTable -> IO () writeSharedATermFile fp = writeFileSDoc fp . writeSharedATermSDoc writeSharedATermSDoc' :: Bool -> ATermTable -> SDoc writeSharedATermSDoc' b at = case writeTAF b (toReadonlyATT at) emptyATab of WS _ (DocLen doc _) -> (if b then text "!" else empty) <> doc --shared (if input is True) writeTAF :: Bool -> ATermTable -> AbbrevTable DocLen -> WriteStruct writeTAF b at tbl = let i = getTopIndex at in case lookupATab i tbl of Just s -> if b then WS tbl s else writeTAF' b at tbl Nothing -> case writeTAF' b at tbl of WS tbl' d_len@(DocLen _ len) -> WS (insertATab len i (abbrevD $ sizeATab tbl') tbl') d_len writeTAF' :: Bool -> ATermTable -> AbbrevTable DocLen -> WriteStruct writeTAF' b at tbl = case getATerm at of ShAAppl c ts anns -> case writeTAFs b at ts tbl of WS tbl' kids -> case writeTAFs b at anns tbl' of WS tbl'' kidsAnn -> WS tbl'' $ dlConcat (writeATermAuxS c kids) $ parenthesiseAnnS kidsAnn ShAList ts anns -> case writeTAFs b at ts tbl of WS tbl' kids -> case writeTAFs b at anns tbl' of WS tbl'' kidsAnn -> WS tbl'' $ dlConcat (bracketS kids) $ parenthesiseAnnS kidsAnn ShAInt i anns -> case writeTAFs b at anns tbl of WS tbl' kidsAnn -> WS tbl' $ dlConcat (integerDoc i) $ parenthesiseAnnS kidsAnn dlConcat :: DocLen -> DocLen -> DocLen dlConcat s1@(DocLen sf1 sl1) s2@(DocLen sf2 sl2) | sl1 == 0 = s2 | sl2 == 0 = s1 | otherwise = DocLen (sf1 <> sf2) $ sl1 + sl2 dlConcatComma :: DocLen -> DocLen -> DocLen dlConcatComma (DocLen sf1 sl1) (DocLen sf2 sl2) = DocLen (sf1 <> comma <> sf2) $ sl1 + sl2 + 1 -- produce comma seperated output from aterm indices writeTAFs :: Bool -> ATermTable -> [Int] -> AbbrevTable DocLen -> WriteStruct writeTAFs b at inds tbl = case inds of [] -> WS tbl $ DocLen empty 0 i : is -> case writeTAF b (getATermByIndex1 i at) tbl of ws@(WS t1 s1) -> if null is then ws else case writeTAFs b at is t1 of WS t2 s2 -> WS t2 $ dlConcatComma s1 s2 docLen :: String -> DocLen docLen s = DocLen (text s) $ length s integerDoc :: Integer -> DocLen integerDoc = docLen . show writeATermAuxS :: String -> DocLen -> DocLen writeATermAuxS s = dlConcat (docLen s) . parenthesiseS -- list brackets must not be omitted bracketS :: DocLen -> DocLen bracketS (DocLen d dl) | dl == 0 = docLen "[]" | otherwise = DocLen (brackets d) $ dl + 2 parenthesiseS :: DocLen -> DocLen parenthesiseS s@(DocLen d dl) | dl == 0 = s | otherwise = DocLen (parens d) $ dl + 2 parenthesiseAnnS :: DocLen -> DocLen parenthesiseAnnS s@(DocLen d dl) | dl == 0 = s | otherwise = DocLen (braces d) $ dl + 2 {- | This abbreviation table maps abbreviation ints to aterm indices for reading and aterm indices to abbreviation docs during writing. -} data AbbrevTable a = ATab !(IntMap.IntMap a) !(Int, Int) !Int {- The last component is the current maximal abbreviation int that serves as a key when reading and as a value when writing. The pair stores the size of this component and the maximal power of 64 that is less than it, in order to simplify the computation of the size of the next abbreviation int. -} -- | initial table emptyATab :: AbbrevTable a emptyATab = ATab IntMap.empty (2, 64) 0 -- | only insert if the first argument is greater than the abbreviation size insertATab :: Int -> Int -> a -> AbbrevTable a -> AbbrevTable a insertATab dl i a t@(ATab m p@(sl, b) s) = if dl > sl then let n = s + 1 in ATab (IntMap.insert i a m) (if rem n b == 0 then (sl + 1, 64 * b) else p) n else t -- | get current abbreviation size sizeATab :: AbbrevTable a -> Int sizeATab (ATab _ _ s) = s -- | lookup map value lookupATab :: Int -> AbbrevTable a -> Maybe a lookupATab i (ATab m _ _) = IntMap.lookup i m --- Intger Read --------------------------------------------------------------- readInteger :: String -> (Integer, Int) readInteger s = case s of (hd:str) -> if hd == '-' then case conv str of (m, l) -> (negate m, l + 1) else conv s _ -> error "readInteger" where f (m, l) x = (toInteger (ord x - ord0) + 10 * m, l + 1) conv = foldl f (0, 0) --- Base 64 encoding ---------------------------------------------------------- abbrevD :: Int -> DocLen abbrevD = docLen . abbrev abbrev :: Int -> String abbrev i = '#' : mkAbbrev i mkAbbrev :: Int -> String mkAbbrev x = if x > 0 then mkAbbrevAux x "" else "A" mkAbbrevAux :: Int -> String -> String mkAbbrevAux x str = if x > 0 then case quotRem x 64 of (d, m) -> mkAbbrevAux d $ toBase64Char m : str else str deAbbrev :: String -> Int deAbbrev = let f m c = 64 * m + toBase64Int c in foldl f 0