----------------------------------------------------------------------------- -- | -- Copyright : (c) Joost Visser 2004 -- License : LGPL -- -- Maintainer : joost.visser@di.uminho.pt -- Stability : experimental -- Portability : portable -- -- This module is part of the ATerm library for Haskell. It contains functions -- for reading and writing ATerms from and to Strings. Two ATerm formats are -- supported: -- -- * AT: plain (non-shared) textual ATerms -- -- * TAF: shared textual ATerms -- -- The binary ATerm format (BAF) is not supported. -- -- Current limitations: -- -- * BLOBS and place-holders are not supported. -- -- * Annotations are not supported. -- ----------------------------------------------------------------------------- module Data.ATerm.ReadWrite ( readATerm, writeATerm, writeSharedATerm ) where import Data.ATerm.AbstractSyntax import Data.Map as FM import Data.Char ----------------------------------------------------------------------------- -- * From String to ATerm -- | Parse the given string into an ATerm. readATerm :: String -> ATerm readATerm ('!':str) = let (t,_,_,_) = readTAF str emptyRT 0 in t readATerm str = let (t,_) = readAT str in t -- non-shared -- readAT ('[':str) = let (kids, str') = readATs (dropSpaces str) in (AList kids, str') readAT str@(c:cs) | isIntHead c = let (i,str') = span isDigit cs in (AInt (read (c:i)),str') | otherwise = let (c,str') = readAFun str (kids, str'') = readParenATs (dropSpaces str') in (AAppl c kids, str'') readAFun ('"':str) = let (c,('"':str')) = spanNotQuote' str in (quote c,str') readAFun str = spanAFunChar str readParenATs ('(':str) = readATs (dropSpaces str) readParenATs str = ([],str) readATs (')':str) = ([],str) readATs (']':str) = ([],str) readATs str = readATs1 str readATs1 str = let (t,str') = readAT (dropSpaces str) (ts,str'') = readATs' (dropSpaces str') in (t:ts,str'') readATs' (',':str) = readATs1 (dropSpaces str) readATs' (')':str) = ([],str) readATs' (']':str) = ([],str) -- shared -- readTAF :: String -> ReadTable -> Int -> (ATerm,String,ReadTable,Int) readTAF ('#':str) tbl l = let (i,str') = spanAbbrevChar str in (getElementRT (deAbbrev i) tbl, str',tbl,l+(length i)+1) readTAF ('[':str) tbl l = let (kids, str',tbl',l') = readTAFs (dropSpaces str) tbl 1 t = AList kids in (t, str',condAddElementRT t l' tbl',l+l') readTAF str@(c:cs) tbl l | isIntHead c = let (i,str') = span isDigit cs ci = (c:i) l' = length ci t = AInt (read ci) tbl' = condAddElementRT t l' tbl in (t,str',tbl',l+l') | otherwise = let (c,str') = readAFun str (kids, str'',tbl',l') = readParenTAFs (dropSpaces str') tbl 0 t = AAppl c kids lks = if Prelude.null kids then 0 else l' l'' = (length c) + lks in (t, str'',condAddElementRT t l'' tbl',l'') readParenTAFs ('(':str) tbl l = readTAFs (dropSpaces str) tbl l readParenTAFs str tbl l = ([],str,tbl,l) readTAFs (')':str) tbl l = ([],str,tbl,l+1) readTAFs (']':str) tbl l = ([],str,tbl,l+1) readTAFs str tbl l = readTAFs1 str tbl l readTAFs1 str tbl l = let (t,str',tbl',l') = readTAF (dropSpaces str) tbl l (ts,str'',tbl'',l'') = readTAFs' (dropSpaces str') tbl' l' in (t:ts,str'',tbl'',l'') readTAFs' (',':str) tbl l = readTAFs1 (dropSpaces str) tbl (l+1) readTAFs' (')':str) tbl l = ([],str,tbl,l+1) readTAFs' (']':str) tbl l = ([],str,tbl,l+1) -- helpers -- dropSpaces = dropWhile isSpace spanAFunChar = span isAFunChar isAFunChar c = (isAlphaNum c) || (c `elem` "-_*+") spanNotQuote = span (/='"') spanAbbrevChar = span (`elem` toBase64) isIntHead c = (isDigit c) || (c=='-') quote str = ('"':str)++"\"" spanNotQuote' [] = ([],[]) spanNotQuote' xs@('"':xs') = ([],xs) spanNotQuote' xs@('\\':'"':xs') = ('\\':'"':ys,zs) where (ys,zs) = spanNotQuote' xs' spanNotQuote' xs@('\\':'\\':xs')= ('\\':'\\':ys,zs) where (ys,zs) = spanNotQuote' xs' spanNotQuote' xs@(x:xs') = (x:ys,zs) where (ys,zs) = spanNotQuote' xs' ----------------------------------------------------------------------------- -- * From ATerm to String -- | Write the given ATerm to non-shared textual representation (TXT format). writeATerm :: ATerm -> String writeATerm t = writeAT t -- | Write the given ATerm to fully shared textual representation (TAF format). writeSharedATerm :: ATerm -> String writeSharedATerm t = let (s,_) = writeTAF t emptyWT in ('!':s) -- non-shared -- writeAT :: ATerm -> String writeAT (AAppl c ts) = writeATermAux c (Prelude.map writeAT ts) writeAT (AList ts) = bracket (commaSep (Prelude.map writeAT ts)) writeAT (AInt i) = show i -- shared -- writeTAF :: ATerm -> WriteTable -> (String,WriteTable) writeTAF t tbl = case getIndexWT t tbl of (Just i) -> (makeAbbrev i,tbl) Nothing -> (str, condAddElementWT t str tbl') where (str,tbl') = writeTAF' t tbl writeTAF' (AAppl c ts) tbl = let (kids,tbl') = writeTAFs ts tbl in (writeATermAux c kids,tbl') writeTAF' (AList ts) tbl = let (kids,tbl') = writeTAFs ts tbl in (bracket (commaSep kids),tbl') writeTAF' (AInt i) tbl = (show i,tbl) writeTAFs [] tbl = ([],tbl) writeTAFs (t:ts) tbl = let (str,tbl') = writeTAF t tbl (strs,tbl'') = writeTAFs ts tbl' in ((str:strs),tbl'') -- helpers -- writeATermAux c [] = c writeATermAux c ts = c++(parenthesise (commaSep ts)) sepBy sep (x:y:ys) = x:sep:sepBy sep (y:ys) sepBy sep ys = ys commaSep strs = concat (sepBy "," strs) bracket str = "["++str++"]" parenthesise str = "("++str++")" ----------------------------------------------------------------------------- -- * Tables of ATerms -- For reading (remove sharing) -- Using reversed List type ReadTable = (Integer,[ATerm]) emptyRT = (0,[]) -- Hook to prevent expansion of productions. This is useful when reading -- huge AsFix files. getElementRT' i tbl = case getElementRT i tbl of (AAppl "prod" _) -> AAppl "#prod" [] t -> t getElementRT i tbl = (snd tbl)!!!((fst tbl)-i-1) addElementRT a tbl = ((fst tbl)+1,a:(snd tbl)) sizeOfRT tbl = fst tbl {--- Using Finite Map type ReadTable = FiniteMap Integer ATerm emptyRT = emptyFM getElementRT i tbl = lookupWithDefaultFM tbl (error "getElement") i addElementRT a tbl = addToFM tbl (sizeOfRT tbl) a sizeOfRT tbl = toInteger (sizeFM tbl) -} {--- Using some imported Edison sequence type ReadTable = T.Seq ATerm emptyRT = T.empty :: ReadTable getElementRT i tbl = T.lookup tbl (toInt i) addElementRT t tbl = T.snoc tbl t sizeOfRT tbl = T.size tbl -} condAddElementRT t l tbl = if (length next_abbrev) < l then addElementRT t tbl else tbl --where next_abbrev = makeAbbrev (toInteger (sizeOfRT tbl)+1) where next_abbrev = makeAbbrev (toInteger (sizeOfRT tbl)) condAddElementRT' t str tbl = if (length next_abbrev) < (length str) then addElementRT t tbl else tbl where next_abbrev = makeAbbrev (toInteger (sizeOfRT tbl)+1) -- For writing (create sharing) -- Using FiniteMap type WriteTable = FM.Map ATerm Integer emptyWT = FM.empty getIndexWT a tbl = FM.lookup a tbl addElementWT a tbl = FM.insert a (sizeOfWT tbl) tbl sizeOfWT tbl = toInteger (FM.size tbl) condAddElementWT t str tbl = if (length next_abbrev) < (length str) then addElementWT t tbl else tbl --where next_abbrev = makeAbbrev (toInteger (sizeOfWT tbl)+1) where next_abbrev = makeAbbrev (toInteger (sizeOfWT tbl)) ----------------------------------------------------------------------------- -- * Base 64 encoding mkAbbrev x | x == 0 = [toBase64!!0] | otherwise = reverse (mkAbbrevAux x) mkAbbrevAux x | x == 0 = [] | x > 0 = (toBase64!!!m:mkAbbrevAux d) where (d,m) = divMod x 64 deAbbrev x = deAbbrevAux (reverse x) deAbbrevAux [] = 0 deAbbrevAux (c:cs) = let (Just i) = indexOf c toBase64 r = deAbbrevAux cs in (i + 64*r) toBase64 = [ 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P', 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f', 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v', 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/' ] makeAbbrev i = '#':mkAbbrev i ----------------------------------------------------------------------------- -- * Helpers indexOf t [] = Nothing indexOf t (x:xs) = if t==x then (Just (0::Integer)) else case indexOf t xs of (Just i) -> Just (i+1) Nothing -> Nothing (!!!) :: [b] -> Integer -> b (x:_) !!! 0 = x (_:xs) !!! n | n>0 = xs !!! (n-1) (_:_) !!! _ = error "!!!: negative index" [] !!! _ = error "!!!: index too large" ----------------------------------------------------------------------------- -- * Future work {- The code could be made more readable by introducing special monads that hide the consumed String/ATerm and the table that is built during reading/writing newtype ReadMonad t = RM { runRM :: String -> ReadTable -> (t,String,ReadTable) } instance Monad ReadMonad where return t = RM $ \str tbl -> (t,str,tbl) rm >>= f = RM $ \str tbl -> let (t,str',tbl') = runRM rm str tbl in runRM (f t) str' tbl' I guess this should be a combined ParserMonad and StateMonad. -} ------------------------------------------------------------------------------- -- * Testing -- | Test whether reading and writing to and from shared aterm representations -- (TAF format) deals correctly with the bounderies of the base-64 encoding. testAbbrevBoundaries :: Bool testAbbrevBoundaries = writeSharedATerm term == shared && readATerm shared == term where term = AList $ Prelude.map AInt ([100..164]++[100..164]) shared = "![100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,#A,#B,#C,#D,#E,#F,#G,#H,#I,#J,#K,#L,#M,#N,#O,#P,#Q,#R,#S,#T,#U,#V,#W,#X,#Y,#Z,#a,#b,#c,#d,#e,#f,#g,#h,#i,#j,#k,#l,#m,#n,#o,#p,#q,#r,#s,#t,#u,#v,#w,#x,#y,#z,#0,#1,#2,#3,#4,#5,#6,#7,#8,#9,#+,#/,164]" -------------------------------------------------------------------------------