{-| Erlang term parser and pretty printer. -} module B9.Artifact.Content.ErlTerms (parseErlTerm ,erlTermParser ,renderErlTerm ,SimpleErlangTerm(..) ,arbitraryErlSimpleAtom ,arbitraryErlString ,arbitraryErlNumber ,arbitraryErlNatural ,arbitraryErlFloat ,arbitraryErlNameChar) where import Control.Parallel.Strategies import Data.Binary import qualified Data.ByteString.Lazy.Char8 as Lazy import Data.Data import Data.Function import Data.Hashable import GHC.Generics (Generic) import Test.QuickCheck import Text.Parsec ((<|>), many, spaces, char, option, between, string, choice, octDigit, hexDigit, many1, noneOf, try, digit, anyChar, alphaNum, lower, parse) import Text.Parsec.ByteString.Lazy import Text.Show.Pretty import Control.Monad import Text.Printf import qualified Text.PrettyPrint as PP import B9.QCUtil -- | Simplified Erlang term representation. data SimpleErlangTerm = ErlString String | ErlFloat Double | ErlNatural Integer | ErlAtom String | ErlChar Char | ErlBinary String | ErlList [SimpleErlangTerm] | ErlTuple [SimpleErlangTerm] deriving (Eq,Ord,Read,Show,Data,Typeable,Generic) instance Hashable SimpleErlangTerm instance Binary SimpleErlangTerm instance NFData SimpleErlangTerm -- | Parse a subset of valid Erlang terms. It parses no maps and binaries are -- restricted to either empty binaries or binaries with a string. The input -- encoding must be restricted to ascii compatible 8-bit characters -- (e.g. latin-1 or UTF8). parseErlTerm :: String -> Lazy.ByteString -> Either String SimpleErlangTerm parseErlTerm src content = either (Left . ppShow) Right (parse erlTermParser src content) -- | Convert an abstract Erlang term to a pretty byte string preserving the -- encoding. renderErlTerm :: SimpleErlangTerm -> Lazy.ByteString renderErlTerm s = Lazy.pack (PP.render (prettyPrintErlTerm s PP.<> PP.char '.')) prettyPrintErlTerm :: SimpleErlangTerm -> PP.Doc prettyPrintErlTerm (ErlString str) = PP.doubleQuotes (PP.text (toErlStringString str)) prettyPrintErlTerm (ErlNatural n) = PP.integer n prettyPrintErlTerm (ErlFloat f) = PP.double f prettyPrintErlTerm (ErlChar c) = PP.text ("$" ++ toErlAtomChar c) prettyPrintErlTerm (ErlAtom a) = PP.text quotedAtom where quotedAtom = case toErlAtomString a of "" -> "''" a'@(firstChar:rest) | firstChar `elem` ['a' .. 'z'] && all (`elem` atomCharsThatDontNeedQuoting) rest -> a' a' -> "'" ++ a' ++ "'" atomCharsThatDontNeedQuoting = ['a' .. 'z'] ++ ['A' .. 'Z'] ++ ['0' .. '9'] ++ "@_" prettyPrintErlTerm (ErlBinary []) = PP.text "<<>>" prettyPrintErlTerm (ErlBinary b) = PP.text ("<<\"" ++ toErlStringString b ++ "\">>") prettyPrintErlTerm (ErlList xs) = PP.brackets (PP.sep (PP.punctuate PP.comma (prettyPrintErlTerm <$> xs))) prettyPrintErlTerm (ErlTuple xs) = PP.braces (PP.sep (PP.punctuate PP.comma (prettyPrintErlTerm <$> xs))) toErlStringString :: String -> String toErlStringString = join . map toErlStringChar toErlStringChar :: Char -> String toErlStringChar = (table !!) . fromEnum where table = [printf "\\x{%x}" c | c <- [0 .. (31 :: Int)]] ++ (pure . toEnum <$> [32 .. 33]) ++ ["\\\""] ++ (pure . toEnum <$> [35 .. 91]) ++ ["\\\\"] ++ (pure . toEnum <$> [93 .. 126]) ++ [printf "\\x{%x}" c | c <- [(127 :: Int) ..]] toErlAtomString :: String -> String toErlAtomString = join . map toErlAtomChar toErlAtomChar :: Char -> String toErlAtomChar = (table !!) . fromEnum where table = [printf "\\x{%x}" c | c <- [0 .. (31 :: Int)]] ++ (pure . toEnum <$> [32 .. 38]) ++ ["\\'"] ++ (pure . toEnum <$> [40 .. 91]) ++ ["\\\\"] ++ (pure . toEnum <$> [93 .. 126]) ++ [printf "\\x{%x}" c | c <- [(127 :: Int) ..]] instance Arbitrary SimpleErlangTerm where arbitrary = oneof [sized aErlString ,sized aErlNatural ,sized aErlFloat ,sized aErlChar ,sized aErlAtomUnquoted ,sized aErlAtomQuoted ,sized aErlBinary ,sized aErlList ,sized aErlTuple ] where decrSize 0 = resize 0 decrSize n = resize (n - 1) aErlString n = ErlString <$> decrSize n (listOf (choose (toEnum 0,toEnum 255))) aErlFloat n = do f <- decrSize n arbitrary :: Gen Float let d = fromRational (toRational f) return (ErlFloat d) aErlNatural n = ErlNatural <$> decrSize n arbitrary aErlChar n = ErlChar <$> decrSize n (choose (toEnum 0, toEnum 255)) aErlAtomUnquoted n = do f <- choose ('a','z') rest <- decrSize n aErlNameString return (ErlAtom (f:rest)) aErlAtomQuoted n = do cs <- decrSize n aParsableErlString return (ErlAtom ("'" ++ cs ++ "'")) aErlBinary n = ErlBinary <$> decrSize n (listOf (choose (toEnum 0,toEnum 255))) aParsableErlString = oneof [aErlNameString ,aErlEscapedCharString ,aErlControlCharString ,aErlOctalCharString ,aErlHexCharString] aErlNameString = listOf (elements (['a'..'z'] ++ ['A'..'Z']++ ['0'..'9']++"@_")) aErlEscapedCharString = elements (("\\"++) . pure <$> "0bdefnrstv\\\"\'") aErlControlCharString = elements (("\\^"++) . pure <$> (['a'..'z'] ++ ['A'..'Z'])) aErlOctalCharString = do n <- choose (1,3) os <- vectorOf n (choose (0,7)) return (join ("\\":(show <$> (os::[Int])))) aErlHexCharString = oneof [twoDigitHex,nDigitHex] where twoDigitHex = do d1 <- choose (0,15) :: Gen Int d2 <- choose (0,15) :: Gen Int return (printf "\\x%x%X" d1 d2) nDigitHex = do zs <- listOf (elements "0") v <- choose (0,255) :: Gen Int return (printf "\\x{%s%x}" zs v) aErlList n = ErlList <$> resize (n `div` 2) (listOf arbitrary) aErlTuple n = ErlTuple <$> resize (n `div` 2) (listOf arbitrary) erlTermParser :: Parser SimpleErlangTerm erlTermParser = between spaces (char '.') erlExpressionParser erlExpressionParser :: Parser SimpleErlangTerm erlExpressionParser = erlAtomParser <|> erlCharParser <|> erlStringParser <|> erlBinaryParser <|> erlListParser <|> erlTupleParser <|> try erlFloatParser <|> erlNaturalParser erlAtomParser :: Parser SimpleErlangTerm erlAtomParser = ErlAtom <$> (between (char '\'') (char '\'') (many (erlCharEscaped <|> noneOf "'")) <|> ((:) <$> lower <*> many erlNameChar)) erlNameChar :: Parser Char erlNameChar = alphaNum <|> char '@' <|> char '_' erlCharParser :: Parser SimpleErlangTerm erlCharParser = ErlChar <$> (char '$' >> (erlCharEscaped <|> anyChar)) erlFloatParser :: Parser SimpleErlangTerm erlFloatParser = do -- Parse a float as string, then use read :: Double to 'parse' the floating -- point value. Calculating by hand is complicated because of precision -- issues. sign <- option "" ((char '-' >> return "-") <|> (char '+' >> return "")) s1 <- many digit char '.' s2 <- many1 digit e <- do expSym <- choice [char 'e', char 'E'] expSign <- option "" ((char '-' >> return "-") <|> (char '+' >> return "+")) expAbs <- many1 digit return ([expSym] ++ expSign ++ expAbs) <|> return "" return (ErlFloat (read (sign ++ s1 ++ "." ++ s2 ++ e))) erlNaturalParser :: Parser SimpleErlangTerm erlNaturalParser = do sign <- signParser dec <- decimalLiteral return $ ErlNatural $ sign * dec signParser :: Parser Integer signParser = (char '-' >> return (-1)) <|> (char '+' >> return 1) <|> return 1 decimalLiteral :: Parser Integer decimalLiteral = foldr (\radix acc -> (try (string (show radix ++ "#")) >> calcBE (toInteger radix) <$> many1 (erlDigits radix)) <|> acc) (calcBE 10 <$> many1 (erlDigits 10)) [2..36] where calcBE a = foldl (\acc d -> a * acc + d) 0 erlDigits k = choice (take k digitParsers) digitParsers = -- create parsers that consume/match '0' .. '9' and "aA" .. "zZ" and return 0 .. 35 map (\(cs,v) -> choice (char <$> cs) >> return v) (((pure <$> ['0' .. '9']) ++ zipWith ((++) `on` pure) ['a' .. 'z'] ['A' .. 'Z']) `zip` [0..]) erlStringParser :: Parser SimpleErlangTerm erlStringParser = do char '"' str <- many (erlCharEscaped <|> noneOf "\"") char '"' return (ErlString str) erlCharEscaped :: Parser Char erlCharEscaped = char '\\' >> (do char '^' choice (zipWith escapedChar ccodes creplacements) <|> do char 'x' do ds <- between (char '{') (char '}') (fmap hexVal <$> many1 hexDigit) let val = foldl (\acc v -> acc * 16 + v) 0 ds return (toEnum val) <|> do x1 <- hexVal <$> hexDigit x2 <- hexVal <$> hexDigit; return (toEnum ((x1*16)+x2)) <|> do o1 <- octVal <$> octDigit do o2 <- octVal <$> octDigit do o3 <- octVal <$> octDigit return (toEnum ((((o1*8)+o2)*8)+o3)) <|> return (toEnum ((o1*8)+o2)) <|> return (toEnum o1) <|> choice (zipWith escapedChar codes replacements)) where escapedChar code replacement = char code >> return replacement codes = "0bdefnrstv\\\"'" replacements = "\NUL\b\DEL\ESC\f\n\r \t\v\\\"'" ccodes = ['a' .. 'z'] ++ ['A' .. 'Z'] creplacements = cycle ['\^A' .. '\^Z'] hexVal v | v `elem` ['a' .. 'z'] = 0xA + (fromEnum v - fromEnum 'a') | v `elem` ['A' .. 'Z'] = 0xA + (fromEnum v - fromEnum 'A') | otherwise = fromEnum v - fromEnum '0' octVal = hexVal erlBinaryParser :: Parser SimpleErlangTerm erlBinaryParser = do string "<<" spaces ErlString str <- option (ErlString "") erlStringParser string ">>" spaces return (ErlBinary str) erlListParser :: Parser SimpleErlangTerm erlListParser = ErlList <$> erlNestedParser (char '[') (char ']') erlTupleParser :: Parser SimpleErlangTerm erlTupleParser = ErlTuple <$> erlNestedParser (char '{') (char '}') erlNestedParser :: Parser a -> Parser b -> Parser [SimpleErlangTerm] erlNestedParser open close = between (open >> spaces) (close >> spaces) (commaSep erlExpressionParser) commaSep :: Parser a -> Parser [a] commaSep p = do r <- p spaces rest <- option [] (char ',' >> spaces >> commaSep p) return (r:rest) <|> return [] arbitraryErlSimpleAtom :: Gen SimpleErlangTerm arbitraryErlSimpleAtom = ErlAtom <$> ((:) <$> arbitraryLetterLower <*> listOf arbitraryErlNameChar) arbitraryErlString :: Gen SimpleErlangTerm arbitraryErlString = ErlString <$> listOf (oneof [arbitraryLetter ,arbitraryDigit]) arbitraryErlNumber :: Gen SimpleErlangTerm arbitraryErlNumber = oneof [arbitraryErlNatural, arbitraryErlFloat] arbitraryErlNatural :: Gen SimpleErlangTerm arbitraryErlNatural = ErlNatural <$> arbitrary arbitraryErlFloat :: Gen SimpleErlangTerm arbitraryErlFloat = ErlFloat <$> arbitrary arbitraryErlNameChar :: Gen Char arbitraryErlNameChar = oneof [arbitraryLetter ,arbitraryDigit ,pure '_' ,pure '@']