{-| Erlang term parser and pretty printer. -}
module B9.Artifact.Content.ErlTerms (parseErlTerm
                           ,erlTermParser
                           ,renderErlTerm
                           ,SimpleErlangTerm(..)
                           ,arbitraryErlSimpleAtom
                           ,arbitraryErlString
                           ,arbitraryErlNumber
                           ,arbitraryErlNatural
                           ,arbitraryErlFloat
                           ,arbitraryErlNameChar) where

import           Control.Parallel.Strategies
import           Data.Binary
import qualified Data.ByteString.Lazy.Char8 as Lazy
import           Data.Data
import           Data.Function
import           Data.Hashable
import           GHC.Generics (Generic)
import           Test.QuickCheck
import           Text.Parsec
       ((<|>), many, spaces, char, option, between, string, choice,
        octDigit, hexDigit, many1, noneOf, try, digit, anyChar, alphaNum,
        lower, parse)
import           Text.Parsec.ByteString.Lazy
import           Text.Show.Pretty
import           Control.Monad
import           Text.Printf
import qualified Text.PrettyPrint as PP

import           B9.QCUtil

-- | Simplified Erlang term representation.
data SimpleErlangTerm
    = ErlString String
    | ErlFloat Double
    | ErlNatural Integer
    | ErlAtom String
    | ErlChar Char
    | ErlBinary String
    | ErlList [SimpleErlangTerm]
    | ErlTuple [SimpleErlangTerm]
    deriving (Eq,Ord,Read,Show,Data,Typeable,Generic)

instance Hashable SimpleErlangTerm
instance Binary SimpleErlangTerm
instance NFData SimpleErlangTerm


-- | Parse a subset of valid Erlang terms. It parses no maps and binaries are
-- restricted to either empty binaries or binaries with a string. The input
-- encoding must be restricted to ascii compatible 8-bit characters
-- (e.g. latin-1 or UTF8).
parseErlTerm :: String -> Lazy.ByteString -> Either String SimpleErlangTerm
parseErlTerm src content =
  either (Left . ppShow) Right (parse erlTermParser src content)

-- | Convert an abstract Erlang term to a pretty byte string preserving the
-- encoding.
renderErlTerm :: SimpleErlangTerm -> Lazy.ByteString
renderErlTerm s = Lazy.pack (PP.render (prettyPrintErlTerm s PP.<> PP.char '.'))

prettyPrintErlTerm :: SimpleErlangTerm -> PP.Doc
prettyPrintErlTerm (ErlString str) = PP.doubleQuotes (PP.text (toErlStringString str))
prettyPrintErlTerm (ErlNatural n) = PP.integer n
prettyPrintErlTerm (ErlFloat f) = PP.double f
prettyPrintErlTerm (ErlChar c) = PP.text ("$" ++ toErlAtomChar c)
prettyPrintErlTerm (ErlAtom a) = PP.text quotedAtom
  where
    quotedAtom =
      case toErlAtomString a of
        "" -> "''"
        a'@(firstChar:rest)
          | firstChar `elem` ['a' .. 'z'] &&
              all (`elem` atomCharsThatDontNeedQuoting) rest -> a'
        a' -> "'" ++ a' ++ "'"
    atomCharsThatDontNeedQuoting =
      ['a' .. 'z'] ++ ['A' .. 'Z'] ++ ['0' .. '9'] ++ "@_"

prettyPrintErlTerm (ErlBinary []) = PP.text "<<>>"
prettyPrintErlTerm (ErlBinary b) = PP.text ("<<\"" ++ toErlStringString b ++ "\">>")
prettyPrintErlTerm (ErlList xs) =
  PP.brackets (PP.sep (PP.punctuate PP.comma (prettyPrintErlTerm <$> xs)))
prettyPrintErlTerm (ErlTuple xs) =
  PP.braces (PP.sep (PP.punctuate PP.comma (prettyPrintErlTerm <$> xs)))

toErlStringString :: String -> String
toErlStringString = join . map toErlStringChar

toErlStringChar :: Char -> String
toErlStringChar = (table !!) . fromEnum
  where
    table =
      [printf "\\x{%x}" c | c <- [0 .. (31 :: Int)]] ++
      (pure . toEnum <$> [32 .. 33]) ++
      ["\\\""] ++
      (pure . toEnum <$> [35 .. 91]) ++
      ["\\\\"] ++ (pure . toEnum <$> [93 .. 126]) ++ [printf "\\x{%x}" c | c <- [(127 :: Int) ..]]

toErlAtomString :: String -> String
toErlAtomString = join . map toErlAtomChar

toErlAtomChar :: Char -> String
toErlAtomChar = (table !!) . fromEnum
  where
    table =
      [printf "\\x{%x}" c | c <- [0 .. (31 :: Int)]] ++
      (pure . toEnum <$> [32 .. 38]) ++
      ["\\'"] ++
      (pure . toEnum <$> [40 .. 91]) ++
      ["\\\\"] ++ (pure . toEnum <$> [93 .. 126]) ++ [printf "\\x{%x}" c | c <- [(127 :: Int) ..]]


instance Arbitrary SimpleErlangTerm where
  arbitrary = oneof [sized aErlString
                    ,sized aErlNatural
                    ,sized aErlFloat
                    ,sized aErlChar
                    ,sized aErlAtomUnquoted
                    ,sized aErlAtomQuoted
                    ,sized aErlBinary
                    ,sized aErlList
                    ,sized aErlTuple
                    ]
    where
      decrSize 0 = resize 0
      decrSize n = resize (n - 1)
      aErlString n =
        ErlString <$> decrSize n (listOf (choose (toEnum 0,toEnum 255)))
      aErlFloat n = do
        f <- decrSize n arbitrary :: Gen Float
        let d = fromRational (toRational f)
        return (ErlFloat d)
      aErlNatural n =
        ErlNatural <$> decrSize n arbitrary
      aErlChar n =
        ErlChar <$> decrSize n (choose (toEnum 0, toEnum 255))
      aErlAtomUnquoted n = do
        f <- choose ('a','z')
        rest <- decrSize n aErlNameString
        return (ErlAtom (f:rest))
      aErlAtomQuoted n = do
        cs <- decrSize n aParsableErlString
        return (ErlAtom ("'" ++ cs ++ "'"))
      aErlBinary n =
        ErlBinary <$> decrSize n (listOf (choose (toEnum 0,toEnum 255)))
      aParsableErlString = oneof [aErlNameString
                                 ,aErlEscapedCharString
                                 ,aErlControlCharString
                                 ,aErlOctalCharString
                                 ,aErlHexCharString]
      aErlNameString = listOf (elements (['a'..'z'] ++ ['A'..'Z']++ ['0'..'9']++"@_"))
      aErlEscapedCharString = elements (("\\"++) . pure <$> "0bdefnrstv\\\"\'")
      aErlControlCharString = elements (("\\^"++) . pure <$> (['a'..'z'] ++ ['A'..'Z']))
      aErlOctalCharString = do
        n <- choose (1,3)
        os <- vectorOf n (choose (0,7))
        return (join ("\\":(show <$> (os::[Int]))))
      aErlHexCharString =
        oneof [twoDigitHex,nDigitHex]
        where
          twoDigitHex = do
            d1 <- choose (0,15) :: Gen Int
            d2 <- choose (0,15) :: Gen Int
            return (printf "\\x%x%X" d1 d2)
          nDigitHex = do
            zs <- listOf (elements "0")
            v <- choose (0,255) :: Gen Int
            return (printf "\\x{%s%x}" zs v)
      aErlList n =
        ErlList <$> resize (n `div` 2) (listOf arbitrary)
      aErlTuple n =
        ErlTuple <$> resize (n `div` 2) (listOf arbitrary)


erlTermParser :: Parser SimpleErlangTerm
erlTermParser = between spaces (char '.') erlExpressionParser

erlExpressionParser :: Parser SimpleErlangTerm
erlExpressionParser = erlAtomParser
                 <|> erlCharParser
                 <|> erlStringParser
                 <|> erlBinaryParser
                 <|> erlListParser
                 <|> erlTupleParser
                 <|> try erlFloatParser
                 <|> erlNaturalParser

erlAtomParser :: Parser SimpleErlangTerm
erlAtomParser =
  ErlAtom <$>
  (between (char '\'')
           (char '\'')
           (many (erlCharEscaped <|> noneOf "'"))
   <|>
   ((:) <$> lower <*> many erlNameChar))

erlNameChar :: Parser Char
erlNameChar = alphaNum <|> char '@' <|> char '_'

erlCharParser :: Parser SimpleErlangTerm
erlCharParser = ErlChar <$> (char '$' >> (erlCharEscaped <|> anyChar))

erlFloatParser :: Parser SimpleErlangTerm
erlFloatParser = do
  -- Parse a float as string, then use read :: Double to 'parse' the floating
  -- point value. Calculating by hand is complicated because of precision
  -- issues.
  sign <- option "" ((char '-' >> return "-") <|> (char '+' >> return ""))
  s1 <- many digit
  char '.'
  s2 <- many1 digit
  e <- do expSym <- choice [char 'e', char 'E']
          expSign <- option "" ((char '-' >> return "-") <|> (char '+' >> return "+"))
          expAbs <- many1 digit
          return ([expSym] ++ expSign ++ expAbs)
      <|> return ""
  return (ErlFloat (read (sign ++ s1 ++ "." ++ s2 ++ e)))

erlNaturalParser :: Parser SimpleErlangTerm
erlNaturalParser = do
  sign <- signParser
  dec <- decimalLiteral
  return $ ErlNatural $ sign * dec

signParser :: Parser Integer
signParser =
  (char '-' >> return (-1))
  <|> (char '+' >> return 1)
  <|> return 1

decimalLiteral :: Parser Integer
decimalLiteral =
   foldr (\radix acc ->
            (try (string (show radix ++ "#"))
             >> calcBE (toInteger radix) <$> many1 (erlDigits radix))
            <|> acc)
         (calcBE 10 <$> many1 (erlDigits 10))
         [2..36]
  where
    calcBE a = foldl (\acc d -> a * acc + d) 0
    erlDigits k = choice (take k digitParsers)
    digitParsers =
      -- create parsers that consume/match '0' .. '9' and "aA" .. "zZ" and return 0 .. 35
      map (\(cs,v) -> choice (char <$> cs) >> return v)
          (((pure <$> ['0' .. '9']) ++ zipWith ((++) `on` pure)
                                               ['a' .. 'z']
                                               ['A' .. 'Z'])
           `zip` [0..])

erlStringParser :: Parser SimpleErlangTerm
erlStringParser = do
  char '"'
  str <- many (erlCharEscaped <|> noneOf "\"")
  char '"'
  return (ErlString str)

erlCharEscaped :: Parser Char
erlCharEscaped =
  char '\\'
  >> (do char '^'
         choice (zipWith escapedChar ccodes creplacements)

      <|>
      do char 'x'
         do ds <- between (char '{') (char '}') (fmap hexVal <$> many1 hexDigit)
            let val = foldl (\acc v -> acc * 16 + v) 0 ds
            return (toEnum val)
          <|>
          do x1 <- hexVal <$> hexDigit
             x2 <- hexVal <$> hexDigit;
             return (toEnum ((x1*16)+x2))

      <|>
      do o1 <- octVal <$> octDigit
         do o2 <- octVal <$> octDigit
            do o3 <- octVal <$> octDigit
               return (toEnum ((((o1*8)+o2)*8)+o3))
              <|> return (toEnum ((o1*8)+o2))
          <|> return (toEnum o1)

      <|>
      choice (zipWith escapedChar codes replacements))
  where
    escapedChar code replacement = char code >> return replacement
    codes =
      "0bdefnrstv\\\"'"
    replacements =
      "\NUL\b\DEL\ESC\f\n\r \t\v\\\"'"
    ccodes =
      ['a' .. 'z'] ++ ['A' .. 'Z']
    creplacements =
      cycle ['\^A' .. '\^Z']
    hexVal v | v `elem` ['a' .. 'z'] = 0xA + (fromEnum v - fromEnum 'a')
             | v `elem` ['A' .. 'Z'] = 0xA + (fromEnum v - fromEnum 'A')
             | otherwise = fromEnum v - fromEnum '0'
    octVal = hexVal

erlBinaryParser :: Parser SimpleErlangTerm
erlBinaryParser =
  do string "<<"
     spaces
     ErlString str <- option (ErlString "") erlStringParser
     string ">>"
     spaces
     return (ErlBinary str)

erlListParser :: Parser SimpleErlangTerm
erlListParser = ErlList <$> erlNestedParser (char '[') (char ']')

erlTupleParser :: Parser SimpleErlangTerm
erlTupleParser = ErlTuple <$> erlNestedParser (char '{') (char '}')

erlNestedParser :: Parser a -> Parser b -> Parser [SimpleErlangTerm]
erlNestedParser open close =
  between
    (open >> spaces)
    (close >> spaces)
    (commaSep erlExpressionParser)

commaSep :: Parser a -> Parser [a]
commaSep p = do r <- p
                spaces
                rest <- option [] (char ',' >> spaces >> commaSep p)
                return (r:rest)
            <|> return []

arbitraryErlSimpleAtom :: Gen SimpleErlangTerm
arbitraryErlSimpleAtom = ErlAtom <$> ((:)
                                      <$> arbitraryLetterLower
                                      <*> listOf arbitraryErlNameChar)

arbitraryErlString :: Gen SimpleErlangTerm
arbitraryErlString = ErlString <$> listOf (oneof [arbitraryLetter
                                                 ,arbitraryDigit])

arbitraryErlNumber :: Gen SimpleErlangTerm
arbitraryErlNumber = oneof [arbitraryErlNatural, arbitraryErlFloat]

arbitraryErlNatural :: Gen SimpleErlangTerm
arbitraryErlNatural = ErlNatural <$> arbitrary

arbitraryErlFloat :: Gen SimpleErlangTerm
arbitraryErlFloat = ErlFloat <$> arbitrary

arbitraryErlNameChar :: Gen Char
arbitraryErlNameChar = oneof [arbitraryLetter
                             ,arbitraryDigit
                             ,pure '_'
                             ,pure '@']