{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}

module Jordan.FromJSON.Internal.Attoparsec where

import Control.Applicative (Alternative (..))
import Control.Monad (void, when)
import Data.Attoparsec.ByteString ((<?>))
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString as AP
import qualified Data.Attoparsec.ByteString.Char8 as CH
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import Data.Char (chr, digitToInt, isControl, isHexDigit, ord)
import Data.Functor (void, ($>))
import Data.Monoid (Alt (..))
import Data.Scientific (Scientific)
import qualified Data.Scientific as Sci
import qualified Data.Scientific as Scientific
import qualified Data.Text as Text
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Numeric (showHex)

skipSpace :: AP.Parser ()
skipSpace = AP.skipWhile isSpace <?> "skipped space"
  where
    isSpace = \case
      32 -> True
      10 -> True
      13 -> True
      9 -> True
      _ -> False

lexeme :: AP.Parser a -> AP.Parser a
lexeme a = a <* skipSpace

label :: String -> AP.Parser a -> AP.Parser a
label l p = p <?> l

parseAnyField :: AP.Parser ()
parseAnyField =
  {-# SCC ignoredObjectField #-}
  label "ignored object field" $
    void $ do
      lexeme parseJunkText
      labelSep
      anyDatum

objectEndWithJunk :: AP.Parser ()
objectEndWithJunk = endObject <|> junkFieldAndEnd
  where
    junkFieldAndEnd = void $ do
      comma
      label "ignored extra field at object end" parseAnyField
      objectEndWithJunk

comma :: AP.Parser ()
comma = label "comma character" $ void $ lexeme (AP.string ",")

quotation :: AP.Parser ()
quotation = label "quotation mark" $ void $ AP.word8 34

parseJSONText :: AP.Parser Text.Text
parseJSONText = label "JSON text" $ do
  quotation
  innerText

-- | A parser for a JSON text value that skips its input.
-- Avoids doing UTF-8 Decoding.
parseJunkText :: AP.Parser ()
parseJunkText = label "Ignored JSON Text Literal" $ do
  quotation
  junkInnerText
{-# INLINE parseJunkText #-}

-- | Parses the bit of a JSON string after the quotation.
innerText :: AP.Parser Text.Text
innerText = do
  chunk <- label "Skipped text body" $
    AP.takeWhile $ \char -> char /= 92 && char /= 34
  l <- AP.peekWord8
  case l of
    Nothing -> fail "string without end"
    Just 34 -> do
      AP.anyWord8
      pure $ decodeUtf8 chunk
    Just 92 -> do
      AP.anyWord8
      r <- label "escape value" parseEscape
      rest <- innerText
      pure $ decodeUtf8 chunk <> r <> rest
    Just _ -> fail "Impossibe: Parsed until we parsed a '\\' or a '\"', yet next char was neither"

junkInnerText :: AP.Parser ()
junkInnerText =
  {-# SCC ignoredTextBetweenQuotes #-}
  do
    AP.skipWhile $ \char -> char /= 92 && char /= 34
    !l <- AP.peekWord8
    case l of
      Nothing -> fail "string without end"
      Just 34 -> AP.anyWord8 $> ()
      Just 93 -> do
        AP.anyWord8
        parseEscape
        -- Yes we could save a miniscule amount of time by replacing this with an "ignoring" version.
        -- However, laziness means that we probably don't save *that* much.
        junkInnerText
      Just _ -> fail "Impossible: Skipped until we parsed a '\\' or a '\"', yet next char was neither"
{-# INLINE junkInnerText #-}

parseEscape :: AP.Parser Text.Text
parseEscape =
  quote
    <|> backslash
    <|> solidus
    <|> backspace
    <|> formfeed
    <|> linefeed
    <|> carriage
    <|> tab
    <|> escaped
  where
    backslash = AP.string "\\" $> "\\" <?> "Backslash escape"
    quote = AP.string "\"" $> "\"" <?> "Quote escape"
    solidus = AP.string "/" $> "/" <?> "Solidus escape"
    backspace = AP.string "b" $> "\b" <?> "Backspace escape"
    formfeed = AP.string "f" $> "\f" <?> "Formfeed escape"
    linefeed = AP.string "n" $> "\n" <?> "Linefeed escape"
    carriage = AP.string "r" $> "\r" <?> "Carriage escape"
    tab = AP.string "t" $> "\t" <?> "Tab escape"
    escaped = label "UTF Code Escape" $ do
      AP.string "u"
      a <- parseHexDigit
      b <- parseHexDigit
      c <- parseHexDigit
      d <- parseHexDigit
      let s = (((a * 16) + b) * 16 + c) * 16 + d
      pure $ Text.pack [chr s]

parseHexDigit :: AP.Parser Int
parseHexDigit = label "hex digit" (digitToInt <$> CH.satisfy isHexDigit)

parseCharInText :: Char -> AP.Parser ()
parseCharInText a = parseLit a <|> escaped a
  where
    parseLit :: Char -> AP.Parser ()
    parseLit = \case
      '\\' -> void $ AP.string "\\\\"
      '"' -> void $ AP.string "\\\""
      '/' -> void $ AP.string "/" <|> AP.string "\\/"
      '\b' -> void $ AP.string "\\b"
      '\f' -> void $ AP.string "\\f"
      '\n' -> void $ AP.string "\\n"
      '\r' -> void $ AP.string "\\r"
      '\t' -> void $ AP.string "\\t"
      a -> if isControl a then empty else void $ AP.string $ encodeUtf8 $ Text.singleton a
    escaped :: Char -> AP.Parser ()
    escaped a = void $ AP.string $ encodeUtf8 $ Text.pack $ withEscaped $ (showHex $ ord a) []
    withEscaped :: String -> String
    withEscaped a@[_] = "\\u000" <> a
    withEscaped a@[_, _] = "\\u00" <> a
    withEscaped a@[_, _, _] = "\\u0" <> a
    withEscaped r = "\\u" <> r

mustBeEscaped :: Char -> Bool
mustBeEscaped = \case
  '\\' -> True
  '"' -> True
  '/' -> True
  '\b' -> True
  '\f' -> True
  '\n' -> True
  '\r' -> True
  '\t' -> True
  _ -> False

canParseDirectly :: Text.Text -> Bool
canParseDirectly t = not $ Text.foldr (\c v -> v || mustBeEscaped c) False t

parseTextBody :: Text.Text -> AP.Parser ()
parseTextBody text
  | canParseDirectly text = void (A.string (encodeUtf8 text)) <|> parseViaChars text
  | otherwise = parseViaChars text

parseViaChars = Text.foldr (\c a -> parseCharInText c *> a) (pure ())

objectKey :: Text.Text -> AP.Parser ()
objectKey k = lexeme $ do
  quotation
  {-# SCC "knownObjectKeyBetweenQuotes" #-} parseTextBody k
  quotation
  pure ()

startObject :: AP.Parser ()
startObject =
  label "object starting brace ('{')" $
    lexeme $
      void $
        AP.word8 123

endObject :: AP.Parser ()
endObject =
  label "object ending brace ('}')" $
    lexeme $
      void $
        AP.word8 125

inObjectBraces :: AP.Parser a -> AP.Parser a
inObjectBraces interior = startObject *> interior <* endObject

startArray :: AP.Parser ()
startArray =
  label "array starting brace ('[')" $
    lexeme $
      void $
        AP.word8 91

endArray :: AP.Parser ()
endArray =
  label "array ending brace (']')" $
    lexeme $
      void $
        AP.word8 93

labelSep :: AP.Parser ()
labelSep = label "key-value separator (':')" $ void $ lexeme $ AP.string ":"

anyDatum :: AP.Parser ()
anyDatum =
  lexeme $
    {-# SCC "ignoredJSONValue" #-}
    do
      t <- AP.peekWord8
      case t of
        Just 102 -> void $ AP.string "false"
        Just 110 -> void $ AP.string "null"
        Just 116 -> void $ AP.string "true"
        Just 123 -> anyObject
        Just 34 -> parseJunkText
        Just 43 -> parseJunkNumber
        Just 45 -> parseJunkNumber
        Just 48 -> parseJunkNumber
        Just 49 -> parseJunkNumber
        Just 50 -> parseJunkNumber
        Just 51 -> parseJunkNumber
        Just 52 -> parseJunkNumber
        Just 53 -> parseJunkNumber
        Just 54 -> parseJunkNumber
        Just 55 -> parseJunkNumber
        Just 57 -> parseJunkNumber
        Just 59 -> parseJunkNumber
        Just 91 -> anyArray
        Just _ -> fail "not a valid starter of any JSON value"
        Nothing -> fail "empty input"
{-# INLINE anyDatum #-}

anyArray :: AP.Parser ()
anyArray = label "ignored array" $
  void $ do
    startArray
    endArray <|> junkItems
  where
    junkItems = do
      anyDatum
      endArray <|> (comma *> junkItems)

parseJunkDecimalZero :: AP.Parser ()
parseJunkDecimalZero = do
  let zero = 48
  digits <- A.takeWhile1 CH.isDigit_w8
  when (B.length digits > 1 && B.unsafeHead digits == zero) $
    fail "leading zero"

parseJunkExponent :: AP.Parser ()
parseJunkExponent = label "junk exponent" $ do
  A.satisfy (\ex -> ex == 101 || ex == 69)
  A.skipWhile (\ch -> ch == 45 || ch == 43)
  parseJunkDecimalZero

parseJunkNumber :: AP.Parser ()
parseJunkNumber = do
  A.skipWhile (\ch -> ch == 45 || ch == 43)
  parseJunkDecimalZero
  -- skip decimal
  dot <- A.peekWord8
  case dot of
    Just 46 -> void $ A.anyWord8 *> A.takeWhile1 CH.isDigit_w8
    _ -> pure ()
  parseJunkExponent <|> pure ()

------ Scientific parser, copy/pasted from Aeson. ----

-- (This parser was in turn copy-pasted itself from various soruces so this is kohser)

-- A strict pair
data SP = SP !Integer {-# UNPACK #-} !Int

decimal0 :: AP.Parser Integer
decimal0 = do
  let zero = 48
  digits <- A.takeWhile1 CH.isDigit_w8
  if B.length digits > 1 && B.unsafeHead digits == zero
    then fail "leading zero"
    else return (bsToInteger digits)

-- | Parse a JSON number.
--
-- This function is wholesale copy/pasted from Aeson.
-- Thanks to them.
scientific :: AP.Parser Scientific
scientific = do
  let minus = 45
      plus = 43
  sign <- A.peekWord8'
  let !positive = sign == plus || sign /= minus
  when (sign == plus || sign == minus) $
    void A.anyWord8

  n <- decimal0

  let f fracDigits =
        SP
          (B.foldl' step n fracDigits)
          (negate $ B.length fracDigits)
      step a w = a * 10 + fromIntegral (w - 48)

  dotty <- A.peekWord8
  -- '.' -> ascii 46
  SP c e <- case dotty of
    Just 46 -> A.anyWord8 *> (f <$> A.takeWhile1 CH.isDigit_w8)
    _ -> pure (SP n 0)

  let !signedCoeff
        | positive = c
        | otherwise = - c

  let littleE = 101
      bigE = 69
  ( A.satisfy (\ex -> ex == littleE || ex == bigE)
      *> fmap (Scientific.scientific signedCoeff . (e +)) (CH.signed CH.decimal)
    )
    <|> return (Scientific.scientific signedCoeff e)
{-# INLINE scientific #-}

bsToInteger :: B.ByteString -> Integer
bsToInteger bs
  | l > 40 = valInteger 10 l [fromIntegral (w - 48) | w <- B.unpack bs]
  | otherwise = bsToIntegerSimple bs
  where
    l = B.length bs

bsToIntegerSimple :: B.ByteString -> Integer
bsToIntegerSimple = B.foldl' step 0
  where
    step a b = a * 10 + fromIntegral (b - 48) -- 48 = '0'

-- A sub-quadratic algorithm for Integer. Pairs of adjacent radix b
-- digits are combined into a single radix b^2 digit. This process is
-- repeated until we are left with a single digit. This algorithm
-- performs well only on large inputs, so we use the simple algorithm
-- for smaller inputs.
valInteger :: Integer -> Int -> [Integer] -> Integer
valInteger = go
  where
    go :: Integer -> Int -> [Integer] -> Integer
    go _ _ [] = 0
    go _ _ [d] = d
    go b l ds
      | l > 40 = b' `seq` go b' l' (combine b ds')
      | otherwise = valSimple b ds
      where
        -- ensure that we have an even number of digits
        -- before we call combine:
        ds' = if even l then ds else 0 : ds
        b' = b * b
        l' = (l + 1) `quot` 2

    combine b (d1 : d2 : ds) = d `seq` (d : combine b ds)
      where
        d = d1 * b + d2
    combine _ [] = []
    combine _ [_] = errorWithoutStackTrace "this should not happen"

-- The following algorithm is only linear for types whose Num operations
-- are in constant time.
valSimple :: Integer -> [Integer] -> Integer
valSimple base = go 0
  where
    go r [] = r
    go r (d : ds) = r' `seq` go r' ds
      where
        r' = r * base + fromIntegral d

number :: AP.Parser Scientific
number = scientific
{-# INLINE number #-}

anyObject :: AP.Parser ()
anyObject =
  {-# SCC ignoredJSONObject #-}
  label "ignored object" $
    void $ do
      startObject
      endObject <|> junkField
  where
    junkField = do
      parseAnyField
      endObject <|> (comma *> junkField)

parseObjectField ::
  Text.Text ->
  AP.Parser a ->
  AP.Parser a
parseObjectField t f = do
  objectKey t
  labelSep
  lexeme f

parseDictField ::
  AP.Parser a ->
  AP.Parser (Text.Text, a)
parseDictField p = do
  key <- lexeme parseJSONText
  labelSep
  val <- p
  pure (key, val)