{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE CPP                 #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns        #-}
{-# OPTIONS_GHC -O2 #-}
-- | Parser from strict 'Text' to 'Tokens'.
module Data.Aeson.Decoding.Text (
    textToTokens,
) where

import           Data.Char                            (chr)
import           Data.Integer.Conversion              (textToInteger)
import           Data.Text.Internal                   (Text (..))

import qualified Data.Aeson.Key                       as Key
import qualified Data.Scientific                      as Sci
import qualified Data.Text                            as T
import qualified Data.Text.Array                      as A

import           Data.Aeson.Decoding.Internal
import           Data.Aeson.Decoding.Tokens
import           Data.Aeson.Internal.Prelude
import           Data.Aeson.Internal.UnescapeFromText (unescapeFromText)

#if MIN_VERSION_text(2,0,0)
import           Data.Aeson.Internal.Word8
#else
import           Data.Aeson.Internal.Word16
#endif

#if MIN_VERSION_text(2,0,0)
type Point = Word8
#else
type Point = Word16
#endif


-- | Lex (and parse) strict 'ByteString' into 'Tokens' stream.
--
-- @since 2.2.1.0
--
textToTokens :: Text -> Tokens Text String
textToTokens bs0 = goT bs0 id where
    goT :: Parser Tokens k
    goT (skipSpace -> bs) k = case unconsPoint bs of
        Nothing         -> tkErr "Unexpected end-of-input, expecting JSON value"
        Just (!w, !bs1) -> tokenCase w bs1 bs k

    tokenCase
        :: Point              -- head
        -> Text               -- tail
        -> Text               -- whole input, needed for number parsing
        -> (Text -> k)        -- continuation
        -> Tokens k String
    tokenCase W8_OPEN_CURLY   !bs !_   k      = TkRecordOpen (goR bs k)
    tokenCase W8_OPEN_SQUARE   bs  _   k      = TkArrayOpen (goA bs k)
    tokenCase W8_DOUBLE_QUOTE  bs  _   k      = scanStringLiteral (\t bs' -> TkText t (k bs')) tkErr bs
    tokenCase W8_MINUS         bs  _   k      = scanNumberLiteral (\n bs' -> TkNumber (negateNumber n) (k bs')) tkErr bs
    tokenCase w                _   wbs k
        | W8_0 <= w, w <= W8_9                = scanNumberLiteral (\n bs' -> TkNumber n (k bs')) tkErr wbs
    tokenCase W8_n             bs  _   k
        | Just bs1 <- stripPrefix "ull" 3 bs  = TkLit LitNull (k bs1)
    tokenCase W8_t             bs  _   k
        | Just bs1 <- stripPrefix "rue" 3 bs  = TkLit LitTrue (k bs1)
    tokenCase W8_f             bs  _   k
        | Just bs1 <- stripPrefix "alse" 4 bs = TkLit LitFalse (k bs1)
    tokenCase _                _   wbs _      = tkErr $ "Unexpected " ++ showBeginning wbs ++ ", expecting JSON value"

    -- Array
    goA :: Parser TkArray k
    goA (skipSpace -> bs) k = case unconsPoint bs of
        Nothing         -> tkErrEOF "JSON value or ]"
        Just (W8_CLOSE_SQUARE, !bs1) -> TkArrayEnd (k bs1)
        Just (w,  !bs1) -> TkItem $ tokenCase w bs1 bs $ \bs2 -> goA1 bs2 k

    goA1 :: Parser TkArray k
    goA1 (skipSpace -> bs) k = case unconsPoint bs of
        Nothing                      -> tkErrEOF ", or ]"
        Just (W8_CLOSE_SQUARE, !bs1) -> TkArrayEnd (k bs1)
        Just (W8_COMMA, !bs1)        -> TkItem $ goT bs1 $ \bs2 -> goA1 bs2 k
        _                            -> tkErrBS bs ", or ]"

    -- Record
    goR :: Parser TkRecord k
    goR (skipSpace -> bs) k = case unconsPoint bs of
        Nothing                       -> tkErrEOF "record key literal or }"
        Just (W8_DOUBLE_QUOTE,  !bs1) -> goRK bs1 k           -- "
        Just (W8_CLOSE_CURLY, !bs1)   -> TkRecordEnd (k bs1)  -- }
        Just _                        -> tkErrBS bs "record key literal or }"

    -- after record pair, expecting ," or }
    goR1 :: Parser TkRecord k
    goR1 (skipSpace -> bs) k = case unconsPoint bs of
        Nothing                           -> tkErr "Unexpected end-of-input, expecting , or }"
        Just (W8_COMMA, !bs1) -> case unconsPoint (skipSpace bs1) of
            Nothing                      -> tkErrEOF "key literal"
            Just (W8_DOUBLE_QUOTE, !bs2) -> goRK bs2 k
            Just _                       -> tkErrBS bs "key literal"
        Just (W8_CLOSE_CURLY, !bs1)       -> TkRecordEnd (k bs1)
        _                                 -> tkErr $ "Unexpected " ++ showBeginning bs ++ ", expecting , or }"

    -- key of record (after double quote)
    goRK :: Parser TkRecord k
    goRK bs1 k = scanStringLiteral (\t bs -> goRK' t bs k) tkErr bs1

    -- after key of a record, expecting :
    goRK' :: Text -> Parser TkRecord k
    goRK' t (skipSpace -> bs) k = case T.uncons bs of
        Nothing          -> tkErrEOF ":"
        Just (':', !bs3) -> TkPair (Key.fromText t) $ goT bs3 $ \bs4 -> goR1 bs4 k
        Just _           -> tkErrBS bs ":"

stripPrefix :: Text -> Int -> Text -> Maybe Text
stripPrefix pfx _ bs = T.stripPrefix pfx bs
{-# INLINE stripPrefix #-}

type Parser tk k = Text -> (Text -> k) -> tk k String

showBeginning :: Text -> String
showBeginning = show . T.take 30

-- | Strip leading (ASCII) space
skipSpace :: Text -> Text
skipSpace = T.dropWhile $ \w -> w == '\x20' || w == '\x0a' || w == '\x0d' || w == '\x09'
{-# INLINE skipSpace #-}

tkErrEOF :: AsError t => String ->  t k String
tkErrEOF expected = tkErr $
    "Unexpected end-of-input, expecting " ++ expected
{-# INLINE tkErrEOF #-}

tkErrBS :: AsError t => Text -> String ->  t k String
tkErrBS bs expected = tkErr $
    "Unexpected " ++ showBeginning bs ++ ", expecting " ++ expected
{-# INLINE tkErrBS #-}

-------------------------------------------------------------------------------
-- Text
-------------------------------------------------------------------------------

scanStringLiteral
    :: forall r. (Text -> Text -> r)
    -> (String -> r)
    -> Text
    -> r
scanStringLiteral ok err bs0 = go 0 bs0 where
    -- the length is counted in bytes.
    go :: Int -> Text -> r
    go !n !bs = case unconsPoint bs of
        Nothing          -> errEnd
        Just (34, _)     -> ok (unsafeTakePoints n bs0) (unsafeDropPoints (n + 1) bs0)
        Just (92, bs')   -> goSlash (n + 1) bs'
        Just (w8, bs')
            | w8 < 0x20  -> errCC
            -- we don't need to check for @>= 0x80@ chars, as text is valid unicode.
            | otherwise  -> go (n + 1) bs'

    -- in goEsc and goSlash we don't need to check for control characters as unescapeText verifies that.
    goEsc :: Int -> Text -> r
    goEsc !n !bs = case unconsPoint bs of
        Nothing        -> errEnd
        Just (34, _)   -> case unescapeFromText (unsafeTakePoints n bs0) of
            Right t -> ok t (unsafeDropPoints (n + 1) bs0)
            Left e  -> err (show e)
        Just (92, bs') -> goSlash (n + 1) bs'
        Just (_,  bs') -> goEsc (n + 1) bs'

    goSlash :: Int -> Text -> r
    goSlash !n !bs = case unconsPoint bs of
        Nothing       -> errEnd
        Just (_, bs') -> goEsc (n + 1) bs'

    errEnd = err "Unexpected end-of-input while parsing string literal"
    errCC  = err "Unespected control character while parsing string literal"

-------------------------------------------------------------------------------
-- Number
-------------------------------------------------------------------------------

--
-- number   := integer fraction exponent
-- integer  := 0 | [1-9][0-9]* | -0 | -[1-9][0-9]*
-- fraction := "" | . [0-9]+
-- exponent := "" | E sign [0-9]+ | e sign [0-9]+
-- sign     := "" | - | +
--
-- This scanner doesn't recognize the leading minus sign, we recognize only integer := 0 | [1-9][0-9]*,
-- as the minus sign is recognized by outer scanner already.
--
scanNumberLiteral
    :: forall r. (Number -> Text -> r)
    -> (String -> r)
    -> Text
    -> r
scanNumberLiteral kont err bs0 = state_start bs0 where
    state_start :: Text -> r
    state_start !bs = case unconsPoint bs of
        Nothing                      -> errEnd
        Just (w8, bs')
            | W8_0 < w8, w8 <= W8_9  -> state_i1 1 bs'
            | W8_0 == w8             -> state_after0 bs'
            | otherwise              -> errUnx w8

    state_after0 :: Text -> r
    state_after0 !bs = case unconsPoint bs of
        Nothing                         -> kont (NumInteger 0) bs
        Just (w8, bs')
            | W8_0 <= w8, w8 <= W8_9    -> err "Number literal with leading zero"
            | W8_DOT == w8              -> go_dec 0 bs'
            | W8_e == w8 || W8_E == w8  -> go_sci 0 0 bs'
            | otherwise                 -> kont (NumInteger 0) bs

    state_i1 :: Int -> Text -> r
    state_i1 !n !bs = case unconsPoint bs of
        Nothing                         -> kont (NumInteger int) bs
        Just (w8, bs')
            | W8_0 <= w8, w8 <= W8_9    -> state_i1 (n + 1) bs'
            | W8_DOT == w8              -> go_dec int bs'
            | W8_e == w8 || W8_E == w8  -> go_sci int 0 bs'
            | otherwise                 -> kont (NumInteger int) bs
      where
        int = textToInteger (unsafeTakePoints n bs0)

    go_dec :: Integer -> Text -> r
    go_dec !int !bs1 = case unconsPoint bs1 of
        Nothing                       -> errEnd
        Just (w8, bs')
            | W8_0 <= w8, w8 <= W8_9  -> state_dec 1 bs'
            | otherwise               -> errUnx w8
      where
        state_dec :: Int -> Text -> r
        state_dec !n !bs = case unconsPoint bs of
            Nothing                         -> kont (NumDecimal dec) bs
            Just (w8, bs')
                | W8_0 <= w8, w8 <= W8_9    -> state_dec (n + 1) bs'
                | W8_e == w8 || W8_E == w8  -> go_sci coef (negate n) bs'
                | otherwise                 -> kont (NumDecimal dec) bs
          where
            frac = textToInteger (unsafeTakePoints n bs1)
            coef = int * 10 ^ n + frac
            dec  = Sci.scientific coef (negate n)

    go_sci :: Integer -> Int -> Text -> r
    go_sci !coef !exp10 !bs2 = case unconsPoint bs2 of
        Nothing                       -> errEnd
        Just (w8, bs')
            | W8_0 <= w8, w8 <= W8_9  -> go_sci_pos coef exp10 bs2 1 bs'
            | W8_PLUS == w8           -> case unconsPoint bs' of
                Nothing               -> errEnd
                Just (w8', bs'')
                    | W8_0 <= w8', w8' <= W8_9  -> go_sci_pos coef exp10 bs' 1 bs''
                    | otherwise       ->  errUnx w8'
            | W8_MINUS == w8          -> case unconsPoint bs' of
                Nothing               -> errEnd
                Just (w8', bs'')
                    | W8_0 <= w8', w8' <= W8_9  -> go_sci_neg coef exp10 bs' 1 bs''
                    | otherwise       ->  errUnx w8'
            | otherwise               -> errUnx w8

    go_sci_pos :: Integer -> Int -> Text -> Int -> Text -> r
    go_sci_pos !coef !exp10 !bs2 !n !bs = case unconsPoint bs of
        Nothing                       -> kont (NumScientific sci) bs
        Just (w8, bs')
            | W8_0 <= w8, w8 <= W8_9  -> go_sci_pos coef exp10 bs2 (n + 1) bs'
            | otherwise               -> kont (NumScientific sci) bs
      where
        exp10' = fromInteger (textToInteger (unsafeTakePoints n bs2))
        sci = Sci.scientific coef (exp10 + exp10')

    go_sci_neg :: Integer -> Int -> Text -> Int -> Text -> r
    go_sci_neg !coef !exp10 !bs2 !n !bs = case unconsPoint bs of
        Nothing                       -> kont (NumScientific sci) bs
        Just (w8, bs')
            | W8_0 <= w8, w8 <= W8_9  -> go_sci_neg coef exp10 bs2 (n + 1) bs'
            | otherwise               -> kont (NumScientific sci) bs
      where
        exp10' = fromInteger (textToInteger (unsafeTakePoints n bs2))
        sci = Sci.scientific coef (exp10 - exp10')

    errEnd    = err "Unexpected end-of-input while parsing number literal"
    errUnx w8 = err $ "Unexpected " ++ show (chr (fromIntegral w8)) ++ " while parsing number literal"

-------------------------------------------------------------------------------
-- Unsafe primitives
-------------------------------------------------------------------------------

{-# INLINE unconsPoint #-}
-- Uncons a primitive unit of storage from text.
-- The left-over 'Text' value may be invalid.
unconsPoint :: Text -> Maybe (Point, Text)
unconsPoint (Text arr off len)
    | len <= 0  = Nothing
    | otherwise = Just (w8, Text arr (off + 1) (len - 1))
  where
    w8 = A.unsafeIndex arr off

unsafeTakePoints :: Int -> Text -> Text
unsafeTakePoints n (Text arr off _len) = Text arr off n
{-# INLINE unsafeTakePoints #-}

unsafeDropPoints :: Int -> Text -> Text
unsafeDropPoints n (Text arr off len) = Text arr (off + n) (len - n)
{-# INLINE unsafeDropPoints #-}