{-# LANGUAGE TemplateHaskellQuotes #-}

module Argo.Type.String where

import qualified Argo.Decoder as Decoder
import qualified Argo.Literal as Literal
import qualified Control.DeepSeq as DeepSeq
import qualified Control.Monad as Monad
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Builder.Prim as P
import qualified Data.Char as Char
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import qualified Data.Word as Word
import qualified Language.Haskell.TH.Syntax as TH

newtype String
    = String Text.Text
    deriving (Eq, Show)

instance TH.Lift Argo.Type.String.String where
    liftTyped (String x) = [|| String x ||]

instance DeepSeq.NFData Argo.Type.String.String where
    rnf (String x) = DeepSeq.rnf x

encode :: Argo.Type.String.String -> Builder.Builder
encode (String x) =
    Builder.word8 Literal.quotationMark
    <> Text.encodeUtf8BuilderEscaped encodeChar x
    <> Builder.word8 Literal.quotationMark

encodeChar :: P.BoundedPrim Word.Word8
encodeChar =
    P.condB (== Literal.quotationMark) (encodeShortEscape Literal.quotationMark)
    . P.condB (== Literal.reverseSolidus) (encodeShortEscape Literal.reverseSolidus)
    . P.condB (== Literal.backspace) (encodeShortEscape Literal.latinSmallLetterB)
    . P.condB (== Literal.formFeed) (encodeShortEscape Literal.latinSmallLetterF)
    . P.condB (== Literal.newLine) (encodeShortEscape Literal.latinSmallLetterN)
    . P.condB (== Literal.carriageReturn) (encodeShortEscape Literal.latinSmallLetterR)
    . P.condB (== Literal.horizontalTabulation) (encodeShortEscape Literal.latinSmallLetterT)
    . P.condB (< Literal.space) encodeLongEscape
    $ P.liftFixedToBounded P.word8

encodeShortEscape :: Word.Word8 -> P.BoundedPrim a
encodeShortEscape x = P.liftFixedToBounded
    $ const (Literal.reverseSolidus, x)
    P.>$< P.word8
    P.>*< P.word8

encodeLongEscape :: P.BoundedPrim Word.Word8
encodeLongEscape = P.liftFixedToBounded
    $ (\ x -> (Literal.reverseSolidus, (Literal.latinSmallLetterU, word8ToWord16 x)))
    P.>$< P.word8
    P.>*< P.word8
    P.>*< P.word16HexFixed

word8ToWord16 :: Word.Word8 -> Word.Word16
word8ToWord16 = fromIntegral

decode :: Decoder.Decoder Argo.Type.String.String
decode = do
    Decoder.word8 Literal.quotationMark
    b1 <- Decoder.get
    i <- case getClose b1 0 of
        Nothing -> fail "unterminated string"
        Just i -> pure i
    let (xs, b2) = ByteString.splitAt i b1
    Monad.when (ByteString.any (< Literal.space) xs) $ fail "unescaped control character"
    Decoder.put b2
    Decoder.word8 Literal.quotationMark
    Decoder.spaces
    case Text.decodeUtf8' xs of
        Left e -> fail $ show e
        Right x -> case unescapeText x of
            Nothing -> fail "invalid escape"
            Just y -> pure $ String y

findAt :: Word.Word8 -> Int -> ByteString.ByteString -> Maybe Int
findAt x i = fmap (+ i) . ByteString.elemIndex x . ByteString.drop i

countConsecutive :: Word.Word8 -> Int -> ByteString.ByteString -> Int
countConsecutive x i = ByteString.length . ByteString.takeWhileEnd (== x) . ByteString.take i

getClose :: ByteString.ByteString -> Int -> Maybe Int
getClose b i = do
    j <- findAt Literal.quotationMark i b
    let n = countConsecutive Literal.reverseSolidus j b
    if even n then Just j else getClose b $ j + 1

unescapeText :: Text.Text -> Maybe Text.Text
unescapeText = fmap (Text.pack . combineSurrogatePairs) . unescapeString . Text.unpack

combineSurrogatePairs :: Prelude.String -> Prelude.String
combineSurrogatePairs xs = case xs of
    "" -> xs
    x : y : zs | isHighSurrogate x && isLowSurrogate y ->
        combineSurrogatePair x y : combineSurrogatePairs zs
    x : ys -> x : combineSurrogatePairs ys

combineSurrogatePair :: Char -> Char -> Char
combineSurrogatePair hi lo = Char.chr
    $ 0x10000
    + ((Char.ord hi - 0xd800) * 0x400)
    + (Char.ord lo - 0xdc00)

isHighSurrogate :: Char -> Bool
isHighSurrogate x = '\xd800' <= x && x <= '\xdbff'

isLowSurrogate :: Char -> Bool
isLowSurrogate x = '\xdc00' <= x && x <= '\xdfff'

unescapeString :: Prelude.String -> Maybe Prelude.String
unescapeString xs = case xs of
    "" -> pure xs
    '\\' : ys -> case ys of
        "" -> fail "empty escape"
        x : zs -> case x of
            '"' -> ('"' :) <$> unescapeString zs
            '\\' -> ('\\' :) <$> unescapeString zs
            '/' -> ('/' :) <$> unescapeString zs
            'b' -> ('\b' :) <$> unescapeString zs
            'f' -> ('\f' :) <$> unescapeString zs
            'n' -> ('\n' :) <$> unescapeString zs
            'r' -> ('\r' :) <$> unescapeString zs
            't' -> ('\t' :) <$> unescapeString zs
            'u' -> case zs of
                a : b : c : d : es | Just y <- fromLongEscape a b c d ->
                    (y :) <$> unescapeString es
                _ -> fail "invalid long escape"
            _ -> fail "invalid short escape"
    x : ys -> (x :) <$> unescapeString ys

fromLongEscape :: Char -> Char -> Char -> Char -> Maybe Char
fromLongEscape a b c d = do
    w <- fromHexadecimalDigit a
    x <- fromHexadecimalDigit b
    y <- fromHexadecimalDigit c
    z <- fromHexadecimalDigit d
    pure . Char.chr $ (0x1000 * w) + (0x100 * x) + (0x10 * y) + z

fromHexadecimalDigit :: Char -> Maybe Int
fromHexadecimalDigit x =
    if Char.isHexDigit x then Just $ Char.digitToInt x else Nothing