{-# language BangPatterns #-}
{-# language BinaryLiterals #-}
{-# language BlockArguments #-}
{-# language DerivingStrategies #-}
{-# language DeriveAnyClass #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language TypeApplications #-}
module Json
(
Value(..)
, Member(..)
, SyntaxException(..)
, decode
) where
import Prelude hiding (Bool(True,False))
import Control.Exception (Exception)
import Control.Monad.ST (ST)
import Data.Bits ((.&.),(.|.),unsafeShiftR)
import Data.Builder.ST (Builder)
import Data.Bytes.Parser (Parser)
import Data.Bytes.Types (Bytes(..))
import Data.Char (ord)
import Data.Chunks (Chunks(ChunksNil))
import Data.Number.Scientific (Scientific)
import Data.Primitive (ByteArray,MutableByteArray)
import Data.Text.Short (ShortText)
import GHC.Exts (Char(C#),Int(I#),gtWord#,ltWord#,word2Int#,chr#)
import GHC.Word (Word8(W8#),Word16(W16#))
import qualified Prelude
import qualified Data.Builder.ST as B
import qualified Data.Bytes.Parser as P
import qualified Data.Text.Short.Unsafe as TS
import qualified Data.Number.Scientific as SCI
import qualified Data.Primitive as PM
import qualified Data.Bytes.Parser.Utf8 as Utf8
import qualified Data.Bytes.Parser.Latin as Latin
import qualified Data.ByteString.Short.Internal as BSS
import qualified Data.Bytes.Parser.Unsafe as Unsafe
data Value
= Object !(Chunks Member)
| Array !(Chunks Value)
| String {-# UNPACK #-} !ShortText
| Number {-# UNPACK #-} !Scientific
| Null
| True
| False
deriving stock (Eq,Show)
data SyntaxException
= EmptyInput
| ExpectedColon
| ExpectedCommaOrRightBracket
| ExpectedFalse
| ExpectedNull
| ExpectedQuote
| ExpectedQuoteOrRightBrace
| ExpectedTrue
| IncompleteArray
| IncompleteEscapeSequence
| IncompleteObject
| IncompleteString
| InvalidEscapeSequence
| InvalidLeader
| InvalidNumber
| LeadingZero
| UnexpectedLeftovers
deriving stock (Eq,Show)
deriving anyclass (Exception)
data Member = Member
{ key :: {-# UNPACK #-} !ShortText
, value :: !Value
} deriving stock (Eq,Show)
emptyArrayValue :: Value
{-# noinline emptyArrayValue #-}
emptyArrayValue = Array ChunksNil
emptyObjectValue :: Value
{-# noinline emptyObjectValue #-}
emptyObjectValue = Object ChunksNil
isSpace :: Word8 -> Prelude.Bool
isSpace w =
w == c2w ' '
|| w == c2w '\t'
|| w == c2w '\r'
|| w == c2w '\n'
decode :: Bytes -> Either SyntaxException Value
decode = P.parseBytesEither do
P.skipWhile isSpace
result <- Latin.any EmptyInput >>= parser
P.skipWhile isSpace
P.endOfInput UnexpectedLeftovers
pure result
parser :: Char -> Parser SyntaxException s Value
parser = \case
'{' -> objectTrailedByBrace
'[' -> arrayTrailedByBracket
't' -> do
Latin.char3 ExpectedTrue 'r' 'u' 'e'
pure True
'f' -> do
Latin.char4 ExpectedFalse 'a' 'l' 's' 'e'
pure False
'n' -> do
Latin.char3 ExpectedNull 'u' 'l' 'l'
pure Null
'"' -> do
start <- Unsafe.cursor
string String start
'-' -> fmap Number (SCI.parserNegatedUtf8Bytes InvalidNumber)
'0' -> Latin.trySatisfy (\c -> c >= '0' && c <= '9') >>= \case
Prelude.True -> P.fail LeadingZero
Prelude.False -> fmap Number (SCI.parserTrailingUtf8Bytes InvalidNumber 0)
c | c >= '1' && c <= '9' ->
fmap Number (SCI.parserTrailingUtf8Bytes InvalidNumber (ord c - 48))
_ -> P.fail InvalidLeader
objectTrailedByBrace :: Parser SyntaxException s Value
objectTrailedByBrace = do
P.skipWhile isSpace
Latin.any IncompleteObject >>= \case
'}' -> pure emptyObjectValue
'"' -> do
start <- Unsafe.cursor
!theKey <- string id start
P.skipWhile isSpace
Latin.char ExpectedColon ':'
P.skipWhile isSpace
val <- Latin.any IncompleteObject >>= parser
let !mbr = Member theKey val
!b0 <- P.effect B.new
b1 <- P.effect (B.push mbr b0)
objectStep b1
_ -> P.fail ExpectedQuoteOrRightBrace
objectStep :: Builder s Member -> Parser SyntaxException s Value
objectStep !b = do
P.skipWhile isSpace
Latin.any IncompleteObject >>= \case
',' -> do
P.skipWhile isSpace
Latin.char ExpectedQuote '"'
start <- Unsafe.cursor
!theKey <- string id start
P.skipWhile isSpace
Latin.char ExpectedColon ':'
P.skipWhile isSpace
val <- Latin.any IncompleteObject >>= parser
let !mbr = Member theKey val
P.effect (B.push mbr b) >>= objectStep
'}' -> do
!r <- P.effect (B.freeze b)
pure (Object r)
_ -> P.fail ExpectedCommaOrRightBracket
arrayTrailedByBracket :: Parser SyntaxException s Value
arrayTrailedByBracket = do
P.skipWhile isSpace
Latin.any IncompleteArray >>= \case
']' -> pure emptyArrayValue
c -> do
!b0 <- P.effect B.new
val <- parser c
b1 <- P.effect (B.push val b0)
arrayStep b1
arrayStep :: Builder s Value -> Parser SyntaxException s Value
arrayStep !b = do
P.skipWhile isSpace
Latin.any IncompleteArray >>= \case
',' -> do
P.skipWhile isSpace
val <- Latin.any IncompleteArray >>= parser
P.effect (B.push val b) >>= arrayStep
']' -> do
!r <- P.effect (B.freeze b)
pure (Array r)
_ -> P.fail ExpectedCommaOrRightBracket
c2w :: Char -> Word8
c2w = fromIntegral . ord
string :: (ShortText -> a) -> Int -> Parser SyntaxException s a
{-# inline string #-}
string wrap !start = go 1 where
go !canMemcpy = do
P.any IncompleteString >>= \case
92 -> P.any InvalidEscapeSequence *> go 0
34 -> do
!pos <- Unsafe.cursor
case canMemcpy of
1 -> do
src <- Unsafe.expose
str <- P.effect $ do
let end = pos - 1
let len = end - start
dst <- PM.newByteArray len
PM.copyByteArray dst 0 src start len
PM.unsafeFreezeByteArray dst
pure (wrap (TS.fromShortByteStringUnsafe (byteArrayToShortByteString str)))
_ -> do
Unsafe.unconsume (pos - start)
let end = pos - 1
let maxLen = end - start
copyAndEscape wrap maxLen
W8# w -> go (canMemcpy .&. I# (ltWord# w 128##) .&. I# (gtWord# w 31##))
copyAndEscape :: (ShortText -> a) -> Int -> Parser SyntaxException s a
{-# inline copyAndEscape #-}
copyAndEscape wrap !maxLen = do
!dst <- P.effect (PM.newByteArray maxLen)
let go !ix = Utf8.any# IncompleteString `P.bindFromCharToLifted` \c -> case c of
'\\'# -> Latin.any IncompleteEscapeSequence >>= \case
'"' -> do
P.effect (PM.writeByteArray dst ix (c2w '"'))
go (ix + 1)
'\\' -> do
P.effect (PM.writeByteArray dst ix (c2w '\\'))
go (ix + 1)
't' -> do
P.effect (PM.writeByteArray dst ix (c2w '\t'))
go (ix + 1)
'n' -> do
P.effect (PM.writeByteArray dst ix (c2w '\n'))
go (ix + 1)
'r' -> do
P.effect (PM.writeByteArray dst ix (c2w '\r'))
go (ix + 1)
'/' -> do
P.effect (PM.writeByteArray dst ix (c2w '/'))
go (ix + 1)
'b' -> do
P.effect (PM.writeByteArray dst ix (c2w '\b'))
go (ix + 1)
'f' -> do
P.effect (PM.writeByteArray dst ix (c2w '\f'))
go (ix + 1)
'u' -> do
w <- Latin.hexFixedWord16 InvalidEscapeSequence
if w >= 0xD800 && w < 0xDFFF
then go =<< P.effect (encodeUtf8Char dst ix '\xFFFD')
else go =<< P.effect (encodeUtf8Char dst ix (w16ToChar w))
_ -> P.fail InvalidEscapeSequence
'"'# -> do
str <- P.effect
(PM.unsafeFreezeByteArray =<< PM.resizeMutableByteArray dst ix)
pure (wrap (TS.fromShortByteStringUnsafe (byteArrayToShortByteString str)))
_ -> go =<< P.effect (encodeUtf8Char dst ix (C# c))
go 0
encodeUtf8Char :: MutableByteArray s -> Int -> Char -> ST s Int
encodeUtf8Char !marr !ix !c
| c < '\128' = do
PM.writeByteArray marr ix (c2w c)
pure (ix + 1)
| c < '\x0800' = do
PM.writeByteArray marr ix
(fromIntegral @Int @Word8 (unsafeShiftR (ord c) 6 .|. 0b11000000))
PM.writeByteArray marr (ix + 1)
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (ord c))))
pure (ix + 2)
| c <= '\xffff' = do
PM.writeByteArray marr ix
(fromIntegral @Int @Word8 (unsafeShiftR (ord c) 12 .|. 0b11100000))
PM.writeByteArray marr (ix + 1)
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (unsafeShiftR (ord c) 6))))
PM.writeByteArray marr (ix + 2)
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (ord c))))
pure (ix + 3)
| otherwise = do
PM.writeByteArray marr ix
(fromIntegral @Int @Word8 (unsafeShiftR (ord c) 18 .|. 0b11110000))
PM.writeByteArray marr (ix + 1)
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (unsafeShiftR (ord c) 12))))
PM.writeByteArray marr (ix + 2)
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (unsafeShiftR (ord c) 6))))
PM.writeByteArray marr (ix + 3)
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (ord c))))
pure (ix + 4)
byteArrayToShortByteString :: ByteArray -> BSS.ShortByteString
byteArrayToShortByteString (PM.ByteArray x) = BSS.SBS x
w16ToChar :: Word16 -> Char
w16ToChar (W16# w) = C# (chr# (word2Int# w))