{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE UnliftedFFITypes #-} {-| Module : Std.Data.JSON.Value Description : JSON representation and parsers Copyright : (c) Dong Han, 2019 License : BSD Maintainer : winterland1989@gmail.com Stability : experimental Portability : non-portable This module provides definition and parsers for JSON 'Value's, a Haskell JSON representation. The parsers is designed to comply with , notable pitfalls are: * The numeric representation use 'Scientific', which impose a limit on number's exponent part(limited to 'Int'). * Unescaped control characters(<=0x1F) are NOT accepted, (different from aeson). * Only @0x20, 0x09, 0x0A, 0x0D@ are valid JSON whitespaces, 'skipSpaces' from this module is different from 'P.skipSpaces'. * A JSON document shouldn't have trailing characters except whitespaces describe above, see 'parseValue'' and 'parseValueChunks''. * Objects are represented as key-value vectors, key order and duplicated keys are preserved for further processing. Note that rfc8258 doesn't enforce unique key in objects, it's up to users to decided how to deal with key duplication, e.g. prefer first or last key, see 'Std.Data.JSON.Base.withFlatMap' or 'Std.Data.JSON.Base.withFlatMapR' for example. There's no lazy parsers here, every pieces of JSON document will be parsed into a normal form 'Value'. 'Object' and 'Array's payloads are packed into 'Vector's to avoid accumulating lists in memory. Read more about . -} module Std.Data.JSON.Value ( -- * Value type Value(..) -- * parse into JSON Value , parseValue , parseValue' , parseValueChunks , parseValueChunks' -- * Value Parsers , value , object , array , string , skipSpaces ) where import Control.DeepSeq import Control.Monad import Data.Bits ((.&.)) import Data.Functor import Data.Primitive.PrimArray import Data.Scientific (Scientific, scientific) import Data.Typeable import Data.Word import GHC.Generics import qualified Std.Data.Parser as P import Std.Data.Parser (()) import qualified Std.Data.Text as T import Std.Data.TextBuilder (ToText) import qualified Std.Data.Text.Base as T import Std.Data.Vector.Base as V import Std.Data.Vector.Extra as V import Std.Foreign.PrimArray import System.IO.Unsafe (unsafeDupablePerformIO) import Test.QuickCheck.Arbitrary (Arbitrary(..)) import Test.QuickCheck.Gen (Gen(..), listOf) #define BACKSLASH 92 #define CLOSE_CURLY 125 #define CLOSE_SQUARE 93 #define COMMA 44 #define COLON 58 #define DOUBLE_QUOTE 34 #define OPEN_CURLY 123 #define OPEN_SQUARE 91 #define C_0 48 #define C_9 57 #define C_A 65 #define C_F 70 #define C_a 97 #define C_f 102 #define C_n 110 #define C_t 116 #define MINUS 45 -------------------------------------------------------------------------------- -- | A JSON value represented as a Haskell value. -- -- The 'Object''s payload is a key-value vector instead of a map, which parsed -- directly from JSON document. This design choice has following advantages: -- -- * Allow different strategies handling duplicated keys. -- * Allow different 'Map' type to do further parsing, e.g. 'Std.Data.Vector.FlatMap' -- * Roundtrip without touching the original key-value order. -- * Save time if constructing map is not neccessary, e.g. -- using a linear scan to find a key if only that key is needed. -- data Value = Object {-# UNPACK #-} !(V.Vector (T.Text, Value)) | Array {-# UNPACK #-} !(V.Vector Value) | String {-# UNPACK #-} !T.Text | Number {-# UNPACK #-} !Scientific | Bool !Bool | Null deriving (Eq, Show, Typeable, Generic, ToText) instance NFData Value where {-# INLINE rnf #-} rnf (Object o) = rnf o rnf (Array a) = rnf a rnf (String s) = rnf s rnf (Number n) = rnf n rnf (Bool b) = rnf b rnf Null = () instance Arbitrary Value where -- limit maximum depth of JSON document, otherwise it's too slow to run any tests arbitrary = arbitraryValue 0 4 where arbitraryValue d s = do i <- arbitrary :: Gen Word case (i `mod` 6) of 0 -> if d < s then Object . V.pack <$> listOf (arbitraryKV (d+1) s) else pure Null 1 -> if d < s then Array . V.pack <$> listOf (arbitraryValue (d+1) s) else pure Null 2 -> String <$> arbitrary 3 -> do c <- arbitrary e <- arbitrary pure . Number $ scientific c e 4 -> Bool <$> arbitrary _ -> pure Null arbitraryKV d s = (,) <$> arbitrary <*> arbitraryValue d s shrink (Object kvs) = snd <$> (V.unpack kvs) shrink (Array vs) = V.unpack vs shrink _ = [] -- | Parse 'Value' without consuming trailing bytes. parseValue :: V.Bytes -> (V.Bytes, Either P.ParseError Value) {-# INLINE parseValue #-} parseValue = P.parse value -- | Parse 'Value', and consume all trailing JSON white spaces, if there're -- bytes left, parsing will fail. parseValue' :: V.Bytes -> Either P.ParseError Value {-# INLINE parseValue' #-} parseValue' = P.parse_ (value <* skipSpaces <* P.endOfInput) -- | Increamental parse 'Value' without consuming trailing bytes. parseValueChunks :: Monad m => m V.Bytes -> V.Bytes -> m (V.Bytes, Either P.ParseError Value) {-# INLINE parseValueChunks #-} parseValueChunks = P.parseChunks value -- | Increamental parse 'Value' and consume all trailing JSON white spaces, if there're -- bytes left, parsing will fail. parseValueChunks' :: Monad m => m V.Bytes -> V.Bytes -> m (Either P.ParseError Value) {-# INLINE parseValueChunks' #-} parseValueChunks' mi inp = snd <$> P.parseChunks (value <* skipSpaces <* P.endOfInput) mi inp -------------------------------------------------------------------------------- -- | The only valid whitespace in a JSON document is space, newline, -- carriage pure, and tab. skipSpaces :: P.Parser () {-# INLINE skipSpaces #-} skipSpaces = P.skipWhile (\ w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09) -- | JSON 'Value' parser. value :: P.Parser Value {-# INLINABLE value #-} value = "Std.Data.JSON.Value.value" do skipSpaces w <- P.peek case w of DOUBLE_QUOTE -> P.skipWord8 *> (String <$> string_) OPEN_CURLY -> P.skipWord8 *> (Object <$> object_) OPEN_SQUARE -> P.skipWord8 *> (Array <$> array_) C_f -> P.bytes "false" $> (Bool False) C_t -> P.bytes "true" $> (Bool True) C_n -> P.bytes "null" $> Null _ | w >= 48 && w <= 57 || w == MINUS -> Number <$> P.scientific' | otherwise -> fail "Std.Data.JSON.Value.value: not a valid json value" -- | parse json array with leading OPEN_SQUARE. array :: P.Parser (V.Vector Value) {-# INLINE array #-} array = "Std.Data.JSON.Value.array" P.word8 OPEN_SQUARE *> array_ -- | parse json array without leading OPEN_SQUARE. array_ :: P.Parser (V.Vector Value) {-# INLINABLE array_ #-} array_ = do skipSpaces w <- P.peek if w == CLOSE_SQUARE then P.skipWord8 $> V.empty else loop [] 1 where loop :: [Value] -> Int -> P.Parser (V.Vector Value) loop acc !n = do !v <- value skipSpaces let acc' = v:acc ch <- P.satisfy $ \w -> w == COMMA || w == CLOSE_SQUARE if ch == COMMA then skipSpaces *> loop acc' (n+1) else pure $! V.packRN n acc' -- n start from 1, so no need to +1 here -- | parse json array with leading OPEN_CURLY. object :: P.Parser (V.Vector (T.Text, Value)) {-# INLINE object #-} object = "Std.Data.JSON.Value.object" P.word8 OPEN_CURLY *> object_ -- | parse json object without leading OPEN_CURLY. object_ :: P.Parser (V.Vector (T.Text, Value)) {-# INLINABLE object_ #-} object_ = do skipSpaces w <- P.peek if w == CLOSE_CURLY then P.skipWord8 $> V.empty else loop [] 1 where loop :: [(T.Text, Value)] -> Int -> P.Parser (V.Vector (T.Text, Value)) loop acc !n = do !k <- string skipSpaces P.word8 COLON !v <- value skipSpaces let acc' = (k, v) : acc ch <- P.satisfy $ \w -> w == COMMA || w == CLOSE_CURLY if ch == COMMA then skipSpaces *> loop acc' (n+1) else pure $! V.packRN n acc' -- n start from 1, so no need to +1 here -------------------------------------------------------------------------------- string :: P.Parser T.Text {-# INLINE string #-} string = "Std.Data.JSON.Value.string" P.word8 DOUBLE_QUOTE *> string_ string_ :: P.Parser T.Text {-# INLINE string_ #-} string_ = do (bs, state) <- P.scanChunks 0 go let mt = case state .&. 0xFF of -- need escaping 1 -> unsafeDupablePerformIO (do let !len = V.length bs !mpa <- newPrimArray len !len' <- withMutablePrimArrayUnsafe mpa (\ mba# _ -> withPrimVectorUnsafe bs (decode_json_string mba#)) !pa <- unsafeFreezePrimArray mpa if len' >= 0 then pure (Just (T.Text (V.PrimVector pa 0 len'))) -- unescaping also validate utf8 else pure Nothing) 3 -> Nothing -- reject unescaped control characters _ -> T.validateMaybe bs case mt of Just t -> P.skipWord8 $> t _ -> fail "Std.Data.JSON.Value.string_: utf8 validation or unescaping failed" where go :: Word32 -> V.Bytes -> Either Word32 (V.Bytes, V.Bytes, Word32) go !state v = case unsafeDupablePerformIO . withPrimUnsafe state $ \ ps -> withPrimVectorUnsafe v (find_json_string_end ps) of (state', len) | len >= 0 -> let !r = V.unsafeTake len v !rest = V.unsafeDrop len v in Right (r, rest, state') | otherwise -> Left state' foreign import ccall unsafe find_json_string_end :: MBA# Word32 -> BA# Word8 -> Int -> Int -> IO Int foreign import ccall unsafe decode_json_string :: MBA# Word8 -> BA# Word8 -> Int -> Int -> IO Int