{-| Module : Z.Data.JSON.Value Description : JSON representation and parsers Copyright : (c) Dong Han, 2019 License : BSD Maintainer : winterland1989@gmail.com Stability : experimental Portability : non-portable This module provides definition and parsers for JSON 'Value's, a Haskell JSON representation. The parsers is designed to comply with , notable pitfalls are: * The numeric representation use 'Scientific', which impose a limit on number's exponent part(limited to 'Int'). * Unescaped control characters(<=0x1F) are NOT accepted, (different from aeson). * Only @0x20, 0x09, 0x0A, 0x0D@ are valid JSON whitespaces, 'skipSpaces' from this module is different from 'P.skipSpaces'. * A JSON document shouldn't have trailing characters except whitespaces describe above, see 'parseValue''. * Objects are represented as key-value vectors, key order and duplicated keys are preserved for further processing. Note that rfc8258 doesn't enforce unique key in objects, it's up to users to decided how to deal with key duplication, e.g. prefer first or last key, see 'Z.Data.JSON.Base.withFlatMap' or 'Std.Data.JSON.Base.withFlatMapR' for example. There's no lazy parsers here, every pieces of JSON document will be parsed into a normal form 'Value'. 'Object' and 'Array's payloads are packed into 'Vector's to avoid accumulating lists in memory. Read more about . -} module Z.Data.JSON.Value ( -- * Value type Value(..), key, nth -- * parse into JSON Value , parseValue , parseValue' -- * Value Parsers , value , object , array , string , skipSpaces -- * Convert to Scientific , floatToScientific , doubleToScientific ) where import Control.DeepSeq import Data.Bits ((.&.)) import Data.Functor import Data.Scientific (Scientific, scientific) import Data.Typeable import Data.Int import Data.Word import GHC.Generics import Z.Data.ASCII import qualified Z.Data.Parser as P import qualified Z.Data.Builder.Numeric as B import qualified Z.Data.Text.Base as T import Z.Data.Text.Print (Print(..)) import Z.Data.Vector.Base as V import Z.Data.Vector.Extra as V import Z.Data.Vector.Search as V import Z.Foreign import System.IO.Unsafe (unsafeDupablePerformIO) import Test.QuickCheck.Arbitrary (Arbitrary(..)) import Test.QuickCheck.Gen (Gen(..), listOf) -------------------------------------------------------------------------------- -- | A JSON value represented as a Haskell value. -- -- The 'Object''s payload is a key-value vector instead of a map, which parsed -- directly from JSON document. This design choice has following advantages: -- -- * Allow different strategies handling duplicated keys. -- * Allow different 'Map' type to do further parsing, e.g. 'Z.Data.Vector.FlatMap' -- * Roundtrip without touching the original key-value order. -- * Save time if constructing map is not neccessary, e.g. -- using a linear scan to find a key if only that key is needed. -- data Value = Object {-# UNPACK #-} !(V.Vector (T.Text, Value)) | Array {-# UNPACK #-} !(V.Vector Value) | String {-# UNPACK #-} !T.Text | Number {-# UNPACK #-} !Scientific | Bool !Bool | Null deriving (Eq, Ord, Show, Typeable, Generic) deriving anyclass Print instance NFData Value where {-# INLINE rnf #-} rnf (Object o) = rnf o rnf (Array a) = rnf a rnf (String s) = rnf s rnf (Number n) = rnf n rnf (Bool b) = rnf b rnf Null = () instance Arbitrary Value where -- limit maximum depth of JSON document, otherwise it's too slow to run any tests arbitrary = arbitraryValue 0 4 where arbitraryValue :: Int -> Int -> Gen Value arbitraryValue d s = do i <- arbitrary :: Gen Word case (i `mod` 6) of 0 -> if d < s then Object . V.pack <$> listOf (arbitraryKV (d+1) s) else pure Null 1 -> if d < s then Array . V.pack <$> listOf (arbitraryValue (d+1) s) else pure Null 2 -> String <$> arbitrary 3 -> do c <- arbitrary e <- arbitrary pure . Number $ scientific c e 4 -> Bool <$> arbitrary _ -> pure Null arbitraryKV d s = (,) <$> arbitrary <*> arbitraryValue d s shrink (Object kvs) = snd <$> (V.unpack kvs) shrink (Array vs) = V.unpack vs shrink _ = [] -- | Lense for 'Array' element. -- -- 1. return `Null` if 'Value' is not an 'Array' or index not exist. -- 2. Modify will have no effect if 'Value' is not an 'Array' or index not exist. -- nth :: Functor f => Int -> (Value -> f Value) -> Value -> f Value {-# INLINABLE nth #-} nth ix f (Array vs) | Just v <- vs `indexMaybe` ix = fmap (\ x -> Array (V.unsafeModifyIndex vs ix (const x))) (f v) nth _ f v = fmap (const v) (f Null) -- | Lense for 'Object' element -- -- 1. return `Null` if 'Value' is not an 'Object' or key not exist. -- 2. Modify will have no effect if 'Value' is not an 'Object' or key not exist. -- 4. On duplicated keys prefer the last one. -- key :: Functor f => T.Text -> (Value -> f Value) -> Value -> f Value {-# INLINABLE key #-} key k f (Object kvs) | (i, Just (_, v)) <- V.findR ((k ==) . fst) kvs = fmap (\ x -> Object (V.unsafeModifyIndex kvs i (const (k, x)))) (f v) key _ f v = fmap (const v) (f Null) -- | Parse 'Value' without consuming trailing bytes. parseValue :: V.Bytes -> (V.Bytes, Either P.ParseError Value) {-# INLINE parseValue #-} parseValue = P.parse value -- | Parse 'Value', and consume all trailing JSON white spaces, if there're -- bytes left, parsing will fail. parseValue' :: V.Bytes -> Either P.ParseError Value {-# INLINE parseValue' #-} parseValue' = P.parse' (value <* skipSpaces <* P.endOfInput) -------------------------------------------------------------------------------- -- | The only valid whitespace in a JSON document is space, newline, -- carriage pure, and tab. skipSpaces :: P.Parser () {-# INLINE skipSpaces #-} skipSpaces = P.skipWhile (\ w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09) -- | JSON 'Value' parser. value :: P.Parser Value {-# INLINABLE value #-} value = do skipSpaces w <- P.peek case w of DOUBLE_QUOTE -> P.skipWord8 *> (String <$> string_) CURLY_LEFT -> P.skipWord8 *> (Object <$> object_) SQUARE_LEFT -> P.skipWord8 *> (Array <$> array_) LETTER_f -> P.bytes "false" $> (Bool False) LETTER_t -> P.bytes "true" $> (Bool True) LETTER_n -> P.bytes "null" $> Null _ | w >= 48 && w <= 57 || w == MINUS -> Number <$> P.scientific' | otherwise -> P.fail' "Z.Data.JSON.Value.value: not a valid json value" -- | parse json array with leading SQUARE_LEFT. array :: P.Parser (V.Vector Value) {-# INLINE array #-} array = P.word8 SQUARE_LEFT *> array_ -- | parse json array without leading SQUARE_LEFT. array_ :: P.Parser (V.Vector Value) {-# INLINABLE array_ #-} array_ = do skipSpaces w <- P.peek if w == SQUARE_RIGHT then P.skipWord8 $> V.empty else loop [] 1 where loop :: [Value] -> Int -> P.Parser (V.Vector Value) loop acc !n = do !v <- value skipSpaces let acc' = v:acc ch <- P.satisfy $ \w -> w == COMMA || w == SQUARE_RIGHT if ch == COMMA then skipSpaces *> loop acc' (n+1) else pure $! V.packRN n acc' -- n start from 1, so no need to +1 here -- | parse json array with leading 'CURLY_LEFT'. object :: P.Parser (V.Vector (T.Text, Value)) {-# INLINE object #-} object = P.word8 CURLY_LEFT *> object_ -- | parse json object without leading 'CURLY_LEFT'. object_ :: P.Parser (V.Vector (T.Text, Value)) {-# INLINABLE object_ #-} object_ = do skipSpaces w <- P.peek if w == CURLY_RIGHT then P.skipWord8 $> V.empty else loop [] 1 where loop :: [(T.Text, Value)] -> Int -> P.Parser (V.Vector (T.Text, Value)) loop acc !n = do !k <- string skipSpaces P.word8 COLON !v <- value skipSpaces let acc' = (k, v) : acc ch <- P.satisfy $ \w -> w == COMMA || w == CURLY_RIGHT if ch == COMMA then skipSpaces *> loop acc' (n+1) else pure $! V.packRN n acc' -- n start from 1, so no need to +1 here -------------------------------------------------------------------------------- string :: P.Parser T.Text {-# INLINE string #-} string = P.word8 DOUBLE_QUOTE *> string_ string_ :: P.Parser T.Text {-# INLINE string_ #-} string_ = do (bs, state) <- P.scanChunks 0 go let mt = case state .&. 0xFF of -- need escaping 1 -> unsafeDupablePerformIO (do let !len = V.length bs (!pa, !len') <- allocPrimArrayUnsafe len (\ mba# -> withPrimVectorUnsafe bs (decode_json_string mba#)) if len' >= 0 then pure (Just (T.Text (V.PrimVector pa 0 len'))) -- unescaping also validate utf8 else pure Nothing) 3 -> Nothing -- reject unescaped control characters _ -> T.validateMaybe bs case mt of Just t -> P.skipWord8 $> t _ -> P.fail' "Z.Data.JSON.Value.string_: utf8 validation or unescaping failed" where go :: Word32 -> V.Bytes -> Either Word32 (V.Bytes, V.Bytes, Word32) go !state v = case unsafeDupablePerformIO . withPrimUnsafe state $ \ ps -> withPrimVectorUnsafe v (find_json_string_end ps) of (state', len) | len >= 0 -> let !r = V.unsafeTake len v !rest = V.unsafeDrop len v in Right (r, rest, state') | otherwise -> Left state' foreign import ccall unsafe find_json_string_end :: MBA# Word32 -> BA# Word8 -> Int -> Int -> IO Int foreign import ccall unsafe decode_json_string :: MBA# Word8 -> BA# Word8 -> Int -> Int -> IO Int -------------------------------------------------------------------------------- -- | Convert IEEE float to scientific notition. floatToScientific :: Float -> Scientific {-# INLINE floatToScientific #-} floatToScientific rf | rf < 0 = -(fromFloatingDigits (B.grisu3_sp (-rf))) | rf == 0 = 0 | otherwise = fromFloatingDigits (B.grisu3_sp rf) -- | Convert IEEE double to scientific notition. doubleToScientific :: Double -> Scientific {-# INLINE doubleToScientific #-} doubleToScientific rf | rf < 0 = -(fromFloatingDigits (B.grisu3 (-rf))) | rf == 0 = 0 | otherwise = fromFloatingDigits (B.grisu3 rf) fromFloatingDigits :: ([Int], Int) -> Scientific {-# INLINE fromFloatingDigits #-} fromFloatingDigits (digits, e) = go digits 0 0 where -- There's no way a float or double has more digits a 'Int64' can't handle go :: [Int] -> Int64 -> Int -> Scientific go [] !c !n = scientific (fromIntegral c) (e - n) go (d:ds) !c !n = go ds (c * 10 + fromIntegral d) (n + 1)