{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} -- | -- Module: Data.Aeson.Parser.Internal -- Copyright: (c) 2011-2016 Bryan O'Sullivan -- (c) 2011 MailRank, Inc. -- License: BSD3 -- Maintainer: Bryan O'Sullivan -- Stability: experimental -- Portability: portable -- -- Efficiently and correctly parse a JSON string. The string must be -- encoded as UTF-8. module Data.Aeson.Parser.Internal ( -- * Lazy parsers json, jsonEOF , jsonWith , jsonLast , jsonAccum , jsonNoDup , value , jstring , jstring_ , scientific -- * Strict parsers , json', jsonEOF' , jsonWith' , jsonLast' , jsonAccum' , jsonNoDup' , value' -- * Helpers , decodeWith , decodeStrictWith , eitherDecodeWith , eitherDecodeStrictWith -- ** Handling objects with duplicate keys , fromListAccum , parseListNoDup -- * Text literal unescaping , unescapeText ) where import Control.Applicative ((<|>)) import Control.Monad (when, void) import Data.Attoparsec.ByteString.Char8 (Parser, char, decimal, endOfInput, isDigit_w8, signed, string) import Data.Function (fix) import Data.Functor (($>)) import Data.Integer.Conversion (byteStringToInteger) import Data.Scientific (Scientific) import Data.Text (Text) import Data.Vector (Vector) #if !MIN_VERSION_base(4,11,0) import Data.Semigroup ((<>)) #endif import qualified Data.Aeson.Key as Key import qualified Data.Aeson.KeyMap as KM import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.Lazy as L import qualified Data.ByteString as B import qualified Data.ByteString.Builder as B import qualified Data.ByteString.Lazy as BSL import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy.Char8 as C import qualified Data.ByteString.Unsafe as B import qualified Data.Scientific as Sci import qualified Data.Vector as Vector (empty, fromList, fromListN, reverse) import qualified Data.Word8.Patterns as W8 import Data.Aeson.Types (IResult(..), JSONPath, Object, Result(..), Value(..), Key) import Data.Aeson.Internal.Text import Data.Aeson.Decoding (unescapeText) -- $setup -- >>> :set -XOverloadedStrings -- >>> import Data.Aeson.Types ------------------------------------------------------------------------------- -- Parsers ------------------------------------------------------------------------------- -- | Parse any JSON value. -- -- The conversion of a parsed value to a Haskell value is deferred -- until the Haskell value is needed. This may improve performance if -- only a subset of the results of conversions are needed, but at a -- cost in thunk allocation. -- -- This function is an alias for 'value'. In aeson 0.8 and earlier, it -- parsed only object or array types, in conformance with the -- now-obsolete RFC 4627. -- -- ==== Warning -- -- If an object contains duplicate keys, only the first one will be kept. -- For a more flexible alternative, see 'jsonWith'. json :: Parser Value json = value -- | Parse any JSON value. -- -- This is a strict version of 'json' which avoids building up thunks -- during parsing; it performs all conversions immediately. Prefer -- this version if most of the JSON data needs to be accessed. -- -- This function is an alias for 'value''. In aeson 0.8 and earlier, it -- parsed only object or array types, in conformance with the -- now-obsolete RFC 4627. -- -- ==== Warning -- -- If an object contains duplicate keys, only the first one will be kept. -- For a more flexible alternative, see 'jsonWith''. json' :: Parser Value json' = value' -- Open recursion: object_, object_', array_, array_' are parameterized by the -- toplevel Value parser to be called recursively, to keep the parameter -- mkObject outside of the recursive loop for proper inlining. object_ :: ([(Key, Value)] -> Either String Object) -> Parser Value -> Parser Value object_ mkObject val = Object <$> objectValues mkObject key val {-# INLINE object_ #-} object_' :: ([(Key, Value)] -> Either String Object) -> Parser Value -> Parser Value object_' mkObject val' = do !vals <- objectValues mkObject key' val' return (Object vals) where key' = do !s <- key return s {-# INLINE object_' #-} objectValues :: ([(Key, Value)] -> Either String Object) -> Parser Key -> Parser Value -> Parser (KM.KeyMap Value) objectValues mkObject str val = do skipSpace w <- A.peekWord8' if w == W8.RIGHT_CURLY then A.anyWord8 >> return KM.empty else loop [] where -- Why use acc pattern here, you may ask? because then the underlying 'KM.fromList' -- implementation can make use of mutation when constructing a map. For example, -- 'HashMap` uses 'unsafeInsert' and it's much faster because it's doing in place -- update to the 'HashMap'! loop acc = do k <- (str A. "object key") <* skipSpace <* (char ':' A. "':'") v <- (val A. "object value") <* skipSpace ch <- A.satisfy (\w -> w == W8.COMMA || w == W8.RIGHT_CURLY) A. "',' or '}'" let acc' = (k, v) : acc if ch == W8.COMMA then skipSpace >> loop acc' else case mkObject acc' of Left err -> fail err Right obj -> pure obj {-# INLINE objectValues #-} array_ :: Parser Value -> Parser Value array_ val = Array <$> arrayValues val {-# INLINE array_ #-} array_' :: Parser Value -> Parser Value array_' val = do !vals <- arrayValues val return (Array vals) {-# INLINE array_' #-} arrayValues :: Parser Value -> Parser (Vector Value) arrayValues val = do skipSpace w <- A.peekWord8' if w == W8.RIGHT_SQUARE then A.anyWord8 >> return Vector.empty else loop [] 1 where loop acc !len = do v <- (val A. "json list value") <* skipSpace ch <- A.satisfy (\w -> w == W8.COMMA || w == W8.RIGHT_SQUARE) A. "',' or ']'" if ch == W8.COMMA then skipSpace >> loop (v:acc) (len+1) else return (Vector.reverse (Vector.fromListN len (v:acc))) {-# INLINE arrayValues #-} -- | Parse any JSON value. Synonym of 'json'. value :: Parser Value value = jsonWith (pure . KM.fromList) -- | Parse any JSON value. -- -- This parser is parameterized by a function to construct an 'Object' -- from a raw list of key-value pairs, where duplicates are preserved. -- The pairs appear in __reverse order__ from the source. -- -- ==== __Examples__ -- -- 'json' keeps only the first occurrence of each key, using 'Data.Aeson.KeyMap.fromList'. -- -- @ -- 'json' = 'jsonWith' ('Right' '.' 'H.fromList') -- @ -- -- 'jsonLast' keeps the last occurrence of each key, using -- @'HashMap.Lazy.fromListWith' ('const' 'id')@. -- -- @ -- 'jsonLast' = 'jsonWith' ('Right' '.' 'HashMap.Lazy.fromListWith' ('const' 'id')) -- @ -- -- 'jsonAccum' keeps wraps all values in arrays to keep duplicates, using -- 'fromListAccum'. -- -- @ -- 'jsonAccum' = 'jsonWith' ('Right' . 'fromListAccum') -- @ -- -- 'jsonNoDup' fails if any object contains duplicate keys, using 'parseListNoDup'. -- -- @ -- 'jsonNoDup' = 'jsonWith' 'parseListNoDup' -- @ jsonWith :: ([(Key, Value)] -> Either String Object) -> Parser Value jsonWith mkObject = fix $ \value_ -> do skipSpace w <- A.peekWord8' case w of W8.DOUBLE_QUOTE -> A.anyWord8 *> (String <$> jstring_) W8.LEFT_CURLY -> A.anyWord8 *> object_ mkObject value_ W8.LEFT_SQUARE -> A.anyWord8 *> array_ value_ W8.LOWER_F -> string "false" $> Bool False W8.LOWER_T -> string "true" $> Bool True W8.LOWER_N -> string "null" $> Null _ | w >= W8.DIGIT_0 && w <= W8.DIGIT_9 || w == W8.HYPHEN -> Number <$> scientific | otherwise -> fail "not a valid json value" {-# INLINE jsonWith #-} -- | Variant of 'json' which keeps only the last occurrence of every key. jsonLast :: Parser Value jsonLast = jsonWith (Right . KM.fromListWith (const id)) -- | Variant of 'json' wrapping all object mappings in 'Array' to preserve -- key-value pairs with the same keys. jsonAccum :: Parser Value jsonAccum = jsonWith (Right . fromListAccum) -- | Variant of 'json' which fails if any object contains duplicate keys. jsonNoDup :: Parser Value jsonNoDup = jsonWith parseListNoDup -- | @'fromListAccum' kvs@ is an object mapping keys to arrays containing all -- associated values from the original list @kvs@. -- -- >>> fromListAccum [("apple", Bool True), ("apple", Bool False), ("orange", Bool False)] -- fromList [("apple",Array [Bool False,Bool True]),("orange",Array [Bool False])] fromListAccum :: [(Key, Value)] -> Object fromListAccum = fmap (Array . Vector.fromList . ($ [])) . KM.fromListWith (.) . (fmap . fmap) (:) -- | @'fromListNoDup' kvs@ fails if @kvs@ contains duplicate keys. parseListNoDup :: [(Key, Value)] -> Either String Object parseListNoDup = KM.traverseWithKey unwrap . KM.fromListWith (\_ _ -> Nothing) . (fmap . fmap) Just where unwrap k Nothing = Left $ "found duplicate key: " ++ show k unwrap _ (Just v) = Right v -- | Strict version of 'value'. Synonym of 'json''. value' :: Parser Value value' = jsonWith' (pure . KM.fromList) -- | Strict version of 'jsonWith'. jsonWith' :: ([(Key, Value)] -> Either String Object) -> Parser Value jsonWith' mkObject = fix $ \value_ -> do skipSpace w <- A.peekWord8' case w of W8.DOUBLE_QUOTE -> do !s <- A.anyWord8 *> jstring_ return (String s) W8.LEFT_CURLY -> A.anyWord8 *> object_' mkObject value_ W8.LEFT_SQUARE -> A.anyWord8 *> array_' value_ W8.LOWER_F -> string "false" $> Bool False W8.LOWER_T -> string "true" $> Bool True W8.LOWER_N -> string "null" $> Null _ | w >= W8.DIGIT_0 && w <= W8.DIGIT_9 || w == W8.HYPHEN -> do !n <- scientific return (Number n) | otherwise -> fail "not a valid json value" {-# INLINE jsonWith' #-} -- | Variant of 'json'' which keeps only the last occurrence of every key. jsonLast' :: Parser Value jsonLast' = jsonWith' (pure . KM.fromListWith (const id)) -- | Variant of 'json'' wrapping all object mappings in 'Array' to preserve -- key-value pairs with the same keys. jsonAccum' :: Parser Value jsonAccum' = jsonWith' (pure . fromListAccum) -- | Variant of 'json'' which fails if any object contains duplicate keys. jsonNoDup' :: Parser Value jsonNoDup' = jsonWith' parseListNoDup -- | Parse a quoted JSON string. jstring :: Parser Text jstring = A.word8 W8.DOUBLE_QUOTE *> jstring_ -- | Parse a JSON Key key :: Parser Key key = Key.fromText <$> jstring -- | Parse a string without a leading quote. jstring_ :: Parser Text {-# INLINE jstring_ #-} jstring_ = do s <- A.takeWhile (\w -> w /= W8.DOUBLE_QUOTE && w /= W8.BACKSLASH && w >= 0x20 && w < 0x80) mw <- A.peekWord8 case mw of Nothing -> fail "string without end" Just W8.DOUBLE_QUOTE -> A.anyWord8 $> unsafeDecodeASCII s Just w | w < 0x20 -> fail "unescaped control character" _ -> jstringSlow s jstringSlow :: B.ByteString -> Parser Text {-# INLINE jstringSlow #-} jstringSlow s' = do s <- A.scan startState go <* A.anyWord8 case unescapeText (B.append s' s) of Right r -> return r Left err -> fail $ show err where startState = False go a c | a = Just False | c == W8.DOUBLE_QUOTE = Nothing | otherwise = let a' = c == W8.BACKSLASH in Just a' decodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString -> Maybe a decodeWith p to s = case L.parse p s of L.Done _ v -> case to v of Success a -> Just a _ -> Nothing _ -> Nothing {-# INLINE decodeWith #-} decodeStrictWith :: Parser Value -> (Value -> Result a) -> B.ByteString -> Maybe a decodeStrictWith p to s = case either Error to (A.parseOnly p s) of Success a -> Just a _ -> Nothing {-# INLINE decodeStrictWith #-} eitherDecodeWith :: Parser Value -> (Value -> IResult a) -> L.ByteString -> Either (JSONPath, String) a eitherDecodeWith p to s = case L.parse p s of L.Done _ v -> case to v of ISuccess a -> Right a IError path msg -> Left (path, msg) L.Fail notparsed ctx msg -> Left ([], buildMsg notparsed ctx msg) where buildMsg :: L.ByteString -> [String] -> String -> String buildMsg notYetParsed [] msg = msg ++ formatErrorLine notYetParsed buildMsg notYetParsed (expectation:_) msg = msg ++ ". Expecting " ++ expectation ++ formatErrorLine notYetParsed {-# INLINE eitherDecodeWith #-} -- | Grab the first 100 bytes from the non parsed portion and -- format to get nicer error messages formatErrorLine :: L.ByteString -> String formatErrorLine bs = C.unpack . -- if formatting results in empty ByteString just return that -- otherwise construct the error message with the bytestring builder (\bs' -> if BSL.null bs' then BSL.empty else B.toLazyByteString $ B.stringUtf8 " at '" <> B.lazyByteString bs' <> B.stringUtf8 "'" ) . -- if newline is present cut at that position BSL.takeWhile (10 /=) . -- remove spaces, CR's, tabs, backslashes and quotes characters BSL.filter (`notElem` [9, 13, 32, 34, 47, 92]) . -- take 100 bytes BSL.take 100 $ bs eitherDecodeStrictWith :: Parser Value -> (Value -> IResult a) -> B.ByteString -> Either (JSONPath, String) a eitherDecodeStrictWith p to s = case either (IError []) to (A.parseOnly p s) of ISuccess a -> Right a IError path msg -> Left (path, msg) {-# INLINE eitherDecodeStrictWith #-} -- $lazy -- -- The 'json' and 'value' parsers decouple identification from -- conversion. Identification occurs immediately (so that an invalid -- JSON document can be rejected as early as possible), but conversion -- to a Haskell value is deferred until that value is needed. -- -- This decoupling can be time-efficient if only a smallish subset of -- elements in a JSON value need to be inspected, since the cost of -- conversion is zero for uninspected elements. The trade off is an -- increase in memory usage, due to allocation of thunks for values -- that have not yet been converted. -- $strict -- -- The 'json'' and 'value'' parsers combine identification with -- conversion. They consume more CPU cycles up front, but have a -- smaller memory footprint. -- | Parse a top-level JSON value followed by optional whitespace and -- end-of-input. See also: 'json'. jsonEOF :: Parser Value jsonEOF = json <* skipSpace <* endOfInput -- | Parse a top-level JSON value followed by optional whitespace and -- end-of-input. See also: 'json''. jsonEOF' :: Parser Value jsonEOF' = json' <* skipSpace <* endOfInput -- | The only valid whitespace in a JSON document is space, newline, -- carriage return, and tab. skipSpace :: Parser () skipSpace = A.skipWhile $ \w -> w == W8.SPACE || w == W8.LF || w == W8.CR || w == W8.TAB {-# INLINE skipSpace #-} ------------------ Copy-pasted and adapted from attoparsec ------------------ -- A strict pair data SP = SP !Integer {-# UNPACK #-}!Int decimal0 :: Parser Integer decimal0 = do digits <- A.takeWhile1 isDigit_w8 if B.length digits > 1 && B.unsafeHead digits == W8.DIGIT_0 then fail "leading zero" else return (byteStringToInteger digits) -- | Parse a JSON number. scientific :: Parser Scientific scientific = do sign <- A.peekWord8' let !positive = not (sign == W8.HYPHEN) when (sign == W8.PLUS || sign == W8.HYPHEN) $ void A.anyWord8 n <- decimal0 let f fracDigits = SP (B.foldl' step n fracDigits) (negate $ B.length fracDigits) step a w = a * 10 + fromIntegral (w - W8.DIGIT_0) dotty <- A.peekWord8 SP c e <- case dotty of Just W8.PERIOD -> A.anyWord8 *> (f <$> A.takeWhile1 isDigit_w8) _ -> pure (SP n 0) let !signedCoeff | positive = c | otherwise = -c (A.satisfy (\ex -> case ex of W8.LOWER_E -> True; W8.UPPER_E -> True; _ -> False) *> fmap (Sci.scientific signedCoeff . (e +)) (signed decimal)) <|> return (Sci.scientific signedCoeff e) {-# INLINE scientific #-}