{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} -- | -- Module : Data.JsonStream.Parser -- License : BSD-style -- -- Maintainer : palkovsky.ondrej@gmail.com -- Stability : experimental -- Portability : portable -- -- An incremental applicative-style JSON parser, suitable for high performance -- memory efficient stream parsing. -- -- The parser is using "Data.Aeson" types and 'FromJSON' instance, it can be -- easily combined with aeson monadic parsing instances when appropriate. module Data.JsonStream.Parser ( -- * How to use this library -- $use -- * Performance -- $performance -- * Constant space decoding -- $constant -- * Aeson compatibility -- $aeson -- * The @Parser@ type Parser , ParseOutput(..) -- * Parsing functions , runParser , runParser' , parseByteString , parseLazyByteString -- * Aeson in-place replacement functions , decode , eitherDecode , decodeStrict , eitherDecodeStrict -- * FromJSON parser , value , string , byteString -- * Constant space parsers , safeString , number , integer , real , bool , jNull , safeByteString -- * Structure operators , (.:) , (.:?) , (.|) , (.!) -- * Structure parsers , objectWithKey , objectItems , objectValues , arrayOf , arrayWithIndexOf , indexedArrayOf , nullable -- * Parsing modifiers , filterI , takeI , mapWithFailure -- * SAX-like parsers , arrayFound , objectFound ) where #if !MIN_VERSION_bytestring(0,10,6) import Data.Monoid (Monoid, mappend, mempty) #endif #if MIN_VERSION_base(4,10,0) import Data.Semigroup (Semigroup(..)) #endif import Control.Applicative import qualified Data.Aeson as AE import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.ByteString.Lazy.Internal as BL import Data.Char (isSpace) #if MIN_VERSION_aeson(2,0,0) import qualified Data.Aeson.KeyMap as AEK import qualified Data.Aeson.Key as AEK import Data.Bifunctor (first) #else import qualified Data.HashMap.Strict as HMap #endif import Data.Scientific (Scientific, isInteger, toBoundedInteger, toRealFloat) import qualified Data.Text as T import qualified Data.Vector as Vec import Foreign.C.Types import Data.JsonStream.CLexer import Data.JsonStream.TokenParser -- | Limit for the size of an object key objectKeyStringLimit :: Int objectKeyStringLimit = 65536 -- | Private parsing result data ParseResult v = MoreData (Parser v, BS.ByteString -> TokenResult) | Failed String | Done BS.ByteString TokenResult -- The bytestring is remaining unparsed data, we need to return it somehow | Yield v (ParseResult v) instance Functor ParseResult where fmap f (MoreData (np, ntok)) = MoreData (fmap f np, ntok) fmap _ (Failed err) = Failed err fmap _ (Done ctx tok) = Done ctx tok fmap f (Yield v np) = Yield (f v) (fmap f np) -- | A representation of the parser. newtype Parser a = Parser { callParse :: TokenResult -> ParseResult a } instance Functor Parser where fmap f (Parser p) = Parser $ \d -> fmap f (p d) -- | Yield list of results, finish with last action yieldResults :: [a] -> ParseResult a -> ParseResult a yieldResults values end = foldr Yield end values -- | '<*>' will run both parsers in parallel and combine results. -- -- It behaves as a list functor (produces all combinations), but the typical -- use is: -- -- >>> :set -XOverloadedStrings -- >>> let text = "[{\"name\": \"John\", \"age\": 20}, {\"age\": 30, \"name\": \"Frank\"}]" -- >>> let parser = arrayOf $ (,) <$> "name" .: string <*> "age" .: integer -- >>> parseByteString parser text :: [(T.Text,Int)] -- [("John",20),("Frank",30)] instance Applicative Parser where pure x = Parser $ \tok -> process (callParse ignoreVal tok) where process (Failed err) = Failed err process (Done ctx tok) = Yield x (Done ctx tok) process (MoreData (np, ntok)) = MoreData (Parser (process . callParse np), ntok) process _ = Failed "Internal error in pure, ignoreVal doesn't yield" (<*>) m1 m2 = Parser $ \tok -> process ([], []) (callParse m1 tok) (callParse m2 tok) where process ([], _) (Done ctx ntok) _ = Done ctx ntok -- Optimize, return immediately when first parser fails process (lst1, lst2) (Yield v np1) p2 = process (v:lst1, lst2) np1 p2 process (lst1, lst2) p1 (Yield v np2) = process (lst1, v:lst2) p1 np2 process (lst1, lst2) (Done ctx ntok) (Done {}) = yieldResults [ mx my | mx <- reverse lst1, my <- reverse lst2 ] (Done ctx ntok) process lsts (MoreData (np1, ntok1)) (MoreData (np2, _)) = MoreData (Parser (\tok -> process lsts (callParse np1 tok) (callParse np2 tok)), ntok1) process _ (Failed err) _ = Failed err process _ _ (Failed err) = Failed err process _ _ _ = Failed "Unexpected error in parallel processing <*>." -- | '<>' will run both parsers in parallel yielding from both as the data comes -- -- >>> :m +Data.Monoid -- >>> let test = "[{\"key1\": [1,2], \"key2\": [5,6], \"key3\": [8,9]}]" -- >>> let parser = arrayOf $ "key1" .: (arrayOf value) <> "key2" .: (arrayOf value) -- >>> parseByteString parser test :: [Int] -- [1,2,5,6] #if MIN_VERSION_base(4,10,0) instance Monoid (Parser a) where mempty = ignoreVal mappend = (<>) instance Semigroup (Parser a) where (<>) m1 m2 = #else instance Monoid (Parser a) where mempty = ignoreVal mappend m1 m2 = #endif Parser $ \tok -> process (callParse m1 tok) (callParse m2 tok) where process (Yield v np1) p2 = Yield v (process np1 p2) process p1 (Yield v np2) = Yield v (process p1 np2) process (Done ctx ntok) Done {} = Done ctx ntok process (MoreData (np1, ntok)) (MoreData (np2, _)) = MoreData (Parser $ \tok -> process (callParse np1 tok) (callParse np2 tok), ntok) process (Failed err) _ = Failed err process _ (Failed err) = Failed err process _ _ = Failed "Unexpected error in parallel processing <|>" -- | Match items from the first parser, if none is matched, return items -- from the second parser. Constant-space if second parser returns -- constant number of items. '.|' is implemented using this operator. -- -- >>> let json = "[{\"key1\": [1,2], \"key2\": [5,6], \"key3\": [8,9]}]" -- >>> let parser = arrayOf $ "key1" .: (arrayOf value) <|> "key2" .: (arrayOf value) -- >>> parseByteString parser json :: [Int] -- [1,2] -- >>> let parser = arrayOf $ "key-non" .: (arrayOf value) <|> "key2" .: (arrayOf value) -- >>> parseByteString parser json :: [Int] -- [5,6] -- -- 'many' - Gather matches and return them as list. -- -- >>> let json = "[{\"keys\":[1,2], \"values\":[5,6]}, {\"keys\":[9,8], \"values\":[7,6]}]" -- >>> let parser = arrayOf $ (,) <$> many ("keys" .: arrayOf integer) <*> many ("values" .: arrayOf integer) -- >>> parseByteString parser json :: [([Int], [Int])] -- [([1,2],[5,6]),([9,8],[7,6])] instance Alternative Parser where empty = ignoreVal m1 <|> m2 = Parser $ \tok -> process [] (callParse m1 tok) (Just $ callParse m2 tok) where -- First returned item -> disable second parser process _ (Yield v np1) _ = Yield v (process [] np1 Nothing) -- First done with disabled second -> exit process _ (Done ctx ntok) Nothing = Done ctx ntok -- Both done but second not disabled -> yield items from the second process lst (Done ctx ntok) (Just (Done {})) = yieldResults (reverse lst) (Done ctx ntok) -- Second yield - remember data process lst np1 (Just (Yield v np2)) = process (v:lst) np1 (Just np2) -- Moredata processing process lst (MoreData (np1, ntok)) Nothing = MoreData (Parser $ \tok -> process lst (callParse np1 tok) Nothing, ntok) process lst (MoreData (np1, ntok)) (Just (MoreData (np2, _))) = MoreData (Parser $ \tok -> process lst (callParse np1 tok) (Just $ callParse np2 tok), ntok) process _ (Failed err) _ = Failed err process _ _ (Just (Failed err)) = Failed err process _ _ _ = Failed "Unexpected error in parallel processing <|>" some = filterI (not . null) . many many f = Parser $ \ntok -> loop id (callParse f ntok) where loop acc (Done ctx ntp) = Yield (acc []) (Done ctx ntp) loop acc (MoreData (Parser np, ntok)) = MoreData (Parser (loop acc . np), ntok) loop acc (Yield v np) = loop (\nxt -> acc (v : nxt)) np loop _ (Failed err) = Failed err array' :: (Int -> Parser a) -> Parser a array' valparse = Parser $ \tp -> case tp of (PartialResult ArrayBegin ntp) -> moreData (nextitem 0) ntp (PartialResult _ _) -> callParse ignoreVal tp -- Run ignoreval parser on the same output we got (TokMoreData ntok) -> MoreData (array' valparse, ntok) (TokFailed) -> Failed "Array - token failed" where nextitem !_ _ (ArrayEnd ctx) ntok = Done ctx ntok nextitem !i tok _ _ = arrcontent i (callParse (valparse i) tok) arrcontent !i (Done _ ntp) = moreData (nextitem (i+1)) ntp arrcontent !i (MoreData (Parser np, ntp)) = MoreData (Parser (arrcontent i . np), ntp) arrcontent !i (Yield v np) = Yield v (arrcontent i np) arrcontent !_ (Failed err) = Failed err -- | Match all items of an array. arrayOf :: Parser a -> Parser a arrayOf valparse = array' (const valparse) -- | Generate start/end objects when an element is found, in between run a parser. -- The inner parser is not run if an array is not found. elemFound :: Element -> a -> a -> Parser a -> Parser a elemFound elsearch start end parser = Parser $ moreData handle where handle tok el _ | el == elsearch = Yield start (parseAndAppend (callParse parser tok)) handle tok _ _ = callParse ignoreVal tok parseAndAppend (Failed err) = Failed err parseAndAppend (Yield v np) = Yield v (parseAndAppend np) parseAndAppend (MoreData (Parser np, ntp)) = MoreData (Parser (parseAndAppend . np), ntp) parseAndAppend (Done ctx ntp) = Yield end (Done ctx ntp) -- | Generate start/end values when an object is found, in between run a parser. -- The inner parser is not run if an array is not found. objectFound :: a -> a -> Parser a -> Parser a objectFound = elemFound ObjectBegin -- | Generate start/end values when an array is found, in between run a parser. -- The inner parser is not run if an array is not found. -- -- >>> let test = "[[1,2,3],true,[],false,{\"key\":1}]" :: BS.ByteString -- >>> parseByteString (arrayOf (arrayFound 10 20 (1 .! integer))) test :: [Int] -- [10,2,20,10,20] arrayFound :: a -> a -> Parser a -> Parser a arrayFound = elemFound ArrayBegin -- | Match nith item in an array. arrayWithIndexOf :: Int -> Parser a -> Parser a arrayWithIndexOf idx valparse = array' itemFn where itemFn aidx | aidx == idx = valparse | otherwise = ignoreVal -- | Match all items of an array, add index to output. indexedArrayOf :: Parser a -> Parser (Int, a) indexedArrayOf valparse = array' (\(!key) -> (key,) <$> valparse) -- | Go through an object; if once is True, yield only first success, then ignore the rest object' :: Bool -> (T.Text -> Parser a) -> Parser a object' once valparse = Parser $ \tp -> case tp of (PartialResult ObjectBegin ntp) -> moreData (nextitem False) ntp (PartialResult _ _) -> callParse ignoreVal tp -- Run ignoreval parser on the same output we got (TokMoreData ntok) -> MoreData (object' once valparse, ntok) TokFailed -> Failed "Array - token failed" where nextitem _ _ (ObjectEnd ctx) ntok = Done ctx ntok nextitem yielded _ (JValue (AE.String key)) ntok = objcontent yielded (callParse (valparse key) ntok) nextitem yielded _ (StringRaw bs) ntok = case unescapeText bs of Right t -> objcontent yielded (callParse (valparse t) ntok) Left e -> Failed (show e) nextitem yielded _ (StringContent str) ntok = objcontent yielded $ moreData (getLongKey [str] (BS.length str)) ntok nextitem _ _ el _ = Failed $ "Object - unexpected item: " ++ show el -- If we already yielded and should yield once, ignore the rest of the object objcontent yielded (Done _ ntp) | once && yielded = callParse (ignoreVal' 1) ntp | otherwise = moreData (nextitem yielded) ntp -- Reset to next value objcontent yielded (MoreData (Parser np, ntok)) = MoreData (Parser (objcontent yielded. np), ntok) objcontent _ (Yield v np) = Yield v (objcontent True np) objcontent _ (Failed err) = Failed err getLongKey acc !len _ el ntok = case el of StringEnd | Right key <- unescapeText (BS.concat $ reverse acc) -> callParse (valparse key) ntok | otherwise -> Failed "Error decoding UTF8" StringContent str | len > objectKeyStringLimit -> callParse (ignoreStrRestThen ignoreVal) ntok | otherwise -> moreData (getLongKey (str:acc) (len + BS.length str)) ntok _ -> Failed "Object longstr - lexer failed." -- | Helper function to deduplicate TokMoreData/FokFailed logic moreData :: (TokenResult -> Element -> TokenResult -> ParseResult v) -> TokenResult -> ParseResult v moreData parser tok = case tok of PartialResult el ntok -> parser tok el ntok TokMoreData ntok -> MoreData (Parser (moreData parser), ntok) TokFailed -> Failed "More data - lexer failed." -- | Match all key-value pairs of an object, return them as a tuple. -- If the source object defines same key multiple times, all values -- are matched. objectItems :: Parser a -> Parser (T.Text, a) objectItems valparse = object' False $ \(!key) -> (key,) <$> valparse -- | Match all key-value pairs of an object, return only values. -- If the source object defines same key multiple times, all values -- are matched. Keys are ignored. objectValues :: Parser a -> Parser a objectValues valparse = object' False (const valparse) -- | Match only specific key of an object. -- This function will return only the first matched value in an object even -- if the source JSON defines the key multiple times (in violation of the specification). objectWithKey :: T.Text -> Parser a -> Parser a objectWithKey name valparse = object' True itemFn where itemFn key | key == name = valparse | otherwise = ignoreVal -- | Parses underlying values and generates a AE.Value aeValue :: Parser AE.Value aeValue = Parser $ moreData value' where #if MIN_VERSION_aeson(2,0,0) tomap = AEK.fromList . map (first AEK.fromText) #else tomap = HMap.fromList #endif value' tok el ntok = case el of JValue val -> Yield val (Done "" ntok) JInteger val -> Yield (AE.Number $ fromIntegral val) (Done "" ntok) StringContent _ -> callParse (AE.String <$> longString Nothing) tok StringRaw bs -> case unescapeText bs of Right t -> Yield (AE.String t) (Done "" ntok) Left e -> Failed (show e) ArrayBegin -> AE.Array . Vec.fromList <$> callParse (many (arrayOf aeValue)) tok ObjectBegin -> AE.Object . tomap <$> callParse (manyReverse (objectItems aeValue)) tok _ -> Failed ("aeValue - unexpected token: " ++ show el) -- | Optimized function for aeson objects - evades reversing the objects manyReverse :: Parser a -> Parser [a] manyReverse f = Parser $ \ntok -> loop [] (callParse f ntok) where loop acc (Done ctx ntp) = Yield acc (Done ctx ntp) loop acc (MoreData (Parser np, ntok)) = MoreData (Parser (loop acc . np), ntok) loop acc (Yield v np) = loop (v : acc) np loop _ (Failed err) = Failed err -- | Convert a strict aeson value (no object/array) to a value. -- Non-matching type is ignored and not parsed (unlike 'value') jvalue :: (AE.Value -> Maybe a) -> (CLong -> Maybe a) -> Parser a jvalue convert cvtint = Parser (moreData value') where value' tok el ntok = case el of JValue val | Just convValue <- convert val -> Yield convValue (Done "" ntok) | otherwise -> Done "" ntok JInteger val | Just convValue <- cvtint val -> Yield convValue (Done "" ntok) | otherwise -> Done "" ntok _ -> callParse ignoreVal tok longByteString :: Maybe Int -> Parser BS.ByteString longByteString mbounds = Parser $ moreData (handle id 0) where handle acc !len tok el ntok = case el of JValue (AE.String _) -> Failed "INTERNAL ERROR! - got decoded JValue instead of string" StringRaw bs -> Yield bs (Done "" ntok) StringContent str | (Just bounds) <- mbounds, len > bounds -- If the string exceeds bounds, discard it -> callParse (ignoreStrRestThen (Parser $ Done "")) ntok | otherwise -> moreData (handle (acc . (str:)) (len + BS.length str)) ntok StringEnd -> Yield (BS.concat (acc [])) (Done "" ntok) _ -> callParse ignoreVal tok -- | Parse raw bytestring value (json string expected), skip parsing otherwise. -- The returned value is not unescaped. byteString :: Parser BS.ByteString byteString = longByteString Nothing -- | Stops parsing string after the limit is reached. The string will not be matched -- if it exceeds the size. The size is the size of escaped string including escape -- characters. -- The return value is not unescaped. safeByteString :: Int -> Parser BS.ByteString safeByteString limit = longByteString (Just limit) -- | Match a possibly bounded string roughly limited by a limit longString :: Maybe Int -> Parser T.Text longString mbounds = Parser $ moreData (handle id 0) where handle acc !len tok el ntok = case el of JValue (AE.String str) -> Yield str (Done "" ntok) StringRaw bs -> case unescapeText bs of Right t -> Yield t (Done "" ntok) Left e -> Failed (show e) StringContent str | (Just bounds) <- mbounds, len > bounds -- If the string exceeds bounds, discard it -> callParse (ignoreStrRestThen (Parser $ Done "")) ntok | otherwise -> moreData (handle (acc . (str:)) (len + BS.length str)) ntok StringEnd | Right val <- unescapeText (BS.concat (acc [])) -> Yield val (Done "" ntok) | otherwise -> Failed "Error decoding UTF8" _ -> callParse ignoreVal tok -- | Parse string value, skip parsing otherwise. string :: Parser T.Text string = longString Nothing -- | Stops parsing string after the limit is reached. The string will not be matched -- if it exceeds the size. The size is the size of escaped string including escape -- characters. safeString :: Int -> Parser T.Text safeString limit = longString (Just limit) -- | Parse number, return in scientific format. number :: Parser Scientific number = jvalue cvt (Just . fromIntegral) where cvt (AE.Number num) = Just num cvt _ = Nothing -- | Parse to bounded integer type (not 'Integer'). -- If you are using integer numbers, use this parser. -- It skips the conversion JSON -> 'Scientific' -> 'Int' and uses an 'Int' directly. integer :: forall i. (Integral i, Bounded i) => Parser i integer = jvalue cvt clongToBounded where clmax = toInteger (maxBound :: CLong) clmin = toInteger (minBound :: CLong) imax = toInteger (maxBound :: i) imin = toInteger (minBound :: i) -- Int is generally CLong, so we get this clongIsSmaller = clmax <= imax && clmin >= imin -- If partial, we have to convert to Integer to do the checking clongIsPartial = clmax < imax || clmin > imin inBounds num | clongIsPartial = toInteger num <= imax && toInteger num >= imin | otherwise = num <= fromIntegral (maxBound :: i) && num >= fromIntegral (minBound :: i) clongToBounded :: CLong -> Maybe i clongToBounded num | clongIsSmaller || inBounds num = Just (fromIntegral num) | otherwise = Nothing cvt (AE.Number num) | isInteger num = toBoundedInteger num cvt _ = Nothing -- | Parse to float/double. real :: RealFloat a => Parser a real = jvalue cvt (Just . fromIntegral) where cvt (AE.Number num) = Just $ toRealFloat num cvt _ = Nothing -- | Parse bool, skip if the type is not bool. bool :: Parser Bool bool = jvalue cvt (const Nothing) where cvt (AE.Bool b) = Just b cvt _ = Nothing -- | Match a null value. jNull :: Parser () jNull = jvalue cvt (const Nothing) where cvt (AE.Null) = Just () cvt _ = Nothing -- | Parses a field with a possible null value. nullable :: Parser a -> Parser (Maybe a) nullable valparse = Parser (moreData value') where value' _ (JValue AE.Null) ntok = Yield Nothing (Done "" ntok) value' tok _ _ = callParse (Just <$> valparse) tok -- | Match 'FromJSON' value. Calls parseJSON on the parsed value. -- -- >>> let json = "[{\"key1\": [1,2], \"key2\": [5,6]}]" -- >>> parseByteString (arrayOf value) json :: [AE.Value] -- [Object (fromList [("key2",Array [Number 5.0,Number 6.0]),("key1",Array [Number 1.0,Number 2.0])])] value :: AE.FromJSON a => Parser a value = Parser $ \ntok -> loop (callParse aeValue ntok) where loop (Done ctx ntp) = Done ctx ntp loop (Failed err) = Failed err loop (MoreData (Parser np, ntok)) = MoreData (Parser (loop . np), ntok) loop (Yield v np) = case AE.fromJSON v of AE.Error _ -> loop np AE.Success res -> Yield res (loop np) -- | Take maximum n matching items. -- -- >>> parseByteString (takeI 3 $ arrayOf integer) "[1,2,3,4,5,6,7,8,9,0]" :: [Int] -- [1,2,3] takeI :: Int -> Parser a -> Parser a takeI num valparse = Parser $ \tok -> loop num (callParse valparse tok) where loop _ (Done ctx ntp) = Done ctx ntp loop _ (Failed err) = Failed err loop n (MoreData (Parser np, ntok)) = MoreData (Parser (loop n . np), ntok) loop 0 (Yield _ np) = loop 0 np loop n (Yield v np) = Yield v (loop (n-1) np) -- | Skip rest of string + call next parser ignoreStrRestThen :: Parser a -> Parser a ignoreStrRestThen next = Parser $ moreData handle where handle _ el ntok = case el of StringContent _ -> moreData handle ntok StringEnd -> callParse next ntok _ -> Failed "Unexpected result in ignoreStrRestPlusOne" -- | Skip value; cheat to avoid parsing and make it faster ignoreVal :: Parser a ignoreVal = ignoreVal' 0 ignoreVal' :: Int -> Parser a ignoreVal' stval = Parser $ moreData (handleTok stval) where handleLongString level _ (StringContent _) ntok = moreData (handleLongString level) ntok handleLongString 0 _ StringEnd ntok = Done "" ntok handleLongString level _ StringEnd ntok = moreData (handleTok level) ntok handleLongString _ _ el _ = Failed $ "Unexpected element in handleLongStr: " ++ show el handleTok :: Int -> TokenResult -> Element -> TokenResult -> ParseResult a handleTok 0 _ (JValue _) ntok = Done "" ntok handleTok 0 _ (StringRaw _) ntok = Done "" ntok handleTok 0 _ (JInteger _) ntok = Done "" ntok handleTok 0 _ (ArrayEnd _) _ = Failed "ArrayEnd in ignoreval on 0 level" handleTok 0 _ (ObjectEnd _) _ = Failed "ObjectEnd in ignoreval on 0 level" handleTok 1 _ (ArrayEnd ctx) ntok = Done ctx ntok handleTok 1 _ (ObjectEnd ctx) ntok = Done ctx ntok handleTok level _ el ntok = case el of JValue _ -> moreData (handleTok level) ntok JInteger _ -> moreData (handleTok level) ntok StringContent _ -> moreData (handleLongString level) ntok StringRaw _ -> moreData (handleTok level) ntok ArrayEnd _ -> moreData (handleTok (level - 1)) ntok ObjectEnd _ -> moreData (handleTok (level - 1)) ntok ArrayBegin -> moreData (handleTok (level + 1)) ntok ObjectBegin -> moreData (handleTok (level + 1)) ntok StringEnd -> Failed "Internal error - out of order StringEnd" -- | Let only items matching a condition pass. -- -- >>> parseByteString (filterI (>5) $ arrayOf integer) "[1,2,3,4,5,6,7,8,9,0]" :: [Int] -- [6,7,8,9] filterI :: (a -> Bool) -> Parser a -> Parser a filterI cond valparse = Parser $ \ntok -> loop (callParse valparse ntok) where loop (Done ctx ntp) = Done ctx ntp loop (Failed err) = Failed err loop (MoreData (Parser np, ntok)) = MoreData (Parser (loop . np), ntok) loop (Yield v np) | cond v = Yield v (loop np) | otherwise = loop np -- | A back-door for lifting of possibly failing actions. -- If an action fails with Left value, convert it into failure -- of parsing mapWithFailure :: (a -> Either String b) -> Parser a -> Parser b mapWithFailure mapping = updateParser where updateParser (Parser run) = Parser $ updateParseResult . run updateParseResult x = case x of MoreData (parser, continuation) -> MoreData (updateParser parser, continuation) Failed message -> Failed message Done a b -> Done a b Yield val parseResult -> case mapping val of Left message -> Failed message Right val' -> Yield val' (updateParseResult parseResult) --- Convenience operators -- | Synonym for 'objectWithKey'. Matches key in an object. The '.:' operators can be chained. -- -- >>> let json = "{\"key1\": {\"nested-key\": 3}}" -- >>> parseByteString ("key1" .: "nested-key" .: integer) json :: [Int] -- [3] (.:) :: T.Text -> Parser a -> Parser a (.:) = objectWithKey infixr 7 .: -- | Returns 'Nothing' if value is null or does not exist or match. Otherwise returns 'Just' value. -- -- > key .:? val = optional (key .: val) (.:?) :: T.Text -> Parser a -> Parser (Maybe a) key .:? val = optional (key .: val) infixr 7 .:? -- | Return default value if the parsers on the left hand didn't produce a result. -- -- > p .| defval = p <|> pure defval -- -- The operator works on complete left side, the following statements are equal: -- -- > Record <$> "key1" .: "nested-key" .: value .| defaultValue -- > Record <$> (("key1" .: "nested-key" .: value) .| defaultValue) (.|) :: Parser a -> a -> Parser a p .| defval = p <|> pure defval infixl 6 .| -- | Synonym for 'arrayWithIndexOf'. Matches n-th item in array. -- -- >>> parseByteString (arrayOf (1 .! bool)) "[ [1,true,null], [2,false], [3]]" :: [Bool] -- [True,False] (.!) :: Int -> Parser a -> Parser a (.!) = arrayWithIndexOf infixr 7 .! --- -- | Result of parsing. Contains continuations to continue parsing. data ParseOutput a = ParseYield a (ParseOutput a) -- ^ Returns a value from a parser. | ParseNeedData (BS.ByteString -> ParseOutput a) -- ^ Parser needs more data to continue parsing. | ParseFailed String -- ^ Parsing failed, error is reported. | ParseDone BS.ByteString -- ^ Parsing finished, unparsed data is returned. -- | Run streaming parser with initial input. runParser' :: Parser a -> BS.ByteString -> ParseOutput a runParser' parser startdata = parse $ callParse parser (tokenParser startdata) where parse (MoreData (np, ntok)) = ParseNeedData (parse . callParse np .ntok) parse (Failed err) = ParseFailed err parse (Yield v np) = ParseYield v (parse np) parse (Done ctx _) = ParseDone ctx -- | Run streaming parser, immediately returns 'ParseNeedData'. runParser :: Parser a -> ParseOutput a runParser parser = runParser' parser BS.empty -- | Parse a bytestring, generate lazy list of parsed values. If an error occurs, throws an exception. -- -- >>> parseByteString (arrayOf integer) "[1,2,3,4]" :: [Int] -- [1,2,3,4] -- -- >>> parseByteString (arrayOf ("name" .: string)) "[{\"name\":\"KIWI\"}, {\"name\":\"BIRD\"}]" -- ["KIWI","BIRD"] parseByteString :: Parser a -> BS.ByteString -> [a] parseByteString parser startdata = loop (runParser' parser startdata) where loop (ParseNeedData _) = error "Not enough data." loop (ParseDone _) = [] loop (ParseFailed err) = error err loop (ParseYield v np) = v : loop np -- | Parse a lazy bytestring, generate lazy list of parsed values. If an error occurs, throws an exception. parseLazyByteString :: Parser a -> BL.ByteString -> [a] parseLazyByteString parser input = loop input (runParser parser) where loop BL.Empty (ParseNeedData _) = error "Not enough data." loop (BL.Chunk dta rest) (ParseNeedData np) = loop rest (np dta) loop _ (ParseDone _) = [] loop _ (ParseFailed err) = error err loop rest (ParseYield v np) = v : loop rest np -- | Deserialize a JSON value from lazy 'BL.ByteString'. -- -- If this fails due to incomplete or invalid input, 'Nothing' is returned. -- -- The input must consist solely of a JSON document, with no trailing data except for whitespace. decode :: AE.FromJSON a => BL.ByteString -> Maybe a decode bs = case eitherDecode bs of Right val -> Just val Left _ -> Nothing -- | Like 'decode' but returns an error message when decoding fails. eitherDecode :: AE.FromJSON a => BL.ByteString -> Either String a eitherDecode bs = loop bs (runParser value) where loop BL.Empty (ParseNeedData _) = Left "Not enough data." loop (BL.Chunk dta rest) (ParseNeedData np) = loop rest (np dta) loop _ (ParseDone _) = Left "Nothing parsed." loop _ (ParseFailed err) = Left err loop rest (ParseYield v next) = checkExit v next rest checkExit v (ParseDone srest) rest | BS.all isSpace srest && BL.all isSpace rest = Right v | otherwise = Left "Data followed by non-whitespace characters." checkExit _ (ParseYield _ _) _ = Left "Multiple value parses?" checkExit _ (ParseFailed err) _ = Left err checkExit _ (ParseNeedData _) BL.Empty = Left "Incomplete json structure." checkExit v (ParseNeedData cont) (BL.Chunk dta rest) = checkExit v (cont dta) rest -- | Like 'decode', but on strict 'BS.ByteString' decodeStrict :: AE.FromJSON a => BS.ByteString -> Maybe a decodeStrict bs = case eitherDecodeStrict bs of Right val -> Just val Left _ -> Nothing -- | Like 'eitherDecode', but on strict 'BS.ByteString' eitherDecodeStrict :: AE.FromJSON a => BS.ByteString -> Either String a eitherDecodeStrict bs = case runParser' value bs of ParseYield next v -> checkExit v next ParseNeedData _ -> Left "Incomplete json structure." ParseFailed err -> Left err ParseDone _ -> Left "No data found." where checkExit (ParseDone rest) v | BS.all isSpace rest = Right v checkExit _ _ = Left "Data folowed by non-whitespace characters." -- $use -- -- >>> parseByteString value "[1,2,3]" :: [[Int]] -- [[1,2,3]] -- -- The 'value' parser matches any 'AE.FromJSON' value. The above command is essentially -- identical to the aeson decode function; the parsing process can generate more -- objects, therefore the results is [a]. -- -- Example of json-stream style parsing: -- -- >>> parseByteString (arrayOf integer) "[1,2,3]" :: [Int] -- [1,2,3] -- -- Parsers can be combinated using '<*>' and '<|>' operators. The parsers are -- run in parallel and return combinations of the parsed values. -- -- >>> let text = "[{\"name\": \"John\", \"age\": 20}, {\"age\": 30, \"name\": \"Frank\"} ]" -- >>> let parser = arrayOf $ (,) <$> "name" .: string <*> "age" .: integer -- >>> parseByteString parser text :: [(T.Text,Int)] -- [("John",20),("Frank",30)] -- -- When parsing larger values, it is advisable to use lazy ByteStrings. The parsing -- is then more memory efficient as less lexical state -- is needed to be held in memory for parallel parsers. -- -- More examples are available on . -- $constant -- Constant space decoding is possible if the grammar does not specify non-constant -- operations. The non-constant operations are 'value', 'string', 'many' and in some instances -- '<*>'. -- -- The 'value' parser works by creating an aeson AST and passing it to the -- 'parseJSON' method. The AST can consume a lot of memory before it is rejected -- in 'parseJSON'. To achieve constant space the parsers 'safeString', 'number', 'integer', -- 'real' and 'bool' -- must be used; these parsers reject and do not parse data if it does not match the -- type. -- -- The object key length is limited to ~64K. Object records with longer key are ignored and unparsed. -- -- Numbers are limited to 200.000 digits. Longer numbers will make the parsing fail. -- -- The 'many' parser works by accumulating all matched values. Obviously, number -- of such values influences the amount of used memory. -- -- The '<*>' operator runs both parsers in parallel and when they are both done, it -- produces combinations of the received values. It is constant-space as long as the -- number of element produced by child parsers is limited by a constant. This can be achieved by using -- '.!' and '.:' functions combined with constant space -- parsers or limiting the number of returned elements with 'takeI'. -- -- If the source object contains an object with multiple keys with a same name, -- json-stream matches the key multiple times. The only exception -- is 'objectWithKey' ('.:' and '.:?') that return at most one value for a given key. -- $aeson -- The parser uses internally "Data.Aeson" types, so that the FromJSON instances are -- directly usable with the 'value' parser. It may be more convenient to parse the -- outer structure with json-stream and the inner objects with aeson as long as constant-space -- decoding is not required. -- -- Json-stream defines the object-access operators '.:', '.:?' -- but in a slightly different albeit more natural way. New operators are '.!' for -- array access and '.|' to handle missing values. -- -- >>> let test = "[{\"name\": \"test1\", \"value\": 1}, {\"name\": \"test2\", \"value\": null}, {\"name\": \"test3\"}]" -- >>> let person = (,) <$> "name" .: string <*> "value" .: integer .| (-1) -- >>> let people = arrayOf person -- >>> parseByteString people test :: [(T.Text, Int)] -- [("test1",1),("test2",-1),("test3",-1)] -- $performance -- The parser tries to do the least amount of work to get the job done, skipping over items that -- are not required. General guidelines to get best performance: -- -- Do not use the 'value' parser for the whole object if the object is big. Do not use json-stream -- applicative parsing for creating objects if they have lots of records, unless you are skipping -- large part of the structure. Every '<*>' causes parallel parsing, too many parallel parsers -- kill performance. -- -- > arrayOf value :: Parser MyStructure -- MyStructure with FromJSON instance -- -- will probably behave better than -- -- > arrayOf $ MyStructure <$> "field1" .: string <*> "field2" .: integer <*> .... <*> "field20" .: string -- -- and also better (at least memory-wise) than -- -- > value :: Parser [MyStructure] -- -- unless the structure has hundreths of fields and you are parsing only a substructure. -- -- The 'integer' parser was optimized in such -- a way that the integer numbers skip the conversion to 'Scientific', resulting in a slightly -- faster speed. -- -- It is possible to use the '*>' operator to filter objects based on a condition, e.g.: -- -- > arrayOf $ id <$> "error" .: number -- > *> "name" .: string -- -- This will return all objects that contain attribute error with number content. The parser will -- skip trying to decode the name attribute if error is not found. --