{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-unused-binds #-} module PicoAeson ( aeson , value' ) where import Control.Applicative ((*>), (<$>), (<*), (<|>), liftA2, pure) import Common (pathTo) import Control.DeepSeq (NFData(..)) import Control.Monad (forM) import Data.Picoparsec (Parser, char, endOfInput, string) import Data.Picoparsec.Number (scientific) import Data.Bits ((.|.), shiftL) import Data.Char (chr, digitToInt, isSpace) import Data.Hashable (Hashable(..)) import Data.List (sort) import Data.Monoid.Textual (TextualMonoid, singleton) import qualified Data.Monoid.Textual as Textual import Data.Monoid.Instances.ByteString.Char8 () import Data.Monoid.Instances.ByteString.UTF8 (ByteStringUTF8(..)) import Data.Monoid.Instances.Positioned (OffsetPositioned, LinePositioned, extract) --, position, line, column) import Data.Monoid.Instances.Stateful (Stateful(Stateful)) import Data.Scientific (Scientific) import qualified Data.Text as T import qualified Data.Text.IO as T import Data.Vector as Vector (Vector, foldl', fromList) import System.Directory (getDirectoryContents) import System.FilePath ((), dropExtension) import qualified Data.Picoparsec as P import qualified Data.ByteString as B import qualified Data.HashMap.Strict as H import Criterion.Main data Result a = Error String | Success a deriving (Eq, Show) -- | A JSON \"object\" (key\/value map). type Object t = H.HashMap t (Value t) -- | A JSON \"array\" (sequence). type Array t = Vector (Value t) -- | A JSON value represented as a Haskell value. data Value t = Object !(Object t) | Array !(Array t) | String !t | Number !Scientific | Bool !Bool | Null deriving (Eq, Show) instance NFData t => NFData (Value t) where rnf (Object o) = rnf o rnf (Array a) = Vector.foldl' (\x y -> rnf y `seq` x) () a rnf (String s) = rnf s rnf (Number n) = rnf n rnf (Bool b) = rnf b rnf Null = () instance NFData ByteStringUTF8 where rnf (ByteStringUTF8 b) = rnf b instance Hashable ByteStringUTF8 where hashWithSalt i (ByteStringUTF8 b) = hashWithSalt i b instance NFData a => NFData (OffsetPositioned a) where rnf = rnf . extract instance Hashable a => Hashable (OffsetPositioned a) where hashWithSalt i = hashWithSalt i . extract instance NFData a => NFData (LinePositioned a) where rnf = rnf . extract instance Hashable a => Hashable (LinePositioned a) where hashWithSalt i = hashWithSalt i . extract instance (NFData a, NFData b) => NFData (Stateful a b) where rnf (Stateful p) = rnf p instance (Hashable a, Hashable b)=> Hashable (Stateful a b) where hashWithSalt i (Stateful p) = hashWithSalt i p -- | Parse a top-level JSON value. This must be either an object or -- an array, per RFC 4627. -- -- The conversion of a parsed value to a Haskell value is deferred -- until the Haskell value is needed. This may improve performance if -- only a subset of the results of conversions are needed, but at a -- cost in thunk allocation. json :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) json = json_ object_ array_ {-# INLINABLE json #-} -- | Parse a top-level JSON value. This must be either an object or -- an array, per RFC 4627. -- -- This is a strict version of 'json' which avoids building up thunks -- during parsing; it performs all conversions immediately. Prefer -- this version if most of the JSON data needs to be accessed. json' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) json' = json_ object_' array_' {-# SPECIALIZE json' :: Parser ByteStringUTF8 (Value ByteStringUTF8) #-} {-# SPECIALIZE json' :: Parser T.Text (Value T.Text) #-} {-# SPECIALIZE json' :: Parser B.ByteString (Value B.ByteString) #-} json_ :: (Eq t, TextualMonoid t) => Parser t (Value t) -> Parser t (Value t) -> Parser t (Value t) json_ obj ary = do w <- skipSpace *> P.satisfyChar (\c -> c == '{' || c == '[') if w == '{' then obj else ary {-# INLINE json_ #-} object_ :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) object_ = {-# SCC "object_" #-} Object <$> objectValues jstring value {-# INLINABLE object_ #-} object_' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) object_' = {-# SCC "object_'" #-} do !vals <- objectValues jstring' value' return (Object vals) where jstring' = do !s <- jstring return s {-# SPECIALIZE object_' :: Parser ByteStringUTF8 (Value ByteStringUTF8) #-} {-# SPECIALIZE object_' :: Parser T.Text (Value T.Text) #-} {-# SPECIALIZE object_' :: Parser B.ByteString (Value B.ByteString) #-} {-# SPECIALIZE object_' :: Parser (OffsetPositioned B.ByteString) (Value (OffsetPositioned B.ByteString)) #-} {-# SPECIALIZE object_' :: Parser (LinePositioned B.ByteString) (Value (LinePositioned B.ByteString)) #-} {-# SPECIALIZE object_' :: Parser (OffsetPositioned T.Text) (Value (OffsetPositioned T.Text)) #-} {-# SPECIALIZE object_' :: Parser (LinePositioned T.Text) (Value (LinePositioned T.Text)) #-} {-# SPECIALIZE object_' :: Parser (Stateful [Int] T.Text) (Value (Stateful [Int] T.Text)) #-} {-# INLINABLE object_' #-} objectValues :: (Eq t, Hashable t, TextualMonoid t) => Parser t t -> Parser t (Value t) -> Parser t (H.HashMap t (Value t)) objectValues str val = do skipSpace let pair = liftA2 (,) (str <* skipSpace) (char ':' *> skipSpace *> val) H.fromList <$> commaSeparated pair '}' {-# INLINABLE objectValues #-} array_ :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) array_ = {-# SCC "array_" #-} Array <$> arrayValues value {-# INLINABLE array_ #-} array_' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) array_' = {-# SCC "array_'" #-} do !vals <- arrayValues value' return (Array vals) {-# INLINABLE array_' #-} commaSeparated :: (Eq t, TextualMonoid t) => Parser t a -> Char -> Parser t [a] commaSeparated item end = {-# SCC "commaSeparated" #-} do c <- P.peekChar' if c == end then P.anyToken >> return [] else loop where loop = do v <- item <* skipSpace ch <- P.satisfyChar $ \w -> w == ',' || w == end if ch == ',' then skipSpace >> (v:) <$> loop else return [v] {-# INLINABLE commaSeparated #-} arrayValues :: (Eq t, TextualMonoid t) => Parser t (Value t) -> Parser t (Vector (Value t)) arrayValues val = {-# SCC "arrayValues" #-} do skipSpace Vector.fromList <$> commaSeparated val ']' {-# INLINABLE arrayValues #-} -- | Parse any JSON value. You should usually 'json' in preference to -- this function, as this function relaxes the object-or-array -- requirement of RFC 4627. -- -- In particular, be careful in using this function if you think your -- code might interoperate with Javascript. A naïve Javascript -- library that parses JSON data using @eval@ is vulnerable to attack -- unless the encoded data represents an object or an array. JSON -- implementations in other languages conform to that same restriction -- to preserve interoperability and security. value :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) value = do c <- P.peekChar' case c of '"' -> P.anyToken *> (String <$> jstring_) '{' -> P.anyToken *> object_ '[' -> P.anyToken *> array_ 'f' -> string "false" *> pure (Bool False) 't' -> string "true" *> pure (Bool True) 'n' -> string "null" *> pure Null _ | c >= '0' && c <= '9' || c == '-' -> Number <$> scientific | otherwise -> fail "not a valid json value" {-# INLINABLE value #-} -- | Strict version of 'value'. See also 'json''. value' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) value' = do c <- P.peekChar' case c of '"' -> do !s <- P.anyToken *> jstring_ return (String s) '{' -> P.anyToken *> object_' '[' -> P.anyToken *> array_' 'f' -> string "false" *> pure (Bool False) 't' -> string "true" *> pure (Bool True) 'n' -> string "null" *> pure Null _ | c >= '0' && c <= '9' || c == '-' -> do !n <- scientific return (Number n) | otherwise -> fail "not a valid json value" {-# SPECIALIZE value' :: Parser ByteStringUTF8 (Value ByteStringUTF8) #-} {-# SPECIALIZE value' :: Parser T.Text (Value T.Text) #-} {-# SPECIALIZE value' :: Parser B.ByteString (Value B.ByteString) #-} {-# SPECIALIZE value' :: Parser (OffsetPositioned B.ByteString) (Value (OffsetPositioned B.ByteString)) #-} {-# SPECIALIZE value' :: Parser (LinePositioned B.ByteString) (Value (LinePositioned B.ByteString)) #-} {-# SPECIALIZE value' :: Parser (OffsetPositioned T.Text) (Value (OffsetPositioned T.Text)) #-} {-# SPECIALIZE value' :: Parser (LinePositioned T.Text) (Value (LinePositioned T.Text)) #-} {-# SPECIALIZE value' :: Parser (Stateful [Int] T.Text) (Value (Stateful [Int] T.Text)) #-} {-# INLINABLE value' #-} -- | Parse a quoted JSON string. jstring :: TextualMonoid t => Parser t t jstring = char '"' *> jstring_ {-# INLINE jstring #-} unescape :: TextualMonoid t => Parser t t unescape = {-# SCC "unescape" #-} (P.satisfyChar (`elem` "\"\\/ntbrfu") <|> fail "invalid JSON escape sequence") >>= \e-> case e of '"' -> pure "\"" '\\' -> pure "\\" '/' -> pure "/" 'n' -> pure "\n" 't' -> pure "\t" 'b' -> pure "\b" 'r' -> pure "\r" 'f' -> pure "\f" 'u' -> do a <- hexQuad if a < 0xd800 || a > 0xdfff then pure (singleton $ chr a) else do b <- P.string "\\u" *> hexQuad if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff then let !c = ((a - 0xd800) `shiftL` 10) + (b - 0xdc00) + 0x10000 in pure (singleton $ chr c) else fail "invalid UTF-16 surrogates" _ -> fail "invalid JSON escape sequence" {-# INLINE unescape #-} hexQuad :: TextualMonoid t => Parser t Int hexQuad = {-# SCC "hexQuad" #-} do !s <- P.take 4 let q = Textual.foldl' (const $ const (-1)) extend 0 s :: Int if q < 0 then fail "invalid hex escape" else return q where extend n c = {-# SCC "extend" #-} n `shiftL` 4 .|. digitToInt c {-# INLINE hexQuad #-} -- | Parse a string without a leading quote. jstring_ :: TextualMonoid t => Parser t t jstring_ = {-# SCC "jstring_" #-} do s <- P.scanChars False $ \s c -> if s then Just False else if c == '"' then Nothing else Just (c == '\\') _ <- P.char '"' s1 <- if Textual.elem '\\' s then case P.parseOnly unescape s of Right r -> return r Left err -> fail err else return s return s1 {-# INLINE jstring_ #-} -- $lazy -- -- The 'json' and 'value' parsers decouple identification from -- conversion. Identification occurs immediately (so that an invalid -- JSON document can be rejected as early as possible), but conversion -- to a Haskell value is deferred until that value is needed. -- -- This decoupling can be time-efficient if only a smallish subset of -- elements in a JSON value need to be inspected, since the cost of -- conversion is zero for uninspected elements. The trade off is an -- increase in memory usage, due to allocation of thunks for values -- that have not yet been converted. -- $strict -- -- The 'json'' and 'value'' parsers combine identification with -- conversion. They consume more CPU cycles up front, but have a -- smaller memory footprint. skipSpace :: TextualMonoid t => Parser t () skipSpace = {-# SCC "skipSpace" #-} P.skipCharsWhile isSpace {-# INLINABLE skipSpace #-} -- | Parse a top-level JSON value followed by optional whitespace and -- end-of-input. See also: 'json'. jsonEOF :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) jsonEOF = json <* skipSpace <* endOfInput {-# INLINABLE jsonEOF #-} -- | Parse a top-level JSON value followed by optional whitespace and -- end-of-input. See also: 'json''. jsonEOF' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t) jsonEOF' = json' <* skipSpace <* endOfInput {-# INLINE jsonEOF' #-} aeson :: IO Benchmark aeson = do path <- pathTo "json-data" names <- sort . filter (`notElem` [".", ".."]) <$> getDirectoryContents path benches1 <- forM names $ \name -> do bs <- B.readFile (path name) return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') $ ByteStringUTF8 bs benches2 <- forM names $ \name -> do t <- T.readFile (path name) return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') t benches3 <- forM names $ \name -> do bs <- B.readFile (path name) return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') bs benches4 <- forM names $ \name -> do bs <- B.readFile (path name) return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure bs :: OffsetPositioned B.ByteString) benches5 <- forM names $ \name -> do bs <- B.readFile (path name) return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure bs :: LinePositioned B.ByteString) benches6 <- forM names $ \name -> do t <- T.readFile (path name) return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure t :: OffsetPositioned T.Text) benches7 <- forM names $ \name -> do t <- T.readFile (path name) return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure t :: LinePositioned T.Text) benches8 <- forM names $ \name -> do t <- T.readFile (path name) return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure t :: Stateful [Int] T.Text) return $ bgroup "picoparsec-aeson" [ bgroup "ByteStringUTF8" benches1 , bgroup "Text" benches2 , bgroup "ByteString.Char8" benches3 , bgroup "OffsetByteString" benches4 , bgroup "LinedByteString" benches5 , bgroup "OffsetText" benches6 , bgroup "LinedText" benches7 , bgroup "StatefulText" benches8]