{-# LANGUAGE CPP #-} -------------------------------------------------------------------- -- | -- Module : Text.JSON.Parsec -- Copyright : (c) Galois, Inc. 2007-2009, Duncan Coutts 2015 -- -- -- Minimal implementation of Canonical JSON. -- -- -- -- A \"canonical JSON\" format is provided in order to provide meaningful and -- repeatable hashes of JSON-encoded data. Canonical JSON is parsable with any -- full JSON parser, but security-conscious applications will want to verify -- that input is in canonical form before authenticating any hash or signature -- on that input. -- -- This implementation is derived from the json parser from the json package, -- with simplifications to meet the Canonical JSON grammar. -- -- TODO: Known bugs/limitations: -- -- * Decoding/encoding Unicode code-points beyond @U+00ff@ is currently broken -- module Text.JSON.Canonical ( JSValue(..) , Int54 , parseCanonicalJSON , renderCanonicalJSON , prettyCanonicalJSON ) where import Text.ParserCombinators.Parsec ( CharParser, (<|>), (), many, between, sepBy , satisfy, char, string, digit, spaces , parse ) import Text.PrettyPrint hiding (char) import qualified Text.PrettyPrint as Doc #if !(MIN_VERSION_base(4,7,0)) import Control.Applicative ((<$>), (<$), pure, (<*>), (<*), (*>)) #endif import Control.Arrow (first) import Data.Bits (Bits) #if MIN_VERSION_base(4,7,0) import Data.Bits (FiniteBits) #endif import Data.Char (isDigit, digitToInt) import Data.Data (Data) import Data.Function (on) import Data.Int (Int64) import Data.Ix (Ix) import Data.List (foldl', sortBy) import Data.Typeable (Typeable) import Foreign.Storable (Storable) import Text.Printf (PrintfArg) import qualified Data.ByteString.Lazy.Char8 as BS data JSValue = JSNull | JSBool !Bool | JSNum !Int54 | JSString String | JSArray [JSValue] | JSObject [(String, JSValue)] deriving (Show, Read, Eq, Ord) -- | 54-bit integer values -- -- JavaScript can only safely represent numbers between @-(2^53 - 1)@ and -- @2^53 - 1@. -- -- TODO: Although we introduce the type here, we don't actually do any bounds -- checking and just inherit all type class instance from Int64. We should -- probably define `fromInteger` to do bounds checking, give different instances -- for type classes such as `Bounded` and `FiniteBits`, etc. newtype Int54 = Int54 { int54ToInt64 :: Int64 } deriving ( Enum , Eq , Integral , Data , Num , Ord , Real , Ix #if MIN_VERSION_base(4,7,0) , FiniteBits #endif , Bits , Storable , PrintfArg , Typeable ) instance Bounded Int54 where maxBound = Int54 ( 2^(53 :: Int) - 1) minBound = Int54 (-(2^(53 :: Int) - 1)) instance Show Int54 where show = show . int54ToInt64 instance Read Int54 where readsPrec p = map (first Int54) . readsPrec p ------------------------------------------------------------------------------ -- rendering flat -- -- | Render a JSON value in canonical form. This rendered form is canonical -- and so allows repeatable hashes. -- -- For pretty printing, see prettyCanonicalJSON. -- -- NB: Canonical JSON's string escaping rules deviate from RFC 7159 -- JSON which requires -- -- "All Unicode characters may be placed within the quotation -- marks, except for the characters that must be escaped: quotation -- mark, reverse solidus, and the control characters (@U+0000@ -- through @U+001F@)." -- -- Whereas the current specification of Canonical JSON explicitly -- requires to violate this by only escaping the quotation mark and -- the reverse solidus. This, however, contradicts Canonical JSON's -- statement that "Canonical JSON is parsable with any full JSON -- parser" -- -- Consequently, Canonical JSON is not a proper subset of RFC 7159. -- renderCanonicalJSON :: JSValue -> BS.ByteString renderCanonicalJSON v = BS.pack (s_value v []) s_value :: JSValue -> ShowS s_value JSNull = showString "null" s_value (JSBool False) = showString "false" s_value (JSBool True) = showString "true" s_value (JSNum n) = shows n s_value (JSString s) = s_string s s_value (JSArray vs) = s_array vs s_value (JSObject fs) = s_object (sortBy (compare `on` fst) fs) s_string :: String -> ShowS s_string s = showChar '"' . showl s where showl [] = showChar '"' showl (c:cs) = s_char c . showl cs s_char '"' = showChar '\\' . showChar '"' s_char '\\' = showChar '\\' . showChar '\\' s_char c = showChar c s_array :: [JSValue] -> ShowS s_array [] = showString "[]" s_array (v0:vs0) = showChar '[' . s_value v0 . showl vs0 where showl [] = showChar ']' showl (v:vs) = showChar ',' . s_value v . showl vs s_object :: [(String, JSValue)] -> ShowS s_object [] = showString "{}" s_object ((k0,v0):kvs0) = showChar '{' . s_string k0 . showChar ':' . s_value v0 . showl kvs0 where showl [] = showChar '}' showl ((k,v):kvs) = showChar ',' . s_string k . showChar ':' . s_value v . showl kvs ------------------------------------------------------------------------------ -- parsing -- -- | Parse a canonical JSON format string as a JSON value. The input string -- does not have to be in canonical form, just in the \"canonical JSON\" -- format. -- -- Use 'renderCanonicalJSON' to convert into canonical form. -- parseCanonicalJSON :: BS.ByteString -> Either String JSValue parseCanonicalJSON = either (Left . show) Right . parse p_value "" . BS.unpack p_value :: CharParser () JSValue p_value = spaces *> p_jvalue tok :: CharParser () a -> CharParser () a tok p = p <* spaces {- value: string number object array true false null -} p_jvalue :: CharParser () JSValue p_jvalue = (JSNull <$ p_null) <|> (JSBool <$> p_boolean) <|> (JSArray <$> p_array) <|> (JSString <$> p_string) <|> (JSObject <$> p_object) <|> (JSNum <$> p_number) "JSON value" p_null :: CharParser () () p_null = tok (string "null") >> return () p_boolean :: CharParser () Bool p_boolean = tok ( (True <$ string "true") <|> (False <$ string "false") ) {- array: [] [ elements ] elements: value value , elements -} p_array :: CharParser () [JSValue] p_array = between (tok (char '[')) (tok (char ']')) $ p_jvalue `sepBy` tok (char ',') {- string: "" " chars " chars: char char chars char: any byte except hex 22 (") or hex 5C (\) \\ \" -} p_string :: CharParser () String p_string = between (char '"') (tok (char '"')) (many p_char) where p_char = (char '\\' >> p_esc) <|> (satisfy (\x -> x /= '"' && x /= '\\')) p_esc = ('"' <$ char '"') <|> ('\\' <$ char '\\') "escape character" {- object: {} { members } members: pair pair , members pair: string : value -} p_object :: CharParser () [(String,JSValue)] p_object = between (tok (char '{')) (tok (char '}')) $ p_field `sepBy` tok (char ',') where p_field = (,) <$> (p_string <* tok (char ':')) <*> p_jvalue {- number: int int: digit digit1-9 digits - digit1-9 - digit1-9 digits digits: digit digit digits -} -- | Parse an int -- -- TODO: Currently this allows for a maximum of 15 digits (i.e. a maximum value -- of @999,999,999,999,999@) as a crude approximation of the 'Int54' range. p_number :: CharParser () Int54 p_number = tok ( (char '-' *> (negate <$> pnat)) <|> pnat <|> zero ) where pnat = (\d ds -> strToInt (d:ds)) <$> digit19 <*> manyN 14 digit digit19 = satisfy (\c -> isDigit c && c /= '0') "digit" strToInt = foldl' (\x d -> 10*x + digitToInt54 d) 0 zero = 0 <$ char '0' digitToInt54 :: Char -> Int54 digitToInt54 = fromIntegral . digitToInt manyN :: Int -> CharParser () a -> CharParser () [a] manyN 0 _ = pure [] manyN n p = ((:) <$> p <*> manyN (n-1) p) <|> pure [] ------------------------------------------------------------------------------ -- rendering nicely -- -- | Render a JSON value in a reasonable human-readable form. This rendered -- form is /not the canonical form/ used for repeatable hashes, use -- 'renderCanonicalJSON' for that. -- It is suitable however as an external form as any canonical JSON parser can -- read it and convert it into the form used for repeatable hashes. -- prettyCanonicalJSON :: JSValue -> String prettyCanonicalJSON = render . jvalue jvalue :: JSValue -> Doc jvalue JSNull = text "null" jvalue (JSBool False) = text "false" jvalue (JSBool True) = text "true" jvalue (JSNum n) = integer (fromIntegral (int54ToInt64 n)) jvalue (JSString s) = jstring s jvalue (JSArray vs) = jarray vs jvalue (JSObject fs) = jobject fs jstring :: String -> Doc jstring = doubleQuotes . hcat . map jchar jchar :: Char -> Doc jchar '"' = Doc.char '\\' Doc.<> Doc.char '"' jchar '\\' = Doc.char '\\' Doc.<> Doc.char '\\' jchar c = Doc.char c jarray :: [JSValue] -> Doc jarray = sep . punctuate' lbrack comma rbrack . map jvalue jobject :: [(String, JSValue)] -> Doc jobject = sep . punctuate' lbrace comma rbrace . map (\(k,v) -> sep [jstring k Doc.<> colon, nest 2 (jvalue v)]) -- | Punctuate in this style: -- -- > [ foo, bar ] -- -- if it fits, or vertically otherwise: -- -- > [ foo -- > , bar -- > ] -- punctuate' :: Doc -> Doc -> Doc -> [Doc] -> [Doc] punctuate' l _ r [] = [l Doc.<> r] punctuate' l _ r [x] = [l <+> x <+> r] punctuate' l p r (x:xs) = l <+> x : go xs where go [] = [] go [y] = [p <+> y, r] go (y:ys) = (p <+> y) : go ys