{-# LANGUAGE ScopedTypeVariables , ParallelListComp #-} {-| Parse UTF-8 JSON into native Haskell types. -} module Text.JSONb.Decode where import Data.Char import Data.Ratio ((%)) import Prelude hiding (length, null, last, takeWhile) import Data.ByteString (length, append, empty, ByteString) import Data.ByteString.Char8 (snoc, cons, pack) import Control.Applicative hiding (empty) import qualified Data.ByteString.UTF8 as UTF8 import qualified Data.Trie.Convenience as Trie import Data.Attoparsec (eitherResult) import Data.Attoparsec.Char8 ( choice, char, Parser, option, takeWhile1, takeWhile, skipMany, satisfy, signed, decimal, Result(..) ) import qualified Data.Attoparsec.Char8 as Attoparsec import Data.ByteString.Nums.Careless import Text.JSONb.Simple {-| Interpret a 'ByteString' as any JSON literal. -} decode :: ByteString -> Either String JSON decode bytes = (eitherResult . Attoparsec.parse json) bytes {-| Split out the first parseable JSON literal from the input, returning the result of the attempt along with the remainder of the input or the whole input if no parseable item was discovered. -} break :: ByteString -> (Either String JSON, ByteString) break bytes = case Attoparsec.parse json bytes of Done remainder result -> (Right result, remainder) Fail _ _ s -> (Left s, bytes) Partial _ -> (Left "Partial", bytes) {-| Tries to parse any JSON literal. -} json :: Parser JSON json = do whitespace choice [object, array, string, number, boolean, null] {-| Parse a JSON object (dictionary). -} object :: Parser JSON object = do char '{' whitespace Object . Trie.fromListS <$> choice [ whitespace >> char '}' >> return [] , properties [] ] where properties acc = do key <- string_literal whitespace char ':' something <- json whitespace let acc' = (key, something) : acc choice [ char ',' >> whitespace >> choice [ char '}' >> return acc' , properties acc' ] , char '}' >> return acc' ] {-| Parse a JSON array. -} array :: Parser JSON array = do char '[' Array <$> choice [ whitespace >> char ']' >> return [] , elements [] ] where elements acc = do something <- json whitespace let acc' = something : acc finish = char ']' >> return (reverse acc') choice [ char ',' >> whitespace >> choice [finish, elements acc'] , finish ] {-| Parses a string literal, unescaping as it goes. -} string :: Parser JSON string = String <$> string_literal {-| Parses a numeric literal to a @Rational@. -} number :: Parser JSON number = Number <$> do (sign :: Rational) <- (char '-' *> pure (-1)) <|> pure 1 i <- just_zero <|> positive_number f <- option 0 fractional e <- option 0 (exponentialE *> signed decimal) return (sign * (i + f) * (10^^e)) where exponentialE = char 'e' <|> char 'E' fractional = do c <- char '.' digits <- takeWhile1 isDigit return (int digits % (10^(length digits))) just_zero = char '0' *> pure 0 positive_number = pure ((int .) . cons) <*> satisfy hi <*> takeWhile isDigit where hi d = d > '0' && d <= '9' {-| Parse a JSON Boolean literal. -} boolean :: Parser JSON boolean = Boolean <$> choice [ s_as_b "true" >> pure True , s_as_b "false" >> pure False ] {-| Parse a JSON null literal. -} null :: Parser JSON null = s_as_b "null" >> return Null {-| Per RFC 4627, section 2 "JSON Grammar", only a limited set of whitespace characters actually count as insignificant whitespace. -} whitespace :: Parser () whitespace = skipMany (satisfy w) where w ' ' = True -- ASCII space. w '\n' = True -- Newline. w '\r' = True -- Carriage return. w '\t' = True -- Horizontal tab. w _ = False -- Not a JSON space. {-| Parse a JSON string literal and unescape it but don't wrap it in a string constructor (we might wrap it as a dict key instead). -} string_literal :: Parser ByteString string_literal = char '"' >> recurse empty where recurse acc = do text <- takeWhile (not . (`elem` "\\\"")) choice [ char '"' >> return (acc `append` text) , do char '\\' c <- escape_sequence recurse (acc `append` text `append` UTF8.fromString [c]) ] where escape_sequence = do choice [ c >> r | c <- fmap char "n/\"rfbt\\u" | r <- fmap return "\n/\"\r\f\b\t\\" ++ [u] ] where u = do (a,b,c,d) <- (,,,) <$> hex <*> hex <*> hex <*> hex return . toEnum $ a * 0x1000 + b * 0x100 + c * 0x10 + d * 0x1 where hex = choice digits where prep (n, chars) = fmap (fmap ((+n) . ord) . char) chars digits = concatMap prep [ (-48, ['0'..'9']) , (-55, ['A'..'F']) , (-87, ['a'..'f']) ] s_as_b s = Attoparsec.string (pack s)