{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-unused-binds #-}

module PicoAeson
    (
      aeson
    , value'
    ) where

import Control.Applicative ((*>), (<$>), (<*), (<|>), liftA2, pure)
import Common (pathTo)
import Control.DeepSeq (NFData(..))
import Control.Monad (forM)
import Data.Picoparsec (Parser, char, endOfInput, string)
import Data.Picoparsec.Number (scientific)
import Data.Bits ((.|.), shiftL)
import Data.Char (chr, digitToInt, isSpace)
import Data.Hashable (Hashable(..))
import Data.List (sort)
import Data.Monoid.Textual (TextualMonoid, singleton)
import qualified Data.Monoid.Textual as Textual
import Data.Monoid.Instances.ByteString.Char8 ()
import Data.Monoid.Instances.ByteString.UTF8 (ByteStringUTF8(..))
import Data.Monoid.Instances.Positioned (OffsetPositioned, LinePositioned, extract) --, position, line, column)
import Data.Monoid.Instances.Stateful (Stateful(Stateful))
import Data.Scientific (Scientific)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Data.Vector as Vector (Vector, foldl', fromList)
import System.Directory (getDirectoryContents)
import System.FilePath ((</>), dropExtension)
import qualified Data.Picoparsec as P
import qualified Data.ByteString as B
import qualified Data.HashMap.Strict as H
import Criterion.Main

data Result a = Error String
              | Success a
                deriving (Eq, Show)


-- | A JSON \"object\" (key\/value map).
type Object t = H.HashMap t (Value t)

-- | A JSON \"array\" (sequence).
type Array t = Vector (Value t)

-- | A JSON value represented as a Haskell value.
data Value t = Object !(Object t)
             | Array !(Array t)
             | String !t
             | Number !Scientific
             | Bool !Bool
             | Null
               deriving (Eq, Show)

instance NFData t => NFData (Value t) where
    rnf (Object o) = rnf o
    rnf (Array a)  = Vector.foldl' (\x y -> rnf y `seq` x) () a
    rnf (String s) = rnf s
    rnf (Number n) = rnf n
    rnf (Bool b)   = rnf b
    rnf Null       = ()


instance NFData ByteStringUTF8 where
  rnf (ByteStringUTF8 b) = rnf b

instance Hashable ByteStringUTF8 where
  hashWithSalt i (ByteStringUTF8 b) = hashWithSalt i b
  

instance NFData a => NFData (OffsetPositioned a) where
  rnf = rnf . extract

instance Hashable a => Hashable (OffsetPositioned a) where
  hashWithSalt i = hashWithSalt i . extract
  

instance NFData a => NFData (LinePositioned a) where
  rnf = rnf . extract

instance Hashable a => Hashable (LinePositioned a) where
  hashWithSalt i = hashWithSalt i . extract
  
instance (NFData a, NFData b) => NFData (Stateful a b) where
  rnf (Stateful p) = rnf p

instance (Hashable a, Hashable b)=> Hashable (Stateful a b) where
  hashWithSalt i (Stateful p) = hashWithSalt i p
  

-- | Parse a top-level JSON value.  This must be either an object or
-- an array, per RFC 4627.
--
-- The conversion of a parsed value to a Haskell value is deferred
-- until the Haskell value is needed.  This may improve performance if
-- only a subset of the results of conversions are needed, but at a
-- cost in thunk allocation.
json :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
json = json_ object_ array_
{-# INLINABLE json #-}

-- | Parse a top-level JSON value.  This must be either an object or
-- an array, per RFC 4627.
--
-- This is a strict version of 'json' which avoids building up thunks
-- during parsing; it performs all conversions immediately.  Prefer
-- this version if most of the JSON data needs to be accessed.
json' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
json' = json_ object_' array_'
{-# SPECIALIZE json' :: Parser ByteStringUTF8 (Value ByteStringUTF8) #-}
{-# SPECIALIZE json' :: Parser T.Text (Value T.Text) #-}
{-# SPECIALIZE json' :: Parser B.ByteString (Value B.ByteString) #-}

json_ :: (Eq t, TextualMonoid t) => Parser t (Value t) -> Parser t (Value t) -> Parser t (Value t)
json_ obj ary = do
  w <- skipSpace *> P.satisfyChar (\c -> c == '{' || c == '[')
  if w == '{'
    then obj
    else ary
{-# INLINE json_ #-}

object_ :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
object_ = {-# SCC "object_" #-} Object <$> objectValues jstring value
{-# INLINABLE object_ #-}

object_' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
object_' = {-# SCC "object_'" #-} do
  !vals <- objectValues jstring' value'
  return (Object vals)
 where
  jstring' = do
    !s <- jstring
    return s
{-# SPECIALIZE object_' :: Parser ByteStringUTF8 (Value ByteStringUTF8) #-}
{-# SPECIALIZE object_' :: Parser T.Text (Value T.Text) #-}
{-# SPECIALIZE object_' :: Parser B.ByteString (Value B.ByteString) #-}
{-# SPECIALIZE object_' :: Parser (OffsetPositioned B.ByteString) (Value (OffsetPositioned B.ByteString)) #-}
{-# SPECIALIZE object_' :: Parser (LinePositioned B.ByteString) (Value (LinePositioned B.ByteString)) #-}
{-# SPECIALIZE object_' :: Parser (OffsetPositioned T.Text) (Value (OffsetPositioned T.Text)) #-}
{-# SPECIALIZE object_' :: Parser (LinePositioned T.Text) (Value (LinePositioned T.Text)) #-}
{-# SPECIALIZE object_' :: Parser (Stateful [Int] T.Text) (Value (Stateful [Int] T.Text)) #-}
{-# INLINABLE object_' #-}

objectValues :: (Eq t, Hashable t, TextualMonoid t)
                => Parser t t -> Parser t (Value t) -> Parser t (H.HashMap t (Value t))
objectValues str val = do
  skipSpace
  let pair = liftA2 (,) (str <* skipSpace) (char ':' *> skipSpace *> val)
  H.fromList <$> commaSeparated pair '}'
{-# INLINABLE objectValues #-}

array_ :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
array_ = {-# SCC "array_" #-} Array <$> arrayValues value
{-# INLINABLE array_ #-}

array_' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
array_' = {-# SCC "array_'" #-} do
  !vals <- arrayValues value'
  return (Array vals)
{-# INLINABLE array_' #-}

commaSeparated :: (Eq t, TextualMonoid t) => Parser t a -> Char -> Parser t [a]
commaSeparated item end = {-# SCC "commaSeparated" #-} do
  c <- P.peekChar'
  if c == end
    then P.anyToken >> return []
    else loop
  where
    loop = do
      v <- item <* skipSpace
      ch <- P.satisfyChar $ \w -> w == ',' || w == end
      if ch == ','
        then skipSpace >> (v:) <$> loop
        else return [v]
{-# INLINABLE commaSeparated #-}

arrayValues :: (Eq t, TextualMonoid t) => Parser t (Value t) -> Parser t (Vector (Value t))
arrayValues val = {-# SCC "arrayValues" #-} do
  skipSpace
  Vector.fromList <$> commaSeparated val ']'
{-# INLINABLE arrayValues #-}

-- | Parse any JSON value.  You should usually 'json' in preference to
-- this function, as this function relaxes the object-or-array
-- requirement of RFC 4627.
--
-- In particular, be careful in using this function if you think your
-- code might interoperate with Javascript.  A na&#xef;ve Javascript
-- library that parses JSON data using @eval@ is vulnerable to attack
-- unless the encoded data represents an object or an array.  JSON
-- implementations in other languages conform to that same restriction
-- to preserve interoperability and security.
value :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
value = do
  c <- P.peekChar'
  case c of
    '"'   -> P.anyToken *> (String <$> jstring_)
    '{'   -> P.anyToken *> object_
    '['   -> P.anyToken *> array_
    'f'   -> string "false" *> pure (Bool False)
    't'   -> string "true" *> pure (Bool True)
    'n'   -> string "null" *> pure Null
    _      | c >= '0' && c <= '9' || c == '-'
          -> Number <$> scientific
           | otherwise -> fail "not a valid json value"
{-# INLINABLE value #-}

-- | Strict version of 'value'. See also 'json''.
value' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
value' = do
  c <- P.peekChar'
  case c of
    '"'   -> do
             !s <- P.anyToken *> jstring_
             return (String s)
    '{'   -> P.anyToken *> object_'
    '['   -> P.anyToken *> array_'
    'f'   -> string "false" *> pure (Bool False)
    't'   -> string "true" *> pure (Bool True)
    'n'   -> string "null" *> pure Null
    _      | c >= '0' && c <= '9' || c == '-'
          -> do
             !n <- scientific
             return (Number n)
           | otherwise -> fail "not a valid json value"
{-# SPECIALIZE value' :: Parser ByteStringUTF8 (Value ByteStringUTF8) #-}
{-# SPECIALIZE value' :: Parser T.Text (Value T.Text) #-}
{-# SPECIALIZE value' :: Parser B.ByteString (Value B.ByteString) #-}
{-# SPECIALIZE value' :: Parser (OffsetPositioned B.ByteString) (Value (OffsetPositioned B.ByteString)) #-}
{-# SPECIALIZE value' :: Parser (LinePositioned B.ByteString) (Value (LinePositioned B.ByteString)) #-}
{-# SPECIALIZE value' :: Parser (OffsetPositioned T.Text) (Value (OffsetPositioned T.Text)) #-}
{-# SPECIALIZE value' :: Parser (LinePositioned T.Text) (Value (LinePositioned T.Text)) #-}
{-# SPECIALIZE value' :: Parser (Stateful [Int] T.Text) (Value (Stateful [Int] T.Text)) #-}
{-# INLINABLE value' #-}

-- | Parse a quoted JSON string.
jstring :: TextualMonoid t => Parser t t
jstring = char '"' *> jstring_
{-# INLINE jstring #-}

unescape :: TextualMonoid t => Parser t t
unescape = {-# SCC "unescape" #-}
           (P.satisfyChar (`elem` "\"\\/ntbrfu")
            <|> fail "invalid JSON escape sequence")
           >>= \e-> case e
                    of '"' -> pure "\""
                       '\\' -> pure "\\"
                       '/' -> pure "/"
                       'n' -> pure "\n"
                       't' -> pure "\t"
                       'b' -> pure "\b"
                       'r' -> pure "\r"
                       'f' -> pure "\f"
                       'u' -> do a <- hexQuad
                                 if a < 0xd800 || a > 0xdfff
                                   then pure (singleton $ chr a)
                                   else do b <- P.string "\\u" *> hexQuad
                                           if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff
                                             then let !c = ((a - 0xd800) `shiftL` 10) + (b - 0xdc00) + 0x10000
                                                  in pure (singleton $ chr c)
                                             else fail "invalid UTF-16 surrogates"
                       _ -> fail "invalid JSON escape sequence"
{-# INLINE unescape #-}

hexQuad :: TextualMonoid t => Parser t Int
hexQuad = {-# SCC "hexQuad" #-}
          do !s <- P.take 4
             let q = Textual.foldl' (const $ const (-1)) extend 0 s :: Int
             if q < 0 then fail "invalid hex escape" else return q
  where extend n c = {-# SCC "extend" #-} n `shiftL` 4 .|. digitToInt c
{-# INLINE hexQuad #-}

-- | Parse a string without a leading quote.
jstring_ :: TextualMonoid t => Parser t t
jstring_ = {-# SCC "jstring_" #-} do
  s <- P.scanChars False $ \s c -> if s then Just False
                                        else if c == '"'
                                             then Nothing
                                             else Just (c == '\\')
  _ <- P.char '"'
  s1 <- if Textual.elem '\\' s
        then case P.parseOnly unescape s of
            Right r  -> return r
            Left err -> fail err
         else return s

  return s1
{-# INLINE jstring_ #-}

-- $lazy
--
-- The 'json' and 'value' parsers decouple identification from
-- conversion.  Identification occurs immediately (so that an invalid
-- JSON document can be rejected as early as possible), but conversion
-- to a Haskell value is deferred until that value is needed.
--
-- This decoupling can be time-efficient if only a smallish subset of
-- elements in a JSON value need to be inspected, since the cost of
-- conversion is zero for uninspected elements.  The trade off is an
-- increase in memory usage, due to allocation of thunks for values
-- that have not yet been converted.

-- $strict
--
-- The 'json'' and 'value'' parsers combine identification with
-- conversion.  They consume more CPU cycles up front, but have a
-- smaller memory footprint.

skipSpace :: TextualMonoid t => Parser t ()
skipSpace = {-# SCC "skipSpace" #-} P.skipCharsWhile isSpace
{-# INLINABLE skipSpace #-}

-- | Parse a top-level JSON value followed by optional whitespace and
-- end-of-input.  See also: 'json'.
jsonEOF :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
jsonEOF = json <* skipSpace <* endOfInput
{-# INLINABLE jsonEOF #-}

-- | Parse a top-level JSON value followed by optional whitespace and
-- end-of-input.  See also: 'json''.
jsonEOF' :: (Eq t, Hashable t, TextualMonoid t) => Parser t (Value t)
jsonEOF' = json' <* skipSpace <* endOfInput
{-# INLINE jsonEOF' #-}

aeson :: IO Benchmark
aeson = do
  path <- pathTo "json-data"
  names <- sort . filter (`notElem` [".", ".."]) <$> getDirectoryContents path
  benches1 <- forM names $ \name -> do
    bs <- B.readFile (path </> name)
    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') $ ByteStringUTF8 bs
  benches2 <- forM names $ \name -> do
    t <- T.readFile (path </> name)
    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') t
  benches3 <- forM names $ \name -> do
    bs <- B.readFile (path </> name)
    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') bs
  benches4 <- forM names $ \name -> do
    bs <- B.readFile (path </> name)
    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure bs :: OffsetPositioned B.ByteString)
  benches5 <- forM names $ \name -> do
    bs <- B.readFile (path </> name)
    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure bs :: LinePositioned B.ByteString)
  benches6 <- forM names $ \name -> do
    t <- T.readFile (path </> name)
    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure t :: OffsetPositioned T.Text)
  benches7 <- forM names $ \name -> do
    t <- T.readFile (path </> name)
    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure t :: LinePositioned T.Text)
  benches8 <- forM names $ \name -> do
    t <- T.readFile (path </> name)
    return . bench (dropExtension name) $ nf (P.parseOnly jsonEOF') (pure t :: Stateful [Int] T.Text)
  return $ bgroup "picoparsec-aeson"
    [ bgroup "ByteStringUTF8"   benches1
    , bgroup "Text"             benches2
    , bgroup "ByteString.Char8" benches3
    , bgroup "OffsetByteString" benches4
    , bgroup "LinedByteString"  benches5
    , bgroup "OffsetText"       benches6
    , bgroup "LinedText"        benches7
    , bgroup "StatefulText"     benches8]