module Language.Python.Pickle where
import Control.Applicative ((<$>), (*>))
import Control.Monad.State
import Control.Monad.Writer
import qualified Data.ByteString as S
import Data.Attoparsec hiding (take)
import qualified Data.Attoparsec as A
import Data.Int (Int32)
import Data.IntMap (IntMap)
import qualified Data.IntMap as IM
import Data.List (foldl')
import Data.Map (Map)
import qualified Data.Map as M
import Data.Serialize.Get (getWord16le, getWord32le, getWord64be, runGet)
import Data.Serialize.Put (runPut, putByteString, putWord8, putWord16le, putWord32le, putWord64be, Put)
import Data.Word (Word32, Word64)
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (castPtr)
import Foreign.Storable (peek)
import System.IO.Unsafe (unsafePerformIO)
unpickle :: S.ByteString -> Either String Value
unpickle s = do
xs <- parseOnly (string "\128\STX" *> many1 (choice opcodes)) s
unpickle' xs
pickle :: Value -> S.ByteString
pickle value = runPut $ do
putByteString "\128\STX"
mapM_ serialize . runPickler $ pickle' value
putByteString "."
opcodes :: [Parser OpCode]
empty_dict, empty_list, empty_tuple, tuple1, tuple2 :: Parser OpCode
binput, mark :: Parser OpCode
binint, binint1, binint2, binfloat :: Parser OpCode
short_binstring :: Parser OpCode
setitem, setitems :: Parser OpCode
append, appends :: Parser OpCode
stop :: Parser OpCode
opcodes =
[ empty_dict, empty_list, empty_tuple, tuple1, tuple2
, binput, mark
, binint, binint1, binint2, binfloat
, short_binstring
, setitem, setitems
, append, appends
, stop
]
empty_dict = string "}" *> return EMPTY_DICT
empty_list = string "]" *> return EMPTY_LIST
empty_tuple = string ")" *> return EMPTY_TUPLE
binput = string "q" *> (BINPUT . fromIntegral <$> anyWord8)
mark = string "(" *> return MARK
binint = string "J" *> (BININT <$> int4)
binint1 = string "K" *> (BININT1 . fromIntegral <$> anyWord8)
binint2 = string "M" *> (BININT2 <$> uint2)
binfloat = string "G" *> (BINFLOAT <$> float8)
short_binstring = do
_ <- string "U"
i <- fromIntegral <$> anyWord8
s <- A.take i
return $ SHORT_BINSTRING s
tuple1 = string "\133" *> return TUPLE1
tuple2 = string "\134" *> return TUPLE2
setitem = string "s" *> return SETITEM
setitems = string "u" *> return SETITEMS
append = string "a" *> return APPEND
appends = string "e" *> return APPENDS
stop = string "." *> return STOP
float8 :: Parser Double
float8 = do
w <- runGet getWord64be <$> A.take 8
case w of
Left err -> fail err
Right x -> return $ coerce x
where
coerce :: Word64 -> Double
coerce x = unsafePerformIO $ with x $ \p ->
peek (castPtr p) :: IO Double
int4 :: Parser Int
int4 = do
w <- runGet getWord32le <$> A.take 4
case w of
Left err -> fail err
Right x -> return . fromIntegral $ coerce x
where
coerce :: Word32 -> Int32
coerce x = unsafePerformIO $ with x $ \p ->
peek (castPtr p) :: IO Int32
uint2 :: Parser Int
uint2 = do
w <- runGet getWord16le <$> A.take 2
case w of
Left err -> fail err
Right x -> return $ fromIntegral x
serialize :: OpCode -> Put
serialize opcode = case opcode of
BINPUT i -> putByteString "q" >> putWord8 (fromIntegral i)
BININT i -> putByteString "J" >> putWord32le (fromIntegral i)
BININT1 i -> putByteString "K" >> putWord8 (fromIntegral i)
BININT2 i -> putByteString "M" >> putUint2 i
BINFLOAT d -> putByteString "G" >> putFloat8 d
SHORT_BINSTRING s -> do
putByteString "U"
putWord8 . fromIntegral $ S.length s
putByteString s
EMPTY_DICT -> putByteString "}"
EMPTY_LIST -> putByteString "]"
EMPTY_TUPLE -> putByteString ")"
TUPLE1 -> putByteString "\133"
TUPLE2 -> putByteString "\134"
MARK -> putByteString "("
SETITEM -> putByteString "s"
SETITEMS -> putByteString "u"
APPEND -> putByteString "a"
APPENDS -> putByteString "e"
x -> error $ "serialize: " ++ show x
putFloat8 :: Double -> Put
putFloat8 d = putWord64be (coerce d)
where
coerce :: Double -> Word64
coerce x = unsafePerformIO $ with x $ \p ->
peek (castPtr p) :: IO Word64
putUint2 :: Int -> Put
putUint2 d = putWord16le (fromIntegral d)
data OpCode =
EMPTY_DICT
| EMPTY_LIST
| EMPTY_TUPLE
| BINPUT Int
| MARK
| BININT Int
| BININT1 Int
| BININT2 Int
| BINFLOAT Double
| SHORT_BINSTRING S.ByteString
| TUPLE1
| TUPLE2
| SETITEM
| SETITEMS
| APPEND
| APPENDS
| STOP
deriving Show
data Value =
Dict (Map Value Value)
| List [Value]
| Tuple [Value]
| BinInt Int
| BinFloat Double
| BinString S.ByteString
| MarkObject
deriving (Eq, Ord, Show)
unpickle' :: [OpCode] -> Either String Value
unpickle' xs = execute xs [] (IM.empty)
type Stack = [Value]
type Memo = IntMap Value
execute :: [OpCode] -> Stack -> Memo -> Either String Value
execute [] [value] _ = Right value
execute (op:ops) stack memo = case executeOne op stack memo of
Left err -> Left err
Right (stack', memo') -> execute ops stack' memo'
execute _ _ _ = Left "`execute` unimplemented"
executePartial :: [OpCode] -> Stack -> Memo -> (Stack, Memo, [OpCode])
executePartial [] stack memo = (stack, memo, [])
executePartial (op:ops) stack memo = case executeOne op stack memo of
Left _ -> (stack, memo, op:ops)
Right (stack', memo') -> executePartial ops stack' memo'
executeOne :: OpCode -> Stack -> Memo -> Either String (Stack, Memo)
executeOne EMPTY_DICT stack memo = return (Dict M.empty: stack, memo)
executeOne EMPTY_LIST stack memo = return (List []: stack, memo)
executeOne EMPTY_TUPLE stack memo = return (Tuple []: stack, memo)
executeOne (BINPUT i) (s:stack) memo = return (s:stack, IM.insert i s memo)
executeOne (BININT i) stack memo = return (BinInt i:stack, memo)
executeOne (BININT1 i) stack memo = return (BinInt i:stack, memo)
executeOne (BININT2 i) stack memo = return (BinInt i:stack, memo)
executeOne (BINFLOAT d) stack memo = return (BinFloat d:stack, memo)
executeOne (SHORT_BINSTRING s) stack memo = return (BinString s:stack, memo)
executeOne MARK stack memo = return (MarkObject:stack, memo)
executeOne TUPLE1 (a:stack) memo = return (Tuple [a]:stack, memo)
executeOne TUPLE2 (b:a:stack) memo = return (Tuple [a, b]:stack, memo)
executeOne SETITEM stack memo = executeSetitem stack memo
executeOne SETITEMS stack memo = executeSetitems [] stack memo
executeOne APPEND stack memo = executeAppend stack memo
executeOne APPENDS stack memo = executeAppends [] stack memo
executeOne STOP stack memo = Right (stack, memo)
executeOne _ _ _ = Left "`executeOne` unimplemented"
executeSetitem :: Monad m => Stack -> Memo -> m ([Value], Memo)
executeSetitem (v:k:Dict d:stack) memo = return (Dict (M.insert k v d):stack, memo)
executeSetitems :: Monad m => [(Value, Value)] -> Stack -> Memo -> m ([Value], Memo)
executeSetitems l (MarkObject:Dict d:stack) memo = return (l `addToDict` Dict d:stack, memo)
executeSetitems l (a:b:stack) memo = executeSetitems ((b, a) : l) stack memo
executeAppend :: Monad m => Stack -> Memo -> m ([Value], Memo)
executeAppend (x:List xs:stack) memo = return (List (xs ++ [x]):stack, memo)
executeAppends :: Monad m => [Value] -> Stack -> Memo -> m ([Value], Memo)
executeAppends l (MarkObject:List xs:stack) memo = return (List (xs ++ l):stack, memo)
executeAppends l (x:stack) memo = executeAppends (x : l) stack memo
addToDict :: [(Value, Value)] -> Value -> Value
addToDict l (Dict d) = Dict $ foldl' add d l
where add d' (k, v) = M.insert k v d'
newtype Pickler a = Pickler { runP :: WriterT [OpCode] (State (Map Value Int)) a }
deriving (Monad, MonadWriter [OpCode], MonadState (Map Value Int))
runPickler :: Pickler () -> [OpCode]
runPickler p = evalState (execWriterT (runP p)) M.empty
pickle' :: Value -> Pickler ()
pickle' value = case value of
Dict d -> pickleDict d
List xs -> pickleList xs
Tuple xs -> pickleTuple xs
BinInt i -> pickleBinInt i
BinFloat d -> pickleBinFloat d
BinString s -> pickleBinString s
x -> error $ "TODO: pickle " ++ show x
binput' :: Value -> Pickler ()
binput' value = do
i <- gets M.size
m <- get
put (M.insert value i m)
tell [BINPUT i]
pickleDict :: Map Value Value -> Pickler ()
pickleDict d = do
tell [EMPTY_DICT]
binput' (Dict M.empty)
let kvs = M.toList d
case kvs of
[] -> return ()
[(k,v)] -> pickle' k >> pickle' v >> tell [SETITEM]
_ -> do
tell [MARK]
mapM_ (\(k, v) -> pickle' k >> pickle' v) kvs
tell [SETITEMS]
pickleList :: [Value] -> Pickler ()
pickleList xs = do
tell [EMPTY_LIST]
binput' (List [])
case xs of
[] -> return ()
[x] -> pickle' x >> tell [APPEND]
_ -> do
tell [MARK]
mapM_ pickle' xs
tell [APPENDS]
pickleTuple :: [Value] -> Pickler ()
pickleTuple [] = tell [EMPTY_TUPLE]
pickleTuple [a] = do
pickle' a
tell [TUPLE1]
binput' (Tuple [a])
pickleTuple [a, b] = do
pickle' a
pickle' b
tell [TUPLE2]
binput' (Tuple [a, b])
pickleTuple _ = error "pickleTuple n TODO"
pickleBinInt :: Int -> Pickler ()
pickleBinInt i | i >= 0 && i < 256 = tell [BININT1 i]
| i >= 256 && i < 65536 = tell [BININT2 i]
| otherwise = tell [BININT i]
pickleBinFloat :: Double -> Pickler ()
pickleBinFloat d = do
tell [BINFLOAT d]
pickleBinString :: S.ByteString -> Pickler ()
pickleBinString s = do
tell [SHORT_BINSTRING s]
binput' (BinString s)
dictGet :: Value -> Value -> Either String (Maybe Value)
dictGet (Dict d) v = return $ M.lookup v d
dictGet _ _ = Left "dictGet: not a dict."
dictGet' :: Value -> Value -> Either String Value
dictGet' (Dict d) v = case M.lookup v d of
Just value -> return value
Nothing -> Left "dictGet': no such key."
dictGet' _ _ = Left "dictGet': not a dict."
dictGetString :: Value -> S.ByteString -> Either String S.ByteString
dictGetString (Dict d) s = case M.lookup (BinString s) d of
Just (BinString s') -> return s'
_ -> Left "dictGetString: not a dict, or no such key."