{-# LANGUAGE FlexibleInstances, FlexibleContexts #-} -- | This is a layer on top of "Data.Binary" with its own 'Binary' class -- and customised instances for 'Word', 'Int' and 'Double'. -- The 'Int' and 'Word' instance use a variable-length encoding to save space -- for small numbers. The 'Double' instance uses the standard IEEE754 encoding. module PGF.Data.Binary ( -- * The Binary class Binary(..) -- * The Get and Put monads , Get , Put, runPut -- * Useful helpers for writing instances , putWord8 , getWord8 , putWord16be , getWord16be -- * Binary serialisation , encode , decode -- * IO functions for serialisation , encodeFile , decodeFile , encodeFile_ , decodeFile_ -- * Useful , Word8, Word16 ) where import Data.Word import qualified Data.Binary as Bin import Data.Binary.Put import Data.Binary.Get import Data.Binary.IEEE754 ( putFloat64be, getFloat64be) import Control.Monad import Control.Exception import Foreign import System.IO import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as L --import Data.Char (chr,ord) --import Data.List (unfoldr) -- And needed for the instances: import qualified Data.ByteString as B import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.IntMap as IntMap import qualified Data.IntSet as IntSet --import qualified Data.Ratio as R --import qualified Data.Tree as T import Data.Array.Unboxed ------------------------------------------------------------------------ -- | The @Binary@ class provides 'put' and 'get', methods to encode and -- decode a Haskell value to a lazy ByteString. It mirrors the Read and -- Show classes for textual representation of Haskell types, and is -- suitable for serialising Haskell values to disk, over the network. -- -- For parsing and generating simple external binary formats (e.g. C -- structures), Binary may be used, but in general is not suitable -- for complex protocols. Instead use the Put and Get primitives -- directly. -- -- Instances of Binary should satisfy the following property: -- -- > decode . encode == id -- -- That is, the 'get' and 'put' methods should be the inverse of each -- other. A range of instances are provided for basic Haskell types. -- class Binary t where -- | Encode a value in the Put monad. put :: t -> Put -- | Decode a value in the Get monad get :: Get t ------------------------------------------------------------------------ -- Wrappers to run the underlying monad -- | Encode a value using binary serialisation to a lazy ByteString. -- encode :: Binary a => a -> ByteString encode = runPut . put {-# INLINE encode #-} -- | Decode a value from a lazy ByteString, reconstructing the original structure. -- decode :: Binary a => ByteString -> a decode = runGet get ------------------------------------------------------------------------ -- Convenience IO operations -- | Lazily serialise a value to a file -- -- This is just a convenience function, it's defined simply as: -- -- > encodeFile f = B.writeFile f . encode -- -- So for example if you wanted to compress as well, you could use: -- -- > B.writeFile f . compress . encode -- encodeFile :: Binary a => FilePath -> a -> IO () encodeFile f v = L.writeFile f (encode v) encodeFile_ :: FilePath -> Put -> IO () encodeFile_ f m = L.writeFile f (runPut m) -- | Lazily reconstruct a value previously written to a file. -- -- This is just a convenience function, it's defined simply as: -- -- > decodeFile f = return . decode =<< B.readFile f -- -- So for example if you wanted to decompress as well, you could use: -- -- > return . decode . decompress =<< B.readFile f -- decodeFile :: Binary a => FilePath -> IO a decodeFile f = bracket (openBinaryFile f ReadMode) hClose $ \h -> do s <- L.hGetContents h evaluate $ runGet get s decodeFile_ :: FilePath -> Get a -> IO a decodeFile_ f m = bracket (openBinaryFile f ReadMode) hClose $ \h -> do s <- L.hGetContents h evaluate $ runGet m s ------------------------------------------------------------------------ -- For ground types, the standard instances can be reused, -- but for container types it would imply using -- the standard instances for all types of values in the container... instance Binary () where put=Bin.put; get=Bin.get instance Binary Bool where put=Bin.put; get=Bin.get instance Binary Word8 where put=Bin.put; get=Bin.get instance Binary Word16 where put=Bin.put; get=Bin.get instance Binary Char where put=Bin.put; get=Bin.get -- -- GF doesn't need these: --instance Binary Ordering where put=Bin.put; get=Bin.get --instance Binary Word32 where put=Bin.put; get=Bin.get --instance Binary Word64 where put=Bin.put; get=Bin.get --instance Binary Int8 where put=Bin.put; get=Bin.get --instance Binary Int16 where put=Bin.put; get=Bin.get --instance Binary Int32 where put=Bin.put; get=Bin.get --instance Binary Int64 where put=Bin.put; get=Bin.get -- needed by instance Binary ByteString ------------------------------------------------------------------------ -- Words are written as sequence of bytes. The last bit of each -- byte indicates whether there are more bytes to be read instance Binary Word where put i | i <= 0x7f = do put a | i <= 0x3fff = do put (a .|. 0x80) put b | i <= 0x1fffff = do put (a .|. 0x80) put (b .|. 0x80) put c | i <= 0xfffffff = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put d -- -- #if WORD_SIZE_IN_BITS < 64 | otherwise = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put (d .|. 0x80) put e {- -- Restricted to 32 bits even on 64-bit systems, so that negative -- Ints are written as 5 bytes instead of 10 bytes (TH 2013-02-13) --#else | i <= 0x7ffffffff = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put (d .|. 0x80) put e | i <= 0x3ffffffffff = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put (d .|. 0x80) put (e .|. 0x80) put f | i <= 0x1ffffffffffff = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put (d .|. 0x80) put (e .|. 0x80) put (f .|. 0x80) put g | i <= 0xffffffffffffff = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put (d .|. 0x80) put (e .|. 0x80) put (f .|. 0x80) put (g .|. 0x80) put h | i <= 0xffffffffffffff = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put (d .|. 0x80) put (e .|. 0x80) put (f .|. 0x80) put (g .|. 0x80) put h | i <= 0x7fffffffffffffff = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put (d .|. 0x80) put (e .|. 0x80) put (f .|. 0x80) put (g .|. 0x80) put (h .|. 0x80) put j | otherwise = do put (a .|. 0x80) put (b .|. 0x80) put (c .|. 0x80) put (d .|. 0x80) put (e .|. 0x80) put (f .|. 0x80) put (g .|. 0x80) put (h .|. 0x80) put (j .|. 0x80) put k -- #endif -} where a = fromIntegral ( i .&. 0x7f) :: Word8 b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8 c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8 d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8 e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8 {- f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8 g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8 h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8 j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8 k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8 -} get = do i <- getWord8 (if i <= 0x7f then return (fromIntegral i) else do n <- get return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f))) -- Int has the same representation as Word instance Binary Int where put i = put (fromIntegral i :: Word) get = liftM toInt32 (get :: Get Word) where -- restrict to 32 bits (for PGF portability, TH 2013-02-13) toInt32 w = fromIntegral (fromIntegral w::Int32)::Int ------------------------------------------------------------------------ -- -- Portable, and pretty efficient, serialisation of Integer -- -- Fixed-size type for a subset of Integer --type SmallInt = Int32 -- Integers are encoded in two ways: if they fit inside a SmallInt, -- they're written as a byte tag, and that value. If the Integer value -- is too large to fit in a SmallInt, it is written as a byte array, -- along with a sign and length field. {- instance Binary Integer where {-# INLINE put #-} put n | n >= lo && n <= hi = do putWord8 0 put (fromIntegral n :: SmallInt) -- fast path where lo = fromIntegral (minBound :: SmallInt) :: Integer hi = fromIntegral (maxBound :: SmallInt) :: Integer put n = do putWord8 1 put sign put (unroll (abs n)) -- unroll the bytes where sign = fromIntegral (signum n) :: Word8 {-# INLINE get #-} get = do tag <- get :: Get Word8 case tag of 0 -> liftM fromIntegral (get :: Get SmallInt) _ -> do sign <- get bytes <- get let v = roll bytes return $! if sign == (1 :: Word8) then v else - v -- -- Fold and unfold an Integer to and from a list of its bytes -- unroll :: Integer -> [Word8] unroll = unfoldr step where step 0 = Nothing step i = Just (fromIntegral i, i `shiftR` 8) roll :: [Word8] -> Integer roll = foldr unstep 0 where unstep b a = a `shiftL` 8 .|. fromIntegral b instance (Binary a,Integral a) => Binary (R.Ratio a) where put r = put (R.numerator r) >> put (R.denominator r) get = liftM2 (R.%) get get -} ------------------------------------------------------------------------ -- Instances for the first few tuples instance (Binary a, Binary b) => Binary (a,b) where put (a,b) = put a >> put b get = liftM2 (,) get get instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where put (a,b,c) = put a >> put b >> put c get = liftM3 (,,) get get get instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where put (a,b,c,d) = put a >> put b >> put c >> put d get = liftM4 (,,,) get get get get instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e get = liftM5 (,,,,) get get get get get -- -- and now just recurse: -- instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f) => Binary (a,b,c,d,e,f) where put (a,b,c,d,e,f) = put (a,(b,c,d,e,f)) get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f) instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g) => Binary (a,b,c,d,e,f,g) where put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g)) get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g) instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g, Binary h) => Binary (a,b,c,d,e,f,g,h) where put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h)) get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h) instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g, Binary h, Binary i) => Binary (a,b,c,d,e,f,g,h,i) where put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i)) get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i) instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g, Binary h, Binary i, Binary j) => Binary (a,b,c,d,e,f,g,h,i,j) where put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j)) get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j) ------------------------------------------------------------------------ -- Container types instance Binary a => Binary [a] where put l = put (length l) >> mapM_ put l get = do n <- get :: Get Int xs <- replicateM n get return xs instance (Binary a) => Binary (Maybe a) where put Nothing = putWord8 0 put (Just x) = putWord8 1 >> put x get = do w <- getWord8 case w of 0 -> return Nothing _ -> liftM Just get instance (Binary a, Binary b) => Binary (Either a b) where put (Left a) = putWord8 0 >> put a put (Right b) = putWord8 1 >> put b get = do w <- getWord8 case w of 0 -> liftM Left get _ -> liftM Right get ------------------------------------------------------------------------ -- ByteStrings (have specially efficient instances) instance Binary B.ByteString where put bs = do put (B.length bs) putByteString bs get = get >>= getByteString -- -- Using old versions of fps, this is a type synonym, and non portable -- -- Requires 'flexible instances' -- {- instance Binary ByteString where put bs = do put (fromIntegral (L.length bs) :: Int) putLazyByteString bs get = get >>= getLazyByteString -} ------------------------------------------------------------------------ -- Maps and Sets instance (Ord a, Binary a) => Binary (Set.Set a) where put s = put (Set.size s) >> mapM_ put (Set.toAscList s) get = liftM Set.fromDistinctAscList get instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where put m = put (Map.size m) >> mapM_ put (Map.toAscList m) get = liftM Map.fromDistinctAscList get instance Binary IntSet.IntSet where put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s) get = liftM IntSet.fromDistinctAscList get instance (Binary e) => Binary (IntMap.IntMap e) where put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m) get = liftM IntMap.fromDistinctAscList get ------------------------------------------------------------------------ -- Floating point -- instance Binary Double where -- put d = put (decodeFloat d) -- get = liftM2 encodeFloat get get instance Binary Double where put = putFloat64be get = getFloat64be {- instance Binary Float where put f = put (decodeFloat f) get = liftM2 encodeFloat get get -} ------------------------------------------------------------------------ -- Trees {- instance (Binary e) => Binary (T.Tree e) where put (T.Node r s) = put r >> put s get = liftM2 T.Node get get -} ------------------------------------------------------------------------ -- Arrays instance (Binary i, Ix i, Binary e) => Binary (Array i e) where put a = do put (bounds a) put (rangeSize $ bounds a) -- write the length mapM_ put (elems a) -- now the elems. get = do bs <- get n <- get -- read the length xs <- replicateM n get -- now the elems. return (listArray bs xs) -- -- The IArray UArray e constraint is non portable. Requires flexible instances -- instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where put a = do put (bounds a) put (rangeSize $ bounds a) -- now write the length mapM_ put (elems a) get = do bs <- get n <- get xs <- replicateM n get return (listArray bs xs)