-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Packing and unpacking flat tables.
--
-- Packing and unpacking flat tables.
@package repa-convert
@version 4.2.3.2
-- | Pre-defined data formats.
module Data.Repa.Convert.Formats
-- | Class of types that can be formatted in some default human readable
-- ASCII way.
class FormatAscii a where type FormatAscii' a where {
type family FormatAscii' a;
}
-- | Get the standard ASCII format for a value.
--
-- The element value itself is not demanded.
formatAscii :: FormatAscii a => a -> FormatAscii' a
-- | A particular ASCII string.
data UnitAsc
UnitAsc :: String -> UnitAsc
-- | Maybe a raw list of characters, or something else.
data MaybeChars f
MaybeChars :: String -> f -> MaybeChars f
-- | Maybe a raw sequence of bytes, or something else.
data MaybeBytes f
MaybeBytes :: ByteString -> f -> MaybeBytes f
-- | Fixed length sequence of characters, represented as a (hated) Haskell
-- String.
--
--
-- - The runtime performance of the Haskell String is atrocious.
-- You really shouldn't be using them for large data sets.
-- - When packing, the length of the provided string must match the
-- width of the format, else packing will fail.
-- - When unpacking, the length of the result will be the width of the
-- format.
--
data FixChars
FixChars :: Int -> FixChars
-- | Like FixChars, but with a variable length.
data VarChars
VarChars :: VarChars
-- | Variable length string in double quotes, and standard backslash
-- encoding of non-printable characters.
data VarCharString
VarCharString :: VarCharString
-- | Match an exact sequence of characters.
data ExactChars
ExactChars :: String -> ExactChars
-- | Variable length unicode text, represented as a Data.Text thing.
data VarText
VarText :: VarText
-- | Variable length string in double quotes, and standard backslash
-- encoding of non-printable characters.
data VarTextString
VarTextString :: VarTextString
-- | Variable length sequence of bytes, represented as a ByteString.
data VarBytes
VarBytes :: VarBytes
-- | Human-readable ASCII Integer.
data IntAsc
IntAsc :: IntAsc
-- | Human-readable ASCII integer, using leading zeros to pad the encoding
-- out to a fixed length.
data IntAsc0
IntAsc0 :: Int -> IntAsc0
-- | Human-readable ASCII Double.
data DoubleAsc
DoubleAsc :: DoubleAsc
-- | Human-readable ASCII Double.
--
-- When packing we use a fixed number of zeros after the decimal point,
-- though when unpacking we allow a greater precision.
data DoubleFixedPack
DoubleFixedPack :: Int -> DoubleFixedPack
-- | Human readable ASCII date in YYYYsMMsDD format.
data YYYYsMMsDD
YYYYsMMsDD :: Char -> YYYYsMMsDD
-- | Human readable ASCII date in DDsMMsYYYY format.
data DDsMMsYYYY
DDsMMsYYYY :: Char -> DDsMMsYYYY
-- | Big-endian 8-bit unsigned word.
data Word8be
Word8be :: Word8be
-- | Big-endian 8-bit signed integer.
data Int8be
Int8be :: Int8be
-- | Big-endian 16-bit unsigned word.
data Word16be
Word16be :: Word16be
-- | Big-endian 16-bit signed integer.
data Int16be
Int16be :: Int16be
-- | Big-endian 32-bit unsigned word.
data Word32be
Word32be :: Word32be
-- | Big-endian 32-bit signed integer.
data Int32be
Int32be :: Int32be
-- | Big-endian 32-bit IEEE 754 float.
data Float32be
Float32be :: Float32be
-- | Big-endian 64-bit unsigned word.
data Word64be
Word64be :: Word64be
-- | Big-endian 64-bit signed integer.
data Int64be
Int64be :: Int64be
-- | Big-endian 64-bit IEEE 754 float.
data Float64be
Float64be :: Float64be
-- | Append fields without separators.
data App f
App :: f -> App f
-- | Separate fields with the given character.
--
--
-- - The separating character is un-escapable.
-- - The format (Sep ',') does NOT parse a CSV file according
-- to the CSV specification:
-- http://tools.ietf.org/html/rfc4180.
-- - The type is kept abstract as we cache some pre-computed values we
-- use to unpack this format. Use mkSep to make one.
--
data Sep f
[SepNil] :: Sep ()
[SepCons] :: {-# UNPACK #-} !SepMeta -> !f -> Sep fs -> Sep (f :*: fs)
class SepFormat f
mkSep :: SepFormat f => Char -> f -> Sep f
takeSepChar :: SepFormat f => Sep f -> Maybe Char
-- | Format of a simple object with labeled fields.
data Object fields
class ObjectFormat f
-- | A single field in an object.
data Field f
Field :: String -> f -> Maybe (Value f -> Bool) -> Field f
[fieldName] :: Field f -> String
[fieldFormat] :: Field f -> f
[fieldInclude] :: Field f -> Maybe (Value f -> Bool)
-- | Make an object format with the given labeled fields. For example:
--
--
-- > let fmt = mkObject
-- $ Field "index" IntAsc Nothing
-- :*: Field "message" VarCharString Nothing
-- :*: Field "value" (MaybeChars "NULL" DoubleAsc) (Just isJust)
-- :*: ()
--
--
-- Packing this produces:
--
--
-- > let Just str = packToString fmt (27 :*: "foo" :*: Nothing :*: ())
-- > putStrLn str
-- > {"index":27,"message":"foo"}
--
--
-- Note that the encodings that this format can generate are a superset
-- of the JavaScript Object Notation (JSON). With the Repa format, the
-- fields of an object can directly encode dates and other values, whereas
-- in JSON these values must be represented by strings.
mkObject :: ObjectFormat f => f -> Object (ObjectFormat' f)
-- | A strict product type, written infix.
data (:*:) a b :: * -> * -> *
(:*:) :: ~a -> ~b -> (:*:) a b
-- | This module provides the Format class definition, without
-- exporting the pre-defined formats.
module Data.Repa.Convert.Format
-- | Relates a storage format to the Haskell type of the value that is
-- stored in that format.
class Format f where type Value f where {
type family Value f;
}
-- | Yield the number of separate fields in this format.
fieldCount :: Format f => f -> Int
-- | Yield the minimum number of bytes that a value of this format will
-- take up.
--
-- Packing a value into this format is guaranteed to use at least this
-- many bytes. This is exact for fixed-size formats.
minSize :: Format f => f -> Int
-- | For fixed size formats, yield their size (length) in bytes.
--
-- Yields Nothing if this is not a fixed size format.
fixedSize :: Format f => f -> Maybe Int
-- | Yield the maximum packed size of the value in this format.
--
-- If fixedSize returns a size then packedSize returns the
-- same size.
--
-- For variable length formats, packedSize is an
-- over-approximation. We allow the actual packed value to use less
-- space, as it may not be possible to determine how much space it needs
-- without actually packing it.
--
-- Yields Nothing when a collection of values is to be packed into
-- a fixed length format, but the size of the collection does not match
-- the format.
packedSize :: Format f => f -> Value f -> Maybe Int
-- | Class of storage formats that can have values packed and unpacked from
-- foreign buffers.
--
-- The methods are written using continuations to make it easier for GHC
-- to optimise its core code when packing/unpacking many fields.
class Format format => Packable format where pack format value = Packer (packer format value)
-- | Pack a value into a buffer using the given format.
pack :: Packable format => format -> Value format -> Packer
-- | Low level packing function for the given format.
packer :: Packable format => format -> Value format -> Addr# -> IO () -> (Addr# -> IO ()) -> IO ()
-- | Packer wraps a function that can write to a buffer.
data Packer
Packer :: (Addr# -> IO () -> (Addr# -> IO ()) -> IO ()) -> Packer
-- | Takes start of buffer; failure action; and a continuation.
--
-- We try to pack data into the given buffer. If packing succeeds then we
-- call the continuation with a pointer to the next byte after the packed
-- value, otherwise we call the failure action.
[fromPacker] :: Packer -> Addr# -> IO () -> (Addr# -> IO ()) -> IO ()
-- | Pack data into the given buffer.
--
-- PRECONDITION: The buffer needs to be big enough to hold the packed
-- data, otherwise you'll corrupt the heap (bad). Use packedSize
-- to work out how big it needs to be.
unsafeRunPacker :: Packer -> Ptr Word8 -> IO (Maybe (Ptr Word8))
-- | Class of storage formats that can have values unpacked from a
-- buffer.
--
-- By default, unpack wraps the low level unpacker function for the
-- format in an Unpacker.
class Format format => Unpackable format where unpack format = Unpacker (unpacker format)
-- | Unpack a value from a buffer using the given format.
unpack :: Unpackable format => format -> Unpacker (Value format)
-- | Low level unpacking function for the given format.
unpacker :: Unpackable format => format -> Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> Value format -> IO ()) -> IO ()
-- | Unpacker wraps a function that can read a value from a buffer.
data Unpacker a
Unpacker :: (Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> a -> IO ()) -> IO ()) -> Unpacker a
-- | Takes pointers to the first byte in the buffer; the first byte after
-- the buffer; a predicate to detect a field terminator; a failure
-- action; and a continuation.
--
-- The field terminator is used by variable length encodings where the
-- length of the encoded data cannot be determined from the encoding
-- itself.
--
-- We try to unpack a value from the buffer. If unpacking succeeds then
-- call the continuation with a pointer to the next byte after the
-- unpacked value, and the value itself, otherwise call the failure
-- action.
[fromUnpacker] :: Unpacker a -> Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> a -> IO ()) -> IO ()
-- | Unpack data from the given buffer.
--
-- PRECONDITION: The buffer must be at least the minimum size of the
-- format (minSize). This allows us to avoid repeatedly checking for
-- buffer overrun when unpacking fixed size format. If the buffer is not
-- long enough then you'll get an indeterminate result (bad).
unsafeRunUnpacker :: Unpacker a -> Ptr Word8 -> Int -> (Word8 -> Bool) -> IO (Maybe (a, Ptr Word8))
-- | Convert tuples of Haskell values to and from ASCII or packed binary
-- representations.
--
-- This package is intended for cheap and cheerful serialisation and
-- deserialisation of flat tables, where each row has a fixed format. If
-- you have a table consisting of a couple hundred megs of
-- Pipe-Separated-Variables issued by some filthy enterprise system, then
-- this package is for you.
--
-- If you want to parse context-free, or context-sensitive languages then
-- try the parsec or attoparsec packages. If you have
-- binary data that does not have a fixed format then try the
-- binary or cereal packages.
--
-- For testing purposes, use packToString which takes a format and a
-- record, and returns the packed bytes as a String (wrapped in Maybe).
--
--
-- > import Data.Repa.Convert
--
-- > let format = mkSep '|' (VarChars :*: IntAsc :*: DoubleAsc :*: ())
-- > let Just str = packToString format ("foo" :*: 66 :*: 93.42 :*: ())
-- > str
-- "foo|66|93.42"
--
--
-- We can then unpack the raw bytes back to Haskell values with
-- unpackFromString.
--
--
-- > unpackFromString format str
-- Just ("foo" :*: (66 :*: (93.42 :*: ())))
--
--
-- In production code use unsafeRunPacker and
-- unsafeRunUnpacker to work directly with a buffer in foreign
-- memory.
--
--
-- - NOTE that in the current version the separating character is
-- un-escapable.
-- - The above means that the format (Sep ',') does NOT parse
-- a CSV file according to the CSV specification:
-- http://tools.ietf.org/html/rfc4180.
--
module Data.Repa.Convert
-- | Relates a storage format to the Haskell type of the value that is
-- stored in that format.
class Format f where type Value f where {
type family Value f;
}
-- | Yield the number of separate fields in this format.
fieldCount :: Format f => f -> Int
-- | Yield the minimum number of bytes that a value of this format will
-- take up.
--
-- Packing a value into this format is guaranteed to use at least this
-- many bytes. This is exact for fixed-size formats.
minSize :: Format f => f -> Int
-- | For fixed size formats, yield their size (length) in bytes.
--
-- Yields Nothing if this is not a fixed size format.
fixedSize :: Format f => f -> Maybe Int
-- | Yield the maximum packed size of the value in this format.
--
-- If fixedSize returns a size then packedSize returns the
-- same size.
--
-- For variable length formats, packedSize is an
-- over-approximation. We allow the actual packed value to use less
-- space, as it may not be possible to determine how much space it needs
-- without actually packing it.
--
-- Yields Nothing when a collection of values is to be packed into
-- a fixed length format, but the size of the collection does not match
-- the format.
packedSize :: Format f => f -> Value f -> Maybe Int
-- | Constrain the type of a value to match the given format.
--
-- The value itself is not used.
forFormat :: format -> Value format -> Value format
-- | Constrain the type of some values to match the given format.
--
-- The value itself is not used.
listFormat :: format -> [Value format] -> [Value format]
-- | Pack a value to a freshly allocated ByteString.
packToByteString :: Packable format => format -> Value format -> Maybe ByteString
-- | Unpack a value from a ByteString.
unpackFromByteString :: Unpackable format => format -> ByteString -> Maybe (Value format)
-- | Pack a value to a list of Word8.
packToList8 :: Packable format => format -> Value format -> Maybe [Word8]
-- | Unpack a value from a list of Word8.
unpackFromList8 :: Unpackable format => format -> [Word8] -> Maybe (Value format)
-- | Pack a value to a (hated) Haskell String.
packToString :: Packable format => format -> Value format -> Maybe String
-- | Unpack a value from a (hated) Haskell String.
unpackFromString :: Unpackable format => format -> String -> Maybe (Value format)
-- | Class of storage formats that can have values packed and unpacked from
-- foreign buffers.
--
-- The methods are written using continuations to make it easier for GHC
-- to optimise its core code when packing/unpacking many fields.
class Format format => Packable format where pack format value = Packer (packer format value)
-- | Pack a value into a buffer using the given format.
pack :: Packable format => format -> Value format -> Packer
-- | Low level packing function for the given format.
packer :: Packable format => format -> Value format -> Addr# -> IO () -> (Addr# -> IO ()) -> IO ()
-- | Packer wraps a function that can write to a buffer.
data Packer
Packer :: (Addr# -> IO () -> (Addr# -> IO ()) -> IO ()) -> Packer
-- | Takes start of buffer; failure action; and a continuation.
--
-- We try to pack data into the given buffer. If packing succeeds then we
-- call the continuation with a pointer to the next byte after the packed
-- value, otherwise we call the failure action.
[fromPacker] :: Packer -> Addr# -> IO () -> (Addr# -> IO ()) -> IO ()
-- | Pack data into the given buffer.
--
-- PRECONDITION: The buffer needs to be big enough to hold the packed
-- data, otherwise you'll corrupt the heap (bad). Use packedSize
-- to work out how big it needs to be.
unsafeRunPacker :: Packer -> Ptr Word8 -> IO (Maybe (Ptr Word8))
-- | Class of storage formats that can have values unpacked from a
-- buffer.
--
-- By default, unpack wraps the low level unpacker function for the
-- format in an Unpacker.
class Format format => Unpackable format where unpack format = Unpacker (unpacker format)
-- | Unpack a value from a buffer using the given format.
unpack :: Unpackable format => format -> Unpacker (Value format)
-- | Low level unpacking function for the given format.
unpacker :: Unpackable format => format -> Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> Value format -> IO ()) -> IO ()
-- | Unpacker wraps a function that can read a value from a buffer.
data Unpacker a
Unpacker :: (Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> a -> IO ()) -> IO ()) -> Unpacker a
-- | Takes pointers to the first byte in the buffer; the first byte after
-- the buffer; a predicate to detect a field terminator; a failure
-- action; and a continuation.
--
-- The field terminator is used by variable length encodings where the
-- length of the encoded data cannot be determined from the encoding
-- itself.
--
-- We try to unpack a value from the buffer. If unpacking succeeds then
-- call the continuation with a pointer to the next byte after the
-- unpacked value, and the value itself, otherwise call the failure
-- action.
[fromUnpacker] :: Unpacker a -> Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> a -> IO ()) -> IO ()
-- | Unpack data from the given buffer.
--
-- PRECONDITION: The buffer must be at least the minimum size of the
-- format (minSize). This allows us to avoid repeatedly checking for
-- buffer overrun when unpacking fixed size format. If the buffer is not
-- long enough then you'll get an indeterminate result (bad).
unsafeRunUnpacker :: Unpacker a -> Ptr Word8 -> Int -> (Word8 -> Bool) -> IO (Maybe (a, Ptr Word8))