-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Packing and unpacking flat tables.
--   
--   Packing and unpacking flat tables.
@package repa-convert
@version 4.2.3.2


-- | Pre-defined data formats.
module Data.Repa.Convert.Formats

-- | Class of types that can be formatted in some default human readable
--   ASCII way.
class FormatAscii a where type FormatAscii' a where {
    type family FormatAscii' a;
}

-- | Get the standard ASCII format for a value.
--   
--   The element value itself is not demanded.
formatAscii :: FormatAscii a => a -> FormatAscii' a

-- | A particular ASCII string.
data UnitAsc
UnitAsc :: String -> UnitAsc

-- | Maybe a raw list of characters, or something else.
data MaybeChars f
MaybeChars :: String -> f -> MaybeChars f

-- | Maybe a raw sequence of bytes, or something else.
data MaybeBytes f
MaybeBytes :: ByteString -> f -> MaybeBytes f

-- | Fixed length sequence of characters, represented as a (hated) Haskell
--   <a>String</a>.
--   
--   <ul>
--   <li>The runtime performance of the Haskell <a>String</a> is atrocious.
--   You really shouldn't be using them for large data sets.</li>
--   <li>When packing, the length of the provided string must match the
--   width of the format, else packing will fail.</li>
--   <li>When unpacking, the length of the result will be the width of the
--   format.</li>
--   </ul>
data FixChars
FixChars :: Int -> FixChars

-- | Like <a>FixChars</a>, but with a variable length.
data VarChars
VarChars :: VarChars

-- | Variable length string in double quotes, and standard backslash
--   encoding of non-printable characters.
data VarCharString
VarCharString :: VarCharString

-- | Match an exact sequence of characters.
data ExactChars
ExactChars :: String -> ExactChars

-- | Variable length unicode text, represented as a <a>Data.Text</a> thing.
data VarText
VarText :: VarText

-- | Variable length string in double quotes, and standard backslash
--   encoding of non-printable characters.
data VarTextString
VarTextString :: VarTextString

-- | Variable length sequence of bytes, represented as a <a>ByteString</a>.
data VarBytes
VarBytes :: VarBytes

-- | Human-readable ASCII Integer.
data IntAsc
IntAsc :: IntAsc

-- | Human-readable ASCII integer, using leading zeros to pad the encoding
--   out to a fixed length.
data IntAsc0
IntAsc0 :: Int -> IntAsc0

-- | Human-readable ASCII Double.
data DoubleAsc
DoubleAsc :: DoubleAsc

-- | Human-readable ASCII Double.
--   
--   When packing we use a fixed number of zeros after the decimal point,
--   though when unpacking we allow a greater precision.
data DoubleFixedPack
DoubleFixedPack :: Int -> DoubleFixedPack

-- | Human readable ASCII date in YYYYsMMsDD format.
data YYYYsMMsDD
YYYYsMMsDD :: Char -> YYYYsMMsDD

-- | Human readable ASCII date in DDsMMsYYYY format.
data DDsMMsYYYY
DDsMMsYYYY :: Char -> DDsMMsYYYY

-- | Big-endian 8-bit unsigned word.
data Word8be
Word8be :: Word8be

-- | Big-endian 8-bit signed integer.
data Int8be
Int8be :: Int8be

-- | Big-endian 16-bit unsigned word.
data Word16be
Word16be :: Word16be
data Int16be
Int16be :: Int16be

-- | Big-endian 32-bit unsigned word.
data Word32be
Word32be :: Word32be

-- | Big-endian 32-bit signed integer.
data Int32be
Int32be :: Int32be

-- | Big-endian 32-bit IEEE 754 float.
data Float32be
Float32be :: Float32be

-- | Big-endian 64-bit unsigned word.
data Word64be
Word64be :: Word64be

-- | Big-endian 64-bit signed integer.
data Int64be
Int64be :: Int64be

-- | Big-endian 64-bit IEEE 754 float.
data Float64be
Float64be :: Float64be

-- | Append fields without separators.
data App f
App :: f -> App f

-- | Separate fields with the given character.
--   
--   <ul>
--   <li>The separating character is un-escapable.</li>
--   <li>The format <tt>(Sep ',')</tt> does NOT parse a CSV file according
--   to the CSV specification:
--   <a>http://tools.ietf.org/html/rfc4180</a>.</li>
--   <li>The type is kept abstract as we cache some pre-computed values we
--   use to unpack this format. Use <a>mkSep</a> to make one.</li>
--   </ul>
data Sep f
[SepNil] :: Sep ()
[SepCons] :: {-# UNPACK #-} !SepMeta -> !f -> Sep fs -> Sep (f :*: fs)
class SepFormat f
mkSep :: SepFormat f => Char -> f -> Sep f
takeSepChar :: SepFormat f => Sep f -> Maybe Char

-- | Format of a simple object with labeled fields.
data Object fields
class ObjectFormat f

-- | A single field in an object.
data Field f
Field :: String -> f -> Maybe (Value f -> Bool) -> Field f
[fieldName] :: Field f -> String
[fieldFormat] :: Field f -> f
[fieldInclude] :: Field f -> Maybe (Value f -> Bool)

-- | Make an object format with the given labeled fields. For example:
--   
--   <pre>
--   &gt; let fmt =   mkObject 
--             $   Field "index"   IntAsc                      Nothing
--             :*: Field "message" VarCharString               Nothing
--             :*: Field "value"   (MaybeChars "NULL" DoubleAsc) (Just isJust)
--             :*: ()
--   </pre>
--   
--   Packing this produces:
--   
--   <pre>
--   &gt; let Just str = packToString fmt (27 :*: "foo" :*: Nothing :*: ())
--   &gt; putStrLn str
--   &gt; {"index":27,"message":"foo"}
--   </pre>
--   
--   Note that the encodings that this format can generate are a superset
--   of the JavaScript Object Notation (JSON). With the Repa format, the
--   fields of an object can directly encode dates and other values, whereas
--   in JSON these values must be represented by strings.
mkObject :: ObjectFormat f => f -> Object (ObjectFormat' f)

-- | A strict product type, written infix.
data (:*:) a b :: * -> * -> *
(:*:) :: ~a -> ~b -> (:*:) a b


-- | This module provides the <a>Format</a> class definition, without
--   exporting the pre-defined formats.
module Data.Repa.Convert.Format

-- | Relates a storage format to the Haskell type of the value that is
--   stored in that format.
class Format f where type Value f where {
    type family Value f;
}

-- | Yield the number of separate fields in this format.
fieldCount :: Format f => f -> Int

-- | Yield the minimum number of bytes that a value of this format will
--   take up.
--   
--   Packing a value into this format is guaranteed to use at least this
--   many bytes. This is exact for fixed-size formats.
minSize :: Format f => f -> Int

-- | For fixed size formats, yield their size (length) in bytes.
--   
--   Yields <a>Nothing</a> if this is not a fixed size format.
fixedSize :: Format f => f -> Maybe Int

-- | Yield the maximum packed size of the value in this format.
--   
--   If <a>fixedSize</a> returns a size then <a>packedSize</a> returns the
--   same size.
--   
--   For variable length formats, <a>packedSize</a> is an
--   over-approximation. We allow the actual packed value to use less
--   space, as it may not be possible to determine how much space it needs
--   without actually packing it.
--   
--   Yields <a>Nothing</a> when a collection of values is to be packed into
--   a fixed length format, but the size of the collection does not match
--   the format.
packedSize :: Format f => f -> Value f -> Maybe Int

-- | Class of storage formats that can have values packed and unpacked from
--   foreign buffers.
--   
--   The methods are written using continuations to make it easier for GHC
--   to optimise its core code when packing/unpacking many fields.
class Format format => Packable format where pack format value = Packer (packer format value)

-- | Pack a value into a buffer using the given format.
pack :: Packable format => format -> Value format -> Packer

-- | Low level packing function for the given format.
packer :: Packable format => format -> Value format -> Addr# -> IO () -> (Addr# -> IO ()) -> IO ()

-- | Packer wraps a function that can write to a buffer.
data Packer
Packer :: (Addr# -> IO () -> (Addr# -> IO ()) -> IO ()) -> Packer

-- | Takes start of buffer; failure action; and a continuation.
--   
--   We try to pack data into the given buffer. If packing succeeds then we
--   call the continuation with a pointer to the next byte after the packed
--   value, otherwise we call the failure action.
[fromPacker] :: Packer -> Addr# -> IO () -> (Addr# -> IO ()) -> IO ()

-- | Pack data into the given buffer.
--   
--   PRECONDITION: The buffer needs to be big enough to hold the packed
--   data, otherwise you'll corrupt the heap (bad). Use <tt>packedSize</tt>
--   to work out how big it needs to be.
unsafeRunPacker :: Packer -> Ptr Word8 -> IO (Maybe (Ptr Word8))
class Format format => Unpackable format where unpack format = Unpacker (unpacker format)

-- | Unpack a value from a buffer using the given format.
unpack :: Unpackable format => format -> Unpacker (Value format)

-- | Low level unpacking function for the given format.
unpacker :: Unpackable format => format -> Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> Value format -> IO ()) -> IO ()
data Unpacker a
Unpacker :: (Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> a -> IO ()) -> IO ()) -> Unpacker a

-- | Takes pointers to the first byte in the buffer; the first byte after
--   the buffer; a predicate to detect a field terminator; a failure
--   action; and a continuation.
--   
--   The field terminator is used by variable length encodings where the
--   length of the encoded data cannot be determined from the encoding
--   itself.
--   
--   We try to unpack a value from the buffer. If unpacking succeeds then
--   call the continuation with a pointer to the next byte after the
--   unpacked value, and the value itself, otherwise call the failure
--   action.
[fromUnpacker] :: Unpacker a -> Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> a -> IO ()) -> IO ()

-- | Unpack data from the given buffer.
--   
--   PRECONDITION: The buffer must be at least the minimum size of the
--   format (minSize). This allows us to avoid repeatedly checking for
--   buffer overrun when unpacking fixed size formats. If the buffer is not
--   long enough then you'll get an indeterminate result (bad).
unsafeRunUnpacker :: Unpacker a -> Ptr Word8 -> Int -> (Word8 -> Bool) -> IO (Maybe (a, Ptr Word8))


-- | Convert tuples of Haskell values to and from ASCII or packed binary
--   representations.
--   
--   This package is intended for cheap and cheerful serialisation and
--   deserialisation of flat tables, where each row has a fixed format. If
--   you have a table consisting of a couple hundred megs of
--   Pipe-Separated-Variables issued by some filthy enterprise system, then
--   this package is for you.
--   
--   If you want to parse context-free, or context-sensitive languages then
--   try the <tt>parsec</tt> or <tt>attoparsec</tt> packages. If you have
--   binary data that does not have a fixed format then try the
--   <tt>binary</tt> or <tt>cereal</tt> packages.
--   
--   For testing purposes, use <a>packToString</a> which takes a format, a
--   record, and returns a list of bytes.
--   
--   <pre>
--   &gt; import Data.Repa.Convert
--   
--   &gt; let format   = mkSep '|' (VarChars :*: IntAsc :*: DoubleAsc :*: ())
--   &gt; let Just str = packToString format ("foo" :*: 66 :*: 93.42 :*: ())
--   &gt; str
--   "foo|66|93.42"
--   </pre>
--   
--   We can then unpack the raw bytes back to Haskell values with
--   <a>unpackFromString</a>.
--   
--   <pre>
--   &gt; unpackFromString format str 
--   Just ("foo" :*: (66 :*: (93.42 :*: ())))
--   </pre>
--   
--   In production code use <a>unsafeRunPacker</a> and
--   <a>unsafeRunUnpacker</a> to work directly with a buffer in foreign
--   memory.
--   
--   <ul>
--   <li>NOTE that in the current version the separating character is
--   un-escapable.</li>
--   <li>The above means that the format <tt>(Sep ',')</tt> does NOT parse
--   a CSV file according to the CSV specification:
--   <a>http://tools.ietf.org/html/rfc4180</a>.</li>
--   </ul>
module Data.Repa.Convert

-- | Relates a storage format to the Haskell type of the value that is
--   stored in that format.
class Format f where type Value f where {
    type family Value f;
}

-- | Yield the number of separate fields in this format.
fieldCount :: Format f => f -> Int

-- | Yield the minimum number of bytes that a value of this format will
--   take up.
--   
--   Packing a value into this format is guaranteed to use at least this
--   many bytes. This is exact for fixed-size formats.
minSize :: Format f => f -> Int

-- | For fixed size formats, yield their size (length) in bytes.
--   
--   Yields <a>Nothing</a> if this is not a fixed size format.
fixedSize :: Format f => f -> Maybe Int

-- | Yield the maximum packed size of the value in this format.
--   
--   If <a>fixedSize</a> returns a size then <a>packedSize</a> returns the
--   same size.
--   
--   For variable length formats, <a>packedSize</a> is an
--   over-approximation. We allow the actual packed value to use less
--   space, as it may not be possible to determine how much space it needs
--   without actually packing it.
--   
--   Yields <a>Nothing</a> when a collection of values is to be packed into
--   a fixed length format, but the size of the collection does not match
--   the format.
packedSize :: Format f => f -> Value f -> Maybe Int

-- | Constrain the type of a value to match the given format.
--   
--   The value itself is not used.
forFormat :: format -> Value format -> Value format

-- | Constrain the type of some values to match the given format.
--   
--   The value itself is not used.
listFormat :: format -> [Value format] -> [Value format]

-- | Pack a value to a freshly allocated <a>ByteString</a>.
packToByteString :: Packable format => format -> Value format -> Maybe ByteString

-- | Unpack a value from a <a>ByteString</a>.
unpackFromByteString :: Unpackable format => format -> ByteString -> Maybe (Value format)

-- | Pack a value to a list of <a>Word8</a>.
packToList8 :: Packable format => format -> Value format -> Maybe [Word8]

-- | Unpack a value from a list of <a>Word8</a>.
unpackFromList8 :: Unpackable format => format -> [Word8] -> Maybe (Value format)

-- | Pack a value to a (hated) Haskell <a>String</a>.
packToString :: Packable format => format -> Value format -> Maybe String

-- | Unpack a value from a (hated) Haskell <a>String</a>.
unpackFromString :: Unpackable format => format -> String -> Maybe (Value format)

-- | Class of storage formats that can have values packed and unpacked from
--   foreign buffers.
--   
--   The methods are written using continuations to make it easier for GHC
--   to optimise its core code when packing/unpacking many fields.
class Format format => Packable format where pack format value = Packer (packer format value)

-- | Pack a value into a buffer using the given format.
pack :: Packable format => format -> Value format -> Packer

-- | Low level packing function for the given format.
packer :: Packable format => format -> Value format -> Addr# -> IO () -> (Addr# -> IO ()) -> IO ()

-- | Packer wraps a function that can write to a buffer.
data Packer
Packer :: (Addr# -> IO () -> (Addr# -> IO ()) -> IO ()) -> Packer

-- | Takes start of buffer; failure action; and a continuation.
--   
--   We try to pack data into the given buffer. If packing succeeds then we
--   call the continuation with a pointer to the next byte after the packed
--   value, otherwise we call the failure action.
[fromPacker] :: Packer -> Addr# -> IO () -> (Addr# -> IO ()) -> IO ()

-- | Pack data into the given buffer.
--   
--   PRECONDITION: The buffer needs to be big enough to hold the packed
--   data, otherwise you'll corrupt the heap (bad). Use <tt>packedSize</tt>
--   to work out how big it needs to be.
unsafeRunPacker :: Packer -> Ptr Word8 -> IO (Maybe (Ptr Word8))
class Format format => Unpackable format where unpack format = Unpacker (unpacker format)

-- | Unpack a value from a buffer using the given format.
unpack :: Unpackable format => format -> Unpacker (Value format)

-- | Low level unpacking function for the given format.
unpacker :: Unpackable format => format -> Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> Value format -> IO ()) -> IO ()
data Unpacker a
Unpacker :: (Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> a -> IO ()) -> IO ()) -> Unpacker a

-- | Takes pointers to the first byte in the buffer; the first byte after
--   the buffer; a predicate to detect a field terminator; a failure
--   action; and a continuation.
--   
--   The field terminator is used by variable length encodings where the
--   length of the encoded data cannot be determined from the encoding
--   itself.
--   
--   We try to unpack a value from the buffer. If unpacking succeeds then
--   call the continuation with a pointer to the next byte after the
--   unpacked value, and the value itself, otherwise call the failure
--   action.
[fromUnpacker] :: Unpacker a -> Addr# -> Addr# -> (Word8 -> Bool) -> IO () -> (Addr# -> a -> IO ()) -> IO ()

-- | Unpack data from the given buffer.
--   
--   PRECONDITION: The buffer must be at least the minimum size of the
--   format (minSize). This allows us to avoid repeatedly checking for
--   buffer overrun when unpacking fixed size formats. If the buffer is not
--   long enough then you'll get an indeterminate result (bad).
unsafeRunUnpacker :: Unpacker a -> Ptr Word8 -> Int -> (Word8 -> Bool) -> IO (Maybe (a, Ptr Word8))