Data/Repa/Flow/Default/IO.hs


-- | Read and write files.
--
--   The functions in this module are wrappers for the ones in
--   "Data.Repa.Flow.Default.SizedIO" that use a default chunk size of
--   64kBytes and just call `error` if the source file appears corrupted.
module Data.Repa.Flow.Default.IO
        ( defaultChunkSize

          -- * Buckets
        , module Data.Repa.Flow.IO.Bucket

          -- * Sourcing
        , sourceCSV
        , sourceTSV
        , sourceRecords
        , sourceLines
        , sourceChars
        , sourceBytes

          -- * Sinking
        , sinkChars
        , sinkLines
        , sinkBytes)
where
import Data.Repa.Flow.Default
import Data.Repa.Flow.IO.Bucket
import Data.Repa.Fusion.Unpack
import Data.Word
import qualified Data.Repa.Flow.Default.SizedIO  as F
#include "repa-flow.h"


-- | Chunk size passed to the sized readers by the wrappers in this module:
--   64 kBytes, which is 65536 bytes.
defaultChunkSize :: Integer
defaultChunkSize = 65536


-- | Read a file containing Comma-Separated-Values,
--   using the default chunk size of 'defaultChunkSize' bytes.
--
--   * The failure action handed to the sized reader is a call to `error`,
--     whose message reports that a line exceeds the chunk size.
--
sourceCSV
        :: BulkI l Bucket
        => Array l Bucket       -- ^ Buckets.
        -> IO (Sources N (Array N (Array F Char)))
sourceCSV
        = F.sourceCSV defaultChunkSize
        -- The separating space before "bytes" keeps the number readable
        -- in the rendered message.
        $ error $  "Line exceeds chunk size of "
                ++ show defaultChunkSize ++ " bytes."
{-# INLINE sourceCSV #-}


-- | Read a file containing Tab-Separated-Values,
--   using the default chunk size of 'defaultChunkSize' bytes.
--
--   * The failure action handed to the sized reader is a call to `error`,
--     whose message reports that a line exceeds the chunk size.
--
sourceTSV
        :: BulkI l Bucket
        => Array l Bucket       -- ^ Buckets.
        -> IO (Sources N (Array N (Array F Char)))
sourceTSV
        = F.sourceTSV defaultChunkSize
        -- The separating space before "bytes" keeps the number readable
        -- in the rendered message.
        $ error $  "Line exceeds chunk size of "
                ++ show defaultChunkSize ++ " bytes."
{-# INLINE sourceTSV #-}


-- | Read complete records of data from a file, into chunks of
--   'defaultChunkSize' bytes.
--   We read as many complete records as will fit into each chunk.
--
--   The records are separated by a special terminating character, which the
--   given predicate detects. After reading a chunk of data we seek the file to
--   just after the last complete record that was read, so we can continue to
--   read more complete records next time.
--
--   If we cannot fit at least one complete record in the chunk then `error`
--   is called with a message reporting the chunk size. Limiting the chunk
--   length guards against the case where a large input file is malformed,
--   as we won't try to read the whole file into memory.
--
--   * Data is read into foreign memory without copying it through the GHC heap.
--   * The provided file handle must support seeking, else you'll get an
--     exception.
--   * Each file is closed the first time the consumer tries to pull a
--     record from the associated stream when no more are available.
--
sourceRecords
        :: BulkI l Bucket
        => (Word8 -> Bool)      -- ^ Detect the end of a record.
        -> Array l Bucket       -- ^ Buckets.
        -> IO (Sources N (Array F Word8))
sourceRecords pSep
        = F.sourceRecords defaultChunkSize pSep
        -- The separating space before "bytes" keeps the number readable
        -- in the rendered message.
        $ error $  "Record exceeds chunk size of "
                ++ show defaultChunkSize ++ " bytes."
{-# INLINE sourceRecords #-}


-- | Read complete lines of data from a text file, using the default chunk
--   size of 'defaultChunkSize' bytes.
--   We read as many complete lines as will fit into each chunk.
--
--   * The trailing new-line characters are discarded.
--   * Data is read into foreign memory without copying it through the GHC heap.
--   * The provided file handle must support seeking, else you'll get an
--     exception.
--   * Each file is closed the first time the consumer tries to pull a line
--     from the associated stream when no more are available.
--   * The failure action handed to the sized reader is a call to `error`,
--     whose message reports that a line exceeds the chunk size.
--
sourceLines
        :: BulkI l Bucket
        => Array l Bucket       -- ^ Buckets.
        -> IO (Sources N (Array F Char))
sourceLines
        = F.sourceLines   defaultChunkSize
        -- The separating space before "bytes" keeps the number readable
        -- in the rendered message.
        $ error $  "Line exceeds chunk size of "
                ++ show defaultChunkSize ++ " bytes."
{-# INLINE sourceLines #-}


-- | Read 8-bit ASCII characters from some files, passing
--   'defaultChunkSize' as the chunk length to the sized reader.
sourceChars :: BulkI l Bucket => Array l Bucket -> IO (Sources F Char)
-- Left point-free: GHC only inlines an INLINE binding once it is applied to
-- as many arguments as appear on the left-hand side of its definition.
sourceChars = F.sourceChars defaultChunkSize
{-# INLINE sourceChars #-}


-- | Read bytes from some files, passing 'defaultChunkSize' as the
--   chunk length to the sized reader.
sourceBytes :: BulkI l Bucket => Array l Bucket -> IO (Sources F Word8)
-- Left point-free: GHC only inlines an INLINE binding once it is applied to
-- as many arguments as appear on the left-hand side of its definition.
sourceBytes = F.sourceBytes defaultChunkSize
{-# INLINE sourceBytes #-}


-- | Write vectors of text lines to the given buckets.
--
--   This is the sized-IO 'F.sinkLines' re-exported unchanged; no chunk size
--   is involved when sinking.
--
--   * Data is copied into a new buffer to insert newlines before being
--     written out.
--
sinkLines
        :: ( BulkI l  Bucket
           , BulkI l1 (Array l2 Char)
           , BulkI l2 Char
           , Unpack (Array l2 Char) t2 )
        => Name l1              -- ^ Layout of chunks.
        -> Name l2              -- ^ Layout of lines in chunks.
        -> Array l Bucket       -- ^ Buckets.
        -> IO (Sinks l1 (Array l2 Char))
sinkLines = F.sinkLines
{-# INLINE sinkLines #-}


-- | Write 8-bit ASCII characters to some files.
--
--   This is the sized-IO 'F.sinkChars' re-exported unchanged.
sinkChars :: BulkI l Bucket => Array l Bucket -> IO (Sinks F Char)
sinkChars = F.sinkChars
{-# INLINE sinkChars #-}


-- | Write bytes to some files.
--
--   This is the sized-IO 'F.sinkBytes' re-exported unchanged.
sinkBytes :: BulkI l Bucket => Array l Bucket -> IO (Sinks F Word8)
sinkBytes = F.sinkBytes
{-# INLINE sinkBytes #-}