Data/Repa/Array/Generic/Unpacks.hs


module Data.Repa.Array.Generic.Unpacks 
        (unsafeUnpacksToBuffer)
where
import Data.Repa.Convert.Format
import Data.Repa.Array.Material.Foreign
import Data.Repa.Array.Internals.Target
import Data.Repa.Array.Internals.Layout

import Data.IORef
import Data.Word
import qualified Foreign.ForeignPtr             as F
import qualified Foreign.Ptr                    as F


-- | Given a buffer containing an encoded table where the values in each
--   column all have the same type, decode all the values from a single
--   column and write them to a buffer.
--
--   For example, suppose we have a table as follows, where the rows are
--   separated by newline characters and the fields are separated by spaces.
--
-- @
-- RED 1.0 0.0 0.0
-- GREEN 0.0 1.0 0.0
-- BLUE 0.0 0.0 1.0
-- CYAN 0.0 1.0 1.0
-- @
--
--   To decode the second column use:
--
--   * Format: DoubleAsc, as they are encoded doubles.
--   * Field separator: ' ' as the fields are separated by spaces.
--   * Starting offsets: [4, 22, 39, 56], which are the indices of the starting
--     character of each field in the second column.
--   * Ending offsets: [15, 33, 50, 67], which are the indices of the newline
--     characters.
--   * Destination buffer: a new buffer with at least as many elements as there
--     are lines in the input data.
--
--   If the parse succeeds then the buffer containing the starting offsets is
--   updated so each element is the index of the NEXT field in each row.
--   This allows the client to easily decode the next column.
--
--   If there was a parse error then this function returns a pair of the row
--   index and offset in the buffer of the field which could not be parsed.
--
--   UNSAFE: Both the buffer containing ending offsets, and the destination
--   buffer must be at least as long as the buffer containing starting offsets
--   but this is not checked. If this is not true then the function will
--   perform an out of bounds access.
--
--   INLINE: This function is set to INLINE so that it will be specialised
--   at the call site for the given format. For standard formats it's better
--   to use the pre-specialised versions for Auto arrays.
--
unsafeUnpacksToBuffer
        :: forall format lStart lEnd lVal
        .  ( Unpackable format
           , TargetI lStart Int
           , TargetI lEnd   Int
           , TargetI lVal   (Value format))
        => format                       -- ^ Format for each element.
        -> Word8                        -- ^ Field separator character.
        -> Array  F Word8               -- ^ Packed binary source data.
        -> Buffer lStart Int            -- ^ Starting offsets for fields.
        -> Buffer lEnd   Int            -- ^ Ending   offsets of rows.
        -> Buffer lVal (Value format)   -- ^ Destination buffer for parsed fields.
        -> IO (Maybe (Int, Int))        -- ^ Nothing on successful parse.

unsafeUnpacksToBuffer
        format cTerm src
        ixsStart ixsEnd
        bufOut
 = do
        -- Holds the (row index, starting offset) of the first field that
        -- failed to parse, or Nothing if every field parsed.
        refError        <- newIORef Nothing

        -- Acquire the source pointer once for the whole column, rather than
        -- once per row. This keeps the recursive call in 'loop' in tail
        -- position, so the loop does not build up one nested
        -- 'withForeignPtr' scope per row.
        F.withForeignPtr fptrSrc
         $ \ptrSrc -> loop ptrSrc refError 0

        readIORef refError

 where
        -- Length of the column, in rows.
        !lenColumn
         = extent $ bufferLayout ixsStart

        -- Unpack the element offset and foreign pointer from the source
        -- array. The length component is not needed here.
        !(offSrc, _lenSrc, fptrSrc)
         = toForeignPtr src

        -- Read all the fields of the column in turn, stopping at the
        -- first field that fails to parse.
        loop ptrSrc refError ixField
         -- Every row has been processed.
         | ixField >= lenColumn
         = return ()

         | otherwise
         = do
                -- Starting offset of field in source array.
                ixStart <- unsafeReadBuffer ixsStart ixField

                -- Ending offset of row in source array.
                ixEnd   <- unsafeReadBuffer ixsEnd   ixField

                -- Get a pointer to the field data in the source array.
                let ptrStart
                        = F.plusPtr ptrSrc (offSrc + ixStart)

                -- Unpack a single field. The length passed covers the
                -- bytes from the field start up to and including the
                -- ending offset of the row.
                r       <- unsafeRunUnpacker
                                (unpack format)
                                ptrStart
                                (ixEnd - ixStart + 1)
                                (== cTerm)

                case r of
                 -- The field didn't parse. Record where, and stop.
                 -- The starting offsets of the rows before this one have
                 -- already been advanced; this row's offset is unchanged.
                 Nothing
                  -> writeIORef refError (Just (ixField, ixStart))

                 -- We parsed the field.
                 Just (x, ptrNext)
                  -> do
                        -- Update the starting offset for this field,
                        -- for when we read the next column. The extra
                        -- one steps past the byte at ptrNext, presumably
                        -- the field separator.
                        let !ixStart' = ixStart + F.minusPtr ptrNext ptrStart + 1
                        unsafeWriteBuffer ixsStart ixField ixStart'

                        -- Write the read field value.
                        unsafeWriteBuffer bufOut   ixField x

                        -- Read the next field in the column.
                        loop ptrSrc refError (ixField + 1)
{-# INLINE unsafeUnpacksToBuffer #-}