-- Haskell98!

-- | Random and Binary IO with IterateeM
--
-- <http://okmij.org/ftp/Streams.html#random-bin-IO>
--
--
-- Random and binary IO: Reading TIFF
--
--    Iteratees presuppose sequential processing. A general-purpose input method
--    must also support random IO: processing a seek-able input stream from an
--    arbitrary position, jumping back and forth through the stream. We demonstrate
--    random IO with iteratees, as well as reading non-textual files and converting
--    raw bytes into multi-byte quantities such as integers, rationals, and TIFF
--    dictionaries. Positioning of the input stream is evocative of delimited
--    continuations.
--
--    We use random and binary IO to write a general-purpose TIFF library. The
--    library emphasizes incremental processing, relying on iteratees and enumerators
--    for on-demand reading of tag values.  The library extensively uses nested
--    streams, tacitly converting the stream of raw bytes from the file into streams
--    of integers, rationals and other user-friendly items. The pixel matrix is
--    presented as a contiguous stream, regardless of its segmentation into strips
--    and physical arrangement.
--
--    We show a representative application of the library: reading a sample TIFF
--    file, printing selected values from the TIFF dictionary, verifying the values
--    of selected pixels and computing the histogram of pixel values. The pixel
--    verification procedure stops reading the pixel matrix as soon as all specified
--    pixel values are verified. The histogram accumulation does read the entire
--    matrix, but incrementally. Neither pixel matrix processing procedure loads the
--    whole matrix in memory. In fact, we never read and retain more than the
--    IO-buffer-full of raw data.
--
--    Version: The current version is 1.1, December 2008.
--
module System.RandomIO where


import System.Posix
import Foreign.C
import Foreign.Ptr
import Foreign.Marshal.Alloc
import Foreign.Marshal.Array
import Control.Monad.Trans
import Data.Word
import Data.Bits
import Data.IORef
import Text.Printf

import System.IO (SeekMode(..))

import System.IterateeM
import System.LowLevelIO


-- | The type of the IO monad supporting seek requests and endianness
-- The seek_request is not-quite a state, more like a `communication channel'
-- set by the iteratee and answered by the enumerator. Since the
-- base monad is IO, it seems simpler to implement both endianness
-- and seek requests as IORef cells. Their names are grouped in a structure
-- RBState, which is propagated as the `environment.'
newtype RBIO a = RBIO{unRBIO:: RBState -> IO a}

instance Monad RBIO where
    return  = RBIO . const . return
    m >>= f = RBIO( \env -> unRBIO m env >>= (\x -> unRBIO (f x) env) )

instance MonadIO RBIO where
    liftIO = RBIO . const

-- | Generally, RBState is opaque and should not be exported.
data RBState = RBState{msb_first :: IORef Bool,
                       seek_req  :: IORef (Maybe FileOffset) }

-- | The programmer should use the following functions instead
--
rb_empty = do
           mref <- newIORef True
           sref <- newIORef Nothing
           return RBState{msb_first = mref, seek_req = sref}

-- | To request seeking, the iteratee sets seek_req to (Just desired_offset)
-- When the enumerator answers the request, it sets seek_req back
-- to Nothing
--
rb_seek_set :: FileOffset -> RBIO ()
rb_seek_set off = RBIO action
 where action env = writeIORef (seek_req env) (Just off)

rb_seek_answered :: RBIO Bool
rb_seek_answered = RBIO action
 where action env = readIORef (seek_req env) >>= 
                    return . maybe True (const False)

rb_msb_first :: RBIO Bool
rb_msb_first = RBIO action
 where action env = readIORef (msb_first env)

rb_msb_first_set :: Bool -> RBIO ()
rb_msb_first_set flag = RBIO action
 where action env = writeIORef (msb_first env) flag

runRB:: RBState -> IterateeGM el RBIO a -> IO (IterateeG el RBIO a)
runRB rbs m = unRBIO (unIM m) rbs

-- ------------------------------------------------------------------------
-- Binary Random IO Iteratees

-- | A useful combinator.
-- Perhaps a better idea would have been to define
-- Iteratee to have (Maybe a) in IE_done? In that case, we could
-- make IterateeGM to be the instance of MonadPlus
bindm :: Monad m => m (Maybe a) -> (a -> m (Maybe b)) -> m (Maybe b)
bindm m f = m >>= maybe (return Nothing) f


-- | We discard all available input first.
-- We keep discarding the stream s until we determine that our request 
-- has been answered:
-- rb_seek_set sets the state seek_req to (Just off). When the
-- request is answered, the state goes back to Nothing.
-- The above features remind one of delimited continuations.
sseek :: FileOffset -> IterateeGM el RBIO ()
sseek off = lift (rb_seek_set off) >> liftI (IE_cont step)
 where
 step s@(Err _) = liftI $ IE_done () s
 step s   = do
            r <- lift rb_seek_answered
            if r then liftI $ IE_done () s
                 else liftI $ IE_cont step


-- | An iteratee that reports and propagates an error
-- We disregard the input first and then propagate error.
-- It is reminiscent of `abort'
iter_err :: Monad m => String -> IterateeGM el m ()
iter_err err = liftI $ IE_cont step
 where
 step _ = liftI $ IE_done () (Err err)


-- | Read n elements from a stream and apply the given iteratee to the
-- stream of the read elements. If the given iteratee accepted fewer
-- elements, we stop.
-- This is the variation of `stake' with the early termination
-- of processing of the outer stream once the processing of the inner stream
-- finished early. This variation is particularly useful for randomIO,
-- where we do not have to care to `drain the input stream'.
stakeR :: Monad m => Int -> EnumeratorN el el m a
stakeR 0 iter = return iter
stakeR n iter@IE_done{} = return iter
stakeR n (IE_cont k) = liftI $ IE_cont step
 where
 step (Chunk []) = liftI $ IE_cont step
 step chunk@(Chunk str) | length str <= n =
                             stakeR (n - length str) ==<< k chunk
 step (Chunk str) = done (Chunk s1) (Chunk s2)
   where (s1,s2) = splitAt n str
 step stream = done stream stream
 done s1 s2 = k s1 >>== \r -> liftI $ IE_done r s2


-- | Iteratees to read unsigned integers written in Big- or Little-endian ways
--
endian_read2 :: IterateeGM Word8 RBIO (Maybe Word16)
endian_read2 =
  bindm snext $ \c1 ->
  bindm snext $ \c2 -> do
  flag <- lift rb_msb_first
  if flag then
      return $ return $ (fromIntegral c1 `shiftL` 8) .|. fromIntegral c2
     else
      return $ return $ (fromIntegral c2 `shiftL` 8) .|. fromIntegral c1

endian_read4 :: IterateeGM Word8 RBIO (Maybe Word32)
endian_read4 =
  bindm snext $ \c1 ->
  bindm snext $ \c2 ->
  bindm snext $ \c3 ->
  bindm snext $ \c4 -> do
  flag <- lift rb_msb_first
  if flag then
      return $ return $ 
               (((((fromIntegral c1
                `shiftL` 8) .|. fromIntegral c2)
                `shiftL` 8) .|. fromIntegral c3)
                `shiftL` 8) .|. fromIntegral c4
     else
      return $ return $ 
               (((((fromIntegral c4
                `shiftL` 8) .|. fromIntegral c3)
                `shiftL` 8) .|. fromIntegral c2)
                `shiftL` 8) .|. fromIntegral c1


-- ------------------------------------------------------------------------
-- Binary Random IO enumerators

-- | The enumerator of a POSIX Fd: a variation of enum_fd that
-- supports RandomIO (seek requests)
enum_fd_random :: Fd -> EnumeratorGM Word8 RBIO a
enum_fd_random fd iter = 
    IM . RBIO $ (\env -> 
                 allocaBytes (fromIntegral buffer_size) (loop env (0,0) iter))
 where
--  buffer_size = 4096
  buffer_size = 5 -- for tests; in real life, there should be 1024 or so
  -- the second argument of loop is (off,len), describing which part
  -- of the file is currently in the buffer 'p'
  loop :: RBState -> (FileOffset,Int) -> IterateeG Word8 RBIO a -> 
          Ptr Word8 -> IO (IterateeG Word8 RBIO a)
  loop env pos iter@IE_done{} p = return iter
  loop env pos iter p = readIORef (seek_req env) >>= loop' env pos iter p

  loop' env pos@(off,len) iter p (Just off') | 
    off <= off' && off' < off + fromIntegral len =      -- Seek within buffer p
    do
    writeIORef (seek_req env) Nothing
    let local_off = fromIntegral $ off' - off
    str <- peekArray (len - local_off) (p `plusPtr` local_off)
    im  <- runRB env $ enum_pure_1chunk str iter
    loop env pos im p
  loop' env pos iter p (Just off) = do -- Seek outside the buffer
   writeIORef (seek_req env) Nothing
   off <- myfdSeek fd AbsoluteSeek (fromIntegral off)
   putStrLn $ "Read buffer, offset " ++ either (const "IO err") show off
   case off of
    Left errno -> runRB env $ enum_err "IO error" iter
    Right off  -> loop' env (off,0) iter p Nothing
    -- Thanks to John Lato for the strictness annotation
    -- Otherwise, the `off + fromIntegral len' below accumulates thunks
  loop' env (off,len) iter p Nothing | off `seq` len `seq` False = undefined
  loop' env (off,len) iter@(IE_cont step) p Nothing = do
   n <- myfdRead fd (castPtr p) buffer_size
   putStrLn $ "Read buffer, size " ++ either (const "IO err") show n
   case n of
    Left errno -> runRB env $ step (Err "IO error")
    Right 0 -> return iter
    Right n -> do
         str <- peekArray (fromIntegral n) p
         im  <- runRB env $ step (Chunk str)
         loop env (off + fromIntegral len,fromIntegral n) im p


-- ------------------------------------------------------------------------
-- Tests

test1 () = do
           Just s1 <- snext
           Just s2 <- snext
           sseek 0
           Just s3 <- snext
           sseek 100
           Just s4 <- snext
           Just s5 <- snext
           sseek 101
           Just s6 <- snext
           sseek 1
           Just s7 <- snext
           return [s1,s2,s3,s4,s5,s6,s7]

test2 () = do
           sseek 100
           sseek 0
           sseek 100
           Just s4 <- snext
           Just s5 <- snext
           sseek 101
           Just s6 <- snext
           sseek 1
           Just s7 <- snext
           sseek 0
           Just s1 <- snext
           Just s2 <- snext
           sseek 0
           Just s3 <- snext
           return [s1,s2,s3,s4,s5,s6,s7]

test3 () = do
           let show_x fmt = map (\x -> (printf fmt x)::String)
           lift $ rb_msb_first_set True
           Just ns1 <- endian_read2
           Just ns2 <- endian_read2
           Just ns3 <- endian_read2
           Just ns4 <- endian_read2
           sseek 0
           Just nl1 <- endian_read4
           Just nl2 <- endian_read4
           sseek 4
           lift $ rb_msb_first_set False
           Just ns3' <- endian_read2
           Just ns4' <- endian_read2
           sseek 0
           Just ns1' <- endian_read2
           Just ns2' <- endian_read2
           sseek 0
           Just nl1' <- endian_read4
           Just nl2' <- endian_read4
           return [show_x "%04x" [ns1,ns2,ns3,ns4],
                   show_x "%08x" [nl1,nl2],
                   show_x "%04x" [ns1',ns2',ns3',ns4'],
                   show_x "%08x" [nl1',nl2']]
                            
test4 () = do
           lift $ rb_msb_first_set True
           Just ns1 <- endian_read2
           Just ns2 <- endian_read2
           iter_err "Error"
           ns3 <- endian_read2
           return (ns1,ns2,ns3)

test_driver_random iter filepath = do
  fd <- openFd filepath ReadOnly Nothing defaultFileFlags
  rb <- rb_empty
  putStrLn "About to read file"
  result <- runRB rb $ (enum_fd_random fd >. enum_eof) ==<< iter
  closeFd fd
  putStrLn "Finished reading file"
  print_res result
 where
  print_res (IE_done a EOF) = print a >> return a
  print_res (IE_done a (Err err)) = print a >>
                                    putStrLn ("Stream error: " ++ err) >>
                                    return a

test1r = test_driver_random (test1 ()) "test_full1.txt" >>=
         return . (== [104,101,104,13,10,10,101])

test2r = test_driver_random (test2 ()) "test_full1.txt" >>=
         return . (== [104,101,104,13,10,10,101])

test3r = test_driver_random (test3 ()) "test4.txt" >>=
         return . (==
                   [["0001","0203","fffe","fdfc"],
                    ["00010203","fffefdfc"],
                    ["0100","0302","feff","fcfd"],
                    ["03020100","fcfdfeff"]])

test4r = test_driver_random (test4 ()) "test4.txt" >>=
         return . (== (1,515,Nothing))

{-
About to read file
Read buffer, size 5
Finished reading file
(1,515,Nothing)
Stream error: Error
-}