{-# LANGUAGE RankNTypes #-}

module Data.Warc
    ( Record(..)
    , Warc(..)
    , parseWarc
    , iterRecords
    ) where

import Data.Char (ord)
import Pipes (Producer, yield)
import qualified Pipes.ByteString as PBS
import Control.Lens
import qualified Pipes.Attoparsec as PA
import qualified Data.ByteString as BS
import Data.ByteString (ByteString)
import Control.Monad (void)
import Control.Monad.Trans.Class
import Control.Monad.Trans.Either
import Control.Monad.Trans.Free
import Control.Monad.Trans.State.Strict
import Control.Error

import Data.Warc.Header

-- | A WARC record
data Record m r = Record { recWarcVersion :: Version
                         , recHeader      :: [Field]
                         , recContent     :: Producer BS.ByteString m r
                         }

instance Monad m => Functor (Record m) where
    fmap f (Record ver hdr r) = Record ver hdr (fmap f r)

-- | A WARC archive
data Warc m = Warc (FreeT (Record m) m (Producer BS.ByteString m ()))

instance Monad m => Monoid (Warc m) where
    Warc a `mappend` Warc b = Warc (a >> b)
    mempty = Warc (return (return ()))

parseWarc :: (Functor m, Monad m) => Producer ByteString m () -> Warc m
parseWarc = Warc . loop
  where
    loop upstream = FreeT $ do
        (hdr, rest) <- runStateT (PA.parse header) upstream
        go hdr rest

    go hdr rest
      | Nothing <- hdr                    = return $ Pure rest
      | Just (Left err) <- hdr            = error $ show err
      | Just (Right (ver, fields)) <- hdr = do
            let [len] = toListOf (each . _ContentLength) fields
            let produceBody = fmap consumeWhitespace . view (PBS.splitAt len)
                consumeWhitespace = PBS.dropWhile isEOL
                isEOL c = c == ord8 '\r' || c == ord8 '\n'
                ord8 = fromIntegral . ord
            return $ Free $ Record ver fields $ fmap loop $ produceBody rest

iterRecords :: Monad m => (forall a. Record m a -> m a) -> Warc m -> m (Producer BS.ByteString m ())
iterRecords f (Warc free) = go free
  where
    go (FreeT action) = action >>= \next -> do
        case next of
          Pure a -> return a
          Free record -> do
              rest <- f record
              go rest