{-# LANGUAGE BangPatterns #-}

module Digest (readDict, extract)
  where

import MD5
import Config

import Prelude
import Data.Label
import Control.Monad
import Control.Applicative
import Data.ByteString.Internal                 ( w2c )
import qualified Data.Serialize                 as S
import qualified Data.ByteString                as S
import qualified Data.ByteString.Lazy.Char8     as L

import Data.Array.Accelerate.Array.Data         as A
import Data.Array.Accelerate.Array.Sugar        as A
import qualified Data.Array.Accelerate          as A


-- MD5 block sizes
--
blockSizeBits, blockSizeBytes, blockSizeWords :: Int
blockSizeBits  = 512
blockSizeBytes = blockSizeBits `div` 8
blockSizeWords = blockSizeBytes `div` 4


-- Create an MD5 block from the given message. This appends The '1' bit to the
-- message, pads the block with zeros until the length in bits is 448, and
-- finally appends the length in bits (mod 2^64).
--
md5block :: L.ByteString -> S.ByteString
md5block msg = do
  let
      len               = fromIntegral (L.length msg)
      lenBits           = 8 * fromIntegral len
      lenZeroPad
        | len + 1 <= blockSizeBytes - 8 = (blockSizeBytes - 8) - (len + 1)
        | otherwise                     = (2 * blockSizeBytes - 8) - (len + 1)
  --
  S.runPut $! do
    S.putLazyByteString msg
    S.putWord8 0x80
    mapM_ S.putWord8 (replicate lenZeroPad 0)
    S.putWord64le lenBits


-- Create a dictionary of blocks ready to digest from the given bytestring. This
-- reads one entry per line. Because we only do a single MD5 chunk, we discard
-- any entries with (length > blockSizeBytes - 8 = 55) characters. Because of
-- this restriction, we also need to a precise count of how many entries will be
-- in the final array, since it is stored column-major and hence there is no
-- easy way to truncate it.
--
readDict :: Config -> FilePath -> IO Dictionary
readDict c fp = do
  entries <- length       . chunk <$> L.readFile fp
  blocks  <- map md5block . chunk <$> L.readFile fp

  let sh        = Z :. blockSizeWords :. entries
      (adata,_) = runArrayData $ do
        arr <- newArrayData (size sh)

        let go !_ []     = return ()
            go !n (b:bs) = do
              foldM_ (\i w -> do unsafeWriteArrayData arr (toIndex sh (Z:.i:.n)) (fromElt w)
                                 return (i+1)) 0 (bytes b)
              go (n+1) bs

        go 0 blocks
        return (arr, undefined)

  adata `seq` return $ Array (fromElt sh) adata

  where
    chunk = maybe id take (get configMaxWords c)
          . drop (get configSkipWords c)
          . filter (\w -> fromIntegral (L.length w) < blockSizeBytes - 8)
          . L.lines

    bytes = either error id
          . S.runGet (replicateM blockSizeWords S.getWord32le)

-- Extract a word from the dictionary at a given index
--
extract :: Dictionary -> Int -> L.ByteString
extract dict i
  | i > n       = error "extract: index too large"
  | otherwise   = L.takeWhile (/= w2c 0x80) bytes
  where
    Z :. _ :. n = A.arrayShape dict
    bytes       = S.runPutLazy $
      forM_ [0 .. blockSizeWords-1] $ \c -> S.putWord32le (dict `A.indexArray` (Z:.c:.i))