{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes        #-}

module HaskellWorks.Data.Xml.Conduit
  ( blankedXmlToInterestBits
  , byteStringToBits
  , blankedXmlToBalancedParens
  , blankedXmlToBalancedParens2
  , compressWordAsBit
  , interestingWord8s
  , isInterestingWord8
  ) where

import Control.Monad
import Data.Array.Unboxed             as A
import Data.ByteString                as BS
import Data.Conduit
import Data.Word
import Data.Word8
import HaskellWorks.Data.Bits.BitWise
import Prelude                        as P

import qualified Data.Bits as BITS

interestingWord8s :: A.UArray Word8 Word8
interestingWord8s = A.array (0, 255) [
  (w, if w == _bracketleft
         || w == _braceleft
         || w == _parenleft
         || w == _bracketleft
         || w == _less
         || w == _a || w == _v || w == _t
    then 1
    else 0)
  | w <- [0 .. 255]]
{-# NOINLINE interestingWord8s #-}

isInterestingWord8 :: Word8 -> Word8
isInterestingWord8 b = interestingWord8s ! b
{-# INLINABLE isInterestingWord8 #-}

blankedXmlToInterestBits :: Monad m => Conduit BS.ByteString m BS.ByteString
blankedXmlToInterestBits = blankedXmlToInterestBits' ""

blankedXmlToInterestBits' :: Monad m => BS.ByteString -> Conduit BS.ByteString m BS.ByteString
blankedXmlToInterestBits' rs = do
  mbs <- await
  case mbs of
    Just bs -> do
      let cs = if BS.length rs /= 0 then BS.concat [rs, bs] else bs
      let lencs = BS.length cs
      let q = lencs `quot` 8
      let (ds, es) = BS.splitAt (q * 8) cs
      let (fs, _) = BS.unfoldrN q gen ds
      yield fs
      blankedXmlToInterestBits' es
    Nothing -> do
      let lenrs = BS.length rs
      let q = lenrs + 7 `quot` 8
      yield (fst (BS.unfoldrN q gen rs))
  where gen :: ByteString -> Maybe (Word8, ByteString)
        gen as = if BS.length as == 0
          then Nothing
          else Just ( BS.foldr' (\b m -> (interestingWord8s ! b) .|. (m .<. 1)) 0 (BS.take 8 as)
                    , BS.drop 8 as
                    )

blankedXmlToBalancedParens :: Monad m => Conduit BS.ByteString m Bool
blankedXmlToBalancedParens = do
  mbs <- await
  case mbs of
    Just bs -> blankedXmlToBalancedParens' bs
    Nothing -> return ()

blankedXmlToBalancedParens' :: Monad m => BS.ByteString -> Conduit BS.ByteString m Bool
blankedXmlToBalancedParens' bs = case BS.uncons bs of
  Just (c, cs) -> do
    case c of
      d | d == _less          -> yield True
      d | d == _greater       -> yield False
      d | d == _bracketleft   -> yield True
      d | d == _bracketright  -> yield False
      d | d == _parenleft     -> yield True
      d | d == _parenright    -> yield False
      d | d == _a             -> yield True >> yield False
      d | d == _v             -> yield True >> yield False
      d | d == _t             -> yield True >> yield False
      _                       -> return ()
    blankedXmlToBalancedParens' cs
  Nothing -> return ()

repartitionMod8 :: BS.ByteString -> BS.ByteString -> (BS.ByteString, BS.ByteString)
repartitionMod8 aBS bBS = (BS.take cLen abBS, BS.drop cLen abBS)
  where abBS = BS.concat [aBS, bBS]
        abLen = BS.length abBS
        cLen = (abLen `div` 8) * 8

compressWordAsBit :: Monad m => Conduit BS.ByteString m BS.ByteString
compressWordAsBit = compressWordAsBit' BS.empty

compressWordAsBit' :: Monad m => BS.ByteString -> Conduit BS.ByteString m BS.ByteString
compressWordAsBit' aBS = do
  mbBS <- await
  case mbBS of
    Just bBS -> do
      let (cBS, dBS) = repartitionMod8 aBS bBS
      let (cs, _) = BS.unfoldrN (BS.length cBS + 7 `div` 8) gen cBS
      yield cs
      compressWordAsBit' dBS
    Nothing -> do
      let (cs, _) = BS.unfoldrN (BS.length aBS + 7 `div` 8) gen aBS
      yield cs
  where gen :: ByteString -> Maybe (Word8, ByteString)
        gen xs = if BS.length xs == 0
          then Nothing
          else Just ( BS.foldr' (\b m -> ((b .&. 1) .|. (m .<. 1))) 0 (BS.take 8 xs)
                    , BS.drop 8 xs
                    )

blankedXmlToBalancedParens2 :: Monad m => Conduit BS.ByteString m BS.ByteString
blankedXmlToBalancedParens2 = do
  mbs <- await
  case mbs of
    Just bs -> do
      let (cs, _) = BS.unfoldrN (BS.length bs * 2) gen (Nothing, bs)
      yield cs
      blankedXmlToBalancedParens2
    Nothing -> return ()
  where gen :: (Maybe Bool, ByteString) -> Maybe (Word8, (Maybe Bool, ByteString))
        gen (Just True  , bs) = Just (0xFF, (Nothing, bs))
        gen (Just False , bs) = Just (0x00, (Nothing, bs))
        gen (Nothing    , bs) = case BS.uncons bs of
          Just (c, cs) -> case balancedParensOf c of
            MiniN  -> gen         (Nothing    , cs)
            MiniT  -> Just (0xFF, (Nothing    , cs))
            MiniF  -> Just (0x00, (Nothing    , cs))
            MiniTF -> Just (0xFF, (Just False , cs))
          Nothing   -> Nothing

data MiniBP = MiniN | MiniT | MiniF | MiniTF

balancedParensOf :: Word8 -> MiniBP
balancedParensOf c = case c of
    d | d == _less          -> MiniT
    d | d == _greater       -> MiniF
    d | d == _bracketleft   -> MiniT
    d | d == _bracketright  -> MiniF
    d | d == _parenleft     -> MiniT
    d | d == _parenright    -> MiniF
    d | d == _t             -> MiniTF
    d | d == _a             -> MiniTF
    d | d == _v             -> MiniTF
    _                       -> MiniN

yieldBitsOfWord8 :: Monad m => Word8 -> Conduit BS.ByteString m Bool
yieldBitsOfWord8 w = do
  yield ((w .&. BITS.bit 0) /= 0)
  yield ((w .&. BITS.bit 1) /= 0)
  yield ((w .&. BITS.bit 2) /= 0)
  yield ((w .&. BITS.bit 3) /= 0)
  yield ((w .&. BITS.bit 4) /= 0)
  yield ((w .&. BITS.bit 5) /= 0)
  yield ((w .&. BITS.bit 6) /= 0)
  yield ((w .&. BITS.bit 7) /= 0)

yieldBitsofWord8s :: Monad m => [Word8] -> Conduit BS.ByteString m Bool
yieldBitsofWord8s = P.foldr ((>>) . yieldBitsOfWord8) (return ())

byteStringToBits :: Monad m => Conduit BS.ByteString m Bool
byteStringToBits = do
  mbs <- await
  case mbs of
    Just bs -> yieldBitsofWord8s (BS.unpack bs) >> byteStringToBits
    Nothing -> return ()