{-# LANGUAGE CPP #-}
module Darcs.Util.ByteString
    (
    
      gzReadFilePS
    , mmapFilePS
    , gzWriteFilePS
    , gzWriteFilePSs
    , gzReadStdin
    , gzWriteHandle
    , FileSegment
    , readSegment
    
    , isGZFile
    , gzDecompress
    
    , dropSpace
    , linesPS
    , unlinesPS
    , hashPS
    , breakFirstPS
    , breakLastPS
    , substrPS
    , isFunky
    , fromHex2PS
    , fromPS2Hex
    , betweenLinesPS
    , intercalate
    
    , isAscii
    , decodeLocale
    , encodeLocale
    , unpackPSFromUTF8
    , packStringToUTF8
    
    , prop_unlinesPS_linesPS_left_inverse
    , prop_linesPS_length
    , prop_unlinesPS_length
    , propHexConversion
    , spec_betweenLinesPS
    ) where
import Darcs.Prelude
import qualified Data.ByteString            as B
import qualified Data.ByteString.Char8      as BC
import qualified Data.ByteString.Lazy       as BL
import Data.ByteString (intercalate)
import qualified Data.ByteString.Base16     as B16
import System.IO ( withFile, IOMode(ReadMode)
                 , hSeek, SeekMode(SeekFromEnd,AbsoluteSeek)
                 , openBinaryFile, hClose, Handle, hGetChar
                 , stdin)
import System.IO.Error          ( catchIOError )
import System.IO.Unsafe         ( unsafePerformIO )
import Data.Bits                ( rotateL )
import Data.Char                ( ord )
import Data.Word                ( Word8 )
import Data.Int                 ( Int32, Int64 )
import Data.List                ( intersperse )
import Control.Exception ( throw )
import Control.Monad            ( when )
import Control.Monad.ST.Lazy    ( ST )
import qualified Codec.Compression.GZip as GZ
import qualified Codec.Compression.Zlib.Internal as ZI
import Darcs.Util.Encoding ( decode, encode, decodeUtf8, encodeUtf8 )
import Darcs.Util.Global ( addCRCWarning )
#if mingw32_HOST_OS
#else
import System.IO.MMap( mmapFileByteString )
#endif
import System.Mem( performGC )
import System.Posix.Files( fileSize, getSymbolicLinkStatus )
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 0x20 = True
isSpaceWord8 0x09 = True
isSpaceWord8 0x0A = True
isSpaceWord8 0x0D = True
isSpaceWord8 _    = False
{-# INLINE isSpaceWord8 #-}
dropSpace :: B.ByteString -> B.ByteString
dropSpace bs = B.dropWhile isSpaceWord8 bs
{-# INLINE isFunky #-}
isFunky :: B.ByteString -> Bool
isFunky ps = 0 `B.elem` ps || 26 `B.elem` ps
{-# INLINE hashPS #-}
hashPS :: B.ByteString -> Int32
hashPS = B.foldl' hashByte 0
{-# INLINE hashByte #-}
hashByte :: Int32 -> Word8 -> Int32
hashByte h x = fromIntegral x + rotateL h 8
{-# INLINE substrPS #-}
substrPS :: B.ByteString -> B.ByteString -> Maybe Int
substrPS tok str
    | B.null tok = Just 0
    | B.length tok > B.length str = Nothing
    | otherwise = do n <- B.elemIndex (B.head tok) str
                     let ttok = B.tail tok
                         reststr = B.drop (n+1) str
                     if ttok == B.take (B.length ttok) reststr
                        then Just n
                        else ((n+1)+) `fmap` substrPS tok reststr
{-# INLINE breakFirstPS #-}
breakFirstPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakFirstPS c p = case BC.elemIndex c p of
                   Nothing -> Nothing
                   Just n -> Just (B.take n p, B.drop (n+1) p)
{-# INLINE breakLastPS #-}
breakLastPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakLastPS c p = case BC.elemIndexEnd c p of
                  Nothing -> Nothing
                  Just n -> Just (B.take n p, B.drop (n+1) p)
{-# INLINE linesPS #-}
linesPS :: B.ByteString -> [B.ByteString]
linesPS ps
     | B.null ps = [B.empty]
     | otherwise = BC.split '\n' ps
{-# INLINE unlinesPS #-}
unlinesPS :: [B.ByteString] -> B.ByteString
unlinesPS [] = B.empty
unlinesPS x  = B.concat $ intersperse (BC.singleton '\n') x
prop_unlinesPS_linesPS_left_inverse :: B.ByteString -> Bool
prop_unlinesPS_linesPS_left_inverse x = unlinesPS (linesPS x) == x
prop_linesPS_length :: B.ByteString -> Bool
prop_linesPS_length x = length (linesPS x) == length (BC.elemIndices '\n' x) + 1
prop_unlinesPS_length :: [B.ByteString] -> Bool
prop_unlinesPS_length xs =
  B.length (unlinesPS xs) == if null xs then 0 else sum (map B.length xs) + length xs - 1
gzDecompress :: Maybe Int -> BL.ByteString -> ([B.ByteString], Bool)
gzDecompress mbufsize =
    
    
    decompressWarn (ZI.decompressST ZI.gzipFormat decompressParams)
  where
        decompressParams = case mbufsize of
                              Just bufsize -> GZ.defaultDecompressParams { GZ.decompressBufferSize = bufsize }
                              Nothing -> GZ.defaultDecompressParams
        decompressWarn :: (forall s . ZI.DecompressStream (ST s)) -> BL.ByteString -> ([B.ByteString], Bool)
        decompressWarn = ZI.foldDecompressStreamWithInput
                           (\x ~(xs, b) -> (x:xs, b))
                           (\xs -> if BL.null xs
                                      then ([], False)
                                      else error "trailing data at end of compressed stream"
                           )
                           handleBad
        
        
        
        handleBad (ZI.DataFormatError "incorrect data check") = ([], True)
        handleBad e = error (show e)
isGZFile :: FilePath -> IO (Maybe Int)
isGZFile f = do
    h <- openBinaryFile f ReadMode
    header <- B.hGet h 2
    if header /= B.pack [31,139]
       then do hClose h
               return Nothing
       else do hSeek h SeekFromEnd (-4)
               len <- hGetLittleEndInt h
               hClose h
               return (Just len)
gzReadFilePS :: FilePath -> IO B.ByteString
gzReadFilePS f = do
    mlen <- isGZFile f
    case mlen of
       Nothing -> mmapFilePS f
       Just len ->
            do 
               
               
               let doDecompress buf = let (res, bad) = gzDecompress (Just len) buf
                                      in do when bad $ addCRCWarning f
                                            return res
               compressed <- (BL.fromChunks . return) `fmap` mmapFilePS f
               B.concat `fmap` doDecompress compressed
hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt h = do
    b1 <- ord `fmap` hGetChar h
    b2 <- ord `fmap` hGetChar h
    b3 <- ord `fmap` hGetChar h
    b4 <- ord `fmap` hGetChar h
    return $ b1 + 256*b2 + 65536*b3 + 16777216*b4
gzWriteFilePS :: FilePath -> B.ByteString -> IO ()
gzWriteFilePS f ps = gzWriteFilePSs f [ps]
gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO ()
gzWriteFilePSs f pss  =
    BL.writeFile f $ GZ.compress $ BL.fromChunks pss
gzWriteHandle :: Handle -> [B.ByteString] -> IO ()
gzWriteHandle h pss  =
    BL.hPut h $ GZ.compress $ BL.fromChunks pss
gzReadStdin :: IO B.ByteString
gzReadStdin = do
    header <- B.hGet stdin 2
    rest   <- B.hGetContents stdin
    let allStdin = B.concat [header,rest]
    return $
     if header /= B.pack [31,139]
      then allStdin
      else let decompress = fst . gzDecompress Nothing
               compressed = BL.fromChunks [allStdin]
           in
           B.concat $ decompress compressed
type FileSegment = (FilePath, Maybe (Int64, Int))
readSegment :: FileSegment -> IO BL.ByteString
readSegment (f,range) = do
    bs <- tryToRead
       `catchIOError` (\_ -> do
                     size <- fileSize `fmap` getSymbolicLinkStatus f
                     if size == 0
                        then return B.empty
                        else performGC >> tryToRead)
    return $ BL.fromChunks [bs]
  where
    tryToRead =
        case range of
            Nothing -> B.readFile f
            Just (off, size) -> withFile f ReadMode $ \h -> do
                hSeek h AbsoluteSeek $ fromIntegral off
                B.hGet h size
{-# INLINE readSegment #-}
mmapFilePS :: FilePath -> IO B.ByteString
#if mingw32_HOST_OS
mmapFilePS = B.readFile
#else
mmapFilePS f =
  mmapFileByteString f Nothing
   `catchIOError` (\_ -> do
                     size <- fileSize `fmap` getSymbolicLinkStatus f
                     if size == 0
                        then return B.empty
                        else performGC >> mmapFileByteString f Nothing)
#endif
fromPS2Hex :: B.ByteString -> B.ByteString
fromPS2Hex = B16.encode
fromHex2PS :: B.ByteString -> B.ByteString
fromHex2PS s =
  case B16.decode s of
#if MIN_VERSION_base16_bytestring(1,0,0)
    Right result -> result
    Left msg -> throw $ userError $ "fromHex2PS: input is not hex encoded: "++msg
#else
    (result, rest) | B.null rest -> result
    _ -> throw $ userError $ "fromHex2PS: input is not hex encoded"
#endif
propHexConversion :: B.ByteString -> Bool
propHexConversion x = fromHex2PS (fromPS2Hex x) == x
betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
               -> Maybe B.ByteString
betweenLinesPS start end ps =
  case B.breakSubstring start_line ps of
    (before_start, at_start)
      | not (B.null at_start)
      , B.null before_start || BC.last before_start == '\n' ->
          case B.breakSubstring end_line (B.drop (B.length start_line) at_start) of
            (before_end, at_end)
              | not (B.null at_end)
              , B.null before_end || BC.last before_end == '\n' -> Just before_end
              | otherwise -> Nothing
      | otherwise -> Nothing
  where
    start_line = BC.snoc start '\n'
    end_line = BC.snoc end '\n'
spec_betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
                    -> Maybe B.ByteString
spec_betweenLinesPS start end ps =
  case break (start ==) (linesPS ps) of
    (_, _:after_start) ->
      case break (end ==) after_start of
        (before_end, _:_) ->
          Just $ BC.unlines before_end
        _ -> Nothing
    _ -> Nothing
isAscii :: B.ByteString -> Bool
isAscii = B.all (< 128)
unpackPSFromUTF8 :: B.ByteString -> String
unpackPSFromUTF8  = unsafePerformIO . decodeUtf8
packStringToUTF8 :: String -> B.ByteString
packStringToUTF8 = unsafePerformIO . encodeUtf8
decodeLocale :: B.ByteString -> String
decodeLocale = unsafePerformIO . decode
encodeLocale :: String -> B.ByteString
encodeLocale s = unsafePerformIO $ encode s `catchIOError` (\_ -> encodeUtf8 s)