{-# LANGUAGE CPP #-}
module Darcs.Util.ByteString
(
gzReadFilePS
, mmapFilePS
, gzWriteFilePS
, gzWriteFilePSs
, gzReadStdin
, gzWriteHandle
, FileSegment
, readSegment
, isGZFile
, gzDecompress
, dropSpace
, breakSpace
, linesPS
, unlinesPS
, hashPS
, breakFirstPS
, breakLastPS
, substrPS
, readIntPS
, isFunky
, fromHex2PS
, fromPS2Hex
, betweenLinesPS
, intercalate
, isAscii
, decodeLocale
, encodeLocale
, unpackPSFromUTF8
, packStringToUTF8
, prop_unlinesPS_linesPS_left_inverse
, prop_linesPS_length
, prop_unlinesPS_length
, propHexConversion
, spec_betweenLinesPS
) where
import Prelude ()
import Darcs.Prelude
import Codec.Binary.Base16 ( b16Enc, b16Dec )
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as BL
import Data.ByteString (intercalate)
import System.IO ( withFile, IOMode(ReadMode)
, hSeek, SeekMode(SeekFromEnd,AbsoluteSeek)
, openBinaryFile, hClose, Handle, hGetChar
, stdin)
import System.IO.Error ( catchIOError )
import System.IO.Unsafe ( unsafePerformIO )
import Data.Bits ( rotateL )
import Data.Char ( ord, isSpace, toLower, toUpper )
import Data.Word ( Word8 )
import Data.Int ( Int32, Int64 )
import Data.List ( intersperse )
import Control.Monad ( when )
import Control.Monad.ST.Lazy ( ST )
import qualified Codec.Compression.GZip as GZ
import qualified Codec.Compression.Zlib.Internal as ZI
import Darcs.Util.Encoding ( decode, encode, decodeUtf8, encodeUtf8 )
import Darcs.Util.Global ( addCRCWarning )
#if mingw32_HOST_OS
#else
import System.IO.MMap( mmapFileByteString )
#endif
import System.Mem( performGC )
import System.Posix.Files( fileSize, getSymbolicLinkStatus )
readIntPS :: B.ByteString -> Maybe (Int, B.ByteString)
readIntPS = BC.readInt . BC.dropWhile isSpace
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 = (`elem` [0x20, 0x09, 0x0A, 0x0D])
{-# INLINE isSpaceWord8 #-}
dropSpace :: B.ByteString -> B.ByteString
dropSpace bs = B.dropWhile isSpaceWord8 bs
breakSpace :: B.ByteString -> (B.ByteString, B.ByteString)
breakSpace bs = B.break isSpaceWord8 bs
{-# INLINE isFunky #-}
isFunky :: B.ByteString -> Bool
isFunky ps = 0 `B.elem` ps || 26 `B.elem` ps
{-# INLINE hashPS #-}
hashPS :: B.ByteString -> Int32
hashPS = B.foldl' hashByte 0
{-# INLINE hashByte #-}
hashByte :: Int32 -> Word8 -> Int32
hashByte h x = fromIntegral x + rotateL h 8
{-# INLINE substrPS #-}
substrPS :: B.ByteString -> B.ByteString -> Maybe Int
substrPS tok str
| B.null tok = Just 0
| B.length tok > B.length str = Nothing
| otherwise = do n <- B.elemIndex (B.head tok) str
let ttok = B.tail tok
reststr = B.drop (n+1) str
if ttok == B.take (B.length ttok) reststr
then Just n
else ((n+1)+) `fmap` substrPS tok reststr
{-# INLINE breakFirstPS #-}
breakFirstPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakFirstPS c p = case BC.elemIndex c p of
Nothing -> Nothing
Just n -> Just (B.take n p, B.drop (n+1) p)
{-# INLINE breakLastPS #-}
breakLastPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakLastPS c p = case BC.elemIndexEnd c p of
Nothing -> Nothing
Just n -> Just (B.take n p, B.drop (n+1) p)
{-# INLINE linesPS #-}
linesPS :: B.ByteString -> [B.ByteString]
linesPS ps
| B.null ps = [B.empty]
| otherwise = BC.split '\n' ps
{-# INLINE unlinesPS #-}
unlinesPS :: [B.ByteString] -> B.ByteString
unlinesPS [] = B.empty
unlinesPS x = B.concat $ intersperse (BC.singleton '\n') x
prop_unlinesPS_linesPS_left_inverse :: B.ByteString -> Bool
prop_unlinesPS_linesPS_left_inverse x = unlinesPS (linesPS x) == x
prop_linesPS_length :: B.ByteString -> Bool
prop_linesPS_length x = length (linesPS x) == length (BC.elemIndices '\n' x) + 1
prop_unlinesPS_length :: [B.ByteString] -> Bool
prop_unlinesPS_length xs =
B.length (unlinesPS xs) == if null xs then 0 else sum (map B.length xs) + length xs - 1
gzDecompress :: Maybe Int -> BL.ByteString -> ([B.ByteString], Bool)
gzDecompress mbufsize =
decompressWarn (ZI.decompressST ZI.gzipFormat decompressParams)
where
decompressParams = case mbufsize of
Just bufsize -> GZ.defaultDecompressParams { GZ.decompressBufferSize = bufsize }
Nothing -> GZ.defaultDecompressParams
decompressWarn :: (forall s . ZI.DecompressStream (ST s)) -> BL.ByteString -> ([B.ByteString], Bool)
decompressWarn = ZI.foldDecompressStreamWithInput
(\x ~(xs, b) -> (x:xs, b))
(\xs -> if BL.null xs
then ([], False)
else error "trailing data at end of compressed stream"
)
handleBad
handleBad (ZI.DataFormatError "incorrect data check") = ([], True)
handleBad e = error (show e)
isGZFile :: FilePath -> IO (Maybe Int)
isGZFile f = do
h <- openBinaryFile f ReadMode
header <- B.hGet h 2
if header /= B.pack [31,139]
then do hClose h
return Nothing
else do hSeek h SeekFromEnd (-4)
len <- hGetLittleEndInt h
hClose h
return (Just len)
gzReadFilePS :: FilePath -> IO B.ByteString
gzReadFilePS f = do
mlen <- isGZFile f
case mlen of
Nothing -> mmapFilePS f
Just len ->
do
let doDecompress buf = let (res, bad) = gzDecompress (Just len) buf
in do when bad $ addCRCWarning f
return res
compressed <- (BL.fromChunks . return) `fmap` mmapFilePS f
B.concat `fmap` doDecompress compressed
hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt h = do
b1 <- ord `fmap` hGetChar h
b2 <- ord `fmap` hGetChar h
b3 <- ord `fmap` hGetChar h
b4 <- ord `fmap` hGetChar h
return $ b1 + 256*b2 + 65536*b3 + 16777216*b4
gzWriteFilePS :: FilePath -> B.ByteString -> IO ()
gzWriteFilePS f ps = gzWriteFilePSs f [ps]
gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO ()
gzWriteFilePSs f pss =
BL.writeFile f $ GZ.compress $ BL.fromChunks pss
gzWriteHandle :: Handle -> [B.ByteString] -> IO ()
gzWriteHandle h pss =
BL.hPut h $ GZ.compress $ BL.fromChunks pss
gzReadStdin :: IO B.ByteString
gzReadStdin = do
header <- B.hGet stdin 2
rest <- B.hGetContents stdin
let allStdin = B.concat [header,rest]
return $
if header /= B.pack [31,139]
then allStdin
else let decompress = fst . gzDecompress Nothing
compressed = BL.fromChunks [allStdin]
in
B.concat $ decompress compressed
type FileSegment = (FilePath, Maybe (Int64, Int))
readSegment :: FileSegment -> IO BL.ByteString
readSegment (f,range) = do
bs <- tryToRead
`catchIOError` (\_ -> do
size <- fileSize `fmap` getSymbolicLinkStatus f
if size == 0
then return B.empty
else performGC >> tryToRead)
return $ BL.fromChunks [bs]
where
tryToRead =
case range of
Nothing -> B.readFile f
Just (off, size) -> withFile f ReadMode $ \h -> do
hSeek h AbsoluteSeek $ fromIntegral off
B.hGet h size
{-# INLINE readSegment #-}
mmapFilePS :: FilePath -> IO B.ByteString
#if mingw32_HOST_OS
mmapFilePS = B.readFile
#else
mmapFilePS f =
mmapFileByteString f Nothing
`catchIOError` (\_ -> do
size <- fileSize `fmap` getSymbolicLinkStatus f
if size == 0
then return B.empty
else performGC >> mmapFileByteString f Nothing)
#endif
fromPS2Hex :: B.ByteString -> B.ByteString
fromPS2Hex = BC.map toLower . b16Enc
fromHex2PS :: B.ByteString -> B.ByteString
fromHex2PS s =
case b16Dec $ BC.map toUpper s of
Right (result, remaining)
| B.null remaining -> result
_ -> error "fromHex2PS: input is not hex encoded"
propHexConversion :: B.ByteString -> Bool
propHexConversion x = fromHex2PS (fromPS2Hex x) == x
betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
-> Maybe B.ByteString
betweenLinesPS start end ps =
case B.breakSubstring start_line ps of
(before_start, at_start)
| not (B.null at_start)
, B.null before_start || BC.last before_start == '\n' ->
case B.breakSubstring end_line (B.drop (B.length start_line) at_start) of
(before_end, at_end)
| not (B.null at_end)
, B.null before_end || BC.last before_end == '\n' -> Just before_end
| otherwise -> Nothing
| otherwise -> Nothing
where
start_line = BC.snoc start '\n'
end_line = BC.snoc end '\n'
spec_betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
-> Maybe B.ByteString
spec_betweenLinesPS start end ps =
case break (start ==) (linesPS ps) of
(_, _:after_start) ->
case break (end ==) after_start of
(before_end, _:_) ->
Just $ BC.unlines before_end
_ -> Nothing
_ -> Nothing
isAscii :: B.ByteString -> Bool
isAscii = B.all (< 128)
unpackPSFromUTF8 :: B.ByteString -> String
unpackPSFromUTF8 = unsafePerformIO . decodeUtf8
packStringToUTF8 :: String -> B.ByteString
packStringToUTF8 = unsafePerformIO . encodeUtf8
decodeLocale :: B.ByteString -> String
decodeLocale = unsafePerformIO . decode
encodeLocale :: String -> B.ByteString
encodeLocale s = unsafePerformIO $ encode s `catchIOError` (\_ -> encodeUtf8 s)