{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Darcs.Util.ByteString
-- Copyright   :  (c) The University of Glasgow 2001,
--                    David Roundy 2003-2005
-- License : GPL (I'm happy to also license this file BSD style but don't
--           want to bother distributing two license files with darcs.
--
-- Maintainer  :  droundy@abridgegame.org
-- Stability   :  experimental
-- Portability :  portable
--
-- GZIp and MMap IO for ByteStrings, encoding utilities, and miscellaneous
-- functions for Data.ByteString
--
module Darcs.Util.ByteString
    (
    -- * IO with mmap or gzip
      gzReadFilePS
    , mmapFilePS
    , gzWriteFilePS
    , gzWriteFilePSs
    , gzReadStdin
    , gzWriteHandle
    , FileSegment
    , readSegment
    -- * gzip handling
    , isGZFile
    , gzDecompress
    -- * list utilities
    , dropSpace
    , linesPS
    , unlinesPS
    , hashPS
    , breakFirstPS
    , breakLastPS
    , substrPS
    , isFunky
    , fromHex2PS
    , fromPS2Hex
    , betweenLinesPS
    , intercalate
    -- * encoding and unicode utilities
    , isAscii
    , decodeLocale
    , encodeLocale
    , unpackPSFromUTF8
    , packStringToUTF8
    -- * properties
    , prop_unlinesPS_linesPS_left_inverse
    , prop_linesPS_length
    , prop_unlinesPS_length
    , propHexConversion
    , spec_betweenLinesPS
    ) where

import Darcs.Prelude

import qualified Data.ByteString            as B
import qualified Data.ByteString.Char8      as BC
import qualified Data.ByteString.Lazy       as BL
import Data.ByteString (intercalate)
import qualified Data.ByteString.Base16     as B16

import System.IO ( withFile, IOMode(ReadMode)
                 , hSeek, SeekMode(SeekFromEnd,AbsoluteSeek)
                 , openBinaryFile, hClose, Handle, hGetChar
                 , stdin)
import System.IO.Error          ( catchIOError )
import System.IO.Unsafe         ( unsafePerformIO )

import Data.Bits                ( rotateL )
import Data.Char                ( ord )
import Data.Word                ( Word8 )
import Data.Int                 ( Int32, Int64 )
import Data.List                ( intersperse )
import Control.Exception ( throw )
import Control.Monad            ( when )
import Control.Monad.ST.Lazy    ( ST )

import qualified Codec.Compression.GZip as GZ
import qualified Codec.Compression.Zlib.Internal as ZI
import Darcs.Util.Encoding ( decode, encode, decodeUtf8, encodeUtf8 )
import Darcs.Util.Global ( addCRCWarning )

#if mingw32_HOST_OS
#else
import System.IO.MMap( mmapFileByteString )
#endif
import System.Mem( performGC )
import System.Posix.Files( fileSize, getSymbolicLinkStatus )

------------------------------------------------------------------------
-- A locale-independent isspace(3) so patches are interpreted the same everywhere.
-- ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 Word8
0x20 = Bool
True
isSpaceWord8 Word8
0x09 = Bool
True
isSpaceWord8 Word8
0x0A = Bool
True
isSpaceWord8 Word8
0x0D = Bool
True
isSpaceWord8 Word8
_    = Bool
False
{-# INLINE isSpaceWord8 #-}

-- | Drop leading white space, where white space is defined as
-- consisting of ' ', '\t', '\n', or '\r'.
dropSpace :: B.ByteString -> B.ByteString
dropSpace :: ByteString -> ByteString
dropSpace ByteString
bs = (Word8 -> Bool) -> ByteString -> ByteString
B.dropWhile Word8 -> Bool
isSpaceWord8 ByteString
bs

------------------------------------------------------------------------

{-# INLINE isFunky #-}
isFunky :: B.ByteString -> Bool
isFunky :: ByteString -> Bool
isFunky ByteString
ps = Word8
0 Word8 -> ByteString -> Bool
`B.elem` ByteString
ps Bool -> Bool -> Bool
|| Word8
26 Word8 -> ByteString -> Bool
`B.elem` ByteString
ps

------------------------------------------------------------------------

{-# INLINE hashPS #-}
hashPS :: B.ByteString -> Int32
hashPS :: ByteString -> Int32
hashPS = (Int32 -> Word8 -> Int32) -> Int32 -> ByteString -> Int32
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
B.foldl' Int32 -> Word8 -> Int32
hashByte Int32
0

{-# INLINE hashByte #-}
hashByte :: Int32 -> Word8 -> Int32
hashByte :: Int32 -> Word8 -> Int32
hashByte Int32
h Word8
x = Word8 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
+ Int32 -> Int -> Int32
forall a. Bits a => a -> Int -> a
rotateL Int32
h Int
8

{-# INLINE substrPS #-}
substrPS :: B.ByteString -> B.ByteString -> Maybe Int
substrPS :: ByteString -> ByteString -> Maybe Int
substrPS ByteString
tok ByteString
str
    | ByteString -> Bool
B.null ByteString
tok = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
    | ByteString -> Int
B.length ByteString
tok Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> ByteString -> Int
B.length ByteString
str = Maybe Int
forall a. Maybe a
Nothing
    | Bool
otherwise = do Int
n <- Word8 -> ByteString -> Maybe Int
B.elemIndex (ByteString -> Word8
B.head ByteString
tok) ByteString
str
                     let ttok :: ByteString
ttok = ByteString -> ByteString
B.tail ByteString
tok
                         reststr :: ByteString
reststr = Int -> ByteString -> ByteString
B.drop (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) ByteString
str
                     if ByteString
ttok ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> ByteString -> ByteString
B.take (ByteString -> Int
B.length ByteString
ttok) ByteString
reststr
                        then Int -> Maybe Int
forall a. a -> Maybe a
Just Int
n
                        else ((Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)Int -> Int -> Int
forall a. Num a => a -> a -> a
+) (Int -> Int) -> Maybe Int -> Maybe Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ByteString -> ByteString -> Maybe Int
substrPS ByteString
tok ByteString
reststr

------------------------------------------------------------------------

-- TODO: replace breakFirstPS and breakLastPS with definitions based on
-- ByteString's break/breakEnd
{-# INLINE breakFirstPS #-}
breakFirstPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakFirstPS :: Char -> ByteString -> Maybe (ByteString, ByteString)
breakFirstPS Char
c ByteString
p = case Char -> ByteString -> Maybe Int
BC.elemIndex Char
c ByteString
p of
                   Maybe Int
Nothing -> Maybe (ByteString, ByteString)
forall a. Maybe a
Nothing
                   Just Int
n -> (ByteString, ByteString) -> Maybe (ByteString, ByteString)
forall a. a -> Maybe a
Just (Int -> ByteString -> ByteString
B.take Int
n ByteString
p, Int -> ByteString -> ByteString
B.drop (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) ByteString
p)

{-# INLINE breakLastPS #-}
breakLastPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakLastPS :: Char -> ByteString -> Maybe (ByteString, ByteString)
breakLastPS Char
c ByteString
p = case Char -> ByteString -> Maybe Int
BC.elemIndexEnd Char
c ByteString
p of
                  Maybe Int
Nothing -> Maybe (ByteString, ByteString)
forall a. Maybe a
Nothing
                  Just Int
n -> (ByteString, ByteString) -> Maybe (ByteString, ByteString)
forall a. a -> Maybe a
Just (Int -> ByteString -> ByteString
B.take Int
n ByteString
p, Int -> ByteString -> ByteString
B.drop (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) ByteString
p)

------------------------------------------------------------------------
-- linesPS and unlinesPS

{-# INLINE linesPS #-}
linesPS :: B.ByteString -> [B.ByteString]
linesPS :: ByteString -> [ByteString]
linesPS ByteString
ps
     | ByteString -> Bool
B.null ByteString
ps = [ByteString
B.empty]
     | Bool
otherwise = Char -> ByteString -> [ByteString]
BC.split Char
'\n' ByteString
ps

{-# INLINE unlinesPS #-}
unlinesPS :: [B.ByteString] -> B.ByteString
unlinesPS :: [ByteString] -> ByteString
unlinesPS [] = ByteString
B.empty
unlinesPS [ByteString]
x  = [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
intersperse (Char -> ByteString
BC.singleton Char
'\n') [ByteString]
x

-- properties of linesPS and unlinesPS

prop_unlinesPS_linesPS_left_inverse :: B.ByteString -> Bool
prop_unlinesPS_linesPS_left_inverse :: ByteString -> Bool
prop_unlinesPS_linesPS_left_inverse ByteString
x = [ByteString] -> ByteString
unlinesPS (ByteString -> [ByteString]
linesPS ByteString
x) ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
x

prop_linesPS_length :: B.ByteString -> Bool
prop_linesPS_length :: ByteString -> Bool
prop_linesPS_length ByteString
x = [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (ByteString -> [ByteString]
linesPS ByteString
x) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (Char -> ByteString -> [Int]
BC.elemIndices Char
'\n' ByteString
x) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

prop_unlinesPS_length :: [B.ByteString] -> Bool
prop_unlinesPS_length :: [ByteString] -> Bool
prop_unlinesPS_length [ByteString]
xs =
  ByteString -> Int
B.length ([ByteString] -> ByteString
unlinesPS [ByteString]
xs) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== if [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
xs then Int
0 else [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((ByteString -> Int) -> [ByteString] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Int
B.length [ByteString]
xs) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
xs Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1

-- -----------------------------------------------------------------------------
-- gzReadFilePS

-- |Decompress the given bytestring into a lazy list of chunks, along with a boolean
-- flag indicating (if True) that the CRC was corrupted.
-- Inspecting the flag will cause the entire list of chunks to be evaluated (but if
-- you throw away the list immediately this should run in constant space).
gzDecompress :: Maybe Int -> BL.ByteString -> ([B.ByteString], Bool)
gzDecompress :: Maybe Int -> ByteString -> ([ByteString], Bool)
gzDecompress Maybe Int
mbufsize =
    -- This is what the code would be without the bad CRC recovery logic:
    -- return . BL.toChunks . GZ.decompressWith decompressParams
    (forall s. DecompressStream (ST s))
-> ByteString -> ([ByteString], Bool)
decompressWarn (Format -> DecompressParams -> DecompressStream (ST s)
forall s. Format -> DecompressParams -> DecompressStream (ST s)
ZI.decompressST Format
ZI.gzipFormat DecompressParams
decompressParams)
  where
        decompressParams :: DecompressParams
decompressParams = case Maybe Int
mbufsize of
                              Just Int
bufsize -> DecompressParams
GZ.defaultDecompressParams { decompressBufferSize :: Int
GZ.decompressBufferSize = Int
bufsize }
                              Maybe Int
Nothing -> DecompressParams
GZ.defaultDecompressParams

        decompressWarn :: (forall s . ZI.DecompressStream (ST s)) -> BL.ByteString -> ([B.ByteString], Bool)
        decompressWarn :: (forall s. DecompressStream (ST s))
-> ByteString -> ([ByteString], Bool)
decompressWarn = (ByteString -> ([ByteString], Bool) -> ([ByteString], Bool))
-> (ByteString -> ([ByteString], Bool))
-> (DecompressError -> ([ByteString], Bool))
-> (forall s. DecompressStream (ST s))
-> ByteString
-> ([ByteString], Bool)
forall a.
(ByteString -> a -> a)
-> (ByteString -> a)
-> (DecompressError -> a)
-> (forall s. DecompressStream (ST s))
-> ByteString
-> a
ZI.foldDecompressStreamWithInput
                           (\ByteString
x ~([ByteString]
xs, Bool
b) -> (ByteString
xByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
xs, Bool
b))
                           (\ByteString
xs -> if ByteString -> Bool
BL.null ByteString
xs
                                      then ([], Bool
False)
                                      else [Char] -> ([ByteString], Bool)
forall a. HasCallStack => [Char] -> a
error [Char]
"trailing data at end of compressed stream"
                           )
                           DecompressError -> ([ByteString], Bool)
forall a. DecompressError -> ([a], Bool)
handleBad

        -- For a while a bug in darcs caused gzip files with good data but bad CRCs to be
        -- produced. Trap bad CRC messages, run the specified action to report that it happened,
        -- but continue on the assumption that the data is valid.
        handleBad :: DecompressError -> ([a], Bool)
handleBad (ZI.DataFormatError [Char]
"incorrect data check") = ([], Bool
True)
        handleBad DecompressError
e = [Char] -> ([a], Bool)
forall a. HasCallStack => [Char] -> a
error (DecompressError -> [Char]
forall a. Show a => a -> [Char]
show DecompressError
e)

isGZFile :: FilePath -> IO (Maybe Int)
isGZFile :: [Char] -> IO (Maybe Int)
isGZFile [Char]
f = do
    Handle
h <- [Char] -> IOMode -> IO Handle
openBinaryFile [Char]
f IOMode
ReadMode
    ByteString
header <- Handle -> Int -> IO ByteString
B.hGet Handle
h Int
2
    if ByteString
header ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= [Word8] -> ByteString
B.pack [Word8
31,Word8
139]
       then do Handle -> IO ()
hClose Handle
h
               Maybe Int -> IO (Maybe Int)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Int
forall a. Maybe a
Nothing
       else do Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
h SeekMode
SeekFromEnd (-Integer
4)
               Int
len <- Handle -> IO Int
hGetLittleEndInt Handle
h
               Handle -> IO ()
hClose Handle
h
               Maybe Int -> IO (Maybe Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
len)

-- | Read an entire file, which may or may not be gzip compressed, directly
-- into a 'B.ByteString'.
gzReadFilePS :: FilePath -> IO B.ByteString
gzReadFilePS :: [Char] -> IO ByteString
gzReadFilePS [Char]
f = do
    Maybe Int
mlen <- [Char] -> IO (Maybe Int)
isGZFile [Char]
f
    case Maybe Int
mlen of
       Maybe Int
Nothing -> [Char] -> IO ByteString
mmapFilePS [Char]
f
       Just Int
len ->
            do -- Passing the length to gzDecompress means that it produces produces one chunk,
               -- which in turn means that B.concat won't need to copy data.
               -- If the length is wrong this will just affect efficiency, not correctness
               let doDecompress :: ByteString -> IO [ByteString]
doDecompress ByteString
buf = let ([ByteString]
res, Bool
bad) = Maybe Int -> ByteString -> ([ByteString], Bool)
gzDecompress (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
len) ByteString
buf
                                      in do Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
bad (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ [Char] -> IO ()
addCRCWarning [Char]
f
                                            [ByteString] -> IO [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return [ByteString]
res
               ByteString
compressed <- ([ByteString] -> ByteString
BL.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return) (ByteString -> ByteString) -> IO ByteString -> IO ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [Char] -> IO ByteString
mmapFilePS [Char]
f
               [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> IO [ByteString] -> IO ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ByteString -> IO [ByteString]
doDecompress ByteString
compressed

hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt Handle
h = do
    Int
b1 <- Char -> Int
ord (Char -> Int) -> IO Char -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Handle -> IO Char
hGetChar Handle
h
    Int
b2 <- Char -> Int
ord (Char -> Int) -> IO Char -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Handle -> IO Char
hGetChar Handle
h
    Int
b3 <- Char -> Int
ord (Char -> Int) -> IO Char -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Handle -> IO Char
hGetChar Handle
h
    Int
b4 <- Char -> Int
ord (Char -> Int) -> IO Char -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Handle -> IO Char
hGetChar Handle
h
    Int -> IO Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> IO Int) -> Int -> IO Int
forall a b. (a -> b) -> a -> b
$ Int
b1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
256Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
b2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
65536Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
b3 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
16777216Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
b4

gzWriteFilePS :: FilePath -> B.ByteString -> IO ()
gzWriteFilePS :: [Char] -> ByteString -> IO ()
gzWriteFilePS [Char]
f ByteString
ps = [Char] -> [ByteString] -> IO ()
gzWriteFilePSs [Char]
f [ByteString
ps]

gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO ()
gzWriteFilePSs :: [Char] -> [ByteString] -> IO ()
gzWriteFilePSs [Char]
f [ByteString]
pss  =
    [Char] -> ByteString -> IO ()
BL.writeFile [Char]
f (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
GZ.compress (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.fromChunks [ByteString]
pss

gzWriteHandle :: Handle -> [B.ByteString] -> IO ()
gzWriteHandle :: Handle -> [ByteString] -> IO ()
gzWriteHandle Handle
h [ByteString]
pss  =
    Handle -> ByteString -> IO ()
BL.hPut Handle
h (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
GZ.compress (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.fromChunks [ByteString]
pss

-- | Read standard input, which may or may not be gzip compressed, directly
-- into a 'B.ByteString'.
gzReadStdin :: IO B.ByteString
gzReadStdin :: IO ByteString
gzReadStdin = do
    ByteString
header <- Handle -> Int -> IO ByteString
B.hGet Handle
stdin Int
2
    ByteString
rest   <- Handle -> IO ByteString
B.hGetContents Handle
stdin
    let allStdin :: ByteString
allStdin = [ByteString] -> ByteString
B.concat [ByteString
header,ByteString
rest]
    ByteString -> IO ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$
     if ByteString
header ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= [Word8] -> ByteString
B.pack [Word8
31,Word8
139]
      then ByteString
allStdin
      else let decompress :: ByteString -> [ByteString]
decompress = ([ByteString], Bool) -> [ByteString]
forall a b. (a, b) -> a
fst (([ByteString], Bool) -> [ByteString])
-> (ByteString -> ([ByteString], Bool))
-> ByteString
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe Int -> ByteString -> ([ByteString], Bool)
gzDecompress Maybe Int
forall a. Maybe a
Nothing
               compressed :: ByteString
compressed = [ByteString] -> ByteString
BL.fromChunks [ByteString
allStdin]
           in
           [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString]
decompress ByteString
compressed

-- | Pointer to a filesystem, possibly with start/end offsets. Supposed to be
-- fed to (uncurry mmapFileByteString) or similar.
type FileSegment = (FilePath, Maybe (Int64, Int))

-- | Read in a FileSegment into a Lazy ByteString. Implemented using mmap.
readSegment :: FileSegment -> IO BL.ByteString
readSegment :: FileSegment -> IO ByteString
readSegment ([Char]
f,Maybe (Int64, Int)
range) = do
    ByteString
bs <- IO ByteString
tryToRead
       IO ByteString -> (IOError -> IO ByteString) -> IO ByteString
forall a. IO a -> (IOError -> IO a) -> IO a
`catchIOError` (\IOError
_ -> do
                     FileOffset
size <- FileStatus -> FileOffset
fileSize (FileStatus -> FileOffset) -> IO FileStatus -> IO FileOffset
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [Char] -> IO FileStatus
getSymbolicLinkStatus [Char]
f
                     if FileOffset
size FileOffset -> FileOffset -> Bool
forall a. Eq a => a -> a -> Bool
== FileOffset
0
                        then ByteString -> IO ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
B.empty
                        else IO ()
performGC IO () -> IO ByteString -> IO ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> IO ByteString
tryToRead)
    ByteString -> IO ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.fromChunks [ByteString
bs]
  where
    tryToRead :: IO ByteString
tryToRead =
        case Maybe (Int64, Int)
range of
            Maybe (Int64, Int)
Nothing -> [Char] -> IO ByteString
B.readFile [Char]
f
            Just (Int64
off, Int
size) -> [Char] -> IOMode -> (Handle -> IO ByteString) -> IO ByteString
forall r. [Char] -> IOMode -> (Handle -> IO r) -> IO r
withFile [Char]
f IOMode
ReadMode ((Handle -> IO ByteString) -> IO ByteString)
-> (Handle -> IO ByteString) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Handle
h -> do
                Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
h SeekMode
AbsoluteSeek (Integer -> IO ()) -> Integer -> IO ()
forall a b. (a -> b) -> a -> b
$ Int64 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
off
                Handle -> Int -> IO ByteString
B.hGet Handle
h Int
size
{-# INLINE readSegment #-}

-- -----------------------------------------------------------------------------
-- mmapFilePS

-- | Like readFilePS, this reads an entire file directly into a
-- 'B.ByteString', but it is even more efficient.  It involves directly
-- mapping the file to memory.  This has the advantage that the contents of
-- the file never need to be copied.  Also, under memory pressure the page
-- may simply be discarded, wile in the case of readFilePS it would need to
-- be written to swap.  If you read many small files, mmapFilePS will be
-- less memory-efficient than readFilePS, since each mmapFilePS takes up a
-- separate page of memory.  Also, you can run into bus errors if the file
-- is modified.

mmapFilePS :: FilePath -> IO B.ByteString
#if mingw32_HOST_OS
mmapFilePS = B.readFile
#else
mmapFilePS :: [Char] -> IO ByteString
mmapFilePS [Char]
f =
  [Char] -> Maybe (Int64, Int) -> IO ByteString
mmapFileByteString [Char]
f Maybe (Int64, Int)
forall a. Maybe a
Nothing
   IO ByteString -> (IOError -> IO ByteString) -> IO ByteString
forall a. IO a -> (IOError -> IO a) -> IO a
`catchIOError` (\IOError
_ -> do
                     FileOffset
size <- FileStatus -> FileOffset
fileSize (FileStatus -> FileOffset) -> IO FileStatus -> IO FileOffset
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [Char] -> IO FileStatus
getSymbolicLinkStatus [Char]
f
                     if FileOffset
size FileOffset -> FileOffset -> Bool
forall a. Eq a => a -> a -> Bool
== FileOffset
0
                        then ByteString -> IO ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
B.empty
                        else IO ()
performGC IO () -> IO ByteString -> IO ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> [Char] -> Maybe (Int64, Int) -> IO ByteString
mmapFileByteString [Char]
f Maybe (Int64, Int)
forall a. Maybe a
Nothing)
#endif

-- -------------------------------------------------------------------------
-- fromPS2Hex

fromPS2Hex :: B.ByteString -> B.ByteString
fromPS2Hex :: ByteString -> ByteString
fromPS2Hex = ByteString -> ByteString
B16.encode

-- -------------------------------------------------------------------------
-- fromHex2PS

fromHex2PS :: B.ByteString -> B.ByteString
fromHex2PS :: ByteString -> ByteString
fromHex2PS ByteString
s =
  case ByteString -> Either [Char] ByteString
B16.decode ByteString
s of
#if MIN_VERSION_base16_bytestring(1,0,0)
    Right ByteString
result -> ByteString
result
    Left [Char]
msg -> IOError -> ByteString
forall a e. Exception e => e -> a
throw (IOError -> ByteString) -> IOError -> ByteString
forall a b. (a -> b) -> a -> b
$ [Char] -> IOError
userError ([Char] -> IOError) -> [Char] -> IOError
forall a b. (a -> b) -> a -> b
$ [Char]
"fromHex2PS: input is not hex encoded: "[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[Char]
msg
#else
    (result, rest) | B.null rest -> result
    _ -> throw $ userError $ "fromHex2PS: input is not hex encoded"
#endif

propHexConversion :: B.ByteString -> Bool
propHexConversion :: ByteString -> Bool
propHexConversion ByteString
x = ByteString -> ByteString
fromHex2PS (ByteString -> ByteString
fromPS2Hex ByteString
x) ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
x

-- -------------------------------------------------------------------------
-- betweenLinesPS

-- | Return the B.ByteString between the two lines given,
-- or Nothing if they do not appear.
betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
               -> Maybe B.ByteString
betweenLinesPS :: ByteString -> ByteString -> ByteString -> Maybe ByteString
betweenLinesPS ByteString
start ByteString
end ByteString
ps =
  case ByteString -> ByteString -> (ByteString, ByteString)
B.breakSubstring ByteString
start_line ByteString
ps of
    (ByteString
before_start, ByteString
at_start)
      | Bool -> Bool
not (ByteString -> Bool
B.null ByteString
at_start)
      , ByteString -> Bool
B.null ByteString
before_start Bool -> Bool -> Bool
|| ByteString -> Char
BC.last ByteString
before_start Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' ->
          case ByteString -> ByteString -> (ByteString, ByteString)
B.breakSubstring ByteString
end_line (Int -> ByteString -> ByteString
B.drop (ByteString -> Int
B.length ByteString
start_line) ByteString
at_start) of
            (ByteString
before_end, ByteString
at_end)
              | Bool -> Bool
not (ByteString -> Bool
B.null ByteString
at_end)
              , ByteString -> Bool
B.null ByteString
before_end Bool -> Bool -> Bool
|| ByteString -> Char
BC.last ByteString
before_end Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' -> ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just ByteString
before_end
              | Bool
otherwise -> Maybe ByteString
forall a. Maybe a
Nothing
      | Bool
otherwise -> Maybe ByteString
forall a. Maybe a
Nothing
  where
    start_line :: ByteString
start_line = ByteString -> Char -> ByteString
BC.snoc ByteString
start Char
'\n'
    end_line :: ByteString
end_line = ByteString -> Char -> ByteString
BC.snoc ByteString
end Char
'\n'

-- | Simpler but less efficient variant of 'betweenLinesPS'.
spec_betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
                    -> Maybe B.ByteString
spec_betweenLinesPS :: ByteString -> ByteString -> ByteString -> Maybe ByteString
spec_betweenLinesPS ByteString
start ByteString
end ByteString
ps =
  case (ByteString -> Bool)
-> [ByteString] -> ([ByteString], [ByteString])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (ByteString
start ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
==) (ByteString -> [ByteString]
linesPS ByteString
ps) of
    ([ByteString]
_, ByteString
_:[ByteString]
after_start) ->
      case (ByteString -> Bool)
-> [ByteString] -> ([ByteString], [ByteString])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (ByteString
end ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
==) [ByteString]
after_start of
        ([ByteString]
before_end, ByteString
_:[ByteString]
_) ->
          ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BC.unlines [ByteString]
before_end
        ([ByteString], [ByteString])
_ -> Maybe ByteString
forall a. Maybe a
Nothing
    ([ByteString], [ByteString])
_ -> Maybe ByteString
forall a. Maybe a
Nothing

-- | Test if a ByteString is made of ascii characters
isAscii :: B.ByteString -> Bool
isAscii :: ByteString -> Bool
isAscii = (Word8 -> Bool) -> ByteString -> Bool
B.all (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128)

-- * Encoding functions

-- Use of 'unsafePerformIO' is ratified by the fact that these
-- really are pure functions.

-- | Decode a 'ByteString' containing UTF-8 to a 'String'. Decoding errors
-- are flagged with the U+FFFD character.
unpackPSFromUTF8 :: B.ByteString -> String
unpackPSFromUTF8 :: ByteString -> [Char]
unpackPSFromUTF8  = IO [Char] -> [Char]
forall a. IO a -> a
unsafePerformIO (IO [Char] -> [Char])
-> (ByteString -> IO [Char]) -> ByteString -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> IO [Char]
decodeUtf8

-- | Encode a 'String' to a 'ByteString' using UTF-8.
packStringToUTF8 :: String -> B.ByteString
packStringToUTF8 :: [Char] -> ByteString
packStringToUTF8 = IO ByteString -> ByteString
forall a. IO a -> a
unsafePerformIO (IO ByteString -> ByteString)
-> ([Char] -> IO ByteString) -> [Char] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IO ByteString
encodeUtf8

-- | Decode a 'ByteString' to a 'String' according to the current locale,
-- using lone surrogates for un-decodable bytes.
decodeLocale :: B.ByteString -> String
decodeLocale :: ByteString -> [Char]
decodeLocale = IO [Char] -> [Char]
forall a. IO a -> a
unsafePerformIO (IO [Char] -> [Char])
-> (ByteString -> IO [Char]) -> ByteString -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> IO [Char]
decode

-- | Encode a 'String' to a 'ByteString' according to the current locale,
-- converting lone surrogates back to the original byte. If that
-- fails (because the locale does not support the full unicode range)
-- then encode using utf-8, assuming that the un-ecodable characters
-- come from patch meta data.
--
-- See also 'Darcs.UI.Commands.setEnvCautiously'.
encodeLocale :: String -> B.ByteString
encodeLocale :: [Char] -> ByteString
encodeLocale [Char]
s = IO ByteString -> ByteString
forall a. IO a -> a
unsafePerformIO (IO ByteString -> ByteString) -> IO ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ [Char] -> IO ByteString
encode [Char]
s IO ByteString -> (IOError -> IO ByteString) -> IO ByteString
forall a. IO a -> (IOError -> IO a) -> IO a
`catchIOError` (\IOError
_ -> [Char] -> IO ByteString
encodeUtf8 [Char]
s)