{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Darcs.Util.ByteString
-- Copyright   :  (c) The University of Glasgow 2001,
--                    David Roundy 2003-2005
-- License : GPL (I'm happy to also license this file BSD style but don't
--           want to bother distributing two license files with darcs.
--
-- Maintainer  :  droundy@abridgegame.org
-- Stability   :  experimental
-- Portability :  portable
--
-- GZIp and MMap IO for ByteStrings, encoding utilities, and miscellaneous
-- functions for Data.ByteString
--
module Darcs.Util.ByteString
    (
    -- * IO with mmap or gzip
      gzReadFilePS
    , mmapFilePS
    , gzWriteFilePS
    , gzWriteFilePSs
    , gzReadStdin
    , gzWriteHandle
    , FileSegment
    , readSegment
    -- * gzip handling
    , isGZFile
    , gzDecompress
    -- * list utilities
    , dropSpace
    , linesPS
    , unlinesPS
    , hashPS
    , breakFirstPS
    , breakLastPS
    , substrPS
    , isFunky
    , fromHex2PS
    , fromPS2Hex
    , betweenLinesPS
    , intercalate
    -- * encoding and unicode utilities
    , isAscii
    , decodeLocale
    , encodeLocale
    , unpackPSFromUTF8
    , packStringToUTF8
    -- * properties
    , prop_unlinesPS_linesPS_left_inverse
    , prop_linesPS_length
    , prop_unlinesPS_length
    , propHexConversion
    , spec_betweenLinesPS
    ) where

import Darcs.Prelude

import qualified Data.ByteString            as B
import qualified Data.ByteString.Char8      as BC
import qualified Data.ByteString.Lazy       as BL
import Data.ByteString (intercalate)
import qualified Data.ByteString.Base16     as B16

import System.Directory ( getFileSize )
import System.IO ( withFile, IOMode(ReadMode)
                 , hSeek, SeekMode(SeekFromEnd,AbsoluteSeek)
                 , openBinaryFile, hClose, Handle, hGetChar
                 , stdin)
import System.IO.Error          ( catchIOError )
import System.IO.Unsafe         ( unsafePerformIO )

import Data.Bits                ( rotateL )
import Data.Char                ( ord )
import Data.Word                ( Word8 )
import Data.Int                 ( Int32, Int64 )
import Data.List                ( intersperse )
import Control.Monad            ( when )
import Control.Monad.ST.Lazy    ( ST )

import qualified Codec.Compression.GZip as GZ
import qualified Codec.Compression.Zlib.Internal as ZI
import Darcs.Util.Encoding ( decode, encode, decodeUtf8, encodeUtf8 )
import Darcs.Util.Global ( addCRCWarning )

#if mingw32_HOST_OS
#else
import System.IO.MMap( mmapFileByteString )
#endif
import System.Mem( performGC )

------------------------------------------------------------------------
-- A locale-independent isspace(3) so patches are interpreted the same everywhere.
-- ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 Word8
0x20 = Bool
True
isSpaceWord8 Word8
0x09 = Bool
True
isSpaceWord8 Word8
0x0A = Bool
True
isSpaceWord8 Word8
0x0D = Bool
True
isSpaceWord8 Word8
_    = Bool
False
{-# INLINE isSpaceWord8 #-}

-- | Drop leading white space, where white space is defined as
-- consisting of ' ', '\t', '\n', or '\r'.
dropSpace :: B.ByteString -> B.ByteString
dropSpace :: ByteString -> ByteString
dropSpace ByteString
bs = (Word8 -> Bool) -> ByteString -> ByteString
B.dropWhile Word8 -> Bool
isSpaceWord8 ByteString
bs

------------------------------------------------------------------------

{-# INLINE isFunky #-}
isFunky :: B.ByteString -> Bool
isFunky :: ByteString -> Bool
isFunky ByteString
ps = Word8
0 Word8 -> ByteString -> Bool
`B.elem` ByteString
ps Bool -> Bool -> Bool
|| Word8
26 Word8 -> ByteString -> Bool
`B.elem` ByteString
ps

------------------------------------------------------------------------

{-# INLINE hashPS #-}
hashPS :: B.ByteString -> Int32
hashPS :: ByteString -> Int32
hashPS = (Int32 -> Word8 -> Int32) -> Int32 -> ByteString -> Int32
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
B.foldl' Int32 -> Word8 -> Int32
hashByte Int32
0

{-# INLINE hashByte #-}
hashByte :: Int32 -> Word8 -> Int32
hashByte :: Int32 -> Word8 -> Int32
hashByte Int32
h Word8
x = Word8 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
+ Int32 -> Int -> Int32
forall a. Bits a => a -> Int -> a
rotateL Int32
h Int
8

{-# INLINE substrPS #-}
substrPS :: B.ByteString -> B.ByteString -> Maybe Int
substrPS :: ByteString -> ByteString -> Maybe Int
substrPS ByteString
tok ByteString
str
    | ByteString -> Bool
B.null ByteString
tok = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
    | ByteString -> Int
B.length ByteString
tok Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> ByteString -> Int
B.length ByteString
str = Maybe Int
forall a. Maybe a
Nothing
    | Bool
otherwise = do Int
n <- Word8 -> ByteString -> Maybe Int
B.elemIndex (HasCallStack => ByteString -> Word8
ByteString -> Word8
B.head ByteString
tok) ByteString
str
                     let ttok :: ByteString
ttok = HasCallStack => ByteString -> ByteString
ByteString -> ByteString
B.tail ByteString
tok
                         reststr :: ByteString
reststr = Int -> ByteString -> ByteString
B.drop (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) ByteString
str
                     if ByteString
ttok ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> ByteString -> ByteString
B.take (ByteString -> Int
B.length ByteString
ttok) ByteString
reststr
                        then Int -> Maybe Int
forall a. a -> Maybe a
Just Int
n
                        else ((Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)Int -> Int -> Int
forall a. Num a => a -> a -> a
+) (Int -> Int) -> Maybe Int -> Maybe Int
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ByteString -> ByteString -> Maybe Int
substrPS ByteString
tok ByteString
reststr

------------------------------------------------------------------------

-- TODO: replace breakFirstPS and breakLastPS with definitions based on
-- ByteString's break/breakEnd
{-# INLINE breakFirstPS #-}
breakFirstPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakFirstPS :: Char -> ByteString -> Maybe (ByteString, ByteString)
breakFirstPS Char
c ByteString
p = case Char -> ByteString -> Maybe Int
BC.elemIndex Char
c ByteString
p of
                   Maybe Int
Nothing -> Maybe (ByteString, ByteString)
forall a. Maybe a
Nothing
                   Just Int
n -> (ByteString, ByteString) -> Maybe (ByteString, ByteString)
forall a. a -> Maybe a
Just (Int -> ByteString -> ByteString
B.take Int
n ByteString
p, Int -> ByteString -> ByteString
B.drop (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) ByteString
p)

{-# INLINE breakLastPS #-}
breakLastPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakLastPS :: Char -> ByteString -> Maybe (ByteString, ByteString)
breakLastPS Char
c ByteString
p = case Char -> ByteString -> Maybe Int
BC.elemIndexEnd Char
c ByteString
p of
                  Maybe Int
Nothing -> Maybe (ByteString, ByteString)
forall a. Maybe a
Nothing
                  Just Int
n -> (ByteString, ByteString) -> Maybe (ByteString, ByteString)
forall a. a -> Maybe a
Just (Int -> ByteString -> ByteString
B.take Int
n ByteString
p, Int -> ByteString -> ByteString
B.drop (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) ByteString
p)

------------------------------------------------------------------------
-- linesPS and unlinesPS

{-# INLINE linesPS #-}
-- | Split the input into lines, that is, sections separated by '\n' bytes,
-- unless it is empty, in which case the result has one empty line.
linesPS :: B.ByteString -> [B.ByteString]
linesPS :: ByteString -> [ByteString]
linesPS ByteString
ps
     | ByteString -> Bool
B.null ByteString
ps = [ByteString
B.empty]
     | Bool
otherwise = Char -> ByteString -> [ByteString]
BC.split Char
'\n' ByteString
ps

{-# INLINE unlinesPS #-}
-- | Concatenate the inputs with '\n' bytes in interspersed.
unlinesPS :: [B.ByteString] -> B.ByteString
unlinesPS :: [ByteString] -> ByteString
unlinesPS = [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString)
-> ([ByteString] -> [ByteString]) -> [ByteString] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
intersperse (Char -> ByteString
BC.singleton Char
'\n')

-- properties of linesPS and unlinesPS

prop_unlinesPS_linesPS_left_inverse :: B.ByteString -> Bool
prop_unlinesPS_linesPS_left_inverse :: ByteString -> Bool
prop_unlinesPS_linesPS_left_inverse ByteString
x = [ByteString] -> ByteString
unlinesPS (ByteString -> [ByteString]
linesPS ByteString
x) ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
x

prop_linesPS_length :: B.ByteString -> Bool
prop_linesPS_length :: ByteString -> Bool
prop_linesPS_length ByteString
x = [ByteString] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (ByteString -> [ByteString]
linesPS ByteString
x) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== [Int] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (Char -> ByteString -> [Int]
BC.elemIndices Char
'\n' ByteString
x) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

prop_unlinesPS_length :: [B.ByteString] -> Bool
prop_unlinesPS_length :: [ByteString] -> Bool
prop_unlinesPS_length [ByteString]
xs =
  ByteString -> Int
B.length ([ByteString] -> ByteString
unlinesPS [ByteString]
xs) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== if [ByteString] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
xs then Int
0 else [Int] -> Int
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((ByteString -> Int) -> [ByteString] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Int
B.length [ByteString]
xs) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ [ByteString] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
xs Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1

-- -----------------------------------------------------------------------------
-- gzReadFilePS

-- |Decompress the given bytestring into a lazy list of chunks, along with a boolean
-- flag indicating (if True) that the CRC was corrupted.
-- Inspecting the flag will cause the entire list of chunks to be evaluated (but if
-- you throw away the list immediately this should run in constant space).
gzDecompress :: Maybe Int -> BL.ByteString -> ([B.ByteString], Bool)
gzDecompress :: Maybe Int -> ByteString -> ([ByteString], Bool)
gzDecompress Maybe Int
mbufsize =
    -- This is what the code would be without the bad CRC recovery logic:
    -- return . BL.toChunks . GZ.decompressWith decompressParams
    (forall s. DecompressStream (ST s))
-> ByteString -> ([ByteString], Bool)
decompressWarn (Format -> DecompressParams -> DecompressStream (ST s)
forall s. Format -> DecompressParams -> DecompressStream (ST s)
ZI.decompressST Format
ZI.gzipFormat DecompressParams
decompressParams)
  where
        decompressParams :: DecompressParams
decompressParams = case Maybe Int
mbufsize of
                              Just Int
bufsize -> DecompressParams
GZ.defaultDecompressParams { GZ.decompressBufferSize = bufsize }
                              Maybe Int
Nothing -> DecompressParams
GZ.defaultDecompressParams

        decompressWarn :: (forall s . ZI.DecompressStream (ST s)) -> BL.ByteString -> ([B.ByteString], Bool)
        decompressWarn :: (forall s. DecompressStream (ST s))
-> ByteString -> ([ByteString], Bool)
decompressWarn = (ByteString -> ([ByteString], Bool) -> ([ByteString], Bool))
-> (ByteString -> ([ByteString], Bool))
-> (DecompressError -> ([ByteString], Bool))
-> (forall s. DecompressStream (ST s))
-> ByteString
-> ([ByteString], Bool)
forall a.
(ByteString -> a -> a)
-> (ByteString -> a)
-> (DecompressError -> a)
-> (forall s. DecompressStream (ST s))
-> ByteString
-> a
ZI.foldDecompressStreamWithInput
                           (\ByteString
x ~([ByteString]
xs, Bool
b) -> (ByteString
xByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
xs, Bool
b))
                           (\ByteString
xs -> if ByteString -> Bool
BL.null ByteString
xs
                                      then ([], Bool
False)
                                      else String -> ([ByteString], Bool)
forall a. HasCallStack => String -> a
error String
"trailing data at end of compressed stream"
                           )
                           DecompressError -> ([ByteString], Bool)
forall {a}. DecompressError -> ([a], Bool)
handleBad

        -- For a while a bug in darcs caused gzip files with good data but bad CRCs to be
        -- produced. Trap bad CRC messages, run the specified action to report that it happened,
        -- but continue on the assumption that the data is valid.
        handleBad :: DecompressError -> ([a], Bool)
handleBad (ZI.DataFormatError String
"incorrect data check") = ([], Bool
True)
        handleBad DecompressError
e = String -> ([a], Bool)
forall a. HasCallStack => String -> a
error (DecompressError -> String
forall a. Show a => a -> String
show DecompressError
e)

isGZFile :: FilePath -> IO (Maybe Int)
isGZFile :: String -> IO (Maybe Int)
isGZFile String
f = do
    Handle
h <- String -> IOMode -> IO Handle
openBinaryFile String
f IOMode
ReadMode
    ByteString
header <- Handle -> Int -> IO ByteString
B.hGet Handle
h Int
2
    if ByteString
header ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= [Word8] -> ByteString
B.pack [Word8
31,Word8
139]
       then do Handle -> IO ()
hClose Handle
h
               Maybe Int -> IO (Maybe Int)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Int
forall a. Maybe a
Nothing
       else do Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
h SeekMode
SeekFromEnd (-Integer
4)
               Int
len <- Handle -> IO Int
hGetLittleEndInt Handle
h
               Handle -> IO ()
hClose Handle
h
               Maybe Int -> IO (Maybe Int)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
len)

-- | Read an entire file, which may or may not be gzip compressed, directly
-- into a 'B.ByteString'.
gzReadFilePS :: FilePath -> IO B.ByteString
gzReadFilePS :: String -> IO ByteString
gzReadFilePS String
f = do
    Maybe Int
mlen <- String -> IO (Maybe Int)
isGZFile String
f
    case Maybe Int
mlen of
       Maybe Int
Nothing -> String -> IO ByteString
mmapFilePS String
f
       Just Int
len ->
            do -- Passing the length to gzDecompress means that it produces produces one chunk,
               -- which in turn means that B.concat won't need to copy data.
               -- If the length is wrong this will just affect efficiency, not correctness
               let doDecompress :: ByteString -> IO [ByteString]
doDecompress ByteString
buf = let ([ByteString]
res, Bool
bad) = Maybe Int -> ByteString -> ([ByteString], Bool)
gzDecompress (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
len) ByteString
buf
                                      in do Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
bad (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> IO ()
addCRCWarning String
f
                                            [ByteString] -> IO [ByteString]
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [ByteString]
res
               ByteString
compressed <- ([ByteString] -> ByteString
BL.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
forall a. a -> [a]
forall (m :: * -> *) a. Monad m => a -> m a
return) (ByteString -> ByteString) -> IO ByteString -> IO ByteString
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` String -> IO ByteString
mmapFilePS String
f
               [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> IO [ByteString] -> IO ByteString
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ByteString -> IO [ByteString]
doDecompress ByteString
compressed

hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt Handle
h = do
    Int
b1 <- Char -> Int
ord (Char -> Int) -> IO Char -> IO Int
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Handle -> IO Char
hGetChar Handle
h
    Int
b2 <- Char -> Int
ord (Char -> Int) -> IO Char -> IO Int
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Handle -> IO Char
hGetChar Handle
h
    Int
b3 <- Char -> Int
ord (Char -> Int) -> IO Char -> IO Int
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Handle -> IO Char
hGetChar Handle
h
    Int
b4 <- Char -> Int
ord (Char -> Int) -> IO Char -> IO Int
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Handle -> IO Char
hGetChar Handle
h
    Int -> IO Int
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> IO Int) -> Int -> IO Int
forall a b. (a -> b) -> a -> b
$ Int
b1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
256Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
b2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
65536Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
b3 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
16777216Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
b4

gzWriteFilePS :: FilePath -> B.ByteString -> IO ()
gzWriteFilePS :: String -> ByteString -> IO ()
gzWriteFilePS String
f ByteString
ps = String -> [ByteString] -> IO ()
gzWriteFilePSs String
f [ByteString
ps]

gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO ()
gzWriteFilePSs :: String -> [ByteString] -> IO ()
gzWriteFilePSs String
f [ByteString]
pss  =
    String -> ByteString -> IO ()
BL.writeFile String
f (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
GZ.compress (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.fromChunks [ByteString]
pss

gzWriteHandle :: Handle -> [B.ByteString] -> IO ()
gzWriteHandle :: Handle -> [ByteString] -> IO ()
gzWriteHandle Handle
h [ByteString]
pss  =
    Handle -> ByteString -> IO ()
BL.hPut Handle
h (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
GZ.compress (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.fromChunks [ByteString]
pss

-- | Read standard input, which may or may not be gzip compressed, directly
-- into a 'B.ByteString'.
gzReadStdin :: IO B.ByteString
gzReadStdin :: IO ByteString
gzReadStdin = do
    ByteString
header <- Handle -> Int -> IO ByteString
B.hGet Handle
stdin Int
2
    ByteString
rest   <- Handle -> IO ByteString
B.hGetContents Handle
stdin
    let allStdin :: ByteString
allStdin = [ByteString] -> ByteString
B.concat [ByteString
header,ByteString
rest]
    ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$
     if ByteString
header ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= [Word8] -> ByteString
B.pack [Word8
31,Word8
139]
      then ByteString
allStdin
      else let decompress :: ByteString -> [ByteString]
decompress = ([ByteString], Bool) -> [ByteString]
forall a b. (a, b) -> a
fst (([ByteString], Bool) -> [ByteString])
-> (ByteString -> ([ByteString], Bool))
-> ByteString
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe Int -> ByteString -> ([ByteString], Bool)
gzDecompress Maybe Int
forall a. Maybe a
Nothing
               compressed :: ByteString
compressed = [ByteString] -> ByteString
BL.fromChunks [ByteString
allStdin]
           in
           [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString]
decompress ByteString
compressed

-- | Pointer to a filesystem, possibly with start/end offsets. Supposed to be
-- fed to (uncurry mmapFileByteString) or similar.
type FileSegment = (FilePath, Maybe (Int64, Int))

-- | Read in a FileSegment into a Lazy ByteString. Implemented using mmap.
readSegment :: FileSegment -> IO BL.ByteString
readSegment :: FileSegment -> IO ByteString
readSegment (String
f,Maybe (Int64, Int)
range) = do
    ByteString
bs <- IO ByteString
tryToRead
       IO ByteString -> (IOError -> IO ByteString) -> IO ByteString
forall a. IO a -> (IOError -> IO a) -> IO a
`catchIOError` (\IOError
_ -> do
                     Integer
size <- String -> IO Integer
getFileSize String
f
                     if Integer
size Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0
                        then ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
B.empty
                        else IO ()
performGC IO () -> IO ByteString -> IO ByteString
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> IO ByteString
tryToRead)
    ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.fromChunks [ByteString
bs]
  where
    tryToRead :: IO ByteString
tryToRead =
        case Maybe (Int64, Int)
range of
            Maybe (Int64, Int)
Nothing -> String -> IO ByteString
B.readFile String
f
            Just (Int64
off, Int
size) -> String -> IOMode -> (Handle -> IO ByteString) -> IO ByteString
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
f IOMode
ReadMode ((Handle -> IO ByteString) -> IO ByteString)
-> (Handle -> IO ByteString) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Handle
h -> do
                Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
h SeekMode
AbsoluteSeek (Integer -> IO ()) -> Integer -> IO ()
forall a b. (a -> b) -> a -> b
$ Int64 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
off
                Handle -> Int -> IO ByteString
B.hGet Handle
h Int
size
{-# INLINE readSegment #-}

-- -----------------------------------------------------------------------------
-- mmapFilePS

-- | Like readFilePS, this reads an entire file directly into a
-- 'B.ByteString', but it is even more efficient.  It involves directly
-- mapping the file to memory.  This has the advantage that the contents of
-- the file never need to be copied.  Also, under memory pressure the page
-- may simply be discarded, wile in the case of readFilePS it would need to
-- be written to swap.  If you read many small files, mmapFilePS will be
-- less memory-efficient than readFilePS, since each mmapFilePS takes up a
-- separate page of memory.  Also, you can run into bus errors if the file
-- is modified.

mmapFilePS :: FilePath -> IO B.ByteString
#if mingw32_HOST_OS
mmapFilePS = B.readFile
#else
mmapFilePS :: String -> IO ByteString
mmapFilePS String
f =
  String -> Maybe (Int64, Int) -> IO ByteString
mmapFileByteString String
f Maybe (Int64, Int)
forall a. Maybe a
Nothing
   IO ByteString -> (IOError -> IO ByteString) -> IO ByteString
forall a. IO a -> (IOError -> IO a) -> IO a
`catchIOError` (\IOError
_ -> do
                     Integer
size <- String -> IO Integer
getFileSize String
f
                     if Integer
size Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0
                        then ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
B.empty
                        else IO ()
performGC IO () -> IO ByteString -> IO ByteString
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> Maybe (Int64, Int) -> IO ByteString
mmapFileByteString String
f Maybe (Int64, Int)
forall a. Maybe a
Nothing)
#endif

-- -------------------------------------------------------------------------
-- fromPS2Hex

fromPS2Hex :: B.ByteString -> B.ByteString
fromPS2Hex :: ByteString -> ByteString
fromPS2Hex = ByteString -> ByteString
B16.encode

-- -------------------------------------------------------------------------
-- fromHex2PS

fromHex2PS :: B.ByteString -> Either String B.ByteString
fromHex2PS :: ByteString -> Either String ByteString
fromHex2PS ByteString
s =
  case ByteString -> Either String ByteString
B16.decode ByteString
s of
    Right ByteString
result -> ByteString -> Either String ByteString
forall a b. b -> Either a b
Right ByteString
result
    Left String
msg -> String -> Either String ByteString
forall a b. a -> Either a b
Left (String -> Either String ByteString)
-> String -> Either String ByteString
forall a b. (a -> b) -> a -> b
$ String
"fromHex2PS: input is not hex encoded: "String -> String -> String
forall a. [a] -> [a] -> [a]
++String
msg

propHexConversion :: B.ByteString -> Bool
propHexConversion :: ByteString -> Bool
propHexConversion ByteString
x = ByteString -> Either String ByteString
fromHex2PS (ByteString -> ByteString
fromPS2Hex ByteString
x) Either String ByteString -> Either String ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString -> Either String ByteString
forall a b. b -> Either a b
Right ByteString
x

-- -------------------------------------------------------------------------
-- betweenLinesPS

-- | Return the B.ByteString between the two lines given,
-- or Nothing if either of them does not appear.
--
-- Precondition: the first two arguments (start and end line)
-- must be non-empty and contain no newline bytes.
betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
               -> Maybe B.ByteString
betweenLinesPS :: ByteString -> ByteString -> ByteString -> Maybe ByteString
betweenLinesPS ByteString
start ByteString
end ByteString
ps = do
  Int
at_start <- Int -> ByteString -> ByteString -> Maybe Int
findLine Int
0 ByteString
start ByteString
ps
  Int
at_end <- Int -> ByteString -> ByteString -> Maybe Int
findLine Int
0 ByteString
end (Int -> ByteString -> ByteString
B.drop (Int
at_start Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
start) ByteString
ps)
  -- the "drop 1" eliminates the newline after start
  -- (a trailing newline before end, if present, is retained)
  ByteString -> Maybe ByteString
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
B.drop Int
1 (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
B.take Int
at_end (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
B.drop (Int
at_start Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
start) ByteString
ps
  where
    -- find index of substring x if it is a full line
    findLine :: Int -> ByteString -> ByteString -> Maybe Int
findLine Int
i ByteString
x ByteString
s =
      case ByteString -> ByteString -> (ByteString, ByteString)
B.breakSubstring ByteString
x ByteString
s of
        (ByteString
before, ByteString
at)
          | ByteString -> Bool
B.null ByteString
at -> Maybe Int
forall a. Maybe a
Nothing -- not found at all
          | Bool -> Bool
not (ByteString -> Bool
B.null ByteString
after) Bool -> Bool -> Bool
&& ByteString -> Char
BC.head ByteString
after Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n' -> do
              -- found but not followed by newline
              Int
next_nl <- Char -> ByteString -> Maybe Int
BC.elemIndex Char
'\n' ByteString
after
              Int -> ByteString -> ByteString -> Maybe Int
findLine (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i_after Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
next_nl) ByteString
x (Int -> ByteString -> ByteString
B.drop Int
next_nl ByteString
after)
          | Bool -> Bool
not (ByteString -> Bool
B.null ByteString
before) Bool -> Bool -> Bool
&& ByteString -> Char
BC.last ByteString
before Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n' ->
              -- found, followed by newline but not preceded by newline
              Int -> ByteString -> ByteString -> Maybe Int
findLine (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i_after) ByteString
x ByteString
after
          | Bool
otherwise -> Int -> Maybe Int
forall a. a -> Maybe a
Just (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i_before)
          where
            after :: ByteString
after = Int -> ByteString -> ByteString
B.drop Int
l_x ByteString
at
            l_x :: Int
l_x = ByteString -> Int
B.length ByteString
x
            i_before :: Int
i_before = ByteString -> Int
B.length ByteString
before
            i_after :: Int
i_after = Int
i_before Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l_x

-- | Simpler but less efficient variant of 'betweenLinesPS'. Note
-- that this is only equivalent under the stated preconditions.
spec_betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
                    -> Maybe B.ByteString
spec_betweenLinesPS :: ByteString -> ByteString -> ByteString -> Maybe ByteString
spec_betweenLinesPS ByteString
start ByteString
end ByteString
ps =
  case (ByteString -> Bool)
-> [ByteString] -> ([ByteString], [ByteString])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (ByteString
start ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
==) (ByteString -> [ByteString]
linesPS ByteString
ps) of
    ([ByteString]
_, ByteString
_:[ByteString]
after_start) ->
      case (ByteString -> Bool)
-> [ByteString] -> ([ByteString], [ByteString])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (ByteString
end ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
==) [ByteString]
after_start of
        ([ByteString]
before_end, ByteString
_:[ByteString]
_) ->
          ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ if [ByteString] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
before_end then ByteString
B.empty else [ByteString] -> ByteString
BC.unlines [ByteString]
before_end
        ([ByteString], [ByteString])
_ -> Maybe ByteString
forall a. Maybe a
Nothing
    ([ByteString], [ByteString])
_ -> Maybe ByteString
forall a. Maybe a
Nothing

-- | Test if a ByteString is made of ascii characters
isAscii :: B.ByteString -> Bool
isAscii :: ByteString -> Bool
isAscii = (Word8 -> Bool) -> ByteString -> Bool
B.all (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128)

-- * Encoding functions

-- Use of 'unsafePerformIO' is ratified by the fact that these
-- really are pure functions.

-- | Decode a 'ByteString' containing UTF-8 to a 'String'. Decoding errors
-- are flagged with the U+FFFD character.
unpackPSFromUTF8 :: B.ByteString -> String
unpackPSFromUTF8 :: ByteString -> String
unpackPSFromUTF8  = IO String -> String
forall a. IO a -> a
unsafePerformIO (IO String -> String)
-> (ByteString -> IO String) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> IO String
decodeUtf8

-- | Encode a 'String' to a 'ByteString' using UTF-8.
packStringToUTF8 :: String -> B.ByteString
packStringToUTF8 :: String -> ByteString
packStringToUTF8 = IO ByteString -> ByteString
forall a. IO a -> a
unsafePerformIO (IO ByteString -> ByteString)
-> (String -> IO ByteString) -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IO ByteString
encodeUtf8

-- | Decode a 'ByteString' to a 'String' according to the current locale,
-- using lone surrogates for un-decodable bytes.
decodeLocale :: B.ByteString -> String
decodeLocale :: ByteString -> String
decodeLocale = IO String -> String
forall a. IO a -> a
unsafePerformIO (IO String -> String)
-> (ByteString -> IO String) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> IO String
decode

-- | Encode a 'String' to a 'ByteString' according to the current locale,
-- converting lone surrogates back to the original byte. If that
-- fails (because the locale does not support the full unicode range)
-- then encode using utf-8, assuming that the un-ecodable characters
-- come from patch meta data.
--
-- See also 'Darcs.UI.Commands.setEnvCautiously'.
encodeLocale :: String -> B.ByteString
encodeLocale :: String -> ByteString
encodeLocale String
s = IO ByteString -> ByteString
forall a. IO a -> a
unsafePerformIO (IO ByteString -> ByteString) -> IO ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> IO ByteString
encode String
s IO ByteString -> (IOError -> IO ByteString) -> IO ByteString
forall a. IO a -> (IOError -> IO a) -> IO a
`catchIOError` (\IOError
_ -> String -> IO ByteString
encodeUtf8 String
s)