{-# LANGUAGE BangPatterns, ForeignFunctionInterface, CPP #-} module ByteStringUtils (unsafeWithInternals, unpackPSfromUTF8, gzReadFilePS, mmapFilePS, gzWriteFilePS, gzWriteFilePSs, ifHeadThenTail, dropSpace, breakSpace, linesPS, unlinesPS, hashPS, breakFirstPS, breakLastPS, substrPS, readIntPS, is_funky, fromHex2PS, fromPS2Hex, betweenLinesPS, break_after_nth_newline, break_before_nth_newline, intercalate) where import Prelude hiding (catch) import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC import qualified Data.ByteString.Internal as BI import Data.ByteString (intercalate, uncons) import Data.ByteString.Internal (fromForeignPtr) import Control.Exception (catch) import System.IO import System.IO.Unsafe (unsafePerformIO) import Foreign.Storable (peekElemOff, peek) import Foreign.Marshal.Alloc (free) import Foreign.Marshal.Array (mallocArray, peekArray, advancePtr) import Foreign.C.Types (CInt) import Data.Bits (rotateL) import Data.Char (chr, ord, isSpace) import Data.Word (Word8) import Data.Int (Int32) import Control.Monad (when) import Foreign.Ptr (nullPtr) import Foreign.ForeignPtr (ForeignPtr) import Foreign.Ptr (plusPtr, Ptr) import Foreign.ForeignPtr (withForeignPtr) import Foreign.ForeignPtr (addForeignPtrFinalizer) import Foreign.Ptr (FunPtr) import qualified Data.ByteString.Lazy as BL import qualified Codec.Compression.GZip as GZ import Foreign.C.String (CString, withCString) import System.IO.MMap (mmapFileByteString) import System.Mem (performGC) import System.Posix.Files (fileSize, getSymbolicLinkStatus) debugForeignPtr :: ForeignPtr a -> String -> IO () foreign import ccall unsafe "static fpstring.h debug_alloc" debug_alloc :: Ptr a -> CString -> IO () foreign import ccall unsafe "static fpstring.h & debug_free" debug_free :: FunPtr (Ptr a -> IO ()) debugForeignPtr fp n = withCString n $ \ cname -> withForeignPtr fp $ \ p -> do debug_alloc p cname addForeignPtrFinalizer debug_free fp debugForeignPtr _ _ = return () unsafeWithInternals :: B.ByteString -> (Ptr Word8 -> Int -> IO a) -> IO a unsafeWithInternals ps f = case BI.toForeignPtr ps of (fp, s, l) -> withForeignPtr fp $ \ p -> f (p `plusPtr` s) l readIntPS :: B.ByteString -> Maybe (Int, B.ByteString) readIntPS = BC.readInt . BC.dropWhile isSpace unpackPSfromUTF8 :: B.ByteString -> String unpackPSfromUTF8 ps = case BI.toForeignPtr ps of (_, _, 0) -> "" (x, s, l) -> unsafePerformIO $ withForeignPtr x $ \ p -> do outbuf <- mallocArray l lout <- fromIntegral `fmap` utf8_to_ints outbuf (p `plusPtr` s) (fromIntegral l) when (lout < 0) $ error "Bad UTF8!" str <- (map (chr . fromIntegral)) `fmap` peekArray lout outbuf free outbuf return str foreign import ccall unsafe "static fpstring.h utf8_to_ints" utf8_to_ints :: Ptr Int -> Ptr Word8 -> CInt -> IO CInt {-# INLINE ifHeadThenTail #-} ifHeadThenTail :: Word8 -> B.ByteString -> Maybe B.ByteString ifHeadThenTail c s = case uncons s of Just (w, t) | w == c -> Just t _ -> Nothing isSpaceWord8 :: Word8 -> Bool isSpaceWord8 w = w == 32 || w == 9 || w == 10 || w == 13 {-# INLINE isSpaceWord8 #-} firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int firstnonspace !ptr !n !m | n >= m = return n | otherwise = do w <- peekElemOff ptr n if isSpaceWord8 w then firstnonspace ptr (n + 1) m else return n firstspace :: Ptr Word8 -> Int -> Int -> IO Int firstspace !ptr !n !m | n >= m = return n | otherwise = do w <- peekElemOff ptr n if (not . isSpaceWord8) w then firstspace ptr (n + 1) m else return n dropSpace :: B.ByteString -> B.ByteString dropSpace (BI.PS x s l) = BI.inlinePerformIO $ withForeignPtr x $ \ p -> do i <- firstnonspace (p `plusPtr` s) 0 l return $! if i == l then B.empty else BI.PS x (s + i) (l - i) {-# INLINE dropSpace #-} breakSpace :: B.ByteString -> (B.ByteString, B.ByteString) breakSpace (BI.PS x s l) = BI.inlinePerformIO $ withForeignPtr x $ \ p -> do i <- firstspace (p `plusPtr` s) 0 l return $! case () of _ | i == 0 -> (B.empty, BI.PS x s l) | i == l -> (BI.PS x s l, B.empty) | otherwise -> (BI.PS x s i, BI.PS x (s + i) (l - i)) {-# INLINE breakSpace #-} {-# INLINE is_funky #-} is_funky :: B.ByteString -> Bool is_funky ps = case BI.toForeignPtr ps of (x, s, l) -> unsafePerformIO $ withForeignPtr x $ \ p -> (/= 0) `fmap` has_funky_char (p `plusPtr` s) (fromIntegral l) foreign import ccall unsafe "fpstring.h has_funky_char" has_funky_char :: Ptr Word8 -> CInt -> IO CInt {-# INLINE hashPS #-} hashPS :: B.ByteString -> Int32 hashPS ps = case BI.toForeignPtr ps of (x, s, l) -> unsafePerformIO $ withForeignPtr x $ \ p -> do hash (p `plusPtr` s) l hash :: Ptr Word8 -> Int -> IO Int32 hash ptr len = f (0 :: Int32) ptr len where f h _ 0 = return h f h p n = do x <- peek p let !h' = (fromIntegral x) + (rotateL h 8) f h' (p `advancePtr` 1) (n - 1) {-# INLINE substrPS #-} substrPS :: B.ByteString -> B.ByteString -> Maybe Int substrPS tok str | B.null tok = Just 0 | B.length tok > B.length str = Nothing | otherwise = do n <- BC.elemIndex (BC.head tok) str let ttok = B.tail tok reststr = B.drop (n + 1) str if ttok == B.take (B.length ttok) reststr then Just n else ((n + 1) +) `fmap` substrPS tok reststr {-# INLINE breakFirstPS #-} breakFirstPS :: Char -> B.ByteString -> Maybe (B.ByteString, B.ByteString) breakFirstPS c p = case BC.elemIndex c p of Nothing -> Nothing Just n -> Just (B.take n p, B.drop (n + 1) p) {-# INLINE breakLastPS #-} breakLastPS :: Char -> B.ByteString -> Maybe (B.ByteString, B.ByteString) breakLastPS c p = case BC.elemIndexEnd c p of Nothing -> Nothing Just n -> Just (B.take n p, B.drop (n + 1) p) {-# INLINE linesPS #-} linesPS :: B.ByteString -> [B.ByteString] linesPS ps | B.null ps = [B.empty] | otherwise = BC.split '\n' ps unlinesPS :: [B.ByteString] -> B.ByteString unlinesPS [] = BC.empty unlinesPS x = BC.init $ BC.unlines x {-# INLINE unlinesPS #-} foreign import ccall unsafe "static zlib.h gzopen" c_gzopen :: CString -> CString -> IO (Ptr ()) foreign import ccall unsafe "static zlib.h gzclose" c_gzclose :: Ptr () -> IO () foreign import ccall unsafe "static zlib.h gzread" c_gzread :: Ptr () -> Ptr Word8 -> CInt -> IO CInt foreign import ccall unsafe "static zlib.h gzwrite" c_gzwrite :: Ptr () -> Ptr Word8 -> CInt -> IO CInt gzReadFilePS :: FilePath -> IO B.ByteString gzReadFilePS f = do h <- openBinaryFile f ReadMode header <- B.hGet h 2 if header /= BC.pack "\US\139" then do hClose h mmapFilePS f else do hSeek h SeekFromEnd (-4) len <- hGetLittleEndInt h hClose h let decompress = GZ.decompressWith GZ.defaultDecompressParams{GZ.decompressBufferSize = len} fmap (B.concat . BL.toChunks . decompress) $ fmap (BL.fromChunks . (: [])) $ B.readFile f BL.readFile f withCString f $ \ fstr -> withCString "rb" $ \ rb -> do gzf <- c_gzopen fstr rb when (gzf == nullPtr) $ fail $ "problem opening file " ++ f fp <- BI.mallocByteString len debugForeignPtr fp $ "gzReadFilePS " ++ f lread <- withForeignPtr fp $ \ p -> c_gzread gzf p (fromIntegral len) c_gzclose gzf when (fromIntegral lread /= len) $ fail $ "problem gzreading file " ++ f return $ fromForeignPtr fp 0 len hGetLittleEndInt :: Handle -> IO Int hGetLittleEndInt h = do b1 <- ord `fmap` hGetChar h b2 <- ord `fmap` hGetChar h b3 <- ord `fmap` hGetChar h b4 <- ord `fmap` hGetChar h return $ b1 + 256 * b2 + 65536 * b3 + 16777216 * b4 gzWriteFilePS :: FilePath -> B.ByteString -> IO () gzWriteFilePS f ps = gzWriteFilePSs f [ps] gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO () gzWriteFilePSs f pss = BL.writeFile f $ GZ.compress $ BL.fromChunks pss withCString f $ \ fstr -> withCString "wb" $ \ wb -> do gzf <- c_gzopen fstr wb when (gzf == nullPtr) $ fail $ "problem gzopening file for write: " ++ f mapM_ (gzWriteToGzf gzf) pss `catch` \ _ -> fail $ "problem gzwriting file: " ++ f c_gzclose gzf gzWriteToGzf :: Ptr () -> B.ByteString -> IO () gzWriteToGzf gzf ps = case BI.toForeignPtr ps of (_, _, 0) -> return () (x, s, l) -> do lw <- withForeignPtr x $ \ p -> c_gzwrite gzf (p `plusPtr` s) (fromIntegral l) when (fromIntegral lw /= l) $ fail $ "problem in gzWriteToGzf" mmapFilePS :: FilePath -> IO B.ByteString mmapFilePS f = do x <- mmapFileByteString f Nothing `catch` (\ _ -> do size <- fileSize `fmap` getSymbolicLinkStatus f if size == 0 then return B.empty else performGC >> mmapFileByteString f Nothing) return x mmapFilePS = B.readFile foreign import ccall unsafe "static fpstring.h conv_to_hex" conv_to_hex :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () fromPS2Hex :: B.ByteString -> B.ByteString fromPS2Hex ps = case BI.toForeignPtr ps of (x, s, l) -> BI.unsafeCreate (2 * l) $ \ p -> withForeignPtr x $ \ f -> conv_to_hex p (f `plusPtr` s) $ fromIntegral l foreign import ccall unsafe "static fpstring.h conv_from_hex" conv_from_hex :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () fromHex2PS :: B.ByteString -> B.ByteString fromHex2PS ps = case BI.toForeignPtr ps of (x, s, l) -> BI.unsafeCreate (l `div` 2) $ \ p -> withForeignPtr x $ \ f -> conv_from_hex p (f `plusPtr` s) (fromIntegral $ l `div` 2) betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString -> Maybe (B.ByteString) betweenLinesPS start end ps = case break (start ==) (linesPS ps) of (_, _ : rest@(bs1 : _)) -> case BI.toForeignPtr bs1 of (ps1, s1, _) -> case break (end ==) rest of (_, bs2 : _) -> case BI.toForeignPtr bs2 of (_, s2, _) -> Just $ fromForeignPtr ps1 s1 (s2 - s1) _ -> Nothing _ -> Nothing break_after_nth_newline :: Int -> B.ByteString -> Maybe (B.ByteString, B.ByteString) break_after_nth_newline 0 the_ps | B.null the_ps = Just (B.empty, B.empty) break_after_nth_newline n the_ps = case BI.toForeignPtr the_ps of (fp, the_s, l) -> unsafePerformIO $ withForeignPtr fp $ \ p -> do let findit 0 s | s == end = return $ Just (the_ps, B.empty) findit _ s | s == end = return Nothing findit 0 s = let left_l = s - the_s in return $ Just (fromForeignPtr fp the_s left_l, fromForeignPtr fp s (l - left_l)) findit i s = do w <- peekElemOff p s if w == nl then findit (i - 1) (s + 1) else findit i (s + 1) nl = BI.c2w '\n' end = the_s + l findit n the_s break_before_nth_newline :: Int -> B.ByteString -> (B.ByteString, B.ByteString) break_before_nth_newline 0 the_ps | B.null the_ps = (B.empty, B.empty) break_before_nth_newline n the_ps = case BI.toForeignPtr the_ps of (fp, the_s, l) -> unsafePerformIO $ withForeignPtr fp $ \ p -> do let findit _ s | s == end = return (the_ps, B.empty) findit i s = do w <- peekElemOff p s if w == nl then if i == 0 then let left_l = s - the_s in return (fromForeignPtr fp the_s left_l, fromForeignPtr fp s (l - left_l)) else findit (i - 1) (s + 1) else findit i (s + 1) nl = BI.c2w '\n' end = the_s + l findit n the_s