module ByteStringUtils (
        unsafeWithInternals,
        unpackPSFromUTF8,
        packStringToUTF8,
        
        gzReadFilePS,
        mmapFilePS,
        gzWriteFilePS,
        gzWriteFilePSs,
        gzReadStdin,
        
        isGZFile,
        gzDecompress,
        
        dropSpace,
        breakSpace,
        linesPS,
        unlinesPS,
        hashPS,
        breakFirstPS,
        breakLastPS,
        substrPS,
        readIntPS,
        isFunky,
        fromHex2PS,
        fromPS2Hex,
        betweenLinesPS,
        breakAfterNthNewline,
        breakBeforeNthNewline,
        intercalate,
        
        isAscii,
        decodeLocale,
        encodeLocale,
        decodeString
    ) where
import Prelude hiding ( catch )
import qualified Data.ByteString            as B
import qualified Data.ByteString.Char8      as BC
import qualified Data.ByteString.Internal   as BI
import Data.ByteString (intercalate)
import Data.ByteString.Internal (fromForeignPtr)
#if defined (HAVE_MMAP)
import Control.Exception.Extensible ( catch, SomeException )
#endif
import System.IO
import System.IO.Unsafe         ( unsafePerformIO )
import System.Console.Haskeline ( InputT, runInputTBehavior, defaultSettings, useFileHandle )
import System.Console.Haskeline.Encoding ( decode, encode )
import Foreign.Storable         ( peekElemOff, peek )
import Foreign.Marshal.Array    ( advancePtr )
import Foreign.C.Types          ( CInt )
import Data.Bits                ( rotateL )
import Data.Char                ( ord, isSpace )
import Data.Word                ( Word8 )
import Data.Int                 ( Int32 )
import qualified Data.Text as T ( pack, unpack )
import Data.Text.Encoding       ( encodeUtf8, decodeUtf8With )
import Data.Text.Encoding.Error ( lenientDecode )
import Control.Monad            ( when )
import Foreign.Ptr              ( plusPtr, Ptr )
import Foreign.ForeignPtr       ( withForeignPtr )
#ifdef DEBUG_PS
import Foreign.ForeignPtr       ( addForeignPtrFinalizer )
import Foreign.Ptr              ( FunPtr )
#endif
import qualified Data.ByteString.Lazy as BL
import qualified Codec.Compression.GZip as GZ
import qualified Codec.Compression.Zlib.Internal as ZI
import Darcs.Global ( addCRCWarning )
#ifdef HAVE_MMAP
import System.IO.MMap( mmapFileByteString )
import System.Mem( performGC )
import System.Posix.Files( fileSize, getSymbolicLinkStatus )
#endif
unsafeWithInternals :: B.ByteString -> (Ptr Word8 -> Int -> IO a) -> IO a
unsafeWithInternals ps f
 = case BI.toForeignPtr ps of
   (fp,s,l) -> withForeignPtr fp $ \p -> f (p `plusPtr` s) l
readIntPS :: B.ByteString -> Maybe (Int, B.ByteString)
readIntPS = BC.readInt . BC.dropWhile isSpace
unpackPSFromUTF8 :: B.ByteString -> String
unpackPSFromUTF8  = T.unpack . decodeUtf8With lenientDecode
packStringToUTF8 :: String -> B.ByteString
packStringToUTF8 = encodeUtf8 . T.pack
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 w =
    w == 0x20 ||    
    w == 0x09 ||    
    w == 0x0A ||    
    w == 0x0D       
firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int
firstnonspace !ptr !n !m
    | n >= m    = return n
    | otherwise = do w <- peekElemOff ptr n
                     if isSpaceWord8 w then firstnonspace ptr (n+1) m else return n
firstspace :: Ptr Word8 -> Int -> Int -> IO Int
firstspace !ptr !n !m
    | n >= m    = return n
    | otherwise = do w <- peekElemOff ptr n
                     if (not . isSpaceWord8) w then firstspace ptr (n+1) m else return n
dropSpace :: B.ByteString -> B.ByteString
dropSpace (BI.PS x s l) = BI.inlinePerformIO $ withForeignPtr x $ \p -> do
    i <- firstnonspace (p `plusPtr` s) 0 l
    return $! if i == l then B.empty else BI.PS x (s+i) (li)
breakSpace :: B.ByteString -> (B.ByteString,B.ByteString)
breakSpace (BI.PS x s l) = BI.inlinePerformIO $ withForeignPtr x $ \p -> do
    i <- firstspace (p `plusPtr` s) 0 l
    return $! case () of {_
        | i == 0    -> (B.empty, BI.PS x s l)
        | i == l    -> (BI.PS x s l, B.empty)
        | otherwise -> (BI.PS x s i, BI.PS x (s+i) (li))
    }
isFunky :: B.ByteString -> Bool
isFunky ps = case BI.toForeignPtr ps of
   (x,s,l) ->
    unsafePerformIO $ withForeignPtr x $ \p->
    (/=0) `fmap` has_funky_char (p `plusPtr` s) (fromIntegral l)
foreign import ccall unsafe "fpstring.h has_funky_char" has_funky_char
    :: Ptr Word8 -> CInt -> IO CInt
hashPS :: B.ByteString -> Int32
hashPS ps =
   case BI.toForeignPtr ps of
   (x,s,l) ->
    unsafePerformIO $ withForeignPtr x $ \p->
    hash (p `plusPtr` s) l
hash :: Ptr Word8 -> Int -> IO Int32
hash ptr len = f (0 :: Int32) ptr len
 where f h _ 0 = return h
       f h p n = do x <- peek p
                    let !h' =  (fromIntegral x) + (rotateL h 8)
                    f h' (p `advancePtr` 1) (n1)
substrPS :: B.ByteString -> B.ByteString -> Maybe Int
substrPS tok str
    | B.null tok = Just 0
    | B.length tok > B.length str = Nothing
    | otherwise = do n <- BC.elemIndex (BC.head tok) str
                     let ttok = B.tail tok
                         reststr = B.drop (n+1) str
                     if ttok == B.take (B.length ttok) reststr
                        then Just n
                        else ((n+1)+) `fmap` substrPS tok reststr
breakFirstPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakFirstPS c p = case BC.elemIndex c p of
                   Nothing -> Nothing
                   Just n -> Just (B.take n p, B.drop (n+1) p)
breakLastPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakLastPS c p = case BC.elemIndexEnd c p of
                  Nothing -> Nothing
                  Just n -> Just (B.take n p, B.drop (n+1) p)
linesPS :: B.ByteString -> [B.ByteString]
linesPS ps
     | B.null ps = [B.empty]
     | otherwise = BC.split '\n' ps
unlinesPS :: [B.ByteString] -> B.ByteString
unlinesPS [] = BC.empty
unlinesPS x  = BC.init $ BC.unlines x
gzDecompress :: Maybe Int -> BL.ByteString -> ([B.ByteString], Bool)
gzDecompress mbufsize =
    
    
    toListWarn . ZI.decompressWithErrors ZI.gzipFormat decompressParams
  where
        decompressParams = case mbufsize of
                              Just bufsize -> GZ.defaultDecompressParams { GZ.decompressBufferSize = bufsize }
                              Nothing -> GZ.defaultDecompressParams
        toListWarn :: ZI.DecompressStream -> ([B.ByteString], Bool)
        toListWarn = foldDecompressStream (\x ~(xs, b) -> (x:xs, b)) ([], False) handleBad
        
        foldDecompressStream :: (B.ByteString -> a -> a) -> a
                             -> (ZI.DecompressError -> String -> a)
                             -> ZI.DecompressStream -> a
        foldDecompressStream chunk end err = fold
                   where
                       fold ZI.StreamEnd               = end
                       fold (ZI.StreamChunk bs stream) = chunk bs (fold stream)
                       fold (ZI.StreamError code msg)  = err code msg
        
        
        
        handleBad ZI.DataError "incorrect data check" = ([], True)
        handleBad _ msg = error msg
isGZFile :: FilePath -> IO (Maybe Int)
isGZFile f = do
    h <- openBinaryFile f ReadMode
    header <- B.hGet h 2
    if header /= BC.pack "\31\139"
       then do hClose h
               return Nothing
       else do hSeek h SeekFromEnd (4)
               len <- hGetLittleEndInt h
               hClose h
               return (Just len)
gzReadFilePS :: FilePath -> IO B.ByteString
gzReadFilePS f = do
    mlen <- isGZFile f
    case mlen of
       Nothing -> mmapFilePS f
       Just len ->
            do 
               
               
               let doDecompress buf = let (res, bad) = gzDecompress (Just len) buf
                                      in do when bad $ addCRCWarning f
                                            return res
               compressed <- (BL.fromChunks . return) `fmap` mmapFilePS f
               B.concat `fmap` doDecompress compressed
hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt h = do
    b1 <- ord `fmap` hGetChar h
    b2 <- ord `fmap` hGetChar h
    b3 <- ord `fmap` hGetChar h
    b4 <- ord `fmap` hGetChar h
    return $ b1 + 256*b2 + 65536*b3 + 16777216*b4
gzWriteFilePS :: FilePath -> B.ByteString -> IO ()
gzWriteFilePS f ps = gzWriteFilePSs f [ps]
gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO ()
gzWriteFilePSs f pss  =
    BL.writeFile f $ GZ.compress $ BL.fromChunks pss
gzReadStdin :: IO B.ByteString
gzReadStdin = do header <- B.hGet stdin 2
                 rest   <- B.hGetContents stdin
                 let allStdin = B.concat [header,rest]
                 return $
                  if header /= BC.pack "\31\139"
                   then allStdin
                   else let decompress = fst . gzDecompress Nothing
                            compressed = BL.fromChunks [allStdin]
                        in
                        B.concat $ decompress compressed
mmapFilePS :: FilePath -> IO B.ByteString
#ifdef HAVE_MMAP
mmapFilePS f = do
  x <- mmapFileByteString f Nothing
   `catch` (\(_ :: SomeException) -> do
                     size <- fileSize `fmap` getSymbolicLinkStatus f
                     if size == 0
                        then return B.empty
                        else performGC >> mmapFileByteString f Nothing)
  return x
#else
mmapFilePS = B.readFile
#endif
foreign import ccall unsafe "static fpstring.h conv_to_hex" conv_to_hex
    :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
fromPS2Hex :: B.ByteString -> B.ByteString
fromPS2Hex ps = case BI.toForeignPtr ps of
          (x,s,l) ->
           BI.unsafeCreate (2*l) $ \p -> withForeignPtr x $ \f ->
           conv_to_hex p (f `plusPtr` s) $ fromIntegral l
foreign import ccall unsafe "static fpstring.h conv_from_hex" conv_from_hex
    :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
fromHex2PS :: B.ByteString -> B.ByteString
fromHex2PS ps = case BI.toForeignPtr ps of
          (x,s,l) ->
           BI.unsafeCreate (l `div` 2) $ \p -> withForeignPtr x $ \f ->
           conv_from_hex p (f `plusPtr` s) (fromIntegral $ l `div` 2)
betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
               -> Maybe (B.ByteString)
betweenLinesPS start end ps
 = case break (start ==) (linesPS ps) of
       (_, _:rest@(bs1:_)) ->
           case BI.toForeignPtr bs1 of
            (ps1,s1,_) ->
             case break (end ==) rest of
               (_, bs2:_) -> case BI.toForeignPtr bs2 of (_,s2,_) -> Just $ fromForeignPtr ps1 s1 (s2  s1)
               _ -> Nothing
       _ -> Nothing
breakAfterNthNewline :: Int -> B.ByteString
                        -> Maybe (B.ByteString, B.ByteString)
breakAfterNthNewline 0 the_ps | B.null the_ps = Just (B.empty, B.empty)
breakAfterNthNewline n the_ps =
  case BI.toForeignPtr the_ps of
  (fp,the_s,l) ->
   unsafePerformIO $ withForeignPtr fp $ \p ->
   do let findit 0 s | s == end = return $ Just (the_ps, B.empty)
          findit _ s | s == end = return Nothing
          findit 0 s = let left_l = s  the_s
                       in return $ Just (fromForeignPtr fp the_s left_l,
                                         fromForeignPtr fp s (l  left_l))
          findit i s = do w <- peekElemOff p s
                          if w == nl then findit (i1) (s+1)
                                     else findit i (s+1)
          nl = BI.c2w '\n'
          end = the_s + l
      findit n the_s
breakBeforeNthNewline :: Int -> B.ByteString -> (B.ByteString, B.ByteString)
breakBeforeNthNewline 0 the_ps
 | B.null the_ps = (B.empty, B.empty)
breakBeforeNthNewline n the_ps =
 case BI.toForeignPtr the_ps of
 (fp,the_s,l) ->
   unsafePerformIO $ withForeignPtr fp $ \p ->
   do let findit _ s | s == end = return (the_ps, B.empty)
          findit i s = do w <- peekElemOff p s
                          if w == nl
                            then if i == 0
                                 then let left_l = s  the_s
                                      in return (fromForeignPtr fp the_s left_l,
                                                 fromForeignPtr fp s (l  left_l))
                                 else findit (i1) (s+1)
                            else findit i (s+1)
          nl = BI.c2w '\n'
          end = the_s + l
      findit n the_s
unsafeRunInput :: InputT IO a -> a
unsafeRunInput = unsafePerformIO . runInputTBehavior (useFileHandle stdin) defaultSettings
isAscii :: B.ByteString -> Bool
isAscii = B.all (\w -> w < 128)
decodeLocale :: B.ByteString -> String
decodeLocale = unsafeRunInput . decode
encodeLatin1 :: String -> B.ByteString
encodeLatin1 = B.pack . (map (fromIntegral . ord))
encodeLocale :: String -> B.ByteString
encodeLocale = unsafeRunInput . encode
decodeString :: String -> String
decodeString = decodeLocale . encodeLatin1