module Codec.Text.Detect (detectEncodingName, detectEncoding) where
import Control.Exception
import qualified Data.ByteString.Internal as SI
import qualified Data.ByteString.Lazy as L
import Data.Traversable (traverse)
import Control.Applicative
import Foreign.C.Types
import Foreign.C.String
import Foreign.Ptr
import Foreign.ForeignPtr
import System.IO
import System.IO.Unsafe
#if !MIN_VERSION_base(4,3,0)
mask :: ((IO a -> IO a) -> IO b) -> IO b
mask io = blocked >>= \b -> if b then io id else block $ io unblock
#endif
type Csd_t = Ptr ()
foreign import ccall unsafe "csd_open" c_csd_open :: IO Csd_t
foreign import ccall unsafe "csd_consider" c_csd_consider :: Csd_t -> CString -> CInt -> IO CInt
foreign import ccall unsafe "csd_close" c_csd_close :: Csd_t -> IO CString
detectEncodingName :: L.ByteString -> Maybe String
detectEncodingName b = unsafePerformIO $ do
mask $ \restore -> do
csd <- c_csd_open
restore ((\f -> foldr f (return ()) (L.toChunks b)) $ \chunk feed_more -> do
let (fptr, ptr_offset, chunk_length) = SI.toForeignPtr chunk
res <- withForeignPtr fptr $ \ptr -> c_csd_consider csd (ptr `plusPtr` ptr_offset) (fromIntegral chunk_length)
case res `compare` 0 of
LT -> return ()
EQ -> feed_more
GT -> return ()
) `onException` c_csd_close csd
c_encoding_ptr <- c_csd_close csd
if c_encoding_ptr == nullPtr
then return Nothing
else Just . normalise <$> peekCString c_encoding_ptr
where
normalise "GB18030" = "gb18030"
normalise x = x
detectEncoding :: L.ByteString -> IO (Maybe TextEncoding)
detectEncoding = traverse mkTextEncoding . detectEncodingName