#include <unicode/urename.h>
module Codec.Text.UConv (
convert, EncodingName,
) where
import Foreign
import Foreign.C
import Foreign.Ptr
import Data.ByteString.Internal
import Data.ByteString (ByteString, packCStringLen, concat)
import Data.ByteString.Lazy (ByteString, toChunks, fromChunks)
import Data.ByteString.Unsafe (unsafeUseAsCStringLen)
import System.IO.Unsafe (unsafePerformIO)
-- | Shorthand for the lazy 'Data.ByteString.Lazy.ByteString' type.
type LB = Data.ByteString.Lazy.ByteString
-- | Shorthand for the strict 'Data.ByteString.ByteString' type.
type SB = Data.ByteString.ByteString
-- | Name of a character encoding, given as a plain 'String'.
-- 'convert' marshals it to a C string and passes it to @ucnv_convert@.
type EncodingName = String
-- | Types that can be exchanged with C code as a raw byte buffer plus length.
--
-- 'convert' uses this class both to expose its input to the C converter
-- and to rebuild its result from the converter's output buffer.
class UConvertible a where
  -- | Run an action with the value exposed as a 'CStringLen'
  -- (pointer and length), returning the action's result.
  withCStringLenU :: a -> (CStringLen -> IO r) -> IO r

  -- | Build a value from the bytes described by a 'CStringLen'.
  fromCStringLenU :: CStringLen -> IO a
-- | 'String' values are marshalled with base's 'withCStringLen' and read
-- back with 'peekCStringLen'.
--
-- NOTE(review): in current versions of base these two functions go through
-- the locale encoding -- confirm that is what callers of 'convert' expect.
instance UConvertible String where
  withCStringLenU str action = withCStringLen str action
  fromCStringLenU buf = peekCStringLen buf
-- | Lazy byte strings are flattened into one strict byte string before being
-- handed to C, and come back as a lazy byte string made of a single chunk.
instance UConvertible LB where
  withCStringLenU lazy action =
    -- Concatenate all chunks so the C side sees one contiguous buffer.
    unsafeUseAsCStringLen (Data.ByteString.concat (toChunks lazy)) action
  fromCStringLenU buf = do
    chunk <- packCStringLen buf
    return (fromChunks [chunk])
-- | Strict byte strings are exposed to C through 'unsafeUseAsCStringLen'
-- and rebuilt from a C buffer with 'packCStringLen'.
instance UConvertible SB where
  withCStringLenU strict action = unsafeUseAsCStringLen strict action
  fromCStringLenU buf = packCStringLen buf
-- | Convert @input@ from the encoding named @fromcode@ to the encoding named
-- @tocode@ using ICU's @ucnv_convert@.
--
-- The outcome is delivered in the caller's monad @m@: 'return' with the
-- converted value on success, or 'fail' with the message
-- @\"\<\<error: N\>\>\"@, where @N@ is the numeric @UErrorCode@ reported by
-- ICU. ICU warnings (negative codes) count as success.
--
-- Empty input yields empty output without calling ICU. If the initial output
-- buffer (four bytes per input byte) is too small, the conversion is retried
-- with the exact size ICU asked for.
--
-- 'unsafePerformIO' is used because the call only reads the input bytes and
-- writes into buffers allocated here.
-- NOTE(review): this assumes ICU resolves converter names the same way for
-- the lifetime of the process -- confirm.
--
-- NOTE(review): from base 4.13 on, 'fail' belongs to @MonadFail@ rather than
-- 'Monad'; the constraint is kept as it was so existing callers are not
-- affected.
convert :: (UConvertible a, UConvertible b, Monad m)
        => EncodingName  -- ^ encoding of the input
        -> EncodingName  -- ^ encoding wanted for the output
        -> a             -- ^ data to convert
        -> m b
convert fromcode tocode input = unsafePerformIO $
  withCString fromcode $ \fromcode_c ->
    withCString tocode $ \tocode_c ->
      withCStringLenU input $ \(in_c, in_len) ->
        let -- Wrap a finished output buffer as a successful result in @m@.
            deliver buf = fmap return (fromCStringLenU buf)

            -- One conversion attempt with an output buffer of @capacity@ bytes.
            attempt capacity =
              allocaBytes capacity $ \out_c ->
                alloca $ \err -> do
                  -- ICU does nothing if the incoming code already signals
                  -- failure, so it must start out as U_ZERO_ERROR.
                  poke err uZeroError
                  -- 'toEnum' (not 'fromIntegral') so that a length beyond
                  -- the Int32 range raises instead of silently wrapping.
                  len <- ucnv_convert tocode_c
                                      fromcode_c
                                      out_c
                                      (toEnum capacity)
                                      in_c
                                      (toEnum in_len)
                                      err
                  errCode <- peek err
                  let needed = fromEnum len
                      outcome
                        -- Zero or a warning (negative): the buffer holds
                        -- @needed@ valid bytes. A warning is raised e.g. when
                        -- the output exactly fills the buffer and no NUL fits.
                        | errCode <= uZeroError = deliver (out_c, needed)
                        -- Buffer too small: ICU returned the size it needs.
                        -- The size check guarantees the retry terminates.
                        | errCode == uBufferOverflowError && needed > capacity =
                            attempt needed
                        | otherwise =
                            return (fail ("<<error: " ++ show errCode ++ ">>"))
                  outcome
        in if in_len == 0
             -- Nothing to convert. Skipping ICU also avoids passing it a
             -- possibly-NULL source pointer, which it rejects.
             then allocaBytes 1 $ \empty_c -> deliver (empty_c, 0)
             else attempt (4 * in_len)

-- | ICU's @U_ZERO_ERROR@: no error and no warning. Codes greater than this
-- are failures; smaller ones are warnings.
uZeroError :: CInt
uZeroError = 0

-- | ICU's @U_BUFFER_OVERFLOW_ERROR@: the output did not fit into the buffer.
uBufferOverflowError :: CInt
uBufferOverflowError = 15

-- | Raw binding to ICU's @ucnv_convert@.
--
-- The error argument is a pointer to a @UErrorCode@, which is an int-sized
-- C enum, hence 'CInt'.
--
-- NOTE(review): the import has no explicit entity string, so the C symbol
-- is taken from the identifier itself. The file includes
-- @\<unicode\/urename.h\>@, presumably so the preprocessor rewrites the
-- identifier to ICU's versioned symbol name -- confirm against the build
-- setup before adding an entity string.
foreign import ccall ucnv_convert
  :: CString    -- ^ name of the target encoding
  -> CString    -- ^ name of the source encoding
  -> Ptr CChar  -- ^ output buffer
  -> Int32      -- ^ capacity of the output buffer, in bytes
  -> Ptr CChar  -- ^ input bytes
  -> Int32      -- ^ number of input bytes
  -> Ptr CInt   -- ^ in\/out @UErrorCode@
  -> IO Int32   -- ^ length of the complete output, in bytes