#include
-- NOTE(review): the #include directive above has no argument in this copy of
-- the file; presumably it named the ICU header that declares ucnv_convert --
-- confirm against the upstream source and restore it.
-----------------------------------------------------------------------------
-- |
-- Copyright   :  (c) 2006-2007 Duncan Coutts, 2008 Audrey Tang
-- License     :  BSD-style
--
-- Maintainer  :  audreyt@audreyt.org
-- Stability   :  experimental
-- Portability :  portable (H98 + FFI)
--
-- String encoding conversion
--
-----------------------------------------------------------------------------
module Codec.Text.UConv (

  -- | This module provides pure functions for converting the string encoding
  -- of strings.

  -- * Simple conversion API
  convert,
  EncodingName,

  ) where

import Foreign
import Foreign.C
import Foreign.Ptr
import Data.ByteString.Internal
import Data.ByteString (ByteString, packCStringLen, concat)
import Data.ByteString.Lazy (ByteString, toChunks, fromChunks)
import Data.ByteString.Unsafe (unsafeUseAsCStringLen)
import System.IO.Unsafe (unsafePerformIO)

-- | Lazy 'Data.ByteString.Lazy.ByteString'.
type LB = Data.ByteString.Lazy.ByteString

-- | Strict 'Data.ByteString.ByteString'.
type SB = Data.ByteString.ByteString

-- | Name of a character encoding, passed to the converter as a C string.
type EncodingName = String

-- | Values that can be handed to C as a 'CStringLen' and rebuilt from one.
class UConvertible a where
    {-# SPECIALIZE withCStringLenU :: SB -> (CStringLen -> IO b) -> IO b #-}
    {-# SPECIALIZE withCStringLenU :: LB -> (CStringLen -> IO b) -> IO b #-}
    {-# SPECIALIZE withCStringLenU :: String -> (CStringLen -> IO b) -> IO b #-}
    -- | Run an action on the value viewed as a pointer/length pair.
    withCStringLenU :: a -> (CStringLen -> IO b) -> IO b

    {-# SPECIALIZE fromCStringLenU :: CStringLen -> IO SB #-}
    {-# SPECIALIZE fromCStringLenU :: CStringLen -> IO LB #-}
    {-# SPECIALIZE fromCStringLenU :: CStringLen -> IO String #-}
    -- | Build a value from a pointer/length pair.
    fromCStringLenU :: CStringLen -> IO a

-- | Marshalled with 'withCStringLen' and 'peekCStringLen'.
instance UConvertible String where
    withCStringLenU = withCStringLen
    fromCStringLenU = peekCStringLen

-- | All chunks are concatenated into one strict buffer before being passed
-- to C; a result is returned as a single-chunk lazy string.
instance UConvertible LB where
    withCStringLenU = unsafeUseAsCStringLen . Data.ByteString.concat . toChunks
    fromCStringLenU = fmap (fromChunks . (:[])) . packCStringLen

-- | The buffer is passed to C without copying; results are copied out with
-- 'packCStringLen'.
instance UConvertible SB where
    withCStringLenU = unsafeUseAsCStringLen
    fromCStringLenU = packCStringLen

-- | Mirror of ICU's @U_FAILURE@ test: only codes strictly greater than
-- @U_ZERO_ERROR@ (0) are errors.  Negative codes are warnings and still
-- count as success.
isFailure :: CInt -> Bool
isFailure code = code > 0

{-# NOINLINE convert #-}
{-# SPECIALIZE convert :: EncodingName -> EncodingName -> String -> IO String #-}
{-# SPECIALIZE convert :: EncodingName -> EncodingName -> String -> Maybe String #-}
-- |
-- Convert a sequence of bytes from one encoding to another.
--
-- The conversion is done by a single call to @ucnv_convert@ into a scratch
-- buffer four times the size of the input.  When the converter reports an
-- error the result is 'fail' in the caller's monad, with a message carrying
-- the numeric ICU error code; warnings are treated as success.
--
-- NOTE(review): from GHC 8.8 on, 'fail' is a method of @MonadFail@ rather
-- than 'Monad', so this signature would need a @MonadFail m@ constraint
-- there -- confirm which compilers are targeted before changing it.
convert :: (UConvertible a, UConvertible b, Monad m)
        => EncodingName -- ^ The encoding to convert from
        -> EncodingName -- ^ The encoding to convert to
        -> a            -- ^ The input to convert
        -> m b          -- ^ The output converted
convert fromcode tocode input = unsafePerformIO $
    withCString fromcode $ \fromcode_c ->
    withCString tocode $ \tocode_c ->
    withCStringLenU input $ \(in_c, in_len) -> do
        let out_len = 4 * in_len
        allocaBytes out_len $ \out_c ->
            -- The error slot is a full C int: UErrorCode is an enum, and the
            -- converter both reads the inbound value and writes the outcome
            -- through this pointer.
            alloca $ \err -> do
                poke err (0 :: CInt)
                out_c' <- memset (castPtr out_c) 0 (toEnum out_len)
                len <- ucnv_convert tocode_c fromcode_c
                           (castPtr out_c') (toEnum out_len)
                           (castPtr in_c) (toEnum in_len) err
                errCode <- peek err
                if isFailure errCode
                    then return (fail ("Codec.Text.UConv.convert: ICU error code "
                                       ++ show errCode))
                    -- Copy the result out before the scratch buffer is freed.
                    else fmap return (fromCStringLenU (out_c, fromEnum len))

-- | ICU one-shot converter.  The last argument is the in/out @UErrorCode@.
foreign import ccall ucnv_convert
    :: CString      -- toConverterName
    -> CString      -- fromConverterName
    -> Ptr CChar    -- target
    -> Int32        -- targetCapacity
    -> Ptr CChar    -- source
    -> Int32        -- sourceLength
    -> Ptr CInt     -- pErrorCode
    -> IO Int32     -- return: targetLength