module Text.Iconv ( Charset, Bytes, Fuzziness(Fuzzy, Strict), convert, canConvert, ConversionResult(ConversionSuccess, ConversionFailed, ConversionUnsupported), ) where import Prelude hiding (ioError) import Control.Exception (bracket, ioError) import Control.Monad (liftM) import Foreign.C.Error (getErrno, errnoToIOError, eINVAL, eILSEQ, e2BIG) import Foreign.C.String (CString, withCString, withCStringLen, peekCStringLen) import Foreign.C.Types (CInt, CSize) import Foreign.Ptr (Ptr, nullPtr, plusPtr) import Foreign.Marshal.Alloc (alloca, allocaBytes) import Foreign.Storable (peek, poke) import System.IO.Unsafe (unsafePerformIO) type Charset = String type Bytes = String data ConversionResult = ConversionSuccess Bytes | ConversionFailed | ConversionUnsupported deriving (Read, Show, Eq, Ord) -- We should get this from C, but there isn't a nice way to do so type Iconv = Ptr IconvInternals data IconvInternals foreign import ccall unsafe "static iconv.h iconv_open" c_iconv_open :: CString -> CString -> IO Iconv foreign import ccall unsafe "static iconv.h iconv" c_iconv :: Iconv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize -> IO CSize foreign import ccall unsafe "static iconv.h iconv_close" c_iconv_close :: Iconv -> IO CInt iconv_open :: Charset -> Charset -> IO (Maybe Iconv) iconv_open from to = withCString from $ \c_from -> withCString to $ \c_to -> do i <- c_iconv_open c_to c_from if i == plusPtr nullPtr (-1) -- Ugly hack again! then do e <- getErrno if e == eINVAL then return Nothing else ioError (errnoToIOError "iconv_open" e Nothing Nothing) else return (Just i) iconv :: Bool -> Iconv -> Bytes -> IO (Maybe Bytes) iconv ignore_ilseq i xs = withCStringLen xs $ \(c_xs, len) -> alloca $ \p_xs -> alloca $ \p_len -> allocaBytes bs $ \res -> alloca $ \p_res -> alloca $ \p_res_len -> do poke p_xs c_xs poke p_len (fromIntegral len) let iconv_core :: Ptr CString -> Ptr CSize -> IO (Maybe Bytes) iconv_core p_xs' p_len' = do poke p_res res poke p_res_len bs r <- c_iconv i p_xs' p_len' p_res p_res_len m_e <- if r == -1 then liftM Just getErrno else return Nothing case m_e of Just e | e == eINVAL || (e == eILSEQ && not ignore_ilseq) -> return Nothing | e /= e2BIG && e /= eILSEQ -> ioError (errnoToIOError "iconv" e Nothing Nothing) _ -> do unused_len <- peek p_res_len let res_len = bs - fromIntegral unused_len liftM Just $ peekCStringLen (res, res_len) let f = do m_res_init <- iconv_core p_xs p_len case m_res_init of Nothing -> return Nothing Just res_init -> do rem_inp_len <- peek p_len m_rest <- if rem_inp_len == 0 then iconv_core nullPtr nullPtr else f return $ case m_rest of Nothing -> Nothing Just rest -> Just (res_init ++ rest) f where bs :: Integral a => a bs = 1024 convert :: Fuzziness -> Charset -> Charset -> Bytes -> ConversionResult convert fuzziness from to xs = unsafePerformIO $ bracket (iconv_open from' to') (\m_i -> case m_i of Nothing -> return () Just i -> do -- Don't bother checking c_iconv_close suceeeds c_iconv_close i return ()) (\m_i -> case m_i of Nothing -> return ConversionUnsupported Just i -> do m <- iconv ignore_ilseq i xs case m of Nothing -> return ConversionFailed Just x -> return $ ConversionSuccess x) where (from', to', ignore_ilseq) = account_for_fuzziness fuzziness from to canConvert :: Fuzziness -> Charset -> Charset -> Bool canConvert fuzziness from to = unsafePerformIO $ do m_i <- iconv_open from to case m_i of Nothing -> return False Just i -> do -- Don't bother checking c_iconv_close suceeeds c_iconv_close i return True where (from', to', _) = account_for_fuzziness fuzziness from to data Fuzziness = Fuzzy | Strict deriving Eq account_for_fuzziness :: Fuzziness -> Charset -> Charset -> (Charset, Charset, Bool) account_for_fuzziness Strict from to = (from, to, False) account_for_fuzziness Fuzzy from to = (from, to ++ "//IGNORE//TRANSLIT", True)