{-# LANGUAGE ForeignFunctionInterface #-}
-- |
-- Module      : Data.Text.ICU.Text
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Functions for manipulating Unicode text, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.

module Data.Text.ICU.Text
    (
    -- * Case conversion
    -- $case
      toCaseFold
    , toLower
    , toUpper
    ) where

import Data.Int (Int32)
import Data.Text (Text)
import Data.Text.Foreign (fromPtr, useAsPtr)
import Data.Text.ICU.Error.Internal (UErrorCode, handleError, handleOverflowError)
import Data.Text.ICU.Internal (LocaleName, UChar, withLocaleName)
import Data.Word (Word32)
import Foreign.C.String (CString)
import Foreign.Marshal.Array (allocaArray)
import Foreign.Ptr (Ptr, castPtr)
import System.IO.Unsafe (unsafePerformIO)

-- $case
--
-- In some languages, case conversion is a locale- and
-- context-dependent operation. The case conversion functions in this
-- module are locale and context sensitive.

-- | Case-fold the characters in a string.
--
-- Case folding is locale independent and not context sensitive, but
-- there is an option for treating the letter I specially for Turkic
-- languages.  The result may be longer or shorter than the original.
toCaseFold :: Bool
           -- ^ Whether to include or exclude mappings for
           -- dotted and dotless I and i that are marked with
           -- 'I' in @CaseFolding.txt@.
           -> Text -> Text
toCaseFold excludeI s = unsafePerformIO .
  useAsPtr s $ \sptr slen -> do
    -- 'False' becomes 0 and 'True' becomes 1 in the options word handed
    -- to the C wrapper.
    let opts = fromIntegral . fromEnum $ excludeI
    -- Buffer sizing is delegated to 'handleOverflowError', exactly as
    -- 'caseMap' does, with the source length as the initial capacity.
    -- This replaces a hand-rolled retry loop built on 'handleError'.
    -- NOTE(review): presumably 'handleError' throws on every ICU
    -- failure code, buffer overflow included, which would have made the
    -- old retry branch unreachable whenever folding lengthens the text;
    -- confirm against Data.Text.ICU.Error.Internal.
    handleOverflowError (fromIntegral slen)
      (\dptr dlen -> u_strFoldCase dptr dlen sptr (fromIntegral slen) opts)
      (\dptr dlen -> fromPtr (castPtr dptr) (fromIntegral dlen))

-- | The shared shape of the locale-sensitive ICU case mapping wrappers
-- imported below: destination buffer and capacity, source buffer and
-- length, locale name, and a pointer through which the error code is
-- reported.  The 'Int32' result is used as the output length.
type CaseMapper = Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> CString
                -> Ptr UErrorCode -> IO Int32

-- | Apply a locale-sensitive 'CaseMapper' to a piece of text.
--
-- The destination buffer and its sizing are managed by
-- 'handleOverflowError'; the source length is passed as the initial
-- capacity.
caseMap :: CaseMapper -> LocaleName -> Text -> Text
caseMap mapFn loc s = unsafePerformIO .
  withLocaleName loc $ \locale ->
    useAsPtr s $ \sptr slen ->
      handleOverflowError (fromIntegral slen)
        (\dptr dlen -> mapFn dptr dlen sptr (fromIntegral slen) locale)
        (\dptr dlen -> fromPtr (castPtr dptr) (fromIntegral dlen))

-- | Lowercase the characters in a string.
--
-- Casing is locale dependent and context sensitive.  The result may
-- be longer or shorter than the original.
toLower :: LocaleName -> Text -> Text
toLower = caseMap u_strToLower

-- | Uppercase the characters in a string.
--
-- Casing is locale dependent and context sensitive.  The result may
-- be longer or shorter than the original.
toUpper :: LocaleName -> Text -> Text
toUpper = caseMap u_strToUpper

foreign import ccall unsafe "hs_text_icu.h __hs_u_strFoldCase" u_strFoldCase
    :: Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> Word32 -> Ptr UErrorCode
    -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_u_strToLower" u_strToLower
    :: CaseMapper

foreign import ccall unsafe "hs_text_icu.h __hs_u_strToUpper" u_strToUpper
    :: CaseMapper