{-# LANGUAGE CPP, ForeignFunctionInterface #-} {-| This module contains basal stuff for the CompactString ICU bindings. The real functionality is in other modules. -} module Text.Unicode.Base(UChar ,UErrorCode ,UBool ,uBoolToBool ,BitPackable(..) ,withCompactString ,handleError ,cOrderingToOrdering ) where import Data.CompactString(CompactString) import Data.CompactString.Encodings(UTF16Native) import qualified Data.CompactString as CS import qualified Data.CompactString.Unsafe as CS(unsafeFromByteString) #if __GLASGOW_HASKELL__ >= 608 import Data.ByteString.Internal(toForeignPtr) #else import Data.ByteString.Base(toForeignPtr) #endif import Foreign.Ptr import Foreign.ForeignPtr import Foreign.C.Types(CInt, CChar) import Foreign.Storable(peek, poke) import Foreign.Marshal.Alloc(alloca) import Data.Int(Int32) import Data.Word(Word16) import Data.Bits((.|.)) import Control.Monad(when) {-| The ICU character type. A UChar is a 16-bit unit of a UTF-16 encoded string -} type UChar = Word16 {-| The internal ICU error code type. -} type UErrorCode = CInt {-| The internal ICU boolean type. See unicode/umachine.h. Be aware that to Haskell, this is a numeric type and not a boolean. -} type UBool = CChar {-| Converts an ICU bool to a Haskell one, preserving truth or falsehood. -} uBoolToBool :: UBool -> Bool uBoolToBool = (/= 0) {-| A type class for all option types, for which we want to turn a list of options into a bit field. -} class BitPackable a where {-| Tells of a certain option what its C integer/enum value is. -} intValue :: a -> Int32 {-| Takes a list of options and encodes it into a C integer. -} packOptions :: [a] -> Int32 packOptions = foldr (.|.) 0 . map intValue {-| Runs a function, expecting a 32-bit integer, with the given options bit-packed into a 32-bit integer. -} withPackedOptions :: [a] -> (Int32 -> IO b) -> IO b withPackedOptions opts f = f $ packOptions opts {-| Runs a raw ICU-type function on a CompactString encoded in UTF16. The ICU-type function has type "Ptr UChar -> Int32 -> a". This function may not modify the memory under the Ptr UChar. The size of the Ptr UChar in 16-bit words is passed in as the Int32 argument. Accessing memory from Ptr UChar outside of that size also sends us off to lala land, of course. -} withCompactString :: CompactString UTF16Native -> (Ptr UChar -> Int32 -> IO a) -> IO a withCompactString cs f = withForeignPtr ptr $ \word8Pointer -> let contentPointer = plusPtr word8Pointer offset :: Ptr UChar numUChars = fromIntegral (length `div` 2) -- length is always even for a UTF-16 string in f contentPointer numUChars where (ptr, offset, length) = toForeignPtr $ CS.toByteString cs {-| Provides simple (i.e. abort-if-anything-wrong) error handling for ICU functions. Takes as an argument a function that writes an ICU error code to a certain memory address (like most ICU4C functions do). This function runs the given function, giving it a memory address to write the error code to. When the given function indicates an error, it aborts the program. Otherwise it just returns the result. -} handleError :: (Ptr UErrorCode -> IO a) -> IO a handleError f = alloca $ \errptr -> do poke errptr 0 result <- f errptr errorCode <- peek errptr when (errorCode > 0) (error (errMsg ++ show errorCode)) return result where errMsg = "Data.CompactString.ICU.handleError: error returned by ICU4C function: " {-| Converts a C ordering (-1 means LT, 0 means EQ, 1 means GT) to a Haskell ordering. -} cOrderingToOrdering :: (Integral a) => a -> Ordering cOrderingToOrdering i | i < 0 = LT | i == 0 = EQ | i > 0 = GT