module Text.Unicode.Normalization(NormalizationMode(..)
,normalizationToCInt
,NormalizationOption(..)
,normalize
,NormalizationCheckResult(..)
,quickCheck
,isNormalized
,concatenate
,ComparisonOption(..)
,compare
) where
import Prelude hiding (compare)
import System.IO.Unsafe(unsafePerformIO)
import Foreign.Ptr
import Foreign.C.Types
import Data.Int
import Data.CompactString(CompactString)
import Data.CompactString.Encodings(UTF16Native)
import qualified Data.CompactString as CS
import qualified Data.CompactString.Unsafe as CS(unsafeFromByteString)
#if __GLASGOW_HASKELL__ >= 608
import Data.ByteString.Internal(createAndTrim)
#else
import Data.ByteString.Base(createAndTrim)
#endif
import Text.Unicode.Base
import Debug.Trace
data NormalizationMode = NFD | NFKD | NFC | NFKC | FCD deriving (Eq, Show)
data NormalizationOption = Unicode3_2
deriving (Eq, Show)
instance BitPackable NormalizationOption where
intValue Unicode3_2 = 0x20
normalizationToCInt :: NormalizationMode -> CInt
normalizationToCInt NFD = 2
normalizationToCInt NFKD = 3
normalizationToCInt NFC = 4
normalizationToCInt NFKC = 5
normalizationToCInt FCD = 6
foreign import ccall "unicode/unorm.h unorm_normalize_3_8" raw_normalize :: Ptr UChar -> Int32 -> CInt -> Int32 -> Ptr UChar -> Int32 -> Ptr UErrorCode -> IO Int32
normalize :: CompactString UTF16Native -> NormalizationMode -> [NormalizationOption] -> CompactString UTF16Native
normalize s n opts = CS.unsafeFromByteString $ unsafePerformIO $ createAndTrim maxBufSizeWord8s $ \tptr ->
withCompactString s $ \sptr slen ->
withPackedOptions opts $ \rawOptions ->
let buf = castPtr tptr :: Ptr UChar
rawNormalization = normalizationToCInt n
rawBufSize = fromIntegral maxBufSizeUChars
in do resultLength <- handleError $ \errptr ->
raw_normalize sptr slen rawNormalization rawOptions buf rawBufSize errptr
return (fromIntegral resultLength * 2)
where
maxBufSizeWord8s = 3 * 2 * CS.length s
maxBufSizeUChars = 3 * CS.length s
data NormalizationCheckResult = Normalized | NotNormalized | MaybeNormalized deriving (Eq, Show)
int32ToNormalizationCheckResult :: Int32 -> NormalizationCheckResult
int32ToNormalizationCheckResult 0 = NotNormalized
int32ToNormalizationCheckResult 1 = Normalized
int32ToNormalizationCheckResult 2 = MaybeNormalized
foreign import ccall "unicode/unorm.h unorm_quickCheckWithOptions_3_8" raw_quickCheck :: Ptr UChar -> Int32 -> CInt -> Int32 -> Ptr UErrorCode -> IO Int32
quickCheck :: CompactString UTF16Native -> NormalizationMode -> [NormalizationOption] -> NormalizationCheckResult
quickCheck s nm opts = int32ToNormalizationCheckResult $ unsafePerformIO $ withCompactString s $ \buf len ->
withPackedOptions opts $ \rawOpts ->
handleError $ \errptr -> raw_quickCheck buf len rawNormalizationMode rawOpts errptr
where rawNormalizationMode = normalizationToCInt nm
foreign import ccall "unicode/unorm.h unorm_isNormalizedWithOptions_3_8" raw_isNormalized :: Ptr UChar -> Int32 -> CInt -> Int32 -> Ptr UErrorCode -> IO UBool
isNormalized :: CompactString UTF16Native -> NormalizationMode -> [NormalizationOption] -> Bool
isNormalized s nm opts = uBoolToBool $ unsafePerformIO $ withCompactString s $ \buf len ->
withPackedOptions opts $ \rawOpts ->
handleError $ \errptr -> raw_isNormalized buf len rawNormalizationMode rawOpts errptr
where rawNormalizationMode = normalizationToCInt nm
foreign import ccall "unicode/unorm.h unorm_concatenate_3_8" raw_concatenate :: Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> CInt -> Int32 -> Ptr UErrorCode -> IO Int32
concatenate :: CompactString UTF16Native
-> CompactString UTF16Native
-> NormalizationMode
-> [NormalizationOption]
-> CompactString UTF16Native
concatenate s1 s2 m opts = CS.unsafeFromByteString $ unsafePerformIO $ createAndTrim maxBufSizeWord8s $ \targetPtrWord8 ->
withCompactString s1 $ \buf1 len1 ->
withCompactString s2 $ \buf2 len2 ->
withPackedOptions opts $ \rawOpts ->
handleError $ \errptr ->
let targetPtr = castPtr targetPtrWord8 :: Ptr UChar
in do bufUsed <- raw_concatenate buf1 len1
buf2 len2
targetPtr maxBufSizeUChars
rawMode rawOpts errptr
return (fromIntegral bufUsed * 2)
where rawMode = normalizationToCInt m
maxBufSizeWord8s :: Int
maxBufSizeWord8s = fromIntegral (2 * maxBufSizeUChars)
maxBufSizeUChars :: Int32
maxBufSizeUChars = fromIntegral (3 * (CS.length s1 + CS.length s2))
data ComparisonOption = InputIsFCD
| IgnoreCase
| CompareCodePointOrder
deriving (Eq, Show)
instance BitPackable ComparisonOption where
intValue InputIsFCD = 0x20000
intValue IgnoreCase = 0x10000
intValue CompareCodePointOrder = 0x8000
foreign import ccall "unicode/unorm.h unorm_compare_3_8" raw_compare :: Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> Int32 -> Ptr UErrorCode -> IO Int32
compare :: CompactString UTF16Native -> CompactString UTF16Native -> [ComparisonOption] -> Ordering
compare s1 s2 opts = cOrderingToOrdering $ unsafePerformIO $ withCompactString s1 $ \buf1 len1 ->
withCompactString s2 $ \buf2 len2 ->
withPackedOptions opts $ \rawOpts ->
handleError $ \errptr ->
raw_compare buf1 len1 buf2 len2 rawOpts errptr