module Data.Text.ICU.Collate
(
MCollator
, Attribute(..)
, AlternateHandling(..)
, CaseFirst(..)
, Strength(..)
, open
, collate
, collateIter
, equals
, getAttribute
, setAttribute
, sortKey
, freeze
) where
import Data.ByteString (empty)
import Data.ByteString.Internal (ByteString(..), create, mallocByteString,
memcpy)
import Data.Int (Int32)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Foreign (useAsPtr)
import Data.Text.ICU.Collate.Internal (Collator(..), MCollator, UCollator,
equals, withCollator, wrap)
import Data.Text.ICU.Error.Internal (UErrorCode, handleError)
import Data.Text.ICU.Internal
(LocaleName, UChar, CharIterator, UCharIterator,
asOrdering, withCharIterator, withLocaleName)
import Data.Typeable (Typeable)
import Data.Word (Word8)
import Foreign.C.String (CString)
import Foreign.C.Types (CInt)
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (Ptr, nullPtr)
data AlternateHandling = NonIgnorable
| Shifted
deriving (Eq, Bounded, Enum, Show, Typeable)
data CaseFirst = UpperFirst
| LowerFirst
deriving (Eq, Bounded, Enum, Show, Typeable)
data Strength = Primary
| Secondary
| Tertiary
| Quaternary
| Identical
deriving (Eq, Bounded, Enum, Show, Typeable)
data Attribute = French Bool
| AlternateHandling AlternateHandling
| CaseFirst (Maybe CaseFirst)
| CaseLevel Bool
| NormalizationMode Bool
| Strength Strength
| HiraganaQuaternaryMode Bool
| Numeric Bool
deriving (Eq, Show, Typeable)
type UColAttribute = CInt
type UColAttributeValue = CInt
toUAttribute :: Attribute -> (UColAttribute, UColAttributeValue)
toUAttribute (French v)
= ((0), toOO v)
toUAttribute (AlternateHandling v)
= ((1), toAH v)
toUAttribute (CaseFirst v)
= ((2), toCF v)
toUAttribute (CaseLevel v)
= ((3), toOO v)
toUAttribute (NormalizationMode v)
= ((4), toOO v)
toUAttribute (Strength v)
= ((5), toS v)
toUAttribute (HiraganaQuaternaryMode v)
= ((6), toOO v)
toUAttribute (Numeric v)
= ((7), toOO v)
toOO :: Bool -> UColAttributeValue
toOO False = 16
toOO True = 17
toAH :: AlternateHandling -> UColAttributeValue
toAH NonIgnorable = 21
toAH Shifted = 20
toCF :: Maybe CaseFirst -> UColAttributeValue
toCF Nothing = 16
toCF (Just UpperFirst) = 25
toCF (Just LowerFirst) = 24
toS :: Strength -> UColAttributeValue
toS Primary = 0
toS Secondary = 1
toS Tertiary = 2
toS Quaternary = 3
toS Identical = 15
fromOO :: UColAttributeValue -> Bool
fromOO (16) = False
fromOO (17) = True
fromOO bad = valueError "fromOO" bad
fromAH :: UColAttributeValue -> AlternateHandling
fromAH (21) = NonIgnorable
fromAH (20) = Shifted
fromAH bad = valueError "fromAH" bad
fromCF :: UColAttributeValue -> Maybe CaseFirst
fromCF (16) = Nothing
fromCF (25) = Just UpperFirst
fromCF (24) = Just LowerFirst
fromCF bad = valueError "fromCF" bad
fromS :: UColAttributeValue -> Strength
fromS (0) = Primary
fromS (1) = Secondary
fromS (2) = Tertiary
fromS (3) = Quaternary
fromS (15) = Identical
fromS bad = valueError "fromS" bad
fromUAttribute :: UColAttribute -> UColAttributeValue -> Attribute
fromUAttribute key val =
case key of
(0) -> French (fromOO val)
(1) -> AlternateHandling (fromAH val)
(2) -> CaseFirst (fromCF val)
(3) -> CaseLevel (fromOO val)
(4) -> NormalizationMode (fromOO val)
(5) -> Strength (fromS val)
(6) -> HiraganaQuaternaryMode (fromOO val)
(7) -> Numeric (fromOO val)
_ -> valueError "fromUAttribute" key
valueError :: Show a => String -> a -> z
valueError func bad = error ("Data.Text.ICU.Collate.IO." ++ func ++
": invalid value " ++ show bad)
type UCollationResult = CInt
open :: LocaleName
-> IO MCollator
open loc = wrap =<< withLocaleName loc (handleError . ucol_open)
setAttribute :: MCollator -> Attribute -> IO ()
setAttribute c a =
withCollator c $ \cptr ->
handleError $ uncurry (ucol_setAttribute cptr) (toUAttribute a)
getAttribute :: MCollator -> Attribute -> IO Attribute
getAttribute c a = do
let name = fst (toUAttribute a)
val <- withCollator c $ \cptr -> handleError $ ucol_getAttribute cptr name
return $! fromUAttribute name val
collate :: MCollator -> Text -> Text -> IO Ordering
collate c a b =
withCollator c $ \cptr ->
useAsPtr a $ \aptr alen ->
useAsPtr b $ \bptr blen ->
fmap asOrdering . handleError $
ucol_strcoll cptr aptr (fromIntegral alen) bptr (fromIntegral blen)
collateIter :: MCollator -> CharIterator -> CharIterator -> IO Ordering
collateIter c a b =
fmap asOrdering . withCollator c $ \cptr ->
withCharIterator a $ \ai ->
withCharIterator b $ handleError . ucol_strcollIter cptr ai
sortKey :: MCollator -> Text -> IO ByteString
sortKey c t
| T.null t = return empty
| otherwise = do
withCollator c $ \cptr ->
useAsPtr t $ \tptr tlen -> do
let len = fromIntegral tlen
loop n = do
fp <- mallocByteString (fromIntegral n)
i <- withForeignPtr fp $ \p -> ucol_getSortKey cptr tptr len p n
let j = fromIntegral i
case undefined of
_ | i == 0 -> error "Data.Text.ICU.Collate.IO.sortKey: internal error"
| i > n -> loop i
| i <= n `div` 2 -> create j $ \p -> withForeignPtr fp $ \op ->
memcpy p op (fromIntegral i)
| otherwise -> return $! PS fp 0 j
loop (min (len * 4) 8)
freeze :: MCollator -> IO Collator
freeze c = do
p <- withCollator c $ \cptr ->
with (512)
(handleError . ucol_safeClone cptr nullPtr)
C `fmap` wrap p
foreign import ccall unsafe "hs_text_icu.h __hs_ucol_open" ucol_open
:: CString -> Ptr UErrorCode -> IO (Ptr UCollator)
foreign import ccall unsafe "hs_text_icu.h __hs_ucol_getAttribute" ucol_getAttribute
:: Ptr UCollator -> UColAttribute -> Ptr UErrorCode -> IO UColAttributeValue
foreign import ccall unsafe "hs_text_icu.h __hs_ucol_setAttribute" ucol_setAttribute
:: Ptr UCollator -> UColAttribute -> UColAttributeValue -> Ptr UErrorCode -> IO ()
foreign import ccall unsafe "hs_text_icu.h __hs_ucol_strcoll" ucol_strcoll
:: Ptr UCollator -> Ptr UChar -> Int32 -> Ptr UChar -> Int32
-> Ptr UErrorCode -> IO UCollationResult
foreign import ccall unsafe "hs_text_icu.h __hs_ucol_getSortKey" ucol_getSortKey
:: Ptr UCollator -> Ptr UChar -> Int32 -> Ptr Word8 -> Int32
-> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ucol_strcollIter" ucol_strcollIter
:: Ptr UCollator -> Ptr UCharIterator -> Ptr UCharIterator -> Ptr UErrorCode
-> IO UCollationResult
foreign import ccall unsafe "hs_text_icu.h __hs_ucol_safeClone" ucol_safeClone
:: Ptr UCollator -> Ptr a -> Ptr Int32 -> Ptr UErrorCode
-> IO (Ptr UCollator)