-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Unicode normalization using the ICU library -- -- Unicode normalization using the ICU library @package unicode-normalization @version 0.1 -- | This module contains basic definitions for the CompactString ICU -- bindings. The real functionality is in other modules. module Text.Unicode.Base -- | The ICU character type. A UChar is one 16-bit code unit of a UTF-16 -- encoded string. type UChar = Word16 -- | The internal ICU error code type. type UErrorCode = CInt -- | The internal ICU boolean type. See unicode/umachine.h. Be aware that -- to Haskell, this is a numeric type and not a boolean. type UBool = CChar -- | Converts an ICU bool to a Haskell one, preserving truth or falsehood. uBoolToBool :: UBool -> Bool -- | A type class for all option types for which we want to turn a list -- of options into a bit field. class BitPackable a intValue :: (BitPackable a) => a -> Int32 packOptions :: (BitPackable a) => [a] -> Int32 withPackedOptions :: (BitPackable a) => [a] -> (Int32 -> IO b) -> IO b -- | Runs a raw ICU-type function on a CompactString encoded in UTF16. The -- ICU-type function has type Ptr UChar -> Int32 -> IO a. The given -- function must not modify the memory under the Ptr UChar. The size of -- the Ptr UChar in 16-bit words is passed in as the Int32 argument. -- Accessing memory from Ptr UChar outside of that size is equally -- unsafe. withCompactString :: CompactString UTF16Native -> (Ptr UChar -> Int32 -> IO a) -> IO a -- | Provides simple (i.e. abort-if-anything-wrong) error handling for ICU -- functions. -- -- Takes as an argument a function that writes an ICU error code to a -- certain memory address (like most ICU4C functions do). -- -- This function runs the given function, giving it a memory address to -- write the error code to. When the given function indicates an error, -- it aborts the program. Otherwise it just returns the result.
handleError :: (Ptr UErrorCode -> IO a) -> IO a -- | Converts a C ordering (-1 means LT, 0 means EQ, 1 means GT) to a -- Haskell ordering. cOrderingToOrdering :: (Integral a) => a -> Ordering -- | This module contains functions to do Unicode normalization of -- CompactStrings. module Text.Unicode.Normalization -- | A data type for representing an ICU normalization mode. You use this -- to specify how you'd like ICU to normalize your string. data NormalizationMode NFD :: NormalizationMode NFKD :: NormalizationMode NFC :: NormalizationMode NFKC :: NormalizationMode FCD :: NormalizationMode -- | Internal function to convert a NormalizationMode to its C enum value. normalizationToCInt :: NormalizationMode -> CInt -- | Options to pass to normalize. -- -- There is only one option at the moment. data NormalizationOption -- | Normalize according to Unicode 3.2. Unicode3_2 :: NormalizationOption -- | Normalizes the given string, according to the given normalization mode -- and options. -- -- This function is a higher-level wrapper around raw_normalize. -- -- TODO: Move this to something like Data.CompactString.Normalization, -- eventually. -- -- TODO: Generalize out the UErrorCode handling. normalize :: CompactString UTF16Native -> NormalizationMode -> [NormalizationOption] -> CompactString UTF16Native -- | A type for the result of a quick normalization check. data NormalizationCheckResult Normalized :: NormalizationCheckResult NotNormalized :: NormalizationCheckResult MaybeNormalized :: NormalizationCheckResult -- | Attempts to check quickly whether a string is already normalized -- according to a certain normalization mode and options. -- -- When you get MaybeNormalized as a result, you should normalize the -- string and compare it to the original to know if it is normalized. You -- can make ICU do that by calling isNormalized.
quickCheck :: CompactString UTF16Native -> NormalizationMode -> [NormalizationOption] -> NormalizationCheckResult -- | Tells whether a string is already normalized according to a -- certain mode and options. isNormalized :: CompactString UTF16Native -> NormalizationMode -> [NormalizationOption] -> Bool -- | Concatenates two normalized strings, such that the result is also -- normalized. -- -- More formally: Given that string1 is normalized according to mode and -- options, and string2 is normalized according to mode and options, the -- result of concatenate string1 string2 mode options will be a -- concatenation of string1 and string2 and be normalized according to -- mode and options. concatenate :: CompactString UTF16Native -> CompactString UTF16Native -> NormalizationMode -> [NormalizationOption] -> CompactString UTF16Native -- | A data type to encode options to the compare function. data ComparisonOption -- | Assume that both strings are FCD normalized. InputIsFCD :: ComparisonOption -- | Do case-insensitive comparison. IgnoreCase :: ComparisonOption -- | Compare by code point order (default is code unit order). CompareCodePointOrder :: ComparisonOption -- | Compares two Unicode strings for canonical equivalence. -- -- Two Unicode strings are canonically equivalent when their NFD and NFC -- normalizations are equal. compare :: CompactString UTF16Native -> CompactString UTF16Native -> [ComparisonOption] -> Ordering instance Eq ComparisonOption instance Show ComparisonOption instance Eq NormalizationCheckResult instance Show NormalizationCheckResult instance Eq NormalizationOption instance Show NormalizationOption instance Eq NormalizationMode instance Show NormalizationMode instance BitPackable ComparisonOption instance BitPackable NormalizationOption