-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Access Unicode character database -- -- unicode-data provides Haskell APIs to efficiently access the -- unicode character database. Performance is the primary goal in the -- design of this package. -- -- The Haskell data structures are generated programmatically from the -- unicode character database (UCD) files. The latest unicode version -- supported by this library is 13.0.0. @package unicode-data @version 0.1.0 -- | Fast, static bitmap lookup utilities module Unicode.Internal.Bits -- | lookupBit64 addr index looks up the bit stored at bit index -- index using a bitmap starting at the address addr. -- Looks up the 64-bit word containing the bit and then the bit in that -- word. The caller must make sure that the 64-bit word at the byte -- address addr + (index / 64) * 8 is legally accessible memory. lookupBit64 :: Addr# -> Int -> Bool module Unicode.Internal.Char.DerivedCoreProperties isUppercase :: Char -> Bool isLowercase :: Char -> Bool isAlphabetic :: Char -> Bool -- | Case and case mapping related functions. module Unicode.Char.Case -- | Returns True for lower-case letters. -- --
--   isLower c == Data.Char.isLower c
--   
isLower :: Char -> Bool -- | Returns True for upper-case or title-case letters. Title case -- is used by a small number of letter ligatures like the -- single-character form of Lj. -- --
--   isUpper c == Data.Char.isUpper c
--   
isUpper :: Char -> Bool module Unicode.Internal.Char.PropList isWhite_Space :: Char -> Bool module Unicode.Internal.Char.UnicodeData.CombiningClass combiningClass :: Char -> Int isCombining :: Char -> Bool module Unicode.Internal.Char.UnicodeData.Compositions compose :: Char -> Char -> Maybe Char composeStarters :: Char -> Char -> Maybe Char isSecondStarter :: Char -> Bool module Unicode.Internal.Char.UnicodeData.Decomposable isDecomposable :: Char -> Bool module Unicode.Internal.Char.UnicodeData.DecomposableK isDecomposable :: Char -> Bool module Unicode.Internal.Char.UnicodeData.Decompositions decompose :: Char -> [Char] module Unicode.Internal.Char.UnicodeData.DecompositionsK2 decompose :: Char -> [Char] module Unicode.Internal.Char.UnicodeData.DecompositionsK decompose :: Char -> [Char] -- | Fast division by known constants. -- -- Division by a constant can be replaced by a double-word -- multiplication. Roughly speaking, instead of dividing by x, multiply -- by 2^64/x, obtaining a 128-bit-long product, and take the upper 64 bits. The -- peculiar details can be found in Hacker's Delight, Ch. 10. -- -- Even GHC 8.10 does not provide a primitive for a signed double-word -- multiplication, but since our applications do not involve negative -- integers, we convert Int to Word and use -- timesWord#. -- -- Textbook unsigned division by 21 or 28 becomes involved when an -- argument is allowed to take the full range of Word up to 2^64. -- Luckily, in our case the argument was cast from Int, so we -- can guarantee that it is below 2^63. module Unicode.Internal.Division -- | Input must be non-negative. -- -- Instead of division by 21, we compute floor(floor((2^68+17)/21 * n) / -- 2^68) = floor((2^68+17)/21 * n/2^68) = floor(n/21 + -- (n/2^63 * 17/32)/21) = floor(n/21), because n/2^63 * -- 17/32 < 1. quotRem21 :: Int -> (Int, Int) -- | Input must be non-negative. 
-- -- Instead of division by 28, we compute floor(floor((2^65+3)/7 * n) / -- 2^67) = floor((2^65+3)/7 * n/2^67) = floor(n/28 + -- (n/2^63 * 3/4)/28) = floor(n/28), because n/2^63 * 3/4 -- < 1. quotRem28 :: Int -> (Int, Int) -- | General character property related functions. module Unicode.Char.General -- | Returns True for alphabetic Unicode characters (lower-case, -- upper-case and title-case letters, plus letters of caseless scripts -- and modifier letters). -- --
--   isLetter c == Data.Char.isLetter c
--   
isLetter :: Char -> Bool -- | Returns True for any whitespace characters, and the control -- characters \t, \n, \r, \f, -- \v. -- --
--   isSpace c == Data.Char.isSpace c
--   
isSpace :: Char -> Bool -- | Determine whether a character is a jamo L, V or T character. isJamo :: Char -> Bool -- | Total count of all jamo characters. -- --
--   jamoNCount = jamoVCount * jamoTCount
--   
jamoNCount :: Int -- | First leading consonant jamo. jamoLFirst :: Int -- | Given a Unicode character, if it is a leading jamo, return its index -- in the list of leading jamo consonants, otherwise return -- Nothing. jamoLIndex :: Char -> Maybe Int -- | Last leading consonant jamo. jamoLLast :: Int -- | First vowel jamo. jamoVFirst :: Int -- | Total count of vowel jamo. jamoVCount :: Int -- | Given a Unicode character, if it is a vowel jamo, return its index in -- the list of vowel jamo, otherwise return Nothing. jamoVIndex :: Char -> Maybe Int -- | Last vowel jamo. jamoVLast :: Int -- | The first trailing consonant jamo. -- -- Note that jamoTFirst does not represent a valid T, it -- represents a missing T i.e. LV without a T. See comments under -- jamoTIndex . jamoTFirst :: Int -- | Total count of trailing consonant jamo. jamoTCount :: Int -- | Given a Unicode character, if it is a trailing jamo consonant, return -- its index in the list of trailing jamo consonants, otherwise return -- Nothing. -- -- Note that index 0 is not a valid index for a trailing consonant. Index -- 0 corresponds to an LV syllable, without a T. See "Hangul Syllable -- Decomposition" in the Conformance chapter of the Unicode standard for -- more details. jamoTIndex :: Char -> Maybe Int -- | Last trailing consonant jamo. jamoTLast :: Int -- | Codepoint of the first pre-composed Hangul character. hangulFirst :: Int -- | Codepoint of the last Hangul character. hangulLast :: Int -- | Determine if the given character is a precomposed Hangul syllable. isHangul :: Char -> Bool -- | Determine if the given character is a Hangul LV syllable. isHangulLV :: Char -> Bool -- | Low level Unicode database functions to facilitate Unicode -- normalization. -- -- For more information on Unicode normalization please refer to the -- following sections of the Unicode standard: -- -- module Unicode.Char.Normalization -- | Returns True if a character is a combining character. 
isCombining :: Char -> Bool -- | Returns the combining class of a character. combiningClass :: Char -> Int -- | Return True if a starter character may combine with some -- preceding starter character. isCombiningStarter :: Char -> Bool -- | Compose a starter character (combining class 0) with a combining -- character (non-zero combining class). Returns the composed character -- if the starter combines with the combining character, returns -- Nothing otherwise. compose :: Char -> Char -> Maybe Char -- | Compose a starter character with another starter character. Returns -- the composed character if the two starters combine, returns -- Nothing otherwise. composeStarters :: Char -> Char -> Maybe Char -- | Whether we are decomposing in canonical or compatibility mode. data DecomposeMode Canonical :: DecomposeMode Kompat :: DecomposeMode -- | Given a non-Hangul character determine if the character is -- decomposable. Note that in case of compatibility decompositions a -- character may decompose into a single compatibility character. isDecomposable :: DecomposeMode -> Char -> Bool -- | Decompose a non-Hangul character into its canonical or compatibility -- decompositions. Note that the resulting characters may further -- decompose. decompose :: DecomposeMode -> Char -> [Char] -- | Decompose a Hangul syllable into its corresponding Jamo characters. decomposeHangul :: Char -> (Char, Char, Char) -- | This module provides APIs to access the Unicode character database -- (UCD) corresponding to Unicode Standard version 13.0.0. -- -- This module re-exports several sub-modules under it. The sub-module -- structure under Char is largely based on the "Property Index -- by Scope of Use" in Unicode® Standard Annex #44. -- -- The Unicode.Char.* modules in turn depend on -- Unicode.Internal.Char.* modules which are programmatically -- generated from the Unicode standard's Unicode character database -- files. 
The module structure under Unicode.Internal.Char is -- largely based on the UCD text file names from which the properties are -- generated. -- -- For the original UCD files used in this code please refer to the -- UCD section on the Unicode standard page. See -- https://www.unicode.org/reports/tr44/ to understand the -- contents and the format of the unicode database files. module Unicode.Char