-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Access Unicode character database -- -- unicode-data provides Haskell APIs to efficiently access the -- Unicode character database. Performance is the primary goal in the -- design of this package. -- -- The Haskell data structures are generated programmatically from the -- Unicode character database (UCD) files. The latest Unicode version -- supported by this library is 14.0.0. @package unicode-data @version 0.3.0 -- | Fast, static bitmap lookup utilities module Unicode.Internal.Bits -- | lookupBit64 addr index looks up the bit stored at bit index -- index using a bitmap starting at the address addr. -- Looks up the 64-bit word containing the bit and then the bit in that -- word. The caller must make sure that the 64-bit word at the byte -- address addr + (index / 64) * 8 is legally accessible memory. lookupBit64 :: Addr# -> Int -> Bool -- | lookupIntN addr index looks up the index-th -- 8-bit word in the bitmap starting at addr, then -- converts it to an Int. -- -- The caller must make sure that: -- --

ceiling (addr + (n * 8)) is a legally accessible Word8.

lookupIntN :: Addr# -> Int -> Int module Unicode.Internal.Char.DerivedCoreProperties isXID_Continue :: Char -> Bool isXID_Start :: Char -> Bool isID_Continue :: Char -> Bool isID_Start :: Char -> Bool isUppercase :: Char -> Bool isLowercase :: Char -> Bool isAlphabetic :: Char -> Bool -- | Case and case mapping related functions. module Unicode.Char.Case -- | Returns True for lower-case letters. isLowerCase :: Char -> Bool -- | Returns True for lower-case letters. -- | Deprecated: Use isLowerCase instead. Note that the behavior of this -- function does not match base:Data.Char.isLower. See -- Unicode.Char.Case.Compat for behavior compatible with -- base:Data.Char. isLower :: Char -> Bool -- | Returns True for upper-case letters. isUpperCase :: Char -> Bool -- | Returns True for upper-case letters. -- | Deprecated: Use isUpperCase instead. Note that the behavior of this -- function does not match base:Data.Char.isUpper. See -- Unicode.Char.Case.Compat for behavior compatible with -- base:Data.Char. isUpper :: Char -> Bool module Unicode.Internal.Char.PropList isPattern_Syntax :: Char -> Bool isPattern_White_Space :: Char -> Bool isWhite_Space :: Char -> Bool -- | Unicode Identifier and Pattern Syntax property functions based on -- Unicode Standard Annex #31 module Unicode.Char.Identifiers -- | Returns True if a character is an identifier continue -- character. isIDContinue :: Char -> Bool -- | Returns True if a character is an identifier start character. isIDStart :: Char -> Bool -- | Returns True if a character is an identifier continue -- character, using the NFKC modifications detailed in UAX #31, -- 5.1. isXIDContinue :: Char -> Bool -- | Returns True if a character is an identifier start character, -- using the NFKC modifications detailed in UAX #31, 5.1. isXIDStart :: Char -> Bool -- | Returns True if a character is a pattern syntax character. isPatternSyntax :: Char -> Bool -- | Returns True if a character is a pattern whitespace character. 
isPatternWhitespace :: Char -> Bool module Unicode.Internal.Char.UnicodeData.CombiningClass combiningClass :: Char -> Int isCombining :: Char -> Bool module Unicode.Internal.Char.UnicodeData.Compositions compose :: Char -> Char -> Maybe Char composeStarters :: Char -> Char -> Maybe Char isSecondStarter :: Char -> Bool module Unicode.Internal.Char.UnicodeData.Decomposable isDecomposable :: Char -> Bool module Unicode.Internal.Char.UnicodeData.DecomposableK isDecomposable :: Char -> Bool module Unicode.Internal.Char.UnicodeData.Decompositions decompose :: Char -> [Char] module Unicode.Internal.Char.UnicodeData.DecompositionsK2 decompose :: Char -> [Char] module Unicode.Internal.Char.UnicodeData.DecompositionsK decompose :: Char -> [Char] module Unicode.Internal.Char.UnicodeData.GeneralCategory generalCategory :: Char -> Int module Unicode.Internal.Char.UnicodeData.SimpleLowerCaseMapping toSimpleLowerCase :: Char -> Char module Unicode.Internal.Char.UnicodeData.SimpleTitleCaseMapping toSimpleTitleCase :: Char -> Char module Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping toSimpleUpperCase :: Char -> Char -- | Fast division by known constants. -- -- Division by a constant can be replaced by a double-word -- multiplication. Roughly speaking, instead of dividing by x, multiply -- by 2^64/x, obtaining a 128-bit-long product, and take the upper 64 bits. The -- peculiar details can be found in Hacker's Delight, Ch. 10. -- -- Even GHC 8.10 does not provide a primitive for a signed double-word -- multiplication, but since our applications do not involve negative -- integers, we convert Int to Word and use -- timesWord#. -- -- Textbook unsigned division by 21 or 28 becomes involved when an -- argument is allowed to take the full range of Word up to 2^64. -- Luckily, in our case the argument was cast from Int, so we -- can guarantee that it is below 2^63. module Unicode.Internal.Division -- | Input must be non-negative. 
-- -- Instead of division by 21, we compute floor(floor((2^68+17)/21 * n) / -- 2^68) = floor((2^68+17)/21 * n/2^68) = floor(n/21 + -- (n/2^63 * 17/32)/21) = floor(n/21), because n/2^63 * -- 17/32 < 1. quotRem21 :: Int -> (Int, Int) -- | Input must be non-negative. -- -- Instead of division by 28, we compute floor(floor((2^65+3)/7 * n) / -- 2^67) = floor((2^65+3)/7 * n/2^67) = floor(n/28 + -- (n/2^63 * 3/4)/28) = floor(n/28), because n/2^63 * 3/4 -- < 1. quotRem28 :: Int -> (Int, Int) -- | General character property related functions. module Unicode.Char.General -- | Unicode General Categories. -- -- These classes are defined in the [Unicode Character -- Database](http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table), -- part of the Unicode standard. -- -- Note: the classes must be in the same order they are listed in -- the Unicode Standard, because some functions (e.g. -- generalCategory) rely on the Enum instance. data GeneralCategory -- | Lu: Letter, Uppercase UppercaseLetter :: GeneralCategory -- | Ll: Letter, Lowercase LowercaseLetter :: GeneralCategory -- | Lt: Letter, Titlecase TitlecaseLetter :: GeneralCategory -- | Lm: Letter, Modifier ModifierLetter :: GeneralCategory -- | Lo: Letter, Other OtherLetter :: GeneralCategory -- | Mn: Mark, Non-Spacing NonSpacingMark :: GeneralCategory -- | Mc: Mark, Spacing Combining SpacingCombiningMark :: GeneralCategory -- | Me: Mark, Enclosing EnclosingMark :: GeneralCategory -- | Nd: Number, Decimal DecimalNumber :: GeneralCategory -- | Nl: Number, Letter LetterNumber :: GeneralCategory -- | No: Number, Other OtherNumber :: GeneralCategory -- | Pc: Punctuation, Connector ConnectorPunctuation :: GeneralCategory -- | Pd: Punctuation, Dash DashPunctuation :: GeneralCategory -- | Ps: Punctuation, Open OpenPunctuation :: GeneralCategory -- | Pe: Punctuation, Close ClosePunctuation :: GeneralCategory -- | Pi: Punctuation, Initial quote InitialQuote :: GeneralCategory -- | Pf: Punctuation, Final quote FinalQuote :: GeneralCategory -- | Po: 
Punctuation, Other OtherPunctuation :: GeneralCategory -- | Sm: Symbol, Math MathSymbol :: GeneralCategory -- | Sc: Symbol, Currency CurrencySymbol :: GeneralCategory -- | Sk: Symbol, Modifier ModifierSymbol :: GeneralCategory -- | So: Symbol, Other OtherSymbol :: GeneralCategory -- | Zs: Separator, Space Space :: GeneralCategory -- | Zl: Separator, Line LineSeparator :: GeneralCategory -- | Zp: Separator, Paragraph ParagraphSeparator :: GeneralCategory -- | Cc: Other, Control Control :: GeneralCategory -- | Cf: Other, Format Format :: GeneralCategory -- | Cs: Other, Surrogate Surrogate :: GeneralCategory -- | Co: Other, Private Use PrivateUse :: GeneralCategory -- | Cn: Other, Not Assigned NotAssigned :: GeneralCategory -- | Abbreviation of GeneralCategory used in the Unicode standard. generalCategoryAbbr :: GeneralCategory -> String -- | The Unicode general category of the character. -- -- This property is defined in the column 2 of the UnicodeData -- table. -- -- This relies on the Enum instance of GeneralCategory, -- which must remain in the same order as the categories are presented in -- the Unicode standard. -- --

--   show (generalCategory c) == show (Data.Char.generalCategory c)
--

generalCategory :: Char -> GeneralCategory -- | Returns True for alphabetic Unicode characters (lower-case, -- upper-case and title-case letters, plus letters of caseless scripts -- and modifier letters). -- -- Note: this function is not equivalent to isAlpha / -- isLetter: -- --

isAlpha matches the following general categories:
- UppercaseLetter (Lu)
- LowercaseLetter (Ll)
- TitlecaseLetter (Lt)
- ModifierLetter (Lm)
- OtherLetter (Lo)
whereas isAlphabetic matches:
- Uppercase property
- Lowercase property
- TitlecaseLetter (Lt)
- ModifierLetter (Lm)
- OtherLetter (Lo)
- LetterNumber (Nl)
- Other_Alphabetic property

isAlphabetic :: Char -> Bool -- | Selects alphabetic or numeric Unicode characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

UppercaseLetter
LowercaseLetter
TitlecaseLetter
ModifierLetter
OtherLetter
DecimalNumber
LetterNumber
OtherNumber

-- --

--   isAlphaNum c == Data.Char.isAlphaNum c
--

isAlphaNum :: Char -> Bool -- | Selects control characters, which are the non-printing characters of -- the Latin-1 subset of Unicode. -- -- This function returns True if its argument has the -- GeneralCategory Control. -- --

--   isControl c == Data.Char.isControl c
--

isControl :: Char -> Bool -- | Selects Unicode mark characters, for example accents and the like, -- which combine with preceding characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

NonSpacingMark
SpacingCombiningMark
EnclosingMark

-- --

--   isMark c == Data.Char.isMark c
--

isMark :: Char -> Bool -- | Selects printable Unicode characters (letters, numbers, marks, -- punctuation, symbols and spaces). -- -- This function returns False if its argument has one of the -- following GeneralCategorys, or True otherwise: -- --

LineSeparator
ParagraphSeparator
Control
Format
Surrogate
PrivateUse
NotAssigned

-- --

--   isPrint c == Data.Char.isPrint c
--

isPrint :: Char -> Bool -- | Selects Unicode punctuation characters, including various kinds of -- connectors, brackets and quotes. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

ConnectorPunctuation
DashPunctuation
OpenPunctuation
ClosePunctuation
InitialQuote
FinalQuote
OtherPunctuation

-- --

--   isPunctuation c == Data.Char.isPunctuation c
--

isPunctuation :: Char -> Bool -- | Selects Unicode space and separator characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

Space
LineSeparator
ParagraphSeparator

-- --

--   isSeparator c == Data.Char.isSeparator c
--

isSeparator :: Char -> Bool -- | Selects Unicode symbol characters, including mathematical and currency -- symbols. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

MathSymbol
CurrencySymbol
ModifierSymbol
OtherSymbol

-- --

--   isSymbol c == Data.Char.isSymbol c
--

isSymbol :: Char -> Bool -- | Returns True for any whitespace characters, and the control -- characters \t, \n, \r, \f, -- \v. -- -- See: Unicode White_Space. -- -- Note: isWhiteSpace is not equivalent to -- isSpace. isWhiteSpace selects the same characters from -- isSpace plus the following: -- --

U+0085 NEXT LINE (NEL)
U+2028 LINE SEPARATOR
U+2029 PARAGRAPH SEPARATOR

isWhiteSpace :: Char -> Bool -- | Returns True for alphabetic Unicode characters (lower-case, -- upper-case and title-case letters, plus letters of caseless scripts -- and modifiers letters). -- | Deprecated: Use isAlphabetic instead. Note that the behavior of -- this function does not match base:Data.Char.isLetter. See -- Unicode.Char.General.Compat for behavior compatible with -- base:Data.Char. isLetter :: Char -> Bool -- | Returns True for any whitespace characters, and the control -- characters \t, \n, \r, \f, -- \v. -- | Deprecated: Use isWhiteSpace instead. Note that the behavior of -- this function does not match base:Data.Char.isSpace. See -- Unicode.Char.General.Compat for behavior compatible with -- base:Data.Char. isSpace :: Char -> Bool -- | Selects the first 128 characters of the Unicode character set, -- corresponding to the ASCII character set. isAscii :: Char -> Bool -- | Selects the first 256 characters of the Unicode character set, -- corresponding to the ISO 8859-1 (Latin-1) character set. isLatin1 :: Char -> Bool -- | Selects ASCII upper-case letters, i.e. characters satisfying both -- isAscii and isUpper. isAsciiUpper :: Char -> Bool -- | Selects ASCII lower-case letters, i.e. characters satisfying both -- isAscii and isLower. isAsciiLower :: Char -> Bool -- | Determine whether a character is a jamo L, V or T character. isJamo :: Char -> Bool -- | Total count of all jamo characters. -- --

--   jamoNCount = jamoVCount * jamoTCount
--

jamoNCount :: Int -- | First leading consonant jamo. jamoLFirst :: Int -- | Total count of leading consonant jamo. jamoLCount :: Int -- | Given a Unicode character, if it is a leading jamo, return its index -- in the list of leading jamo consonants, otherwise return -- Nothing. jamoLIndex :: Char -> Maybe Int -- | Last leading consonant jamo. jamoLLast :: Int -- | First vowel jamo. jamoVFirst :: Int -- | Total count of vowel jamo. jamoVCount :: Int -- | Given a Unicode character, if it is a vowel jamo, return its index in -- the list of vowel jamo, otherwise return Nothing. jamoVIndex :: Char -> Maybe Int -- | Last vowel jamo. jamoVLast :: Int -- | The first trailing consonant jamo. -- -- Note that jamoTFirst does not represent a valid T, it -- represents a missing T i.e. LV without a T. See comments under -- jamoTIndex . jamoTFirst :: Int -- | Total count of trailing consonant jamo. jamoTCount :: Int -- | Given a Unicode character, if it is a trailing jamo consonant, return -- its index in the list of trailing jamo consonants, otherwise return -- Nothing. -- -- Note that index 0 is not a valid index for a trailing consonant. Index -- 0 corresponds to an LV syllable, without a T. See "Hangul Syllable -- Decomposition" in the Conformance chapter of the Unicode standard for -- more details. jamoTIndex :: Char -> Maybe Int -- | Last trailing consonant jamo. jamoTLast :: Int -- | Codepoint of the first pre-composed Hangul character. hangulFirst :: Int -- | Codepoint of the last Hangul character. hangulLast :: Int -- | Determine if the given character is a precomposed Hangul syllable. isHangul :: Char -> Bool -- | Determine if the given character is a Hangul LV syllable. 
isHangulLV :: Char -> Bool instance GHC.Ix.Ix Unicode.Char.General.GeneralCategory instance GHC.Enum.Bounded Unicode.Char.General.GeneralCategory instance GHC.Enum.Enum Unicode.Char.General.GeneralCategory instance GHC.Classes.Ord Unicode.Char.General.GeneralCategory instance GHC.Classes.Eq Unicode.Char.General.GeneralCategory instance GHC.Show.Show Unicode.Char.General.GeneralCategory -- | Numeric character property related functions. module Unicode.Char.Numeric -- | Selects Unicode numeric characters, including digits from various -- scripts, Roman numerals, et cetera. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

DecimalNumber
LetterNumber
OtherNumber

-- --

--   isNumber c == Data.Char.isNumber c
--

isNumber :: Char -> Bool -- | Selects ASCII digits, i.e. '0'..'9'. isDigit :: Char -> Bool -- | Selects ASCII octal digits, i.e. '0'..'7'. isOctDigit :: Char -> Bool -- | Selects ASCII hexadecimal digits, i.e. '0'..'9', -- 'a'..'f', 'A'..'F'. isHexDigit :: Char -> Bool -- | Convert a single digit Char to the corresponding Int. -- This function fails unless its argument satisfies isHexDigit, -- but recognises both upper- and lower-case hexadecimal digits (that is, -- '0'..'9', 'a'..'f', -- 'A'..'F'). -- --

Examples

-- -- Characters '0' through '9' are converted properly to -- 0..9: -- --

--   >>> map digitToInt ['0'..'9']
--   [0,1,2,3,4,5,6,7,8,9]
--

-- -- Both upper- and lower-case 'A' through 'F' are -- converted as well, to 10..15. -- --

--   >>> map digitToInt ['a'..'f']
--   [10,11,12,13,14,15]
--   
--   >>> map digitToInt ['A'..'F']
--   [10,11,12,13,14,15]
--

-- -- Anything else throws an exception: -- --

--   >>> digitToInt 'G'
--   *** Exception: Char.digitToInt: not a digit 'G'
--   
--   >>> digitToInt '♥'
--   *** Exception: Char.digitToInt: not a digit '\9829'
--

digitToInt :: Char -> Int -- | Convert an Int in the range 0..15 to the -- corresponding single digit Char. This function fails on other -- inputs, and generates lower-case hexadecimal digits. intToDigit :: Int -> Char -- | Low level Unicode database functions to facilitate Unicode -- normalization. -- -- For more information on Unicode normalization please refer to the -- following sections of the Unicode standard: -- --

2 General Structure
- 2.3 Compatibility Characters
- 2.12 Equivalent Sequences
3 Conformance
- 3.6 Combination
- 3.7 Decomposition
- 3.11 Normalization Forms
- 3.12 Conjoining Jamo Behavior
4 Character Properties
- 4.3 Combining Classes
Unicode® Standard Annex #15 - Unicode Normalization Forms
Unicode® Standard Annex #44 - Unicode Character Database

module Unicode.Char.Normalization -- | Returns True if a character is a combining character. isCombining :: Char -> Bool -- | Returns the combining class of a character. combiningClass :: Char -> Int -- | Return True if a starter character may combine with some -- preceding starter character. isCombiningStarter :: Char -> Bool -- | Compose a starter character (combining class 0) with a combining -- character (non-zero combining class). Returns the composed character -- if the starter combines with the combining character, returns -- Nothing otherwise. compose :: Char -> Char -> Maybe Char -- | Compose a starter character with another starter character. Returns -- the composed character if the two starters combine, returns -- Nothing otherwise. composeStarters :: Char -> Char -> Maybe Char -- | Whether we are decomposing in canonical or compatibility mode. data DecomposeMode Canonical :: DecomposeMode Kompat :: DecomposeMode -- | Given a non-Hangul character determine if the character is -- decomposable. Note that in case compatibility decompositions a -- character may decompose into a single compatibility character. isDecomposable :: DecomposeMode -> Char -> Bool -- | Decompose a non-Hangul character into its canonical or compatibility -- decompositions. Note that the resulting characters may further -- decompose. decompose :: DecomposeMode -> Char -> [Char] -- | Decompose a Hangul syllable into its corresponding Jamo characters. decomposeHangul :: Char -> (Char, Char, Char) -- | Compatibility module for general character property related functions. -- -- The functions of this module are drop-in replacement for those in -- Data.Char. They are similar but not identical to some functions -- in Unicode.Char.General, therefore they are placed in a -- separate module in order to avoid ambiguity. module Unicode.Char.General.Compat -- | Same as isLetter. 
isAlpha :: Char -> Bool -- | Selects alphabetic Unicode characters (lower-case, upper-case and -- title-case letters, plus letters of caseless scripts and modifier -- letters). -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

UppercaseLetter
LowercaseLetter
TitlecaseLetter
ModifierLetter
OtherLetter

-- -- Note: this function is not equivalent to -- isAlphabetic. See the description of isAlphabetic for -- further details. -- --

--   isLetter c == Data.Char.isLetter c
--

isLetter :: Char -> Bool -- | Selects Unicode space characters (general category Space), and -- the control characters \t, \n, \r, -- \f, \v. -- -- Note: isSpace is not equivalent to -- isWhiteSpace. isWhiteSpace selects the same characters -- from isSpace plus the following: -- --

U+0085 NEXT LINE (NEL)
U+2028 LINE SEPARATOR
U+2029 PARAGRAPH SEPARATOR

-- --

--   isSpace c == Data.Char.isSpace c
--

isSpace :: Char -> Bool -- | Compatibility module for case and case mapping related functions. -- -- The functions of this module are drop-in replacement for those in -- Data.Char. They are similar but not identical to some functions -- in Unicode.Char.Case, therefore they are placed in a separate -- module in order to avoid ambiguity. module Unicode.Char.Case.Compat -- | Selects upper-case or title-case alphabetic Unicode characters -- (letters). Title case is used by a small number of letter ligatures -- like the single-character form of Lj. -- --

--   isUpper c == Data.Char.isUpper c
--

isUpper :: Char -> Bool -- | Selects lower-case alphabetic Unicode characters (letters). -- --

--   isLower c == Data.Char.isLower c
--

isLower :: Char -> Bool -- | Convert a letter to the corresponding upper-case letter, if any. Any -- other character is returned unchanged. -- --

--   toUpper c == Data.Char.toUpper c
--

toUpper :: Char -> Char -- | Convert a letter to the corresponding lower-case letter, if any. Any -- other character is returned unchanged. -- --

--   toLower c == Data.Char.toLower c
--

toLower :: Char -> Char -- | Convert a letter to the corresponding title-case or upper-case letter, -- if any. (Title case differs from upper case only for a small number of -- ligature letters.) Any other character is returned unchanged. -- --

--   toTitle c == Data.Char.toTitle c
--

toTitle :: Char -> Char -- | This module provides APIs to access the Unicode character database -- (UCD) corresponding to Unicode Standard version 14.0.0. -- -- This module re-exports several sub-modules under it. The sub-module -- structure under Char is largely based on the "Property Index -- by Scope of Use" in Unicode® Standard Annex #44. -- -- The Unicode.Char.* modules in turn depend on -- Unicode.Internal.Char.* modules which are programmatically -- generated from the Unicode standard's Unicode character database -- files. The module structure under Unicode.Internal.Char is -- largely based on the UCD text file names from which the properties are -- generated. -- -- For the original UCD files used in this code please refer to the -- UCD section on the Unicode standard page. See -- https://www.unicode.org/reports/tr44/ to understand the -- contents and the format of the unicode database files. module Unicode.Char -- | Same as isLetter. isAlpha :: Char -> Bool -- | Convert a letter to the corresponding upper-case letter, if any. Any -- other character is returned unchanged. -- --

--   toUpper c == Data.Char.toUpper c
--

toUpper :: Char -> Char -- | Convert a letter to the corresponding lower-case letter, if any. Any -- other character is returned unchanged. -- --

--   toLower c == Data.Char.toLower c
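--

toLower :: Char -> Char -- | Convert a letter to the corresponding title-case or upper-case letter, -- if any. (Title case differs from upper case only for a small number of -- ligature letters.) Any other character is returned unchanged. -- --

--   toTitle c == Data.Char.toTitle c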
--

toTitle :: Char -> Char -- | Version of Unicode standard used by unicode-data. unicodeVersion :: Version -- | The fromEnum method restricted to the type Char. ord :: Char -> Int -- | The toEnum method restricted to the type Char. chr :: Int -> Char