-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Access Unicode character database
--
-- unicode-data provides Haskell APIs to efficiently access the
-- Unicode character database. Performance is the primary goal in the
-- design of this package.
--
-- The Haskell data structures are generated programmatically from the
-- Unicode character database (UCD) files. The latest Unicode version
-- supported by this library is 14.0.0.
@package unicode-data
@version 0.3.0
-- | Fast, static bitmap lookup utilities
module Unicode.Internal.Bits
-- | lookupBit64 addr index looks up the bit stored at bit index
-- index using a bitmap starting at the address addr.
-- Looks up the 64-bit word containing the bit and then the bit in that
-- word. The caller must make sure that the 64-bit word at the byte
-- address addr + (index / 64) * 8 is legally accessible memory.
lookupBit64 :: Addr# -> Int -> Bool
-- | lookupIntN addr index looks up the index-th
-- 8-bit word in the bitmap starting at addr, then
-- converts it to an Int.
--
-- The caller must make sure that:
--
--
-- - ceiling (addr + (n * 8)) is legally accessible
-- Word8.
--
lookupIntN :: Addr# -> Int -> Int
module Unicode.Internal.Char.DerivedCoreProperties
isXID_Continue :: Char -> Bool
isXID_Start :: Char -> Bool
isID_Continue :: Char -> Bool
isID_Start :: Char -> Bool
isUppercase :: Char -> Bool
isLowercase :: Char -> Bool
isAlphabetic :: Char -> Bool
-- | Case and case mapping related functions.
module Unicode.Char.Case
-- | Returns True for lower-case letters.
isLowerCase :: Char -> Bool
-- | Returns True for lower-case letters.
-- | Deprecated: Use isLowerCase instead. Note that the behavior of this
-- function does not match base:Data.Char.isLower. See
-- Unicode.Char.Case.Compat for behavior compatible with
-- base:Data.Char.
isLower :: Char -> Bool
-- | Returns True for upper-case letters.
isUpperCase :: Char -> Bool
-- | Returns True for upper-case letters.
-- | Deprecated: Use isUpperCase instead. Note that the behavior of this
-- function does not match base:Data.Char.isUpper. See
-- Unicode.Char.Case.Compat for behavior compatible with
-- base:Data.Char.
isUpper :: Char -> Bool
module Unicode.Internal.Char.PropList
isPattern_Syntax :: Char -> Bool
isPattern_White_Space :: Char -> Bool
isWhite_Space :: Char -> Bool
-- | Unicode Identifier and Pattern Syntax property functions based on
-- Unicode Standard Annex #31
module Unicode.Char.Identifiers
-- | Returns True if a character is an identifier continue
-- character.
isIDContinue :: Char -> Bool
-- | Returns True if a character is an identifier start character.
isIDStart :: Char -> Bool
-- | Returns True if a character is an identifier continue
-- character, using the NFKC modifications detailed in UAX #31,
-- 5.1.
isXIDContinue :: Char -> Bool
-- | Returns True if a character is an identifier start character,
-- using the NFKC modifications detailed in UAX #31, 5.1.
isXIDStart :: Char -> Bool
-- | Returns True if a character is a pattern syntax character.
isPatternSyntax :: Char -> Bool
-- | Returns True if a character is a pattern whitespace character.
isPatternWhitespace :: Char -> Bool
module Unicode.Internal.Char.UnicodeData.CombiningClass
combiningClass :: Char -> Int
isCombining :: Char -> Bool
module Unicode.Internal.Char.UnicodeData.Compositions
compose :: Char -> Char -> Maybe Char
composeStarters :: Char -> Char -> Maybe Char
isSecondStarter :: Char -> Bool
module Unicode.Internal.Char.UnicodeData.Decomposable
isDecomposable :: Char -> Bool
module Unicode.Internal.Char.UnicodeData.DecomposableK
isDecomposable :: Char -> Bool
module Unicode.Internal.Char.UnicodeData.Decompositions
decompose :: Char -> [Char]
module Unicode.Internal.Char.UnicodeData.DecompositionsK2
decompose :: Char -> [Char]
module Unicode.Internal.Char.UnicodeData.DecompositionsK
decompose :: Char -> [Char]
module Unicode.Internal.Char.UnicodeData.GeneralCategory
generalCategory :: Char -> Int
module Unicode.Internal.Char.UnicodeData.SimpleLowerCaseMapping
toSimpleLowerCase :: Char -> Char
module Unicode.Internal.Char.UnicodeData.SimpleTitleCaseMapping
toSimpleTitleCase :: Char -> Char
module Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping
toSimpleUpperCase :: Char -> Char
-- | Fast division by known constants.
--
-- Division by a constant can be replaced by a double-word
-- multiplication. Roughly speaking, instead of dividing by x, multiply
-- by 2^64/x, obtaining 128-bit-long product, and take upper 64 bits. The
-- peculiar details can be found in Hacker's Delight, Ch. 10.
--
-- Even GHC 8.10 does not provide a primitive for a signed double-word
-- multiplication, but since our applications do not involve negative
-- integers, we convert Int to Word and use
-- timesWord#.
--
-- Textbook unsigned division by 21 or 28 becomes involved when an
-- argument is allowed to take the full range of Word up to 2^64.
-- Luckily, in our case the argument was cast from Int, so we
-- can guarantee that it is below 2^63.
module Unicode.Internal.Division
-- | Input must be non-negative.
--
-- Instead of division by 21, we compute floor(floor((2^68+17)/21 * n)
-- / 2^68) = floor((2^68+17)/21 * n/2^68) = floor(n/21 +
-- (n/2^63 * 17/32)/21) = floor(n/21), because n/2^63 *
-- 17/32 < 1.
quotRem21 :: Int -> (Int, Int)
-- | Input must be non-negative.
--
-- Instead of division by 28, we compute floor(floor((2^65+3)/7 * n)
-- / 2^67) = floor((2^65+3)/7 * n/2^67) = floor(n/28 +
-- (n/2^63 * 3/4)/28) = floor(n/28), because n/2^63 * 3/4
-- < 1.
quotRem28 :: Int -> (Int, Int)
-- | General character property related functions.
module Unicode.Char.General
-- | Unicode General Categories.
--
-- These classes are defined in the [Unicode Character
-- Database](http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table),
-- part of the Unicode standard.
--
-- Note: the classes must be in the same order they are listed in
-- the Unicode Standard, because some functions (e.g.
-- generalCategory) rely on the Enum instance.
data GeneralCategory
-- | Lu: Letter, Uppercase
UppercaseLetter :: GeneralCategory
-- | Ll: Letter, Lowercase
LowercaseLetter :: GeneralCategory
-- | Lt: Letter, Titlecase
TitlecaseLetter :: GeneralCategory
-- | Lm: Letter, Modifier
ModifierLetter :: GeneralCategory
-- | Lo: Letter, Other
OtherLetter :: GeneralCategory
-- | Mn: Mark, Non-Spacing
NonSpacingMark :: GeneralCategory
-- | Mc: Mark, Spacing Combining
SpacingCombiningMark :: GeneralCategory
-- | Me: Mark, Enclosing
EnclosingMark :: GeneralCategory
-- | Nd: Number, Decimal
DecimalNumber :: GeneralCategory
-- | Nl: Number, Letter
LetterNumber :: GeneralCategory
-- | No: Number, Other
OtherNumber :: GeneralCategory
-- | Pc: Punctuation, Connector
ConnectorPunctuation :: GeneralCategory
-- | Pd: Punctuation, Dash
DashPunctuation :: GeneralCategory
-- | Ps: Punctuation, Open
OpenPunctuation :: GeneralCategory
-- | Pe: Punctuation, Close
ClosePunctuation :: GeneralCategory
-- | Pi: Punctuation, Initial quote
InitialQuote :: GeneralCategory
-- | Pf: Punctuation, Final quote
FinalQuote :: GeneralCategory
-- | Po: Punctuation, Other
OtherPunctuation :: GeneralCategory
-- | Sm: Symbol, Math
MathSymbol :: GeneralCategory
-- | Sc: Symbol, Currency
CurrencySymbol :: GeneralCategory
-- | Sk: Symbol, Modifier
ModifierSymbol :: GeneralCategory
-- | So: Symbol, Other
OtherSymbol :: GeneralCategory
-- | Zs: Separator, Space
Space :: GeneralCategory
-- | Zl: Separator, Line
LineSeparator :: GeneralCategory
-- | Zp: Separator, Paragraph
ParagraphSeparator :: GeneralCategory
-- | Cc: Other, Control
Control :: GeneralCategory
-- | Cf: Other, Format
Format :: GeneralCategory
-- | Cs: Other, Surrogate
Surrogate :: GeneralCategory
-- | Co: Other, Private Use
PrivateUse :: GeneralCategory
-- | Cn: Other, Not Assigned
NotAssigned :: GeneralCategory
-- | Abbreviation of GeneralCategory used in the Unicode standard.
generalCategoryAbbr :: GeneralCategory -> String
-- | The Unicode general category of the character.
--
-- This property is defined in column 2 of the UnicodeData
-- table.
--
-- This relies on the Enum instance of GeneralCategory,
-- which must remain in the same order as the categories are presented in
-- the Unicode standard.
--
--
-- show (generalCategory c) == show (Data.Char.generalCategory c)
--
generalCategory :: Char -> GeneralCategory
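-- | Returns True for alphabetic Unicode characters (lower-case,
-- upper-case and title-case letters, plus letters of caseless scripts
-- and modifier letters).
--
-- Note: this function is not equivalent to the isAlpha /
-- isLetter functions of Unicode.Char.General.Compat:
--
--
-- - isAlpha matches the following general
-- categories: UppercaseLetter (Lu), LowercaseLetter (Ll),
-- TitlecaseLetter (Lt), ModifierLetter (Lm) and OtherLetter (Lo);
-- - whereas isAlphabetic matches the Unicode Alphabetic derived
-- property: the Uppercase and Lowercase properties, the general
-- categories TitlecaseLetter (Lt), ModifierLetter (Lm), OtherLetter
-- (Lo) and LetterNumber (Nl), and the Other_Alphabetic property.
--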
isAlphabetic :: Char -> Bool
-- | Selects alphabetic or numeric Unicode characters.
--
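-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - UppercaseLetter
-- - LowercaseLetter
-- - TitlecaseLetter
-- - ModifierLetter
-- - OtherLetter
-- - DecimalNumber
-- - LetterNumber
-- - OtherNumber
--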
--
-- isAlphaNum c == Data.Char.isAlphaNum c
--
isAlphaNum :: Char -> Bool
-- | Selects control characters, which are the non-printing characters of
-- the Latin-1 subset of Unicode.
--
-- This function returns True if its argument has the
-- GeneralCategory Control.
--
--
-- isControl c == Data.Char.isControl c
--
isControl :: Char -> Bool
-- | Selects Unicode mark characters, for example accents and the like,
-- which combine with preceding characters.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - NonSpacingMark
-- - SpacingCombiningMark
-- - EnclosingMark
--
--
-- isMark c == Data.Char.isMark c
--
isMark :: Char -> Bool
-- | Selects printable Unicode characters (letters, numbers, marks,
-- punctuation, symbols and spaces).
--
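-- This function returns False if its argument has one of the
-- following GeneralCategorys, or True otherwise:
--
--
-- - LineSeparator
-- - ParagraphSeparator
-- - Control
-- - Format
-- - Surrogate
-- - PrivateUse
-- - NotAssigned
--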
--
-- isPrint c == Data.Char.isPrint c
--
isPrint :: Char -> Bool
-- | Selects Unicode punctuation characters, including various kinds of
-- connectors, brackets and quotes.
--
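-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - ConnectorPunctuation
-- - DashPunctuation
-- - OpenPunctuation
-- - ClosePunctuation
-- - InitialQuote
-- - FinalQuote
-- - OtherPunctuation
--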
--
-- isPunctuation c == Data.Char.isPunctuation c
--
isPunctuation :: Char -> Bool
-- | Selects Unicode space and separator characters.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - Space
-- - LineSeparator
-- - ParagraphSeparator
--
--
-- isSeparator c == Data.Char.isSeparator c
--
isSeparator :: Char -> Bool
-- | Selects Unicode symbol characters, including mathematical and currency
-- symbols.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - MathSymbol
-- - CurrencySymbol
-- - ModifierSymbol
-- - OtherSymbol
--
--
-- isSymbol c == Data.Char.isSymbol c
--
isSymbol :: Char -> Bool
-- | Returns True for any whitespace characters, and the control
-- characters \t, \n, \r, \f,
-- \v.
--
-- See: Unicode White_Space.
--
-- Note: isWhiteSpace is not equivalent to
-- isSpace. isWhiteSpace selects the same characters as
-- isSpace plus the following:
--
--
-- - U+0085 NEXT LINE (NEL)
-- - U+2028 LINE SEPARATOR
-- - U+2029 PARAGRAPH SEPARATOR
--
isWhiteSpace :: Char -> Bool
-- | Returns True for alphabetic Unicode characters (lower-case,
-- upper-case and title-case letters, plus letters of caseless scripts
-- and modifier letters).
-- | Deprecated: Use isAlphabetic instead. Note that the behavior of
-- this function does not match base:Data.Char.isLetter. See
-- Unicode.Char.General.Compat for behavior compatible with
-- base:Data.Char.
isLetter :: Char -> Bool
-- | Returns True for any whitespace characters, and the control
-- characters \t, \n, \r, \f,
-- \v.
-- | Deprecated: Use isWhiteSpace instead. Note that the behavior of
-- this function does not match base:Data.Char.isSpace. See
-- Unicode.Char.General.Compat for behavior compatible with
-- base:Data.Char.
isSpace :: Char -> Bool
-- | Selects the first 128 characters of the Unicode character set,
-- corresponding to the ASCII character set.
isAscii :: Char -> Bool
-- | Selects the first 256 characters of the Unicode character set,
-- corresponding to the ISO 8859-1 (Latin-1) character set.
isLatin1 :: Char -> Bool
-- | Selects ASCII upper-case letters, i.e. characters satisfying both
-- isAscii and isUpper.
isAsciiUpper :: Char -> Bool
-- | Selects ASCII lower-case letters, i.e. characters satisfying both
-- isAscii and isLower.
isAsciiLower :: Char -> Bool
-- | Determine whether a character is a jamo L, V or T character.
isJamo :: Char -> Bool
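-- | Number of vowel / trailing-consonant jamo combinations, i.e. the
-- number of precomposed Hangul syllables sharing one leading consonant.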
--
--
-- jamoNCount = jamoVCount * jamoTCount
--
jamoNCount :: Int
-- | First leading consonant jamo.
jamoLFirst :: Int
-- | Total count of leading consonant jamo.
jamoLCount :: Int
-- | Given a Unicode character, if it is a leading jamo, return its index
-- in the list of leading jamo consonants, otherwise return
-- Nothing.
jamoLIndex :: Char -> Maybe Int
-- | Last leading consonant jamo.
jamoLLast :: Int
-- | First vowel jamo.
jamoVFirst :: Int
-- | Total count of vowel jamo.
jamoVCount :: Int
-- | Given a Unicode character, if it is a vowel jamo, return its index in
-- the list of vowel jamo, otherwise return Nothing.
jamoVIndex :: Char -> Maybe Int
-- | Last vowel jamo.
jamoVLast :: Int
-- | The first trailing consonant jamo.
--
-- Note that jamoTFirst does not represent a valid T; it
-- represents a missing T, i.e. LV without a T. See comments under
-- jamoTIndex.
jamoTFirst :: Int
-- | Total count of trailing consonant jamo.
jamoTCount :: Int
-- | Given a Unicode character, if it is a trailing jamo consonant, return
-- its index in the list of trailing jamo consonants, otherwise return
-- Nothing.
--
-- Note that index 0 is not a valid index for a trailing consonant. Index
-- 0 corresponds to an LV syllable, without a T. See "Hangul Syllable
-- Decomposition" in the Conformance chapter of the Unicode standard for
-- more details.
jamoTIndex :: Char -> Maybe Int
-- | Last trailing consonant jamo.
jamoTLast :: Int
-- | Codepoint of the first pre-composed Hangul character.
hangulFirst :: Int
-- | Codepoint of the last Hangul character.
hangulLast :: Int
-- | Determine if the given character is a precomposed Hangul syllable.
isHangul :: Char -> Bool
-- | Determine if the given character is a Hangul LV syllable.
isHangulLV :: Char -> Bool
instance GHC.Ix.Ix Unicode.Char.General.GeneralCategory
instance GHC.Enum.Bounded Unicode.Char.General.GeneralCategory
instance GHC.Enum.Enum Unicode.Char.General.GeneralCategory
instance GHC.Classes.Ord Unicode.Char.General.GeneralCategory
instance GHC.Classes.Eq Unicode.Char.General.GeneralCategory
instance GHC.Show.Show Unicode.Char.General.GeneralCategory
-- | Numeric character property related functions.
module Unicode.Char.Numeric
-- | Selects Unicode numeric characters, including digits from various
-- scripts, Roman numerals, et cetera.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - DecimalNumber
-- - LetterNumber
-- - OtherNumber
--
--
-- isNumber c == Data.Char.isNumber c
--
isNumber :: Char -> Bool
-- | Selects ASCII digits, i.e. '0'..'9'.
isDigit :: Char -> Bool
-- | Selects ASCII octal digits, i.e. '0'..'7'.
isOctDigit :: Char -> Bool
-- | Selects ASCII hexadecimal digits, i.e. '0'..'9',
-- 'a'..'f', 'A'..'F'.
isHexDigit :: Char -> Bool
-- | Convert a single digit Char to the corresponding Int.
-- This function fails unless its argument satisfies isHexDigit,
-- but recognises both upper- and lower-case hexadecimal digits (that is,
-- '0'..'9', 'a'..'f',
-- 'A'..'F').
--
-- Examples
--
-- Characters '0' through '9' are converted properly to
-- 0..9:
--
--
-- >>> map digitToInt ['0'..'9']
-- [0,1,2,3,4,5,6,7,8,9]
--
--
-- Both upper- and lower-case 'A' through 'F' are
-- converted as well, to 10..15.
--
--
-- >>> map digitToInt ['a'..'f']
-- [10,11,12,13,14,15]
--
-- >>> map digitToInt ['A'..'F']
-- [10,11,12,13,14,15]
--
--
-- Anything else throws an exception:
--
--
-- >>> digitToInt 'G'
-- *** Exception: Char.digitToInt: not a digit 'G'
--
-- >>> digitToInt '♥'
-- *** Exception: Char.digitToInt: not a digit '\9829'
--
digitToInt :: Char -> Int
-- | Convert an Int in the range 0..15 to the
-- corresponding single digit Char. This function fails on other
-- inputs, and generates lower-case hexadecimal digits.
intToDigit :: Int -> Char
-- | Low level Unicode database functions to facilitate Unicode
-- normalization.
--
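-- For more information on Unicode normalization please refer to the
-- following sections of the Unicode standard:
--
--
-- - 2.12 Equivalent Sequences
-- - 3.6 Combination
-- - 3.7 Decomposition
-- - 3.11 Normalization Forms
-- - 3.12 Conjoining Jamo Behavior
-- - 4.3 Combining Classes
-- - Unicode Standard Annex #15: Unicode Normalization Forms
--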
module Unicode.Char.Normalization
-- | Returns True if a character is a combining character.
isCombining :: Char -> Bool
-- | Returns the combining class of a character.
combiningClass :: Char -> Int
-- | Return True if a starter character may combine with some
-- preceding starter character.
isCombiningStarter :: Char -> Bool
-- | Compose a starter character (combining class 0) with a combining
-- character (non-zero combining class). Returns the composed character
-- if the starter combines with the combining character, returns
-- Nothing otherwise.
compose :: Char -> Char -> Maybe Char
-- | Compose a starter character with another starter character. Returns
-- the composed character if the two starters combine, returns
-- Nothing otherwise.
composeStarters :: Char -> Char -> Maybe Char
-- | Whether we are decomposing in canonical or compatibility mode.
data DecomposeMode
Canonical :: DecomposeMode
Kompat :: DecomposeMode
-- | Given a non-Hangul character determine if the character is
-- decomposable. Note that in the case of compatibility decompositions a
-- character may decompose into a single compatibility character.
isDecomposable :: DecomposeMode -> Char -> Bool
-- | Decompose a non-Hangul character into its canonical or compatibility
-- decompositions. Note that the resulting characters may further
-- decompose.
decompose :: DecomposeMode -> Char -> [Char]
-- | Decompose a Hangul syllable into its corresponding Jamo characters.
decomposeHangul :: Char -> (Char, Char, Char)
-- | Compatibility module for general character property related functions.
--
-- The functions of this module are drop-in replacements for those in
-- Data.Char. They are similar but not identical to some functions
-- in Unicode.Char.General, therefore they are placed in a
-- separate module in order to avoid ambiguity.
module Unicode.Char.General.Compat
-- | Same as isLetter.
isAlpha :: Char -> Bool
-- | Selects alphabetic Unicode characters (lower-case, upper-case and
-- title-case letters, plus letters of caseless scripts and modifier
-- letters).
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - UppercaseLetter
-- - LowercaseLetter
-- - TitlecaseLetter
-- - ModifierLetter
-- - OtherLetter
--
-- Note: this function is not equivalent to
-- isAlphabetic. See the description of isAlphabetic for
-- further details.
--
--
-- isLetter c == Data.Char.isLetter c
--
isLetter :: Char -> Bool
-- | Selects Unicode space characters (general category Space), and
-- the control characters \t, \n, \r,
-- \f, \v.
--
-- Note: isSpace is not equivalent to
-- isWhiteSpace. isWhiteSpace selects the same characters
-- as isSpace plus the following:
--
--
-- - U+0085 NEXT LINE (NEL)
-- - U+2028 LINE SEPARATOR
-- - U+2029 PARAGRAPH SEPARATOR
--
--
--
-- isSpace c == Data.Char.isSpace c
--
isSpace :: Char -> Bool
-- | Compatibility module for case and case mapping related functions.
--
-- The functions of this module are drop-in replacements for those in
-- Data.Char. They are similar but not identical to some functions
-- in Unicode.Char.Case, therefore they are placed in a separate
-- module in order to avoid ambiguity.
module Unicode.Char.Case.Compat
-- | Selects upper-case or title-case alphabetic Unicode characters
-- (letters). Title case is used by a small number of letter ligatures
-- like the single-character form of Lj.
--
--
-- isUpper c == Data.Char.isUpper c
--
isUpper :: Char -> Bool
-- | Selects lower-case alphabetic Unicode characters (letters).
--
--
-- isLower c == Data.Char.isLower c
--
isLower :: Char -> Bool
-- | Convert a letter to the corresponding upper-case letter, if any. Any
-- other character is returned unchanged.
--
--
-- toUpper c == Data.Char.toUpper c
--
toUpper :: Char -> Char
-- | Convert a letter to the corresponding lower-case letter, if any. Any
-- other character is returned unchanged.
--
--
-- toLower c == Data.Char.toLower c
--
toLower :: Char -> Char
-- | Convert a letter to the corresponding title-case or upper-case letter,
-- if any. (Title case differs from upper case only for a small number of
-- ligature letters.) Any other character is returned unchanged.
--
--
-- toTitle c == Data.Char.toTitle c
--
toTitle :: Char -> Char
-- | This module provides APIs to access the Unicode character database
-- (UCD) corresponding to Unicode Standard version 14.0.0.
--
-- This module re-exports several sub-modules under it. The sub-module
-- structure under Char is largely based on the "Property Index
-- by Scope of Use" in Unicode® Standard Annex #44.
--
-- The Unicode.Char.* modules in turn depend on
-- Unicode.Internal.Char.* modules which are programmatically
-- generated from the Unicode standard's Unicode character database
-- files. The module structure under Unicode.Internal.Char is
-- largely based on the UCD text file names from which the properties are
-- generated.
--
-- For the original UCD files used in this code please refer to the
-- UCD section on the Unicode standard page. See
-- https://www.unicode.org/reports/tr44/ to understand the
-- contents and the format of the unicode database files.
module Unicode.Char
-- | Same as isLetter.
isAlpha :: Char -> Bool
-- | Convert a letter to the corresponding upper-case letter, if any. Any
-- other character is returned unchanged.
--
--
-- toUpper c == Data.Char.toUpper c
--
toUpper :: Char -> Char
-- | Convert a letter to the corresponding lower-case letter, if any. Any
-- other character is returned unchanged.
--
--
-- toLower c == Data.Char.toLower c
--
toLower :: Char -> Char
-- | Convert a letter to the corresponding title-case or upper-case letter,
-- if any. (Title case differs from upper case only for a small number of
-- ligature letters.) Any other character is returned unchanged.
--
--
-- toTitle c == Data.Char.toTitle c
--
toTitle :: Char -> Char
-- | Version of Unicode standard used by unicode-data.
unicodeVersion :: Version
-- | The fromEnum method restricted to the type Char.
ord :: Char -> Int
-- | The toEnum method restricted to the type Char.
chr :: Int -> Char