-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Access Unicode Character Database (UCD)
--
-- unicode-data provides Haskell APIs to efficiently access the
-- Unicode character database (UCD). Performance is the primary
-- goal in the design of this package.
--
-- The Haskell data structures are generated programmatically from the
-- UCD files. The latest Unicode version supported by this library is
-- 15.0.0.
@package unicode-data
@version 0.5.0
-- | Fast, static bitmap lookup utilities
module Unicode.Internal.Bits
-- | lookupBit addr byteIndex bitIndex looks up the bit stored in
-- the byte at index byteIndex at the bit index
-- bitIndex using a bitmap starting at the address
-- addr. The caller must make sure that the byte at address
-- (addr + byteIndex) is legally accessible memory.
lookupBit :: Addr# -> Int -> Int -> Bool
-- | lookupWord8AsInt addr index looks up the
-- index-th 8-bit word in the bitmap starting at
-- addr, then converts it to an Int.
--
-- The caller must make sure that:
--
--
-- - ceiling (addr + (n * 8)) is legally accessible
-- Word8#.
--
lookupWord8AsInt :: Addr# -> Int -> Int
lookupWord8AsInt# :: Addr# -> Int# -> Int#
lookupWord16AsInt :: Addr# -> Int -> Int
lookupWord16AsInt# :: Addr# -> Int# -> Int#
-- | lookupWord32# addr index looks up the index-th
-- 32-bit word in the bitmap starting at addr, then converts it
-- to a Word#.
--
-- The caller must make sure that:
--
--
-- - ceiling (addr + (n * 32)) is legally accessible
-- Word32#.
--
lookupWord32# :: Addr# -> Int# -> Word#
unpackCString# :: Addr# -> [Char]
-- | Unicode blocks related functions.
module Unicode.Char.General.Blocks
-- | Unicode block.
--
-- There is a total of 327 blocks.
data Block
-- | U+0000..U+007F: Basic Latin.
BasicLatin :: Block
-- | U+0080..U+00FF: Latin-1 Supplement.
Latin1Supplement :: Block
-- | U+0100..U+017F: Latin Extended-A.
LatinExtendedA :: Block
-- | U+0180..U+024F: Latin Extended-B.
LatinExtendedB :: Block
-- | U+0250..U+02AF: IPA Extensions.
IPAExtensions :: Block
-- | U+02B0..U+02FF: Spacing Modifier Letters.
SpacingModifierLetters :: Block
-- | U+0300..U+036F: Combining Diacritical Marks.
CombiningDiacriticalMarks :: Block
-- | U+0370..U+03FF: Greek and Coptic.
GreekAndCoptic :: Block
-- | U+0400..U+04FF: Cyrillic.
Cyrillic :: Block
-- | U+0500..U+052F: Cyrillic Supplement.
CyrillicSupplement :: Block
-- | U+0530..U+058F: Armenian.
Armenian :: Block
-- | U+0590..U+05FF: Hebrew.
Hebrew :: Block
-- | U+0600..U+06FF: Arabic.
Arabic :: Block
-- | U+0700..U+074F: Syriac.
Syriac :: Block
-- | U+0750..U+077F: Arabic Supplement.
ArabicSupplement :: Block
-- | U+0780..U+07BF: Thaana.
Thaana :: Block
-- | U+07C0..U+07FF: NKo.
NKo :: Block
-- | U+0800..U+083F: Samaritan.
Samaritan :: Block
-- | U+0840..U+085F: Mandaic.
Mandaic :: Block
-- | U+0860..U+086F: Syriac Supplement.
SyriacSupplement :: Block
-- | U+0870..U+089F: Arabic Extended-B.
ArabicExtendedB :: Block
-- | U+08A0..U+08FF: Arabic Extended-A.
ArabicExtendedA :: Block
-- | U+0900..U+097F: Devanagari.
Devanagari :: Block
-- | U+0980..U+09FF: Bengali.
Bengali :: Block
-- | U+0A00..U+0A7F: Gurmukhi.
Gurmukhi :: Block
-- | U+0A80..U+0AFF: Gujarati.
Gujarati :: Block
-- | U+0B00..U+0B7F: Oriya.
Oriya :: Block
-- | U+0B80..U+0BFF: Tamil.
Tamil :: Block
-- | U+0C00..U+0C7F: Telugu.
Telugu :: Block
-- | U+0C80..U+0CFF: Kannada.
Kannada :: Block
-- | U+0D00..U+0D7F: Malayalam.
Malayalam :: Block
-- | U+0D80..U+0DFF: Sinhala.
Sinhala :: Block
-- | U+0E00..U+0E7F: Thai.
Thai :: Block
-- | U+0E80..U+0EFF: Lao.
Lao :: Block
-- | U+0F00..U+0FFF: Tibetan.
Tibetan :: Block
-- | U+1000..U+109F: Myanmar.
Myanmar :: Block
-- | U+10A0..U+10FF: Georgian.
Georgian :: Block
-- | U+1100..U+11FF: Hangul Jamo.
HangulJamo :: Block
-- | U+1200..U+137F: Ethiopic.
Ethiopic :: Block
-- | U+1380..U+139F: Ethiopic Supplement.
EthiopicSupplement :: Block
-- | U+13A0..U+13FF: Cherokee.
Cherokee :: Block
-- | U+1400..U+167F: Unified Canadian Aboriginal Syllabics.
UnifiedCanadianAboriginalSyllabics :: Block
-- | U+1680..U+169F: Ogham.
Ogham :: Block
-- | U+16A0..U+16FF: Runic.
Runic :: Block
-- | U+1700..U+171F: Tagalog.
Tagalog :: Block
-- | U+1720..U+173F: Hanunoo.
Hanunoo :: Block
-- | U+1740..U+175F: Buhid.
Buhid :: Block
-- | U+1760..U+177F: Tagbanwa.
Tagbanwa :: Block
-- | U+1780..U+17FF: Khmer.
Khmer :: Block
-- | U+1800..U+18AF: Mongolian.
Mongolian :: Block
-- | U+18B0..U+18FF: Unified Canadian Aboriginal Syllabics
-- Extended.
UnifiedCanadianAboriginalSyllabicsExtended :: Block
-- | U+1900..U+194F: Limbu.
Limbu :: Block
-- | U+1950..U+197F: Tai Le.
TaiLe :: Block
-- | U+1980..U+19DF: New Tai Lue.
NewTaiLue :: Block
-- | U+19E0..U+19FF: Khmer Symbols.
KhmerSymbols :: Block
-- | U+1A00..U+1A1F: Buginese.
Buginese :: Block
-- | U+1A20..U+1AAF: Tai Tham.
TaiTham :: Block
-- | U+1AB0..U+1AFF: Combining Diacritical Marks Extended.
CombiningDiacriticalMarksExtended :: Block
-- | U+1B00..U+1B7F: Balinese.
Balinese :: Block
-- | U+1B80..U+1BBF: Sundanese.
Sundanese :: Block
-- | U+1BC0..U+1BFF: Batak.
Batak :: Block
-- | U+1C00..U+1C4F: Lepcha.
Lepcha :: Block
-- | U+1C50..U+1C7F: Ol Chiki.
OlChiki :: Block
-- | U+1C80..U+1C8F: Cyrillic Extended-C.
CyrillicExtendedC :: Block
-- | U+1C90..U+1CBF: Georgian Extended.
GeorgianExtended :: Block
-- | U+1CC0..U+1CCF: Sundanese Supplement.
SundaneseSupplement :: Block
-- | U+1CD0..U+1CFF: Vedic Extensions.
VedicExtensions :: Block
-- | U+1D00..U+1D7F: Phonetic Extensions.
PhoneticExtensions :: Block
-- | U+1D80..U+1DBF: Phonetic Extensions Supplement.
PhoneticExtensionsSupplement :: Block
-- | U+1DC0..U+1DFF: Combining Diacritical Marks Supplement.
CombiningDiacriticalMarksSupplement :: Block
-- | U+1E00..U+1EFF: Latin Extended Additional.
LatinExtendedAdditional :: Block
-- | U+1F00..U+1FFF: Greek Extended.
GreekExtended :: Block
-- | U+2000..U+206F: General Punctuation.
GeneralPunctuation :: Block
-- | U+2070..U+209F: Superscripts and Subscripts.
SuperscriptsAndSubscripts :: Block
-- | U+20A0..U+20CF: Currency Symbols.
CurrencySymbols :: Block
-- | U+20D0..U+20FF: Combining Diacritical Marks for Symbols.
CombiningDiacriticalMarksForSymbols :: Block
-- | U+2100..U+214F: Letterlike Symbols.
LetterlikeSymbols :: Block
-- | U+2150..U+218F: Number Forms.
NumberForms :: Block
-- | U+2190..U+21FF: Arrows.
Arrows :: Block
-- | U+2200..U+22FF: Mathematical Operators.
MathematicalOperators :: Block
-- | U+2300..U+23FF: Miscellaneous Technical.
MiscellaneousTechnical :: Block
-- | U+2400..U+243F: Control Pictures.
ControlPictures :: Block
-- | U+2440..U+245F: Optical Character Recognition.
OpticalCharacterRecognition :: Block
-- | U+2460..U+24FF: Enclosed Alphanumerics.
EnclosedAlphanumerics :: Block
-- | U+2500..U+257F: Box Drawing.
BoxDrawing :: Block
-- | U+2580..U+259F: Block Elements.
BlockElements :: Block
-- | U+25A0..U+25FF: Geometric Shapes.
GeometricShapes :: Block
-- | U+2600..U+26FF: Miscellaneous Symbols.
MiscellaneousSymbols :: Block
-- | U+2700..U+27BF: Dingbats.
Dingbats :: Block
-- | U+27C0..U+27EF: Miscellaneous Mathematical Symbols-A.
MiscellaneousMathematicalSymbolsA :: Block
-- | U+27F0..U+27FF: Supplemental Arrows-A.
SupplementalArrowsA :: Block
-- | U+2800..U+28FF: Braille Patterns.
BraillePatterns :: Block
-- | U+2900..U+297F: Supplemental Arrows-B.
SupplementalArrowsB :: Block
-- | U+2980..U+29FF: Miscellaneous Mathematical Symbols-B.
MiscellaneousMathematicalSymbolsB :: Block
-- | U+2A00..U+2AFF: Supplemental Mathematical Operators.
SupplementalMathematicalOperators :: Block
-- | U+2B00..U+2BFF: Miscellaneous Symbols and Arrows.
MiscellaneousSymbolsAndArrows :: Block
-- | U+2C00..U+2C5F: Glagolitic.
Glagolitic :: Block
-- | U+2C60..U+2C7F: Latin Extended-C.
LatinExtendedC :: Block
-- | U+2C80..U+2CFF: Coptic.
Coptic :: Block
-- | U+2D00..U+2D2F: Georgian Supplement.
GeorgianSupplement :: Block
-- | U+2D30..U+2D7F: Tifinagh.
Tifinagh :: Block
-- | U+2D80..U+2DDF: Ethiopic Extended.
EthiopicExtended :: Block
-- | U+2DE0..U+2DFF: Cyrillic Extended-A.
CyrillicExtendedA :: Block
-- | U+2E00..U+2E7F: Supplemental Punctuation.
SupplementalPunctuation :: Block
-- | U+2E80..U+2EFF: CJK Radicals Supplement.
CJKRadicalsSupplement :: Block
-- | U+2F00..U+2FDF: Kangxi Radicals.
KangxiRadicals :: Block
-- | U+2FF0..U+2FFF: Ideographic Description Characters.
IdeographicDescriptionCharacters :: Block
-- | U+3000..U+303F: CJK Symbols and Punctuation.
CJKSymbolsAndPunctuation :: Block
-- | U+3040..U+309F: Hiragana.
Hiragana :: Block
-- | U+30A0..U+30FF: Katakana.
Katakana :: Block
-- | U+3100..U+312F: Bopomofo.
Bopomofo :: Block
-- | U+3130..U+318F: Hangul Compatibility Jamo.
HangulCompatibilityJamo :: Block
-- | U+3190..U+319F: Kanbun.
Kanbun :: Block
-- | U+31A0..U+31BF: Bopomofo Extended.
BopomofoExtended :: Block
-- | U+31C0..U+31EF: CJK Strokes.
CJKStrokes :: Block
-- | U+31F0..U+31FF: Katakana Phonetic Extensions.
KatakanaPhoneticExtensions :: Block
-- | U+3200..U+32FF: Enclosed CJK Letters and Months.
EnclosedCJKLettersAndMonths :: Block
-- | U+3300..U+33FF: CJK Compatibility.
CJKCompatibility :: Block
-- | U+3400..U+4DBF: CJK Unified Ideographs Extension A.
CJKUnifiedIdeographsExtensionA :: Block
-- | U+4DC0..U+4DFF: Yijing Hexagram Symbols.
YijingHexagramSymbols :: Block
-- | U+4E00..U+9FFF: CJK Unified Ideographs.
CJKUnifiedIdeographs :: Block
-- | U+A000..U+A48F: Yi Syllables.
YiSyllables :: Block
-- | U+A490..U+A4CF: Yi Radicals.
YiRadicals :: Block
-- | U+A4D0..U+A4FF: Lisu.
Lisu :: Block
-- | U+A500..U+A63F: Vai.
Vai :: Block
-- | U+A640..U+A69F: Cyrillic Extended-B.
CyrillicExtendedB :: Block
-- | U+A6A0..U+A6FF: Bamum.
Bamum :: Block
-- | U+A700..U+A71F: Modifier Tone Letters.
ModifierToneLetters :: Block
-- | U+A720..U+A7FF: Latin Extended-D.
LatinExtendedD :: Block
-- | U+A800..U+A82F: Syloti Nagri.
SylotiNagri :: Block
-- | U+A830..U+A83F: Common Indic Number Forms.
CommonIndicNumberForms :: Block
-- | U+A840..U+A87F: Phags-pa.
PhagsPa :: Block
-- | U+A880..U+A8DF: Saurashtra.
Saurashtra :: Block
-- | U+A8E0..U+A8FF: Devanagari Extended.
DevanagariExtended :: Block
-- | U+A900..U+A92F: Kayah Li.
KayahLi :: Block
-- | U+A930..U+A95F: Rejang.
Rejang :: Block
-- | U+A960..U+A97F: Hangul Jamo Extended-A.
HangulJamoExtendedA :: Block
-- | U+A980..U+A9DF: Javanese.
Javanese :: Block
-- | U+A9E0..U+A9FF: Myanmar Extended-B.
MyanmarExtendedB :: Block
-- | U+AA00..U+AA5F: Cham.
Cham :: Block
-- | U+AA60..U+AA7F: Myanmar Extended-A.
MyanmarExtendedA :: Block
-- | U+AA80..U+AADF: Tai Viet.
TaiViet :: Block
-- | U+AAE0..U+AAFF: Meetei Mayek Extensions.
MeeteiMayekExtensions :: Block
-- | U+AB00..U+AB2F: Ethiopic Extended-A.
EthiopicExtendedA :: Block
-- | U+AB30..U+AB6F: Latin Extended-E.
LatinExtendedE :: Block
-- | U+AB70..U+ABBF: Cherokee Supplement.
CherokeeSupplement :: Block
-- | U+ABC0..U+ABFF: Meetei Mayek.
MeeteiMayek :: Block
-- | U+AC00..U+D7AF: Hangul Syllables.
HangulSyllables :: Block
-- | U+D7B0..U+D7FF: Hangul Jamo Extended-B.
HangulJamoExtendedB :: Block
-- | U+D800..U+DB7F: High Surrogates.
HighSurrogates :: Block
-- | U+DB80..U+DBFF: High Private Use Surrogates.
HighPrivateUseSurrogates :: Block
-- | U+DC00..U+DFFF: Low Surrogates.
LowSurrogates :: Block
-- | U+E000..U+F8FF: Private Use Area.
PrivateUseArea :: Block
-- | U+F900..U+FAFF: CJK Compatibility Ideographs.
CJKCompatibilityIdeographs :: Block
-- | U+FB00..U+FB4F: Alphabetic Presentation Forms.
AlphabeticPresentationForms :: Block
-- | U+FB50..U+FDFF: Arabic Presentation Forms-A.
ArabicPresentationFormsA :: Block
-- | U+FE00..U+FE0F: Variation Selectors.
VariationSelectors :: Block
-- | U+FE10..U+FE1F: Vertical Forms.
VerticalForms :: Block
-- | U+FE20..U+FE2F: Combining Half Marks.
CombiningHalfMarks :: Block
-- | U+FE30..U+FE4F: CJK Compatibility Forms.
CJKCompatibilityForms :: Block
-- | U+FE50..U+FE6F: Small Form Variants.
SmallFormVariants :: Block
-- | U+FE70..U+FEFF: Arabic Presentation Forms-B.
ArabicPresentationFormsB :: Block
-- | U+FF00..U+FFEF: Halfwidth and Fullwidth Forms.
HalfwidthAndFullwidthForms :: Block
-- | U+FFF0..U+FFFF: Specials.
Specials :: Block
-- | U+10000..U+1007F: Linear B Syllabary.
LinearBSyllabary :: Block
-- | U+10080..U+100FF: Linear B Ideograms.
LinearBIdeograms :: Block
-- | U+10100..U+1013F: Aegean Numbers.
AegeanNumbers :: Block
-- | U+10140..U+1018F: Ancient Greek Numbers.
AncientGreekNumbers :: Block
-- | U+10190..U+101CF: Ancient Symbols.
AncientSymbols :: Block
-- | U+101D0..U+101FF: Phaistos Disc.
PhaistosDisc :: Block
-- | U+10280..U+1029F: Lycian.
Lycian :: Block
-- | U+102A0..U+102DF: Carian.
Carian :: Block
-- | U+102E0..U+102FF: Coptic Epact Numbers.
CopticEpactNumbers :: Block
-- | U+10300..U+1032F: Old Italic.
OldItalic :: Block
-- | U+10330..U+1034F: Gothic.
Gothic :: Block
-- | U+10350..U+1037F: Old Permic.
OldPermic :: Block
-- | U+10380..U+1039F: Ugaritic.
Ugaritic :: Block
-- | U+103A0..U+103DF: Old Persian.
OldPersian :: Block
-- | U+10400..U+1044F: Deseret.
Deseret :: Block
-- | U+10450..U+1047F: Shavian.
Shavian :: Block
-- | U+10480..U+104AF: Osmanya.
Osmanya :: Block
-- | U+104B0..U+104FF: Osage.
Osage :: Block
-- | U+10500..U+1052F: Elbasan.
Elbasan :: Block
-- | U+10530..U+1056F: Caucasian Albanian.
CaucasianAlbanian :: Block
-- | U+10570..U+105BF: Vithkuqi.
Vithkuqi :: Block
-- | U+10600..U+1077F: Linear A.
LinearA :: Block
-- | U+10780..U+107BF: Latin Extended-F.
LatinExtendedF :: Block
-- | U+10800..U+1083F: Cypriot Syllabary.
CypriotSyllabary :: Block
-- | U+10840..U+1085F: Imperial Aramaic.
ImperialAramaic :: Block
-- | U+10860..U+1087F: Palmyrene.
Palmyrene :: Block
-- | U+10880..U+108AF: Nabataean.
Nabataean :: Block
-- | U+108E0..U+108FF: Hatran.
Hatran :: Block
-- | U+10900..U+1091F: Phoenician.
Phoenician :: Block
-- | U+10920..U+1093F: Lydian.
Lydian :: Block
-- | U+10980..U+1099F: Meroitic Hieroglyphs.
MeroiticHieroglyphs :: Block
-- | U+109A0..U+109FF: Meroitic Cursive.
MeroiticCursive :: Block
-- | U+10A00..U+10A5F: Kharoshthi.
Kharoshthi :: Block
-- | U+10A60..U+10A7F: Old South Arabian.
OldSouthArabian :: Block
-- | U+10A80..U+10A9F: Old North Arabian.
OldNorthArabian :: Block
-- | U+10AC0..U+10AFF: Manichaean.
Manichaean :: Block
-- | U+10B00..U+10B3F: Avestan.
Avestan :: Block
-- | U+10B40..U+10B5F: Inscriptional Parthian.
InscriptionalParthian :: Block
-- | U+10B60..U+10B7F: Inscriptional Pahlavi.
InscriptionalPahlavi :: Block
-- | U+10B80..U+10BAF: Psalter Pahlavi.
PsalterPahlavi :: Block
-- | U+10C00..U+10C4F: Old Turkic.
OldTurkic :: Block
-- | U+10C80..U+10CFF: Old Hungarian.
OldHungarian :: Block
-- | U+10D00..U+10D3F: Hanifi Rohingya.
HanifiRohingya :: Block
-- | U+10E60..U+10E7F: Rumi Numeral Symbols.
RumiNumeralSymbols :: Block
-- | U+10E80..U+10EBF: Yezidi.
Yezidi :: Block
-- | U+10EC0..U+10EFF: Arabic Extended-C.
ArabicExtendedC :: Block
-- | U+10F00..U+10F2F: Old Sogdian.
OldSogdian :: Block
-- | U+10F30..U+10F6F: Sogdian.
Sogdian :: Block
-- | U+10F70..U+10FAF: Old Uyghur.
OldUyghur :: Block
-- | U+10FB0..U+10FDF: Chorasmian.
Chorasmian :: Block
-- | U+10FE0..U+10FFF: Elymaic.
Elymaic :: Block
-- | U+11000..U+1107F: Brahmi.
Brahmi :: Block
-- | U+11080..U+110CF: Kaithi.
Kaithi :: Block
-- | U+110D0..U+110FF: Sora Sompeng.
SoraSompeng :: Block
-- | U+11100..U+1114F: Chakma.
Chakma :: Block
-- | U+11150..U+1117F: Mahajani.
Mahajani :: Block
-- | U+11180..U+111DF: Sharada.
Sharada :: Block
-- | U+111E0..U+111FF: Sinhala Archaic Numbers.
SinhalaArchaicNumbers :: Block
-- | U+11200..U+1124F: Khojki.
Khojki :: Block
-- | U+11280..U+112AF: Multani.
Multani :: Block
-- | U+112B0..U+112FF: Khudawadi.
Khudawadi :: Block
-- | U+11300..U+1137F: Grantha.
Grantha :: Block
-- | U+11400..U+1147F: Newa.
Newa :: Block
-- | U+11480..U+114DF: Tirhuta.
Tirhuta :: Block
-- | U+11580..U+115FF: Siddham.
Siddham :: Block
-- | U+11600..U+1165F: Modi.
Modi :: Block
-- | U+11660..U+1167F: Mongolian Supplement.
MongolianSupplement :: Block
-- | U+11680..U+116CF: Takri.
Takri :: Block
-- | U+11700..U+1174F: Ahom.
Ahom :: Block
-- | U+11800..U+1184F: Dogra.
Dogra :: Block
-- | U+118A0..U+118FF: Warang Citi.
WarangCiti :: Block
-- | U+11900..U+1195F: Dives Akuru.
DivesAkuru :: Block
-- | U+119A0..U+119FF: Nandinagari.
Nandinagari :: Block
-- | U+11A00..U+11A4F: Zanabazar Square.
ZanabazarSquare :: Block
-- | U+11A50..U+11AAF: Soyombo.
Soyombo :: Block
-- | U+11AB0..U+11ABF: Unified Canadian Aboriginal Syllabics
-- Extended-A.
UnifiedCanadianAboriginalSyllabicsExtendedA :: Block
-- | U+11AC0..U+11AFF: Pau Cin Hau.
PauCinHau :: Block
-- | U+11B00..U+11B5F: Devanagari Extended-A.
DevanagariExtendedA :: Block
-- | U+11C00..U+11C6F: Bhaiksuki.
Bhaiksuki :: Block
-- | U+11C70..U+11CBF: Marchen.
Marchen :: Block
-- | U+11D00..U+11D5F: Masaram Gondi.
MasaramGondi :: Block
-- | U+11D60..U+11DAF: Gunjala Gondi.
GunjalaGondi :: Block
-- | U+11EE0..U+11EFF: Makasar.
Makasar :: Block
-- | U+11F00..U+11F5F: Kawi.
Kawi :: Block
-- | U+11FB0..U+11FBF: Lisu Supplement.
LisuSupplement :: Block
-- | U+11FC0..U+11FFF: Tamil Supplement.
TamilSupplement :: Block
-- | U+12000..U+123FF: Cuneiform.
Cuneiform :: Block
-- | U+12400..U+1247F: Cuneiform Numbers and Punctuation.
CuneiformNumbersAndPunctuation :: Block
-- | U+12480..U+1254F: Early Dynastic Cuneiform.
EarlyDynasticCuneiform :: Block
-- | U+12F90..U+12FFF: Cypro-Minoan.
CyproMinoan :: Block
-- | U+13000..U+1342F: Egyptian Hieroglyphs.
EgyptianHieroglyphs :: Block
-- | U+13430..U+1345F: Egyptian Hieroglyph Format Controls.
EgyptianHieroglyphFormatControls :: Block
-- | U+14400..U+1467F: Anatolian Hieroglyphs.
AnatolianHieroglyphs :: Block
-- | U+16800..U+16A3F: Bamum Supplement.
BamumSupplement :: Block
-- | U+16A40..U+16A6F: Mro.
Mro :: Block
-- | U+16A70..U+16ACF: Tangsa.
Tangsa :: Block
-- | U+16AD0..U+16AFF: Bassa Vah.
BassaVah :: Block
-- | U+16B00..U+16B8F: Pahawh Hmong.
PahawhHmong :: Block
-- | U+16E40..U+16E9F: Medefaidrin.
Medefaidrin :: Block
-- | U+16F00..U+16F9F: Miao.
Miao :: Block
-- | U+16FE0..U+16FFF: Ideographic Symbols and Punctuation.
IdeographicSymbolsAndPunctuation :: Block
-- | U+17000..U+187FF: Tangut.
Tangut :: Block
-- | U+18800..U+18AFF: Tangut Components.
TangutComponents :: Block
-- | U+18B00..U+18CFF: Khitan Small Script.
KhitanSmallScript :: Block
-- | U+18D00..U+18D7F: Tangut Supplement.
TangutSupplement :: Block
-- | U+1AFF0..U+1AFFF: Kana Extended-B.
KanaExtendedB :: Block
-- | U+1B000..U+1B0FF: Kana Supplement.
KanaSupplement :: Block
-- | U+1B100..U+1B12F: Kana Extended-A.
KanaExtendedA :: Block
-- | U+1B130..U+1B16F: Small Kana Extension.
SmallKanaExtension :: Block
-- | U+1B170..U+1B2FF: Nushu.
Nushu :: Block
-- | U+1BC00..U+1BC9F: Duployan.
Duployan :: Block
-- | U+1BCA0..U+1BCAF: Shorthand Format Controls.
ShorthandFormatControls :: Block
-- | U+1CF00..U+1CFCF: Znamenny Musical Notation.
ZnamennyMusicalNotation :: Block
-- | U+1D000..U+1D0FF: Byzantine Musical Symbols.
ByzantineMusicalSymbols :: Block
-- | U+1D100..U+1D1FF: Musical Symbols.
MusicalSymbols :: Block
-- | U+1D200..U+1D24F: Ancient Greek Musical Notation.
AncientGreekMusicalNotation :: Block
-- | U+1D2C0..U+1D2DF: Kaktovik Numerals.
KaktovikNumerals :: Block
-- | U+1D2E0..U+1D2FF: Mayan Numerals.
MayanNumerals :: Block
-- | U+1D300..U+1D35F: Tai Xuan Jing Symbols.
TaiXuanJingSymbols :: Block
-- | U+1D360..U+1D37F: Counting Rod Numerals.
CountingRodNumerals :: Block
-- | U+1D400..U+1D7FF: Mathematical Alphanumeric Symbols.
MathematicalAlphanumericSymbols :: Block
-- | U+1D800..U+1DAAF: Sutton SignWriting.
SuttonSignWriting :: Block
-- | U+1DF00..U+1DFFF: Latin Extended-G.
LatinExtendedG :: Block
-- | U+1E000..U+1E02F: Glagolitic Supplement.
GlagoliticSupplement :: Block
-- | U+1E030..U+1E08F: Cyrillic Extended-D.
CyrillicExtendedD :: Block
-- | U+1E100..U+1E14F: Nyiakeng Puachue Hmong.
NyiakengPuachueHmong :: Block
-- | U+1E290..U+1E2BF: Toto.
Toto :: Block
-- | U+1E2C0..U+1E2FF: Wancho.
Wancho :: Block
-- | U+1E4D0..U+1E4FF: Nag Mundari.
NagMundari :: Block
-- | U+1E7E0..U+1E7FF: Ethiopic Extended-B.
EthiopicExtendedB :: Block
-- | U+1E800..U+1E8DF: Mende Kikakui.
MendeKikakui :: Block
-- | U+1E900..U+1E95F: Adlam.
Adlam :: Block
-- | U+1EC70..U+1ECBF: Indic Siyaq Numbers.
IndicSiyaqNumbers :: Block
-- | U+1ED00..U+1ED4F: Ottoman Siyaq Numbers.
OttomanSiyaqNumbers :: Block
-- | U+1EE00..U+1EEFF: Arabic Mathematical Alphabetic Symbols.
ArabicMathematicalAlphabeticSymbols :: Block
-- | U+1F000..U+1F02F: Mahjong Tiles.
MahjongTiles :: Block
-- | U+1F030..U+1F09F: Domino Tiles.
DominoTiles :: Block
-- | U+1F0A0..U+1F0FF: Playing Cards.
PlayingCards :: Block
-- | U+1F100..U+1F1FF: Enclosed Alphanumeric Supplement.
EnclosedAlphanumericSupplement :: Block
-- | U+1F200..U+1F2FF: Enclosed Ideographic Supplement.
EnclosedIdeographicSupplement :: Block
-- | U+1F300..U+1F5FF: Miscellaneous Symbols and Pictographs.
MiscellaneousSymbolsAndPictographs :: Block
-- | U+1F600..U+1F64F: Emoticons.
Emoticons :: Block
-- | U+1F650..U+1F67F: Ornamental Dingbats.
OrnamentalDingbats :: Block
-- | U+1F680..U+1F6FF: Transport and Map Symbols.
TransportAndMapSymbols :: Block
-- | U+1F700..U+1F77F: Alchemical Symbols.
AlchemicalSymbols :: Block
-- | U+1F780..U+1F7FF: Geometric Shapes Extended.
GeometricShapesExtended :: Block
-- | U+1F800..U+1F8FF: Supplemental Arrows-C.
SupplementalArrowsC :: Block
-- | U+1F900..U+1F9FF: Supplemental Symbols and Pictographs.
SupplementalSymbolsAndPictographs :: Block
-- | U+1FA00..U+1FA6F: Chess Symbols.
ChessSymbols :: Block
-- | U+1FA70..U+1FAFF: Symbols and Pictographs Extended-A.
SymbolsAndPictographsExtendedA :: Block
-- | U+1FB00..U+1FBFF: Symbols for Legacy Computing.
SymbolsForLegacyComputing :: Block
-- | U+20000..U+2A6DF: CJK Unified Ideographs Extension B.
CJKUnifiedIdeographsExtensionB :: Block
-- | U+2A700..U+2B73F: CJK Unified Ideographs Extension C.
CJKUnifiedIdeographsExtensionC :: Block
-- | U+2B740..U+2B81F: CJK Unified Ideographs Extension D.
CJKUnifiedIdeographsExtensionD :: Block
-- | U+2B820..U+2CEAF: CJK Unified Ideographs Extension E.
CJKUnifiedIdeographsExtensionE :: Block
-- | U+2CEB0..U+2EBEF: CJK Unified Ideographs Extension F.
CJKUnifiedIdeographsExtensionF :: Block
-- | U+2F800..U+2FA1F: CJK Compatibility Ideographs Supplement.
CJKCompatibilityIdeographsSupplement :: Block
-- | U+30000..U+3134F: CJK Unified Ideographs Extension G.
CJKUnifiedIdeographsExtensionG :: Block
-- | U+31350..U+323AF: CJK Unified Ideographs Extension H.
CJKUnifiedIdeographsExtensionH :: Block
-- | U+E0000..U+E007F: Tags.
Tags :: Block
-- | U+E0100..U+E01EF: Variation Selectors Supplement.
VariationSelectorsSupplement :: Block
-- | U+F0000..U+FFFFF: Supplementary Private Use Area-A.
SupplementaryPrivateUseAreaA :: Block
-- | U+100000..U+10FFFF: Supplementary Private Use Area-B.
SupplementaryPrivateUseAreaB :: Block
-- | Character block, if defined.
block :: Char -> Maybe Block
-- | Block definition: range and name.
data BlockDefinition
BlockDefinition :: !(Int, Int) -> !String -> BlockDefinition
-- | Range
[blockRange] :: BlockDefinition -> !(Int, Int)
-- | Name
[blockName] :: BlockDefinition -> !String
-- | Block definition
blockDefinition :: Block -> BlockDefinition
instance GHC.Show.Show Unicode.Char.General.Blocks.BlockDefinition
instance GHC.Classes.Ord Unicode.Char.General.Blocks.BlockDefinition
instance GHC.Classes.Eq Unicode.Char.General.Blocks.BlockDefinition
-- | Unicode Identifier and Pattern Syntax property functions based on
-- Unicode Standard Annex #31
module Unicode.Char.Identifiers
-- | Returns True if a character is an identifier continue
-- character.
isIDContinue :: Char -> Bool
-- | Returns True if a character is an identifier start character.
isIDStart :: Char -> Bool
-- | Returns True if a character is an identifier continue
-- character, using the NFKC modifications detailed in UAX #31,
-- 5.1.
isXIDContinue :: Char -> Bool
-- | Returns True if a character is an identifier start character,
-- using the NFKC modifications detailed in UAX #31, 5.1.
isXIDStart :: Char -> Bool
-- | Returns True if a character is a pattern syntax character.
isPatternSyntax :: Char -> Bool
-- | Returns True if a character is a pattern whitespace character.
isPatternWhitespace :: Char -> Bool
-- | Compatibility module for numeric character property related functions.
module Unicode.Char.Numeric.Compat
-- | Selects Unicode numeric characters, including digits from various
-- scripts, Roman numerals, et cetera.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - DecimalNumber
-- - LetterNumber
-- - OtherNumber
--
--
-- Note: a character may have a numeric value (see
-- numericValue) but return False, because isNumber
-- only tests GeneralCategory: some CJK characters are
-- OtherLetter and do have a numeric value. Use isNumeric
-- to cover those cases as well.
--
--
-- isNumber c == Data.Char.isNumber c
--
--
-- @since 0.3.1 moved to Compat module.
isNumber :: Char -> Bool
-- | Numeric character property related functions.
module Unicode.Char.Numeric
-- | Selects Unicode character with a numeric value.
--
-- Note: a character may have a numeric value but return
-- False with the predicate isNumber, because
-- isNumber only tests GeneralCategory: some CJK characters
-- are OtherLetter and do have a numeric value.
--
--
-- isNumeric c == isJust (numericValue c)
--
isNumeric :: Char -> Bool
-- | Selects Unicode numeric characters, including digits from various
-- scripts, Roman numerals, et cetera.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - DecimalNumber
-- - LetterNumber
-- - OtherNumber
--
--
-- Note: a character may have a numeric value (see
-- numericValue) but return False, because isNumber
-- only tests GeneralCategory: some CJK characters are
-- OtherLetter and do have a numeric value. Use isNumeric
-- to cover those cases as well.
--
--
-- isNumber c == Data.Char.isNumber c
--
-- | Deprecated: Use Unicode.Char.Numeric.Compat.isNumber instead. This
-- function will be a synonym for isNumeric in a future release. See
-- Unicode.Char.Numeric.Compat for behavior compatible with
-- base:Data.Char.
isNumber :: Char -> Bool
-- | Numeric value of a character, if relevant.
--
-- Note: a character may have a numeric value but return
-- False with the predicate isNumber, because
-- isNumber only tests GeneralCategory: some CJK characters
-- are OtherLetter and do have a numeric value.
numericValue :: Char -> Maybe Rational
-- | Integer value of a character, if relevant.
--
-- This is a special case of numericValue.
--
-- Warning: There is a risk of integer overflow depending
-- on the chosen concrete return type. As of Unicode 15.0 the results
-- range from 0 to 1e12.
--
--
-- >>> integerValue '\x5146' :: Maybe Int64 -- OK
-- Just 1000000000000
--
-- >>> integerValue '\x5146' :: Maybe Int32 -- Will overflow!
-- Just (-727379968)
--
--
-- Therefore it is advised to use: integerValue @Int64.
--
-- Note: A character may have a numeric value but return
-- False with the predicate isNumber, because
-- isNumber only tests GeneralCategory: some CJK characters
-- are OtherLetter and do have a numeric value.
integerValue :: Integral a => Char -> Maybe a
-- | Selects ASCII digits, i.e. '0'..'9'.
isDigit :: Char -> Bool
-- | Selects ASCII octal digits, i.e. '0'..'7'.
isOctDigit :: Char -> Bool
-- | Selects ASCII hexadecimal digits, i.e. '0'..'9',
-- 'a'..'f', 'A'..'F'.
isHexDigit :: Char -> Bool
-- | Convert a single digit Char to the corresponding Int.
-- This function fails unless its argument satisfies isHexDigit,
-- but recognises both upper- and lower-case hexadecimal digits (that is,
-- '0'..'9', 'a'..'f',
-- 'A'..'F').
--
-- Examples
--
-- Characters '0' through '9' are converted properly to
-- 0..9:
--
--
-- >>> map digitToInt ['0'..'9']
-- [0,1,2,3,4,5,6,7,8,9]
--
--
-- Both upper- and lower-case 'A' through 'F' are
-- converted as well, to 10..15.
--
--
-- >>> map digitToInt ['a'..'f']
-- [10,11,12,13,14,15]
--
-- >>> map digitToInt ['A'..'F']
-- [10,11,12,13,14,15]
--
--
-- Anything else throws an exception:
--
--
-- >>> digitToInt 'G'
-- *** Exception: Char.digitToInt: not a digit 'G'
--
-- >>> digitToInt '♥'
-- *** Exception: Char.digitToInt: not a digit '\9829'
--
digitToInt :: Char -> Int
-- | Convert an Int in the range 0..15 to the
-- corresponding single digit Char. This function fails on other
-- inputs, and generates lower-case hexadecimal digits.
intToDigit :: Int -> Char
-- | Compatibility module for general character property related functions.
--
-- The functions of this module are drop-in replacement for those in
-- Data.Char. They are similar but not identical to some functions
-- in Unicode.Char.General, therefore they are placed in a
-- separate module in order to avoid ambiguity.
module Unicode.Char.General.Compat
-- | Same as isLetter.
isAlpha :: Char -> Bool
-- | Selects alphabetic Unicode characters (lower-case, upper-case and
-- title-case letters, plus letters of caseless scripts and modifier
-- letters).
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - UppercaseLetter
-- - LowercaseLetter
-- - TitlecaseLetter
-- - ModifierLetter
-- - OtherLetter
--
--
-- Note: this function is not equivalent to
-- isAlphabetic. See the description of isAlphabetic for
-- further details.
--
--
-- isLetter c == Data.Char.isLetter c
--
isLetter :: Char -> Bool
-- | Selects Unicode space characters (general category Space),
-- and the control characters \t, \n, \r,
-- \f, \v.
--
-- Note: isSpace is not equivalent to
-- isWhiteSpace. isWhiteSpace selects the same characters
-- as isSpace plus the following:
--
--
-- - U+0085 NEXT LINE (NEL)
-- - U+2028 LINE SEPARATOR
-- - U+2029 PARAGRAPH SEPARATOR
--
--
--
-- isSpace c == Data.Char.isSpace c
--
isSpace :: Char -> Bool
-- | Compatibility module for case and case mapping related functions.
--
-- The functions of this module are drop-in replacement for those in
-- Data.Char. They are similar but not identical to some functions
-- in Unicode.Char.Case, therefore they are placed in a separate
-- module in order to avoid ambiguity.
module Unicode.Char.Case.Compat
-- | Selects upper-case or title-case alphabetic Unicode characters
-- (letters). Title case is used by a small number of letter ligatures
-- like the single-character form of Lj.
--
-- It matches characters with general category UppercaseLetter and
-- TitlecaseLetter.
--
-- See: isUpperCase for the full upper case predicate.
--
--
-- isUpper c == Data.Char.isUpper c
--
isUpper :: Char -> Bool
-- | Selects lower-case alphabetic Unicode characters (letters).
--
-- It matches characters with general category LowercaseLetter.
--
-- See: isLowerCase for the full lower case predicate.
--
--
-- isLower c == Data.Char.isLower c
--
isLower :: Char -> Bool
-- | Convert a letter to the corresponding upper-case letter, if any. Any
-- other character is returned unchanged.
--
-- It uses the character property Simple_Uppercase_Mapping.
--
-- See: upperCaseMapping and toUpperString for full
-- upper case conversion.
--
--
-- toUpper c == Data.Char.toUpper c
--
toUpper :: Char -> Char
-- | Convert a letter to the corresponding lower-case letter, if any. Any
-- other character is returned unchanged.
--
-- It uses the character property Simple_Lowercase_Mapping.
--
-- See: lowerCaseMapping and toLowerString for full
-- lower case conversion.
--
--
-- toLower c == Data.Char.toLower c
--
toLower :: Char -> Char
-- | Convert a letter to the corresponding title-case or upper-case letter,
-- if any. (Title case differs from upper case only for a small number of
-- ligature letters.) Any other character is returned unchanged.
--
-- It uses the character property Simple_Titlecase_Mapping.
--
-- See: titleCaseMapping and toTitleString for full
-- title case conversion.
--
--
-- toTitle c == Data.Char.toTitle c
--
toTitle :: Char -> Char
-- | Fast division by known constants.
--
-- Division by a constant can be replaced by a double-word
-- multiplication. Roughly speaking, instead of dividing by x, multiply
-- by 2^64/x, obtaining a 128-bit-long product, and take the upper 64 bits.
-- The peculiar details can be found in Hacker's Delight, Ch. 10.
--
-- Even GHC 8.10 does not provide a primitive for a signed double-word
-- multiplication, but since our applications do not involve negative
-- integers, we convert Int to Word and use
-- timesWord#.
--
-- Textbook unsigned division by 21 or 28 becomes involved when the
-- argument is allowed to take the full range of Word up to 2^64.
-- Luckily, in our case the argument was cast from Int, so we
-- can guarantee that it is below 2^63.
module Unicode.Internal.Division
-- | Input must be non-negative.
--
-- Instead of division by 21, we compute floor(floor((2^68+17)/21 * n) /
-- 2^68) = floor((2^68+17)/21 * n/2^68) = floor(n/21 +
-- (n/2^63 * 17/32)/21) = floor(n/21), because n/2^63 *
-- 17/32 < 1.
quotRem21 :: Int -> (Int, Int)
-- | Input must be non-negative.
--
-- Instead of division by 28, we compute floor(floor((2^65+3)/7 * n) /
-- 2^67) = floor((2^65+3)/7 * n/2^67) = floor(n/28 +
-- (n/2^63 * 3/4)/28) = floor(n/28), because n/2^63 * 3/4
-- < 1.
quotRem28 :: Int -> (Int, Int)
-- | General character property related functions.
module Unicode.Char.General
-- | Unicode General Categories.
--
-- These classes are defined in the Unicode Character Database,
-- part of the Unicode standard.
--
-- Note: the classes must be in the same order they are listed in
-- the Unicode Standard, because some functions (e.g.
-- generalCategory) rely on the Enum instance.
data GeneralCategory
-- | Lu: Letter, Uppercase
UppercaseLetter :: GeneralCategory
-- | Ll: Letter, Lowercase
LowercaseLetter :: GeneralCategory
-- | Lt: Letter, Titlecase
TitlecaseLetter :: GeneralCategory
-- | Lm: Letter, Modifier
ModifierLetter :: GeneralCategory
-- | Lo: Letter, Other
OtherLetter :: GeneralCategory
-- | Mn: Mark, Non-Spacing
NonSpacingMark :: GeneralCategory
-- | Mc: Mark, Spacing Combining
SpacingCombiningMark :: GeneralCategory
-- | Me: Mark, Enclosing
EnclosingMark :: GeneralCategory
-- | Nd: Number, Decimal
DecimalNumber :: GeneralCategory
-- | Nl: Number, Letter
LetterNumber :: GeneralCategory
-- | No: Number, Other
OtherNumber :: GeneralCategory
-- | Pc: Punctuation, Connector
ConnectorPunctuation :: GeneralCategory
-- | Pd: Punctuation, Dash
DashPunctuation :: GeneralCategory
-- | Ps: Punctuation, Open
OpenPunctuation :: GeneralCategory
-- | Pe: Punctuation, Close
ClosePunctuation :: GeneralCategory
-- | Pi: Punctuation, Initial quote
InitialQuote :: GeneralCategory
-- | Pf: Punctuation, Final quote
FinalQuote :: GeneralCategory
-- | Po: Punctuation, Other
OtherPunctuation :: GeneralCategory
-- | Sm: Symbol, Math
MathSymbol :: GeneralCategory
-- | Sc: Symbol, Currency
CurrencySymbol :: GeneralCategory
-- | Sk: Symbol, Modifier
ModifierSymbol :: GeneralCategory
-- | So: Symbol, Other
OtherSymbol :: GeneralCategory
-- | Zs: Separator, Space
Space :: GeneralCategory
-- | Zl: Separator, Line
LineSeparator :: GeneralCategory
-- | Zp: Separator, Paragraph
ParagraphSeparator :: GeneralCategory
-- | Cc: Other, Control
Control :: GeneralCategory
-- | Cf: Other, Format
Format :: GeneralCategory
-- | Cs: Other, Surrogate
Surrogate :: GeneralCategory
-- | Co: Other, Private Use
PrivateUse :: GeneralCategory
-- | Cn: Other, Not Assigned
NotAssigned :: GeneralCategory
-- | Abbreviation of GeneralCategory used in the Unicode standard.
generalCategoryAbbr :: GeneralCategory -> String
-- | The Unicode general category of the character.
--
-- This property is defined in column 2 of the UnicodeData
-- table.
--
-- This relies on the Enum instance of GeneralCategory,
-- which must remain in the same order as the categories are presented in
-- the Unicode standard.
--
--
-- show (generalCategory c) == show (Data.Char.generalCategory c)
--
generalCategory :: Char -> GeneralCategory
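-- | Returns True for alphabetic Unicode characters (lower-case,
-- upper-case and title-case letters, plus letters of caseless scripts
-- and modifier letters).
--
-- Note: this function is not equivalent to isAlpha
-- / isLetter:
--
--
-- - isAlpha / isLetter match the general categories
-- UppercaseLetter, LowercaseLetter, TitlecaseLetter,
-- ModifierLetter and OtherLetter;
-- - isAlphabetic matches the Unicode Alphabetic property, i.e.
-- the Uppercase and Lowercase properties, TitlecaseLetter,
-- ModifierLetter, OtherLetter, LetterNumber and the
-- Other_Alphabetic property.
--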
isAlphabetic :: Char -> Bool
-- | Selects alphabetic or numeric Unicode characters.
--
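-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - UppercaseLetter
-- - LowercaseLetter
-- - TitlecaseLetter
-- - ModifierLetter
-- - OtherLetter
-- - DecimalNumber
-- - LetterNumber
-- - OtherNumber
--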
--
-- isAlphaNum c == Data.Char.isAlphaNum c
--
isAlphaNum :: Char -> Bool
-- | Selects control characters, which are the non-printing characters of
-- the Latin-1 subset of Unicode.
--
-- This function returns True if its argument has the
-- GeneralCategory Control.
--
--
-- isControl c == Data.Char.isControl c
--
isControl :: Char -> Bool
-- | Selects Unicode mark characters, for example accents and the like,
-- which combine with preceding characters.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - NonSpacingMark
-- - SpacingCombiningMark
-- - EnclosingMark
--
--
-- isMark c == Data.Char.isMark c
--
isMark :: Char -> Bool
-- | Selects printable Unicode characters (letters, numbers, marks,
-- punctuation, symbols and spaces).
--
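-- This function returns False if its argument has one of the
-- following GeneralCategorys, or True otherwise:
--
--
-- - LineSeparator
-- - ParagraphSeparator
-- - Control
-- - Format
-- - Surrogate
-- - PrivateUse
-- - NotAssigned
--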
--
-- isPrint c == Data.Char.isPrint c
--
isPrint :: Char -> Bool
-- | Selects Unicode punctuation characters, including various kinds of
-- connectors, brackets and quotes.
--
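-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - ConnectorPunctuation
-- - DashPunctuation
-- - OpenPunctuation
-- - ClosePunctuation
-- - InitialQuote
-- - FinalQuote
-- - OtherPunctuation
--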
--
-- isPunctuation c == Data.Char.isPunctuation c
--
isPunctuation :: Char -> Bool
-- | Selects Unicode space and separator characters.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - Space
-- - LineSeparator
-- - ParagraphSeparator
--
--
-- isSeparator c == Data.Char.isSeparator c
--
isSeparator :: Char -> Bool
-- | Selects Unicode symbol characters, including mathematical and currency
-- symbols.
--
-- This function returns True if its argument has one of the
-- following GeneralCategorys, or False otherwise:
--
--
-- - MathSymbol
-- - CurrencySymbol
-- - ModifierSymbol
-- - OtherSymbol
--
--
-- isSymbol c == Data.Char.isSymbol c
--
isSymbol :: Char -> Bool
-- | Returns True for any whitespace characters, and the control
-- characters \t, \n, \r, \f,
-- \v.
--
-- See: Unicode White_Space.
--
-- Note: isWhiteSpace is not equivalent to
-- isSpace. isWhiteSpace selects the same characters as
-- isSpace plus the following:
--
--
-- - U+0085 NEXT LINE (NEL)
-- - U+2028 LINE SEPARATOR
-- - U+2029 PARAGRAPH SEPARATOR
--
isWhiteSpace :: Char -> Bool
-- | Returns True for alphabetic Unicode characters (lower-case,
-- upper-case and title-case letters, plus letters of caseless scripts
-- and modifier letters).
-- | Deprecated: Use isAlphabetic instead. Note that the behavior of
-- this function does not match base:Data.Char.isLetter. See
-- Unicode.Char.General.Compat for behavior compatible with
-- base:Data.Char.
isLetter :: Char -> Bool
-- | Returns True for any whitespace characters, and the control
-- characters \t, \n, \r, \f,
-- \v.
-- | Deprecated: Use isWhiteSpace instead. Note that the behavior of
-- this function does not match base:Data.Char.isSpace. See
-- Unicode.Char.General.Compat for behavior compatible with
-- base:Data.Char.
isSpace :: Char -> Bool
-- | Selects the first 128 characters of the Unicode character set,
-- corresponding to the ASCII character set.
isAscii :: Char -> Bool
-- | Selects the first 256 characters of the Unicode character set,
-- corresponding to the ISO 8859-1 (Latin-1) character set.
isLatin1 :: Char -> Bool
-- | Selects ASCII upper-case letters, i.e. characters satisfying both
-- isAscii and isUpper.
isAsciiUpper :: Char -> Bool
-- | Selects ASCII lower-case letters, i.e. characters satisfying both
-- isAscii and isLower.
isAsciiLower :: Char -> Bool
-- | Determine whether a character is a jamo L, V or T character.
isJamo :: Char -> Bool
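-- | Number of precomposed Hangul syllables sharing the same leading
-- consonant: one per combination of a vowel jamo and a trailing
-- consonant jamo (counting the absent trailing consonant).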
--
--
-- jamoNCount = jamoVCount * jamoTCount
--
jamoNCount :: Int
-- | First leading consonant jamo.
jamoLFirst :: Int
-- | Total count of leading consonant jamo.
jamoLCount :: Int
-- | Given a Unicode character, if it is a leading jamo, return its index
-- in the list of leading jamo consonants, otherwise return
-- Nothing.
jamoLIndex :: Char -> Maybe Int
-- | Last leading consonant jamo.
jamoLLast :: Int
-- | First vowel jamo.
jamoVFirst :: Int
-- | Total count of vowel jamo.
jamoVCount :: Int
-- | Given a Unicode character, if it is a vowel jamo, return its index in
-- the list of vowel jamo, otherwise return Nothing.
jamoVIndex :: Char -> Maybe Int
-- | Last vowel jamo.
jamoVLast :: Int
-- | The first trailing consonant jamo.
--
-- Note that jamoTFirst does not represent a valid T; it
-- represents a missing T, i.e. LV without a T. See comments under
-- jamoTIndex.
jamoTFirst :: Int
-- | Total count of trailing consonant jamo.
jamoTCount :: Int
-- | Given a Unicode character, if it is a trailing jamo consonant, return
-- its index in the list of trailing jamo consonants, otherwise return
-- Nothing.
--
-- Note that index 0 is not a valid index for a trailing consonant. Index
-- 0 corresponds to an LV syllable, without a T. See "Hangul Syllable
-- Decomposition" in the Conformance chapter of the Unicode standard for
-- more details.
jamoTIndex :: Char -> Maybe Int
-- | Last trailing consonant jamo.
jamoTLast :: Int
-- | Codepoint of the first pre-composed Hangul character.
hangulFirst :: Int
-- | Codepoint of the last Hangul character.
hangulLast :: Int
-- | Determine if the given character is a precomposed Hangul syllable.
isHangul :: Char -> Bool
-- | Determine if the given character is a Hangul LV syllable.
--
-- Note: this function requires a precomposed Hangul syllable but
-- does not check it. Use isHangul to check the input
-- character before passing it to isHangulLV.
isHangulLV :: Char -> Bool
instance GHC.Ix.Ix Unicode.Char.General.GeneralCategory
instance GHC.Enum.Bounded Unicode.Char.General.GeneralCategory
instance GHC.Enum.Enum Unicode.Char.General.GeneralCategory
instance GHC.Classes.Ord Unicode.Char.General.GeneralCategory
instance GHC.Classes.Eq Unicode.Char.General.GeneralCategory
instance GHC.Show.Show Unicode.Char.General.GeneralCategory
-- | Low level Unicode database functions to facilitate Unicode
-- normalization.
--
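-- For more information on Unicode normalization please refer to the
-- following sections of the Unicode standard:
--
--
-- - 2 General Structure -> 2.12 Equivalent Sequences
-- - 3 Conformance -> 3.6 Combination
-- - 3 Conformance -> 3.7 Decomposition
-- - 3 Conformance -> 3.11 Normalization Forms
-- - 3 Conformance -> 3.12 Conjoining Jamo Behavior
-- - 4 Character Properties -> 4.3 Combining Classes
-- - Unicode Standard Annex #15 - Unicode Normalization Forms
-- - Unicode Standard Annex #44 - Unicode Character Database
--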
module Unicode.Char.Normalization
-- | Returns True if a character is a combining character.
isCombining :: Char -> Bool
-- | Returns the combining class of a character.
combiningClass :: Char -> Int
-- | Return True if a starter character may combine with some
-- preceding starter character.
isCombiningStarter :: Char -> Bool
-- | Compose a starter character (combining class 0) with a combining
-- character (non-zero combining class). Returns the composed character
-- if the starter combines with the combining character, returns
-- Nothing otherwise.
compose :: Char -> Char -> Maybe Char
-- | Compose a starter character with another starter character. Returns
-- the composed character if the two starters combine, returns
-- Nothing otherwise.
composeStarters :: Char -> Char -> Maybe Char
-- | Whether we are decomposing in canonical or compatibility mode.
data DecomposeMode
Canonical :: DecomposeMode
Kompat :: DecomposeMode
-- | Given a non-Hangul character, determine if the character is
-- decomposable. Note that in the case of compatibility decompositions a
-- character may decompose into a single compatibility character.
isDecomposable :: DecomposeMode -> Char -> Bool
-- | Decompose a non-Hangul character into its canonical or compatibility
-- decompositions. Note that the resulting characters may further
-- decompose.
decompose :: DecomposeMode -> Char -> [Char]
-- | Decompose a Hangul syllable into its corresponding Jamo characters.
decomposeHangul :: Char -> (Char, Char, Char)
module Unicode.Internal.Unfold
-- | An Unfold a b is a generator of a stream of values of type
-- b from a seed of type a.
data Unfold a b
Unfold :: (s -> Step s b) -> (a -> Step s b) -> Unfold a b
-- | A stream is a succession of Steps.
data Step s a
-- | Produces a single value and the next state of the stream.
Yield :: !a -> !s -> Step s a
-- | Indicates there are no more values in the stream.
Stop :: Step s a
-- | Convert an Unfold a a to a list [a]. If the resulting list is
-- empty, the seed is used as the default output.
toList :: Unfold a a -> a -> [a]
instance GHC.Base.Functor (Unicode.Internal.Unfold.Step s)
-- | Case and case mapping related functions.
--
-- This module provides full predicates and mappings that are
-- not compatible with those in Data.Char, which rely on
-- simple properties. See Unicode.Char.Case.Compat for a drop-in
-- replacement of the functions in Data.Char.
module Unicode.Char.Case
-- | Returns True for lower-case characters.
--
-- It uses the character property Lowercase.
isLowerCase :: Char -> Bool
-- | Returns True for lower-case characters.
--
-- It uses the character property Lowercase.
-- | Deprecated: Use isLowerCase instead. Note that the behavior of this
-- function does not match base:Data.Char.isLower. See
-- Unicode.Char.Case.Compat for behavior compatible with
-- base:Data.Char.
isLower :: Char -> Bool
-- | Returns True for upper-case characters.
--
-- It uses the character property Uppercase.
--
-- Note: it does not match title-cased letters. Those are matched
-- using: generalCategory c == TitlecaseLetter.
isUpperCase :: Char -> Bool
-- | Returns True for upper-case characters.
--
-- It uses the character property Uppercase.
--
-- Note: it does not match title-cased letters. Those are matched
-- using: generalCategory c == TitlecaseLetter.
-- | Deprecated: Use isUpperCase instead. Note that the behavior of this
-- function does not match base:Data.Char.isUpper. See
-- Unicode.Char.Case.Compat for behavior compatible with
-- base:Data.Char.
isUpper :: Char -> Bool
-- | Returns the full folded case mapping of a character if the
-- character is changed, else nothing.
--
-- It uses the character property Case_Folding.
caseFoldMapping :: Unfold Char Char
-- | Convert a character to full folded case if defined, else to
-- itself.
--
-- This function is mainly useful for performing caseless (also known as
-- case insensitive) string comparisons.
--
-- A string x is a caseless match for a string y if and
-- only if:
--
--
-- foldMap toCaseFoldString x == foldMap toCaseFoldString y
--
--
-- The result string may have more than one character, and may differ
-- from applying toLowerString to the input string. For instance,
-- “ﬓ” (U+FB13 Armenian small ligature men now) is case folded
-- to the sequence “մ” (U+0574 Armenian small letter men)
-- followed by “ն” (U+0576 Armenian small letter now), while “µ”
-- (U+00B5 micro sign) is case folded to “μ” (U+03BC
-- Greek small letter mu) instead of itself.
--
-- It uses the character property Case_Folding.
--
--
-- toCaseFoldString c == foldMap toCaseFoldString (toCaseFoldString c)
--
toCaseFoldString :: Char -> String
-- | Returns the full lower case mapping of a character if the
-- character is changed, else nothing.
--
-- It uses the character property Lowercase_Mapping.
lowerCaseMapping :: Unfold Char Char
-- | Convert a character to full lower case if defined, else to
-- itself.
--
-- The result string may have more than one character. For instance, “İ”
-- (U+0130 Latin capital letter I with dot above) maps to the
-- sequence: “i” (U+0069 Latin small letter I) followed by “ ̇”
-- (U+0307 combining dot above).
--
-- It uses the character property Lowercase_Mapping.
--
-- See: toLower for simple lower case conversion.
--
--
-- toLowerString c == foldMap toLowerString (toLowerString c)
--
toLowerString :: Char -> String
-- | Returns the full title case mapping of a character if the
-- character is changed, else nothing.
--
-- It uses the character property Titlecase_Mapping.
titleCaseMapping :: Unfold Char Char
-- | Convert a character to full title case if defined, else to
-- itself.
--
-- The result string may have more than one character. For instance, “ﬂ”
-- (U+FB02 Latin small ligature FL) is converted to the
-- sequence: “F” (U+0046 Latin capital letter F) followed by “l”
-- (U+006C Latin small letter L).
--
-- It uses the character property Titlecase_Mapping.
--
-- See: toTitle for simple title case conversion.
toTitleString :: Char -> String
-- | Returns the full upper case mapping of a character if the
-- character is changed, else nothing.
--
-- It uses the character property Uppercase_Mapping.
upperCaseMapping :: Unfold Char Char
-- | Convert a character to full upper case if defined, else to
-- itself.
--
-- The result string may have more than one character. For instance, the
-- German “ß” (U+00DF Eszett) maps to the two-letter sequence
-- “SS”.
--
-- It uses the character property Uppercase_Mapping.
--
-- See: toUpper for simple upper case conversion.
--
--
-- toUpperString c == foldMap toUpperString (toUpperString c)
--
toUpperString :: Char -> String
-- | An Unfold a b is a generator of a stream of values of type
-- b from a seed of type a.
data Unfold a b
Unfold :: (s -> Step s b) -> (a -> Step s b) -> Unfold a b
-- | A stream is a succession of Steps.
data Step s a
-- | Produces a single value and the next state of the stream.
Yield :: !a -> !s -> Step s a
-- | Indicates there are no more values in the stream.
Stop :: Step s a
-- | This module provides APIs to access the Unicode character database
-- (UCD) corresponding to Unicode Standard version 15.0.0.
--
-- This module re-exports several sub-modules under it. The sub-module
-- structure under Char is largely based on the "Property Index
-- by Scope of Use" in Unicode® Standard Annex #44.
--
-- The Unicode.Char.* modules in turn depend on
-- Unicode.Internal.Char.* modules which are programmatically
-- generated from the Unicode standard's Unicode character database
-- files. The module structure under Unicode.Internal.Char is
-- largely based on the UCD text file names from which the properties are
-- generated.
--
-- For the original UCD files used in this code please refer to the
-- UCD section on the Unicode standard page. See
-- https://www.unicode.org/reports/tr44/ to understand the
-- contents and the format of the unicode database files.
module Unicode.Char
-- | Version of the Unicode standard used by this package: 15.0.0.
unicodeVersion :: Version
-- | Same as isLetter.
isAlpha :: Char -> Bool
-- | Returns True for upper-case characters.
--
-- It uses the character property Uppercase.
--
-- Note: it does not match title-cased letters. Those are matched
-- using: generalCategory c == TitlecaseLetter.
-- | Deprecated: Use isUpperCase instead. Note that the behavior of this
-- function does not match base:Data.Char.isUpper. See
-- Unicode.Char.Case.Compat for behavior compatible with
-- base:Data.Char.
isUpper :: Char -> Bool
-- | Returns True for upper-case characters.
--
-- It uses the character property Uppercase.
--
-- Note: it does not match title-cased letters. Those are matched
-- using: generalCategory c == TitlecaseLetter.
isUpperCase :: Char -> Bool
-- | Returns True for lower-case characters.
--
-- It uses the character property Lowercase.
-- | Deprecated: Use isLowerCase instead. Note that the behavior of this
-- function does not match base:Data.Char.isLower. See
-- Unicode.Char.Case.Compat for behavior compatible with
-- base:Data.Char.
isLower :: Char -> Bool
-- | Returns True for lower-case characters.
--
-- It uses the character property Lowercase.
isLowerCase :: Char -> Bool
-- | Returns the full folded case mapping of a character if the
-- character is changed, else nothing.
--
-- It uses the character property Case_Folding.
caseFoldMapping :: Unfold Char Char
-- | Convert a character to full folded case if defined, else to
-- itself.
--
-- This function is mainly useful for performing caseless (also known as
-- case insensitive) string comparisons.
--
-- A string x is a caseless match for a string y if and
-- only if:
--
--
-- foldMap toCaseFoldString x == foldMap toCaseFoldString y
--
--
-- The result string may have more than one character, and may differ
-- from applying toLowerString to the input string. For instance,
-- “ﬓ” (U+FB13 Armenian small ligature men now) is case folded
-- to the sequence “մ” (U+0574 Armenian small letter men)
-- followed by “ն” (U+0576 Armenian small letter now), while “µ”
-- (U+00B5 micro sign) is case folded to “μ” (U+03BC
-- Greek small letter mu) instead of itself.
--
-- It uses the character property Case_Folding.
--
--
-- toCaseFoldString c == foldMap toCaseFoldString (toCaseFoldString c)
--
toCaseFoldString :: Char -> String
-- | Returns the full lower case mapping of a character if the
-- character is changed, else nothing.
--
-- It uses the character property Lowercase_Mapping.
lowerCaseMapping :: Unfold Char Char
-- | Convert a character to full lower case if defined, else to
-- itself.
--
-- The result string may have more than one character. For instance, “İ”
-- (U+0130 Latin capital letter I with dot above) maps to the
-- sequence: “i” (U+0069 Latin small letter I) followed by “ ̇”
-- (U+0307 combining dot above).
--
-- It uses the character property Lowercase_Mapping.
--
-- See: toLower for simple lower case conversion.
--
--
-- toLowerString c == foldMap toLowerString (toLowerString c)
--
toLowerString :: Char -> String
-- | Returns the full title case mapping of a character if the
-- character is changed, else nothing.
--
-- It uses the character property Titlecase_Mapping.
titleCaseMapping :: Unfold Char Char
-- | Convert a character to full title case if defined, else to
-- itself.
--
-- The result string may have more than one character. For instance, “ﬂ”
-- (U+FB02 Latin small ligature FL) is converted to the
-- sequence: “F” (U+0046 Latin capital letter F) followed by “l”
-- (U+006C Latin small letter L).
--
-- It uses the character property Titlecase_Mapping.
--
-- See: toTitle for simple title case conversion.
toTitleString :: Char -> String
-- | Returns the full upper case mapping of a character if the
-- character is changed, else nothing.
--
-- It uses the character property Uppercase_Mapping.
upperCaseMapping :: Unfold Char Char
-- | Convert a character to full upper case if defined, else to
-- itself.
--
-- The result string may have more than one character. For instance, the
-- German “ß” (U+00DF Eszett) maps to the two-letter sequence
-- “SS”.
--
-- It uses the character property Uppercase_Mapping.
--
-- See: toUpper for simple upper case conversion.
--
--
-- toUpperString c == foldMap toUpperString (toUpperString c)
--
toUpperString :: Char -> String
-- | Convert a letter to the corresponding upper-case letter, if any. Any
-- other character is returned unchanged.
--
-- It uses the character property Simple_Uppercase_Mapping.
--
-- See: upperCaseMapping and toUpperString for full
-- upper case conversion.
--
--
-- toUpper c == Data.Char.toUpper c
--
toUpper :: Char -> Char
-- | Convert a letter to the corresponding lower-case letter, if any. Any
-- other character is returned unchanged.
--
-- It uses the character property Simple_Lowercase_Mapping.
--
-- See: lowerCaseMapping and toLowerString for full
-- lower case conversion.
--
--
-- toLower c == Data.Char.toLower c
--
toLower :: Char -> Char
-- | Convert a letter to the corresponding title-case or upper-case letter,
-- if any. (Title case differs from upper case only for a small number of
-- ligature letters.) Any other character is returned unchanged.
--
-- It uses the character property Simple_Titlecase_Mapping.
--
-- See: titleCaseMapping and toTitleString for full
-- title case conversion.
--
--
-- toTitle c == Data.Char.toTitle c
--
toTitle :: Char -> Char
-- | The fromEnum method restricted to the type Char.
ord :: Char -> Int
-- | The toEnum method restricted to the type Char.
chr :: Int -> Char