-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Access Unicode Character Database (UCD) -- -- unicode-data provides Haskell APIs to efficiently access the -- Unicode character database (UCD). Performance is the primary -- goal in the design of this package. -- -- The Haskell data structures are generated programmatically from the -- UCD files. The latest Unicode version supported by this library is -- 15.0.0. @package unicode-data @version 0.5.0 -- | Fast, static bitmap lookup utilities module Unicode.Internal.Bits -- | lookupBit addr byteIndex bitIndex looks up the bit stored in -- the byte at index byteIndex at the bit index -- bitIndex using a bitmap starting at the address -- addr. The caller must make sure that the byte at address -- (addr + byteIndex) is legally accessible memory. lookupBit :: Addr# -> Int -> Int -> Bool -- | lookupWord8AsInt addr index looks up the -- index-th 8-bit word in the bitmap starting at -- addr, then converts it to an Int. -- -- The caller must make sure that: -- --

-- * ceiling (addr + (n * 8)) is legally accessible Word8#.

lookupWord8AsInt :: Addr# -> Int -> Int lookupWord8AsInt# :: Addr# -> Int# -> Int# lookupWord16AsInt :: Addr# -> Int -> Int lookupWord16AsInt# :: Addr# -> Int# -> Int# -- | lookupWord32# addr index looks up the index-th -- 32-bit word in the bitmap starting at addr, then converts it -- to a Word#. -- -- The caller must make sure that: -- --

-- * ceiling (addr + (n * 32)) is legally accessible Word32#.

lookupWord32# :: Addr# -> Int# -> Word# unpackCString# :: Addr# -> [Char] -- | Unicode blocks related functions. module Unicode.Char.General.Blocks -- | Unicode block. -- -- There is a total of 327 blocks. data Block -- | U+0000..U+007F: Basic Latin. BasicLatin :: Block -- | U+0080..U+00FF: Latin-1 Supplement. Latin1Supplement :: Block -- | U+0100..U+017F: Latin Extended-A. LatinExtendedA :: Block -- | U+0180..U+024F: Latin Extended-B. LatinExtendedB :: Block -- | U+0250..U+02AF: IPA Extensions. IPAExtensions :: Block -- | U+02B0..U+02FF: Spacing Modifier Letters. SpacingModifierLetters :: Block -- | U+0300..U+036F: Combining Diacritical Marks. CombiningDiacriticalMarks :: Block -- | U+0370..U+03FF: Greek and Coptic. GreekAndCoptic :: Block -- | U+0400..U+04FF: Cyrillic. Cyrillic :: Block -- | U+0500..U+052F: Cyrillic Supplement. CyrillicSupplement :: Block -- | U+0530..U+058F: Armenian. Armenian :: Block -- | U+0590..U+05FF: Hebrew. Hebrew :: Block -- | U+0600..U+06FF: Arabic. Arabic :: Block -- | U+0700..U+074F: Syriac. Syriac :: Block -- | U+0750..U+077F: Arabic Supplement. ArabicSupplement :: Block -- | U+0780..U+07BF: Thaana. Thaana :: Block -- | U+07C0..U+07FF: NKo. NKo :: Block -- | U+0800..U+083F: Samaritan. Samaritan :: Block -- | U+0840..U+085F: Mandaic. Mandaic :: Block -- | U+0860..U+086F: Syriac Supplement. SyriacSupplement :: Block -- | U+0870..U+089F: Arabic Extended-B. ArabicExtendedB :: Block -- | U+08A0..U+08FF: Arabic Extended-A. ArabicExtendedA :: Block -- | U+0900..U+097F: Devanagari. Devanagari :: Block -- | U+0980..U+09FF: Bengali. Bengali :: Block -- | U+0A00..U+0A7F: Gurmukhi. Gurmukhi :: Block -- | U+0A80..U+0AFF: Gujarati. Gujarati :: Block -- | U+0B00..U+0B7F: Oriya. Oriya :: Block -- | U+0B80..U+0BFF: Tamil. Tamil :: Block -- | U+0C00..U+0C7F: Telugu. Telugu :: Block -- | U+0C80..U+0CFF: Kannada. Kannada :: Block -- | U+0D00..U+0D7F: Malayalam. Malayalam :: Block -- | U+0D80..U+0DFF: Sinhala. Sinhala :: Block -- | U+0E00..U+0E7F: Thai. 
Thai :: Block -- | U+0E80..U+0EFF: Lao. Lao :: Block -- | U+0F00..U+0FFF: Tibetan. Tibetan :: Block -- | U+1000..U+109F: Myanmar. Myanmar :: Block -- | U+10A0..U+10FF: Georgian. Georgian :: Block -- | U+1100..U+11FF: Hangul Jamo. HangulJamo :: Block -- | U+1200..U+137F: Ethiopic. Ethiopic :: Block -- | U+1380..U+139F: Ethiopic Supplement. EthiopicSupplement :: Block -- | U+13A0..U+13FF: Cherokee. Cherokee :: Block -- | U+1400..U+167F: Unified Canadian Aboriginal Syllabics. UnifiedCanadianAboriginalSyllabics :: Block -- | U+1680..U+169F: Ogham. Ogham :: Block -- | U+16A0..U+16FF: Runic. Runic :: Block -- | U+1700..U+171F: Tagalog. Tagalog :: Block -- | U+1720..U+173F: Hanunoo. Hanunoo :: Block -- | U+1740..U+175F: Buhid. Buhid :: Block -- | U+1760..U+177F: Tagbanwa. Tagbanwa :: Block -- | U+1780..U+17FF: Khmer. Khmer :: Block -- | U+1800..U+18AF: Mongolian. Mongolian :: Block -- | U+18B0..U+18FF: Unified Canadian Aboriginal Syllabics -- Extended. UnifiedCanadianAboriginalSyllabicsExtended :: Block -- | U+1900..U+194F: Limbu. Limbu :: Block -- | U+1950..U+197F: Tai Le. TaiLe :: Block -- | U+1980..U+19DF: New Tai Lue. NewTaiLue :: Block -- | U+19E0..U+19FF: Khmer Symbols. KhmerSymbols :: Block -- | U+1A00..U+1A1F: Buginese. Buginese :: Block -- | U+1A20..U+1AAF: Tai Tham. TaiTham :: Block -- | U+1AB0..U+1AFF: Combining Diacritical Marks Extended. CombiningDiacriticalMarksExtended :: Block -- | U+1B00..U+1B7F: Balinese. Balinese :: Block -- | U+1B80..U+1BBF: Sundanese. Sundanese :: Block -- | U+1BC0..U+1BFF: Batak. Batak :: Block -- | U+1C00..U+1C4F: Lepcha. Lepcha :: Block -- | U+1C50..U+1C7F: Ol Chiki. OlChiki :: Block -- | U+1C80..U+1C8F: Cyrillic Extended-C. CyrillicExtendedC :: Block -- | U+1C90..U+1CBF: Georgian Extended. GeorgianExtended :: Block -- | U+1CC0..U+1CCF: Sundanese Supplement. SundaneseSupplement :: Block -- | U+1CD0..U+1CFF: Vedic Extensions. VedicExtensions :: Block -- | U+1D00..U+1D7F: Phonetic Extensions. 
PhoneticExtensions :: Block -- | U+1D80..U+1DBF: Phonetic Extensions Supplement. PhoneticExtensionsSupplement :: Block -- | U+1DC0..U+1DFF: Combining Diacritical Marks Supplement. CombiningDiacriticalMarksSupplement :: Block -- | U+1E00..U+1EFF: Latin Extended Additional. LatinExtendedAdditional :: Block -- | U+1F00..U+1FFF: Greek Extended. GreekExtended :: Block -- | U+2000..U+206F: General Punctuation. GeneralPunctuation :: Block -- | U+2070..U+209F: Superscripts and Subscripts. SuperscriptsAndSubscripts :: Block -- | U+20A0..U+20CF: Currency Symbols. CurrencySymbols :: Block -- | U+20D0..U+20FF: Combining Diacritical Marks for Symbols. CombiningDiacriticalMarksForSymbols :: Block -- | U+2100..U+214F: Letterlike Symbols. LetterlikeSymbols :: Block -- | U+2150..U+218F: Number Forms. NumberForms :: Block -- | U+2190..U+21FF: Arrows. Arrows :: Block -- | U+2200..U+22FF: Mathematical Operators. MathematicalOperators :: Block -- | U+2300..U+23FF: Miscellaneous Technical. MiscellaneousTechnical :: Block -- | U+2400..U+243F: Control Pictures. ControlPictures :: Block -- | U+2440..U+245F: Optical Character Recognition. OpticalCharacterRecognition :: Block -- | U+2460..U+24FF: Enclosed Alphanumerics. EnclosedAlphanumerics :: Block -- | U+2500..U+257F: Box Drawing. BoxDrawing :: Block -- | U+2580..U+259F: Block Elements. BlockElements :: Block -- | U+25A0..U+25FF: Geometric Shapes. GeometricShapes :: Block -- | U+2600..U+26FF: Miscellaneous Symbols. MiscellaneousSymbols :: Block -- | U+2700..U+27BF: Dingbats. Dingbats :: Block -- | U+27C0..U+27EF: Miscellaneous Mathematical Symbols-A. MiscellaneousMathematicalSymbolsA :: Block -- | U+27F0..U+27FF: Supplemental Arrows-A. SupplementalArrowsA :: Block -- | U+2800..U+28FF: Braille Patterns. BraillePatterns :: Block -- | U+2900..U+297F: Supplemental Arrows-B. SupplementalArrowsB :: Block -- | U+2980..U+29FF: Miscellaneous Mathematical Symbols-B. 
MiscellaneousMathematicalSymbolsB :: Block -- | U+2A00..U+2AFF: Supplemental Mathematical Operators. SupplementalMathematicalOperators :: Block -- | U+2B00..U+2BFF: Miscellaneous Symbols and Arrows. MiscellaneousSymbolsAndArrows :: Block -- | U+2C00..U+2C5F: Glagolitic. Glagolitic :: Block -- | U+2C60..U+2C7F: Latin Extended-C. LatinExtendedC :: Block -- | U+2C80..U+2CFF: Coptic. Coptic :: Block -- | U+2D00..U+2D2F: Georgian Supplement. GeorgianSupplement :: Block -- | U+2D30..U+2D7F: Tifinagh. Tifinagh :: Block -- | U+2D80..U+2DDF: Ethiopic Extended. EthiopicExtended :: Block -- | U+2DE0..U+2DFF: Cyrillic Extended-A. CyrillicExtendedA :: Block -- | U+2E00..U+2E7F: Supplemental Punctuation. SupplementalPunctuation :: Block -- | U+2E80..U+2EFF: CJK Radicals Supplement. CJKRadicalsSupplement :: Block -- | U+2F00..U+2FDF: Kangxi Radicals. KangxiRadicals :: Block -- | U+2FF0..U+2FFF: Ideographic Description Characters. IdeographicDescriptionCharacters :: Block -- | U+3000..U+303F: CJK Symbols and Punctuation. CJKSymbolsAndPunctuation :: Block -- | U+3040..U+309F: Hiragana. Hiragana :: Block -- | U+30A0..U+30FF: Katakana. Katakana :: Block -- | U+3100..U+312F: Bopomofo. Bopomofo :: Block -- | U+3130..U+318F: Hangul Compatibility Jamo. HangulCompatibilityJamo :: Block -- | U+3190..U+319F: Kanbun. Kanbun :: Block -- | U+31A0..U+31BF: Bopomofo Extended. BopomofoExtended :: Block -- | U+31C0..U+31EF: CJK Strokes. CJKStrokes :: Block -- | U+31F0..U+31FF: Katakana Phonetic Extensions. KatakanaPhoneticExtensions :: Block -- | U+3200..U+32FF: Enclosed CJK Letters and Months. EnclosedCJKLettersAndMonths :: Block -- | U+3300..U+33FF: CJK Compatibility. CJKCompatibility :: Block -- | U+3400..U+4DBF: CJK Unified Ideographs Extension A. CJKUnifiedIdeographsExtensionA :: Block -- | U+4DC0..U+4DFF: Yijing Hexagram Symbols. YijingHexagramSymbols :: Block -- | U+4E00..U+9FFF: CJK Unified Ideographs. CJKUnifiedIdeographs :: Block -- | U+A000..U+A48F: Yi Syllables. 
YiSyllables :: Block -- | U+A490..U+A4CF: Yi Radicals. YiRadicals :: Block -- | U+A4D0..U+A4FF: Lisu. Lisu :: Block -- | U+A500..U+A63F: Vai. Vai :: Block -- | U+A640..U+A69F: Cyrillic Extended-B. CyrillicExtendedB :: Block -- | U+A6A0..U+A6FF: Bamum. Bamum :: Block -- | U+A700..U+A71F: Modifier Tone Letters. ModifierToneLetters :: Block -- | U+A720..U+A7FF: Latin Extended-D. LatinExtendedD :: Block -- | U+A800..U+A82F: Syloti Nagri. SylotiNagri :: Block -- | U+A830..U+A83F: Common Indic Number Forms. CommonIndicNumberForms :: Block -- | U+A840..U+A87F: Phags-pa. PhagsPa :: Block -- | U+A880..U+A8DF: Saurashtra. Saurashtra :: Block -- | U+A8E0..U+A8FF: Devanagari Extended. DevanagariExtended :: Block -- | U+A900..U+A92F: Kayah Li. KayahLi :: Block -- | U+A930..U+A95F: Rejang. Rejang :: Block -- | U+A960..U+A97F: Hangul Jamo Extended-A. HangulJamoExtendedA :: Block -- | U+A980..U+A9DF: Javanese. Javanese :: Block -- | U+A9E0..U+A9FF: Myanmar Extended-B. MyanmarExtendedB :: Block -- | U+AA00..U+AA5F: Cham. Cham :: Block -- | U+AA60..U+AA7F: Myanmar Extended-A. MyanmarExtendedA :: Block -- | U+AA80..U+AADF: Tai Viet. TaiViet :: Block -- | U+AAE0..U+AAFF: Meetei Mayek Extensions. MeeteiMayekExtensions :: Block -- | U+AB00..U+AB2F: Ethiopic Extended-A. EthiopicExtendedA :: Block -- | U+AB30..U+AB6F: Latin Extended-E. LatinExtendedE :: Block -- | U+AB70..U+ABBF: Cherokee Supplement. CherokeeSupplement :: Block -- | U+ABC0..U+ABFF: Meetei Mayek. MeeteiMayek :: Block -- | U+AC00..U+D7AF: Hangul Syllables. HangulSyllables :: Block -- | U+D7B0..U+D7FF: Hangul Jamo Extended-B. HangulJamoExtendedB :: Block -- | U+D800..U+DB7F: High Surrogates. HighSurrogates :: Block -- | U+DB80..U+DBFF: High Private Use Surrogates. HighPrivateUseSurrogates :: Block -- | U+DC00..U+DFFF: Low Surrogates. LowSurrogates :: Block -- | U+E000..U+F8FF: Private Use Area. PrivateUseArea :: Block -- | U+F900..U+FAFF: CJK Compatibility Ideographs. 
CJKCompatibilityIdeographs :: Block -- | U+FB00..U+FB4F: Alphabetic Presentation Forms. AlphabeticPresentationForms :: Block -- | U+FB50..U+FDFF: Arabic Presentation Forms-A. ArabicPresentationFormsA :: Block -- | U+FE00..U+FE0F: Variation Selectors. VariationSelectors :: Block -- | U+FE10..U+FE1F: Vertical Forms. VerticalForms :: Block -- | U+FE20..U+FE2F: Combining Half Marks. CombiningHalfMarks :: Block -- | U+FE30..U+FE4F: CJK Compatibility Forms. CJKCompatibilityForms :: Block -- | U+FE50..U+FE6F: Small Form Variants. SmallFormVariants :: Block -- | U+FE70..U+FEFF: Arabic Presentation Forms-B. ArabicPresentationFormsB :: Block -- | U+FF00..U+FFEF: Halfwidth and Fullwidth Forms. HalfwidthAndFullwidthForms :: Block -- | U+FFF0..U+FFFF: Specials. Specials :: Block -- | U+10000..U+1007F: Linear B Syllabary. LinearBSyllabary :: Block -- | U+10080..U+100FF: Linear B Ideograms. LinearBIdeograms :: Block -- | U+10100..U+1013F: Aegean Numbers. AegeanNumbers :: Block -- | U+10140..U+1018F: Ancient Greek Numbers. AncientGreekNumbers :: Block -- | U+10190..U+101CF: Ancient Symbols. AncientSymbols :: Block -- | U+101D0..U+101FF: Phaistos Disc. PhaistosDisc :: Block -- | U+10280..U+1029F: Lycian. Lycian :: Block -- | U+102A0..U+102DF: Carian. Carian :: Block -- | U+102E0..U+102FF: Coptic Epact Numbers. CopticEpactNumbers :: Block -- | U+10300..U+1032F: Old Italic. OldItalic :: Block -- | U+10330..U+1034F: Gothic. Gothic :: Block -- | U+10350..U+1037F: Old Permic. OldPermic :: Block -- | U+10380..U+1039F: Ugaritic. Ugaritic :: Block -- | U+103A0..U+103DF: Old Persian. OldPersian :: Block -- | U+10400..U+1044F: Deseret. Deseret :: Block -- | U+10450..U+1047F: Shavian. Shavian :: Block -- | U+10480..U+104AF: Osmanya. Osmanya :: Block -- | U+104B0..U+104FF: Osage. Osage :: Block -- | U+10500..U+1052F: Elbasan. Elbasan :: Block -- | U+10530..U+1056F: Caucasian Albanian. CaucasianAlbanian :: Block -- | U+10570..U+105BF: Vithkuqi. Vithkuqi :: Block -- | U+10600..U+1077F: Linear A. 
LinearA :: Block -- | U+10780..U+107BF: Latin Extended-F. LatinExtendedF :: Block -- | U+10800..U+1083F: Cypriot Syllabary. CypriotSyllabary :: Block -- | U+10840..U+1085F: Imperial Aramaic. ImperialAramaic :: Block -- | U+10860..U+1087F: Palmyrene. Palmyrene :: Block -- | U+10880..U+108AF: Nabataean. Nabataean :: Block -- | U+108E0..U+108FF: Hatran. Hatran :: Block -- | U+10900..U+1091F: Phoenician. Phoenician :: Block -- | U+10920..U+1093F: Lydian. Lydian :: Block -- | U+10980..U+1099F: Meroitic Hieroglyphs. MeroiticHieroglyphs :: Block -- | U+109A0..U+109FF: Meroitic Cursive. MeroiticCursive :: Block -- | U+10A00..U+10A5F: Kharoshthi. Kharoshthi :: Block -- | U+10A60..U+10A7F: Old South Arabian. OldSouthArabian :: Block -- | U+10A80..U+10A9F: Old North Arabian. OldNorthArabian :: Block -- | U+10AC0..U+10AFF: Manichaean. Manichaean :: Block -- | U+10B00..U+10B3F: Avestan. Avestan :: Block -- | U+10B40..U+10B5F: Inscriptional Parthian. InscriptionalParthian :: Block -- | U+10B60..U+10B7F: Inscriptional Pahlavi. InscriptionalPahlavi :: Block -- | U+10B80..U+10BAF: Psalter Pahlavi. PsalterPahlavi :: Block -- | U+10C00..U+10C4F: Old Turkic. OldTurkic :: Block -- | U+10C80..U+10CFF: Old Hungarian. OldHungarian :: Block -- | U+10D00..U+10D3F: Hanifi Rohingya. HanifiRohingya :: Block -- | U+10E60..U+10E7F: Rumi Numeral Symbols. RumiNumeralSymbols :: Block -- | U+10E80..U+10EBF: Yezidi. Yezidi :: Block -- | U+10EC0..U+10EFF: Arabic Extended-C. ArabicExtendedC :: Block -- | U+10F00..U+10F2F: Old Sogdian. OldSogdian :: Block -- | U+10F30..U+10F6F: Sogdian. Sogdian :: Block -- | U+10F70..U+10FAF: Old Uyghur. OldUyghur :: Block -- | U+10FB0..U+10FDF: Chorasmian. Chorasmian :: Block -- | U+10FE0..U+10FFF: Elymaic. Elymaic :: Block -- | U+11000..U+1107F: Brahmi. Brahmi :: Block -- | U+11080..U+110CF: Kaithi. Kaithi :: Block -- | U+110D0..U+110FF: Sora Sompeng. SoraSompeng :: Block -- | U+11100..U+1114F: Chakma. Chakma :: Block -- | U+11150..U+1117F: Mahajani. 
Mahajani :: Block -- | U+11180..U+111DF: Sharada. Sharada :: Block -- | U+111E0..U+111FF: Sinhala Archaic Numbers. SinhalaArchaicNumbers :: Block -- | U+11200..U+1124F: Khojki. Khojki :: Block -- | U+11280..U+112AF: Multani. Multani :: Block -- | U+112B0..U+112FF: Khudawadi. Khudawadi :: Block -- | U+11300..U+1137F: Grantha. Grantha :: Block -- | U+11400..U+1147F: Newa. Newa :: Block -- | U+11480..U+114DF: Tirhuta. Tirhuta :: Block -- | U+11580..U+115FF: Siddham. Siddham :: Block -- | U+11600..U+1165F: Modi. Modi :: Block -- | U+11660..U+1167F: Mongolian Supplement. MongolianSupplement :: Block -- | U+11680..U+116CF: Takri. Takri :: Block -- | U+11700..U+1174F: Ahom. Ahom :: Block -- | U+11800..U+1184F: Dogra. Dogra :: Block -- | U+118A0..U+118FF: Warang Citi. WarangCiti :: Block -- | U+11900..U+1195F: Dives Akuru. DivesAkuru :: Block -- | U+119A0..U+119FF: Nandinagari. Nandinagari :: Block -- | U+11A00..U+11A4F: Zanabazar Square. ZanabazarSquare :: Block -- | U+11A50..U+11AAF: Soyombo. Soyombo :: Block -- | U+11AB0..U+11ABF: Unified Canadian Aboriginal Syllabics -- Extended-A. UnifiedCanadianAboriginalSyllabicsExtendedA :: Block -- | U+11AC0..U+11AFF: Pau Cin Hau. PauCinHau :: Block -- | U+11B00..U+11B5F: Devanagari Extended-A. DevanagariExtendedA :: Block -- | U+11C00..U+11C6F: Bhaiksuki. Bhaiksuki :: Block -- | U+11C70..U+11CBF: Marchen. Marchen :: Block -- | U+11D00..U+11D5F: Masaram Gondi. MasaramGondi :: Block -- | U+11D60..U+11DAF: Gunjala Gondi. GunjalaGondi :: Block -- | U+11EE0..U+11EFF: Makasar. Makasar :: Block -- | U+11F00..U+11F5F: Kawi. Kawi :: Block -- | U+11FB0..U+11FBF: Lisu Supplement. LisuSupplement :: Block -- | U+11FC0..U+11FFF: Tamil Supplement. TamilSupplement :: Block -- | U+12000..U+123FF: Cuneiform. Cuneiform :: Block -- | U+12400..U+1247F: Cuneiform Numbers and Punctuation. CuneiformNumbersAndPunctuation :: Block -- | U+12480..U+1254F: Early Dynastic Cuneiform. EarlyDynasticCuneiform :: Block -- | U+12F90..U+12FFF: Cypro-Minoan. 
CyproMinoan :: Block -- | U+13000..U+1342F: Egyptian Hieroglyphs. EgyptianHieroglyphs :: Block -- | U+13430..U+1345F: Egyptian Hieroglyph Format Controls. EgyptianHieroglyphFormatControls :: Block -- | U+14400..U+1467F: Anatolian Hieroglyphs. AnatolianHieroglyphs :: Block -- | U+16800..U+16A3F: Bamum Supplement. BamumSupplement :: Block -- | U+16A40..U+16A6F: Mro. Mro :: Block -- | U+16A70..U+16ACF: Tangsa. Tangsa :: Block -- | U+16AD0..U+16AFF: Bassa Vah. BassaVah :: Block -- | U+16B00..U+16B8F: Pahawh Hmong. PahawhHmong :: Block -- | U+16E40..U+16E9F: Medefaidrin. Medefaidrin :: Block -- | U+16F00..U+16F9F: Miao. Miao :: Block -- | U+16FE0..U+16FFF: Ideographic Symbols and Punctuation. IdeographicSymbolsAndPunctuation :: Block -- | U+17000..U+187FF: Tangut. Tangut :: Block -- | U+18800..U+18AFF: Tangut Components. TangutComponents :: Block -- | U+18B00..U+18CFF: Khitan Small Script. KhitanSmallScript :: Block -- | U+18D00..U+18D7F: Tangut Supplement. TangutSupplement :: Block -- | U+1AFF0..U+1AFFF: Kana Extended-B. KanaExtendedB :: Block -- | U+1B000..U+1B0FF: Kana Supplement. KanaSupplement :: Block -- | U+1B100..U+1B12F: Kana Extended-A. KanaExtendedA :: Block -- | U+1B130..U+1B16F: Small Kana Extension. SmallKanaExtension :: Block -- | U+1B170..U+1B2FF: Nushu. Nushu :: Block -- | U+1BC00..U+1BC9F: Duployan. Duployan :: Block -- | U+1BCA0..U+1BCAF: Shorthand Format Controls. ShorthandFormatControls :: Block -- | U+1CF00..U+1CFCF: Znamenny Musical Notation. ZnamennyMusicalNotation :: Block -- | U+1D000..U+1D0FF: Byzantine Musical Symbols. ByzantineMusicalSymbols :: Block -- | U+1D100..U+1D1FF: Musical Symbols. MusicalSymbols :: Block -- | U+1D200..U+1D24F: Ancient Greek Musical Notation. AncientGreekMusicalNotation :: Block -- | U+1D2C0..U+1D2DF: Kaktovik Numerals. KaktovikNumerals :: Block -- | U+1D2E0..U+1D2FF: Mayan Numerals. MayanNumerals :: Block -- | U+1D300..U+1D35F: Tai Xuan Jing Symbols. 
TaiXuanJingSymbols :: Block -- | U+1D360..U+1D37F: Counting Rod Numerals. CountingRodNumerals :: Block -- | U+1D400..U+1D7FF: Mathematical Alphanumeric Symbols. MathematicalAlphanumericSymbols :: Block -- | U+1D800..U+1DAAF: Sutton SignWriting. SuttonSignWriting :: Block -- | U+1DF00..U+1DFFF: Latin Extended-G. LatinExtendedG :: Block -- | U+1E000..U+1E02F: Glagolitic Supplement. GlagoliticSupplement :: Block -- | U+1E030..U+1E08F: Cyrillic Extended-D. CyrillicExtendedD :: Block -- | U+1E100..U+1E14F: Nyiakeng Puachue Hmong. NyiakengPuachueHmong :: Block -- | U+1E290..U+1E2BF: Toto. Toto :: Block -- | U+1E2C0..U+1E2FF: Wancho. Wancho :: Block -- | U+1E4D0..U+1E4FF: Nag Mundari. NagMundari :: Block -- | U+1E7E0..U+1E7FF: Ethiopic Extended-B. EthiopicExtendedB :: Block -- | U+1E800..U+1E8DF: Mende Kikakui. MendeKikakui :: Block -- | U+1E900..U+1E95F: Adlam. Adlam :: Block -- | U+1EC70..U+1ECBF: Indic Siyaq Numbers. IndicSiyaqNumbers :: Block -- | U+1ED00..U+1ED4F: Ottoman Siyaq Numbers. OttomanSiyaqNumbers :: Block -- | U+1EE00..U+1EEFF: Arabic Mathematical Alphabetic Symbols. ArabicMathematicalAlphabeticSymbols :: Block -- | U+1F000..U+1F02F: Mahjong Tiles. MahjongTiles :: Block -- | U+1F030..U+1F09F: Domino Tiles. DominoTiles :: Block -- | U+1F0A0..U+1F0FF: Playing Cards. PlayingCards :: Block -- | U+1F100..U+1F1FF: Enclosed Alphanumeric Supplement. EnclosedAlphanumericSupplement :: Block -- | U+1F200..U+1F2FF: Enclosed Ideographic Supplement. EnclosedIdeographicSupplement :: Block -- | U+1F300..U+1F5FF: Miscellaneous Symbols and Pictographs. MiscellaneousSymbolsAndPictographs :: Block -- | U+1F600..U+1F64F: Emoticons. Emoticons :: Block -- | U+1F650..U+1F67F: Ornamental Dingbats. OrnamentalDingbats :: Block -- | U+1F680..U+1F6FF: Transport and Map Symbols. TransportAndMapSymbols :: Block -- | U+1F700..U+1F77F: Alchemical Symbols. AlchemicalSymbols :: Block -- | U+1F780..U+1F7FF: Geometric Shapes Extended. 
GeometricShapesExtended :: Block -- | U+1F800..U+1F8FF: Supplemental Arrows-C. SupplementalArrowsC :: Block -- | U+1F900..U+1F9FF: Supplemental Symbols and Pictographs. SupplementalSymbolsAndPictographs :: Block -- | U+1FA00..U+1FA6F: Chess Symbols. ChessSymbols :: Block -- | U+1FA70..U+1FAFF: Symbols and Pictographs Extended-A. SymbolsAndPictographsExtendedA :: Block -- | U+1FB00..U+1FBFF: Symbols for Legacy Computing. SymbolsForLegacyComputing :: Block -- | U+20000..U+2A6DF: CJK Unified Ideographs Extension B. CJKUnifiedIdeographsExtensionB :: Block -- | U+2A700..U+2B73F: CJK Unified Ideographs Extension C. CJKUnifiedIdeographsExtensionC :: Block -- | U+2B740..U+2B81F: CJK Unified Ideographs Extension D. CJKUnifiedIdeographsExtensionD :: Block -- | U+2B820..U+2CEAF: CJK Unified Ideographs Extension E. CJKUnifiedIdeographsExtensionE :: Block -- | U+2CEB0..U+2EBEF: CJK Unified Ideographs Extension F. CJKUnifiedIdeographsExtensionF :: Block -- | U+2F800..U+2FA1F: CJK Compatibility Ideographs Supplement. CJKCompatibilityIdeographsSupplement :: Block -- | U+30000..U+3134F: CJK Unified Ideographs Extension G. CJKUnifiedIdeographsExtensionG :: Block -- | U+31350..U+323AF: CJK Unified Ideographs Extension H. CJKUnifiedIdeographsExtensionH :: Block -- | U+E0000..U+E007F: Tags. Tags :: Block -- | U+E0100..U+E01EF: Variation Selectors Supplement. VariationSelectorsSupplement :: Block -- | U+F0000..U+FFFFF: Supplementary Private Use Area-A. SupplementaryPrivateUseAreaA :: Block -- | U+100000..U+10FFFF: Supplementary Private Use Area-B. SupplementaryPrivateUseAreaB :: Block -- | Character block, if defined. block :: Char -> Maybe Block -- | Block definition: range and name. 
data BlockDefinition BlockDefinition :: !(Int, Int) -> !String -> BlockDefinition -- | Range [blockRange] :: BlockDefinition -> !(Int, Int) -- | Name [blockName] :: BlockDefinition -> !String -- | Block definition blockDefinition :: Block -> BlockDefinition instance GHC.Show.Show Unicode.Char.General.Blocks.BlockDefinition instance GHC.Classes.Ord Unicode.Char.General.Blocks.BlockDefinition instance GHC.Classes.Eq Unicode.Char.General.Blocks.BlockDefinition -- | Unicode Identifier and Pattern Syntax property functions based on -- Unicode Standard Annex #31 module Unicode.Char.Identifiers -- | Returns True if a character is an identifier continue -- character. isIDContinue :: Char -> Bool -- | Returns True if a character is an identifier start character. isIDStart :: Char -> Bool -- | Returns True if a character is an identifier continue -- character, using the NFKC modifications detailed in UAX #31, -- 5.1. isXIDContinue :: Char -> Bool -- | Returns True if a character is an identifier start character, -- using the NFKC modifications detailed in UAX #31, 5.1. isXIDStart :: Char -> Bool -- | Returns True if a character is a pattern syntax character. isPatternSyntax :: Char -> Bool -- | Returns True if a character is a pattern whitespace character. isPatternWhitespace :: Char -> Bool -- | Compatibility module for numeric character property related functions. module Unicode.Char.Numeric.Compat -- | Selects Unicode numeric characters, including digits from various -- scripts, Roman numerals, et cetera. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

-- * DecimalNumber
-- * LetterNumber
-- * OtherNumber

-- -- Note: a character may have a numeric value (see -- numericValue) but return False, because isNumber -- only tests GeneralCategory: some CJK characters are -- OtherLetter and do have a numeric value. Use isNumeric -- to cover those cases as well. -- --

--   isNumber c == Data.Char.isNumber c
--

-- -- @since 0.3.1 moved to Compat module. isNumber :: Char -> Bool -- | Numeric character property related functions. module Unicode.Char.Numeric -- | Selects Unicode character with a numeric value. -- -- Note: a character may have a numeric value but return -- False with the predicate isNumber, because -- isNumber only tests GeneralCategory: some CJK characters -- are OtherLetter and do have a numeric value. -- --

--   isNumeric c == isJust (numericValue c)
--

isNumeric :: Char -> Bool -- | Selects Unicode numeric characters, including digits from various -- scripts, Roman numerals, et cetera. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

-- * DecimalNumber
-- * LetterNumber
-- * OtherNumber

--   isNumber c == Data.Char.isNumber c
--

-- | Deprecated: Use Unicode.Char.Numeric.Compat.isNumber instead. This -- function will be a synonym for isNumeric in a future release. See -- Unicode.Char.Numeric.Compat for behavior compatible with -- base:Data.Char. isNumber :: Char -> Bool -- | Numeric value of a character, if relevant. -- -- Note: a character may have a numeric value but return -- False with the predicate isNumber, because -- isNumber only tests GeneralCategory: some CJK characters -- are OtherLetter and do have a numeric value. numericValue :: Char -> Maybe Rational -- | Integer value of a character, if relevant. -- -- This is a special case of numericValue. -- -- Warning: There is a risk of integer overflow depending -- on the chosen concrete return type. As of Unicode 15.0 the results -- range from 0 to 1e12. -- --

--   >>> integerValue '\x5146' :: Maybe Int64 -- OK
--   Just 1000000000000
--   
--   >>> integerValue '\x5146' :: Maybe Int32 -- Will overflow!
--   Just (-727379968)
--

-- -- Therefore it is advised to use: integerValue -- @Int64. -- -- Note: A character may have a numeric value but return -- False with the predicate isNumber, because -- isNumber only tests GeneralCategory: some CJK characters -- are OtherLetter and do have a numeric value. integerValue :: Integral a => Char -> Maybe a -- | Selects ASCII digits, i.e. '0'..'9'. isDigit :: Char -> Bool -- | Selects ASCII octal digits, i.e. '0'..'7'. isOctDigit :: Char -> Bool -- | Selects ASCII hexadecimal digits, i.e. '0'..'9', -- 'a'..'f', 'A'..'F'. isHexDigit :: Char -> Bool -- | Convert a single digit Char to the corresponding Int. -- This function fails unless its argument satisfies isHexDigit, -- but recognises both upper- and lower-case hexadecimal digits (that is, -- '0'..'9', 'a'..'f', -- 'A'..'F'). -- --

-- Examples:

-- -- Characters '0' through '9' are converted properly to -- 0..9: -- --

--   >>> map digitToInt ['0'..'9']
--   [0,1,2,3,4,5,6,7,8,9]
--

-- -- Both upper- and lower-case 'A' through 'F' are -- converted as well, to 10..15. -- --

--   >>> map digitToInt ['a'..'f']
--   [10,11,12,13,14,15]
--   
--   >>> map digitToInt ['A'..'F']
--   [10,11,12,13,14,15]
--

-- -- Anything else throws an exception: -- --

--   >>> digitToInt 'G'
--   *** Exception: Char.digitToInt: not a digit 'G'
--   
--   >>> digitToInt '♥'
--   *** Exception: Char.digitToInt: not a digit '\9829'
--

digitToInt :: Char -> Int -- | Convert an Int in the range 0..15 to the -- corresponding single digit Char. This function fails on other -- inputs, and generates lower-case hexadecimal digits. intToDigit :: Int -> Char -- | Compatibility module for general character property related functions. -- -- The functions of this module are drop-in replacement for those in -- Data.Char. They are similar but not identical to some functions -- in Unicode.Char.General, therefore they are placed in a -- separate module in order to avoid ambiguity. module Unicode.Char.General.Compat -- | Same as isLetter. isAlpha :: Char -> Bool -- | Selects alphabetic Unicode characters (lower-case, upper-case and -- title-case letters, plus letters of caseless scripts and modifiers -- letters). -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

-- * UppercaseLetter
-- * LowercaseLetter
-- * TitlecaseLetter
-- * ModifierLetter
-- * OtherLetter

-- -- Note: this function is not equivalent to -- isAlphabetic. See the description of isAlphabetic for -- further details. -- --

--   isLetter c == Data.Char.isLetter c
--

isLetter :: Char -> Bool -- | Selects Unicode space characters (general category Space), -- and the control characters \t, \n, \r, -- \f, \v. -- -- Note: isSpace is not equivalent to -- isWhiteSpace. isWhiteSpace selects the same characters -- from isSpace plus the following: -- --

-- * U+0085 NEXT LINE (NEL)
-- * U+2028 LINE SEPARATOR
-- * U+2029 PARAGRAPH SEPARATOR

-- --

--   isSpace c == Data.Char.isSpace c
--

isSpace :: Char -> Bool -- | Compatibility module for case and case mapping related functions. -- -- The functions of this module are drop-in replacement for those in -- Data.Char. They are similar but not identical to some functions -- in Unicode.Char.Case, therefore they are placed in a separate -- module in order to avoid ambiguity. module Unicode.Char.Case.Compat -- | Selects upper-case or title-case alphabetic Unicode characters -- (letters). Title case is used by a small number of letter ligatures -- like the single-character form of Lj. -- -- It matches characters with general category UppercaseLetter and -- TitlecaseLetter. -- -- See: isUpperCase for the full upper case predicate. -- --

--   isUpper c == Data.Char.isUpper c
--

isUpper :: Char -> Bool -- | Selects lower-case alphabetic Unicode characters (letters). -- -- It matches characters with general category LowercaseLetter. -- -- See: isLowerCase for the full lower case predicate. -- --

--   isLower c == Data.Char.isLower c
--

isLower :: Char -> Bool -- | Convert a letter to the corresponding upper-case letter, if any. Any -- other character is returned unchanged. -- -- It uses the character property Simple_Uppercase_Mapping. -- -- See: upperCaseMapping and toUpperString for full -- upper case conversion. -- --

--   toUpper c == Data.Char.toUpper c
--

toUpper :: Char -> Char -- | Convert a letter to the corresponding lower-case letter, if any. Any -- other character is returned unchanged. -- -- It uses the character property Simple_Lowercase_Mapping. -- -- See: lowerCaseMapping and toLowerString for full -- lower case conversion. -- --

--   toLower c == Data.Char.toLower c
--

toLower :: Char -> Char -- | Convert a letter to the corresponding title-case or upper-case letter, -- if any. (Title case differs from upper case only for a small number of -- ligature letters.) Any other character is returned unchanged. -- -- It uses the character property Simple_Titlecase_Mapping. -- -- See: titleCaseMapping and toTitleString for full -- title case conversion. -- --

--   toTitle c == Data.Char.toTitle c
--

toTitle :: Char -> Char -- | Fast division by known constants. -- -- Division by a constant can be replaced by a double-word -- multiplication. Roughly speaking, instead of dividing by x, multiply -- by 2^64/x, obtaining a 128-bit-long product, and take the upper 64 bits. The -- peculiar details can be found in Hacker's Delight, Ch. 10. -- -- Even GHC 8.10 does not provide a primitive for a signed double-word -- multiplication, but since our applications do not involve negative -- integers, we convert Int to Word and use -- timesWord#. -- -- Textbook unsigned division by 21 or 28 becomes involved, when an -- argument is allowed to take the full range of Word up to 2^64. -- Luckily, in our case the argument was cast from Int, so we -- can guarantee that it is below 2^63. module Unicode.Internal.Division -- | Input must be non-negative. -- -- Instead of division by 21, we compute floor(floor((2^68+17)/21 * n) / -- 2^68) = floor((2^68+17)/21 * n/2^68) = floor(n/21 + -- (n/2^63 * 17/32)/21) = floor(n/21), because n/2^63 * -- 17/32 < 1. quotRem21 :: Int -> (Int, Int) -- | Input must be non-negative. -- -- Instead of division by 28, we compute floor(floor((2^65+3)/7 * n) / -- 2^67) = floor((2^65+3)/7 * n/2^67) = floor(n/28 + -- (n/2^63 * 3/4)/28) = floor(n/28), because n/2^63 * 3/4 -- < 1. quotRem28 :: Int -> (Int, Int) -- | General character property related functions. module Unicode.Char.General -- | Unicode General Categories. -- -- These classes are defined in the Unicode Character Database, -- part of the Unicode standard. -- -- Note: the classes must be in the same order they are listed in -- the Unicode Standard, because some functions (e.g. -- generalCategory) rely on the Enum instance. 
data GeneralCategory -- | Lu: Letter, Uppercase UppercaseLetter :: GeneralCategory -- | Ll: Letter, Lowercase LowercaseLetter :: GeneralCategory -- | Lt: Letter, Titlecase TitlecaseLetter :: GeneralCategory -- | Lm: Letter, Modifier ModifierLetter :: GeneralCategory -- | Lo: Letter, Other OtherLetter :: GeneralCategory -- | Mn: Mark, Non-Spacing NonSpacingMark :: GeneralCategory -- | Mc: Mark, Spacing Combining SpacingCombiningMark :: GeneralCategory -- | Me: Mark, Enclosing EnclosingMark :: GeneralCategory -- | Nd: Number, Decimal DecimalNumber :: GeneralCategory -- | Nl: Number, Letter LetterNumber :: GeneralCategory -- | No: Number, Other OtherNumber :: GeneralCategory -- | Pc: Punctuation, Connector ConnectorPunctuation :: GeneralCategory -- | Pd: Punctuation, Dash DashPunctuation :: GeneralCategory -- | Ps: Punctuation, Open OpenPunctuation :: GeneralCategory -- | Pe: Punctuation, Close ClosePunctuation :: GeneralCategory -- | Pi: Punctuation, Initial quote InitialQuote :: GeneralCategory -- | Pf: Punctuation, Final quote FinalQuote :: GeneralCategory -- | Po: Punctuation, Other OtherPunctuation :: GeneralCategory -- | Sm: Symbol, Math MathSymbol :: GeneralCategory -- | Sc: Symbol, Currency CurrencySymbol :: GeneralCategory -- | Sk: Symbol, Modifier ModifierSymbol :: GeneralCategory -- | So: Symbol, Other OtherSymbol :: GeneralCategory -- | Zs: Separator, Space Space :: GeneralCategory -- | Zl: Separator, Line LineSeparator :: GeneralCategory -- | Zp: Separator, Paragraph ParagraphSeparator :: GeneralCategory -- | Cc: Other, Control Control :: GeneralCategory -- | Cf: Other, Format Format :: GeneralCategory -- | Cs: Other, Surrogate Surrogate :: GeneralCategory -- | Co: Other, Private Use PrivateUse :: GeneralCategory -- | Cn: Other, Not Assigned NotAssigned :: GeneralCategory -- | Abbreviation of GeneralCategory used in the Unicode standard. generalCategoryAbbr :: GeneralCategory -> String -- | The Unicode general category of the character. 
-- -- This property is defined in the column 2 of the UnicodeData -- table. -- -- This relies on the Enum instance of GeneralCategory, -- which must remain in the same order as the categories are presented in -- the Unicode standard. -- --

--   show (generalCategory c) == show (Data.Char.generalCategory c)
--

generalCategory :: Char -> GeneralCategory -- | Returns True for alphabetic Unicode characters (lower-case, -- upper-case and title-case letters, plus letters of caseless scripts -- and modifier letters). -- -- Note: this function is not equivalent to isAlpha -- / isLetter: -- --

isAlpha matches the following general -- categories:
- UppercaseLetter -- (Lu)
- LowercaseLetter -- (Ll)
- TitlecaseLetter -- (Lt)
- ModifierLetter -- (Lm)
- OtherLetter (Lo)
whereas isAlphabetic matches:
- Uppercase -- property
- Lowercase -- property
- TitlecaseLetter -- (Lt)
- ModifierLetter -- (Lm)
- OtherLetter -- (Lo)
- LetterNumber -- (Nl)
- Other_Alphabetic -- property

isAlphabetic :: Char -> Bool -- | Selects alphabetic or numeric Unicode characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

UppercaseLetter
LowercaseLetter
TitlecaseLetter
ModifierLetter
OtherLetter
DecimalNumber
LetterNumber
OtherNumber

-- --

--   isAlphaNum c == Data.Char.isAlphaNum c
--

isAlphaNum :: Char -> Bool -- | Selects control characters, which are the non-printing characters of -- the Latin-1 subset of Unicode. -- -- This function returns True if its argument has the -- GeneralCategory Control. -- --

--   isControl c == Data.Char.isControl c
--

isControl :: Char -> Bool -- | Selects Unicode mark characters, for example accents and the like, -- which combine with preceding characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

NonSpacingMark
SpacingCombiningMark
EnclosingMark

-- --

--   isMark c == Data.Char.isMark c
--

isMark :: Char -> Bool -- | Selects printable Unicode characters (letters, numbers, marks, -- punctuation, symbols and spaces). -- -- This function returns False if its argument has one of the -- following GeneralCategorys, or True otherwise: -- --

LineSeparator
ParagraphSeparator
Control
Format
Surrogate
PrivateUse
NotAssigned

-- --

--   isPrint c == Data.Char.isPrint c
--

isPrint :: Char -> Bool -- | Selects Unicode punctuation characters, including various kinds of -- connectors, brackets and quotes. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

ConnectorPunctuation
DashPunctuation
OpenPunctuation
ClosePunctuation
InitialQuote
FinalQuote
OtherPunctuation

-- --

--   isPunctuation c == Data.Char.isPunctuation c
--

isPunctuation :: Char -> Bool -- | Selects Unicode space and separator characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

Space
LineSeparator
ParagraphSeparator

-- --

--   isSeparator c == Data.Char.isSeparator c
--

isSeparator :: Char -> Bool -- | Selects Unicode symbol characters, including mathematical and currency -- symbols. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- MathSymbol, CurrencySymbol, ModifierSymbol, -- OtherSymbol. -- --

--   isSymbol c == Data.Char.isSymbol c
--

isSymbol :: Char -> Bool -- | Returns True for any whitespace characters, and the control -- characters \t, \n, \r, \f, -- \v. -- -- See: Unicode White_Space. -- -- Note: isWhiteSpace is not equivalent to -- isSpace. isWhiteSpace selects the same characters as -- isSpace, plus the following: -- --

U+0085 NEXT LINE (NEL)
U+2028 LINE SEPARATOR
U+2029 PARAGRAPH SEPARATOR

isWhiteSpace :: Char -> Bool -- | Returns True for alphabetic Unicode characters (lower-case, -- upper-case and title-case letters, plus letters of caseless scripts -- and modifier letters). -- | Deprecated: Use isAlphabetic instead. Note that the behavior of -- this function does not match base:Data.Char.isLetter. See -- Unicode.Char.General.Compat for behavior compatible with -- base:Data.Char. isLetter :: Char -> Bool -- | Returns True for any whitespace characters, and the control -- characters \t, \n, \r, \f, -- \v. -- | Deprecated: Use isWhiteSpace instead. Note that the behavior of -- this function does not match base:Data.Char.isSpace. See -- Unicode.Char.General.Compat for behavior compatible with -- base:Data.Char. isSpace :: Char -> Bool -- | Selects the first 128 characters of the Unicode character set, -- corresponding to the ASCII character set. isAscii :: Char -> Bool -- | Selects the first 256 characters of the Unicode character set, -- corresponding to the ISO 8859-1 (Latin-1) character set. isLatin1 :: Char -> Bool -- | Selects ASCII upper-case letters, i.e. characters satisfying both -- isAscii and isUpper. isAsciiUpper :: Char -> Bool -- | Selects ASCII lower-case letters, i.e. characters satisfying both -- isAscii and isLower. isAsciiLower :: Char -> Bool -- | Determine whether a character is a jamo L, V or T character. isJamo :: Char -> Bool -- | Total count of all jamo characters. -- --

--   jamoNCount = jamoVCount * jamoTCount
--

jamoNCount :: Int -- | First leading consonant jamo. jamoLFirst :: Int -- | Total count of leading consonant jamo. jamoLCount :: Int -- | Given a Unicode character, if it is a leading jamo, return its index -- in the list of leading jamo consonants, otherwise return -- Nothing. jamoLIndex :: Char -> Maybe Int -- | Last leading consonant jamo. jamoLLast :: Int -- | First vowel jamo. jamoVFirst :: Int -- | Total count of vowel jamo. jamoVCount :: Int -- | Given a Unicode character, if it is a vowel jamo, return its index in -- the list of vowel jamo, otherwise return Nothing. jamoVIndex :: Char -> Maybe Int -- | Last vowel jamo. jamoVLast :: Int -- | The first trailing consonant jamo. -- -- Note that jamoTFirst does not represent a valid T, it -- represents a missing T i.e. LV without a T. See comments under -- jamoTIndex . jamoTFirst :: Int -- | Total count of trailing consonant jamo. jamoTCount :: Int -- | Given a Unicode character, if it is a trailing jamo consonant, return -- its index in the list of trailing jamo consonants, otherwise return -- Nothing. -- -- Note that index 0 is not a valid index for a trailing consonant. Index -- 0 corresponds to an LV syllable, without a T. See "Hangul Syllable -- Decomposition" in the Conformance chapter of the Unicode standard for -- more details. jamoTIndex :: Char -> Maybe Int -- | Last trailing consonant jamo. jamoTLast :: Int -- | Codepoint of the first pre-composed Hangul character. hangulFirst :: Int -- | Codepoint of the last Hangul character. hangulLast :: Int -- | Determine if the given character is a precomposed Hangul syllable. isHangul :: Char -> Bool -- | Determine if the given character is a Hangul LV syllable. -- -- Note: this function requires a precomposed Hangul syllable but -- does not check it. Use isHangul to check the input -- character before passing it to isHangulLV. 
isHangulLV :: Char -> Bool instance GHC.Ix.Ix Unicode.Char.General.GeneralCategory instance GHC.Enum.Bounded Unicode.Char.General.GeneralCategory instance GHC.Enum.Enum Unicode.Char.General.GeneralCategory instance GHC.Classes.Ord Unicode.Char.General.GeneralCategory instance GHC.Classes.Eq Unicode.Char.General.GeneralCategory instance GHC.Show.Show Unicode.Char.General.GeneralCategory -- | Low level Unicode database functions to facilitate Unicode -- normalization. -- -- For more information on Unicode normalization please refer to the -- following sections of the Unicode standard: -- --

2 General Structure
- 2.3 Compatibility -- Characters
- 2.12 Equivalent Sequences
3 Conformance
- 3.6 Combination
- 3.7 -- Decomposition
- 3.11 Normalization Forms
- 3.12 Conjoining -- Jamo Behavior
4 Character Properties
- 4.3 Combining Classes
Unicode® Standard Annex #15 - Unicode Normalization -- Forms
Unicode® Standard Annex #44 - Unicode Character -- Database

module Unicode.Char.Normalization -- | Returns True if a character is a combining character. isCombining :: Char -> Bool -- | Returns the combining class of a character. combiningClass :: Char -> Int -- | Return True if a starter character may combine with some -- preceding starter character. isCombiningStarter :: Char -> Bool -- | Compose a starter character (combining class 0) with a combining -- character (non-zero combining class). Returns the composed character -- if the starter combines with the combining character, returns -- Nothing otherwise. compose :: Char -> Char -> Maybe Char -- | Compose a starter character with another starter character. Returns -- the composed character if the two starters combine, returns -- Nothing otherwise. composeStarters :: Char -> Char -> Maybe Char -- | Whether we are decomposing in canonical or compatibility mode. data DecomposeMode Canonical :: DecomposeMode Kompat :: DecomposeMode -- | Given a non-Hangul character, determine if the character is -- decomposable. Note that in the case of compatibility decompositions a -- character may decompose into a single compatibility character. isDecomposable :: DecomposeMode -> Char -> Bool -- | Decompose a non-Hangul character into its canonical or compatibility -- decompositions. Note that the resulting characters may further -- decompose. decompose :: DecomposeMode -> Char -> [Char] -- | Decompose a Hangul syllable into its corresponding Jamo characters. decomposeHangul :: Char -> (Char, Char, Char) module Unicode.Internal.Unfold -- | An Unfold a b is a generator of a stream of values of type -- b from a seed of type a. data Unfold a b Unfold :: (s -> Step s b) -> (a -> Step s b) -> Unfold a b -- | A stream is a succession of Steps. data Step s a -- | Produces a single value and the next state of the stream. Yield :: !a -> !s -> Step s a -- | Indicates there are no more values in the stream.
Stop :: Step s a -- | Convert an 'Unfold a a' to a list [a], if the resulting list is empty -- the seed is used as a default output. toList :: Unfold a a -> a -> [a] instance GHC.Base.Functor (Unicode.Internal.Unfold.Step s) -- | Case and case mapping related functions. -- -- This module provides full predicates and mappings that are -- not compatible with those in Data.Char, which rely on -- simple properties. See Unicode.Char.Case.Compat for a drop-in -- replacement of the functions in Data.Char. module Unicode.Char.Case -- | Returns True for lower-case characters. -- -- It uses the character property Lowercase. isLowerCase :: Char -> Bool -- | Returns True for lower-case characters. -- -- It uses the character property Lowercase. -- | Deprecated: Use isLowerCase instead. Note that the behavior of this -- function does not match base:Data.Char.isLower. See -- Unicode.Char.Case.Compat for behavior compatible with -- base:Data.Char. isLower :: Char -> Bool -- | Returns True for upper-case characters. -- -- It uses the character property Uppercase. -- -- Note: it does not match title-cased letters. Those are matched -- using: generalCategory c == TitlecaseLetter. isUpperCase :: Char -> Bool -- | Returns True for upper-case characters. -- -- It uses the character property Uppercase. -- -- Note: it does not match title-cased letters. Those are matched -- using: generalCategory c == TitlecaseLetter. -- | Deprecated: Use isUpperCase instead. Note that the behavior of this -- function does not match base:Data.Char.isUpper. See -- Unicode.Char.Case.Compat for behavior compatible with -- base:Data.Char. isUpper :: Char -> Bool -- | Returns the full folded case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Case_Folding. caseFoldMapping :: Unfold Char Char -- | Convert a character to full folded case if defined, else to -- itself. 
-- -- This function is mainly useful for performing caseless (also known as -- case insensitive) string comparisons. -- -- A string x is a caseless match for a string y if and -- only if: -- --

--   foldMap toCaseFoldString x == foldMap toCaseFoldString y
--

-- -- The result string may have more than one character, and may differ -- from applying toLowerString to the input string. For instance, -- “ﬓ” (U+FB13 Armenian small ligature men now) is case folded -- to the sequence “մ” (U+0574 Armenian small letter men) -- followed by “ն” (U+0576 Armenian small letter now), while “µ” -- (U+00B5 micro sign) is case folded to “μ” (U+03BC -- Greek small letter mu) instead of itself. -- -- It uses the character property Case_Folding. -- --

--   toCaseFoldString c == foldMap toCaseFoldString (toCaseFoldString c)
--

toCaseFoldString :: Char -> String -- | Returns the full lower case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Lowercase_Mapping. lowerCaseMapping :: Unfold Char Char -- | Convert a character to full lower case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, “İ” -- (U+0130 Latin capital letter I with dot above) maps to the -- sequence: “i” (U+0069 Latin small letter I) followed by “ ̇” -- (U+0307 combining dot above). -- -- It uses the character property Lowercase_Mapping. -- -- See: toLower for simple lower case conversion. -- --

--   toLowerString c == foldMap toLowerString (toLowerString c)
--

toLowerString :: Char -> String -- | Returns the full title case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Titlecase_Mapping. titleCaseMapping :: Unfold Char Char -- | Convert a character to full title case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, “ﬂ” -- (U+FB02 Latin small ligature FL) is converted to the -- sequence: “F” (U+0046 Latin capital letter F) followed by “l” -- (U+006C Latin small letter L). -- -- It uses the character property Titlecase_Mapping. -- -- See: toTitle for simple title case conversion. toTitleString :: Char -> String -- | Returns the full upper case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Uppercase_Mapping. upperCaseMapping :: Unfold Char Char -- | Convert a character to full upper case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, the -- German “ß” (U+00DF Eszett) maps to the two-letter sequence -- “SS”. -- -- It uses the character property Uppercase_Mapping. -- -- See: toUpper for simple upper case conversion. -- --

--   toUpperString c == foldMap toUpperString (toUpperString c)
--

toUpperString :: Char -> String -- | An Unfold a b is a generator of a stream of values of type -- b from a seed of type a. data Unfold a b Unfold :: (s -> Step s b) -> (a -> Step s b) -> Unfold a b -- | A stream is a succession of Steps. data Step s a -- | Produces a single value and the next state of the stream. Yield :: !a -> !s -> Step s a -- | Indicates there are no more values in the stream. Stop :: Step s a -- | This module provides APIs to access the Unicode character database -- (UCD) corresponding to Unicode Standard version 15.0.0. -- -- This module re-exports several sub-modules under it. The sub-module -- structure under Char is largely based on the "Property Index -- by Scope of Use" in Unicode® Standard Annex #44. -- -- The Unicode.Char.* modules in turn depend on -- Unicode.Internal.Char.* modules which are programmatically -- generated from the Unicode standard's Unicode character database -- files. The module structure under Unicode.Internal.Char is -- largely based on the UCD text file names from which the properties are -- generated. -- -- For the original UCD files used in this code please refer to the -- UCD section on the Unicode standard page. See -- https://www.unicode.org/reports/tr44/ to understand the -- contents and the format of the unicode database files. module Unicode.Char -- | Version of the Unicode standard used by this package: 15.0.0. unicodeVersion :: Version -- | Same as isLetter. isAlpha :: Char -> Bool -- | Returns True for upper-case characters. -- -- It uses the character property Uppercase. -- -- Note: it does not match title-cased letters. Those are matched -- using: generalCategory c == TitlecaseLetter. -- | Deprecated: Use isUpperCase instead. Note that the behavior of this -- function does not match base:Data.Char.isUpper. See -- Unicode.Char.Case.Compat for behavior compatible with -- base:Data.Char. isUpper :: Char -> Bool -- | Returns True for upper-case characters. -- -- It uses the character property Uppercase. 
-- -- Note: it does not match title-cased letters. Those are matched -- using: generalCategory c == TitlecaseLetter. isUpperCase :: Char -> Bool -- | Returns True for lower-case characters. -- -- It uses the character property Lowercase. -- | Deprecated: Use isLowerCase instead. Note that the behavior of this -- function does not match base:Data.Char.isLower. See -- Unicode.Char.Case.Compat for behavior compatible with -- base:Data.Char. isLower :: Char -> Bool -- | Returns True for lower-case characters. -- -- It uses the character property Lowercase. isLowerCase :: Char -> Bool -- | Returns the full folded case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Case_Folding. caseFoldMapping :: Unfold Char Char -- | Convert a character to full folded case if defined, else to -- itself. -- -- This function is mainly useful for performing caseless (also known as -- case insensitive) string comparisons. -- -- A string x is a caseless match for a string y if and -- only if: -- --

--   foldMap toCaseFoldString x == foldMap toCaseFoldString y
--

--   toCaseFoldString c == foldMap toCaseFoldString (toCaseFoldString c)
--

--   toLowerString c == foldMap toLowerString (toLowerString c)
--

--   toUpperString c == foldMap toUpperString (toUpperString c)
--

toUpperString :: Char -> String -- | Convert a letter to the corresponding upper-case letter, if any. Any -- other character is returned unchanged. -- -- It uses the character property Simple_Uppercase_Mapping. -- -- See: upperCaseMapping and toUpperString for full -- upper case conversion. -- --

--   toUpper c == Data.Char.toUpper c
--

--   toLower c == Data.Char.toLower c
--

--   toTitle c == Data.Char.toTitle c
--

toTitle :: Char -> Char -- | The fromEnum method restricted to the type Char. ord :: Char -> Int -- | The toEnum method restricted to the type Char. chr :: Int -> Char