-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Access Unicode Character Database (UCD) -- -- unicode-data provides Haskell APIs to efficiently access the -- Unicode character database (UCD). Performance is the primary -- goal in the design of this package. -- -- The Haskell data structures are generated programmatically from the -- UCD files. The latest Unicode version supported by this library is -- 15.1.0. @package unicode-data @version 0.6.0 -- | Fast, static bitmap lookup utilities module Unicode.Internal.Bits -- | lookupBit addr byteIndex bitIndex looks up the bit stored in -- the byte at index byteIndex at the bit index -- bitIndex using a bitmap starting at the address -- addr. The caller must make sure that the byte at address -- (addr + byteIndex) is legally accessible memory. lookupBit :: Addr# -> Int -> Int -> Bool -- | lookupWord8AsInt addr index looks up the -- index-th 8-bit word in the bitmap starting at -- addr, then converts it to an Int. -- -- The caller must make sure that: -- --

ceiling (addr + (n * 8)) is legally accessible -- Word8#.

lookupWord8AsInt :: Addr# -> Int -> Int lookupWord8AsInt# :: Addr# -> Int# -> Int# lookupWord16AsInt :: Addr# -> Int -> Int lookupWord16AsInt# :: Addr# -> Int# -> Int# -- | lookupWord32# addr index looks up the index-th -- 32-bit word in the bitmap starting at addr, then converts it -- to a Word#. -- -- The caller must make sure that: -- --

ceiling (addr + (n * 32)) is legally accessible -- Word32#.

lookupWord32# :: Addr# -> Int# -> Word# unpackCString# :: Addr# -> [Char] -- | Unicode blocks related functions. module Unicode.Char.General.Blocks -- | Unicode block. -- -- There is a total of 328 blocks. data Block -- | U+0000..U+007F: Basic Latin. BasicLatin :: Block -- | U+0080..U+00FF: Latin-1 Supplement. Latin1Supplement :: Block -- | U+0100..U+017F: Latin Extended-A. LatinExtendedA :: Block -- | U+0180..U+024F: Latin Extended-B. LatinExtendedB :: Block -- | U+0250..U+02AF: IPA Extensions. IPAExtensions :: Block -- | U+02B0..U+02FF: Spacing Modifier Letters. SpacingModifierLetters :: Block -- | U+0300..U+036F: Combining Diacritical Marks. CombiningDiacriticalMarks :: Block -- | U+0370..U+03FF: Greek and Coptic. GreekAndCoptic :: Block -- | U+0400..U+04FF: Cyrillic. Cyrillic :: Block -- | U+0500..U+052F: Cyrillic Supplement. CyrillicSupplement :: Block -- | U+0530..U+058F: Armenian. Armenian :: Block -- | U+0590..U+05FF: Hebrew. Hebrew :: Block -- | U+0600..U+06FF: Arabic. Arabic :: Block -- | U+0700..U+074F: Syriac. Syriac :: Block -- | U+0750..U+077F: Arabic Supplement. ArabicSupplement :: Block -- | U+0780..U+07BF: Thaana. Thaana :: Block -- | U+07C0..U+07FF: NKo. NKo :: Block -- | U+0800..U+083F: Samaritan. Samaritan :: Block -- | U+0840..U+085F: Mandaic. Mandaic :: Block -- | U+0860..U+086F: Syriac Supplement. SyriacSupplement :: Block -- | U+0870..U+089F: Arabic Extended-B. ArabicExtendedB :: Block -- | U+08A0..U+08FF: Arabic Extended-A. ArabicExtendedA :: Block -- | U+0900..U+097F: Devanagari. Devanagari :: Block -- | U+0980..U+09FF: Bengali. Bengali :: Block -- | U+0A00..U+0A7F: Gurmukhi. Gurmukhi :: Block -- | U+0A80..U+0AFF: Gujarati. Gujarati :: Block -- | U+0B00..U+0B7F: Oriya. Oriya :: Block -- | U+0B80..U+0BFF: Tamil. Tamil :: Block -- | U+0C00..U+0C7F: Telugu. Telugu :: Block -- | U+0C80..U+0CFF: Kannada. Kannada :: Block -- | U+0D00..U+0D7F: Malayalam. Malayalam :: Block -- | U+0D80..U+0DFF: Sinhala. Sinhala :: Block -- | U+0E00..U+0E7F: Thai. 
Thai :: Block -- | U+0E80..U+0EFF: Lao. Lao :: Block -- | U+0F00..U+0FFF: Tibetan. Tibetan :: Block -- | U+1000..U+109F: Myanmar. Myanmar :: Block -- | U+10A0..U+10FF: Georgian. Georgian :: Block -- | U+1100..U+11FF: Hangul Jamo. HangulJamo :: Block -- | U+1200..U+137F: Ethiopic. Ethiopic :: Block -- | U+1380..U+139F: Ethiopic Supplement. EthiopicSupplement :: Block -- | U+13A0..U+13FF: Cherokee. Cherokee :: Block -- | U+1400..U+167F: Unified Canadian Aboriginal Syllabics. UnifiedCanadianAboriginalSyllabics :: Block -- | U+1680..U+169F: Ogham. Ogham :: Block -- | U+16A0..U+16FF: Runic. Runic :: Block -- | U+1700..U+171F: Tagalog. Tagalog :: Block -- | U+1720..U+173F: Hanunoo. Hanunoo :: Block -- | U+1740..U+175F: Buhid. Buhid :: Block -- | U+1760..U+177F: Tagbanwa. Tagbanwa :: Block -- | U+1780..U+17FF: Khmer. Khmer :: Block -- | U+1800..U+18AF: Mongolian. Mongolian :: Block -- | U+18B0..U+18FF: Unified Canadian Aboriginal Syllabics -- Extended. UnifiedCanadianAboriginalSyllabicsExtended :: Block -- | U+1900..U+194F: Limbu. Limbu :: Block -- | U+1950..U+197F: Tai Le. TaiLe :: Block -- | U+1980..U+19DF: New Tai Lue. NewTaiLue :: Block -- | U+19E0..U+19FF: Khmer Symbols. KhmerSymbols :: Block -- | U+1A00..U+1A1F: Buginese. Buginese :: Block -- | U+1A20..U+1AAF: Tai Tham. TaiTham :: Block -- | U+1AB0..U+1AFF: Combining Diacritical Marks Extended. CombiningDiacriticalMarksExtended :: Block -- | U+1B00..U+1B7F: Balinese. Balinese :: Block -- | U+1B80..U+1BBF: Sundanese. Sundanese :: Block -- | U+1BC0..U+1BFF: Batak. Batak :: Block -- | U+1C00..U+1C4F: Lepcha. Lepcha :: Block -- | U+1C50..U+1C7F: Ol Chiki. OlChiki :: Block -- | U+1C80..U+1C8F: Cyrillic Extended-C. CyrillicExtendedC :: Block -- | U+1C90..U+1CBF: Georgian Extended. GeorgianExtended :: Block -- | U+1CC0..U+1CCF: Sundanese Supplement. SundaneseSupplement :: Block -- | U+1CD0..U+1CFF: Vedic Extensions. VedicExtensions :: Block -- | U+1D00..U+1D7F: Phonetic Extensions. 
PhoneticExtensions :: Block -- | U+1D80..U+1DBF: Phonetic Extensions Supplement. PhoneticExtensionsSupplement :: Block -- | U+1DC0..U+1DFF: Combining Diacritical Marks Supplement. CombiningDiacriticalMarksSupplement :: Block -- | U+1E00..U+1EFF: Latin Extended Additional. LatinExtendedAdditional :: Block -- | U+1F00..U+1FFF: Greek Extended. GreekExtended :: Block -- | U+2000..U+206F: General Punctuation. GeneralPunctuation :: Block -- | U+2070..U+209F: Superscripts and Subscripts. SuperscriptsAndSubscripts :: Block -- | U+20A0..U+20CF: Currency Symbols. CurrencySymbols :: Block -- | U+20D0..U+20FF: Combining Diacritical Marks for Symbols. CombiningDiacriticalMarksForSymbols :: Block -- | U+2100..U+214F: Letterlike Symbols. LetterlikeSymbols :: Block -- | U+2150..U+218F: Number Forms. NumberForms :: Block -- | U+2190..U+21FF: Arrows. Arrows :: Block -- | U+2200..U+22FF: Mathematical Operators. MathematicalOperators :: Block -- | U+2300..U+23FF: Miscellaneous Technical. MiscellaneousTechnical :: Block -- | U+2400..U+243F: Control Pictures. ControlPictures :: Block -- | U+2440..U+245F: Optical Character Recognition. OpticalCharacterRecognition :: Block -- | U+2460..U+24FF: Enclosed Alphanumerics. EnclosedAlphanumerics :: Block -- | U+2500..U+257F: Box Drawing. BoxDrawing :: Block -- | U+2580..U+259F: Block Elements. BlockElements :: Block -- | U+25A0..U+25FF: Geometric Shapes. GeometricShapes :: Block -- | U+2600..U+26FF: Miscellaneous Symbols. MiscellaneousSymbols :: Block -- | U+2700..U+27BF: Dingbats. Dingbats :: Block -- | U+27C0..U+27EF: Miscellaneous Mathematical Symbols-A. MiscellaneousMathematicalSymbolsA :: Block -- | U+27F0..U+27FF: Supplemental Arrows-A. SupplementalArrowsA :: Block -- | U+2800..U+28FF: Braille Patterns. BraillePatterns :: Block -- | U+2900..U+297F: Supplemental Arrows-B. SupplementalArrowsB :: Block -- | U+2980..U+29FF: Miscellaneous Mathematical Symbols-B. 
MiscellaneousMathematicalSymbolsB :: Block -- | U+2A00..U+2AFF: Supplemental Mathematical Operators. SupplementalMathematicalOperators :: Block -- | U+2B00..U+2BFF: Miscellaneous Symbols and Arrows. MiscellaneousSymbolsAndArrows :: Block -- | U+2C00..U+2C5F: Glagolitic. Glagolitic :: Block -- | U+2C60..U+2C7F: Latin Extended-C. LatinExtendedC :: Block -- | U+2C80..U+2CFF: Coptic. Coptic :: Block -- | U+2D00..U+2D2F: Georgian Supplement. GeorgianSupplement :: Block -- | U+2D30..U+2D7F: Tifinagh. Tifinagh :: Block -- | U+2D80..U+2DDF: Ethiopic Extended. EthiopicExtended :: Block -- | U+2DE0..U+2DFF: Cyrillic Extended-A. CyrillicExtendedA :: Block -- | U+2E00..U+2E7F: Supplemental Punctuation. SupplementalPunctuation :: Block -- | U+2E80..U+2EFF: CJK Radicals Supplement. CJKRadicalsSupplement :: Block -- | U+2F00..U+2FDF: Kangxi Radicals. KangxiRadicals :: Block -- | U+2FF0..U+2FFF: Ideographic Description Characters. IdeographicDescriptionCharacters :: Block -- | U+3000..U+303F: CJK Symbols and Punctuation. CJKSymbolsAndPunctuation :: Block -- | U+3040..U+309F: Hiragana. Hiragana :: Block -- | U+30A0..U+30FF: Katakana. Katakana :: Block -- | U+3100..U+312F: Bopomofo. Bopomofo :: Block -- | U+3130..U+318F: Hangul Compatibility Jamo. HangulCompatibilityJamo :: Block -- | U+3190..U+319F: Kanbun. Kanbun :: Block -- | U+31A0..U+31BF: Bopomofo Extended. BopomofoExtended :: Block -- | U+31C0..U+31EF: CJK Strokes. CJKStrokes :: Block -- | U+31F0..U+31FF: Katakana Phonetic Extensions. KatakanaPhoneticExtensions :: Block -- | U+3200..U+32FF: Enclosed CJK Letters and Months. EnclosedCJKLettersAndMonths :: Block -- | U+3300..U+33FF: CJK Compatibility. CJKCompatibility :: Block -- | U+3400..U+4DBF: CJK Unified Ideographs Extension A. CJKUnifiedIdeographsExtensionA :: Block -- | U+4DC0..U+4DFF: Yijing Hexagram Symbols. YijingHexagramSymbols :: Block -- | U+4E00..U+9FFF: CJK Unified Ideographs. CJKUnifiedIdeographs :: Block -- | U+A000..U+A48F: Yi Syllables. 
YiSyllables :: Block -- | U+A490..U+A4CF: Yi Radicals. YiRadicals :: Block -- | U+A4D0..U+A4FF: Lisu. Lisu :: Block -- | U+A500..U+A63F: Vai. Vai :: Block -- | U+A640..U+A69F: Cyrillic Extended-B. CyrillicExtendedB :: Block -- | U+A6A0..U+A6FF: Bamum. Bamum :: Block -- | U+A700..U+A71F: Modifier Tone Letters. ModifierToneLetters :: Block -- | U+A720..U+A7FF: Latin Extended-D. LatinExtendedD :: Block -- | U+A800..U+A82F: Syloti Nagri. SylotiNagri :: Block -- | U+A830..U+A83F: Common Indic Number Forms. CommonIndicNumberForms :: Block -- | U+A840..U+A87F: Phags-pa. PhagsPa :: Block -- | U+A880..U+A8DF: Saurashtra. Saurashtra :: Block -- | U+A8E0..U+A8FF: Devanagari Extended. DevanagariExtended :: Block -- | U+A900..U+A92F: Kayah Li. KayahLi :: Block -- | U+A930..U+A95F: Rejang. Rejang :: Block -- | U+A960..U+A97F: Hangul Jamo Extended-A. HangulJamoExtendedA :: Block -- | U+A980..U+A9DF: Javanese. Javanese :: Block -- | U+A9E0..U+A9FF: Myanmar Extended-B. MyanmarExtendedB :: Block -- | U+AA00..U+AA5F: Cham. Cham :: Block -- | U+AA60..U+AA7F: Myanmar Extended-A. MyanmarExtendedA :: Block -- | U+AA80..U+AADF: Tai Viet. TaiViet :: Block -- | U+AAE0..U+AAFF: Meetei Mayek Extensions. MeeteiMayekExtensions :: Block -- | U+AB00..U+AB2F: Ethiopic Extended-A. EthiopicExtendedA :: Block -- | U+AB30..U+AB6F: Latin Extended-E. LatinExtendedE :: Block -- | U+AB70..U+ABBF: Cherokee Supplement. CherokeeSupplement :: Block -- | U+ABC0..U+ABFF: Meetei Mayek. MeeteiMayek :: Block -- | U+AC00..U+D7AF: Hangul Syllables. HangulSyllables :: Block -- | U+D7B0..U+D7FF: Hangul Jamo Extended-B. HangulJamoExtendedB :: Block -- | U+D800..U+DB7F: High Surrogates. HighSurrogates :: Block -- | U+DB80..U+DBFF: High Private Use Surrogates. HighPrivateUseSurrogates :: Block -- | U+DC00..U+DFFF: Low Surrogates. LowSurrogates :: Block -- | U+E000..U+F8FF: Private Use Area. PrivateUseArea :: Block -- | U+F900..U+FAFF: CJK Compatibility Ideographs. 
CJKCompatibilityIdeographs :: Block -- | U+FB00..U+FB4F: Alphabetic Presentation Forms. AlphabeticPresentationForms :: Block -- | U+FB50..U+FDFF: Arabic Presentation Forms-A. ArabicPresentationFormsA :: Block -- | U+FE00..U+FE0F: Variation Selectors. VariationSelectors :: Block -- | U+FE10..U+FE1F: Vertical Forms. VerticalForms :: Block -- | U+FE20..U+FE2F: Combining Half Marks. CombiningHalfMarks :: Block -- | U+FE30..U+FE4F: CJK Compatibility Forms. CJKCompatibilityForms :: Block -- | U+FE50..U+FE6F: Small Form Variants. SmallFormVariants :: Block -- | U+FE70..U+FEFF: Arabic Presentation Forms-B. ArabicPresentationFormsB :: Block -- | U+FF00..U+FFEF: Halfwidth and Fullwidth Forms. HalfwidthAndFullwidthForms :: Block -- | U+FFF0..U+FFFF: Specials. Specials :: Block -- | U+10000..U+1007F: Linear B Syllabary. LinearBSyllabary :: Block -- | U+10080..U+100FF: Linear B Ideograms. LinearBIdeograms :: Block -- | U+10100..U+1013F: Aegean Numbers. AegeanNumbers :: Block -- | U+10140..U+1018F: Ancient Greek Numbers. AncientGreekNumbers :: Block -- | U+10190..U+101CF: Ancient Symbols. AncientSymbols :: Block -- | U+101D0..U+101FF: Phaistos Disc. PhaistosDisc :: Block -- | U+10280..U+1029F: Lycian. Lycian :: Block -- | U+102A0..U+102DF: Carian. Carian :: Block -- | U+102E0..U+102FF: Coptic Epact Numbers. CopticEpactNumbers :: Block -- | U+10300..U+1032F: Old Italic. OldItalic :: Block -- | U+10330..U+1034F: Gothic. Gothic :: Block -- | U+10350..U+1037F: Old Permic. OldPermic :: Block -- | U+10380..U+1039F: Ugaritic. Ugaritic :: Block -- | U+103A0..U+103DF: Old Persian. OldPersian :: Block -- | U+10400..U+1044F: Deseret. Deseret :: Block -- | U+10450..U+1047F: Shavian. Shavian :: Block -- | U+10480..U+104AF: Osmanya. Osmanya :: Block -- | U+104B0..U+104FF: Osage. Osage :: Block -- | U+10500..U+1052F: Elbasan. Elbasan :: Block -- | U+10530..U+1056F: Caucasian Albanian. CaucasianAlbanian :: Block -- | U+10570..U+105BF: Vithkuqi. Vithkuqi :: Block -- | U+10600..U+1077F: Linear A. 
LinearA :: Block -- | U+10780..U+107BF: Latin Extended-F. LatinExtendedF :: Block -- | U+10800..U+1083F: Cypriot Syllabary. CypriotSyllabary :: Block -- | U+10840..U+1085F: Imperial Aramaic. ImperialAramaic :: Block -- | U+10860..U+1087F: Palmyrene. Palmyrene :: Block -- | U+10880..U+108AF: Nabataean. Nabataean :: Block -- | U+108E0..U+108FF: Hatran. Hatran :: Block -- | U+10900..U+1091F: Phoenician. Phoenician :: Block -- | U+10920..U+1093F: Lydian. Lydian :: Block -- | U+10980..U+1099F: Meroitic Hieroglyphs. MeroiticHieroglyphs :: Block -- | U+109A0..U+109FF: Meroitic Cursive. MeroiticCursive :: Block -- | U+10A00..U+10A5F: Kharoshthi. Kharoshthi :: Block -- | U+10A60..U+10A7F: Old South Arabian. OldSouthArabian :: Block -- | U+10A80..U+10A9F: Old North Arabian. OldNorthArabian :: Block -- | U+10AC0..U+10AFF: Manichaean. Manichaean :: Block -- | U+10B00..U+10B3F: Avestan. Avestan :: Block -- | U+10B40..U+10B5F: Inscriptional Parthian. InscriptionalParthian :: Block -- | U+10B60..U+10B7F: Inscriptional Pahlavi. InscriptionalPahlavi :: Block -- | U+10B80..U+10BAF: Psalter Pahlavi. PsalterPahlavi :: Block -- | U+10C00..U+10C4F: Old Turkic. OldTurkic :: Block -- | U+10C80..U+10CFF: Old Hungarian. OldHungarian :: Block -- | U+10D00..U+10D3F: Hanifi Rohingya. HanifiRohingya :: Block -- | U+10E60..U+10E7F: Rumi Numeral Symbols. RumiNumeralSymbols :: Block -- | U+10E80..U+10EBF: Yezidi. Yezidi :: Block -- | U+10EC0..U+10EFF: Arabic Extended-C. ArabicExtendedC :: Block -- | U+10F00..U+10F2F: Old Sogdian. OldSogdian :: Block -- | U+10F30..U+10F6F: Sogdian. Sogdian :: Block -- | U+10F70..U+10FAF: Old Uyghur. OldUyghur :: Block -- | U+10FB0..U+10FDF: Chorasmian. Chorasmian :: Block -- | U+10FE0..U+10FFF: Elymaic. Elymaic :: Block -- | U+11000..U+1107F: Brahmi. Brahmi :: Block -- | U+11080..U+110CF: Kaithi. Kaithi :: Block -- | U+110D0..U+110FF: Sora Sompeng. SoraSompeng :: Block -- | U+11100..U+1114F: Chakma. Chakma :: Block -- | U+11150..U+1117F: Mahajani. 
Mahajani :: Block -- | U+11180..U+111DF: Sharada. Sharada :: Block -- | U+111E0..U+111FF: Sinhala Archaic Numbers. SinhalaArchaicNumbers :: Block -- | U+11200..U+1124F: Khojki. Khojki :: Block -- | U+11280..U+112AF: Multani. Multani :: Block -- | U+112B0..U+112FF: Khudawadi. Khudawadi :: Block -- | U+11300..U+1137F: Grantha. Grantha :: Block -- | U+11400..U+1147F: Newa. Newa :: Block -- | U+11480..U+114DF: Tirhuta. Tirhuta :: Block -- | U+11580..U+115FF: Siddham. Siddham :: Block -- | U+11600..U+1165F: Modi. Modi :: Block -- | U+11660..U+1167F: Mongolian Supplement. MongolianSupplement :: Block -- | U+11680..U+116CF: Takri. Takri :: Block -- | U+11700..U+1174F: Ahom. Ahom :: Block -- | U+11800..U+1184F: Dogra. Dogra :: Block -- | U+118A0..U+118FF: Warang Citi. WarangCiti :: Block -- | U+11900..U+1195F: Dives Akuru. DivesAkuru :: Block -- | U+119A0..U+119FF: Nandinagari. Nandinagari :: Block -- | U+11A00..U+11A4F: Zanabazar Square. ZanabazarSquare :: Block -- | U+11A50..U+11AAF: Soyombo. Soyombo :: Block -- | U+11AB0..U+11ABF: Unified Canadian Aboriginal Syllabics -- Extended-A. UnifiedCanadianAboriginalSyllabicsExtendedA :: Block -- | U+11AC0..U+11AFF: Pau Cin Hau. PauCinHau :: Block -- | U+11B00..U+11B5F: Devanagari Extended-A. DevanagariExtendedA :: Block -- | U+11C00..U+11C6F: Bhaiksuki. Bhaiksuki :: Block -- | U+11C70..U+11CBF: Marchen. Marchen :: Block -- | U+11D00..U+11D5F: Masaram Gondi. MasaramGondi :: Block -- | U+11D60..U+11DAF: Gunjala Gondi. GunjalaGondi :: Block -- | U+11EE0..U+11EFF: Makasar. Makasar :: Block -- | U+11F00..U+11F5F: Kawi. Kawi :: Block -- | U+11FB0..U+11FBF: Lisu Supplement. LisuSupplement :: Block -- | U+11FC0..U+11FFF: Tamil Supplement. TamilSupplement :: Block -- | U+12000..U+123FF: Cuneiform. Cuneiform :: Block -- | U+12400..U+1247F: Cuneiform Numbers and Punctuation. CuneiformNumbersAndPunctuation :: Block -- | U+12480..U+1254F: Early Dynastic Cuneiform. EarlyDynasticCuneiform :: Block -- | U+12F90..U+12FFF: Cypro-Minoan. 
CyproMinoan :: Block -- | U+13000..U+1342F: Egyptian Hieroglyphs. EgyptianHieroglyphs :: Block -- | U+13430..U+1345F: Egyptian Hieroglyph Format Controls. EgyptianHieroglyphFormatControls :: Block -- | U+14400..U+1467F: Anatolian Hieroglyphs. AnatolianHieroglyphs :: Block -- | U+16800..U+16A3F: Bamum Supplement. BamumSupplement :: Block -- | U+16A40..U+16A6F: Mro. Mro :: Block -- | U+16A70..U+16ACF: Tangsa. Tangsa :: Block -- | U+16AD0..U+16AFF: Bassa Vah. BassaVah :: Block -- | U+16B00..U+16B8F: Pahawh Hmong. PahawhHmong :: Block -- | U+16E40..U+16E9F: Medefaidrin. Medefaidrin :: Block -- | U+16F00..U+16F9F: Miao. Miao :: Block -- | U+16FE0..U+16FFF: Ideographic Symbols and Punctuation. IdeographicSymbolsAndPunctuation :: Block -- | U+17000..U+187FF: Tangut. Tangut :: Block -- | U+18800..U+18AFF: Tangut Components. TangutComponents :: Block -- | U+18B00..U+18CFF: Khitan Small Script. KhitanSmallScript :: Block -- | U+18D00..U+18D7F: Tangut Supplement. TangutSupplement :: Block -- | U+1AFF0..U+1AFFF: Kana Extended-B. KanaExtendedB :: Block -- | U+1B000..U+1B0FF: Kana Supplement. KanaSupplement :: Block -- | U+1B100..U+1B12F: Kana Extended-A. KanaExtendedA :: Block -- | U+1B130..U+1B16F: Small Kana Extension. SmallKanaExtension :: Block -- | U+1B170..U+1B2FF: Nushu. Nushu :: Block -- | U+1BC00..U+1BC9F: Duployan. Duployan :: Block -- | U+1BCA0..U+1BCAF: Shorthand Format Controls. ShorthandFormatControls :: Block -- | U+1CF00..U+1CFCF: Znamenny Musical Notation. ZnamennyMusicalNotation :: Block -- | U+1D000..U+1D0FF: Byzantine Musical Symbols. ByzantineMusicalSymbols :: Block -- | U+1D100..U+1D1FF: Musical Symbols. MusicalSymbols :: Block -- | U+1D200..U+1D24F: Ancient Greek Musical Notation. AncientGreekMusicalNotation :: Block -- | U+1D2C0..U+1D2DF: Kaktovik Numerals. KaktovikNumerals :: Block -- | U+1D2E0..U+1D2FF: Mayan Numerals. MayanNumerals :: Block -- | U+1D300..U+1D35F: Tai Xuan Jing Symbols. 
TaiXuanJingSymbols :: Block -- | U+1D360..U+1D37F: Counting Rod Numerals. CountingRodNumerals :: Block -- | U+1D400..U+1D7FF: Mathematical Alphanumeric Symbols. MathematicalAlphanumericSymbols :: Block -- | U+1D800..U+1DAAF: Sutton SignWriting. SuttonSignWriting :: Block -- | U+1DF00..U+1DFFF: Latin Extended-G. LatinExtendedG :: Block -- | U+1E000..U+1E02F: Glagolitic Supplement. GlagoliticSupplement :: Block -- | U+1E030..U+1E08F: Cyrillic Extended-D. CyrillicExtendedD :: Block -- | U+1E100..U+1E14F: Nyiakeng Puachue Hmong. NyiakengPuachueHmong :: Block -- | U+1E290..U+1E2BF: Toto. Toto :: Block -- | U+1E2C0..U+1E2FF: Wancho. Wancho :: Block -- | U+1E4D0..U+1E4FF: Nag Mundari. NagMundari :: Block -- | U+1E7E0..U+1E7FF: Ethiopic Extended-B. EthiopicExtendedB :: Block -- | U+1E800..U+1E8DF: Mende Kikakui. MendeKikakui :: Block -- | U+1E900..U+1E95F: Adlam. Adlam :: Block -- | U+1EC70..U+1ECBF: Indic Siyaq Numbers. IndicSiyaqNumbers :: Block -- | U+1ED00..U+1ED4F: Ottoman Siyaq Numbers. OttomanSiyaqNumbers :: Block -- | U+1EE00..U+1EEFF: Arabic Mathematical Alphabetic Symbols. ArabicMathematicalAlphabeticSymbols :: Block -- | U+1F000..U+1F02F: Mahjong Tiles. MahjongTiles :: Block -- | U+1F030..U+1F09F: Domino Tiles. DominoTiles :: Block -- | U+1F0A0..U+1F0FF: Playing Cards. PlayingCards :: Block -- | U+1F100..U+1F1FF: Enclosed Alphanumeric Supplement. EnclosedAlphanumericSupplement :: Block -- | U+1F200..U+1F2FF: Enclosed Ideographic Supplement. EnclosedIdeographicSupplement :: Block -- | U+1F300..U+1F5FF: Miscellaneous Symbols and Pictographs. MiscellaneousSymbolsAndPictographs :: Block -- | U+1F600..U+1F64F: Emoticons. Emoticons :: Block -- | U+1F650..U+1F67F: Ornamental Dingbats. OrnamentalDingbats :: Block -- | U+1F680..U+1F6FF: Transport and Map Symbols. TransportAndMapSymbols :: Block -- | U+1F700..U+1F77F: Alchemical Symbols. AlchemicalSymbols :: Block -- | U+1F780..U+1F7FF: Geometric Shapes Extended. 
GeometricShapesExtended :: Block -- | U+1F800..U+1F8FF: Supplemental Arrows-C. SupplementalArrowsC :: Block -- | U+1F900..U+1F9FF: Supplemental Symbols and Pictographs. SupplementalSymbolsAndPictographs :: Block -- | U+1FA00..U+1FA6F: Chess Symbols. ChessSymbols :: Block -- | U+1FA70..U+1FAFF: Symbols and Pictographs Extended-A. SymbolsAndPictographsExtendedA :: Block -- | U+1FB00..U+1FBFF: Symbols for Legacy Computing. SymbolsForLegacyComputing :: Block -- | U+20000..U+2A6DF: CJK Unified Ideographs Extension B. CJKUnifiedIdeographsExtensionB :: Block -- | U+2A700..U+2B73F: CJK Unified Ideographs Extension C. CJKUnifiedIdeographsExtensionC :: Block -- | U+2B740..U+2B81F: CJK Unified Ideographs Extension D. CJKUnifiedIdeographsExtensionD :: Block -- | U+2B820..U+2CEAF: CJK Unified Ideographs Extension E. CJKUnifiedIdeographsExtensionE :: Block -- | U+2CEB0..U+2EBEF: CJK Unified Ideographs Extension F. CJKUnifiedIdeographsExtensionF :: Block -- | U+2EBF0..U+2EE5F: CJK Unified Ideographs Extension I. CJKUnifiedIdeographsExtensionI :: Block -- | U+2F800..U+2FA1F: CJK Compatibility Ideographs Supplement. CJKCompatibilityIdeographsSupplement :: Block -- | U+30000..U+3134F: CJK Unified Ideographs Extension G. CJKUnifiedIdeographsExtensionG :: Block -- | U+31350..U+323AF: CJK Unified Ideographs Extension H. CJKUnifiedIdeographsExtensionH :: Block -- | U+E0000..U+E007F: Tags. Tags :: Block -- | U+E0100..U+E01EF: Variation Selectors Supplement. VariationSelectorsSupplement :: Block -- | U+F0000..U+FFFFF: Supplementary Private Use Area-A. SupplementaryPrivateUseAreaA :: Block -- | U+100000..U+10FFFF: Supplementary Private Use Area-B. SupplementaryPrivateUseAreaB :: Block -- | Character block, if defined. block :: Char -> Maybe Block -- | Block definition: range and name. 
data BlockDefinition BlockDefinition :: !(Int, Int) -> !String -> BlockDefinition -- | Range [blockRange] :: BlockDefinition -> !(Int, Int) -- | Name [blockName] :: BlockDefinition -> !String -- | Block definition blockDefinition :: Block -> BlockDefinition instance GHC.Show.Show Unicode.Char.General.Blocks.BlockDefinition instance GHC.Classes.Ord Unicode.Char.General.Blocks.BlockDefinition instance GHC.Classes.Eq Unicode.Char.General.Blocks.BlockDefinition -- | Numeric character property related functions. module Unicode.Char.Numeric -- | Selects Unicode character with a numeric value. -- -- Note: a character may have a numeric value but return -- False with the predicate isNumber, because -- isNumber only tests GeneralCategory: some CJK characters -- are OtherLetter and do have a numeric value. -- --

--   isNumeric c == isJust (numericValue c)
--

isNumeric :: Char -> Bool -- | Numeric value of a character, if relevant. -- -- Note: a character may have a numeric value but return -- False with the predicate isNumber, because -- isNumber only tests GeneralCategory: some CJK characters -- are OtherLetter and do have a numeric value. numericValue :: Char -> Maybe Rational -- | Integer value of a character, if relevant. -- -- This is a special case of numericValue. -- -- Warning: There is a risk of integer overflow depending -- on the chosen concrete return type. As of Unicode 15.1 the results -- range from 0 to 1e16. -- --

--   >>> minimum [v | v@Just{} <- integerValue <$> [minBound..]] :: Maybe Integer
--   Just 0
--   
--   >>> maximum (integerValue <$> [minBound..]) :: Maybe Integer
--   Just 10000000000000000
--   
--   >>> integerValue '\x4EAC' :: Maybe Int64 -- OK
--   Just 10000000000000000
--   
--   >>> integerValue '\x4EAC' :: Maybe Int32 -- Will overflow!
--   Just 1874919424
--

-- -- Therefore it is advised to use: integerValue -- @Int64. -- -- Note: A character may have a numeric value but return -- False with the predicate isNumber, because -- isNumber only tests GeneralCategory: some CJK characters -- are OtherLetter and do have a numeric value. integerValue :: Integral a => Char -> Maybe a -- | Same as intToDigit, but with upper case. -- --

--   >>> intToDigiT <$> [0..15]
--   "0123456789ABCDEF"
--

intToDigiT :: Int -> Char -- | Selects ASCII digits, i.e. '0'..'9'. isDigit :: Char -> Bool -- | Selects ASCII octal digits, i.e. '0'..'7'. isOctDigit :: Char -> Bool -- | Selects ASCII hexadecimal digits, i.e. '0'..'9', -- 'a'..'f', 'A'..'F'. isHexDigit :: Char -> Bool -- | Convert a single digit Char to the corresponding Int. -- This function fails unless its argument satisfies isHexDigit, -- but recognises both upper- and lower-case hexadecimal digits (that is, -- '0'..'9', 'a'..'f', -- 'A'..'F'). -- --

Examples

-- -- Characters '0' through '9' are converted properly to -- 0..9: -- --

--   >>> map digitToInt ['0'..'9']
--   [0,1,2,3,4,5,6,7,8,9]
--

-- -- Both upper- and lower-case 'A' through 'F' are -- converted as well, to 10..15. -- --

--   >>> map digitToInt ['a'..'f']
--   [10,11,12,13,14,15]
--   
--   >>> map digitToInt ['A'..'F']
--   [10,11,12,13,14,15]
--

-- -- Anything else throws an exception: -- --

--   >>> digitToInt 'G'
--   *** Exception: Char.digitToInt: not a digit 'G'
--   
--   >>> digitToInt '♥'
--   *** Exception: Char.digitToInt: not a digit '\9829'
--

digitToInt :: Char -> Int -- | Convert an Int in the range 0..15 to the -- corresponding single digit Char. This function fails on other -- inputs, and generates lower-case hexadecimal digits. intToDigit :: Int -> Char -- | Unicode Identifier and Pattern Syntax property functions based on -- Unicode Standard Annex #31 module Unicode.Char.Identifiers -- | Returns True if a character is an identifier continue -- character. isIDContinue :: Char -> Bool -- | Returns True if a character is an identifier start character. isIDStart :: Char -> Bool -- | Returns True if a character is an identifier continue -- character, using the NFKC modifications detailed in UAX #31, -- 5.1. isXIDContinue :: Char -> Bool -- | Returns True if a character is an identifier start character, -- using the NFKC modifications detailed in UAX #31, 5.1. isXIDStart :: Char -> Bool -- | Returns True if a character is a pattern syntax character. isPatternSyntax :: Char -> Bool -- | Returns True if a character is a pattern whitespace character. isPatternWhitespace :: Char -> Bool -- | Compatibility module for numeric character property related functions. module Unicode.Char.Numeric.Compat -- | Selects Unicode numeric characters, including digits from various -- scripts, Roman numerals, et cetera. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

DecimalNumber
LetterNumber
OtherNumber

-- -- Note: a character may have a numeric value (see -- numericValue) but return False, because isNumber -- only tests GeneralCategory: some CJK characters are -- OtherLetter and do have a numeric value. Use isNumeric -- to cover those cases as well. -- --

--   isNumber c == Data.Char.isNumber c
--

-- -- @since 0.3.1 moved to Compat module. isNumber :: Char -> Bool -- | Compatibility module for general character property related functions. -- -- The functions of this module are drop-in replacements for those in -- Data.Char. They are similar but not identical to some functions -- in Unicode.Char.General, therefore they are placed in a -- separate module in order to avoid ambiguity. module Unicode.Char.General.Compat -- | Same as isLetter. isAlpha :: Char -> Bool -- | Selects alphabetic or numeric Unicode characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

UppercaseLetter
LowercaseLetter
TitlecaseLetter
ModifierLetter
OtherLetter
DecimalNumber
LetterNumber
OtherNumber

-- --

--   isAlphaNum c == Data.Char.isAlphaNum c
--

-- -- Note: this function is incompatible with isAlphabetic: -- --

--   >>> Unicode.Char.General.isAlphabetic '\x345'
--   True
--   
--   >>> isAlphaNum '\x345'
--   False
--

-- -- @since 0.6.0 moved to Compat module isAlphaNum :: Char -> Bool -- | Selects alphabetic Unicode characters (lower-case, upper-case and -- title-case letters, plus letters of caseless scripts and modifier -- letters). -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

UppercaseLetter
LowercaseLetter
TitlecaseLetter
ModifierLetter
OtherLetter

-- -- Note: this function is not equivalent to -- isAlphabetic. See the description of isAlphabetic for -- further details. -- --

--   isLetter c == Data.Char.isLetter c
--

isLetter :: Char -> Bool -- | Selects Unicode space characters (general category Space), -- and the control characters \t, \n, \r, -- \f, \v. -- -- Note: isSpace is not equivalent to -- isWhiteSpace. isWhiteSpace selects the same characters -- as isSpace plus the following: -- --

U+0085 NEXT LINE (NEL)
U+2028 LINE SEPARATOR
U+2029 PARAGRAPH SEPARATOR

-- --

--   isSpace c == Data.Char.isSpace c
--

isSpace :: Char -> Bool -- | Compatibility module for case and case mapping related functions. -- -- The functions of this module are drop-in replacements for those in -- Data.Char. They are similar but not identical to some functions -- in Unicode.Char.Case, therefore they are placed in a separate -- module in order to avoid ambiguity. module Unicode.Char.Case.Compat -- | Selects upper-case or title-case alphabetic Unicode characters -- (letters). Title case is used by a small number of letter ligatures -- like the single-character form of Lj. -- -- It matches characters with general category UppercaseLetter and -- TitlecaseLetter. -- -- See: isUpperCase for the full upper case predicate. -- --

--   isUpper c == Data.Char.isUpper c
--

isUpper :: Char -> Bool -- | Selects lower-case alphabetic Unicode characters (letters). -- -- It matches characters with general category LowercaseLetter. -- -- See: isLowerCase for the full lower case predicate. -- --

--   isLower c == Data.Char.isLower c
--

isLower :: Char -> Bool -- | Convert a letter to the corresponding upper-case letter, if any. Any -- other character is returned unchanged. -- -- It uses the character property Simple_Uppercase_Mapping. -- -- See: upperCaseMapping and toUpperString for full -- upper case conversion. -- --

--   toUpper c == Data.Char.toUpper c
--

toUpper :: Char -> Char -- | Convert a letter to the corresponding lower-case letter, if any. Any -- other character is returned unchanged. -- -- It uses the character property Simple_Lowercase_Mapping. -- -- See: lowerCaseMapping and toLowerString for full -- lower case conversion. -- --

--   toLower c == Data.Char.toLower c
--

toLower :: Char -> Char -- | Convert a letter to the corresponding title-case or upper-case letter, -- if any. (Title case differs from upper case only for a small number of -- ligature letters.) Any other character is returned unchanged. -- -- It uses the character property Simple_Titlecase_Mapping. -- -- See: titleCaseMapping and toTitleString for full -- title case conversion. -- --

--   toTitle c == Data.Char.toTitle c
--

toTitle :: Char -> Char -- | Fast division by known constants. -- -- Division by a constant can be replaced by a double-word -- multiplication. Roughly speaking, instead of dividing by x, multiply -- by 2^64/x, obtaining a 128-bit-long product, and take the upper 64 bits. The -- peculiar details can be found in Hacker's Delight, Ch. 10. -- -- Even GHC 8.10 does not provide a primitive for a signed double-word -- multiplication, but since our applications do not involve negative -- integers, we convert Int to Word and use -- timesWord#. -- -- Textbook unsigned division by 21 or 28 becomes involved, when an -- argument is allowed to take the full range of Word up to 2^64. -- Luckily, in our case the argument was cast from Int, so we -- can guarantee that it is below 2^63. module Unicode.Internal.Division -- | Input must be non-negative. -- -- Instead of division by 21, we compute floor(floor((2^68+17)/21 * n) / -- 2^68) = floor((2^68+17)/21 * n/2^68) = floor(n/21 + -- (n/2^63 * 17/32)/21) = floor(n/21), because n/2^63 * -- 17/32 < 1. quotRem21 :: Int -> (Int, Int) -- | Input must be non-negative. -- -- Instead of division by 28, we compute floor(floor((2^65+3)/7 * n) / -- 2^67) = floor((2^65+3)/7 * n/2^67) = floor(n/28 + -- (n/2^63 * 3/4)/28) = floor(n/28), because n/2^63 * 3/4 -- < 1. quotRem28 :: Int -> (Int, Int) -- | General character property related functions. module Unicode.Char.General -- | Types of Code Points. -- -- These classes are defined in the section 2.4 “Code Points and -- Characters” of the Unicode standard. data CodePointType -- | Graphic: defined by the following general categories: -- --

Letters (L): UppercaseLetter, LowercaseLetter, -- TitlecaseLetter, ModifierLetter, -- OtherLetter.
Marks (M): NonSpacingMark, SpacingCombiningMark, -- EnclosingMark.
Numbers (N): DecimalNumber, LetterNumber, -- OtherNumber.
Punctuation (P): ConnectorPunctuation, -- DashPunctuation, OpenPunctuation, -- ClosePunctuation, InitialQuote, FinalQuote, -- OtherPunctuation.
Symbol (S): MathSymbol, CurrencySymbol, -- ModifierSymbol, OtherSymbol.
Separators: Space.

GraphicType :: CodePointType -- | Format: invisible but affects neighboring characters. -- -- Defined by the following general categories: LineSeparator, -- ParagraphSeparator, Format. FormatType :: CodePointType -- | Control: usage defined by protocols or standards outside the -- Unicode Standard. -- -- Defined by the general category Control. ControlType :: CodePointType -- | Private-use: usage defined by private agreement outside the -- Unicode Standard. -- -- Defined by the general category PrivateUse. PrivateUseType :: CodePointType -- | Surrogate: Permanently reserved for UTF-16. -- -- Defined by the general category Surrogate. SurrogateType :: CodePointType -- | Noncharacter: a code point that is permanently reserved for -- internal use (see definition D14 in the section 3.4 “Characters and -- Encoding” of the Unicode Standard). Noncharacters consist of the -- values U+nFFFE and U+nFFFF (where n is from -- 0 to 10₁₆) and the values U+FDD0..U+FDEF. -- -- They are a subset of the general category NotAssigned. NoncharacterType :: CodePointType -- | Reserved: any code point of the Unicode Standard that is -- reserved for future assignment (see definition D15 in the section -- 3.4 “Characters and Encoding” of the Unicode Standard). Also -- known as an unassigned code point. -- -- They are a subset of the general category NotAssigned. ReservedType :: CodePointType -- | Returns the CodePointType of a character. codePointType :: Char -> CodePointType -- | Unicode General Categories. -- -- These classes are defined in the Unicode Character Database, -- part of the Unicode standard. -- -- Note: the classes must be in the same order they are listed in -- the Unicode Standard, because some functions (e.g. -- generalCategory) rely on the Enum instance. 
data GeneralCategory -- | Lu: Letter, Uppercase UppercaseLetter :: GeneralCategory -- | Ll: Letter, Lowercase LowercaseLetter :: GeneralCategory -- | Lt: Letter, Titlecase TitlecaseLetter :: GeneralCategory -- | Lm: Letter, Modifier ModifierLetter :: GeneralCategory -- | Lo: Letter, Other OtherLetter :: GeneralCategory -- | Mn: Mark, Non-Spacing NonSpacingMark :: GeneralCategory -- | Mc: Mark, Spacing Combining SpacingCombiningMark :: GeneralCategory -- | Me: Mark, Enclosing EnclosingMark :: GeneralCategory -- | Nd: Number, Decimal DecimalNumber :: GeneralCategory -- | Nl: Number, Letter LetterNumber :: GeneralCategory -- | No: Number, Other OtherNumber :: GeneralCategory -- | Pc: Punctuation, Connector ConnectorPunctuation :: GeneralCategory -- | Pd: Punctuation, Dash DashPunctuation :: GeneralCategory -- | Ps: Punctuation, Open OpenPunctuation :: GeneralCategory -- | Pe: Punctuation, Close ClosePunctuation :: GeneralCategory -- | Pi: Punctuation, Initial quote InitialQuote :: GeneralCategory -- | Pf: Punctuation, Final quote FinalQuote :: GeneralCategory -- | Po: Punctuation, Other OtherPunctuation :: GeneralCategory -- | Sm: Symbol, Math MathSymbol :: GeneralCategory -- | Sc: Symbol, Currency CurrencySymbol :: GeneralCategory -- | Sk: Symbol, Modifier ModifierSymbol :: GeneralCategory -- | So: Symbol, Other OtherSymbol :: GeneralCategory -- | Zs: Separator, Space Space :: GeneralCategory -- | Zl: Separator, Line LineSeparator :: GeneralCategory -- | Zp: Separator, Paragraph ParagraphSeparator :: GeneralCategory -- | Cc: Other, Control Control :: GeneralCategory -- | Cf: Other, Format Format :: GeneralCategory -- | Cs: Other, Surrogate Surrogate :: GeneralCategory -- | Co: Other, Private Use PrivateUse :: GeneralCategory -- | Cn: Other, Not Assigned NotAssigned :: GeneralCategory -- | Abbreviation of GeneralCategory used in the Unicode standard. generalCategoryAbbr :: GeneralCategory -> String -- | The Unicode general category of the character. 
-- -- This property is defined in the column 2 of the UnicodeData -- table. -- -- This relies on the Enum instance of GeneralCategory, -- which must remain in the same order as the categories are presented in -- the Unicode standard. -- --

--   show (generalCategory c) == show (Data.Char.generalCategory c)
--

generalCategory :: Char -> GeneralCategory -- | Returns True for alphabetic Unicode characters (lower-case, -- upper-case and title-case letters, plus letters of caseless scripts -- and modifier letters). -- -- Note: this function is not equivalent to isAlpha -- / isLetter: -- --

isAlpha matches the following general -- categories:
- UppercaseLetter -- (Lu)
- LowercaseLetter -- (Ll)
- TitlecaseLetter -- (Lt)
- ModifierLetter -- (Lm)
- OtherLetter (Lo)
whereas isAlphabetic matches:
- Uppercase -- property
- Lowercase -- property
- TitlecaseLetter -- (Lt)
- ModifierLetter -- (Lm)
- OtherLetter -- (Lo)
- LetterNumber -- (Nl)
- Other_Alphabetic -- property

isAlphabetic :: Char -> Bool -- | Selects alphabetic or numeric Unicode characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

UppercaseLetter
LowercaseLetter
TitlecaseLetter
ModifierLetter
OtherLetter
DecimalNumber
LetterNumber
OtherNumber

-- --

--   isAlphaNum c == Data.Char.isAlphaNum c
--

-- -- Note: this function is incompatible with isAlphabetic: -- --

--   >>> isAlphabetic '\x345'
--   True
--   
--   >>> isAlphaNum '\x345'
--   False
--

-- | Deprecated: Use Unicode.Char.General.Compat.isAlphaNum instead. isAlphaNum :: Char -> Bool -- | Selects control characters, which are the non-printing characters of -- the Latin-1 subset of Unicode. -- -- This function returns True if its argument has the -- GeneralCategory Control. -- --

--   isControl c == Data.Char.isControl c
--

isControl :: Char -> Bool -- | Selects Unicode mark characters, for example accents and the like, -- which combine with preceding characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

NonSpacingMark
SpacingCombiningMark
EnclosingMark

-- --

--   isMark c == Data.Char.isMark c
--

isMark :: Char -> Bool -- | Selects printable Unicode characters (letters, numbers, marks, -- punctuation, symbols and spaces). -- -- This function returns False if its argument has one of the -- following GeneralCategorys, or True otherwise: -- --

LineSeparator
ParagraphSeparator
Control
Format
Surrogate
PrivateUse
NotAssigned

-- --

--   isPrint c == Data.Char.isPrint c
--

isPrint :: Char -> Bool -- | Selects Unicode punctuation characters, including various kinds of -- connectors, brackets and quotes. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

ConnectorPunctuation
DashPunctuation
OpenPunctuation
ClosePunctuation
InitialQuote
FinalQuote
OtherPunctuation

-- --

--   isPunctuation c == Data.Char.isPunctuation c
--

isPunctuation :: Char -> Bool -- | Selects Unicode space and separator characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

Space
LineSeparator
ParagraphSeparator

-- --

--   isSeparator c == Data.Char.isSeparator c
--

isSeparator :: Char -> Bool -- | Selects Unicode symbol characters, including mathematical and currency -- symbols. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

MathSymbol
CurrencySymbol
ModifierSymbol
OtherSymbol

-- --

--   isSymbol c == Data.Char.isSymbol c
--

isSymbol :: Char -> Bool -- | Returns True for any whitespace characters, and the control -- characters \t, \n, \r, \f, -- \v. -- -- See: Unicode White_Space. -- -- Note: isWhiteSpace is not equivalent to -- isSpace. isWhiteSpace selects the same characters from -- isSpace plus the following: -- --

U+0085 NEXT LINE (NEL)
U+2028 LINE SEPARATOR
U+2029 PARAGRAPH SEPARATOR

isWhiteSpace :: Char -> Bool -- | Returns True for any noncharacter. -- -- A noncharacter is a code point that is permanently reserved for -- internal use (see definition D14 in the section 3.4 “Characters and -- Encoding” of the Unicode Standard). -- -- Noncharacters consist of the values U+nFFFE and -- U+nFFFF (where n is from 0 to 10₁₆) and the values -- U+FDD0..U+FDEF. isNoncharacter :: Char -> Bool -- | Selects the first 128 characters of the Unicode character set, -- corresponding to the ASCII character set. isAscii :: Char -> Bool -- | Selects the first 256 characters of the Unicode character set, -- corresponding to the ISO 8859-1 (Latin-1) character set. isLatin1 :: Char -> Bool -- | Selects ASCII upper-case letters, i.e. characters satisfying both -- isAscii and isUpper. isAsciiUpper :: Char -> Bool -- | Selects ASCII lower-case letters, i.e. characters satisfying both -- isAscii and isLower. isAsciiLower :: Char -> Bool -- | Determine whether a character is a jamo L, V or T character. isJamo :: Char -> Bool -- | Total count of all jamo characters. -- --

--   jamoNCount = jamoVCount * jamoTCount
--

jamoNCount :: Int -- | First leading consonant jamo. jamoLFirst :: Int -- | Total count of leading consonant jamo. jamoLCount :: Int -- | Given a Unicode character, if it is a leading jamo, return its index -- in the list of leading jamo consonants, otherwise return -- Nothing. jamoLIndex :: Char -> Maybe Int -- | Last leading consonant jamo. jamoLLast :: Int -- | First vowel jamo. jamoVFirst :: Int -- | Total count of vowel jamo. jamoVCount :: Int -- | Given a Unicode character, if it is a vowel jamo, return its index in -- the list of vowel jamo, otherwise return Nothing. jamoVIndex :: Char -> Maybe Int -- | Last vowel jamo. jamoVLast :: Int -- | The first trailing consonant jamo. -- -- Note that jamoTFirst does not represent a valid T, it -- represents a missing T i.e. LV without a T. See comments under -- jamoTIndex . jamoTFirst :: Int -- | Total count of trailing consonant jamo. jamoTCount :: Int -- | Given a Unicode character, if it is a trailing jamo consonant, return -- its index in the list of trailing jamo consonants, otherwise return -- Nothing. -- -- Note that index 0 is not a valid index for a trailing consonant. Index -- 0 corresponds to an LV syllable, without a T. See "Hangul Syllable -- Decomposition" in the Conformance chapter of the Unicode standard for -- more details. jamoTIndex :: Char -> Maybe Int -- | Last trailing consonant jamo. jamoTLast :: Int -- | Codepoint of the first pre-composed Hangul character. hangulFirst :: Int -- | Codepoint of the last Hangul character. hangulLast :: Int -- | Determine if the given character is a precomposed Hangul syllable. isHangul :: Char -> Bool -- | Determine if the given character is a Hangul LV syllable. -- -- Note: this function requires a precomposed Hangul syllable but -- does not check it. Use isHangul to check the input -- character before passing it to isHangulLV. 
isHangulLV :: Char -> Bool instance GHC.Ix.Ix Unicode.Char.General.GeneralCategory instance GHC.Enum.Bounded Unicode.Char.General.GeneralCategory instance GHC.Enum.Enum Unicode.Char.General.GeneralCategory instance GHC.Classes.Ord Unicode.Char.General.GeneralCategory instance GHC.Classes.Eq Unicode.Char.General.GeneralCategory instance GHC.Show.Show Unicode.Char.General.GeneralCategory instance GHC.Ix.Ix Unicode.Char.General.CodePointType instance GHC.Enum.Bounded Unicode.Char.General.CodePointType instance GHC.Enum.Enum Unicode.Char.General.CodePointType instance GHC.Classes.Ord Unicode.Char.General.CodePointType instance GHC.Classes.Eq Unicode.Char.General.CodePointType instance GHC.Show.Show Unicode.Char.General.CodePointType -- | Low level Unicode database functions to facilitate Unicode -- normalization. -- -- For more information on Unicode normalization please refer to the -- following sections of the Unicode standard: -- --

2 General Structure
- 2.3 Compatibility -- Characters
- 2.12 Equivalent Sequences
3 Conformance
- 3.6 Combination
- 3.7 -- Decomposition
- 3.11 Normalization Forms
- 3.12 Conjoining -- Jamo Behavior
4 Character Properties
- 4.3 Combining Classes
Unicode® Standard Annex #15 - Unicode Normalization -- Forms
Unicode® Standard Annex #44 - Unicode Character -- Database

module Unicode.Char.Normalization -- | Returns True if a character is a combining character. isCombining :: Char -> Bool -- | Returns the combining class of a character. combiningClass :: Char -> Int -- | Returns True if a starter character may combine with some -- preceding starter character. isCombiningStarter :: Char -> Bool -- | Compose a starter character (combining class 0) with a combining -- character (non-zero combining class). Returns the composed character -- if the starter combines with the combining character, returns -- Nothing otherwise. compose :: Char -> Char -> Maybe Char -- | Compose a starter character with another starter character. Returns -- the composed character if the two starters combine, returns -- Nothing otherwise. composeStarters :: Char -> Char -> Maybe Char -- | Whether we are decomposing in canonical or compatibility mode. data DecomposeMode Canonical :: DecomposeMode Kompat :: DecomposeMode -- | Given a non-Hangul character determine if the character is -- decomposable. Note that in case of compatibility decompositions a -- character may decompose into a single compatibility character. isDecomposable :: DecomposeMode -> Char -> Bool -- | Decompose a non-Hangul character into its canonical or compatibility -- decompositions. Note that the resulting characters may further -- decompose. decompose :: DecomposeMode -> Char -> [Char] -- | Decompose a Hangul syllable into its corresponding Jamo characters. decomposeHangul :: Char -> (Char, Char, Char) module Unicode.Internal.Unfold -- | An Unfold a b is a generator of a stream of values of type -- b from a seed of type a. data Unfold a b Unfold :: (s -> Step s b) -> (a -> Step s b) -> Unfold a b -- | A stream is a succession of Steps. data Step s a -- | Produces a single value and the next state of the stream. Yield :: !a -> !s -> Step s a -- | Indicates there are no more values in the stream. 
Stop :: Step s a -- | Convert an 'Unfold a a' to a list [a], if the resulting list is empty -- the seed is used as a default output. toList :: Unfold a a -> a -> [a] instance GHC.Base.Functor (Unicode.Internal.Unfold.Step s) -- | Case and case mapping related functions. -- -- This module provides full predicates and mappings that are -- not compatible with those in Data.Char, which rely on -- simple properties. See Unicode.Char.Case.Compat for a drop-in -- replacement of the functions in Data.Char. module Unicode.Char.Case -- | Returns True for lower-case characters. -- -- It uses the character property Lowercase. -- -- See: isLower for the legacy predicate. isLowerCase :: Char -> Bool -- | Returns True for upper-case characters. -- -- It uses the character property Uppercase. -- -- Note: it does not match title-cased letters. Those are matched -- using: generalCategory c == TitlecaseLetter. -- -- See: isUpper for the legacy predicate. isUpperCase :: Char -> Bool -- | Returns the full folded case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Case_Folding. caseFoldMapping :: Unfold Char Char -- | Convert a character to full folded case if defined, else to -- itself. -- -- This function is mainly useful for performing caseless (also known as -- case insensitive) string comparisons. -- -- A string x is a caseless match for a string y if and -- only if: -- --

--   foldMap toCaseFoldString x == foldMap toCaseFoldString y
--

-- -- The result string may have more than one character, and may differ -- from applying toLowerString to the input string. For instance, -- “ﬓ” (U+FB13 Armenian small ligature men now) is case folded -- to the sequence “մ” (U+0574 Armenian small letter men) -- followed by “ն” (U+0576 Armenian small letter now), while “µ” -- (U+00B5 micro sign) is case folded to “μ” (U+03BC -- Greek small letter mu) instead of itself. -- -- It uses the character property Case_Folding. -- --

--   toCaseFoldString c == foldMap toCaseFoldString (toCaseFoldString c)
--

toCaseFoldString :: Char -> String -- | Returns the full lower case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Lowercase_Mapping. lowerCaseMapping :: Unfold Char Char -- | Convert a character to full lower case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, “İ” -- (U+0130 Latin capital letter I with dot above) maps to the -- sequence: “i” (U+0069 Latin small letter I) followed by “ ̇” -- (U+0307 combining dot above). -- -- It uses the character property Lowercase_Mapping. -- -- See: toLower for simple lower case conversion. -- --

--   toLowerString c == foldMap toLowerString (toLowerString c)
--

toLowerString :: Char -> String -- | Returns the full title case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Titlecase_Mapping. titleCaseMapping :: Unfold Char Char -- | Convert a character to full title case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, “ﬂ” -- (U+FB02 Latin small ligature FL) is converted to the -- sequence: “F” (U+0046 Latin capital letter F) followed by “l” -- (U+006C Latin small letter L). -- -- It uses the character property Titlecase_Mapping. -- -- See: toTitle for simple title case conversion. toTitleString :: Char -> String -- | Returns the full upper case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Uppercase_Mapping. upperCaseMapping :: Unfold Char Char -- | Convert a character to full upper case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, the -- German “ß” (U+00DF Eszett) maps to the two-letter sequence -- “SS”. -- -- It uses the character property Uppercase_Mapping. -- -- See: toUpper for simple upper case conversion. -- --

--   toUpperString c == foldMap toUpperString (toUpperString c)
--

toUpperString :: Char -> String -- | An Unfold a b is a generator of a stream of values of type -- b from a seed of type a. data Unfold a b Unfold :: (s -> Step s b) -> (a -> Step s b) -> Unfold a b -- | A stream is a succession of Steps. data Step s a -- | Produces a single value and the next state of the stream. Yield :: !a -> !s -> Step s a -- | Indicates there are no more values in the stream. Stop :: Step s a -- | This module provides APIs to access the Unicode character database -- (UCD) corresponding to Unicode Standard version 15.1.0. -- -- This module re-exports several sub-modules under it. The sub-module -- structure under Char is largely based on the "Property Index -- by Scope of Use" in Unicode® Standard Annex #44. -- -- The Unicode.Char.* modules in turn depend on -- Unicode.Internal.Char.* modules which are programmatically -- generated from the Unicode standard's Unicode character database -- files. The module structure under Unicode.Internal.Char is -- largely based on the UCD text file names from which the properties are -- generated. -- -- For the original UCD files used in this code please refer to the -- UCD section on the Unicode standard page. See -- https://www.unicode.org/reports/tr44/ to understand the -- contents and the format of the unicode database files. module Unicode.Char -- | Version of the Unicode standard used by this package: 15.1.0. unicodeVersion :: Version -- | Unicode General Categories. -- -- These classes are defined in the Unicode Character Database, -- part of the Unicode standard. -- -- Note: the classes must be in the same order they are listed in -- the Unicode Standard, because some functions (e.g. -- generalCategory) rely on the Enum instance. 
data GeneralCategory -- | Lu: Letter, Uppercase UppercaseLetter :: GeneralCategory -- | Ll: Letter, Lowercase LowercaseLetter :: GeneralCategory -- | Lt: Letter, Titlecase TitlecaseLetter :: GeneralCategory -- | Lm: Letter, Modifier ModifierLetter :: GeneralCategory -- | Lo: Letter, Other OtherLetter :: GeneralCategory -- | Mn: Mark, Non-Spacing NonSpacingMark :: GeneralCategory -- | Mc: Mark, Spacing Combining SpacingCombiningMark :: GeneralCategory -- | Me: Mark, Enclosing EnclosingMark :: GeneralCategory -- | Nd: Number, Decimal DecimalNumber :: GeneralCategory -- | Nl: Number, Letter LetterNumber :: GeneralCategory -- | No: Number, Other OtherNumber :: GeneralCategory -- | Pc: Punctuation, Connector ConnectorPunctuation :: GeneralCategory -- | Pd: Punctuation, Dash DashPunctuation :: GeneralCategory -- | Ps: Punctuation, Open OpenPunctuation :: GeneralCategory -- | Pe: Punctuation, Close ClosePunctuation :: GeneralCategory -- | Pi: Punctuation, Initial quote InitialQuote :: GeneralCategory -- | Pf: Punctuation, Final quote FinalQuote :: GeneralCategory -- | Po: Punctuation, Other OtherPunctuation :: GeneralCategory -- | Sm: Symbol, Math MathSymbol :: GeneralCategory -- | Sc: Symbol, Currency CurrencySymbol :: GeneralCategory -- | Sk: Symbol, Modifier ModifierSymbol :: GeneralCategory -- | So: Symbol, Other OtherSymbol :: GeneralCategory -- | Zs: Separator, Space Space :: GeneralCategory -- | Zl: Separator, Line LineSeparator :: GeneralCategory -- | Zp: Separator, Paragraph ParagraphSeparator :: GeneralCategory -- | Cc: Other, Control Control :: GeneralCategory -- | Cf: Other, Format Format :: GeneralCategory -- | Cs: Other, Surrogate Surrogate :: GeneralCategory -- | Co: Other, Private Use PrivateUse :: GeneralCategory -- | Cn: Other, Not Assigned NotAssigned :: GeneralCategory -- | Types of Code Points. -- -- These classes are defined in the section 2.4 “Code Points and -- Characters” of the Unicode standard. 
data CodePointType -- | Graphic: defined by the following general categories: -- --

Letters (L): UppercaseLetter, LowercaseLetter, -- TitlecaseLetter, ModifierLetter, -- OtherLetter.
Marks (M): NonSpacingMark, SpacingCombiningMark, -- EnclosingMark.
Numbers (N): DecimalNumber, LetterNumber, -- OtherNumber.
Punctuation (P): ConnectorPunctuation, -- DashPunctuation, OpenPunctuation, -- ClosePunctuation, InitialQuote, FinalQuote, -- OtherPunctuation.
Symbol (S): MathSymbol, CurrencySymbol, -- ModifierSymbol, OtherSymbol.
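Separators: Space.

GraphicType :: CodePointType -- | Format: invisible but affects neighboring characters. -- -- Defined by the following general categories: LineSeparator, -- ParagraphSeparator, Format. FormatType :: CodePointType -- | Control: usage defined by protocols or standards outside the -- Unicode Standard. -- -- Defined by the general category Control. ControlType :: CodePointType -- | Private-use: usage defined by private agreement outside the -- Unicode Standard. -- -- Defined by the general category PrivateUse. PrivateUseType :: CodePointType -- | Surrogate: Permanently reserved for UTF-16. -- -- Defined by the general category Surrogate. SurrogateType :: CodePointType -- | Noncharacter: a code point that is permanently reserved for -- internal use (see definition D14 in the section 3.4 “Characters and -- Encoding” of the Unicode Standard). Noncharacters consist of the -- values U+nFFFE and U+nFFFF (where n is from -- 0 to 10₁₆) and the values U+FDD0..U+FDEF. -- -- They are a subset of the general category NotAssigned. NoncharacterType :: CodePointType -- | Reserved: any code point of the Unicode Standard that is -- reserved for future assignment (see definition D15 in the section -- 3.4 “Characters and Encoding” of the Unicode Standard). Also -- known as an unassigned code point. -- -- They are a subset of the general category NotAssigned. ReservedType :: CodePointType -- | The Unicode general category of the character. -- -- This property is defined in the column 2 of the UnicodeData -- table. -- -- This relies on the Enum instance of GeneralCategory, -- which must remain in the same order as the categories are presented in -- the Unicode standard. -- --
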
--   show (generalCategory c) == show (Data.Char.generalCategory c)
--

generalCategory :: Char -> GeneralCategory -- | Selects the first 128 characters of the Unicode character set, -- corresponding to the ASCII character set. isAscii :: Char -> Bool -- | Selects the first 256 characters of the Unicode character set, -- corresponding to the ISO 8859-1 (Latin-1) character set. isLatin1 :: Char -> Bool -- | Selects ASCII lower-case letters, i.e. characters satisfying both -- isAscii and isLower. isAsciiLower :: Char -> Bool -- | Selects ASCII upper-case letters, i.e. characters satisfying both -- isAscii and isUpper. isAsciiUpper :: Char -> Bool -- | Selects control characters, which are the non-printing characters of -- the Latin-1 subset of Unicode. -- -- This function returns True if its argument has the -- GeneralCategory Control. -- --

--   isControl c == Data.Char.isControl c
--

isControl :: Char -> Bool -- | Selects printable Unicode characters (letters, numbers, marks, -- punctuation, symbols and spaces). -- -- This function returns False if its argument has one of the -- following GeneralCategorys, or True otherwise: -- --

LineSeparator
ParagraphSeparator
Control
Format
Surrogate
PrivateUse
NotAssigned

-- --

--   isPrint c == Data.Char.isPrint c
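--

isPrint :: Char -> Bool -- | Selects Unicode punctuation characters, including various kinds of -- connectors, brackets and quotes. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --
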
ConnectorPunctuation
DashPunctuation
OpenPunctuation
ClosePunctuation
InitialQuote
FinalQuote
OtherPunctuation

-- --

--   isPunctuation c == Data.Char.isPunctuation c
--

isPunctuation :: Char -> Bool -- | Selects Unicode symbol characters, including mathematical and currency -- symbols. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

MathSymbol
CurrencySymbol
ModifierSymbol
OtherSymbol

-- --

--   isSymbol c == Data.Char.isSymbol c
--

isSymbol :: Char -> Bool -- | Selects Unicode mark characters, for example accents and the like, -- which combine with preceding characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

NonSpacingMark
SpacingCombiningMark
EnclosingMark

-- --

--   isMark c == Data.Char.isMark c
--

isMark :: Char -> Bool -- | Selects Unicode space and separator characters. -- -- This function returns True if its argument has one of the -- following GeneralCategorys, or False otherwise: -- --

Space
LineSeparator
ParagraphSeparator

-- --

--   isSeparator c == Data.Char.isSeparator c
--

isSeparator :: Char -> Bool -- | Returns the CodePointType of a character. codePointType :: Char -> CodePointType -- | Abbreviation of GeneralCategory used in the Unicode standard. generalCategoryAbbr :: GeneralCategory -> String -- | Returns True for alphabetic Unicode characters (lower-case, -- upper-case and title-case letters, plus letters of caseless scripts -- and modifier letters). -- -- Note: this function is not equivalent to isAlpha -- / isLetter: -- --

isAlpha matches the following general -- categories:
- UppercaseLetter -- (Lu)
- LowercaseLetter -- (Ll)
- TitlecaseLetter -- (Lt)
- ModifierLetter -- (Lm)
- OtherLetter (Lo)
whereas isAlphabetic matches:
- Uppercase -- property
- Lowercase -- property
- TitlecaseLetter -- (Lt)
- ModifierLetter -- (Lm)
- OtherLetter -- (Lo)
- LetterNumber -- (Nl)
- Other_Alphabetic -- property

isAlphabetic :: Char -> Bool -- | Returns True for any whitespace characters, and the control -- characters \t, \n, \r, \f, -- \v. -- -- See: Unicode White_Space. -- -- Note: isWhiteSpace is not equivalent to -- isSpace. isWhiteSpace selects the same characters from -- isSpace plus the following: -- --

U+0085 NEXT LINE (NEL)
U+2028 LINE SEPARATOR
U+2029 PARAGRAPH SEPARATOR

isWhiteSpace :: Char -> Bool -- | Total count of all jamo characters. -- --

--   jamoNCount = jamoVCount * jamoTCount
--

jamoNCount :: Int -- | First leading consonant jamo. jamoLFirst :: Int -- | Total count of leading consonant jamo. jamoLCount :: Int -- | Given a Unicode character, if it is a leading jamo, return its index -- in the list of leading jamo consonants, otherwise return -- Nothing. jamoLIndex :: Char -> Maybe Int -- | Last leading consonant jamo. jamoLLast :: Int -- | First vowel jamo. jamoVFirst :: Int -- | Total count of vowel jamo. jamoVCount :: Int -- | Given a Unicode character, if it is a vowel jamo, return its index in -- the list of vowel jamo, otherwise return Nothing. jamoVIndex :: Char -> Maybe Int -- | Last vowel jamo. jamoVLast :: Int -- | The first trailing consonant jamo. -- -- Note that jamoTFirst does not represent a valid T, it -- represents a missing T i.e. LV without a T. See comments under -- jamoTIndex . jamoTFirst :: Int -- | Total count of trailing consonant jamo. jamoTCount :: Int -- | Given a Unicode character, if it is a trailing jamo consonant, return -- its index in the list of trailing jamo consonants, otherwise return -- Nothing. -- -- Note that index 0 is not a valid index for a trailing consonant. Index -- 0 corresponds to an LV syllable, without a T. See "Hangul Syllable -- Decomposition" in the Conformance chapter of the Unicode standard for -- more details. jamoTIndex :: Char -> Maybe Int -- | Last trailing consonant jamo. jamoTLast :: Int -- | Codepoint of the first pre-composed Hangul character. hangulFirst :: Int -- | Codepoint of the last Hangul character. hangulLast :: Int -- | Determine if the given character is a precomposed Hangul syllable. isHangul :: Char -> Bool -- | Determine if the given character is a Hangul LV syllable. -- -- Note: this function requires a precomposed Hangul syllable but -- does not check it. Use isHangul to check the input -- character before passing it to isHangulLV. isHangulLV :: Char -> Bool -- | Returns True for upper-case characters. -- -- It uses the character property Uppercase. 
-- -- Note: it does not match title-cased letters. Those are matched -- using: generalCategory c == TitlecaseLetter. -- -- See: isUpper for the legacy predicate. isUpperCase :: Char -> Bool -- | Returns True for lower-case characters. -- -- It uses the character property Lowercase. -- -- See: isLower for the legacy predicate. isLowerCase :: Char -> Bool -- | Returns the full folded case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Case_Folding. caseFoldMapping :: Unfold Char Char -- | Convert a character to full folded case if defined, else to -- itself. -- -- This function is mainly useful for performing caseless (also known as -- case insensitive) string comparisons. -- -- A string x is a caseless match for a string y if and -- only if: -- --

--   foldMap toCaseFoldString x == foldMap toCaseFoldString y
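--

-- -- The result string may have more than one character, and may differ -- from applying toLowerString to the input string. For instance, -- “ﬓ” (U+FB13 Armenian small ligature men now) is case folded -- to the sequence “մ” (U+0574 Armenian small letter men) -- followed by “ն” (U+0576 Armenian small letter now), while “µ” -- (U+00B5 micro sign) is case folded to “μ” (U+03BC -- Greek small letter mu) instead of itself. -- -- It uses the character property Case_Folding. -- --
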
--   toCaseFoldString c == foldMap toCaseFoldString (toCaseFoldString c)
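--

toCaseFoldString :: Char -> String -- | Returns the full lower case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Lowercase_Mapping. lowerCaseMapping :: Unfold Char Char -- | Convert a character to full lower case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, “İ” -- (U+0130 Latin capital letter I with dot above) maps to the -- sequence: “i” (U+0069 Latin small letter I) followed by “ ̇” -- (U+0307 combining dot above). -- -- It uses the character property Lowercase_Mapping. -- -- See: toLower for simple lower case conversion. -- --
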
--   toLowerString c == foldMap toLowerString (toLowerString c)
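--

toLowerString :: Char -> String -- | Returns the full title case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Titlecase_Mapping. titleCaseMapping :: Unfold Char Char -- | Convert a character to full title case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, “ﬂ” -- (U+FB02 Latin small ligature FL) is converted to the -- sequence: “F” (U+0046 Latin capital letter F) followed by “l” -- (U+006C Latin small letter L). -- -- It uses the character property Titlecase_Mapping. -- -- See: toTitle for simple title case conversion. toTitleString :: Char -> String -- | Returns the full upper case mapping of a character if the -- character is changed, else nothing. -- -- It uses the character property Uppercase_Mapping. upperCaseMapping :: Unfold Char Char -- | Convert a character to full upper case if defined, else to -- itself. -- -- The result string may have more than one character. For instance, the -- German “ß” (U+00DF Eszett) maps to the two-letter sequence -- “SS”. -- -- It uses the character property Uppercase_Mapping. -- -- See: toUpper for simple upper case conversion. -- --
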
--   toUpperString c == foldMap toUpperString (toUpperString c)
--

toUpperString :: Char -> String -- | Show the code point of a character using the Unicode Standard -- convention: hexadecimal codepoint padded with zeros if shorter than 4 -- characters. -- --

--   >>> showCodePoint '\xf' ""
--   "000F"
--   
--   >>> showCodePoint '\x1ffff' ""
--   "1FFFF"
--

showCodePoint :: Char -> ShowS -- | The fromEnum method restricted to the type Char. ord :: Char -> Int -- | The toEnum method restricted to the type Char. chr :: Int -> Char