| Copyright | (c) 2024 Pierre Le Marre |
|---|---|
| Maintainer | dev@wismill.eu |
| Stability | experimental |
| Safe Haskell | Safe-Inferred |
| Language | GHC2021 |
Unicode.CharacterDatabase.Parser.UnicodeData
Description
Parser for UnicodeData.txt.
Since: 0.1.0
Synopsis
- parse :: ByteString -> [Entry]
- data Entry = Entry {
  - range :: !CodePointRange
  - details :: !CharDetails }
- data CharDetails = CharDetails {
  - name :: !ShortByteString
  - generalCategory :: !GeneralCategory
  - combiningClass :: !Word8
  - bidiClass :: !ShortByteString
  - bidiMirrored :: !Bool
  - decomposition :: !Decomposition
  - numericValue :: !NumericValue
  - simpleUpperCaseMapping :: !(Maybe Char)
  - simpleLowerCaseMapping :: !(Maybe Char)
  - simpleTitleCaseMapping :: !(Maybe Char) }
- data GeneralCategory where
- data DecompositionType
- data Decomposition
  - = Self
  - | Decomposition { }
- data NumericValue
  - = NotNumeric
  - | Integer !Integer
  - | Rational !Rational
Documentation
parse :: ByteString -> [Entry] Source #
Parser for the UnicodeData.txt file.
```
>>> :{
traverse_ print . parse $
  "0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;\n\
  \00A8;DIAERESIS;Sk;0;ON;<compat> 0020 0308;;;;N;SPACING DIAERESIS;;;;\n\
  \17000;<Tangut Ideograph, First>;Lo;0;L;;;;;N;;;;;\n\
  \187F7;<Tangut Ideograph, Last>;Lo;0;L;;;;;N;;;;;\n"
:}
Entry {range = SingleChar {start = 'A'}, details = CharDetails {name = "LATIN CAPITAL LETTER A", generalCategory = Lu, combiningClass = 0, bidiClass = "L", bidiMirrored = False, decomposition = Self, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Just 'a', simpleTitleCaseMapping = Nothing}}
Entry {range = SingleChar {start = '\168'}, details = CharDetails {name = "DIAERESIS", generalCategory = Sk, combiningClass = 0, bidiClass = "ON", bidiMirrored = False, decomposition = Decomposition {decompositionType = Compat, decompositionMapping = " \776"}, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Nothing, simpleTitleCaseMapping = Nothing}}
Entry {range = CharRange {start = '\94208', end = '\100343'}, details = CharDetails {name = "Tangut Ideograph", generalCategory = Lo, combiningClass = 0, bidiClass = "L", bidiMirrored = False, decomposition = Self, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Nothing, simpleTitleCaseMapping = Nothing}}
```
Since: 0.1.0
data Entry Source #
An entry in UnicodeData.txt.
Since: 0.1.0
Constructors
| Entry | |
Fields
| range :: !CodePointRange | |
| details :: !CharDetails | |
data CharDetails Source #
Core characteristics of a Unicode code point
Since: 0.1.0
Constructors
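| CharDetails | |
Fields
| name :: !ShortByteString | |
| generalCategory :: !GeneralCategory | |
| combiningClass :: !Word8 | |
| bidiClass :: !ShortByteString | |
| bidiMirrored :: !Bool | |
| decomposition :: !Decomposition | |
| numericValue :: !NumericValue | |
| simpleUpperCaseMapping :: !(Maybe Char) | |
| simpleLowerCaseMapping :: !(Maybe Char) | |
| simpleTitleCaseMapping :: !(Maybe Char) | |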
Instances
| Show CharDetails Source # | |
Defined in Unicode.CharacterDatabase.Parser.UnicodeData Methods showsPrec :: Int -> CharDetails -> ShowS # show :: CharDetails -> String # showList :: [CharDetails] -> ShowS # | |
| Eq CharDetails Source # | |
Defined in Unicode.CharacterDatabase.Parser.UnicodeData | |
data GeneralCategory Source #
See: https://www.unicode.org/reports/tr44/#General_Category
Since: 0.1.0
Constructors
| Lu | Letter, Uppercase |
| Ll | Letter, Lowercase |
| Lt | Letter, Titlecase |
| Lm | Letter, Modifier |
| Lo | Letter, Other |
| Mn | Mark, Non-Spacing |
| Mc | Mark, Spacing Combining |
| Me | Mark, Enclosing |
| Nd | Number, Decimal |
| Nl | Number, Letter |
| No | Number, Other |
| Pc | Punctuation, Connector |
| Pd | Punctuation, Dash |
| Ps | Punctuation, Open |
| Pe | Punctuation, Close |
| Pi | Punctuation, Initial quote |
| Pf | Punctuation, Final quote |
| Po | Punctuation, Other |
| Sm | Symbol, Math |
| Sc | Symbol, Currency |
| Sk | Symbol, Modifier |
| So | Symbol, Other |
| Zs | Separator, Space |
| Zl | Separator, Line |
| Zp | Separator, Paragraph |
| Cc | Other, Control |
| Cf | Other, Format |
| Cs | Other, Surrogate |
| Co | Other, Private Use |
| Cn | Other, Not Assigned |
Bundled Patterns
| pattern DefaultGeneralCategory :: GeneralCategory |
Instances
data DecompositionType Source #
Constructors
| Canonical | |
| Compat | |
| Font | |
| NoBreak | |
| Initial | |
| Medial | |
| Final | |
| Isolated | |
| Circle | |
| Super | |
| Sub | |
| Vertical | |
| Wide | |
| Narrow | |
| Small | |
| Square | |
| Fraction |
Instances
| Show DecompositionType Source # | |
Defined in Unicode.CharacterDatabase.Parser.UnicodeData Methods showsPrec :: Int -> DecompositionType -> ShowS # show :: DecompositionType -> String # showList :: [DecompositionType] -> ShowS # | |
| Eq DecompositionType Source # | |
Defined in Unicode.CharacterDatabase.Parser.UnicodeData Methods (==) :: DecompositionType -> DecompositionType -> Bool # (/=) :: DecompositionType -> DecompositionType -> Bool # | |
data Decomposition Source #
Unicode decomposition of a code point
Since: 0.1.0
Constructors
| Self | |
| Decomposition | |
Fields | |
Instances
| Show Decomposition Source # | |
Defined in Unicode.CharacterDatabase.Parser.UnicodeData Methods showsPrec :: Int -> Decomposition -> ShowS # show :: Decomposition -> String # showList :: [Decomposition] -> ShowS # | |
| Eq Decomposition Source # | |
Defined in Unicode.CharacterDatabase.Parser.UnicodeData Methods (==) :: Decomposition -> Decomposition -> Bool # (/=) :: Decomposition -> Decomposition -> Bool # | |
data NumericValue Source #
Numeric value of a code point, if relevant
Since: 0.1.0
Constructors
| NotNumeric | |
| Integer !Integer | |
| Rational !Rational |
Instances
| Show NumericValue Source # | |
Defined in Unicode.CharacterDatabase.Parser.Internal Methods showsPrec :: Int -> NumericValue -> ShowS # show :: NumericValue -> String # showList :: [NumericValue] -> ShowS # | |
| Eq NumericValue Source # | |
Defined in Unicode.CharacterDatabase.Parser.Internal | |