-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Parsers for Unicode Character Database (UCD) files -- -- This package provides simple ByteString parsers for the -- Unicode character database (UCD). -- -- See the Unicode Standard Annex #44 for further details. @package unicode-data-parser @version 0.2.0 -- | Miscellaneous bits common to various parsers module Unicode.CharacterDatabase.Parser.Common -- | Parse a code point formatted as hexadecimal -- -- Warning: raises an error on invalid input. -- --
--   >>> parseCodePoint "0061"
--   'a'
--   
parseCodePoint :: ShortByteString -> Char -- | Parse a list of code points parseCodePointList :: ShortByteString -> [Char] -- | A Unicode code point range data CodePointRange SingleChar :: !Char -> CodePointRange [$sel:start:SingleChar] :: CodePointRange -> !Char CharRange :: !Char -> !Char -> CodePointRange [$sel:start:SingleChar] :: CodePointRange -> !Char [$sel:end:SingleChar] :: CodePointRange -> !Char -- | Numeric value of a code point, if relevant data NumericValue NotNumeric :: NumericValue Integer :: !Integer -> NumericValue Rational :: !Rational -> NumericValue -- | Parse a space-separated list, similar to words. parseList :: ShortByteString -> [ShortByteString] -- | Default values for properties. module Unicode.CharacterDatabase.Parser.Properties.Defaults -- | Default age defaultAge :: ShortByteString -- | Default general category defaultGeneralCategory :: ShortByteString -- | Default script defaultScript :: ShortByteString -- | Parser for properties files with multiple properties, such as -- PropList.txt and DerivedNormalizationProps.txt. module Unicode.CharacterDatabase.Parser.Properties.Multiple -- | A parser for properties files with multiple properties parse :: ByteString -> [Entry] -- | An entry from a properties file with multiple properties -- --
--   >>> parse "0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>"
--   [Entry {range = CharRange {start = '\t', end = '\r'}, property = "White_Space", value = Nothing}]
--   
--   >>> parse "061C          ; Bidi_Control # Cf       ARABIC LETTER MARK"
--   [Entry {range = SingleChar {start = '\1564'}, property = "Bidi_Control", value = Nothing}]
--   
--   >>> parse "037A  ; FC_NFKC; 0020 03B9      # Lm  GREEK YPOGEGRAMMENI"
--   [Entry {range = SingleChar {start = '\890'}, property = "FC_NFKC", value = Just "0020 03B9"}]
--   
data Entry Entry :: !CodePointRange -> !ShortByteString -> !Maybe ShortByteString -> Entry [$sel:range:Entry] :: Entry -> !CodePointRange [$sel:property:Entry] :: Entry -> !ShortByteString [$sel:value:Entry] :: Entry -> !Maybe ShortByteString instance GHC.Show.Show Unicode.CharacterDatabase.Parser.Properties.Multiple.Entry instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.Properties.Multiple.Entry -- | Parser for properties files with a single property, such as -- ScriptExtensions.txt. module Unicode.CharacterDatabase.Parser.Properties.Single -- | A parser for properties files with one value per entry -- --
--   >>> parse "102E0         ; Arab Copt # Mn       COPTIC EPACT THOUSANDS MARK"
--   [Entry {range = SingleChar {start = '\66272'}, value = "Arab Copt"}]
--   
--   >>> parse "1CF7          ; Beng # Mc       VEDIC SIGN ATIKRAMA"
--   [Entry {range = SingleChar {start = '\7415'}, value = "Beng"}]
--   
--   >>> parse "1CDE..1CDF    ; Deva # Mn   [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW"
--   [Entry {range = CharRange {start = '\7390', end = '\7391'}, value = "Deva"}]
--   
--   >>> parse "1CD0          ; Beng Deva Gran Knda # Mn       VEDIC TONE KARSHANA"
--   [Entry {range = SingleChar {start = '\7376'}, value = "Beng Deva Gran Knda"}]
--   
parse :: ByteString -> [Entry] -- | An entry from a properties file with one value per entry data Entry Entry :: !CodePointRange -> !ShortByteString -> Entry [$sel:range:Entry] :: Entry -> !CodePointRange [$sel:value:Entry] :: Entry -> !ShortByteString -- | A parser for properties files with multiple values per entry parseMultipleValues :: ByteString -> [EntryMultipleValues] -- | An entry from a properties file with multiple values per entry data EntryMultipleValues EntryMultipleValues :: !CodePointRange -> !NonEmpty ShortByteString -> EntryMultipleValues [$sel:range:EntryMultipleValues] :: EntryMultipleValues -> !CodePointRange [$sel:values:EntryMultipleValues] :: EntryMultipleValues -> !NonEmpty ShortByteString instance GHC.Show.Show Unicode.CharacterDatabase.Parser.Properties.Single.Entry instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.Properties.Single.Entry instance GHC.Show.Show Unicode.CharacterDatabase.Parser.Properties.Single.EntryMultipleValues instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.Properties.Single.EntryMultipleValues -- | Parser for UnicodeData.txt. module Unicode.CharacterDatabase.Parser.UnicodeData -- | Parser for UnicodeData.txt file -- --
--   >>> :{
--   traverse_ print . parse $
--     "0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;\n\
--     \00A8;DIAERESIS;Sk;0;ON;<compat> 0020 0308;;;;N;SPACING DIAERESIS;;;;\n\
--     \17000;<Tangut Ideograph, First>;Lo;0;L;;;;;N;;;;;\n\
--     \187F7;<Tangut Ideograph, Last>;Lo;0;L;;;;;N;;;;;\n"
--   :}
--   Entry {range = SingleChar {start = 'A'}, details = CharDetails {name = "LATIN CAPITAL LETTER A", generalCategory = Lu, combiningClass = 0, bidiClass = "L", bidiMirrored = False, decomposition = Self, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Just 'a', simpleTitleCaseMapping = Nothing}}
--   Entry {range = SingleChar {start = '\168'}, details = CharDetails {name = "DIAERESIS", generalCategory = Sk, combiningClass = 0, bidiClass = "ON", bidiMirrored = False, decomposition = Decomposition {decompositionType = Compat, decompositionMapping = " \776"}, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Nothing, simpleTitleCaseMapping = Nothing}}
--   Entry {range = CharRange {start = '\94208', end = '\100343'}, details = CharDetails {name = "Tangut Ideograph", generalCategory = Lo, combiningClass = 0, bidiClass = "L", bidiMirrored = False, decomposition = Self, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Nothing, simpleTitleCaseMapping = Nothing}}
--   
parse :: ByteString -> [Entry] -- | An entry in UnicodeData.txt. data Entry Entry :: !CodePointRange -> !CharDetails -> Entry [$sel:range:Entry] :: Entry -> !CodePointRange [$sel:details:Entry] :: Entry -> !CharDetails -- | Core characteristics of a Unicode code point data CharDetails CharDetails :: !ShortByteString -> !GeneralCategory -> !Word8 -> !ShortByteString -> !Bool -> !Decomposition -> !NumericValue -> !Maybe Char -> !Maybe Char -> !Maybe Char -> CharDetails -- | In case of a range, the range’s name. It is better to use the names -- from DerivedName.txt. [$sel:name:CharDetails] :: CharDetails -> !ShortByteString [$sel:generalCategory:CharDetails] :: CharDetails -> !GeneralCategory -- | Value in the range 0..254 [$sel:combiningClass:CharDetails] :: CharDetails -> !Word8 [$sel:bidiClass:CharDetails] :: CharDetails -> !ShortByteString [$sel:bidiMirrored:CharDetails] :: CharDetails -> !Bool [$sel:decomposition:CharDetails] :: CharDetails -> !Decomposition [$sel:numericValue:CharDetails] :: CharDetails -> !NumericValue [$sel:simpleUpperCaseMapping:CharDetails] :: CharDetails -> !Maybe Char [$sel:simpleLowerCaseMapping:CharDetails] :: CharDetails -> !Maybe Char [$sel:simpleTitleCaseMapping:CharDetails] :: CharDetails -> !Maybe Char -- | See: https://www.unicode.org/reports/tr44/#General_Category data GeneralCategory -- | Letter, Uppercase Lu :: GeneralCategory -- | Letter, Lowercase Ll :: GeneralCategory -- | Letter, Titlecase Lt :: GeneralCategory -- | Letter, Modifier Lm :: GeneralCategory -- | Letter, Other Lo :: GeneralCategory -- | Mark, Non-Spacing Mn :: GeneralCategory -- | Mark, Spacing Combining Mc :: GeneralCategory -- | Mark, Enclosing Me :: GeneralCategory -- | Number, Decimal Nd :: GeneralCategory -- | Number, Letter Nl :: GeneralCategory -- | Number, Other No :: GeneralCategory -- | Punctuation, Connector Pc :: GeneralCategory -- | Punctuation, Dash Pd :: GeneralCategory -- | Punctuation, Open Ps :: GeneralCategory -- | Punctuation, Close Pe :: 
GeneralCategory -- | Punctuation, Initial quote Pi :: GeneralCategory -- | Punctuation, Final quote Pf :: GeneralCategory -- | Punctuation, Other Po :: GeneralCategory -- | Symbol, Math Sm :: GeneralCategory -- | Symbol, Currency Sc :: GeneralCategory -- | Symbol, Modifier Sk :: GeneralCategory -- | Symbol, Other So :: GeneralCategory -- | Separator, Space Zs :: GeneralCategory -- | Separator, Line Zl :: GeneralCategory -- | Separator, Paragraph Zp :: GeneralCategory -- | Other, Control Cc :: GeneralCategory -- | Other, Format Cf :: GeneralCategory -- | Other, Surrogate Cs :: GeneralCategory -- | Other, Private Use Co :: GeneralCategory -- | Other, Not Assigned Cn :: GeneralCategory pattern DefaultGeneralCategory :: GeneralCategory -- | See: -- https://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings data DecompositionType Canonical :: DecompositionType Compat :: DecompositionType Font :: DecompositionType NoBreak :: DecompositionType Initial :: DecompositionType Medial :: DecompositionType Final :: DecompositionType Isolated :: DecompositionType Circle :: DecompositionType Super :: DecompositionType Sub :: DecompositionType Vertical :: DecompositionType Wide :: DecompositionType Narrow :: DecompositionType Small :: DecompositionType Square :: DecompositionType Fraction :: DecompositionType -- | Unicode decomposition of a code point data Decomposition Self :: Decomposition Decomposition :: !DecompositionType -> ![Char] -> Decomposition [$sel:decompositionType:Self] :: Decomposition -> !DecompositionType [$sel:decompositionMapping:Self] :: Decomposition -> ![Char] -- | Numeric value of a code point, if relevant data NumericValue NotNumeric :: NumericValue Integer :: !Integer -> NumericValue Rational :: !Rational -> NumericValue instance GHC.Read.Read Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory instance GHC.Classes.Eq 
Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory instance GHC.Enum.Enum Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory instance GHC.Enum.Bounded Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.DecompositionType instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.DecompositionType instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.Decomposition instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.Decomposition instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.CharDetails instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.CharDetails instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.Entry instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.Entry