-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Parsers for Unicode Character Database (UCD) files
--   
--   This package provides simple <a>ByteString</a> parsers for the
--   <a>Unicode character database</a> (UCD).
--   
--   See the <a>Unicode Standard Annex #44</a> for further details.
@package unicode-data-parser
@version 0.2.0


-- | Miscellaneous bits common to various parsers
module Unicode.CharacterDatabase.Parser.Common

-- | Parse a single code point written as hexadecimal digits and return
--   the corresponding <a>Char</a>.
--   
--   <i>Warning:</i> raises an error on invalid input.
--   
--   <pre>
--   &gt;&gt;&gt; parseCodePoint "0061"
--   'a'
--   </pre>
parseCodePoint :: ShortByteString -> Char

-- | Parse a list of code points and return them as a list of
--   <a>Char</a>.
--   
--   NOTE(review): the expected input syntax is not visible here;
--   presumably space-separated hexadecimal code points, each read as by
--   <a>parseCodePoint</a> — confirm against the implementation.
parseCodePointList :: ShortByteString -> [Char]

-- | A Unicode code point range: either a single code point or a pair of
--   bounds.
--   
--   Both constructors carry a <tt>start</tt> field, but only
--   <a>CharRange</a> carries an <tt>end</tt> field, so <tt>end</tt> is not
--   defined for <a>SingleChar</a> values.
data CodePointRange

-- | A single code point, written e.g. <tt>061C</tt> in the data files.
SingleChar :: !Char -> CodePointRange
[$sel:start:SingleChar] :: CodePointRange -> !Char

-- | A range of code points, written e.g. <tt>0009..000D</tt> in the data
--   files.
--   
--   NOTE(review): both bounds are presumably inclusive — confirm.
CharRange :: !Char -> !Char -> CodePointRange
[$sel:start:SingleChar] :: CodePointRange -> !Char
[$sel:end:SingleChar] :: CodePointRange -> !Char

-- | Numeric value of a code point, if relevant
data NumericValue

-- | The code point has no numeric value.
NotNumeric :: NumericValue

-- | The numeric value is an integer.
Integer :: !Integer -> NumericValue

-- | The numeric value is a rational number.
Rational :: !Rational -> NumericValue

-- | Parse a space-separated list into its elements, similar to
--   <a>words</a>.
--   
--   NOTE(review): how empty input and repeated separators are handled is
--   not visible here — confirm before relying on it.
parseList :: ShortByteString -> [ShortByteString]


-- | <a>Default values</a> for properties.
module Unicode.CharacterDatabase.Parser.Properties.Defaults

-- | Default value of the age property, as a raw <a>ShortByteString</a>.
--   
--   NOTE(review): presumably the value that applies to code points not
--   listed in the corresponding data file — confirm against UAX #44.
defaultAge :: ShortByteString

-- | Default value of the general category property, as a raw
--   <a>ShortByteString</a>.
--   
--   See also the <tt>DefaultGeneralCategory</tt> pattern in
--   <a>Unicode.CharacterDatabase.Parser.UnicodeData</a>, which has type
--   <tt>GeneralCategory</tt>.
--   
--   NOTE(review): presumably the value that applies to code points not
--   listed in the corresponding data file — confirm against UAX #44.
defaultGeneralCategory :: ShortByteString

-- | Default value of the script property, as a raw
--   <a>ShortByteString</a>.
--   
--   NOTE(review): presumably the value that applies to code points not
--   listed in the corresponding data file — confirm against UAX #44.
defaultScript :: ShortByteString


-- | Parser for properties files with <i>multiple</i> properties, such as:
--   
--   <ul>
--   <li><a>DerivedCoreProperties.txt</a></li>
--   <li><a>PropList.txt</a></li>
--   <li><a>DerivedNormalizationProps.txt</a></li>
--   </ul>
module Unicode.CharacterDatabase.Parser.Properties.Multiple

-- | A parser for properties files with multiple properties.
--   
--   Takes the raw file contents and returns the parsed entries. The
--   examples given for <a>Entry</a> show the expected line format; the
--   trailing <tt>#</tt> comment of a line does not appear in the result.
parse :: ByteString -> [Entry]

-- | An entry from a properties file with multiple properties
--   
--   <pre>
--   &gt;&gt;&gt; parse "0009..000D    ; White_Space # Cc   [5] &lt;control-0009&gt;..&lt;control-000D&gt;"
--   [Entry {range = CharRange {start = '\t', end = '\r'}, property = "White_Space", value = Nothing}]
--   
--   &gt;&gt;&gt; parse "061C          ; Bidi_Control # Cf       ARABIC LETTER MARK"
--   [Entry {range = SingleChar {start = '\1564'}, property = "Bidi_Control", value = Nothing}]
--   
--   &gt;&gt;&gt; parse "037A  ; FC_NFKC; 0020 03B9      # Lm  GREEK YPOGEGRAMMENI"
--   [Entry {range = SingleChar {start = '\890'}, property = "FC_NFKC", value = Just "0020 03B9"}]
--   </pre>
data Entry
Entry :: !CodePointRange -> !ShortByteString -> !Maybe ShortByteString -> Entry

-- | Code point, or range of code points, the entry applies to.
[$sel:range:Entry] :: Entry -> !CodePointRange

-- | Name of the property, e.g. <tt>"White_Space"</tt>.
[$sel:property:Entry] :: Entry -> !ShortByteString

-- | Value of the property when the line carries a third field (e.g.
--   <tt>Just "0020 03B9"</tt>); <a>Nothing</a> for lines that only name
--   the property.
[$sel:value:Entry] :: Entry -> !Maybe ShortByteString
instance GHC.Show.Show Unicode.CharacterDatabase.Parser.Properties.Multiple.Entry
instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.Properties.Multiple.Entry


-- | Parser for properties files with a <i>single</i> property, such as:
--   
--   <ul>
--   <li><a>Scripts.txt</a></li>
--   <li><a>ScriptExtensions.txt</a></li>
--   <li><a>extracted/DerivedCombiningClass.txt</a></li>
--   </ul>
module Unicode.CharacterDatabase.Parser.Properties.Single

-- | A parser for properties files with one value per entry
--   
--   The value is kept as one unsplit string even when it contains several
--   space-separated items (see the <tt>"Arab Copt"</tt> example below);
--   <a>parseMultipleValues</a> returns a list of values per entry instead.
--   
--   <pre>
--   &gt;&gt;&gt; parse "102E0         ; Arab Copt # Mn       COPTIC EPACT THOUSANDS MARK"
--   [Entry {range = SingleChar {start = '\66272'}, value = "Arab Copt"}]
--   
--   &gt;&gt;&gt; parse "1CF7          ; Beng # Mc       VEDIC SIGN ATIKRAMA"
--   [Entry {range = SingleChar {start = '\7415'}, value = "Beng"}]
--   
--   &gt;&gt;&gt; parse "1CDE..1CDF    ; Deva # Mn   [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW"
--   [Entry {range = CharRange {start = '\7390', end = '\7391'}, value = "Deva"}]
--   
--   &gt;&gt;&gt; parse "1CD0          ; Beng Deva Gran Knda # Mn       VEDIC TONE KARSHANA"
--   [Entry {range = SingleChar {start = '\7376'}, value = "Beng Deva Gran Knda"}]
--   </pre>
parse :: ByteString -> [Entry]

-- | An entry from a properties file with one value per entry
data Entry
Entry :: !CodePointRange -> !ShortByteString -> Entry

-- | Code point, or range of code points, the entry applies to.
[$sel:range:Entry] :: Entry -> !CodePointRange

-- | Value of the property, kept as a single string; it may contain
--   several space-separated items, e.g. <tt>"Beng Deva Gran Knda"</tt>.
[$sel:value:Entry] :: Entry -> !ShortByteString

-- | A parser for properties files with multiple values per entry.
--   
--   Has the same input type as <a>parse</a>, but each resulting entry
--   carries a non-empty list of values instead of a single string.
--   
--   NOTE(review): presumably the values are obtained by splitting the
--   value field on spaces — confirm against the implementation.
parseMultipleValues :: ByteString -> [EntryMultipleValues]

-- | An entry from a properties file with multiple values per entry
data EntryMultipleValues
EntryMultipleValues :: !CodePointRange -> !NonEmpty ShortByteString -> EntryMultipleValues

-- | Code point, or range of code points, the entry applies to.
[$sel:range:EntryMultipleValues] :: EntryMultipleValues -> !CodePointRange

-- | Values of the property for this entry; the list is never empty.
[$sel:values:EntryMultipleValues] :: EntryMultipleValues -> !NonEmpty ShortByteString
instance GHC.Show.Show Unicode.CharacterDatabase.Parser.Properties.Single.Entry
instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.Properties.Single.Entry
instance GHC.Show.Show Unicode.CharacterDatabase.Parser.Properties.Single.EntryMultipleValues
instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.Properties.Single.EntryMultipleValues


-- | Parser for <a>UnicodeData.txt</a>.
module Unicode.CharacterDatabase.Parser.UnicodeData

-- | Parser for <a>UnicodeData.txt file</a>
--   
--   In the example below each ordinary line yields one <a>Entry</a>,
--   while the pair of lines named <tt>&lt;Tangut Ideograph, First&gt;</tt>
--   and <tt>&lt;Tangut Ideograph, Last&gt;</tt> is merged into a single
--   <tt>CharRange</tt> entry whose name drops the angle brackets and the
--   <tt>First</tt>/<tt>Last</tt> marker.
--   
--   <pre>
--   &gt;&gt;&gt; :{
--   traverse_ print . parse $
--     "0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;\n\
--     \00A8;DIAERESIS;Sk;0;ON;&lt;compat&gt; 0020 0308;;;;N;SPACING DIAERESIS;;;;\n\
--     \17000;&lt;Tangut Ideograph, First&gt;;Lo;0;L;;;;;N;;;;;\n\
--     \187F7;&lt;Tangut Ideograph, Last&gt;;Lo;0;L;;;;;N;;;;;\n"
--   :}
--   Entry {range = SingleChar {start = 'A'}, details = CharDetails {name = "LATIN CAPITAL LETTER A", generalCategory = Lu, combiningClass = 0, bidiClass = "L", bidiMirrored = False, decomposition = Self, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Just 'a', simpleTitleCaseMapping = Nothing}}
--   Entry {range = SingleChar {start = '\168'}, details = CharDetails {name = "DIAERESIS", generalCategory = Sk, combiningClass = 0, bidiClass = "ON", bidiMirrored = False, decomposition = Decomposition {decompositionType = Compat, decompositionMapping = " \776"}, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Nothing, simpleTitleCaseMapping = Nothing}}
--   Entry {range = CharRange {start = '\94208', end = '\100343'}, details = CharDetails {name = "Tangut Ideograph", generalCategory = Lo, combiningClass = 0, bidiClass = "L", bidiMirrored = False, decomposition = Self, numericValue = NotNumeric, simpleUpperCaseMapping = Nothing, simpleLowerCaseMapping = Nothing, simpleTitleCaseMapping = Nothing}}
--   </pre>
parse :: ByteString -> [Entry]

-- | An entry in <tt>UnicodeData.txt</tt>.
data Entry
Entry :: !CodePointRange -> !CharDetails -> Entry

-- | Code point, or range of code points, described by the entry.
[$sel:range:Entry] :: Entry -> !CodePointRange

-- | Characteristics parsed from the remaining fields of the line.
[$sel:details:Entry] :: Entry -> !CharDetails

-- | Core characteristics of a Unicode code point
data CharDetails
CharDetails :: !ShortByteString -> !GeneralCategory -> !Word8 -> !ShortByteString -> !Bool -> !Decomposition -> !NumericValue -> !Maybe Char -> !Maybe Char -> !Maybe Char -> CharDetails

-- | In case of a range, the range’s name. It is better to use the names
--   from <tt>DerivedName.txt</tt>.
[$sel:name:CharDetails] :: CharDetails -> !ShortByteString

-- | General category of the code point.
[$sel:generalCategory:CharDetails] :: CharDetails -> !GeneralCategory

-- | Combining class. Value in the range 0..254
[$sel:combiningClass:CharDetails] :: CharDetails -> !Word8

-- | Bidirectional class, kept as the raw abbreviation found in the file,
--   e.g. <tt>"L"</tt> or <tt>"ON"</tt>.
[$sel:bidiClass:CharDetails] :: CharDetails -> !ShortByteString

-- | Bidi mirrored flag; a field value of <tt>N</tt> is parsed as
--   <a>False</a>.
[$sel:bidiMirrored:CharDetails] :: CharDetails -> !Bool

-- | Decomposition of the code point; <tt>Self</tt> when the
--   decomposition field of the line is empty.
[$sel:decomposition:CharDetails] :: CharDetails -> !Decomposition

-- | Numeric value of the code point; <tt>NotNumeric</tt> when the numeric
--   fields of the line are empty.
[$sel:numericValue:CharDetails] :: CharDetails -> !NumericValue

-- | Simple uppercase mapping, if the line gives one.
[$sel:simpleUpperCaseMapping:CharDetails] :: CharDetails -> !Maybe Char

-- | Simple lowercase mapping, if the line gives one (e.g.
--   <tt>Just 'a'</tt> for <tt>LATIN CAPITAL LETTER A</tt>).
[$sel:simpleLowerCaseMapping:CharDetails] :: CharDetails -> !Maybe Char

-- | Simple titlecase mapping, if the line gives one.
[$sel:simpleTitleCaseMapping:CharDetails] :: CharDetails -> !Maybe Char

-- | See: <a>https://www.unicode.org/reports/tr44/#General_Category</a>
--   
--   Each constructor is named after the two-letter abbreviation of the
--   category it represents, as written in the general category field of
--   <tt>UnicodeData.txt</tt> (e.g. <tt>Lu</tt>, <tt>Sk</tt>).
data GeneralCategory

-- | Letter, Uppercase
Lu :: GeneralCategory

-- | Letter, Lowercase
Ll :: GeneralCategory

-- | Letter, Titlecase
Lt :: GeneralCategory

-- | Letter, Modifier
Lm :: GeneralCategory

-- | Letter, Other
Lo :: GeneralCategory

-- | Mark, Non-Spacing
Mn :: GeneralCategory

-- | Mark, Spacing Combining
Mc :: GeneralCategory

-- | Mark, Enclosing
Me :: GeneralCategory

-- | Number, Decimal
Nd :: GeneralCategory

-- | Number, Letter
Nl :: GeneralCategory

-- | Number, Other
No :: GeneralCategory

-- | Punctuation, Connector
Pc :: GeneralCategory

-- | Punctuation, Dash
Pd :: GeneralCategory

-- | Punctuation, Open
Ps :: GeneralCategory

-- | Punctuation, Close
Pe :: GeneralCategory

-- | Punctuation, Initial quote
Pi :: GeneralCategory

-- | Punctuation, Final quote
Pf :: GeneralCategory

-- | Punctuation, Other
Po :: GeneralCategory

-- | Symbol, Math
Sm :: GeneralCategory

-- | Symbol, Currency
Sc :: GeneralCategory

-- | Symbol, Modifier
Sk :: GeneralCategory

-- | Symbol, Other
So :: GeneralCategory

-- | Separator, Space
Zs :: GeneralCategory

-- | Separator, Line
Zl :: GeneralCategory

-- | Separator, Paragraph
Zp :: GeneralCategory

-- | Other, Control
Cc :: GeneralCategory

-- | Other, Format
Cf :: GeneralCategory

-- | Other, Surrogate
Cs :: GeneralCategory

-- | Other, Private Use
Co :: GeneralCategory

-- | Other, Not Assigned
Cn :: GeneralCategory
-- | Pattern synonym for the default general category.
--   
--   NOTE(review): which constructor this pattern stands for is not
--   visible here; UAX #44 gives <tt>Cn</tt> as the default
--   General_Category — confirm against the implementation.
pattern DefaultGeneralCategory :: GeneralCategory

-- | Kind of decomposition attached to a decomposition mapping. See:
--   <a>https://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings</a>
--   
--   For instance, the tag <tt>&lt;compat&gt;</tt> in the decomposition
--   field of <tt>UnicodeData.txt</tt> is parsed as <a>Compat</a>.
--   
--   NOTE(review): the other constructors presumably mirror the remaining
--   tags listed in UAX #44, and <a>Canonical</a> presumably denotes a
--   mapping without any tag — confirm against the implementation.
data DecompositionType
Canonical :: DecompositionType
Compat :: DecompositionType
Font :: DecompositionType
NoBreak :: DecompositionType
Initial :: DecompositionType
Medial :: DecompositionType
Final :: DecompositionType
Isolated :: DecompositionType
Circle :: DecompositionType
Super :: DecompositionType
Sub :: DecompositionType
Vertical :: DecompositionType
Wide :: DecompositionType
Narrow :: DecompositionType
Small :: DecompositionType
Square :: DecompositionType
Fraction :: DecompositionType

-- | Unicode decomposition of a code point
--   
--   Only the <a>Decomposition</a> constructor carries the
--   <tt>decompositionType</tt> and <tt>decompositionMapping</tt> fields;
--   they are not defined for <a>Self</a> values.
data Decomposition

-- | The line gives no decomposition mapping for the code point.
Self :: Decomposition

-- | A decomposition of the given type into the given sequence of
--   characters, e.g. <tt>&lt;compat&gt; 0020 0308</tt> is parsed as type
--   <tt>Compat</tt> with mapping <tt>" \776"</tt>.
Decomposition :: !DecompositionType -> ![Char] -> Decomposition
[$sel:decompositionType:Self] :: Decomposition -> !DecompositionType
[$sel:decompositionMapping:Self] :: Decomposition -> ![Char]

-- | Numeric value of a code point, if relevant
--   
--   The same declaration is listed under
--   <a>Unicode.CharacterDatabase.Parser.Common</a>.
--   NOTE(review): presumably a re-export of that type — confirm.
data NumericValue

-- | The code point has no numeric value.
NotNumeric :: NumericValue

-- | The numeric value is an integer.
Integer :: !Integer -> NumericValue

-- | The numeric value is a rational number.
Rational :: !Rational -> NumericValue
-- Class instances of the types declared in this module. Every type listed
-- below has Show and Eq instances; GeneralCategory additionally has Read,
-- Enum and Bounded instances.
instance GHC.Read.Read Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory
instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory
instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory
instance GHC.Enum.Enum Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory
instance GHC.Enum.Bounded Unicode.CharacterDatabase.Parser.UnicodeData.GeneralCategory
instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.DecompositionType
instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.DecompositionType
instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.Decomposition
instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.Decomposition
instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.CharDetails
instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.CharDetails
instance GHC.Show.Show Unicode.CharacterDatabase.Parser.UnicodeData.Entry
instance GHC.Classes.Eq Unicode.CharacterDatabase.Parser.UnicodeData.Entry