-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Unicode 3.2.0 character properties
--
--   Unicode 3.2.0 character properties
@package unicode-properties
@version 3.2.0.0

module Data.Char.Properties.Decomposition
data DecompositionType
DCCanonical :: DecompositionType
DCFont :: DecompositionType
DCNoBreak :: DecompositionType
DCInitial :: DecompositionType
DCMedial :: DecompositionType
DCFinal :: DecompositionType
DCIsolated :: DecompositionType
DCCircle :: DecompositionType
DCSuper :: DecompositionType
DCSub :: DecompositionType
DCVertical :: DecompositionType
DCWide :: DecompositionType
DCNarrow :: DecompositionType
DCSmall :: DecompositionType
DCSquare :: DecompositionType
DCFraction :: DecompositionType
DCCompat :: DecompositionType
data Decomposition
NoDecomposition :: Decomposition
MkDecomposition :: DecompositionType -> [Char] -> Decomposition
getDecomposition :: Char -> Decomposition
extractDecompositionCanonical :: Decomposition -> Maybe [Char]
decomposeCanonical :: String -> String
extractDecompositionCompatibility :: Decomposition -> Maybe [Char]
decomposeCompatibility :: String -> String

module Data.Char.Properties.Case
toUpperCase :: Char -> Char
toLowerCase :: Char -> Char
toTitleCase :: Char -> Char

module Data.Char.Properties.BidiCategory
data BidiCategory
BidiL :: BidiCategory
BidiLRE :: BidiCategory
BidiLRO :: BidiCategory
BidiR :: BidiCategory
BidiAL :: BidiCategory
BidiRLE :: BidiCategory
BidiRLO :: BidiCategory
BidiPDF :: BidiCategory
BidiEN :: BidiCategory
BidiES :: BidiCategory
BidiET :: BidiCategory
BidiAN :: BidiCategory
BidiCS :: BidiCategory
BidiNSM :: BidiCategory
BidiBN :: BidiCategory
BidiB :: BidiCategory
BidiS :: BidiCategory
BidiWS :: BidiCategory
BidiON :: BidiCategory
getBidiCategory :: Char -> BidiCategory

module Data.Char.Properties.GeneralCategory
data GeneralCategory
GcLu :: GeneralCategory
GcLl :: GeneralCategory
GcLt :: GeneralCategory
GcLm :: GeneralCategory
GcLo :: GeneralCategory
GcMn :: GeneralCategory
GcMc :: GeneralCategory
GcMe :: GeneralCategory
GcNd :: GeneralCategory
GcNl :: GeneralCategory
GcNo :: GeneralCategory
GcZs :: GeneralCategory
GcZl :: GeneralCategory
GcZp :: GeneralCategory
GcPc :: GeneralCategory
GcPd :: GeneralCategory
GcPs :: GeneralCategory
GcPe :: GeneralCategory
GcPi :: GeneralCategory
GcPf :: GeneralCategory
GcPo :: GeneralCategory
GcSm :: GeneralCategory
GcSc :: GeneralCategory
GcSk :: GeneralCategory
GcSo :: GeneralCategory
GcCc :: GeneralCategory
GcCf :: GeneralCategory
GcCs :: GeneralCategory
GcCo :: GeneralCategory
GcCn :: GeneralCategory
getGeneralCategory :: Char -> GeneralCategory
data GCMajorClass
ClLetter :: GCMajorClass
ClMark :: GCMajorClass
ClNumber :: GCMajorClass
ClSeparator :: GCMajorClass
ClPunctuation :: GCMajorClass
ClSymbol :: GCMajorClass
ClOther :: GCMajorClass
gcMajorClass :: GeneralCategory -> GCMajorClass
instance Eq GCMajorClass
instance Show GCMajorClass
instance Show GeneralCategory

module Data.Char.Properties.Misc
getCombiningClass :: Char -> Word8
getDecimalDigit :: Char -> Maybe Word8
getDigit :: Char -> Maybe Word8
getNumber :: Char -> Maybe Rational
isMirrored :: Char -> Bool

-- | Space characters and those format control characters (such as TAB, CR
--   and LF) which should be treated by programming languages as "white
--   space" for the purpose of parsing elements. Note: ZERO WIDTH SPACE and
--   ZERO WIDTH NO-BREAK SPACE are not included, since their functions are
--   restricted to line-break control. Their names are unfortunately
--   misleading in this respect. Note: There are other senses of
--   "whitespace" that encompass a different set of characters.
isWhiteSpace :: Char -> Bool

-- | Those format control characters which have specific functions in the
--   Bidirectional Algorithm.
isBidiControl :: Char -> Bool

-- | Those format control characters which have specific functions for
--   control of cursive joining and ligation.
isJoinControl :: Char -> Bool

-- | Those punctuation characters explicitly called out as dashes in the
--   Unicode Standard, plus compatibility equivalents to those. Most of
--   these have the Pd General Category, but some have the Sm General
--   Category because of their use in mathematics.
isDash :: Char -> Bool

-- | Those dashes used to mark connections between pieces of words, plus
--   the Katakana middle dot. The Katakana middle dot functions like a
--   hyphen, but is shaped like a dot rather than a dash.
isHyphen :: Char -> Bool

-- | Those punctuation characters that function as quotation marks.
isQuotationMark :: Char -> Bool

-- | Those punctuation characters that generally mark the end of textual
--   units.
isTerminalPunctuation :: Char -> Bool

-- | Used in deriving the Math property.
isOtherMath :: Char -> Bool

-- | Characters commonly used for the representation of hexadecimal
--   numbers, plus their compatibility equivalents.
isHexDigit :: Char -> Bool

-- | ASCII characters commonly used for the representation of hexadecimal
--   numbers.
isASCIIHexDigit :: Char -> Bool

-- | Used in deriving the Alphabetic property.
isOtherAlphabetic :: Char -> Bool

-- | Characters considered to be CJKV (Chinese, Japanese, Korean, and
--   Vietnamese) ideographs.
isIdeographic :: Char -> Bool

-- | Characters that linguistically modify the meaning of another character
--   to which they apply. Some diacritics are not combining characters, and
--   some combining characters are not diacritics.
isDiacritic :: Char -> Bool

-- | Characters whose principal function is to extend the value or shape of
--   a preceding alphabetic character. Typical of these are length and
--   iteration marks.
isExtender :: Char -> Bool

-- | Used in deriving the Lowercase property.
isOtherLowercase :: Char -> Bool

-- | Used in deriving the Uppercase property.
isOtherUppercase :: Char -> Bool

-- | Code points that are explicitly defined as illegal for the encoding of
--   characters.
--   See Unicode 3.1 for more information.
isNoncharacterCodePoint :: Char -> Bool

-- | Used in deriving the Grapheme_Extend property.
isOtherGraphemeExtend :: Char -> Bool

-- | Used in determining default grapheme cluster boundaries. For more
--   information, see UTR #29: Text Boundaries (in proposed draft status at
--   publication of Unicode 3.2).
isGraphemeLink :: Char -> Bool

-- | For a machine-readable list of Ideographic Description Sequences. For
--   more information, see Unicode 3.2.
isIDSBinaryOperator :: Char -> Bool

-- | For a machine-readable list of Ideographic Description Sequences. For
--   more information, see Unicode 3.2.
isIDSTrinaryOperator :: Char -> Bool

-- | For a machine-readable list of Ideographic Description Sequences. For
--   more information, see Unicode 3.2.
isRadical :: Char -> Bool

-- | For a machine-readable list of Ideographic Description Sequences. For
--   more information, see Unicode 3.2.
isUnifiedIdeograph :: Char -> Bool

-- | Used in deriving the Default_Ignorable_Code_Point property.
isOtherDefaultIgnorableCodePoint :: Char -> Bool

-- | For a machine-readable list of deprecated characters. No characters
--   will ever be removed from the standard, but the usage of deprecated
--   characters is strongly discouraged. For more information, see Unicode
--   3.2.
isDeprecated :: Char -> Bool

-- | Characters with a "soft dot", like i or j. An accent placed on these
--   characters causes the dot to disappear. An explicit dot above can be
--   added where required, such as in Lithuanian. For more information, see
--   Unicode 3.0, Chapter 7, Diacritics on i and j.
isSoftDotted :: Char -> Bool

-- | There are a small number of characters that do not use logical order.
--   These characters require special handling in most processing. For more
--   information, see Unicode 3.2.
isLogicalOrderException :: Char -> Bool

-- | Combining Grapheme Joiner character.
isCGJ :: Char -> Bool

-- | Characters with the Math property.
--   For more information, see Chapter
--   4, Character Properties.
--
--   Math = Sm + Other_Math.
isMath :: Char -> Bool

-- | Characters with the Alphabetic property. For more information, see
--   Chapter 4, Character Properties.
--
--   Alphabetic = Lu+Ll+Lt+Lm+Lo+ Other_Alphabetic.
isAlphabetic :: Char -> Bool

-- | Characters with the Lowercase property. For more information, see
--   Chapter 4, Character Properties and UAX #21: Case Mappings.
--
--   Lowercase = Ll + Other_Lowercase.
isLowercase :: Char -> Bool

-- | Characters with the Uppercase property. For more information, see
--   Chapter 4, Character Properties and UAX #21: Case Mappings.
--
--   Uppercase = Lu + Other_Uppercase.
isUppercase :: Char -> Bool

-- | Characters that can start an identifier.
--
--   ID_Start = Lu+Ll+Lt+Lm+Lo+Nl.
isIDStart :: Char -> Bool

-- | Characters that can continue an identifier. See Cf Note.
--
--   ID_Continue = ID_Start + Mn+Mc+Nd+Pc.
isIDContinue :: Char -> Bool

-- | For programmatic determination of default-ignorable code points. New
--   characters that should be ignored in processing (unless explicitly
--   supported) will be assigned in these ranges, permitting programs to
--   correctly handle the default behavior of such characters when not
--   otherwise supported. For more information, see UTR #29: Text
--   Boundaries (in proposed draft status at release time for Unicode 3.2).
--
--   Default_Ignorable_Code_Point = Other_Default_Ignorable_Code_Point + Cf
--   + Cc + Cs - White_Space.
isDefaultIgnorableCodePoint :: Char -> Bool

-- | For programmatic determination of grapheme cluster boundaries. For
--   more information, see UTR #29: Text Boundaries (in proposed draft
--   status at publication of Unicode 3.2).
--
--   Grapheme_Base = [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp -
--   Grapheme_Extend - Grapheme_Link - CGJ.
isGraphemeBase :: Char -> Bool

-- | For programmatic determination of grapheme cluster boundaries.
--   For more information, see UTR #29: Text Boundaries (in proposed draft
--   status at publication of Unicode 3.2).
--
--   Grapheme_Extend = Me + Mn + Mc + Other_Grapheme_Extend - Grapheme_Link
--   - CGJ.
isGraphemeExtend :: Char -> Bool

-- | Returns true if the general category is Lt.
isTitlecase :: Char -> Bool
isLineBreak :: Char -> Bool

module Data.Char.Properties

-- | This is Unicode version 3.2.0 or later.
data Version_Unicode_AtLeast_3_2_0

-- | This is Unicode version 3.2.0.
data Version_Unicode_Exactly_3_2_0

-- | This is the Unicode version. ([3,2,0])
version_Unicode :: [Int]