module Data.Text.ICU.Char
(
Property
, BidiClass_(..)
, Block_(..)
, Bool_(..)
, Decomposition_(..)
, EastAsianWidth_(..)
, GeneralCategory_(..)
, HangulSyllableType_(..)
, JoiningGroup_(..)
, JoiningType_(..)
, NumericType_(..)
, CanonicalCombiningClass_(..)
, LeadCanonicalCombiningClass_(..)
, TrailingCanonicalCombiningClass_(..)
, NFCQuickCheck_(..)
, NFDQuickCheck_(..)
, NFKCQuickCheck_(..)
, NFKDQuickCheck_(..)
, GraphemeClusterBreak_(..)
, LineBreak_(..)
, SentenceBreak_(..)
, WordBreak_(..)
, BlockCode(..)
, Direction(..)
, Decomposition(..)
, EastAsianWidth(..)
, GeneralCategory(..)
, HangulSyllableType(..)
, JoiningGroup(..)
, JoiningType(..)
, NumericType(..)
, GraphemeClusterBreak(..)
, LineBreak(..)
, SentenceBreak(..)
, WordBreak(..)
, blockCode
, charFullName
, charName
, charFromFullName
, charFromName
, combiningClass
, direction
, property
, isoComment
, isMirrored
, mirror
, digitToInt
, numericValue
) where
import Data.Char (chr, ord)
import Data.Int (Int32)
import Data.Text.ICU.Error (u_INVALID_CHAR_FOUND)
import Data.Text.ICU.Error.Internal (UErrorCode, handleOverflowError, withError)
import Data.Text.ICU.Internal (UBool, UChar32, asBool)
import Data.Text.ICU.Normalize.Internal (toNCR)
import Data.Typeable (Typeable)
import Data.Word (Word8)
import Foreign.C.String (CString, peekCStringLen, withCString)
import Foreign.C.Types (CInt(..))
import Foreign.Ptr (Ptr)
import System.IO.Unsafe (unsafePerformIO)
data Direction =
LeftToRight
| RightToLeft
| EuropeanNumber
| EuropeanNumberSeparator
| EuropeanNumberTerminator
| ArabicNumber
| CommonNumberSeparator
| BlockSeparator
| SegmentSeparator
| WhiteSpaceNeutral
| OtherNeutral
| LeftToRightEmbedding
| LeftToRightOverride
| RightToLeftArabic
| RightToLeftEmbedding
| RightToLeftOverride
| PopDirectionalFormat
| DirNonSpacingMark
| BoundaryNeutral
deriving (Eq, Enum, Show, Typeable)
data BlockCode =
NoBlock
| BasicLatin
| Latin1Supplement
| LatinExtendedA
| LatinExtendedB
| IPAExtensions
| SpacingModifierLetters
| CombiningDiacriticalMarks
| GreekAndCoptic
| Cyrillic
| Armenian
| Hebrew
| Arabic
| Syriac
| Thaana
| Devanagari
| Bengali
| Gurmukhi
| Gujarati
| Oriya
| Tamil
| Telugu
| Kannada
| Malayalam
| Sinhala
| Thai
| Lao
| Tibetan
| Myanmar
| Georgian
| HangulJamo
| Ethiopic
| Cherokee
| UnifiedCanadianAboriginalSyllabics
| Ogham
| Runic
| Khmer
| Mongolian
| LatinExtendedAdditional
| GreekExtended
| GeneralPunctuation
| SuperscriptsAndSubscripts
| CurrencySymbols
| CombiningDiacriticalMarksForSymbols
| LetterlikeSymbols
| NumberForms
| Arrows
| MathematicalOperators
| MiscellaneousTechnical
| ControlPictures
| OpticalCharacterRecognition
| EnclosedAlphanumerics
| BoxDrawing
| BlockElements
| GeometricShapes
| MiscellaneousSymbols
| Dingbats
| BraillePatterns
| CJKRadicalsSupplement
| KangxiRadicals
| IdeographicDescriptionCharacters
| CJKSymbolsAndPunctuation
| Hiragana
| Katakana
| Bopomofo
| HangulCompatibilityJamo
| Kanbun
| BopomofoExtended
| EnclosedCJKLettersAndMonths
| CJKCompatibility
| CJKUnifiedIdeographsExtensionA
| CJKUnifiedIdeographs
| YiSyllables
| YiRadicals
| HangulSyllables
| HighSurrogates
| HighPrivateUseSurrogates
| LowSurrogates
| PrivateUseArea
| CJKCompatibilityIdeographs
| AlphabeticPresentationForms
| ArabicPresentationFormsA
| CombiningHalfMarks
| CJKCompatibilityForms
| SmallFormVariants
| ArabicPresentationFormsB
| Specials
| HalfwidthAndFullwidthForms
| OldItalic
| Gothic
| Deseret
| ByzantineMusicalSymbols
| MusicalSymbols
| MathematicalAlphanumericSymbols
| CJKUnifiedIdeographsExtensionB
| CJKCompatibilityIdeographsSupplement
| Tags
| CyrillicSupplement
| Tagalog
| Hanunoo
| Buhid
| Tagbanwa
| MiscellaneousMathematicalSymbolsA
| SupplementalArrowsA
| SupplementalArrowsB
| MiscellaneousMathematicalSymbolsB
| SupplementalMathematicalOperators
| KatakanaPhoneticExtensions
| VariationSelectors
| SupplementaryPrivateUseAreaA
| SupplementaryPrivateUseAreaB
| Limbu
| TaiLe
| KhmerSymbols
| PhoneticExtensions
| MiscellaneousSymbolsAndArrows
| YijingHexagramSymbols
| LinearBSyllabary
| LinearBIdeograms
| AegeanNumbers
| Ugaritic
| Shavian
| Osmanya
| CypriotSyllabary
| TaiXuanJingSymbols
| VariationSelectorsSupplement
| AncientGreekMusicalNotation
| AncientGreekNumbers
| ArabicSupplement
| Buginese
| CJKStrokes
| CombiningDiacriticalMarksSupplement
| Coptic
| EthiopicExtended
| EthiopicSupplement
| GeorgianSupplement
| Glagolitic
| Kharoshthi
| ModifierToneLetters
| NewTaiLue
| OldPersian
| PhoneticExtensionsSupplement
| SupplementalPunctuation
| SylotiNagri
| Tifinagh
| VerticalForms
| N'Ko
| Balinese
| LatinExtendedC
| LatinExtendedD
| PhagsPa
| Phoenician
| Cuneiform
| CuneiformNumbersAndPunctuation
| CountingRodNumerals
| Sundanese
| Lepcha
| OlChiki
| CyrillicExtendedA
| Vai
| CyrillicExtendedB
| Saurashtra
| KayahLi
| Rejang
| Cham
| AncientSymbols
| PhaistosDisc
| Lycian
| Carian
| Lydian
| MahjongTiles
| DominoTiles
deriving (Eq, Enum, Show, Typeable)
data Bool_ =
Alphabetic
| ASCIIHexDigit
| BidiControl
| BidiMirrored
| Dash
| DefaultIgnorable
| Deprecated
| Diacritic
| Extender
| FullCompositionExclusion
| GraphemeBase
| GraphemeExtend
| GraphemeLink
| HexDigit
| Hyphen
| IDContinue
| IDStart
| Ideographic
| IDSBinaryOperator
| IDSTrinaryOperator
| JoinControl
| LogicalOrderException
| Lowercase
| Math
| NonCharacter
| QuotationMark
| Radical
| SoftDotted
| TerminalPunctuation
| UnifiedIdeograph
| Uppercase
| WhiteSpace
| XidContinue
| XidStart
| CaseSensitive
| STerm
| VariationSelector
| NFDInert
| NFKDInert
| NFCInert
| NFKCInert
| SegmentStarter
| PatternSyntax
| PatternWhiteSpace
| POSIXAlNum
| POSIXBlank
| POSIXGraph
| POSIXPrint
| POSIXXDigit
deriving (Eq, Enum, Show, Typeable)
class Property p v | p -> v where
fromNative :: p -> Int32 -> v
toUProperty :: p -> UProperty
data BidiClass_ = BidiClass deriving (Show, Typeable)
instance Property BidiClass_ Direction where
fromNative _ = toEnum . fromIntegral
toUProperty _ = (4096)
data Block_ = Block
instance Property Block_ BlockCode where
fromNative _ = toEnum . fromIntegral
toUProperty _ = (4097)
data CanonicalCombiningClass_ = CanonicalCombiningClass deriving (Show,Typeable)
instance Property CanonicalCombiningClass_ Int where
fromNative _ = fromIntegral
toUProperty _ = (4098)
data Decomposition_ = Decomposition deriving (Show, Typeable)
data Decomposition =
Canonical
| Compat
| Circle
| Final
| Font
| Fraction
| Initial
| Isolated
| Medial
| Narrow
| NoBreak
| Small
| Square
| Sub
| Super
| Vertical
| Wide
| Count
deriving (Eq, Enum, Show, Typeable)
instance Property Decomposition_ (Maybe Decomposition) where
fromNative _ = maybeEnum
toUProperty _ = (4099)
data EastAsianWidth_ = EastAsianWidth deriving (Show, Typeable)
data EastAsianWidth = EANeutral
| EAAmbiguous
| EAHalf
| EAFull
| EANarrow
| EAWide
| EACount
deriving (Eq, Enum, Show, Typeable)
instance Property EastAsianWidth_ EastAsianWidth where
fromNative _ = toEnum . fromIntegral
toUProperty _ = (4100)
instance Property Bool_ Bool where
fromNative _ = (/=0)
toUProperty = fromIntegral . fromEnum
data GeneralCategory_ = GeneralCategory deriving (Show, Typeable)
data GeneralCategory =
GeneralOtherType
| UppercaseLetter
| LowercaseLetter
| TitlecaseLetter
| ModifierLetter
| OtherLetter
| NonSpacingMark
| EnclosingMark
| CombiningSpacingMark
| DecimalDigitNumber
| LetterNumber
| OtherNumber
| SpaceSeparator
| LineSeparator
| ParagraphSeparator
| ControlChar
| FormatChar
| PrivateUseChar
| Surrogate
| DashPunctuation
| StartPunctuation
| EndPunctuation
| ConnectorPunctuation
| OtherPunctuation
| MathSymbol
| CurrencySymbol
| ModifierSymbol
| OtherSymbol
| InitialPunctuation
| FinalPunctuation
deriving (Eq, Enum, Show, Typeable)
instance Property GeneralCategory_ GeneralCategory where
fromNative _ = toEnum . fromIntegral
toUProperty _ = (4101)
data JoiningGroup_ = JoiningGroup deriving (Show, Typeable)
maybeEnum :: Enum a => Int32 -> Maybe a
maybeEnum 0 = Nothing
maybeEnum n = Just $! toEnum (fromIntegral n1)
data JoiningGroup =
Ain
| Alaph
| Alef
| Beh
| Beth
| Dal
| DalathRish
| E
| Feh
| FinalSemkath
| Gaf
| Gamal
| Hah
| HamzaOnHehGoal
| He
| Heh
| HehGoal
| Heth
| Kaf
| Kaph
| KnottedHeh
| Lam
| Lamadh
| Meem
| Mim
| Noon
| Nun
| Pe
| Qaf
| Qaph
| Reh
| ReversedPe
| Sad
| Sadhe
| Seen
| Semkath
| Shin
| SwashKaf
| SyriacWaw
| Tah
| Taw
| TehMarbuta
| Teth
| Waw
| Yeh
| YehBarree
| YehWithTail
| Yudh
| YudhHe
| Zain
| Fe
| Khaph
| Zhain
| BurushaskiYehBarree
deriving (Eq, Enum, Show, Typeable)
instance Property JoiningGroup_ (Maybe JoiningGroup) where
fromNative _ = maybeEnum
toUProperty _ = (4102)
data JoiningType_ = JoiningType deriving (Show, Typeable)
data JoiningType =
JoinCausing
| DualJoining
| LeftJoining
| RightJoining
| Transparent
deriving (Eq, Enum, Show, Typeable)
instance Property JoiningType_ (Maybe JoiningType) where
fromNative _ = maybeEnum
toUProperty _ = (4103)
data LineBreak_ = LineBreak deriving (Show, Typeable)
data LineBreak =
Ambiguous
| LBAlphabetic
| BreakBoth
| BreakAfter
| BreakBefore
| MandatoryBreak
| ContingentBreak
| ClosePunctuation
| CombiningMark
| CarriageReturn
| Exclamation
| Glue
| LBHyphen
| LBIdeographic
| Inseparable
| InfixNumeric
| LineFeed
| Nonstarter
| Numeric
| OpenPunctuation
| PostfixNumeric
| PrefixNumeric
| Quotation
| ComplexContext
| LBSurrogate
| Space
| BreakSymbols
| Zwspace
| NextLine
| WordJoiner
| H2
| H3
| JL
| JT
| JV
deriving (Eq, Enum, Show, Typeable)
instance Property LineBreak_ (Maybe LineBreak) where
fromNative _ = maybeEnum
toUProperty _ = (4104)
data NumericType_ = NumericType deriving (Show, Typeable)
data NumericType = NTDecimal | NTDigit | NTNumeric
deriving (Eq, Enum, Show, Typeable)
instance Property NumericType_ (Maybe NumericType) where
fromNative _ = maybeEnum
toUProperty _ = (4105)
data HangulSyllableType_ = HangulSyllableType deriving (Show, Typeable)
data HangulSyllableType =
LeadingJamo
| VowelJamo
| TrailingJamo
| LVSyllable
| LVTSyllable
deriving (Eq, Enum, Show, Typeable)
instance Property HangulSyllableType_ (Maybe HangulSyllableType) where
fromNative _ = maybeEnum
toUProperty _ = (4107)
data NFCQuickCheck_ = NFCQuickCheck deriving (Show, Typeable)
data NFDQuickCheck_ = NFDQuickCheck deriving (Show, Typeable)
data NFKCQuickCheck_ = NFKCQuickCheck deriving (Show, Typeable)
data NFKDQuickCheck_ = NFKDQuickCheck deriving (Show, Typeable)
instance Property NFCQuickCheck_ (Maybe Bool) where
fromNative _ = toNCR . fromIntegral
toUProperty _ = (4110)
instance Property NFDQuickCheck_ (Maybe Bool) where
fromNative _ = toNCR . fromIntegral
toUProperty _ = (4108)
instance Property NFKCQuickCheck_ (Maybe Bool) where
fromNative _ = toNCR . fromIntegral
toUProperty _ = (4111)
instance Property NFKDQuickCheck_ (Maybe Bool) where
fromNative _ = toNCR . fromIntegral
toUProperty _ = (4109)
data LeadCanonicalCombiningClass_ = LeadCanonicalCombiningClass
deriving (Show, Typeable)
instance Property LeadCanonicalCombiningClass_ Int where
fromNative _ = fromIntegral
toUProperty _ = (4112)
data TrailingCanonicalCombiningClass_ = TrailingCanonicalCombiningClass
deriving (Show, Typeable)
instance Property TrailingCanonicalCombiningClass_ Int where
fromNative _ = fromIntegral
toUProperty _ = (4113)
data GraphemeClusterBreak_ = GraphemeClusterBreak deriving (Show, Typeable)
data GraphemeClusterBreak =
Control
| CR
| Extend
| L
| LF
| LV
| LVT
| T
| V
| SpacingMark
| Prepend
deriving (Eq, Enum, Show, Typeable)
instance Property GraphemeClusterBreak_ (Maybe GraphemeClusterBreak) where
fromNative _ = maybeEnum
toUProperty _ = (4114)
data SentenceBreak_ = SentenceBreak deriving (Show, Typeable)
data SentenceBreak =
SBATerm
| SBClose
| SBFormat
| SBLower
| SBNumeric
| SBOLetter
| SBSep
| SBSP
| SBSTerm
| SBUpper
| SBCR
| SBExtend
| SBLF
| SBSContinue
deriving (Eq, Enum, Show, Typeable)
instance Property SentenceBreak_ (Maybe SentenceBreak) where
fromNative _ = maybeEnum
toUProperty _ = (4115)
data WordBreak_ = WordBreak deriving (Show, Typeable)
data WordBreak =
WBALetter
| WBFormat
| WBKatakana
| WBMidLetter
| WBMidNum
| WBNumeric
| WBExtendNumLet
| WBCR
| WBExtend
| WBLF
| WBMidNumLet
| WBNewline
deriving (Eq, Enum, Show, Typeable)
instance Property WordBreak_ (Maybe WordBreak) where
fromNative _ = maybeEnum
toUProperty _ = (4116)
property :: Property p v => p -> Char -> v
property p c = fromNative p . u_getIntPropertyValue (fromIntegral (ord c)) .
toUProperty $ p
blockCode :: Char -> BlockCode
blockCode = toEnum . fromIntegral . ublock_getCode . fromIntegral . ord
direction :: Char -> Direction
direction = toEnum . fromIntegral . u_charDirection . fromIntegral . ord
isMirrored :: Char -> Bool
isMirrored = asBool . u_isMirrored . fromIntegral . ord
mirror :: Char -> Char
mirror = chr . fromIntegral . u_charMirror . fromIntegral . ord
combiningClass :: Char -> Int
combiningClass = fromIntegral . u_getCombiningClass . fromIntegral . ord
digitToInt :: Char -> Maybe Int
digitToInt c
| i == 1 = Nothing
| otherwise = Just $! fromIntegral i
where i = u_charDigitValue . fromIntegral . ord $ c
numericValue :: Char -> Maybe Double
numericValue c
| v == (123456789) = Nothing
| otherwise = Just v
where v = u_getNumericValue . fromIntegral . ord $ c
charName :: Char -> String
charName = charName' (0)
charFullName :: Char -> String
charFullName = charName' (2)
charFromName :: String -> Maybe Char
charFromName = charFromName' (0)
charFromFullName :: String -> Maybe Char
charFromFullName = charFromName' (2)
charFromName' :: UCharNameChoice -> String -> Maybe Char
charFromName' choice name = unsafePerformIO . withCString name $ \ptr -> do
(err,r) <- withError $ u_charFromName choice ptr
return $! if err == u_INVALID_CHAR_FOUND || r == 0xffff
then Nothing
else Just $! chr (fromIntegral r)
isoComment :: Char -> String
isoComment c = fillString $ u_getISOComment (fromIntegral (ord c))
charName' :: UCharNameChoice -> Char -> String
charName' choice c = fillString $ u_charName (fromIntegral (ord c)) choice
fillString :: (CString -> Int32 -> Ptr UErrorCode -> IO Int32) -> String
fillString act = unsafePerformIO $
handleOverflowError 83 act (curry peekCStringLen)
type UBlockCode = CInt
type UCharDirection = CInt
type UCharNameChoice = CInt
type UProperty = CInt
foreign import ccall unsafe "hs_text_icu.h __hs_ublock_getCode" ublock_getCode
:: UChar32 -> UBlockCode
foreign import ccall unsafe "hs_text_icu.h __hs_u_charDirection" u_charDirection
:: UChar32 -> UCharDirection
foreign import ccall unsafe "hs_text_icu.h __hs_u_isMirrored" u_isMirrored
:: UChar32 -> UBool
foreign import ccall unsafe "hs_text_icu.h __hs_u_charMirror" u_charMirror
:: UChar32 -> UChar32
foreign import ccall unsafe "hs_text_icu.h __hs_u_getCombiningClass" u_getCombiningClass
:: UChar32 -> Word8
foreign import ccall unsafe "hs_text_icu.h __hs_u_charDigitValue" u_charDigitValue
:: UChar32 -> Int32
foreign import ccall unsafe "hs_text_icu.h __hs_u_charName" u_charName
:: UChar32 -> UCharNameChoice -> CString -> Int32 -> Ptr UErrorCode
-> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_u_charFromName" u_charFromName
:: UCharNameChoice -> CString -> Ptr UErrorCode
-> IO UChar32
foreign import ccall unsafe "hs_text_icu.h __hs_u_getISOComment" u_getISOComment
:: UChar32 -> CString -> Int32 -> Ptr UErrorCode -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_u_getIntPropertyValue" u_getIntPropertyValue
:: UChar32 -> UProperty -> Int32
foreign import ccall unsafe "hs_text_icu.h __hs_u_getNumericValue" u_getNumericValue
:: UChar32 -> Double