module Data.Text.ICU.Char
(
Property
, BidiClass_(..)
, Block_(..)
, Bool_(..)
, Decomposition_(..)
, EastAsianWidth_(..)
, GeneralCategory_(..)
, HangulSyllableType_(..)
, JoiningGroup_(..)
, JoiningType_(..)
, NumericType_(..)
, CanonicalCombiningClass_(..)
, LeadCanonicalCombiningClass_(..)
, TrailingCanonicalCombiningClass_(..)
, NFCQuickCheck_(..)
, NFDQuickCheck_(..)
, NFKCQuickCheck_(..)
, NFKDQuickCheck_(..)
, GraphemeClusterBreak_(..)
, LineBreak_(..)
, SentenceBreak_(..)
, WordBreak_(..)
, BlockCode(..)
, Direction(..)
, Decomposition(..)
, EastAsianWidth(..)
, GeneralCategory(..)
, HangulSyllableType(..)
, JoiningGroup(..)
, JoiningType(..)
, NumericType(..)
, GraphemeClusterBreak(..)
, LineBreak(..)
, SentenceBreak(..)
, WordBreak(..)
, blockCode
, charFullName
, charName
, charFromFullName
, charFromName
, combiningClass
, direction
, property
, isMirrored
, mirror
, digitToInt
, numericValue
) where
import Control.DeepSeq (NFData(..))
import Data.Char (chr, ord)
import Data.Int (Int32)
import Data.Text.ICU.Error (u_INVALID_CHAR_FOUND)
import Data.Text.ICU.Error.Internal (UErrorCode, handleOverflowError, withError)
import Data.Text.ICU.Internal (UBool, UChar32, asBool)
import Data.Text.ICU.Normalize.Internal (toNCR)
import Data.Typeable (Typeable)
import Data.Word (Word8)
import Foreign.C.String (CString, peekCStringLen, withCString)
import Foreign.C.Types (CInt(..))
import Foreign.Ptr (Ptr)
import System.IO.Unsafe (unsafePerformIO)
data Direction =
LeftToRight
| RightToLeft
| EuropeanNumber
| EuropeanNumberSeparator
| EuropeanNumberTerminator
| ArabicNumber
| CommonNumberSeparator
| BlockSeparator
| SegmentSeparator
| WhiteSpaceNeutral
| OtherNeutral
| LeftToRightEmbedding
| LeftToRightOverride
| RightToLeftArabic
| RightToLeftEmbedding
| RightToLeftOverride
| PopDirectionalFormat
| DirNonSpacingMark
| BoundaryNeutral
deriving (Eq, Enum, Show, Typeable)
instance NFData Direction where
rnf !_ = ()
data BlockCode =
NoBlock
| BasicLatin
| Latin1Supplement
| LatinExtendedA
| LatinExtendedB
| IPAExtensions
| SpacingModifierLetters
| CombiningDiacriticalMarks
| GreekAndCoptic
| Cyrillic
| Armenian
| Hebrew
| Arabic
| Syriac
| Thaana
| Devanagari
| Bengali
| Gurmukhi
| Gujarati
| Oriya
| Tamil
| Telugu
| Kannada
| Malayalam
| Sinhala
| Thai
| Lao
| Tibetan
| Myanmar
| Georgian
| HangulJamo
| Ethiopic
| Cherokee
| UnifiedCanadianAboriginalSyllabics
| Ogham
| Runic
| Khmer
| Mongolian
| LatinExtendedAdditional
| GreekExtended
| GeneralPunctuation
| SuperscriptsAndSubscripts
| CurrencySymbols
| CombiningDiacriticalMarksForSymbols
| LetterlikeSymbols
| NumberForms
| Arrows
| MathematicalOperators
| MiscellaneousTechnical
| ControlPictures
| OpticalCharacterRecognition
| EnclosedAlphanumerics
| BoxDrawing
| BlockElements
| GeometricShapes
| MiscellaneousSymbols
| Dingbats
| BraillePatterns
| CJKRadicalsSupplement
| KangxiRadicals
| IdeographicDescriptionCharacters
| CJKSymbolsAndPunctuation
| Hiragana
| Katakana
| Bopomofo
| HangulCompatibilityJamo
| Kanbun
| BopomofoExtended
| EnclosedCJKLettersAndMonths
| CJKCompatibility
| CJKUnifiedIdeographsExtensionA
| CJKUnifiedIdeographs
| YiSyllables
| YiRadicals
| HangulSyllables
| HighSurrogates
| HighPrivateUseSurrogates
| LowSurrogates
| PrivateUseArea
| CJKCompatibilityIdeographs
| AlphabeticPresentationForms
| ArabicPresentationFormsA
| CombiningHalfMarks
| CJKCompatibilityForms
| SmallFormVariants
| ArabicPresentationFormsB
| Specials
| HalfwidthAndFullwidthForms
| OldItalic
| Gothic
| Deseret
| ByzantineMusicalSymbols
| MusicalSymbols
| MathematicalAlphanumericSymbols
| CJKUnifiedIdeographsExtensionB
| CJKCompatibilityIdeographsSupplement
| Tags
| CyrillicSupplement
| Tagalog
| Hanunoo
| Buhid
| Tagbanwa
| MiscellaneousMathematicalSymbolsA
| SupplementalArrowsA
| SupplementalArrowsB
| MiscellaneousMathematicalSymbolsB
| SupplementalMathematicalOperators
| KatakanaPhoneticExtensions
| VariationSelectors
| SupplementaryPrivateUseAreaA
| SupplementaryPrivateUseAreaB
| Limbu
| TaiLe
| KhmerSymbols
| PhoneticExtensions
| MiscellaneousSymbolsAndArrows
| YijingHexagramSymbols
| LinearBSyllabary
| LinearBIdeograms
| AegeanNumbers
| Ugaritic
| Shavian
| Osmanya
| CypriotSyllabary
| TaiXuanJingSymbols
| VariationSelectorsSupplement
| AncientGreekMusicalNotation
| AncientGreekNumbers
| ArabicSupplement
| Buginese
| CJKStrokes
| CombiningDiacriticalMarksSupplement
| Coptic
| EthiopicExtended
| EthiopicSupplement
| GeorgianSupplement
| Glagolitic
| Kharoshthi
| ModifierToneLetters
| NewTaiLue
| OldPersian
| PhoneticExtensionsSupplement
| SupplementalPunctuation
| SylotiNagri
| Tifinagh
| VerticalForms
| N'Ko
| Balinese
| LatinExtendedC
| LatinExtendedD
| PhagsPa
| Phoenician
| Cuneiform
| CuneiformNumbersAndPunctuation
| CountingRodNumerals
| Sundanese
| Lepcha
| OlChiki
| CyrillicExtendedA
| Vai
| CyrillicExtendedB
| Saurashtra
| KayahLi
| Rejang
| Cham
| AncientSymbols
| PhaistosDisc
| Lycian
| Carian
| Lydian
| MahjongTiles
| DominoTiles
| Samaritan
| UnifiedCanadianAboriginalSyllabicsExtended
| TaiTham
| VedicExtensions
| Lisu
| Bamum
| CommonIndicNumberForms
| DevanagariExtended
| HangulJamoExtendedA
| Javanese
| MyanmarExtendedA
| TaiViet
| MeeteiMayek
| HangulJamoExtendedB
| ImperialAramaic
| OldSouthArabian
| Avestan
| InscriptionalParthian
| InscriptionalPahlavi
| OldTurkic
| RumiNumeralSymbols
| Kaithi
| EgyptianHieroglyphs
| EnclosedAlphanumericSupplement
| EnclosedIdeographicSupplement
| CJKUnifiedIdeographsExtensionC
| Mandaic
| Batak
| EthiopicExtendedA
| Brahmi
| BamumSupplement
| KanaSupplement
| PlayingCards
| MiscellaneousSymbolsAndPictographs
| Emoticons
| TransportAndMapSymbols
| AlchemicalSymbols
| CJKUnifiedIdeographsExtensionD
| ArabicExtendedA
| ArabicMathematicalAlphabeticSymbols
| Chakma
| MeeteiMayekExtensions
| MeroiticCursive
| MeroiticHieroglyphs
| Miao
| Sharada
| SoraSompeng
| SundaneseSupplement
| Takri
deriving (Eq, Enum, Bounded, Show, Typeable)
instance NFData BlockCode where
rnf !_ = ()
data Bool_ =
Alphabetic
| ASCIIHexDigit
| BidiControl
| BidiMirrored
| Dash
| DefaultIgnorable
| Deprecated
| Diacritic
| Extender
| FullCompositionExclusion
| GraphemeBase
| GraphemeExtend
| GraphemeLink
| HexDigit
| Hyphen
| IDContinue
| IDStart
| Ideographic
| IDSBinaryOperator
| IDSTrinaryOperator
| JoinControl
| LogicalOrderException
| Lowercase
| Math
| NonCharacter
| QuotationMark
| Radical
| SoftDotted
| TerminalPunctuation
| UnifiedIdeograph
| Uppercase
| WhiteSpace
| XidContinue
| XidStart
| CaseSensitive
| STerm
| VariationSelector
| NFDInert
| NFKDInert
| NFCInert
| NFKCInert
| SegmentStarter
| PatternSyntax
| PatternWhiteSpace
| POSIXAlNum
| POSIXBlank
| POSIXGraph
| POSIXPrint
| POSIXXDigit
deriving (Eq, Enum, Show, Typeable)
instance NFData Bool_ where
rnf !_ = ()
class Property p v | p -> v where
fromNative :: p -> Int32 -> v
toUProperty :: p -> UProperty
data BidiClass_ = BidiClass deriving (Show, Typeable)
instance NFData BidiClass_ where
rnf !_ = ()
instance Property BidiClass_ Direction where
fromNative _ = toEnum . fromIntegral
toUProperty _ = (4096)
data Block_ = Block
instance NFData Block_ where
rnf !_ = ()
instance Property Block_ BlockCode where
fromNative _ = toEnum . fromIntegral
toUProperty _ = (4097)
data CanonicalCombiningClass_ = CanonicalCombiningClass deriving (Show,Typeable)
instance NFData CanonicalCombiningClass_ where
rnf !_ = ()
instance Property CanonicalCombiningClass_ Int where
fromNative _ = fromIntegral
toUProperty _ = (4098)
data Decomposition_ = Decomposition deriving (Show, Typeable)
instance NFData Decomposition_ where
rnf !_ = ()
data Decomposition =
Canonical
| Compat
| Circle
| Final
| Font
| Fraction
| Initial
| Isolated
| Medial
| Narrow
| NoBreak
| Small
| Square
| Sub
| Super
| Vertical
| Wide
| Count
deriving (Eq, Enum, Show, Typeable)
instance NFData Decomposition where
rnf !_ = ()
instance Property Decomposition_ (Maybe Decomposition) where
fromNative _ = maybeEnum
toUProperty _ = (4099)
data EastAsianWidth_ = EastAsianWidth deriving (Show, Typeable)
instance NFData EastAsianWidth_ where
rnf !_ = ()
data EastAsianWidth = EANeutral
| EAAmbiguous
| EAHalf
| EAFull
| EANarrow
| EAWide
| EACount
deriving (Eq, Enum, Show, Typeable)
instance NFData EastAsianWidth where
rnf !_ = ()
instance Property EastAsianWidth_ EastAsianWidth where
fromNative _ = toEnum . fromIntegral
toUProperty _ = (4100)
instance Property Bool_ Bool where
fromNative _ = (/=0)
toUProperty = fromIntegral . fromEnum
data GeneralCategory_ = GeneralCategory deriving (Show, Typeable)
instance NFData GeneralCategory_ where
rnf !_ = ()
data GeneralCategory =
GeneralOtherType
| UppercaseLetter
| LowercaseLetter
| TitlecaseLetter
| ModifierLetter
| OtherLetter
| NonSpacingMark
| EnclosingMark
| CombiningSpacingMark
| DecimalDigitNumber
| LetterNumber
| OtherNumber
| SpaceSeparator
| LineSeparator
| ParagraphSeparator
| ControlChar
| FormatChar
| PrivateUseChar
| Surrogate
| DashPunctuation
| StartPunctuation
| EndPunctuation
| ConnectorPunctuation
| OtherPunctuation
| MathSymbol
| CurrencySymbol
| ModifierSymbol
| OtherSymbol
| InitialPunctuation
| FinalPunctuation
deriving (Eq, Enum, Show, Typeable)
instance NFData GeneralCategory where
rnf !_ = ()
instance Property GeneralCategory_ GeneralCategory where
fromNative _ = toEnum . fromIntegral
toUProperty _ = (4101)
data JoiningGroup_ = JoiningGroup deriving (Show, Typeable)
instance NFData JoiningGroup_ where
rnf !_ = ()
maybeEnum :: Enum a => Int32 -> Maybe a
maybeEnum 0 = Nothing
maybeEnum n = Just $! toEnum (fromIntegral n1)
data JoiningGroup =
Ain
| Alaph
| Alef
| Beh
| Beth
| Dal
| DalathRish
| E
| Feh
| FinalSemkath
| Gaf
| Gamal
| Hah
| HamzaOnHehGoal
| He
| Heh
| HehGoal
| Heth
| Kaf
| Kaph
| KnottedHeh
| Lam
| Lamadh
| Meem
| Mim
| Noon
| Nun
| Pe
| Qaf
| Qaph
| Reh
| ReversedPe
| Sad
| Sadhe
| Seen
| Semkath
| Shin
| SwashKaf
| SyriacWaw
| Tah
| Taw
| TehMarbuta
| Teth
| Waw
| Yeh
| YehBarree
| YehWithTail
| Yudh
| YudhHe
| Zain
| Fe
| Khaph
| Zhain
| BurushaskiYehBarree
deriving (Eq, Enum, Show, Typeable)
instance NFData JoiningGroup where
rnf !_ = ()
instance Property JoiningGroup_ (Maybe JoiningGroup) where
fromNative _ = maybeEnum
toUProperty _ = (4102)
data JoiningType_ = JoiningType deriving (Show, Typeable)
instance NFData JoiningType_ where
rnf !_ = ()
data JoiningType =
JoinCausing
| DualJoining
| LeftJoining
| RightJoining
| Transparent
deriving (Eq, Enum, Show, Typeable)
instance NFData JoiningType where
rnf !_ = ()
instance Property JoiningType_ (Maybe JoiningType) where
fromNative _ = maybeEnum
toUProperty _ = (4103)
data LineBreak_ = LineBreak deriving (Show, Typeable)
instance NFData LineBreak_ where
rnf !_ = ()
data LineBreak =
Ambiguous
| LBAlphabetic
| BreakBoth
| BreakAfter
| BreakBefore
| MandatoryBreak
| ContingentBreak
| ClosePunctuation
| CombiningMark
| CarriageReturn
| Exclamation
| Glue
| LBHyphen
| LBIdeographic
| Inseparable
| InfixNumeric
| LineFeed
| Nonstarter
| Numeric
| OpenPunctuation
| PostfixNumeric
| PrefixNumeric
| Quotation
| ComplexContext
| LBSurrogate
| Space
| BreakSymbols
| Zwspace
| NextLine
| WordJoiner
| H2
| H3
| JL
| JT
| JV
deriving (Eq, Enum, Show, Typeable)
instance NFData LineBreak where
rnf !_ = ()
instance Property LineBreak_ (Maybe LineBreak) where
fromNative _ = maybeEnum
toUProperty _ = (4104)
data NumericType_ = NumericType deriving (Show, Typeable)
instance NFData NumericType_ where
rnf !_ = ()
data NumericType = NTDecimal | NTDigit | NTNumeric
deriving (Eq, Enum, Show, Typeable)
instance NFData NumericType where
rnf !_ = ()
instance Property NumericType_ (Maybe NumericType) where
fromNative _ = maybeEnum
toUProperty _ = (4105)
data HangulSyllableType_ = HangulSyllableType deriving (Show, Typeable)
instance NFData HangulSyllableType_ where
rnf !_ = ()
data HangulSyllableType =
LeadingJamo
| VowelJamo
| TrailingJamo
| LVSyllable
| LVTSyllable
deriving (Eq, Enum, Show, Typeable)
instance NFData HangulSyllableType where
rnf !_ = ()
instance Property HangulSyllableType_ (Maybe HangulSyllableType) where
fromNative _ = maybeEnum
toUProperty _ = (4107)
data NFCQuickCheck_ = NFCQuickCheck deriving (Show, Typeable)
data NFDQuickCheck_ = NFDQuickCheck deriving (Show, Typeable)
data NFKCQuickCheck_ = NFKCQuickCheck deriving (Show, Typeable)
data NFKDQuickCheck_ = NFKDQuickCheck deriving (Show, Typeable)
instance NFData NFCQuickCheck_ where
rnf !_ = ()
instance NFData NFDQuickCheck_ where
rnf !_ = ()
instance NFData NFKCQuickCheck_ where
rnf !_ = ()
instance NFData NFKDQuickCheck_ where
rnf !_ = ()
instance Property NFCQuickCheck_ (Maybe Bool) where
fromNative _ = toNCR . fromIntegral
toUProperty _ = (4110)
instance Property NFDQuickCheck_ (Maybe Bool) where
fromNative _ = toNCR . fromIntegral
toUProperty _ = (4108)
instance Property NFKCQuickCheck_ (Maybe Bool) where
fromNative _ = toNCR . fromIntegral
toUProperty _ = (4111)
instance Property NFKDQuickCheck_ (Maybe Bool) where
fromNative _ = toNCR . fromIntegral
toUProperty _ = (4109)
data LeadCanonicalCombiningClass_ = LeadCanonicalCombiningClass
deriving (Show, Typeable)
instance NFData LeadCanonicalCombiningClass_ where
rnf !_ = ()
instance Property LeadCanonicalCombiningClass_ Int where
fromNative _ = fromIntegral
toUProperty _ = (4112)
data TrailingCanonicalCombiningClass_ = TrailingCanonicalCombiningClass
deriving (Show, Typeable)
instance NFData TrailingCanonicalCombiningClass_ where
rnf !_ = ()
instance Property TrailingCanonicalCombiningClass_ Int where
fromNative _ = fromIntegral
toUProperty _ = (4113)
data GraphemeClusterBreak_ = GraphemeClusterBreak deriving (Show, Typeable)
instance NFData GraphemeClusterBreak_ where
rnf !_ = ()
data GraphemeClusterBreak =
Control
| CR
| Extend
| L
| LF
| LV
| LVT
| T
| V
| SpacingMark
| Prepend
deriving (Eq, Enum, Show, Typeable)
instance NFData GraphemeClusterBreak where
rnf !_ = ()
instance Property GraphemeClusterBreak_ (Maybe GraphemeClusterBreak) where
fromNative _ = maybeEnum
toUProperty _ = (4114)
data SentenceBreak_ = SentenceBreak deriving (Show, Typeable)
instance NFData SentenceBreak_ where
rnf !_ = ()
data SentenceBreak =
SBATerm
| SBClose
| SBFormat
| SBLower
| SBNumeric
| SBOLetter
| SBSep
| SBSP
| SBSTerm
| SBUpper
| SBCR
| SBExtend
| SBLF
| SBSContinue
deriving (Eq, Enum, Show, Typeable)
instance NFData SentenceBreak where
rnf !_ = ()
instance Property SentenceBreak_ (Maybe SentenceBreak) where
fromNative _ = maybeEnum
toUProperty _ = (4115)
data WordBreak_ = WordBreak deriving (Show, Typeable)
instance NFData WordBreak_ where
rnf !_ = ()
data WordBreak =
WBALetter
| WBFormat
| WBKatakana
| WBMidLetter
| WBMidNum
| WBNumeric
| WBExtendNumLet
| WBCR
| WBExtend
| WBLF
| WBMidNumLet
| WBNewline
deriving (Eq, Enum, Show, Typeable)
instance NFData WordBreak where
rnf !_ = ()
instance Property WordBreak_ (Maybe WordBreak) where
fromNative _ = maybeEnum
toUProperty _ = (4116)
property :: Property p v => p -> Char -> v
property p c = fromNative p . u_getIntPropertyValue (fromIntegral (ord c)) .
toUProperty $ p
blockCode :: Char -> BlockCode
blockCode = toEnum . fromIntegral . ublock_getCode . fromIntegral . ord
direction :: Char -> Direction
direction = toEnum . fromIntegral . u_charDirection . fromIntegral . ord
isMirrored :: Char -> Bool
isMirrored = asBool . u_isMirrored . fromIntegral . ord
mirror :: Char -> Char
mirror = chr . fromIntegral . u_charMirror . fromIntegral . ord
combiningClass :: Char -> Int
combiningClass = fromIntegral . u_getCombiningClass . fromIntegral . ord
digitToInt :: Char -> Maybe Int
digitToInt c
| i == 1 = Nothing
| otherwise = Just $! fromIntegral i
where i = u_charDigitValue . fromIntegral . ord $ c
numericValue :: Char -> Maybe Double
numericValue c
| v == (123456789) = Nothing
| otherwise = Just v
where v = u_getNumericValue . fromIntegral . ord $ c
charName :: Char -> String
charName = charName' (0)
charFullName :: Char -> String
charFullName = charName' (2)
charFromName :: String -> Maybe Char
charFromName = charFromName' (0)
charFromFullName :: String -> Maybe Char
charFromFullName = charFromName' (2)
charFromName' :: UCharNameChoice -> String -> Maybe Char
charFromName' choice name = unsafePerformIO . withCString name $ \ptr -> do
(err,r) <- withError $ u_charFromName choice ptr
return $! if err == u_INVALID_CHAR_FOUND || r == 0xffff
then Nothing
else Just $! chr (fromIntegral r)
charName' :: UCharNameChoice -> Char -> String
charName' choice c = fillString $ u_charName (fromIntegral (ord c)) choice
fillString :: (CString -> Int32 -> Ptr UErrorCode -> IO Int32) -> String
fillString act = unsafePerformIO $
handleOverflowError 83 act (curry peekCStringLen)
type UBlockCode = CInt
type UCharDirection = CInt
type UCharNameChoice = CInt
type UProperty = CInt
foreign import ccall unsafe "hs_text_icu.h __hs_ublock_getCode" ublock_getCode
:: UChar32 -> UBlockCode
foreign import ccall unsafe "hs_text_icu.h __hs_u_charDirection" u_charDirection
:: UChar32 -> UCharDirection
foreign import ccall unsafe "hs_text_icu.h __hs_u_isMirrored" u_isMirrored
:: UChar32 -> UBool
foreign import ccall unsafe "hs_text_icu.h __hs_u_charMirror" u_charMirror
:: UChar32 -> UChar32
foreign import ccall unsafe "hs_text_icu.h __hs_u_getCombiningClass" u_getCombiningClass
:: UChar32 -> Word8
foreign import ccall unsafe "hs_text_icu.h __hs_u_charDigitValue" u_charDigitValue
:: UChar32 -> Int32
foreign import ccall unsafe "hs_text_icu.h __hs_u_charName" u_charName
:: UChar32 -> UCharNameChoice -> CString -> Int32 -> Ptr UErrorCode
-> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_u_charFromName" u_charFromName
:: UCharNameChoice -> CString -> Ptr UErrorCode
-> IO UChar32
foreign import ccall unsafe "hs_text_icu.h __hs_u_getIntPropertyValue" u_getIntPropertyValue
:: UChar32 -> UProperty -> Int32
foreign import ccall unsafe "hs_text_icu.h __hs_u_getNumericValue" u_getNumericValue
:: UChar32 -> Double