{-# LANGUAGE OverloadedStrings #-}

-- | Example-based tests for the per-code-point lookups imported from
-- "Data.Text.IDN.Internal.Unicode": derived IDNA status, bidirectional
-- class, combining-mark and virama predicates, script, and contextual rule.
module Data.Text.IDN.Internal.UnicodeSpec (spec) where

import Test.Hspec

import Data.Text.IDN.Internal.Unicode
import Data.Text.IDN.Types

-- | Hspec tree with one @describe@ group per function under test.
--
-- Every example feeds a single literal code point to the function and
-- compares the result against a fixed expected constructor.
spec :: Spec
spec = do
  describe "codePointStatus" $ do
    it "correctly identifies ASCII letters as PVALID" $ do
      codePointStatus 'a' `shouldBe` PVALID
      codePointStatus 'z' `shouldBe` PVALID
      -- NOTE(review): RFC 5892 lists U+0041..U+005A as DISALLOWED (they are
      -- unstable under case folding). These expectations assume the
      -- implementation deliberately reports upper-case ASCII as PVALID --
      -- TODO confirm against the implementation.
      codePointStatus 'A' `shouldBe` PVALID
      codePointStatus 'Z' `shouldBe` PVALID

    it "correctly identifies ASCII digits as PVALID" $ do
      codePointStatus '0' `shouldBe` PVALID
      codePointStatus '9' `shouldBe` PVALID

    it "correctly identifies hyphen as PVALID" $ do
      codePointStatus '-' `shouldBe` PVALID

    it "correctly identifies ZERO WIDTH NON-JOINER as CONTEXTJ" $ do
      codePointStatus '\x200C' `shouldBe` CONTEXTJ

    it "correctly identifies ZERO WIDTH JOINER as CONTEXTJ" $ do
      codePointStatus '\x200D' `shouldBe` CONTEXTJ

    it "correctly identifies MIDDLE DOT as CONTEXTO" $ do
      codePointStatus '\xB7' `shouldBe` CONTEXTO

    it "correctly identifies spaces as DISALLOWED" $ do
      codePointStatus ' ' `shouldBe` DISALLOWED
      codePointStatus '\t' `shouldBe` DISALLOWED

    it "correctly identifies control characters as DISALLOWED" $ do
      codePointStatus '\x00' `shouldBe` DISALLOWED
      codePointStatus '\x1F' `shouldBe` DISALLOWED

  describe "bidiClass" $ do
    it "correctly identifies ASCII letters as L (Left-to-Right)" $ do
      bidiClass 'a' `shouldBe` L
      bidiClass 'Z' `shouldBe` L

    it "correctly identifies Arabic letters as AL (Arabic Letter)" $ do
      bidiClass '\x0627' `shouldBe` AL -- ARABIC LETTER ALEF
      bidiClass '\x0628' `shouldBe` AL -- ARABIC LETTER BEH

    it "correctly identifies Hebrew letters as R (Right-to-Left)" $ do
      bidiClass '\x05D0' `shouldBe` R -- HEBREW LETTER ALEF
      bidiClass '\x05D1' `shouldBe` R -- HEBREW LETTER BET

    it "correctly identifies ASCII digits as EN (European Number)" $ do
      bidiClass '0' `shouldBe` EN
      bidiClass '9' `shouldBe` EN

    it "correctly identifies Arabic-Indic digits as AN (Arabic Number)" $ do
      bidiClass '\x0660' `shouldBe` AN -- ARABIC-INDIC DIGIT ZERO
      bidiClass '\x0669' `shouldBe` AN -- ARABIC-INDIC DIGIT NINE

    -- SPACE and TAB have different expected classes, so each gets its own
    -- accurately labelled example.
    it "correctly identifies space as WS" $ do
      bidiClass ' ' `shouldBe` WS

    it "correctly identifies tab as S (Segment Separator)" $ do
      bidiClass '\t' `shouldBe` S

    it "correctly identifies paragraph separator as B" $ do
      bidiClass '\x000A' `shouldBe` B -- LINE FEED
      bidiClass '\x000D' `shouldBe` B -- CARRIAGE RETURN

  describe "isCombiningMark" $ do
    it "returns False for regular ASCII letters" $ do
      isCombiningMark 'a' `shouldBe` False
      isCombiningMark 'Z' `shouldBe` False

    it "returns True for combining diacritical marks" $ do
      isCombiningMark '\x0301' `shouldBe` True -- COMBINING ACUTE ACCENT
      isCombiningMark '\x0308' `shouldBe` True -- COMBINING DIAERESIS

  describe "isVirama" $ do
    it "returns False for regular characters" $ do
      isVirama 'a' `shouldBe` False
      isVirama '\x0627' `shouldBe` False

    it "returns True for virama characters" $ do
      isVirama '\x094D' `shouldBe` True -- DEVANAGARI SIGN VIRAMA

  describe "scriptOf" $ do
    it "identifies Latin script" $ do
      scriptOf 'a' `shouldBe` Just Latin
      scriptOf 'Z' `shouldBe` Just Latin

    it "identifies Greek script" $ do
      scriptOf '\x03B1' `shouldBe` Just Greek -- GREEK SMALL LETTER ALPHA
      scriptOf '\x03A9' `shouldBe` Just Greek -- GREEK CAPITAL LETTER OMEGA

    it "identifies Hebrew script" $ do
      scriptOf '\x05D0' `shouldBe` Just Hebrew -- HEBREW LETTER ALEF

    it "identifies Arabic script" $ do
      scriptOf '\x0627' `shouldBe` Just Arabic -- ARABIC LETTER ALEF

    -- The two kana examples are deliberately lenient: they also pass when
    -- the lookup answers 'Just OtherScript' instead of the specific script.
    it "identifies Hiragana script" $ do
      scriptOf '\x3042' `shouldSatisfy` (`elem` [Just Hiragana, Just OtherScript])

    it "identifies Katakana script" $ do
      scriptOf '\x30A2' `shouldSatisfy` (`elem` [Just Katakana, Just OtherScript])

  -- Labelled with the name of the function actually exercised.
  describe "lookupContextRule" $ do
    it "identifies ZWNJ rule" $ do
      lookupContextRule '\x200C' `shouldBe` Just ZWNJRule

    it "identifies ZWJ rule" $ do
      lookupContextRule '\x200D' `shouldBe` Just ZWJRule

    it "identifies Middle Dot rule" $ do
      lookupContextRule '\xB7' `shouldBe` Just MiddleDotRule

    it "identifies Greek Keraia rule" $ do
      lookupContextRule '\x0375' `shouldBe` Just GreekKeraiaRule

    it "identifies Hebrew punctuation rules" $ do
      lookupContextRule '\x05F3' `shouldBe` Just HebrewGereshRule
      lookupContextRule '\x05F4' `shouldBe` Just HebrewGershayimRule

    it "identifies Katakana Middle Dot rule" $ do
      lookupContextRule '\x30FB' `shouldBe` Just KatakanaMiddleDotRule

    it "identifies Arabic-Indic Digits rule" $ do
      lookupContextRule '\x0660' `shouldBe` Just ArabicIndicDigitsRule
      lookupContextRule '\x0669' `shouldBe` Just ArabicIndicDigitsRule

    it "identifies Extended Arabic-Indic Digits rule" $ do
      lookupContextRule '\x06F0' `shouldBe` Just ExtendedArabicIndicDigitsRule
      lookupContextRule '\x06F9' `shouldBe` Just ExtendedArabicIndicDigitsRule

    it "returns Nothing for regular characters" $ do
      lookupContextRule 'a' `shouldBe` Nothing
      lookupContextRule '0' `shouldBe` Nothing