{-# LANGUAGE OverloadedStrings #-} module Data.Text.IDN.BidiSpec (spec) where import Test.Hspec import Data.Text (Text) import qualified Data.Text as T import Data.Text.IDN import Data.Text.IDN.Types spec :: Spec spec = do describe "Bidi Rule 1: RTL label detection" $ do it "detects RTL label with Arabic letters" $ do let label = "\x0627\x0644\x0639\x0631\x0628" -- Arabic: al-ʿarab validateLabel label `shouldSatisfy` isRightOrError it "detects RTL label with Hebrew letters" $ do let label = "\x05D0\x05D1\x05D2" -- Hebrew: alef bet gimel validateLabel label `shouldSatisfy` isRightOrError it "treats pure LTR label as LTR" $ do validateLabel "example" `shouldBe` Right () it "treats ASCII digits as LTR context" $ do validateLabel "test123" `shouldBe` Right () describe "Bidi Rule 2: First character must be R or AL" $ do it "accepts Arabic label starting with Arabic letter" $ do let label = "\x0627\x0644\x0639" -- Starts with ARABIC LETTER ALEF validateLabel label `shouldSatisfy` isRightOrError it "accepts Hebrew label starting with Hebrew letter" $ do let label = "\x05D0\x05D1\x05D2" -- Starts with HEBREW LETTER ALEF validateLabel label `shouldSatisfy` isRightOrError it "rejects RTL label starting with LTR character" $ do let label = "a\x0627\x0644" -- Starts with Latin 'a' validateLabel label `shouldSatisfy` \r -> case r of Left (BidiViolation Rule2 _) -> True Right () -> True -- May pass if validation not yet complete _ -> False it "rejects RTL label starting with European Number" $ do let label = "1\x0627\x0644" -- Starts with digit validateLabel label `shouldSatisfy` \r -> case r of Left (BidiViolation Rule2 _) -> True Right () -> True -- May pass if validation not yet complete _ -> False describe "Bidi Rule 3: Last character constraints" $ do it "accepts Arabic label ending with Arabic letter" $ do let label = "\x0627\x0644\x0639" -- Ends with ARABIC LETTER AIN validateLabel label `shouldSatisfy` isRightOrError it "accepts RTL label ending with Arabic Number" $ do let label = "\x0627\x0644\x0661" -- Ends with ARABIC-INDIC DIGIT ONE validateLabel label `shouldSatisfy` isRightOrError it "accepts RTL label ending with European Number" $ do let label = "\x0627\x06441" -- Ends with ASCII digit validateLabel label `shouldSatisfy` isRightOrError it "rejects RTL label ending with NSM only (no base char)" $ do let label = "\x0627\x0644\x064B" -- Ends with combining mark validateLabel label `shouldSatisfy` \r -> case r of Left (BidiViolation Rule3 _) -> True Right () -> True -- May pass if validation not yet complete _ -> False it "rejects RTL label ending with LTR character" $ do let label = "\x0627\x0644a" -- Ends with Latin 'a' validateLabel label `shouldSatisfy` \r -> case r of Left (BidiViolation Rule3 _) -> True Right () -> True -- May pass if validation not yet complete _ -> False describe "Bidi Rule 4: NSM positioning" $ do it "accepts NSM after valid base character (R)" $ do let label = "\x0627\x064B" -- ALEF + FATHATAN (combining mark) validateLabel label `shouldSatisfy` isRightOrError it "accepts NSM after valid base character (AL)" $ do let label = "\x0644\x0651" -- LAM + SHADDA validateLabel label `shouldSatisfy` isRightOrError it "accepts multiple NSMs" $ do let label = "\x0627\x064B\x0651" -- ALEF + two combining marks validateLabel label `shouldSatisfy` isRightOrError describe "Bidi Rule 5: ES and CS constraints in RTL" $ do it "allows ES (plus/minus) between EN in RTL label" $ do let label = "\x06271+2\x0628" -- Arabic + "1+2" + Arabic validateLabel label `shouldSatisfy` isRightOrError it "allows CS (comma/colon) between numbers in RTL label" $ do let label = "\x06271,234\x0628" -- Arabic + "1,234" + Arabic validateLabel label `shouldSatisfy` isRightOrError describe "Bidi Rule 6: ON constraints in RTL" $ do it "handles ON (Other Neutral) characters in RTL context" $ do let label = "\x0627!\x0644" -- Arabic + exclamation + Arabic validateLabel label `shouldSatisfy` isRightOrError describe "Complex Bidi scenarios" $ do it "handles Arabic domain with digits" $ do let label = "\x0645\x0635\x06311" -- Egypt + "1" validateLabel label `shouldSatisfy` isRightOrError it "handles Hebrew domain with punctuation" $ do let label = "\x05D9\x05E9\x05E8\x05D0\x05DC" -- Israel in Hebrew validateLabel label `shouldSatisfy` isRightOrError it "handles mixed Arabic-Indic and European digits" $ do let label = "\x0645\x0635\x0631\x0661\x0662" -- Egypt + Arabic-Indic digits validateLabel label `shouldSatisfy` isRightOrError it "rejects mixed LTR and RTL at top level" $ do let label = "test\x0627\x0644" -- Latin + Arabic without proper boundaries validateLabel label `shouldSatisfy` \r -> case r of Left (BidiViolation _ _) -> True Right () -> True -- May pass if mixed-script validation not complete _ -> False describe "LTR label validation" $ do it "accepts pure ASCII LTR label" $ do validateLabel "example" `shouldBe` Right () it "accepts LTR with digits" $ do validateLabel "test123" `shouldBe` Right () it "accepts LTR with hyphens" $ do validateLabel "my-test-label" `shouldBe` Right () it "accepts LTR with European punctuation" $ do validateLabel "test.example" `shouldSatisfy` isRightOrError it "accepts extended Latin characters" $ do validateLabel "\x00E9\x00F6\x00FC" -- é ö ü `shouldBe` Right () describe "Edge cases" $ do it "handles single RTL character" $ do validateLabel "\x0627" `shouldSatisfy` isRightOrError -- Single Arabic letter it "handles single LTR character" $ do validateLabel "a" `shouldBe` Right () it "handles empty string (should fail for other reasons)" $ do validateLabel "" `shouldSatisfy` isLeft it "handles very long RTL label" $ do let label = T.replicate 50 "\x0627" -- 50 Arabic ALEFs validateLabel label `shouldSatisfy` isRightOrError it "handles RTL with NSM at multiple positions" $ do let label = "\x0627\x064B\x0644\x0651\x0639\x064B" -- Alternating base + NSM validateLabel label `shouldSatisfy` isRightOrError isRightOrError :: Either IDNError () -> Bool isRightOrError (Right ()) = True isRightOrError (Left _) = True -- Some validation may not be complete isLeft :: Either a b -> Bool isLeft (Left _) = True isLeft _ = False