{-# LANGUAGE OverloadedStrings #-} -- | This module implements the two algorithms from RFC 3490. () module Text.IDNA (acePrefix, toASCII, toUnicode) where import Text.StringPrep import Text.StringPrep.Profiles import qualified Data.Text as Text import Data.Text (Text) import qualified Data.Text.Punycode as Puny import Data.Text.Encoding as E -- | The ASCII Compatible Encoding prefix (currently \'@xn--@\'). acePrefix :: Text acePrefix = "xn--" -- | Implements the ToASCII algorithm. toASCII :: Bool -- ^ Whether to allow unassigned code points (in RFC: AllowUnassigned). -> Bool -- ^ Whether to disallow certain ASCII characters (in RFC: UseSTD3ASCIIRules). -> Text -- ^ The text to transform. -> Maybe Text toASCII allowUnassigned useSTD3ASCIIRules t = do step2 <- if Text.any (>'\x7f') t then runStringPrep (namePrepProfile allowUnassigned) t else return t step3 <- if (useSTD3ASCIIRules && (Text.any isLDHascii step2 || Text.head step2 == '-' || Text.last step2 == '-')) then Nothing else return step2 step7 <- if (Text.any (>'\x7f') step2) then if acePrefix `Text.isPrefixOf` step3 then Nothing else case return (Puny.encode step3) of -- TODO: this can fail? Left _ -> Nothing Right t' -> return $ acePrefix `Text.append` E.decodeUtf8 t' else return step3 if Text.length step7 <= 63 then return step7 else Nothing isLDHascii :: Char -> Bool isLDHascii c = '\x0' <= c && c <= '\x2c' || '\x2e' <= c && c <= '\x2f' || '\x3a' <= c && c <= '\x40' || '\x5b' <= c && c <= '\x60' || '\x7b' <= c && c <= '\x7f' toUnicode :: Bool -- ^ Whether to allow unassigned code points (in RFC: AllowUnassigned). -> Bool -- ^ Whether to disallow certain ASCII characters (in RFC: UseSTD3ASCIIRules). -> Text -- ^ The text to transform. -> Text toUnicode allowUnassigned useSTD3ASCIIRules t = mergeEither $ do step2 <- if Text.any (>'\x7f') t then case runStringPrep (namePrepProfile allowUnassigned) t of Nothing -> Left t Just t' -> return t' else return t step3 <- if not $ acePrefix `Text.isPrefixOf` step2 then Left step2 else return step2 let step4 = Text.drop (Text.length acePrefix) step3 step5 <- case Puny.decode $ E.encodeUtf8 step4 of Left _ -> Left step3 Right s -> return s case toASCII allowUnassigned useSTD3ASCIIRules step5 of Nothing -> return step3 Just t' -> if t' == step3 then return step5 else return step3 mergeEither :: Either a a -> a mergeEither (Left x) = x mergeEither (Right y) = y tests :: [Text] tests = ["Bücher","tūdaliņ"]