module Text.HTML.WraXML.String where

import qualified Text.XML.WraXML.String as XmlString
import qualified Text.HTML.Basic.Character as HtmlChar
import qualified Data.Char as Char
import qualified Text.XML.HXT.DOM.Unicode as Unicode

import qualified Control.Monad.Exception.Synchronous as Exc

import Text.XML.WraXML.Utility (compose, )


type T = [Atom]

type Atom = HtmlChar.T


{- |
Literal translation from pure strings.
This can only work, if the string does not contain special characters.
-}
fromString :: String -> T
fromString = map HtmlChar.fromUnicode

{- |
default routine
-}
fromUnicodeString :: String -> T
fromUnicodeString = map HtmlChar.asciiFromUnicode

fromUnicodeStringInternetExploder :: String -> T
fromUnicodeStringInternetExploder =
   map HtmlChar.asciiFromUnicodeInternetExploder


toUnicodeStringOrFormat :: T -> String
toUnicodeStringOrFormat =
   flip compose "" .
   map HtmlChar.toUnicodeOrFormat

toUnicodeString :: T -> String
toUnicodeString =
   map (Exc.resolve error . HtmlChar.toUnicode)


{- |
Decode plain characters using the given decoder,
and decode entities by HXT's XML entity table.
Decoding errors for both conversions are embedded where they occur.
-}
toUnicodeStringDecodingEmbedError ::
   Unicode.DecodingFctEmbedErrors -> T -> XmlString.EmbeddedExceptions
toUnicodeStringDecodingEmbedError f =
   concat .
   HtmlChar.switchUnicodeRuns
      (map Exc.fromEither . f)
      ((:[]) . HtmlChar.toUnicode . HtmlChar.fromCharRef)
      ((:[]) . HtmlChar.toUnicode . HtmlChar.fromEntityRef)



{- |
Convert characters to lower case.
This uses ISO latin encoding and may fail for exotic characters.
-}
toLower :: T -> T
toLower = map HtmlChar.toLower

toUpper :: T -> T
toUpper = map HtmlChar.toUpper


toCanonicalUnicodeString :: T -> String
toCanonicalUnicodeString = map Char.toLower . toUnicodeString

equalIgnoreCase :: T -> T -> Bool
equalIgnoreCase x y =
   toCanonicalUnicodeString x == toCanonicalUnicodeString y

elemIgnoreCase :: T -> [T] -> Bool
elemIgnoreCase x ys =
   elem (toCanonicalUnicodeString x) (map toCanonicalUnicodeString ys)