module Text.XML.WraXML.String where

import qualified Text.XML.HXT.DOM.Unicode as Unicode

import qualified Text.XML.Basic.Character as XMLChar

import Control.Applicative (Applicative, liftA, )
import qualified Numeric
import qualified Data.Char as Char
import Text.XML.WraXML.Utility (compose, )
import Data.Tuple.HT (mapFst, )

import qualified Control.Monad.Exception.Synchronous as Exc


type T = [Atom]
type EmbeddedExceptions = [Exc.Exceptional String Char]

type Atom = XMLChar.T


{- |
Literal translation from pure strings.
This can only work, if the string does not contain special characters.
-}
fromString :: String -> T
fromString = map XMLChar.fromUnicode

diffFromString :: String -> T -> T
diffFromString =
   flip (foldr (\c -> (XMLChar.fromUnicode c :)))

{- |
default routine
-}
fromUnicodeString :: String -> T
fromUnicodeString = map XMLChar.asciiFromUnicode


toUnicodeStringOrFormat :: T -> String
toUnicodeStringOrFormat =
   flip compose "" .
   map XMLChar.toUnicodeOrFormat

toUnicodeStringEmbedMessage :: T -> String
toUnicodeStringEmbedMessage =
   flip compose "" .
   map
      (Exc.switch (\err ->
         showString "(decoding error: " . showString err . showString ")")
         (:)) .
   toUnicodeStringEmbedException

{- |
Errors in on resolution of references yield undefined elements.
-}
toUnicodeString :: T -> String
toUnicodeString =
   map (Exc.resolve error) . toUnicodeStringEmbedException

toUnicodeStringEmbedException :: T -> EmbeddedExceptions
toUnicodeStringEmbedException =
   map XMLChar.toUnicode


{-# DEPRECATED utf8ToUnicodeString, isoLatin1ToUnicodeString, replaceUTF8ByUnicode, uStringWithErrorsMergePlainChars "XMLChar.Unicode constructors must contain unicode characters and not encoded ones. Decode characters before parsing!" #-}
{- |
Interpret the XML string as mixture of ISO-Latin-1 characters and XML entities
and convert that to a Unicode string.
-}
isoLatin1ToUnicodeString :: T -> String
isoLatin1ToUnicodeString = toUnicodeString

{- |
Interpret the XML string as mixture of UTF-8 characters and XML entities
and convert that to a Unicode string.
-}
utf8ToUnicodeString :: T -> String
utf8ToUnicodeString = toUnicodeString . replaceUTF8ByUnicode


readHex :: (Num a) => String -> a
readHex str =
   case Numeric.readHex str of
      [(n,"")] -> n
      _ -> error "readHex: no parse"

{- |
Caution: There is a memory leak for the case that entity references are huge.
-}
parse :: String -> T
parse ('&':'#':'x':xs) =
   parseAux Char.isHexDigit (XMLChar.fromCharRef . readHex) "&#x" xs
parse ('&':'#':xs) =
   parseAux Char.isDigit (XMLChar.fromCharRef . read) "&#" xs
parse ('&':xs) =
   parseAux Char.isAlphaNum XMLChar.fromEntityRef "&" xs
parse (x:xs) = XMLChar.fromUnicode x : parse xs
parse [] = []
-- use unfoldr?

parseAux ::
   (Char -> Bool)         ->
   (String -> XMLChar.T)  ->
   String                 ->
   String                 ->
   T
parseAux check ref prefix xs =
   let (name,rest0) = span check xs
   in  case rest0 of
          ';':rest1 -> ref name : parse rest1
          _ -> map XMLChar.fromUnicode (prefix++name) ++ parse rest0


reduceRefs :: T -> T
reduceRefs = map XMLChar.reduceRef

{- |
Consider the XmlString as a mixture of XML entities and UTF-8 characters.
Replace UTF-8 characters by Unicode representations.
-}
replaceUTF8ByUnicode :: T -> T
replaceUTF8ByUnicode =
   mapUnicodeRuns (fst . Unicode.utf8ToUnicode)

mapUnicodeRuns :: (String -> String) -> T -> T
mapUnicodeRuns f =
   flip compose [] .
   XMLChar.switchUnicodeRuns
--      (\s -> fromString (f s) ++)
      (diffFromString . f)
      ((:) . XMLChar.fromCharRef)
      ((:) . XMLChar.fromEntityRef)



uStringWithErrorsMergePlainChars ::
   EmbeddedExceptions -> [Exc.Exceptional String String]
uStringWithErrorsMergePlainChars =
   foldr (\x ys ->
      case x of
         Exc.Exception err -> Exc.Exception err : ys
         Exc.Success c  ->
            uncurry (:) $
            mapFst Exc.Success $
            mapFst (c:) $
               case ys of
                  Exc.Success cs : ys0 -> (cs, ys0)
                  _ -> ([], ys))
      []



liftFromUnicode :: (String -> String) -> (T -> T)
liftFromUnicode f =
   fromUnicodeString . f . toUnicodeString

liftFromUnicodeA :: Applicative m => (String -> m String) -> (T -> m T)
liftFromUnicodeA f =
   liftA fromUnicodeString . f . toUnicodeString