module Text.XML.WraXML.String where

import qualified Text.XML.Basic.Character as XmlChar
import Text.XML.WraXML.Utility (compose)

import qualified Data.String.Unicode as Unicode
import qualified Data.Char as Char
import Data.Tuple.HT (mapFst)

import qualified Numeric

import qualified Control.Monad.Exception.Synchronous as Exc
import Control.Monad.Trans.State (State, evalState)
import Control.Applicative (Applicative, liftA)


type T = [Atom]
type EmbeddedExceptions = [Exc.Exceptional String Char]

type Atom = XmlChar.T


-- | should be an abstract type
type Encoding = String
-- | should be [Word8]
type Encoded  = String


{- |
Literal translation from pure strings.
This can only work, if the string does not contain special characters.
-}
fromString :: String -> T
fromString :: String -> T
fromString = forall a b. (a -> b) -> [a] -> [b]
map Char -> T
XmlChar.fromUnicode

diffFromString :: String -> T -> T
diffFromString :: String -> T -> T
diffFromString =
   forall a b c. (a -> b -> c) -> b -> a -> c
flip (forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Char
c -> (Char -> T
XmlChar.fromUnicode Char
c forall a. a -> [a] -> [a]
:)))

{- |
default routine
-}
fromUnicodeString :: String -> T
fromUnicodeString :: String -> T
fromUnicodeString = forall a b. (a -> b) -> [a] -> [b]
map Char -> T
XmlChar.asciiFromUnicode


toUnicodeStringOrFormat :: T -> String
toUnicodeStringOrFormat :: T -> String
toUnicodeStringOrFormat =
   forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. [a -> a] -> a -> a
compose String
"" forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   forall a b. (a -> b) -> [a] -> [b]
map T -> ShowS
XmlChar.toUnicodeOrFormat

toUnicodeStringEmbedMessage :: T -> String
toUnicodeStringEmbedMessage :: T -> String
toUnicodeStringEmbedMessage =
   forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. [a -> a] -> a -> a
compose String
"" forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   forall a b. (a -> b) -> [a] -> [b]
map
      (forall e b a. (e -> b) -> (a -> b) -> Exceptional e a -> b
Exc.switch (\String
err ->
         String -> ShowS
showString String
"(decoding error: " forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
err forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
")")
         (:)) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   T -> EmbeddedExceptions
toUnicodeStringEmbedException

{- |
Errors in on resolution of references yield undefined elements.
-}
toUnicodeString :: T -> String
toUnicodeString :: T -> String
toUnicodeString =
   forall a b. (a -> b) -> [a] -> [b]
map (forall e a. (e -> a) -> Exceptional e a -> a
Exc.resolve forall a. HasCallStack => String -> a
error) forall b c a. (b -> c) -> (a -> b) -> a -> c
. T -> EmbeddedExceptions
toUnicodeStringEmbedException

toUnicodeStringEmbedException :: T -> EmbeddedExceptions
toUnicodeStringEmbedException :: T -> EmbeddedExceptions
toUnicodeStringEmbedException =
   forall a b. (a -> b) -> [a] -> [b]
map T -> Exceptional String Char
XmlChar.toUnicode


{-# DEPRECATED utf8ToUnicodeString, isoLatin1ToUnicodeString, replaceUTF8ByUnicode, uStringWithErrorsMergePlainChars "XmlChar.Unicode constructors must contain unicode characters and not encoded ones. Decode characters before parsing!" #-}
{- |
Interpret the XML string as mixture of ISO-Latin-1 characters and XML entities
and convert that to a Unicode string.
-}
isoLatin1ToUnicodeString :: T -> String
isoLatin1ToUnicodeString :: T -> String
isoLatin1ToUnicodeString = T -> String
toUnicodeString

{- |
Interpret the XML string as mixture of UTF-8 characters and XML entities
and convert that to a Unicode string.
-}
utf8ToUnicodeString :: T -> String
utf8ToUnicodeString :: T -> String
utf8ToUnicodeString = T -> String
toUnicodeString forall b c a. (b -> c) -> (a -> b) -> a -> c
. T -> T
replaceUTF8ByUnicode


readHex :: (Eq a, Num a) => String -> a
readHex :: forall a. (Eq a, Num a) => String -> a
readHex String
str =
   case forall a. (Eq a, Num a) => ReadS a
Numeric.readHex String
str of
      [(a
n,String
"")] -> a
n
      [(a, String)]
_ -> forall a. HasCallStack => String -> a
error String
"readHex: no parse"

{- |
Caution: There is a memory leak for the case that entity references are huge.
-}
parse :: String -> T
parse :: String -> T
parse (Char
'&':Char
'#':Char
'x':String
xs) =
   (Char -> Bool) -> (String -> T) -> String -> String -> T
parseAux Char -> Bool
Char.isHexDigit (Int -> T
XmlChar.fromCharRef forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Eq a, Num a) => String -> a
readHex) String
"&#x" String
xs
parse (Char
'&':Char
'#':String
xs) =
   (Char -> Bool) -> (String -> T) -> String -> String -> T
parseAux Char -> Bool
Char.isDigit (Int -> T
XmlChar.fromCharRef forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Read a => String -> a
read) String
"&#" String
xs
parse (Char
'&':String
xs) =
   (Char -> Bool) -> (String -> T) -> String -> String -> T
parseAux Char -> Bool
Char.isAlphaNum String -> T
XmlChar.fromEntityRef String
"&" String
xs
parse (Char
x:String
xs) = Char -> T
XmlChar.fromUnicode Char
x forall a. a -> [a] -> [a]
: String -> T
parse String
xs
parse [] = []
-- use unfoldr?

parseAux ::
   (Char -> Bool)         ->
   (String -> XmlChar.T)  ->
   String                 ->
   String                 ->
   T
parseAux :: (Char -> Bool) -> (String -> T) -> String -> String -> T
parseAux Char -> Bool
check String -> T
ref String
prefix String
xs =
   let (String
name,String
rest0) = forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
check String
xs
   in  case String
rest0 of
          Char
';':String
rest1 -> String -> T
ref String
name forall a. a -> [a] -> [a]
: String -> T
parse String
rest1
          String
_ -> forall a b. (a -> b) -> [a] -> [b]
map Char -> T
XmlChar.fromUnicode (String
prefixforall a. [a] -> [a] -> [a]
++String
name) forall a. [a] -> [a] -> [a]
++ String -> T
parse String
rest0


reduceRefs :: T -> T
reduceRefs :: T -> T
reduceRefs = forall a b. (a -> b) -> [a] -> [b]
map T -> T
XmlChar.reduceRef

{- |
Consider the XmlString as a mixture of XML entities and UTF-8 characters.
Replace UTF-8 characters by Unicode representations.
-}
replaceUTF8ByUnicode :: T -> T
replaceUTF8ByUnicode :: T -> T
replaceUTF8ByUnicode =
   ShowS -> T -> T
mapUnicodeRuns (forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecodingFct
Unicode.utf8ToUnicode)

mapUnicodeRuns :: (String -> String) -> T -> T
mapUnicodeRuns :: ShowS -> T -> T
mapUnicodeRuns ShowS
f =
   forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. [a -> a] -> a -> a
compose [] forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   forall a. (String -> a) -> (Int -> a) -> (String -> a) -> T -> [a]
XmlChar.switchUnicodeRuns
--      (\s -> fromString (f s) ++)
      (String -> T -> T
diffFromString forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
f)
      ((:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> T
XmlChar.fromCharRef)
      ((:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> T
XmlChar.fromEntityRef)



uStringWithErrorsMergePlainChars ::
   EmbeddedExceptions -> [Exc.Exceptional String String]
uStringWithErrorsMergePlainChars :: EmbeddedExceptions -> [Exceptional String String]
uStringWithErrorsMergePlainChars =
   forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Exceptional String Char
x [Exceptional String String]
ys ->
      case Exceptional String Char
x of
         Exc.Exception String
err -> forall e a. e -> Exceptional e a
Exc.Exception String
err forall a. a -> [a] -> [a]
: [Exceptional String String]
ys
         Exc.Success Char
c  ->
            forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (:) forall a b. (a -> b) -> a -> b
$
            forall a c b. (a -> c) -> (a, b) -> (c, b)
mapFst forall e a. a -> Exceptional e a
Exc.Success forall a b. (a -> b) -> a -> b
$
            forall a c b. (a -> c) -> (a, b) -> (c, b)
mapFst (Char
cforall a. a -> [a] -> [a]
:) forall a b. (a -> b) -> a -> b
$
               case [Exceptional String String]
ys of
                  Exc.Success String
cs : [Exceptional String String]
ys0 -> (String
cs, [Exceptional String String]
ys0)
                  [Exceptional String String]
_ -> ([], [Exceptional String String]
ys))
      []


evalDecodeAdaptive ::
   State (Encoded -> String) a -> a
evalDecodeAdaptive :: forall a. State ShowS a -> a
evalDecodeAdaptive =
   forall a b c. (a -> b -> c) -> b -> a -> c
flip forall s a. State s a -> s -> a
evalState forall a. a -> a
id


liftFromUnicode :: (String -> String) -> (T -> T)
liftFromUnicode :: ShowS -> T -> T
liftFromUnicode ShowS
f =
   String -> T
fromUnicodeString forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. T -> String
toUnicodeString

liftFromUnicodeA :: Applicative m => (String -> m String) -> (T -> m T)
liftFromUnicodeA :: forall (m :: * -> *).
Applicative m =>
(String -> m String) -> T -> m T
liftFromUnicodeA String -> m String
f =
   forall (f :: * -> *) a b. Applicative f => (a -> b) -> f a -> f b
liftA String -> T
fromUnicodeString forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> m String
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. T -> String
toUnicodeString