module Text.XML.WraXML.String where
import qualified Text.XML.HXT.DOM.Unicode as Unicode
import qualified Text.XML.Basic.Character as XmlChar
import Control.Applicative (Applicative, liftA, )
import qualified Numeric
import qualified Data.Char as Char
import Text.XML.WraXML.Utility (compose, )
import Data.Tuple.HT (mapFst, )
import qualified Control.Monad.Exception.Synchronous as Exc
import Control.Monad.Trans.State (State, evalState, )
type T = [Atom]
type EmbeddedExceptions = [Exc.Exceptional String Char]
type Atom = XmlChar.T
type Encoding = String
type Encoded = String
fromString :: String -> T
fromString = map XmlChar.fromUnicode
diffFromString :: String -> T -> T
diffFromString =
flip (foldr (\c -> (XmlChar.fromUnicode c :)))
fromUnicodeString :: String -> T
fromUnicodeString = map XmlChar.asciiFromUnicode
toUnicodeStringOrFormat :: T -> String
toUnicodeStringOrFormat =
flip compose "" .
map XmlChar.toUnicodeOrFormat
toUnicodeStringEmbedMessage :: T -> String
toUnicodeStringEmbedMessage =
flip compose "" .
map
(Exc.switch (\err ->
showString "(decoding error: " . showString err . showString ")")
(:)) .
toUnicodeStringEmbedException
toUnicodeString :: T -> String
toUnicodeString =
map (Exc.resolve error) . toUnicodeStringEmbedException
toUnicodeStringEmbedException :: T -> EmbeddedExceptions
toUnicodeStringEmbedException =
map XmlChar.toUnicode
{-# DEPRECATED utf8ToUnicodeString, isoLatin1ToUnicodeString, replaceUTF8ByUnicode, uStringWithErrorsMergePlainChars "XmlChar.Unicode constructors must contain unicode characters and not encoded ones. Decode characters before parsing!" #-}
isoLatin1ToUnicodeString :: T -> String
isoLatin1ToUnicodeString = toUnicodeString
utf8ToUnicodeString :: T -> String
utf8ToUnicodeString = toUnicodeString . replaceUTF8ByUnicode
readHex :: (Eq a, Num a) => String -> a
readHex str =
case Numeric.readHex str of
[(n,"")] -> n
_ -> error "readHex: no parse"
parse :: String -> T
parse ('&':'#':'x':xs) =
parseAux Char.isHexDigit (XmlChar.fromCharRef . readHex) "&#x" xs
parse ('&':'#':xs) =
parseAux Char.isDigit (XmlChar.fromCharRef . read) "&#" xs
parse ('&':xs) =
parseAux Char.isAlphaNum XmlChar.fromEntityRef "&" xs
parse (x:xs) = XmlChar.fromUnicode x : parse xs
parse [] = []
parseAux ::
(Char -> Bool) ->
(String -> XmlChar.T) ->
String ->
String ->
T
parseAux check ref prefix xs =
let (name,rest0) = span check xs
in case rest0 of
';':rest1 -> ref name : parse rest1
_ -> map XmlChar.fromUnicode (prefix++name) ++ parse rest0
reduceRefs :: T -> T
reduceRefs = map XmlChar.reduceRef
replaceUTF8ByUnicode :: T -> T
replaceUTF8ByUnicode =
mapUnicodeRuns (fst . Unicode.utf8ToUnicode)
mapUnicodeRuns :: (String -> String) -> T -> T
mapUnicodeRuns f =
flip compose [] .
XmlChar.switchUnicodeRuns
(diffFromString . f)
((:) . XmlChar.fromCharRef)
((:) . XmlChar.fromEntityRef)
uStringWithErrorsMergePlainChars ::
EmbeddedExceptions -> [Exc.Exceptional String String]
uStringWithErrorsMergePlainChars =
foldr (\x ys ->
case x of
Exc.Exception err -> Exc.Exception err : ys
Exc.Success c ->
uncurry (:) $
mapFst Exc.Success $
mapFst (c:) $
case ys of
Exc.Success cs : ys0 -> (cs, ys0)
_ -> ([], ys))
[]
evalDecodeAdaptive ::
State (Encoded -> String) a -> a
evalDecodeAdaptive =
flip evalState id
liftFromUnicode :: (String -> String) -> (T -> T)
liftFromUnicode f =
fromUnicodeString . f . toUnicodeString
liftFromUnicodeA :: Applicative m => (String -> m String) -> (T -> m T)
liftFromUnicodeA f =
liftA fromUnicodeString . f . toUnicodeString