module Text.XML.WraXML.String where
import qualified Text.XML.HXT.DOM.Unicode as Unicode
import qualified Text.XML.Basic.Character as XMLChar
import Control.Applicative (Applicative, liftA, )
import qualified Numeric
import qualified Data.Char as Char
import Text.XML.WraXML.Utility (compose, )
import Data.Tuple.HT (mapFst, )
import qualified Control.Monad.Exception.Synchronous as Exc
type T = [Atom]
type EmbeddedExceptions = [Exc.Exceptional String Char]
type Atom = XMLChar.T
fromString :: String -> T
fromString = map XMLChar.fromUnicode
diffFromString :: String -> T -> T
diffFromString =
flip (foldr (\c -> (XMLChar.fromUnicode c :)))
fromUnicodeString :: String -> T
fromUnicodeString = map XMLChar.asciiFromUnicode
toUnicodeStringOrFormat :: T -> String
toUnicodeStringOrFormat =
flip compose "" .
map XMLChar.toUnicodeOrFormat
toUnicodeStringEmbedMessage :: T -> String
toUnicodeStringEmbedMessage =
flip compose "" .
map
(Exc.switch (\err ->
showString "(decoding error: " . showString err . showString ")")
(:)) .
toUnicodeStringEmbedException
toUnicodeString :: T -> String
toUnicodeString =
map (Exc.resolve error) . toUnicodeStringEmbedException
toUnicodeStringEmbedException :: T -> EmbeddedExceptions
toUnicodeStringEmbedException =
map XMLChar.toUnicode
isoLatin1ToUnicodeString :: T -> String
isoLatin1ToUnicodeString = toUnicodeString
utf8ToUnicodeString :: T -> String
utf8ToUnicodeString = toUnicodeString . replaceUTF8ByUnicode
readHex :: (Num a) => String -> a
readHex str =
case Numeric.readHex str of
[(n,"")] -> n
_ -> error "readHex: no parse"
parse :: String -> T
parse ('&':'#':'x':xs) =
parseAux Char.isHexDigit (XMLChar.fromCharRef . readHex) "&#x" xs
parse ('&':'#':xs) =
parseAux Char.isDigit (XMLChar.fromCharRef . read) "&#" xs
parse ('&':xs) =
parseAux Char.isAlphaNum XMLChar.fromEntityRef "&" xs
parse (x:xs) = XMLChar.fromUnicode x : parse xs
parse [] = []
parseAux ::
(Char -> Bool) ->
(String -> XMLChar.T) ->
String ->
String ->
T
parseAux check ref prefix xs =
let (name,rest0) = span check xs
in case rest0 of
';':rest1 -> ref name : parse rest1
_ -> map XMLChar.fromUnicode (prefix++name) ++ parse rest0
reduceRefs :: T -> T
reduceRefs = map XMLChar.reduceRef
replaceUTF8ByUnicode :: T -> T
replaceUTF8ByUnicode =
mapUnicodeRuns (fst . Unicode.utf8ToUnicode)
mapUnicodeRuns :: (String -> String) -> T -> T
mapUnicodeRuns f =
flip compose [] .
XMLChar.switchUnicodeRuns
(diffFromString . f)
((:) . XMLChar.fromCharRef)
((:) . XMLChar.fromEntityRef)
uStringWithErrorsMergePlainChars ::
EmbeddedExceptions -> [Exc.Exceptional String String]
uStringWithErrorsMergePlainChars =
foldr (\x ys ->
case x of
Exc.Exception err -> Exc.Exception err : ys
Exc.Success c ->
uncurry (:) $
mapFst Exc.Success $
mapFst (c:) $
case ys of
Exc.Success cs : ys0 -> (cs, ys0)
_ -> ([], ys))
[]
liftFromUnicode :: (String -> String) -> (T -> T)
liftFromUnicode f =
fromUnicodeString . f . toUnicodeString
liftFromUnicodeA :: Applicative m => (String -> m String) -> (T -> m T)
liftFromUnicodeA f =
liftA fromUnicodeString . f . toUnicodeString