Text.XML.HXT.DOM.Unicode

hxt-7.3: A collection of tools for processing XML with Haskell.

Portability	portable
Stability	experimental
Maintainer	Uwe Schmidt (uwe@fh-wedel.de)

Contents

Unicode Type declarations
XML char predicates
UTF-8 and Unicode conversion functions

Description

Version : $Id$

Unicode and UTF-8 Conversion Functions

Synopsis

type Unicode = Char

type UString = [Unicode]

type UTF8Char = Char

type UTF8String = String

type UStringWithErrors = [Either String Char]

type DecodingFct = String -> (UString, [String])

type DecodingFctEmbedErrors = String -> UStringWithErrors

isXmlChar :: Unicode -> Bool

isXmlLatin1Char :: Unicode -> Bool

isXmlSpaceChar :: Unicode -> Bool

isXml11SpaceChar :: Unicode -> Bool

isXmlNameChar :: Unicode -> Bool

isXmlNameStartChar :: Unicode -> Bool

isXmlNCNameChar :: Unicode -> Bool

isXmlNCNameStartChar :: Unicode -> Bool

isXmlPubidChar :: Unicode -> Bool

isXmlLetter :: Unicode -> Bool

isXmlBaseChar :: Unicode -> Bool

isXmlIdeographicChar :: Unicode -> Bool

isXmlCombiningChar :: Unicode -> Bool

isXmlDigit :: Unicode -> Bool

isXmlExtender :: Unicode -> Bool

isXmlControlOrPermanentlyUndefined :: Unicode -> Bool

utf8ToUnicode :: DecodingFct

utf8ToUnicodeEmbedErrors :: DecodingFctEmbedErrors

latin1ToUnicode :: String -> UString

ucs2ToUnicode :: String -> UString

ucs2BigEndianToUnicode :: String -> UString

ucs2LittleEndianToUnicode :: String -> UString

utf16beToUnicode :: String -> UString

utf16leToUnicode :: String -> UString

unicodeCharToUtf8 :: Unicode -> UTF8String

unicodeToUtf8 :: UString -> UTF8String

unicodeToXmlEntity :: UString -> String

unicodeToLatin1 :: UString -> String

unicodeRemoveNoneAscii :: UString -> String

unicodeRemoveNoneLatin1 :: UString -> String

intToCharRef :: Int -> String

intToCharRefHex :: Int -> String

getDecodingFct :: String -> Maybe DecodingFct

getDecodingFctEmbedErrors :: String -> Maybe DecodingFctEmbedErrors

getOutputEncodingFct :: String -> Maybe (String -> UString)

normalizeNL :: String -> String

guessEncoding :: String -> String

Unicode Type declarations

type Unicode = Char

Unicode is represented as the Char type. A precondition for this is support for the Unicode character range in the compiler (e.g. ghc, but not hugs).

type UString = [Unicode]

the type for Unicode strings

type UTF8Char = Char

UTF-8 characters are represented by the Char type

type UTF8String = String

UTF-8 strings are implemented as Haskell strings

type UStringWithErrors = [Either String Char]

type DecodingFct = String -> (UString, [String])

Decoding function that returns a pair containing the result string and a list of decoding errors

type DecodingFctEmbedErrors = String -> UStringWithErrors

Decoding function where decoding errors are interleaved with decoded characters

XML char predicates

isXmlChar :: Unicode -> Bool

checking for valid XML characters

isXmlLatin1Char :: Unicode -> Bool

test for a legal latin1 XML char

isXmlSpaceChar :: Unicode -> Bool

checking for XML space character: \n, \r, \t and " "

isXml11SpaceChar :: Unicode -> Bool

checking for XML 1.1 space character: the additional space characters are 0x85 and 0x2028