Text.XML.HXT.DOM.Unicode

hxt-7.1: A collection of tools for processing XML with Haskell.

Contents

Index

Contents

Unicode Type declarations
Unicode and UTF-8 predicates
UTF-8 and Unicode conversion functions

Description

Unicode (UCS-2) and UTF-8 Conversion Functions

Synopsis

type Unicode = Char

type UString = [Unicode]

type UTF8Char = Char

type UTF8String = String

isLeadingMultiByteChar :: Char -> Bool

isFollowingMultiByteChar :: Char -> Bool

isMultiByteChar :: Char -> Bool

isNByteChar :: Unicode -> (Int, Int, Int)

is1ByteXmlChar :: Unicode -> Bool

isMultiByteXmlChar :: Unicode -> Bool

isXmlChar :: Unicode -> Bool

isXmlLatin1Char :: Unicode -> Bool

isXmlSpaceChar :: Unicode -> Bool

isXml11SpaceChar :: Unicode -> Bool

isXmlNameChar :: Unicode -> Bool

isXmlNameStartChar :: Unicode -> Bool

isXmlNCNameChar :: Unicode -> Bool

isXmlNCNameStartChar :: Unicode -> Bool

isXmlPubidChar :: Unicode -> Bool

isXmlLetter :: Unicode -> Bool

isXmlBaseChar :: Unicode -> Bool

isXmlIdeographicChar :: Unicode -> Bool

isXmlCombiningChar :: Unicode -> Bool

isXmlDigit :: Unicode -> Bool

isXmlExtender :: Unicode -> Bool

isXmlControlOrPermanentlyUndefined :: Unicode -> Bool

utf8ToUnicodeChar :: UTF8String -> Unicode

utf8ToUnicode :: UTF8String -> UString

utf8WithByteMarkToUnicode :: UTF8String -> UString

latin1ToUnicode :: String -> UString

ucs2ToUnicode :: String -> UString

ucs2BigEndianToUnicode :: String -> UString

ucs2LittleEndianToUnicode :: String -> UString

utf16beToUnicode :: String -> UString

utf16leToUnicode :: String -> UString

unicodeCharToUtf8 :: Unicode -> UTF8String

unicodeToUtf8 :: UString -> UTF8String

unicodeToXmlEntity :: UString -> String

unicodeToLatin1 :: UString -> String

unicodeRemoveNoneAscii :: UString -> String

unicodeRemoveNoneLatin1 :: UString -> String

intToCharRef :: Int -> String

intToCharRefHex :: Int -> String

getEncodingFct :: String -> Maybe (UString -> String)

getOutputEncodingFct :: String -> Maybe (String -> UString)

normalizeNL :: String -> String

guessEncoding :: String -> String

Unicode Type declarations

type Unicode = Char

Unicode is represented as the Char type. Precondition for this is the support of the Unicode character range in the compiler (e.g. ghc but not hugs).

type UString = [Unicode]

the type for Unicode strings

type UTF8Char = Char

UTF-8 characters are represented by the Char type

type UTF8String = String

UTF-8 strings are implemented as Haskell strings

Unicode and UTF-8 predicates

isLeadingMultiByteChar :: Char -> Bool

test for leading multibyte UTF-8 character

isFollowingMultiByteChar :: Char -> Bool

test for following multibyte UTF-8 character

isMultiByteChar :: Char -> Bool

test for following multibyte UTF-8 character

isNByteChar :: Unicode -> (Int, Int, Int)

compute the number of following bytes and the mask bits of a leading UTF-8 multibyte char

is1ByteXmlChar :: Unicode -> Bool

test for a legal 1 byte XML char

isMultiByteXmlChar :: Unicode -> Bool

test for a legal multi byte XML char

isXmlChar :: Unicode -> Bool

checking for valid XML characters

isXmlLatin1Char :: Unicode -> Bool

test for a legal latin1 XML char

isXmlSpaceChar :: Unicode -> Bool

checking for XML space characters: \n, \r, \t and " "

isXml11SpaceChar :: Unicode -> Bool

checking for XML 1.1 space characters: additional space characters 0x85 and 0x2028