-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Unicode en-/decoding functions for utf8, iso-latin-* and other encodings
--
-- Unicode encoding and decoding functions for utf8, iso-latin-* and
-- some other encodings, used in the Haskell XML Toolbox. ISO Latin 1 -
-- 16, utf8, utf16, ASCII are supported. Decoding is done with lazy
-- functions, errors may be detected or ignored.
@package hxt-unicode
@version 9.0.0
module Data.String.UTF8
encode :: [Char] -> [Word8]
decode :: [Word8] -> ([Char], [(Error, Int)])
decodeEmbedErrors :: [Word8] -> [Either (Error, Int) Char]
encodeOne :: Char -> [Word8]
decodeOne :: [Word8] -> (Either Error Char, Int, [Word8])
data Error
instance Show Error
instance Eq Error
-- | Interface for Data.String.UTF8 functions
module Data.String.UTF8Decoding
-- | calls Data.String.UTF8.decode for parsing and decoding UTF-8
decodeUtf8 :: String -> (String, [String])
decodeUtf8EmbedErrors :: String -> [Either String Char]
decodeUtf8IgnoreErrors :: String -> String
-- | Constants for character encodings
module Data.String.EncodingNames
iso8859_1 :: String
iso8859_2 :: String
iso8859_3 :: String
iso8859_4 :: String
iso8859_5 :: String
iso8859_6 :: String
iso8859_7 :: String
iso8859_8 :: String
iso8859_9 :: String
iso8859_10 :: String
iso8859_11 :: String
iso8859_13 :: String
iso8859_14 :: String
iso8859_15 :: String
iso8859_16 :: String
usAscii :: String
ucs2 :: String
utf8 :: String
utf16 :: String
utf16be :: String
utf16le :: String
unicodeString :: String
isoLatin1 :: String
module Data.Char.IsoLatinTables
iso_8859_2 :: [(Char, Char)]
iso_8859_3 :: [(Char, Char)]
iso_8859_4 :: [(Char, Char)]
iso_8859_5 :: [(Char, Char)]
iso_8859_6 :: [(Char, Char)]
iso_8859_7 :: [(Char, Char)]
iso_8859_8 :: [(Char, Char)]
iso_8859_9 :: [(Char, Char)]
iso_8859_10 :: [(Char, Char)]
iso_8859_11 :: [(Char, Char)]
iso_8859_13 :: [(Char, Char)]
iso_8859_14 :: [(Char, Char)]
iso_8859_15 :: [(Char, Char)]
iso_8859_16 :: [(Char, Char)]
-- | Unicode and UTF-8 Conversion Functions
module Data.String.Unicode
-- | Unicode is represented as the Char type. Precondition for this is the
-- support of the Unicode character range in the compiler (e.g. ghc but
-- not hugs)
type Unicode = Char
-- | the type for Unicode strings
type UString = [Unicode]
-- | UTF-8 characters are represented by the Char type
type UTF8Char = Char
-- | UTF-8 strings are implemented as Haskell strings
type UTF8String = String
type UStringWithErrors = [Either String Char]
-- | Decoding function with a pair containing the result string and a list
-- of decoding errors as result
type DecodingFct = String -> (UString, [String])
-- | Decoding function where decoding errors are interleaved with decoded
-- characters
type DecodingFctEmbedErrors = String -> UStringWithErrors
-- | UTF-8 to Unicode conversion with deletion of leading byte order mark,
-- as described in XML standard F.1
utf8ToUnicode :: DecodingFct
utf8ToUnicodeEmbedErrors :: DecodingFctEmbedErrors
-- | code conversion from latin1 to Unicode
latin1ToUnicode :: String -> UString
-- | UCS-2 to Unicode conversion with byte order mark analysis
ucs2ToUnicode :: String -> UString
-- | UCS-2 big endian to Unicode conversion
ucs2BigEndianToUnicode :: String -> UString
-- | UCS-2 little endian to Unicode conversion
ucs2LittleEndianToUnicode :: String -> UString
-- | UTF-16 big endian to Unicode conversion with removal of byte order mark
utf16beToUnicode :: String -> UString
-- | UTF-16 little endian to Unicode conversion with removal of byte order
-- mark
utf16leToUnicode :: String -> UString
-- | conversion from Unicode (Char) to a UTF8 encoded string.
unicodeCharToUtf8 :: Unicode -> UTF8String
-- | conversion from Unicode strings (UString) to UTF8 encoded strings.
unicodeToUtf8 :: UString -> UTF8String
-- | substitute all Unicode characters, that are not legal 1-byte UTF-8 XML
-- characters by a character reference.
--
-- This function can be used to translate all text nodes and attribute
-- values into pure ascii.
--
-- see also : unicodeToLatin1
unicodeToXmlEntity :: UString -> String
-- | substitute all Unicode characters, that are not legal latin1 UTF-8 XML
-- characters by a character reference.
--
-- This function can be used to translate all text nodes and attribute
-- values into ISO latin1.
--
-- see also : unicodeToXmlEntity
unicodeToLatin1 :: UString -> String
-- | removes all non ascii chars, may be used to transform a document into
-- a pure ascii representation by removing all non ascii chars from tag
-- and attribute names
--
-- see also : unicodeRemoveNoneLatin1, unicodeToXmlEntity
unicodeRemoveNoneAscii :: UString -> String
-- | removes all non latin1 chars, may be used to transform a document into
-- a pure latin1 representation by removing all non latin1 chars from tag
-- and attribute names
--
-- see also : unicodeRemoveNoneAscii, unicodeToLatin1
unicodeRemoveNoneLatin1 :: UString -> String
-- | convert a Unicode code point into an XML character reference.
--
-- see also : intToCharRefHex
intToCharRef :: Int -> String
-- | convert a Unicode code point into an XML hexadecimal character reference.
--
-- see also: intToCharRef
intToCharRefHex :: Int -> String
intToHexString :: Int -> String
-- | the lookup function for selecting the decoding function
getDecodingFct :: String -> Maybe DecodingFct
-- | the lookup function for selecting the decoding function that embeds
-- decoding errors in the result
getDecodingFctEmbedErrors :: String -> Maybe DecodingFctEmbedErrors
-- | the lookup function for selecting the encoding function
getOutputEncodingFct :: String -> Maybe (String -> UString)
-- | White Space (XML Standard 2.3) and end of line handling (2.11)
--
-- #x0D and #x0D#x0A are mapped to #x0A
normalizeNL :: String -> String
guessEncoding :: String -> String