-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Unicode en-/decoding functions for utf8, iso-latin-* and other encodings -- -- Unicode encoding and decoding functions for utf8, iso-latin-* and -- some other encodings, used in the Haskell XML Toolbox. ISO Latin 1 - -- 16, utf8, utf16, ASCII are supported. Decoding is done with lazy -- functions, errors may be detected or ignored. @package hxt-unicode @version 9.0.2 module Data.String.UTF8 encode :: [Char] -> [Word8] decode :: [Word8] -> ([Char], [(Error, Int)]) decodeEmbedErrors :: [Word8] -> [Either (Error, Int) Char] encodeOne :: Char -> [Word8] decodeOne :: [Word8] -> (Either Error Char, Int, [Word8]) data Error instance Show Error instance Eq Error -- | Interface for Data.Char.UTF8 functions module Data.String.UTF8Decoding -- | calls Data.Char.UTF8.decode for parsing and decoding UTF-8 decodeUtf8 :: String -> (String, [String]) decodeUtf8EmbedErrors :: String -> [Either String Char] decodeUtf8IgnoreErrors :: String -> String -- | Constants for character encodings module Data.String.EncodingNames isoLatin1, unicodeString, utf16le, utf16be, utf16, utf8, ucs2, usAscii, iso8859_16, iso8859_15, iso8859_14, iso8859_13, iso8859_11, iso8859_10, iso8859_9, iso8859_8, iso8859_7, iso8859_6, iso8859_5, iso8859_4, iso8859_3, iso8859_2, iso8859_1 :: String module Data.Char.IsoLatinTables iso_8859_2 :: [(Char, Char)] iso_8859_3 :: [(Char, Char)] iso_8859_4 :: [(Char, Char)] iso_8859_5 :: [(Char, Char)] iso_8859_6 :: [(Char, Char)] iso_8859_7 :: [(Char, Char)] iso_8859_8 :: [(Char, Char)] iso_8859_9 :: [(Char, Char)] iso_8859_10 :: [(Char, Char)] iso_8859_11 :: [(Char, Char)] iso_8859_13 :: [(Char, Char)] iso_8859_14 :: [(Char, Char)] iso_8859_15 :: [(Char, Char)] iso_8859_16 :: [(Char, Char)] -- | Unicode and UTF-8 Conversion Functions module Data.String.Unicode -- | Unicode is represented as the Char type. Precondition for this is the -- support of Unicode character range in the 
compiler (e.g. ghc but not -- hugs) type Unicode = Char -- | the type for Unicode strings type UString = [Unicode] -- | UTF-8 characters are represented by the Char type type UTF8Char = Char -- | UTF-8 strings are implemented as Haskell strings type UTF8String = String type UStringWithErrors = [Either String Char] -- | Decoding function with a pair containing the result string and a list -- of decoding errors as result type DecodingFct = String -> (UString, [String]) -- | Decoding function where decoding errors are interleaved with decoded -- characters type DecodingFctEmbedErrors = String -> UStringWithErrors -- | UTF-8 to Unicode conversion with deletion of leading byte order mark, -- as described in XML standard F.1 utf8ToUnicode :: DecodingFct utf8ToUnicodeEmbedErrors :: DecodingFctEmbedErrors -- | code conversion from latin1 to Unicode latin1ToUnicode :: String -> UString -- | UCS-2 to Unicode conversion with byte order mark analysis ucs2ToUnicode :: String -> UString -- | UCS-2 big endian to Unicode conversion ucs2BigEndianToUnicode :: String -> UString -- | UCS-2 little endian to Unicode conversion ucs2LittleEndianToUnicode :: String -> UString -- | UTF-16 big endian to Unicode conversion with removal of byte order mark utf16beToUnicode :: String -> UString -- | UTF-16 little endian to Unicode conversion with removal of byte order -- mark utf16leToUnicode :: String -> UString -- | conversion from Unicode (Char) to a UTF8 encoded string. unicodeCharToUtf8 :: Unicode -> UTF8String -- | conversion from Unicode strings (UString) to UTF8 encoded strings. unicodeToUtf8 :: UString -> UTF8String -- | substitute all Unicode characters, that are not legal 1-byte UTF-8 XML -- characters by a character reference. -- -- This function can be used to translate all text nodes and attribute -- values into pure ascii. 
-- -- see also : unicodeToLatin1 unicodeToXmlEntity :: UString -> String -- | substitute all Unicode characters, that are not legal latin1 UTF-8 XML -- characters by a character reference. -- -- This function can be used to translate all text nodes and attribute -- values into ISO latin1. -- -- see also : unicodeToXmlEntity unicodeToLatin1 :: UString -> String -- | removes all non ascii chars, may be used to transform a document into -- a pure ascii representation by removing all non ascii chars from tag -- and attribute names -- -- see also : unicodeRemoveNoneLatin1, unicodeToXmlEntity unicodeRemoveNoneAscii :: UString -> String -- | removes all non latin1 chars, may be used to transform a document into -- a pure latin1 representation by removing all non latin1 chars from tag -- and attribute names -- -- see also : unicodeRemoveNoneAscii, unicodeToLatin1 unicodeRemoveNoneLatin1 :: UString -> String -- | convert a Unicode code point into an XML character reference. -- -- see also : intToCharRefHex intToCharRef :: Int -> String -- | convert a Unicode code point into an XML hexadecimal character reference. -- -- see also: intToCharRef intToCharRefHex :: Int -> String intToHexString :: Int -> String -- | the lookup function for selecting the decoding function getDecodingFct :: String -> Maybe DecodingFct -- | the lookup function for selecting the decoding function getDecodingFctEmbedErrors :: String -> Maybe DecodingFctEmbedErrors -- | the lookup function for selecting the encoding function getOutputEncodingFct :: String -> Maybe (String -> UString) -- | White Space (XML Standard 2.3) and end of line handling (2.11) -- -- #x0D and #x0D#x0A are mapped to #x0A normalizeNL :: String -> String guessEncoding :: String -> String -- | the lookup function for selecting the encoding function getOutputEncodingFct' :: String -> Maybe (Char -> StringFct) -- | conversion from Unicode (Char) to a UTF8 encoded string. 
unicodeCharToUtf8' :: Char -> StringFct -- | substitute all Unicode characters, that are not legal 1-byte UTF-8 XML -- characters by a character reference. unicodeCharToXmlEntity' :: Char -> StringFct -- | substitute all Unicode characters, that are not legal latin1 UTF-8 XML -- characters by a character reference. unicodeCharToLatin1' :: Char -> StringFct