-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Support for reading and writing UTF8 Strings -- -- A UTF8 layer for IO and Strings. The utf8-string package provides -- operations for encoding UTF8 strings to Word8 lists and back, and for -- reading and writing UTF8 without truncation. @package utf8-string @version 0.3.7 -- | Support for encoding UTF8 Strings to and from [Word8] module Codec.Binary.UTF8.String -- | Encode a Haskell String to a list of Word8 values, in UTF8 format. encode :: String -> [Word8] -- | Decode a UTF8 string packed into a list of Word8 values, directly to -- String decode :: [Word8] -> String -- | Encode a string using encode and store the result in a -- String. encodeString :: String -> String -- | Decode a string using decode, taking a String as input. -- This is not safe, but it is necessary if UTF-8 encoded text has been -- loaded into a String prior to being decoded. decodeString :: String -> String -- | Encode a single Haskell Char to a list of Word8 values, in UTF8 -- format. encodeChar :: Char -> [Word8] -- | isUTF8Encoded str tries to recognize the input string as being in -- UTF-8 form. isUTF8Encoded :: String -> Bool -- | utf8Encode str is a convenience function; it checks that -- str isn't already UTF-8 encoded before encoding it. Sometimes useful, -- but you are better off keeping track of the encoding so as to avoid -- the cost of checking. utf8Encode :: String -> String module Codec.Binary.UTF8.Generic class (Num s, Ord s) => UTF8Bytes b s | b -> s bsplit :: UTF8Bytes b s => s -> b -> (b, b) bdrop :: UTF8Bytes b s => s -> b -> b buncons :: UTF8Bytes b s => b -> Maybe (Word8, b) elemIndex :: UTF8Bytes b s => Word8 -> b -> Maybe s empty :: UTF8Bytes b s => b null :: UTF8Bytes b s => b -> Bool pack :: UTF8Bytes b s => [Word8] -> b tail :: UTF8Bytes b s => b -> b -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. 
Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. decode :: UTF8Bytes b s => b -> Maybe (Char, s) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: UTF8Bytes b s => b -> Maybe (Char, b) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. splitAt :: UTF8Bytes b s => s -> b -> (b, b) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: UTF8Bytes b s => s -> b -> b -- | drop n s returns the s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: UTF8Bytes b s => s -> b -> b -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as '\xFFFD' -- to the predicate. span :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. break :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: UTF8Bytes b s => String -> b -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: UTF8Bytes b s => b -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: UTF8Bytes b s => (a -> Char -> a) -> a -> b -> a -- | Traverse a bytestring (right biased). 
foldr :: UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. length :: UTF8Bytes b s => b -> s -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. See also lines'. lines :: UTF8Bytes b s => b -> [b] -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. This function preserves the terminators. See also -- lines. lines' :: UTF8Bytes b s => b -> [b] instance UTF8Bytes [Word8] Int instance UTF8Bytes ByteString Int64 instance UTF8Bytes ByteString Int module Data.String.UTF8 -- | The type of strings that are represented using the UTF8 encoding. The -- parameter is the type of the container for the representation. data UTF8 string class (Num s, Ord s) => UTF8Bytes b s | b -> s -- | Converts a Haskell string into a UTF8 encoded string. Complexity: -- linear. fromString :: UTF8Bytes string index => String -> UTF8 string -- | Convert a UTF8 encoded string into a Haskell string. Invalid -- characters are replaced by replacement_char. Complexity: -- linear. toString :: UTF8Bytes string index => UTF8 string -> String fromRep :: string -> UTF8 string toRep :: UTF8 string -> string -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Invalid characters -- are replaced by replacement_char. uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. See also byteSplitAt. splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) -- | take n s returns the first n characters of -- s. 
If s has fewer than n characters, then we -- return the whole of s. take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | drop n s returns the s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as replacement_char -- to the predicate. span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- replacement_char to the predicate. break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a -- | Traverse a bytestring (right biased). foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. The function is linear in the -- number of bytes in the representation. length :: UTF8Bytes string index => UTF8 string -> index -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. See also lines'. lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string] -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. This function preserves the terminators. 
See also -- lines. lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string] -- | Checks if there are no more bytes in the underlying representation. null :: UTF8Bytes string index => UTF8 string -> Bool -- | Extract the first character for the underlying representation, if one -- is available. It also returns the number of bytes used in the -- representation of the character. See also uncons, -- byteDrop. decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index) -- | Split after a given number of bytes in the underlying representation. -- See also splitAt. byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) -- | Take only the given number of bytes from the underlying -- representation. See also take. byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | Drop the given number of bytes from the underlying representation. See -- also drop. byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string instance Eq string => Eq (UTF8 string) instance Ord string => Ord (UTF8 string) instance UTF8Bytes string index => Show (UTF8 string) -- | String IO preserving UTF8 encoding. module System.IO.UTF8 -- | The print function outputs a value of any printable type to the -- standard output device. This function differs from the System.IO.print -- in that it preserves any UTF8 encoding of the shown value. print :: Show a => a -> IO () -- | Write a UTF8 string to the standard output device putStr :: String -> IO () -- | The same as putStr, but adds a newline character. putStrLn :: String -> IO () -- | Read a UTF8 line from the standard input device getLine :: IO String -- | The readLn function combines getLine and -- readIO, preserving UTF8 readLn :: Read a => IO a openBinaryFile :: FilePath -> IOMode -> IO Handle withBinaryFile :: FilePath -> IOMode -> (Handle -> IO a) -> IO a -- | The readFile function reads a file and returns the contents of -- the file as a UTF8 string. 
The file is read lazily, on demand, as with -- getContents. readFile :: FilePath -> IO String -- | The computation writeFile file str function writes the -- UTF8 string str, to the file file. writeFile :: FilePath -> String -> IO () -- | The computation appendFile file str function appends -- the UTF8 string str, to the file file. appendFile :: FilePath -> String -> IO () interact :: (String -> String) -> IO () -- | Lazily read stdin as a UTF8 string. getContents :: IO String -- | Read a UTF8 line from a Handle hGetLine :: Handle -> IO String -- | Lazily read a UTF8 string from a Handle hGetContents :: Handle -> IO String -- | Write a UTF8 string to a Handle. hPutStr :: Handle -> String -> IO () -- | Write a UTF8 string to a Handle, appending a newline. hPutStrLn :: Handle -> String -> IO () -- | Support for UTF-8 based environment manipulation module System.Environment.UTF8 getArgs :: IO [String] getProgName :: IO String getEnv :: String -> IO String withArgs :: [String] -> IO a -> IO a withProgName :: String -> IO a -> IO a getEnvironment :: IO [(String, String)] module Data.ByteString.UTF8 -- | A space-efficient representation of a Word8 vector, supporting many -- efficient operations. A ByteString contains 8-bit characters -- only. -- -- Instances of Eq, Ord, Read, Show, Data, Typeable data ByteString :: * -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. decode :: ByteString -> Maybe (Char, Int) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: ByteString -> Maybe (Char, ByteString) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. 
splitAt :: Int -> ByteString -> (ByteString, ByteString) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: Int -> ByteString -> ByteString -- | drop n s returns the s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: Int -> ByteString -> ByteString -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as '\xFFFD' -- to the predicate. span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: String -> ByteString -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: ByteString -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: (a -> Char -> a) -> a -> ByteString -> a -- | Traverse a bytestring (right biased). foldr :: (Char -> a -> a) -> a -> ByteString -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. length :: ByteString -> Int -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. See also lines'. lines :: ByteString -> [ByteString] -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. 
Empty lines may not be terminated by the end of -- the string. This function preserves the terminators. See also -- lines. lines' :: ByteString -> [ByteString] module Data.ByteString.Lazy.UTF8 -- | A space-efficient representation of a Word8 vector, supporting many -- efficient operations. A ByteString contains 8-bit characters -- only. -- -- Instances of Eq, Ord, Read, Show, Data, Typeable data ByteString :: * -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. decode :: ByteString -> Maybe (Char, Int64) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: ByteString -> Maybe (Char, ByteString) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. splitAt :: Int64 -> ByteString -> (ByteString, ByteString) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: Int64 -> ByteString -> ByteString -- | drop n s returns the s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: Int64 -> ByteString -> ByteString -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as '\xFFFD' -- to the predicate. span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. 
Invalid characters are passed as -- '\xFFFD' to the predicate. break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: String -> ByteString -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: ByteString -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: (a -> Char -> a) -> a -> ByteString -> a -- | Traverse a bytestring (right biased). foldr :: (Char -> a -> a) -> a -> ByteString -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. length :: ByteString -> Int -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. See also lines'. lines :: ByteString -> [ByteString] -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. This function preserves the terminators. See also -- lines. lines' :: ByteString -> [ByteString]