-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Support for reading and writing UTF8 Strings -- @package utf8-string @version 1.0.1 -- | Support for encoding UTF8 Strings to and from [Word8] module Codec.Binary.UTF8.String -- | Encode a Haskell String to a list of Word8 values, in UTF8 format. encode :: String -> [Word8] -- | Decode a UTF8 string packed into a list of Word8 values, directly to -- String decode :: [Word8] -> String -- | Encode a string using encode and store the result in a -- String. encodeString :: String -> String -- | Decode a string using decode, taking a String as input. -- This is not safe, but it is necessary if UTF-8 encoded text has been -- loaded into a String prior to being decoded. decodeString :: String -> String -- | Encode a single Haskell Char to a list of Word8 values, in UTF8 -- format. encodeChar :: Char -> [Word8] -- | isUTF8Encoded str tries to recognize the input string as being in -- UTF-8 form. isUTF8Encoded :: String -> Bool -- | utf8Encode str is a convenience function; it checks that -- str isn't already UTF-8 encoded before encoding it. Sometimes useful, -- but you are better off keeping track of the encoding so as to avoid -- the cost of checking. utf8Encode :: String -> String module Codec.Binary.UTF8.Generic class (Num s, Ord s) => UTF8Bytes b s | b -> s bsplit :: UTF8Bytes b s => s -> b -> (b, b) bdrop :: UTF8Bytes b s => s -> b -> b buncons :: UTF8Bytes b s => b -> Maybe (Word8, b) elemIndex :: UTF8Bytes b s => Word8 -> b -> Maybe s empty :: UTF8Bytes b s => b null :: UTF8Bytes b s => b -> Bool pack :: UTF8Bytes b s => [Word8] -> b tail :: UTF8Bytes b s => b -> b -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. 
decode :: UTF8Bytes b s => b -> Maybe (Char, s) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: UTF8Bytes b s => b -> Maybe (Char, b) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. splitAt :: UTF8Bytes b s => s -> b -> (b, b) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: UTF8Bytes b s => s -> b -> b -- | drop n s returns s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: UTF8Bytes b s => s -> b -> b -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as '\xFFFD' -- to the predicate. span :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. break :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: UTF8Bytes b s => String -> b -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: UTF8Bytes b s => b -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: UTF8Bytes b s => (a -> Char -> a) -> a -> b -> a -- | Traverse a bytestring (right biased). foldr :: UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. 
length :: UTF8Bytes b s => b -> s -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. See also lines'. lines :: UTF8Bytes b s => b -> [b] -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. This function preserves the terminators. See also -- lines. lines' :: UTF8Bytes b s => b -> [b] instance UTF8Bytes [Word8] Int instance UTF8Bytes ByteString Int64 instance UTF8Bytes ByteString Int module Data.String.UTF8 -- | The type of strings that are represented using the UTF8 encoding. The -- parameter is the type of the container for the representation. data UTF8 string class (Num s, Ord s) => UTF8Bytes b s | b -> s -- | Converts a Haskell string into a UTF8 encoded string. Complexity: -- linear. fromString :: UTF8Bytes string index => String -> UTF8 string -- | Convert a UTF8 encoded string into a Haskell string. Invalid -- characters are replaced by replacement_char. Complexity: -- linear. toString :: UTF8Bytes string index => UTF8 string -> String fromRep :: string -> UTF8 string toRep :: UTF8 string -> string -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Invalid characters -- are replaced by replacement_char. uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. See also byteSplitAt. splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | drop n s returns s without its first n -- characters. 
If s has fewer than n characters, then we -- return an empty string. drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as -- replacement_char to the predicate. span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- replacement_char to the predicate. break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a -- | Traverse a bytestring (right biased). foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. The function is linear in the -- number of bytes in the representation. length :: UTF8Bytes string index => UTF8 string -> index -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. See also lines'. lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string] -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. This function preserves the terminators. See also -- lines. lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string] -- | Checks if there are no more bytes in the underlying representation. 
null :: UTF8Bytes string index => UTF8 string -> Bool -- | Extract the first character from the underlying representation, if one -- is available. It also returns the number of bytes used in the -- representation of the character. See also uncons, -- byteDrop. decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index) -- | Split after a given number of bytes in the underlying representation. -- See also splitAt. byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) -- | Take only the given number of bytes from the underlying -- representation. See also take. byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | Drop the given number of bytes from the underlying representation. See -- also drop. byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string instance Eq string => Eq (UTF8 string) instance Ord string => Ord (UTF8 string) instance UTF8Bytes string index => Show (UTF8 string) module Data.ByteString.Lazy.UTF8 -- | A space-efficient representation of a Word8 vector, supporting -- many efficient operations. -- -- A lazy ByteString contains 8-bit bytes, or by using the -- operations from Data.ByteString.Lazy.Char8 it can be -- interpreted as containing 8-bit characters. data ByteString :: * -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. decode :: ByteString -> Maybe (Char, Int64) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: ByteString -> Maybe (Char, ByteString) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. 
splitAt :: Int64 -> ByteString -> (ByteString, ByteString) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: Int64 -> ByteString -> ByteString -- | drop n s returns s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: Int64 -> ByteString -> ByteString -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as '\xFFFD' -- to the predicate. span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: String -> ByteString -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: ByteString -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: (a -> Char -> a) -> a -> ByteString -> a -- | Traverse a bytestring (right biased). foldr :: (Char -> a -> a) -> a -> ByteString -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. length :: ByteString -> Int -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. See also lines'. lines :: ByteString -> [ByteString] -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. 
Empty lines may not be terminated by the end of -- the string. This function preserves the terminators. See also -- lines. lines' :: ByteString -> [ByteString] module Data.ByteString.UTF8 -- | A space-efficient representation of a Word8 vector, supporting -- many efficient operations. -- -- A ByteString contains 8-bit bytes, or by using the operations -- from Data.ByteString.Char8 it can be interpreted as containing -- 8-bit characters. data ByteString :: * -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. decode :: ByteString -> Maybe (Char, Int) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: ByteString -> Maybe (Char, ByteString) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. splitAt :: Int -> ByteString -> (ByteString, ByteString) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: Int -> ByteString -> ByteString -- | drop n s returns s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: Int -> ByteString -> ByteString -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as '\xFFFD' -- to the predicate. span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. 
Invalid characters are passed as -- '\xFFFD' to the predicate. break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: String -> ByteString -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: ByteString -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: (a -> Char -> a) -> a -> ByteString -> a -- | Traverse a bytestring (right biased). foldr :: (Char -> a -> a) -> a -> ByteString -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. length :: ByteString -> Int -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. See also lines'. lines :: ByteString -> [ByteString] -- | Split a string into a list of lines. Lines are terminated by '\n' or -- the end of the string. Empty lines may not be terminated by the end of -- the string. This function preserves the terminators. See also -- lines. lines' :: ByteString -> [ByteString]