-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Support for reading and writing UTF8 Strings -- -- A UTF8 layer for Strings. The utf8-string package provides operations -- for encoding UTF8 strings to Word8 lists and back, and for reading and -- writing UTF8 without truncation. @package utf8-string @version 1.0.2 -- | Support for encoding UTF8 Strings to and from [Word8] module Codec.Binary.UTF8.String -- | Encode a Haskell String to a list of Word8 values, in -- UTF8 format. encode :: String -> [Word8] -- | Decode a UTF8 string packed into a list of Word8 values, -- directly to String decode :: [Word8] -> String -- | Encode a string using encode and store the result in a -- String. encodeString :: String -> String -- | Decode a string with decode, taking a String as input. -- This is not safe but it is necessary if UTF-8 encoded text has been -- loaded into a String prior to being decoded. decodeString :: String -> String -- | Encode a single Haskell Char to a list of Word8 values, -- in UTF8 format. encodeChar :: Char -> [Word8] -- | isUTF8Encoded str tries to recognize the input string as being in -- UTF-8 form. isUTF8Encoded :: String -> Bool -- | utf8Encode str is a convenience function; it checks that -- str isn't already UTF-8 encoded before encoding it. Sometimes useful, -- but you are better off keeping track of the encoding so as to avoid -- the cost of checking. utf8Encode :: String -> String module Codec.Binary.UTF8.Generic class (Num s, Ord s) => UTF8Bytes b s | b -> s bsplit :: UTF8Bytes b s => s -> b -> (b, b) bdrop :: UTF8Bytes b s => s -> b -> b buncons :: UTF8Bytes b s => b -> Maybe (Word8, b) elemIndex :: UTF8Bytes b s => Word8 -> b -> Maybe s empty :: UTF8Bytes b s => b null :: UTF8Bytes b s => b -> Bool pack :: UTF8Bytes b s => [Word8] -> b tail :: UTF8Bytes b s => b -> b -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. 
Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. decode :: UTF8Bytes b s => b -> Maybe (Char, s) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: UTF8Bytes b s => b -> Maybe (Char, b) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. splitAt :: UTF8Bytes b s => s -> b -> (b, b) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: UTF8Bytes b s => s -> b -> b -- | drop n s returns s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: UTF8Bytes b s => s -> b -> b -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. span :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. break :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: UTF8Bytes b s => String -> b -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: UTF8Bytes b s => b -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: UTF8Bytes b s => (a -> Char -> a) -> a -> b -> a -- | Traverse a bytestring (right biased). 
foldr :: UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. length :: UTF8Bytes b s => b -> s -- | Split a string into a list of lines. Lines are terminated by -- '\n' or the end of the string. Empty lines may not be -- terminated by the end of the string. See also lines'. lines :: UTF8Bytes b s => b -> [b] -- | Split a string into a list of lines. Lines are terminated by -- '\n' or the end of the string. Empty lines may not be -- terminated by the end of the string. This function preserves the -- terminators. See also lines. lines' :: UTF8Bytes b s => b -> [b] instance Codec.Binary.UTF8.Generic.UTF8Bytes Data.ByteString.Internal.ByteString GHC.Types.Int instance Codec.Binary.UTF8.Generic.UTF8Bytes Data.ByteString.Lazy.Internal.ByteString GHC.Int.Int64 instance Codec.Binary.UTF8.Generic.UTF8Bytes [GHC.Word.Word8] GHC.Types.Int module Data.ByteString.Lazy.UTF8 -- | A space-efficient representation of a Word8 vector, supporting -- many efficient operations. -- -- A lazy ByteString contains 8-bit bytes, or by using the -- operations from Data.ByteString.Lazy.Char8 it can be -- interpreted as containing 8-bit characters. data ByteString -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. decode :: ByteString -> Maybe (Char, Int64) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: ByteString -> Maybe (Char, ByteString) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. 
splitAt :: Int64 -> ByteString -> (ByteString, ByteString) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: Int64 -> ByteString -> ByteString -- | drop n s returns s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: Int64 -> ByteString -> ByteString -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: String -> ByteString -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: ByteString -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: (a -> Char -> a) -> a -> ByteString -> a -- | Traverse a bytestring (right biased). foldr :: (Char -> a -> a) -> a -> ByteString -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. length :: ByteString -> Int -- | Split a string into a list of lines. Lines are terminated by -- '\n' or the end of the string. Empty lines may not be -- terminated by the end of the string. See also lines'. lines :: ByteString -> [ByteString] -- | Split a string into a list of lines. Lines are terminated by -- '\n' or the end of the string. 
Empty lines may not be -- terminated by the end of the string. This function preserves the -- terminators. See also lines. lines' :: ByteString -> [ByteString] module Data.ByteString.UTF8 -- | A space-efficient representation of a Word8 vector, supporting -- many efficient operations. -- -- A ByteString contains 8-bit bytes, or by using the operations -- from Data.ByteString.Char8 it can be interpreted as containing -- 8-bit characters. data ByteString -- | Try to extract a character from a byte string. Returns Nothing -- if there are no more bytes in the byte string. Otherwise, it returns a -- decoded character and the number of bytes used in its representation. -- Errors are replaced by character '\xFFFD'. decode :: ByteString -> Maybe (Char, Int) -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Malformed characters -- are replaced by '\xFFFD'. uncons :: ByteString -> Maybe (Char, ByteString) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. splitAt :: Int -> ByteString -> (ByteString, ByteString) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: Int -> ByteString -> ByteString -- | drop n s returns s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: Int -> ByteString -> ByteString -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. 
Invalid characters are passed as -- '\xFFFD' to the predicate. break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString) -- | Converts a Haskell char into a UTF8 encoded bytestring. fromChar :: Char -> ByteString -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: String -> ByteString -- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid -- characters are replaced with '\xFFFD'. toString :: ByteString -> String -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: (a -> Char -> a) -> a -> ByteString -> a -- | Traverse a bytestring (right biased). foldr :: (Char -> a -> a) -> a -> ByteString -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. length :: ByteString -> Int -- | Split a string into a list of lines. Lines are terminated by -- '\n' or the end of the string. Empty lines may not be -- terminated by the end of the string. See also lines'. lines :: ByteString -> [ByteString] -- | Split a string into a list of lines. Lines are terminated by -- '\n' or the end of the string. Empty lines may not be -- terminated by the end of the string. This function preserves the -- terminators. See also lines. lines' :: ByteString -> [ByteString] module Data.String.UTF8 -- | The type of strings that are represented using the UTF8 encoding. The -- parameter is the type of the container for the representation. data UTF8 string class (Num s, Ord s) => UTF8Bytes b s | b -> s -- | Converts a Haskell string into a UTF8 encoded string. Complexity: -- linear. fromString :: UTF8Bytes string index => String -> UTF8 string -- | Convert a UTF8 encoded string into a Haskell string. Invalid -- characters are replaced by replacement_char. Complexity: -- linear. 
toString :: UTF8Bytes string index => UTF8 string -> String fromRep :: string -> UTF8 string toRep :: UTF8 string -> string -- | This character is used to mark errors in a UTF8 encoded string. replacement_char :: Char -- | Get the first character of a byte string, if any. Invalid characters -- are replaced by replacement_char. uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string) -- | Split after a given number of characters. Negative values are treated -- as if they are 0. splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) -- | take n s returns the first n characters of -- s. If s has fewer than n characters, then we -- return the whole of s. take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | drop n s returns s without its first n -- characters. If s has fewer than n characters, then we -- return an empty string. drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that satisfy the predicate; the second part -- is the rest of the string. Invalid characters are passed as -- '\xFFFD' to the predicate. span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) -- | Split a string into two parts: the first is the longest prefix that -- contains only characters that do not satisfy the predicate; the second -- part is the rest of the string. Invalid characters are passed as -- replacement_char to the predicate. break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) -- | Traverse a bytestring (left biased). This function is strict in the -- accumulator. foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a -- | Traverse a bytestring (right biased). 
foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a -- | Counts the number of characters encoded in the bytestring. Note that -- this includes replacement characters. The function is linear in the -- number of bytes in the representation. length :: UTF8Bytes string index => UTF8 string -> index -- | Split a string into a list of lines. Lines are terminated by -- '\n' or the end of the string. Empty lines may not be -- terminated by the end of the string. See also lines'. lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string] -- | Split a string into a list of lines. Lines are terminated by -- '\n' or the end of the string. Empty lines may not be -- terminated by the end of the string. This function preserves the -- terminators. See also lines. lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string] -- | Checks if there are no more bytes in the underlying representation. null :: UTF8Bytes string index => UTF8 string -> Bool -- | Extract the first character from the underlying representation, if one -- is available. It also returns the number of bytes used in the -- representation of the character. See also uncons. decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index) -- | Split after a given number of bytes in the underlying representation. -- See also splitAt. byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) -- | Take only the given number of bytes from the underlying -- representation. See also take. byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string -- | Drop the given number of bytes from the underlying representation. See -- also drop. 
byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string instance GHC.Classes.Ord string => GHC.Classes.Ord (Data.String.UTF8.UTF8 string) instance GHC.Classes.Eq string => GHC.Classes.Eq (Data.String.UTF8.UTF8 string) instance Codec.Binary.UTF8.Generic.UTF8Bytes string index => GHC.Show.Show (Data.String.UTF8.UTF8 string) instance Codec.Binary.UTF8.Generic.UTF8Bytes string index => Data.String.IsString (Data.String.UTF8.UTF8 string)