-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Support for reading and writing UTF8 Strings
--
-- A UTF8 layer for Strings. The utf8-string package provides operations
-- for encoding UTF8 strings to Word8 lists and back, and for reading and
-- writing UTF8 without truncation.
@package utf8-string
@version 1.0.2
-- | Support for encoding UTF8 Strings to and from [Word8]
module Codec.Binary.UTF8.String
-- | Encode a Haskell String to a list of Word8 values, in
-- UTF8 format.
encode :: String -> [Word8]
-- | Decode a UTF8 string packed into a list of Word8 values,
-- directly to String
decode :: [Word8] -> String
-- | Encode a string using encode and store the result in a
-- String.
encodeString :: String -> String
-- | Decode a string using decode, taking a String as input.
-- This is not safe but it is necessary if UTF-8 encoded text has been
-- loaded into a String prior to being decoded.
decodeString :: String -> String
-- | Encode a single Haskell Char to a list of Word8 values,
-- in UTF8 format.
encodeChar :: Char -> [Word8]
-- | isUTF8Encoded str tries to recognize input string as being in
-- UTF-8 form.
isUTF8Encoded :: String -> Bool
-- | utf8Encode str is a convenience function; it checks that
-- str isn't already UTF-8 encoded before encoding it. Sometimes useful,
-- but you are better off keeping track of the encoding so as to avoid
-- the cost of checking.
utf8Encode :: String -> String
module Codec.Binary.UTF8.Generic
class (Num s, Ord s) => UTF8Bytes b s | b -> s
bsplit :: UTF8Bytes b s => s -> b -> (b, b)
bdrop :: UTF8Bytes b s => s -> b -> b
buncons :: UTF8Bytes b s => b -> Maybe (Word8, b)
elemIndex :: UTF8Bytes b s => Word8 -> b -> Maybe s
empty :: UTF8Bytes b s => b
null :: UTF8Bytes b s => b -> Bool
pack :: UTF8Bytes b s => [Word8] -> b
tail :: UTF8Bytes b s => b -> b
-- | Try to extract a character from a byte string. Returns Nothing
-- if there are no more bytes in the byte string. Otherwise, it returns a
-- decoded character and the number of bytes used in its representation.
-- Errors are replaced by the character '\xFFFD'.
decode :: UTF8Bytes b s => b -> Maybe (Char, s)
-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
-- | Get the first character of a byte string, if any. Malformed characters
-- are replaced by '\xFFFD'.
uncons :: UTF8Bytes b s => b -> Maybe (Char, b)
-- | Split after a given number of characters. Negative values are treated
-- as if they are 0.
splitAt :: UTF8Bytes b s => s -> b -> (b, b)
-- | take n s returns the first n characters of
-- s. If s has fewer than n characters, then we
-- return the whole of s.
take :: UTF8Bytes b s => s -> b -> b
-- | drop n s returns s without its first n
-- characters. If s has fewer than n characters, then we
-- return an empty string.
drop :: UTF8Bytes b s => s -> b -> b
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that satisfy the predicate; the second part
-- is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
span :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b)
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
break :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b)
-- | Converts a Haskell string into a UTF8 encoded bytestring.
fromString :: UTF8Bytes b s => String -> b
-- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid
-- characters are replaced with '\xFFFD'.
toString :: UTF8Bytes b s => b -> String
-- | Traverse a bytestring (left biased). This function is strict in the
-- accumulator.
foldl :: UTF8Bytes b s => (a -> Char -> a) -> a -> b -> a
-- | Traverse a bytestring (right biased).
foldr :: UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a
-- | Counts the number of characters encoded in the bytestring. Note that
-- this includes replacement characters.
length :: UTF8Bytes b s => b -> s
-- | Split a string into a list of lines. Lines are terminated by
-- '\n' or the end of the string. Empty lines may not be
-- terminated by the end of the string. See also lines'.
lines :: UTF8Bytes b s => b -> [b]
-- | Split a string into a list of lines. Lines are terminated by
-- '\n' or the end of the string. Empty lines may not be
-- terminated by the end of the string. This function preserves the
-- terminators. See also lines.
lines' :: UTF8Bytes b s => b -> [b]
instance Codec.Binary.UTF8.Generic.UTF8Bytes Data.ByteString.Internal.ByteString GHC.Types.Int
instance Codec.Binary.UTF8.Generic.UTF8Bytes Data.ByteString.Lazy.Internal.ByteString GHC.Int.Int64
instance Codec.Binary.UTF8.Generic.UTF8Bytes [GHC.Word.Word8] GHC.Types.Int
module Data.ByteString.Lazy.UTF8
-- | A space-efficient representation of a Word8 vector, supporting
-- many efficient operations.
--
-- A lazy ByteString contains 8-bit bytes, or by using the
-- operations from Data.ByteString.Lazy.Char8 it can be
-- interpreted as containing 8-bit characters.
data ByteString
-- | Try to extract a character from a byte string. Returns Nothing
-- if there are no more bytes in the byte string. Otherwise, it returns a
-- decoded character and the number of bytes used in its representation.
-- Errors are replaced by the character '\xFFFD'.
decode :: ByteString -> Maybe (Char, Int64)
-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
-- | Get the first character of a byte string, if any. Malformed characters
-- are replaced by '\xFFFD'.
uncons :: ByteString -> Maybe (Char, ByteString)
-- | Split after a given number of characters. Negative values are treated
-- as if they are 0.
splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
-- | take n s returns the first n characters of
-- s. If s has fewer than n characters, then we
-- return the whole of s.
take :: Int64 -> ByteString -> ByteString
-- | drop n s returns s without its first n
-- characters. If s has fewer than n characters, then we
-- return an empty string.
drop :: Int64 -> ByteString -> ByteString
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that satisfy the predicate; the second part
-- is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
-- | Converts a Haskell string into a UTF8 encoded bytestring.
fromString :: String -> ByteString
-- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid
-- characters are replaced with '\xFFFD'.
toString :: ByteString -> String
-- | Traverse a bytestring (left biased). This function is strict in the
-- accumulator.
foldl :: (a -> Char -> a) -> a -> ByteString -> a
-- | Traverse a bytestring (right biased).
foldr :: (Char -> a -> a) -> a -> ByteString -> a
-- | Counts the number of characters encoded in the bytestring. Note that
-- this includes replacement characters.
length :: ByteString -> Int
-- | Split a string into a list of lines. Lines are terminated by
-- '\n' or the end of the string. Empty lines may not be
-- terminated by the end of the string. See also lines'.
lines :: ByteString -> [ByteString]
-- | Split a string into a list of lines. Lines are terminated by
-- '\n' or the end of the string. Empty lines may not be
-- terminated by the end of the string. This function preserves the
-- terminators. See also lines.
lines' :: ByteString -> [ByteString]
module Data.ByteString.UTF8
-- | A space-efficient representation of a Word8 vector, supporting
-- many efficient operations.
--
-- A ByteString contains 8-bit bytes, or by using the operations
-- from Data.ByteString.Char8 it can be interpreted as containing
-- 8-bit characters.
data ByteString
-- | Try to extract a character from a byte string. Returns Nothing
-- if there are no more bytes in the byte string. Otherwise, it returns a
-- decoded character and the number of bytes used in its representation.
-- Errors are replaced by the character '\xFFFD'.
decode :: ByteString -> Maybe (Char, Int)
-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
-- | Get the first character of a byte string, if any. Malformed characters
-- are replaced by '\xFFFD'.
uncons :: ByteString -> Maybe (Char, ByteString)
-- | Split after a given number of characters. Negative values are treated
-- as if they are 0.
splitAt :: Int -> ByteString -> (ByteString, ByteString)
-- | take n s returns the first n characters of
-- s. If s has fewer than n characters, then we
-- return the whole of s.
take :: Int -> ByteString -> ByteString
-- | drop n s returns s without its first n
-- characters. If s has fewer than n characters, then we
-- return an empty string.
drop :: Int -> ByteString -> ByteString
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that satisfy the predicate; the second part
-- is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
-- | Converts a Haskell char into a UTF8 encoded bytestring.
fromChar :: Char -> ByteString
-- | Converts a Haskell string into a UTF8 encoded bytestring.
fromString :: String -> ByteString
-- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid
-- characters are replaced with '\xFFFD'.
toString :: ByteString -> String
-- | Traverse a bytestring (left biased). This function is strict in the
-- accumulator.
foldl :: (a -> Char -> a) -> a -> ByteString -> a
-- | Traverse a bytestring (right biased).
foldr :: (Char -> a -> a) -> a -> ByteString -> a
-- | Counts the number of characters encoded in the bytestring. Note that
-- this includes replacement characters.
length :: ByteString -> Int
-- | Split a string into a list of lines. Lines are terminated by
-- '\n' or the end of the string. Empty lines may not be
-- terminated by the end of the string. See also lines'.
lines :: ByteString -> [ByteString]
-- | Split a string into a list of lines. Lines are terminated by
-- '\n' or the end of the string. Empty lines may not be
-- terminated by the end of the string. This function preserves the
-- terminators. See also lines.
lines' :: ByteString -> [ByteString]
module Data.String.UTF8
-- | The type of strings that are represented using the UTF8 encoding. The
-- parameter is the type of the container for the representation.
data UTF8 string
class (Num s, Ord s) => UTF8Bytes b s | b -> s
-- | Converts a Haskell string into a UTF8 encoded string. Complexity:
-- linear.
fromString :: UTF8Bytes string index => String -> UTF8 string
-- | Convert a UTF8 encoded string into a Haskell string. Invalid
-- characters are replaced by replacement_char. Complexity:
-- linear.
toString :: UTF8Bytes string index => UTF8 string -> String
fromRep :: string -> UTF8 string
toRep :: UTF8 string -> string
-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
-- | Get the first character of a byte string, if any. Invalid characters
-- are replaced by replacement_char.
uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string)
-- | Split after a given number of characters. Negative values are treated
-- as if they are 0.
splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)
-- | take n s returns the first n characters of
-- s. If s has fewer than n characters, then we
-- return the whole of s.
take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
-- | drop n s returns s without its first n
-- characters. If s has fewer than n characters, then we
-- return an empty string.
drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that satisfy the predicate; the second part
-- is the rest of the string. Invalid characters are passed as
-- replacement_char to the predicate.
span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string. Invalid characters are passed as
-- replacement_char to the predicate.
break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
-- | Traverse a bytestring (left biased). This function is strict in the
-- accumulator.
foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a
-- | Traverse a bytestring (right biased).
foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a
-- | Counts the number of characters encoded in the bytestring. Note that
-- this includes replacement characters. The function is linear in the
-- number of bytes in the representation.
length :: UTF8Bytes string index => UTF8 string -> index
-- | Split a string into a list of lines. Lines are terminated by
-- '\n' or the end of the string. Empty lines may not be
-- terminated by the end of the string. See also lines'.
lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
-- | Split a string into a list of lines. Lines are terminated by
-- '\n' or the end of the string. Empty lines may not be
-- terminated by the end of the string. This function preserves the
-- terminators. See also lines.
lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
-- | Checks if there are no more bytes in the underlying representation.
null :: UTF8Bytes string index => UTF8 string -> Bool
-- | Extract the first character from the underlying representation, if one
-- is available. It also returns the number of bytes used in the
-- representation of the character. See also uncons.
decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index)
-- | Split after a given number of bytes in the underlying representation.
-- See also splitAt.
byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)
-- | Take only the given number of bytes from the underlying
-- representation. See also take.
byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
-- | Drop the given number of bytes from the underlying representation. See
-- also drop.
byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
instance GHC.Classes.Ord string => GHC.Classes.Ord (Data.String.UTF8.UTF8 string)
instance GHC.Classes.Eq string => GHC.Classes.Eq (Data.String.UTF8.UTF8 string)
instance Codec.Binary.UTF8.Generic.UTF8Bytes string index => GHC.Show.Show (Data.String.UTF8.UTF8 string)
instance Codec.Binary.UTF8.Generic.UTF8Bytes string index => Data.String.IsString (Data.String.UTF8.UTF8 string)