-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Support for reading and writing UTF8 Strings
--
-- A UTF8 layer for IO and Strings. The utf8-string package provides
-- operations for encoding UTF8 strings to Word8 lists and back, and for
-- reading and writing UTF8 without truncation.
@package utf8-string
@version 0.3.7
-- | Support for encoding UTF8 Strings to and from [Word8]
module Codec.Binary.UTF8.String
-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
encode :: String -> [Word8]
-- | Decode a UTF8 string packed into a list of Word8 values, directly to
-- String
decode :: [Word8] -> String
-- | Encode a string using encode and store the result in a
-- String.
encodeString :: String -> String
-- | Decode a string using decode, taking a String as input.
-- This is not safe, but it is necessary if UTF-8 encoded text has been
-- loaded into a String prior to being decoded.
decodeString :: String -> String
-- | Encode a single Haskell Char to a list of Word8 values, in UTF8
-- format.
encodeChar :: Char -> [Word8]
-- | isUTF8Encoded str tries to recognize input string as being in
-- UTF-8 form.
isUTF8Encoded :: String -> Bool
-- | utf8Encode str is a convenience function; it checks that
-- str isn't already UTF-8 encoded before encoding it. Sometimes useful,
-- but you are better off keeping track of the encoding so as to avoid
-- the cost of checking.
utf8Encode :: String -> String
module Codec.Binary.UTF8.Generic
class (Num s, Ord s) => UTF8Bytes b s | b -> s
bsplit :: UTF8Bytes b s => s -> b -> (b, b)
bdrop :: UTF8Bytes b s => s -> b -> b
buncons :: UTF8Bytes b s => b -> Maybe (Word8, b)
elemIndex :: UTF8Bytes b s => Word8 -> b -> Maybe s
empty :: UTF8Bytes b s => b
null :: UTF8Bytes b s => b -> Bool
pack :: UTF8Bytes b s => [Word8] -> b
tail :: UTF8Bytes b s => b -> b
-- | Try to extract a character from a byte string. Returns Nothing
-- if there are no more bytes in the byte string. Otherwise, it returns a
-- decoded character and the number of bytes used in its representation.
-- Errors are replaced by character '\xFFFD'.
decode :: UTF8Bytes b s => b -> Maybe (Char, s)
-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
-- | Get the first character of a byte string, if any. Malformed characters
-- are replaced by '\xFFFD'.
uncons :: UTF8Bytes b s => b -> Maybe (Char, b)
-- | Split after a given number of characters. Negative values are treated
-- as if they are 0.
splitAt :: UTF8Bytes b s => s -> b -> (b, b)
-- | take n s returns the first n characters of
-- s. If s has less than n characters, then we
-- return the whole of s.
take :: UTF8Bytes b s => s -> b -> b
-- | drop n s returns s without its first n
-- characters. If s has less than n characters, then we
-- return an empty string.
drop :: UTF8Bytes b s => s -> b -> b
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that satisfy the predicate; the second part
-- is the rest of the string. Invalid characters are passed as '\xFFFD'
-- to the predicate.
span :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b)
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
break :: UTF8Bytes b s => (Char -> Bool) -> b -> (b, b)
-- | Converts a Haskell string into a UTF8 encoded bytestring.
fromString :: UTF8Bytes b s => String -> b
-- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid
-- characters are replaced with '\xFFFD'.
toString :: UTF8Bytes b s => b -> String
-- | Traverse a bytestring (left biased). This function is strict in the
-- accumulator.
foldl :: UTF8Bytes b s => (a -> Char -> a) -> a -> b -> a
-- | Traverse a bytestring (right biased).
foldr :: UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a
-- | Counts the number of characters encoded in the bytestring. Note that
-- this includes replacement characters.
length :: UTF8Bytes b s => b -> s
-- | Split a string into a list of lines. Lines are terminated by '\n' or
-- the end of the string. Empty lines may not be terminated by the end of
-- the string. See also lines'.
lines :: UTF8Bytes b s => b -> [b]
-- | Split a string into a list of lines. Lines are terminated by '\n' or
-- the end of the string. Empty lines may not be terminated by the end of
-- the string. This function preserves the terminators. See also
-- lines.
lines' :: UTF8Bytes b s => b -> [b]
instance UTF8Bytes [Word8] Int
instance UTF8Bytes ByteString Int64
instance UTF8Bytes ByteString Int
module Data.String.UTF8
-- | The type of strings that are represented using the UTF8 encoding. The
-- parameter is the type of the container for the representation.
data UTF8 string
class (Num s, Ord s) => UTF8Bytes b s | b -> s
-- | Converts a Haskell string into a UTF8 encoded string. Complexity:
-- linear.
fromString :: UTF8Bytes string index => String -> UTF8 string
-- | Convert a UTF8 encoded string into a Haskell string. Invalid
-- characters are replaced by replacement_char. Complexity:
-- linear.
toString :: UTF8Bytes string index => UTF8 string -> String
fromRep :: string -> UTF8 string
toRep :: UTF8 string -> string
-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
-- | Get the first character of a byte string, if any. Invalid characters
-- are replaced by replacement_char.
uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string)
-- | Split after a given number of characters. Negative values are treated
-- as if they are 0. See also byteSplitAt.
splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)
-- | take n s returns the first n characters of
-- s. If s has less than n characters, then we
-- return the whole of s.
take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
-- | drop n s returns s without its first n
-- characters. If s has less than n characters, then we
-- return an empty string.
drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that satisfy the predicate; the second part
-- is the rest of the string. Invalid characters are passed as '\xFFFD'
-- to the predicate.
span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string. Invalid characters are passed as
-- replacement_char to the predicate.
break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
-- | Traverse a bytestring (left biased). This function is strict in the
-- accumulator.
foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a
-- | Traverse a bytestring (right biased).
foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a
-- | Counts the number of characters encoded in the bytestring. Note that
-- this includes replacement characters. The function is linear in the
-- number of bytes in the representation.
length :: UTF8Bytes string index => UTF8 string -> index
-- | Split a string into a list of lines. Lines are terminated by '\n' or
-- the end of the string. Empty lines may not be terminated by the end of
-- the string. See also lines'.
lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
-- | Split a string into a list of lines. Lines are terminated by '\n' or
-- the end of the string. Empty lines may not be terminated by the end of
-- the string. This function preserves the terminators. See also
-- lines.
lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
-- | Checks if there are no more bytes in the underlying representation.
null :: UTF8Bytes string index => UTF8 string -> Bool
-- | Extract the first character from the underlying representation, if one
-- is available. It also returns the number of bytes used in the
-- representation of the character. See also uncons,
-- byteDrop.
decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index)
-- | Split after a given number of bytes in the underlying representation.
-- See also splitAt.
byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)
-- | Take only the given number of bytes from the underlying
-- representation. See also take.
byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
-- | Drop the given number of bytes from the underlying representation. See
-- also drop.
byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
instance Eq string => Eq (UTF8 string)
instance Ord string => Ord (UTF8 string)
instance UTF8Bytes string index => Show (UTF8 string)
-- | String IO preserving UTF8 encoding.
module System.IO.UTF8
-- | The print function outputs a value of any printable type to the
-- standard output device. This function differs from the System.IO.print
-- in that it preserves any UTF8 encoding of the shown value.
print :: Show a => a -> IO ()
-- | Write a UTF8 string to the standard output device
putStr :: String -> IO ()
-- | The same as putStr, but adds a newline character.
putStrLn :: String -> IO ()
-- | Read a UTF8 line from the standard input device
getLine :: IO String
-- | The readLn function combines getLine and
-- readIO, preserving UTF8 encoding.
readLn :: Read a => IO a
openBinaryFile :: FilePath -> IOMode -> IO Handle
withBinaryFile :: FilePath -> IOMode -> (Handle -> IO a) -> IO a
-- | The readFile function reads a file and returns the contents of
-- the file as a UTF8 string. The file is read lazily, on demand, as with
-- getContents.
readFile :: FilePath -> IO String
-- | The computation writeFile file str writes the
-- UTF8 string str to the file file.
writeFile :: FilePath -> String -> IO ()
-- | The computation appendFile file str appends
-- the UTF8 string str to the file file.
appendFile :: FilePath -> String -> IO ()
interact :: (String -> String) -> IO ()
-- | Lazily read stdin as a UTF8 string.
getContents :: IO String
-- | Read a UTF8 line from a Handle
hGetLine :: Handle -> IO String
-- | Lazily read a UTF8 string from a Handle
hGetContents :: Handle -> IO String
-- | Write a UTF8 string to a Handle.
hPutStr :: Handle -> String -> IO ()
-- | Write a UTF8 string to a Handle, appending a newline.
hPutStrLn :: Handle -> String -> IO ()
-- | Support for UTF-8 based environment manipulation
module System.Environment.UTF8
getArgs :: IO [String]
getProgName :: IO String
getEnv :: String -> IO String
withArgs :: [String] -> IO a -> IO a
withProgName :: String -> IO a -> IO a
getEnvironment :: IO [(String, String)]
module Data.ByteString.UTF8
-- | A space-efficient representation of a Word8 vector, supporting many
-- efficient operations. A ByteString contains 8-bit characters
-- only.
--
-- Instances of Eq, Ord, Read, Show, Data, Typeable
data ByteString :: *
-- | Try to extract a character from a byte string. Returns Nothing
-- if there are no more bytes in the byte string. Otherwise, it returns a
-- decoded character and the number of bytes used in its representation.
-- Errors are replaced by character '\xFFFD'.
decode :: ByteString -> Maybe (Char, Int)
-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
-- | Get the first character of a byte string, if any. Malformed characters
-- are replaced by '\xFFFD'.
uncons :: ByteString -> Maybe (Char, ByteString)
-- | Split after a given number of characters. Negative values are treated
-- as if they are 0.
splitAt :: Int -> ByteString -> (ByteString, ByteString)
-- | take n s returns the first n characters of
-- s. If s has less than n characters, then we
-- return the whole of s.
take :: Int -> ByteString -> ByteString
-- | drop n s returns s without its first n
-- characters. If s has less than n characters, then we
-- return an empty string.
drop :: Int -> ByteString -> ByteString
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that satisfy the predicate; the second part
-- is the rest of the string. Invalid characters are passed as '\xFFFD'
-- to the predicate.
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
-- | Converts a Haskell string into a UTF8 encoded bytestring.
fromString :: String -> ByteString
-- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid
-- characters are replaced with '\xFFFD'.
toString :: ByteString -> String
-- | Traverse a bytestring (left biased). This function is strict in the
-- accumulator.
foldl :: (a -> Char -> a) -> a -> ByteString -> a
-- | Traverse a bytestring (right biased).
foldr :: (Char -> a -> a) -> a -> ByteString -> a
-- | Counts the number of characters encoded in the bytestring. Note that
-- this includes replacement characters.
length :: ByteString -> Int
-- | Split a string into a list of lines. Lines are terminated by '\n' or
-- the end of the string. Empty lines may not be terminated by the end of
-- the string. See also lines'.
lines :: ByteString -> [ByteString]
-- | Split a string into a list of lines. Lines are terminated by '\n' or
-- the end of the string. Empty lines may not be terminated by the end of
-- the string. This function preserves the terminators. See also
-- lines.
lines' :: ByteString -> [ByteString]
module Data.ByteString.Lazy.UTF8
-- | A space-efficient representation of a Word8 vector, supporting many
-- efficient operations. A ByteString contains 8-bit characters
-- only.
--
-- Instances of Eq, Ord, Read, Show, Data, Typeable
data ByteString :: *
-- | Try to extract a character from a byte string. Returns Nothing
-- if there are no more bytes in the byte string. Otherwise, it returns a
-- decoded character and the number of bytes used in its representation.
-- Errors are replaced by character '\xFFFD'.
decode :: ByteString -> Maybe (Char, Int64)
-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
-- | Get the first character of a byte string, if any. Malformed characters
-- are replaced by '\xFFFD'.
uncons :: ByteString -> Maybe (Char, ByteString)
-- | Split after a given number of characters. Negative values are treated
-- as if they are 0.
splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
-- | take n s returns the first n characters of
-- s. If s has less than n characters, then we
-- return the whole of s.
take :: Int64 -> ByteString -> ByteString
-- | drop n s returns s without its first n
-- characters. If s has less than n characters, then we
-- return an empty string.
drop :: Int64 -> ByteString -> ByteString
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that satisfy the predicate; the second part
-- is the rest of the string. Invalid characters are passed as '\xFFFD'
-- to the predicate.
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
-- | Split a string into two parts: the first is the longest prefix that
-- contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string. Invalid characters are passed as
-- '\xFFFD' to the predicate.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
-- | Converts a Haskell string into a UTF8 encoded bytestring.
fromString :: String -> ByteString
-- | Convert a UTF8 encoded bytestring into a Haskell string. Invalid
-- characters are replaced with '\xFFFD'.
toString :: ByteString -> String
-- | Traverse a bytestring (left biased). This function is strict in the
-- accumulator.
foldl :: (a -> Char -> a) -> a -> ByteString -> a
-- | Traverse a bytestring (right biased).
foldr :: (Char -> a -> a) -> a -> ByteString -> a
-- | Counts the number of characters encoded in the bytestring. Note that
-- this includes replacement characters.
length :: ByteString -> Int
-- | Split a string into a list of lines. Lines are terminated by '\n' or
-- the end of the string. Empty lines may not be terminated by the end of
-- the string. See also lines'.
lines :: ByteString -> [ByteString]
-- | Split a string into a list of lines. Lines are terminated by '\n' or
-- the end of the string. Empty lines may not be terminated by the end of
-- the string. This function preserves the terminators. See also
-- lines.
lines' :: ByteString -> [ByteString]