-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | String encoding conversion
--
-- Provides an interface to the POSIX iconv library functions for string
-- encoding conversion.
@package iconv
@version 0.4
-- | String encoding conversion
module Codec.Text.IConv
-- | Convert text from one named string encoding to another.
--
-- - The conversion is done lazily.
-- - An exception is thrown if conversion between the two encodings is
--   not supported.
-- - An exception is thrown if there are any encoding conversion
--   errors.
--
-- When exceptions are not acceptable, use convertFuzzy, convertStrictly
-- or convertLazily instead; each of them is documented as not throwing.
--
-- NOTE(review): the two EncodingName arguments are presumably the input
-- encoding followed by the output encoding - confirm against the
-- implementation.
convert :: EncodingName -> EncodingName -> ByteString -> ByteString
-- | A string encoding name, e.g. "UTF-8" or "LATIN1".
--
-- The range of string encodings available is determined by the
-- capabilities of the underlying iconv implementation.
--
-- When using the GNU C or libiconv libraries, the permitted values are
-- those listed by the "iconv --list" command, and all combinations of
-- the listed values are supported.
type EncodingName = String
-- | Convert text, ignoring encoding conversion problems.
--
-- If invalid byte sequences are found in the input they are ignored and
-- conversion continues if possible. This is not always possible,
-- especially with stateful encodings. No placeholder character is
-- inserted into the output, so there will be no indication that invalid
-- byte sequences were encountered.
--
-- If there are characters in the input that have no direct corresponding
-- character in the output encoding then they are dealt with in one of
-- two ways, depending on the Fuzzy argument:
--
-- - Transliterate: try to transliterate them into the nearest
--   corresponding character(s), or use a replacement character
--   (typically '?' or the Unicode replacement character).
-- - Discard: simply discard them.
--
-- In either case, no exceptions will occur. In the case of unrecoverable
-- errors, the output will simply be truncated. This includes the case of
-- unrecognised or unsupported encoding names; the output will be empty.
--
-- - This function only works with the GNU iconv implementation, which
--   provides this feature beyond what is required by the iconv
--   specification.
--
convertFuzzy :: Fuzzy -> EncodingName -> EncodingName -> ByteString -> ByteString
-- | Selects how convertFuzzy treats input characters that have no direct
-- corresponding character in the output encoding.
data Fuzzy
-- | Try to transliterate the character into the nearest corresponding
-- character(s), or use a replacement character (typically '?' or the
-- Unicode replacement character).
Transliterate :: Fuzzy
-- | Simply discard the character.
Discard :: Fuzzy
-- | This variant does the conversion all in one go, so it is able to
-- report any conversion errors up front. It exposes all the possible
-- error conditions and never throws exceptions.
--
-- The disadvantage is that no output can be produced before the whole
-- input is consumed. This might be problematic for very large inputs.
--
-- Beware of the result type: it is Either ByteString ConversionError,
-- so the ConversionError is carried by Right and the ByteString by Left.
-- This is the reverse of the usual "Left is the error" convention.
--
-- NOTE(review): the Left ByteString is presumably the fully converted
-- output - confirm against the implementation.
convertStrictly :: EncodingName -> EncodingName -> ByteString -> Either ByteString ConversionError
-- | This version provides a more complete but less convenient conversion
-- interface. It exposes all the possible error conditions and never
-- throws exceptions.
--
-- The conversion is still lazy. It returns a list of spans, where a span
-- may be an ordinary span of output text or a conversion error. As
-- documented on Span, a conversion error, if present, is the last span
-- in the list.
--
-- This somewhat complex interface allows both for lazy conversion and
-- for precise reporting of conversion problems. The other functions
-- convert and convertStrictly are actually simple wrappers around this
-- function.
convertLazily :: EncodingName -> EncodingName -> ByteString -> [Span]
-- | The ways in which an encoding conversion can fail, as reported by
-- convertStrictly and, wrapped in a Span, by convertLazily.
data ConversionError
-- | The conversion from the input to output string encoding is not
-- supported by the underlying iconv implementation. This is usually
-- because a named encoding is not recognised or support for it was not
-- enabled on this system.
--
-- The POSIX standard does not guarantee that all possible combinations
-- of recognised string encodings are supported; however, most common
-- implementations do support all possible combinations.
--
-- Note that the constructor name is spelled with a single "p"
-- ("Unsuported"). The spelling is part of the exported API, so it must be
-- written exactly this way when pattern matching.
--
-- NOTE(review): the two EncodingName fields are presumably the input and
-- output encoding names, in that order - confirm against the
-- implementation.
UnsuportedConversion :: EncodingName -> EncodingName -> ConversionError
-- | This covers two possible conversion errors:
--
-- - There is a byte sequence in the input that is not valid in the
--   input encoding.
-- - There is a valid character in the input that has no corresponding
--   character in the output encoding.
--
-- Unfortunately iconv does not let us distinguish these two cases. In
-- either case, the Int parameter gives the byte offset in the input of
-- the unrecognised bytes or unconvertible character.
InvalidChar :: Int -> ConversionError
-- | This error covers the case where the end of the input has trailing
-- bytes that are the initial bytes of a valid character in the input
-- encoding. In other words, it looks like the input ended in the middle
-- of a multi-byte character. This would often be an indication that the
-- input was somehow truncated. Again, the Int parameter is the byte
-- offset in the input where the incomplete character starts.
IncompleteChar :: Int -> ConversionError
-- | An unexpected iconv error. The iconv spec lists a number of possible
-- expected errors but does not guarantee that no other errors can
-- occur.
--
-- This error can occur either immediately, which might indicate that the
-- iconv installation is messed up somehow, or it could occur later, which
-- might indicate resource exhaustion or some other internal iconv error.
--
-- Use Foreign.C.Error.errnoToIOError to get slightly more
-- information on what the error could possibly be.
UnexpectedError :: Errno -> ConversionError
-- | Turn a ConversionError into an Exception value.
--
-- NOTE(review): presumably this is what convert uses to build the
-- exception it throws on failure - confirm against the implementation.
reportConversionError :: ConversionError -> Exception
-- | Output spans from encoding conversion, as produced by convertLazily.
-- When nothing goes wrong we expect only Span values. If there are
-- conversion errors we get other span types.
data Span
-- | An ordinary output span in the target encoding.
Span :: !ByteString -> Span
-- | An error in the conversion process. If this occurs it will be the
-- last span.
ConversionError :: !ConversionError -> Span