-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | ByteString ↔ Text converter based on GHC.IO.Encoding
--   
--   Please see the README on GitHub at
--   <a>https://github.com/msakai/bytestring-encoding#readme</a>
@package bytestring-encoding
@version 0.1.1.0


-- | <a>ByteString</a> ↔ <a>Text</a> converter based on
--   <a>GHC.IO.Encoding</a>.
module Data.ByteString.Lazy.Encoding

-- | Encode a lazy <a>Text</a> into a lazy <a>ByteString</a> using a given
--   <a>TextEncoding</a>.
encode :: TextEncoding -> Text -> ByteString

-- | Decode a lazy <a>ByteString</a> to a lazy <a>Text</a> using a given
--   <a>TextEncoding</a>.
decode :: TextEncoding -> ByteString -> Text

-- | A <a>TextEncoding</a> is a specification of a conversion scheme
--   between sequences of bytes and sequences of Unicode characters.
--   
--   For example, UTF-8 is an encoding of Unicode characters into a
--   sequence of bytes. The <a>TextEncoding</a> for UTF-8 is <a>utf8</a>.
data TextEncoding

-- | The Latin1 (ISO8859-1) encoding. This encoding maps bytes directly to
--   the first 256 Unicode code points, and is thus not a complete Unicode
--   encoding. An attempt to write a character greater than <tt>'\255'</tt>
--   to a <a>Handle</a> using the <a>latin1</a> encoding will result in an
--   error.
latin1 :: TextEncoding

-- | The UTF-8 Unicode encoding
utf8 :: TextEncoding

-- | The UTF-8 Unicode encoding, with a byte-order-mark (BOM; the byte
--   sequence 0xEF 0xBB 0xBF). This encoding behaves like <a>utf8</a>,
--   except that on input, the BOM sequence is ignored at the beginning of
--   the stream, and on output, the BOM sequence is prepended.
--   
--   The byte-order-mark is strictly unnecessary in UTF-8, but is sometimes
--   used to identify the encoding of a file.
utf8_bom :: TextEncoding

-- | The UTF-16 Unicode encoding (a byte-order-mark should be used to
--   indicate endianness).
utf16 :: TextEncoding

-- | The UTF-16 Unicode encoding (litte-endian)
utf16le :: TextEncoding

-- | The UTF-16 Unicode encoding (big-endian)
utf16be :: TextEncoding

-- | The UTF-32 Unicode encoding (a byte-order-mark should be used to
--   indicate endianness).
utf32 :: TextEncoding

-- | The UTF-32 Unicode encoding (litte-endian)
utf32le :: TextEncoding

-- | The UTF-32 Unicode encoding (big-endian)
utf32be :: TextEncoding

-- | The Unicode encoding of the current locale
--   
--   This is the initial locale encoding: if it has been subsequently
--   changed by <a>setLocaleEncoding</a> this value will not reflect that
--   change.
localeEncoding :: TextEncoding

-- | An encoding in which Unicode code points are translated to bytes by
--   taking the code point modulo 256. When decoding, bytes are translated
--   directly into the equivalent code point.
--   
--   This encoding never fails in either direction. However, encoding
--   discards information, so encode followed by decode is not the
--   identity.
char8 :: TextEncoding

-- | Look up the named Unicode encoding. May fail with
--   
--   <ul>
--   <li><a>isDoesNotExistError</a> if the encoding is unknown</li>
--   </ul>
--   
--   The set of known encodings is system-dependent, but includes at least:
--   
--   <ul>
--   <li><pre>UTF-8</pre></li>
--   <li><tt>UTF-16</tt>, <tt>UTF-16BE</tt>, <tt>UTF-16LE</tt></li>
--   <li><tt>UTF-32</tt>, <tt>UTF-32BE</tt>, <tt>UTF-32LE</tt></li>
--   </ul>
--   
--   There is additional notation (borrowed from GNU iconv) for specifying
--   how illegal characters are handled:
--   
--   <ul>
--   <li>a suffix of <tt>//IGNORE</tt>, e.g. <tt>UTF-8//IGNORE</tt>, will
--   cause all illegal sequences on input to be ignored, and on output will
--   drop all code points that have no representation in the target
--   encoding.</li>
--   <li>a suffix of <tt>//TRANSLIT</tt> will choose a replacement
--   character for illegal sequences or code points.</li>
--   <li>a suffix of <tt>//ROUNDTRIP</tt> will use a PEP383-style escape
--   mechanism to represent any invalid bytes in the input as Unicode
--   codepoints (specifically, as lone surrogates, which are normally
--   invalid in UTF-32). Upon output, these special codepoints are detected
--   and turned back into the corresponding original byte.</li>
--   </ul>
--   
--   In theory, this mechanism allows arbitrary data to be roundtripped via
--   a <a>String</a> with no loss of data. In practice, there are two
--   limitations to be aware of:
--   
--   <ol>
--   <li>This only stands a chance of working for an encoding which is an
--   ASCII superset, as for security reasons we refuse to escape any bytes
--   smaller than 128. Many encodings of interest are ASCII supersets (in
--   particular, you can assume that the locale encoding is an ASCII
--   superset) but many (such as UTF-16) are not.</li>
--   <li>If the underlying encoding is not itself roundtrippable, this
--   mechanism can fail. Roundtrippable encodings are those which have an
--   injective mapping into Unicode. Almost all encodings meet this
--   criteria, but some do not. Notably, Shift-JIS (CP932) and Big5 contain
--   several different encodings of the same Unicode codepoint.</li>
--   </ol>
--   
--   On Windows, you can access supported code pages with the prefix
--   <tt>CP</tt>; for example, <tt>"CP1250"</tt>.
mkTextEncoding :: String -> IO TextEncoding


-- | <a>ByteString</a> ↔ <a>Text</a> converter based on
--   <a>GHC.IO.Encoding</a>.
module Data.ByteString.Encoding

-- | Encode a strict <a>Text</a> into strict <a>ByteString</a> using a
--   given <a>TextEncoding</a>.
encode :: TextEncoding -> Text -> ByteString

-- | Decode a strict <a>ByteString</a> to a strit <a>Text</a> using a given
--   <a>TextEncoding</a>.
decode :: TextEncoding -> ByteString -> Text

-- | A <a>TextEncoding</a> is a specification of a conversion scheme
--   between sequences of bytes and sequences of Unicode characters.
--   
--   For example, UTF-8 is an encoding of Unicode characters into a
--   sequence of bytes. The <a>TextEncoding</a> for UTF-8 is <a>utf8</a>.
data TextEncoding

-- | The Latin1 (ISO8859-1) encoding. This encoding maps bytes directly to
--   the first 256 Unicode code points, and is thus not a complete Unicode
--   encoding. An attempt to write a character greater than <tt>'\255'</tt>
--   to a <a>Handle</a> using the <a>latin1</a> encoding will result in an
--   error.
latin1 :: TextEncoding

-- | The UTF-8 Unicode encoding
utf8 :: TextEncoding

-- | The UTF-8 Unicode encoding, with a byte-order-mark (BOM; the byte
--   sequence 0xEF 0xBB 0xBF). This encoding behaves like <a>utf8</a>,
--   except that on input, the BOM sequence is ignored at the beginning of
--   the stream, and on output, the BOM sequence is prepended.
--   
--   The byte-order-mark is strictly unnecessary in UTF-8, but is sometimes
--   used to identify the encoding of a file.
utf8_bom :: TextEncoding

-- | The UTF-16 Unicode encoding (a byte-order-mark should be used to
--   indicate endianness).
utf16 :: TextEncoding

-- | The UTF-16 Unicode encoding (litte-endian)
utf16le :: TextEncoding

-- | The UTF-16 Unicode encoding (big-endian)
utf16be :: TextEncoding

-- | The UTF-32 Unicode encoding (a byte-order-mark should be used to
--   indicate endianness).
utf32 :: TextEncoding

-- | The UTF-32 Unicode encoding (litte-endian)
utf32le :: TextEncoding

-- | The UTF-32 Unicode encoding (big-endian)
utf32be :: TextEncoding

-- | The Unicode encoding of the current locale
--   
--   This is the initial locale encoding: if it has been subsequently
--   changed by <a>setLocaleEncoding</a> this value will not reflect that
--   change.
localeEncoding :: TextEncoding

-- | An encoding in which Unicode code points are translated to bytes by
--   taking the code point modulo 256. When decoding, bytes are translated
--   directly into the equivalent code point.
--   
--   This encoding never fails in either direction. However, encoding
--   discards information, so encode followed by decode is not the
--   identity.
char8 :: TextEncoding

-- | Look up the named Unicode encoding. May fail with
--   
--   <ul>
--   <li><a>isDoesNotExistError</a> if the encoding is unknown</li>
--   </ul>
--   
--   The set of known encodings is system-dependent, but includes at least:
--   
--   <ul>
--   <li><pre>UTF-8</pre></li>
--   <li><tt>UTF-16</tt>, <tt>UTF-16BE</tt>, <tt>UTF-16LE</tt></li>
--   <li><tt>UTF-32</tt>, <tt>UTF-32BE</tt>, <tt>UTF-32LE</tt></li>
--   </ul>
--   
--   There is additional notation (borrowed from GNU iconv) for specifying
--   how illegal characters are handled:
--   
--   <ul>
--   <li>a suffix of <tt>//IGNORE</tt>, e.g. <tt>UTF-8//IGNORE</tt>, will
--   cause all illegal sequences on input to be ignored, and on output will
--   drop all code points that have no representation in the target
--   encoding.</li>
--   <li>a suffix of <tt>//TRANSLIT</tt> will choose a replacement
--   character for illegal sequences or code points.</li>
--   <li>a suffix of <tt>//ROUNDTRIP</tt> will use a PEP383-style escape
--   mechanism to represent any invalid bytes in the input as Unicode
--   codepoints (specifically, as lone surrogates, which are normally
--   invalid in UTF-32). Upon output, these special codepoints are detected
--   and turned back into the corresponding original byte.</li>
--   </ul>
--   
--   In theory, this mechanism allows arbitrary data to be roundtripped via
--   a <a>String</a> with no loss of data. In practice, there are two
--   limitations to be aware of:
--   
--   <ol>
--   <li>This only stands a chance of working for an encoding which is an
--   ASCII superset, as for security reasons we refuse to escape any bytes
--   smaller than 128. Many encodings of interest are ASCII supersets (in
--   particular, you can assume that the locale encoding is an ASCII
--   superset) but many (such as UTF-16) are not.</li>
--   <li>If the underlying encoding is not itself roundtrippable, this
--   mechanism can fail. Roundtrippable encodings are those which have an
--   injective mapping into Unicode. Almost all encodings meet this
--   criteria, but some do not. Notably, Shift-JIS (CP932) and Big5 contain
--   several different encodings of the same Unicode codepoint.</li>
--   </ol>
--   
--   On Windows, you can access supported code pages with the prefix
--   <tt>CP</tt>; for example, <tt>"CP1250"</tt>.
mkTextEncoding :: String -> IO TextEncoding