Copyright	(c) Dong Han 2017-2018
License	BSD
Maintainer	winterland1989@gmail.com
Stability	experimental
Portability	non-portable
Safe Haskell	None
Language	Haskell2010

Std.Data.Text.UTF8Codec

Description

UTF-8 codecs and helpers.

Synopsis

encodeCharLength :: Char -> Int
encodeChar :: MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar# :: MutableByteArray# s -> Int# -> Char# -> State# s -> (#State# s, Int##)
encodeCharModifiedUTF8 :: PrimMonad m => MutablePrimArray (PrimState m) Word8 -> Int -> Char -> m Int
encodeCharModifiedUTF8# :: MutableByteArray# s -> Int# -> Char# -> State# s -> (#State# s, Int##)
decodeChar :: PrimArray Word8 -> Int -> (#Char, Int#)
decodeChar_ :: PrimArray Word8 -> Int -> Char
decodeChar# :: ByteArray# -> Int# -> (#Char#, Int##)
decodeCharLen :: PrimArray Word8 -> Int -> Int
decodeCharLen# :: ByteArray# -> Int# -> Int#
decodeCharReverse :: PrimArray Word8 -> Int -> (#Char, Int#)
decodeCharReverse_ :: PrimArray Word8 -> Int -> Char
decodeCharReverse# :: ByteArray# -> Int# -> (#Char#, Int##)
decodeCharLenReverse :: PrimArray Word8 -> Int -> Int
decodeCharLenReverse# :: ByteArray# -> Int# -> Int#
between# :: Word# -> Word# -> Word# -> Bool
isContinueByte# :: Word# -> Bool
chr1# :: Word# -> Char#
chr2# :: Word# -> Word# -> Char#
chr3# :: Word# -> Word# -> Word# -> Char#
chr4# :: Word# -> Word# -> Word# -> Word# -> Char#
copyChar :: Int -> MutablePrimArray s Word8 -> Int -> PrimArray Word8 -> Int -> ST s ()
copyChar' :: Int -> MutablePrimArray s Word8 -> Int -> MutablePrimArray s Word8 -> Int -> ST s ()
replacementChar :: Char

Documentation

encodeCharLength :: Char -> Int Source #

Return a codepoint's encoded length in bytes

If the codepoint is invalid, we return 3(encoded bytes length of replacement char U+FFFD).

encodeChar :: MutablePrimArray s Word8 -> Int -> Char -> ST s Int Source #

Encode a Char into bytes, write replacementChar for invalid unicode codepoint.

This function assumed there're enough space for encoded bytes, and return the advanced index.

encodeChar# :: MutableByteArray# s -> Int# -> Char# -> State# s -> (#State# s, Int##) Source #

The unboxed version of encodeChar.

This function is marked as NOINLINE to reduce code size, and stop messing up simplifier due to too much branches.

encodeCharModifiedUTF8 :: PrimMonad m => MutablePrimArray (PrimState m) Word8 -> Int -> Char -> m Int Source #

Encode a Char into bytes with non-standard UTF-8 encoding(Used in Data.CBytes).

'\NUL' is encoded as two bytes C0 80 , '\xD800' ~ '\xDFFF' is encoded as a three bytes normal UTF-8 codepoint. This function assumed there're enough space for encoded bytes, and return the advanced index.

encodeCharModifiedUTF8# :: MutableByteArray# s -> Int# -> Char# -> State# s -> (#State# s, Int##) Source #

The unboxed version of encodeCharModifiedUTF8.

decodeChar :: PrimArray Word8 -> Int -> (#Char, Int#) Source #

Decode a Char from bytes

This function assumed all bytes are UTF-8 encoded, and the index param point to the beginning of a codepoint, the decoded character and the advancing offset are returned.

It's annoying to use unboxed tuple here but we really don't want allocation even if GHC can't optimize it away.

decodeChar_ :: PrimArray Word8 -> Int -> Char Source #

decodeChar# :: ByteArray# -> Int# -> (#Char#, Int##) Source #

The unboxed version of decodeChar

This function is marked as NOINLINE to reduce code size, and stop messing up simplifier due to too much branches.

decodeCharLen :: PrimArray Word8 -> Int -> Int Source #

Decode a codepoint's length in bytes

This function assumed all bytes are UTF-8 encoded, and the index param point to the beginning of a codepoint.

decodeCharLen# :: ByteArray# -> Int# -> Int# Source #

The unboxed version of decodeCharLen

This function is marked as NOINLINE to reduce code size, and stop messing up simplifier due to too much branches.

decodeCharReverse :: PrimArray Word8 -> Int -> (#Char, Int#) Source #

Decode a Char from bytes in rerverse order.

This function assumed all bytes are UTF-8 encoded, and the index param point to the end of a codepoint, the decoded character and the backward advancing offset are returned.

decodeCharReverse_ :: PrimArray Word8 -> Int -> Char Source #

decodeCharReverse# :: ByteArray# -> Int# -> (#Char#, Int##) Source #

The unboxed version of decodeCharReverse

This function is marked as NOINLINE to reduce code size, and stop messing up simplifier due to too much branches.

decodeCharLenReverse :: PrimArray Word8 -> Int -> Int Source #

Decode a codepoint's length in bytes in reverse order.

This function assumed all bytes are UTF-8 encoded, and the index param point to the end of a codepoint.

decodeCharLenReverse# :: ByteArray# -> Int# -> Int# Source #

The unboxed version of decodeCharLenReverse

This function is marked as NOINLINE to reduce code size, and stop messing up simplifier due to too much branches.

between# :: Word# -> Word# -> Word# -> Bool Source #

isContinueByte# :: Word# -> Bool Source #

chr1# :: Word# -> Char# Source #

chr2# :: Word# -> Word# -> Char# Source #

chr3# :: Word# -> Word# -> Word# -> Char# Source #

chr4# :: Word# -> Word# -> Word# -> Word# -> Char# Source #

copyChar :: Int -> MutablePrimArray s Word8 -> Int -> PrimArray Word8 -> Int -> ST s () Source #

Unrolled copy loop for copying a utf8-encoded codepoint from source array to target array.

copyChar' :: Int -> MutablePrimArray s Word8 -> Int -> MutablePrimArray s Word8 -> Int -> ST s () Source #

Unrolled copy loop for copying a utf8-encoded codepoint from source array to target array.

replacementChar :: Char Source #

xFFFD, which will be encoded as 0xEF 0xBF 0xBD 3 bytes.