{- | Utilities for the UTF-8 encoding. -} module Iri.Utf8CodePoint where import Iri.Prelude {-| Church encoding of a UTF8-encoded character. -} type Utf8CodePoint = forall a. (Word8 -> a) -> (Word8 -> Word8 -> a) -> (Word8 -> Word8 -> Word8 -> a) -> (Word8 -> Word8 -> Word8 -> Word8 -> a) -> a {-# INLINE char #-} char :: Char -> Utf8CodePoint char = unicodeCodePoint . ord {-# INLINE unicodeCodePoint #-} unicodeCodePoint :: Int -> Utf8CodePoint unicodeCodePoint x f1 f2 f3 f4 = if x <= 0x7F then f1 (fromIntegral x) else if x <= 0x07FF then f2 (fromIntegral ((x `shiftR` 6) + 0xC0)) (fromIntegral ((x .&. 0x3F) + 0x80)) else if x <= 0xFFFF then f3 (fromIntegral (x `shiftR` 12) + 0xE0) (fromIntegral ((x `shiftR` 6) .&. 0x3F) + 0x80) (fromIntegral (x .&. 0x3F) + 0x80) else f4 (fromIntegral (x `shiftR` 18) + 0xF0) (fromIntegral ((x `shiftR` 12) .&. 0x3F) + 0x80) (fromIntegral ((x `shiftR` 6) .&. 0x3F) + 0x80) (fromIntegral (x .&. 0x3F) + 0x80)