-- |
-- Module      : Data.CompactString.Encodings
-- License     : BSD-style
-- Maintainer  : twanvl@gmail.com
-- Stability   : experimental
-- Portability : untested
-- 
-- Different encodings of characters into bytes.
--
module Data.CompactString.Encodings (
        
        -- * Unicode encodings
        UTF8(..),
        BE(..), LE(..), Native,
        UTF16(..), UTF16BE, UTF16LE, UTF16Native,
        UTF32(..), UTF32BE, UTF32LE, UTF32Native,
        
        -- * Other encodings
        ASCII(..),
        Latin1(..),
        
        -- * Non-standard encodings
        Compact(..)
        
        ) where

import Data.Word
import Data.Bits

import Foreign.Ptr              (Ptr)

import Control.Monad            (when, unless)

import Data.CompactString.Internal

-- -----------------------------------------------------------------------------
--
-- Encoding : UTF8
--

-- | Tag representing the UTF-8 encoding.
--   Use @'CompactString' UTF8@ for UTF-8 encoded strings.
data UTF8 = UTF8

instance Encoding UTF8 where
        pokeCharFun _ c = case ord c of
         x | x < 0x80    -> (1, \p ->    poke        p   $  fromIntegral  x )
           | x < 0x800   -> (2, \p -> do poke        p   $  fromIntegral (x `shiftR`  6)           .|. 0xC0
                                         pokeByteOff p 1 $ (fromIntegral  x              .&. 0x3F) .|. 0x80)
           | x < 0x10000 -> (3, \p -> do poke        p   $  fromIntegral (x `shiftR` 12)           .|. 0xE0
                                         pokeByteOff p 1 $ (fromIntegral (x `shiftR`  6) .&. 0x3F) .|. 0x80
                                         pokeByteOff p 2 $ (fromIntegral  x              .&. 0x3F) .|. 0x80)
           | otherwise   -> (4, \p -> do poke        p   $  fromIntegral (x `shiftR` 18)           .|. 0xF0
                                         pokeByteOff p 1 $ (fromIntegral (x `shiftR` 12) .&. 0x3F) .|. 0x80
                                         pokeByteOff p 2 $ (fromIntegral (x `shiftR`  6) .&. 0x3F) .|. 0x80
                                         pokeByteOff p 3 $ (fromIntegral  x              .&. 0x3F) .|. 0x80)
        {-# INLINE pokeCharFun #-}
        
        
        peekChar _ p = do
                a <- peek p
                case charLenUTF8 a of
                 1 ->     return (1, decodeUTF8_1 a)
                 2 -> do  b <- peekByteOff p 1
                          return (2, decodeUTF8_2 a b)
                 3 -> do  b <- peekByteOff p 1
                          c <- peekByteOff p 2
                          return (3, decodeUTF8_3 a b c)
                 _ -> do  b <- peekByteOff p 1
                          c <- peekByteOff p 2
                          d <- peekByteOff p 3
                          return (4, decodeUTF8_4 a b c d)
        
        peekCharLen _ p = do
                a <- peek p
                return (charLenUTF8 a)
        {-# INLINE peekCharLen #-}
        
        peekCharRev _ p =
                peek p             >>= \a -> if not (a `testBit` 7) then return (1, decodeUTF8_1 a)     else
                peekByteOff p (-1) >>= \b -> if     (b `testBit` 6) then return (2, decodeUTF8_2 b a)   else
                peekByteOff p (-2) >>= \c -> if     (c `testBit` 6) then return (3, decodeUTF8_3 c b a) else
                peekByteOff p (-3) >>= \d ->                             return (4, decodeUTF8_4 d c b a)
        
        peekCharLenRev _ p = 
                peek p             >>= \a -> if not (a `testBit` 7) then return 1 else
                peekByteOff p (-1) >>= \b -> if     (b `testBit` 6) then return 2 else
                peekByteOff p (-2) >>= \c -> if     (c `testBit` 6) then return 3 else
                                                                         return 4
        {-# INLINE peekCharLenRev #-}
        
        
        peekCharSafe _ l p = do
                a <- peek p
                case () of
                 _ | not (a `testBit` 7) -> returnChr 1 (fromIntegral a)
                   | not (a `testBit` 6) -> failMessage "decode UTF8" "Invalid UTF8"
                   | not (a `testBit` 5) -> do  require 2
                                                b <- peekMore 1
                                                let x = ((fromIntegral a .&. 0x1F) `shiftL` 6) .|. b
                                                tooShort (x < 0x80)
                                                returnChr 2 x
                   | not (a `testBit` 4) -> do  require 3
                                                b <- peekMore 1
                                                c <- peekMore 2
                                                let x = ((fromIntegral a .&. 0x0F) `shiftL` 12) .|. (b `shiftL` 6) .|. c
                                                tooShort (x < 0x800)
                                                returnChr 3 x
                   | not (a `testBit` 3) -> do  require 4
                                                b <- peekMore 1
                                                c <- peekMore 2
                                                d <- peekMore 3
                                                let x = ((fromIntegral a .&. 0x07) `shiftL` 18) .|. (b `shiftL` 12) .|. (c `shiftL` 6) .|. d
                                                tooShort (x < 0x10000)
                                                returnChr 4 x
                   | otherwise           -> failMessage "decode UTF8" "Invalid UTF8"
         where	peekMore off = do  x <- peekByteOff p off
                                   when (x < 0x80 || x > 0xBF) (failMessage "decode UTF8" "Invalid UTF8")
                                   return $ fromIntegral (x .&. 0x3F)
                require len = when (l < len) (failMessage "decode UTF8" "Not enough input")
                tooShort b  = when b         (failMessage "decode UTF8" "Shorter encoding possible")
        
        
        copyChar enc src dst = do
                l <- peekCharLen enc src
                memcpy dst src (fromIntegral l)
                return l
        copyCharRev enc src dst = do
                l <- peekCharLenRev enc src
                memcpy dst (src `plusPtr` (1 - l)) (fromIntegral l)
                return l
        
        
        containsASCII  _ = True
        validOrdering  _ = True
        validSubstring _ = True
        byteCount _ n = 4 * n -- A char is at most 4 bytes


-- | Length of a character is determined by the first byte
charLenUTF8 :: Word8 -> Int
charLenUTF8 x
 | not (x `testBit` 7) = 1
 | not (x `testBit` 5) = 2
 | not (x `testBit` 4) = 3
 | otherwise           = 4 -- assumes the bytestring is valid UTF8
{-# INLINE charLenUTF8 #-}

-- | Decode a UTF8 encoded character
decodeUTF8_1 :: Word8 -> Char
decodeUTF8_1 a
 = unsafeChr $ (fromIntegral a)
decodeUTF8_2 :: Word8 -> Word8 -> Char
decodeUTF8_2 a b
 = unsafeChr $ (fromIntegral a `shiftL` 6)
         `xor` (fromIntegral b)
         `xor` 0x3080
decodeUTF8_3 :: Word8 -> Word8 -> Word8 -> Char
decodeUTF8_3 a b c
 = unsafeChr $ (fromIntegral a `shiftL` 12)
         `xor` (fromIntegral b `shiftL` 6)
         `xor` (fromIntegral c)
         `xor` 0xE2080
decodeUTF8_4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
decodeUTF8_4 a b c d
 = unsafeChr $ (fromIntegral a `shiftL` 18)
         `xor` (fromIntegral b `shiftL` 12)
         `xor` (fromIntegral c `shiftL` 6)
         `xor` (fromIntegral d)
         `xor` 0x3C82080
-- NOTE: the masks at the end are from the tag bits
-- for example the last one is:
--   11110001000010000010000000


-- -----------------------------------------------------------------------------
--
-- Endianness
--

-- | Tag representing big endian encoding
data BE = BE
-- | Tag representing little endian encoding
data LE = LE
-- | The platform native endianness
type Native = LE -- TODO : detect

-- | Class representing endianness
class Endian e where
        -- | Read a 16 bit word from the given location
        peekWord16 :: Proxy (t e) -> Ptr Word8 -> IO Word16
        -- | Write a 16 bit word to the given location
        pokeWord16 :: Proxy (t e) -> Ptr Word8 -> Word16 -> IO ()
        -- | Read a 32 bit word from the given location
        peekWord32 :: Proxy (t e) -> Ptr Word8 -> IO Word32
        -- | Write a 32 bit word to the given location
        pokeWord32 :: Proxy (t e) -> Ptr Word8 -> Word32 -> IO ()
        -- | Is this a big endian encoding
        isBE :: Proxy (t e) -> Bool

instance Endian BE where
        peekWord16 _ ptr = do
                a <- peek        ptr
                b <- peekByteOff ptr 1
                return ((fromIntegral a `shiftL` 8) .|. fromIntegral b)
        {-# INLINE peekWord16 #-}
        pokeWord16 _ ptr x = do
                poke        ptr   $ fromIntegral (x `shiftR` 8)
                pokeByteOff ptr 1 $ fromIntegral  x
        {-# INLINE pokeWord16 #-}
        peekWord32 _ ptr = do
                a <- peek        ptr
                b <- peekByteOff ptr 1
                c <- peekByteOff ptr 2
                d <- peekByteOff ptr 3
                return $ (fromIntegral a `shiftL` 24) .|.
                         (fromIntegral b `shiftL` 16) .|.
                         (fromIntegral c `shiftL` 8 ) .|.
                         (fromIntegral d            )
        {-# INLINE peekWord32 #-}
        pokeWord32 _ ptr x = do
                poke        ptr   $ fromIntegral (x `shiftR` 24)
                pokeByteOff ptr 1 $ fromIntegral (x `shiftR` 16)
                pokeByteOff ptr 2 $ fromIntegral (x `shiftR` 8)
                pokeByteOff ptr 3 $ fromIntegral x
        {-# INLINE pokeWord32 #-}
        isBE _ = True

instance Endian LE where
        peekWord16 _ ptr = do
                a <- peek        ptr
                b <- peekByteOff ptr 1
                return ((fromIntegral b `shiftL` 8) .|. fromIntegral a)
        {-# INLINE peekWord16 #-}
        pokeWord16 _ ptr x = do
                poke        ptr   $ fromIntegral  x
                pokeByteOff ptr 1 $ fromIntegral (x `shiftR` 8)
        {-# INLINE pokeWord16 #-}
        peekWord32 _ ptr = do
                a <- peek        ptr
                b <- peekByteOff ptr 1
                c <- peekByteOff ptr 2
                d <- peekByteOff ptr 3
                return $ (fromIntegral d `shiftL` 24) .|.
                         (fromIntegral c `shiftL` 16) .|.
                         (fromIntegral b `shiftL` 8 ) .|.
                         (fromIntegral a            )
        {-# INLINE peekWord32 #-}
        pokeWord32 _ ptr x = do
                poke        ptr   $ fromIntegral x
                pokeByteOff ptr 1 $ fromIntegral (x `shiftR` 8)
                pokeByteOff ptr 2 $ fromIntegral (x `shiftR` 16)
                pokeByteOff ptr 3 $ fromIntegral (x `shiftR` 24)
        {-# INLINE pokeWord32 #-}
        isBE _ = False

-- -----------------------------------------------------------------------------
--
-- Encoding : UTF16
--

-- | Tag representing the UTF-16 encoding
data UTF16 endianness = UTF16 endianness

-- | Tag representing the big endian UTF-16 encoding, aka. UTF-16BE.
type UTF16BE = UTF16 BE

-- | Tag representing the little endian UTF-16 encoding, aka. UTF-16LE.
type UTF16LE = UTF16 LE

-- | Tag representing the platform native UTF-16 encoding.
type UTF16Native = UTF16 Native

instance Endian e => Encoding (UTF16 e) where
        pokeCharFun enc c
         | isSurrogate x = (2, \_ -> failMessage "encoding UTF16" "Surrogate character")
         | x <= 0xFFFF   = (2, \p -> pokeWord16 enc p $ fromIntegral x)
         | otherwise     = (4, \p -> pokeWord16 enc p               h
                                >> pokeWord16 enc (p `plusPtr` 2) l)
         where  x  = ord c
                x' = x - 0x10000
                h  = fromIntegral $ (x' `shiftR` 10) .|. 0xD800
                l  = fromIntegral $ (x' .&. 0x3FF)   .|. 0xDC00
        
        pokeCharLen _ c = if ord c <= 0xFFFF then 2 else 4
        
        peekChar       enc p = do  a <- peekWord16 enc p
                                   if isSurrogate a
                                    then do
                                       b <- peekWord16 enc (p `plusPtr` 2)
                                       decodeUTF16 a b
                                    else
                                       return (2, unsafeChr $ fromIntegral a)
        peekCharRev    enc p = do  a <- peekWord16 enc (p `plusPtr` (-1))
                                   if isSurrogate a
                                    then do
                                       b <- peekWord16 enc (p `plusPtr` (-3))
                                       decodeUTF16 b a
                                    else
                                       return (2, unsafeChr $ fromIntegral a)
        peekCharLen    enc p = do  a <- peekWord16 enc p
                                   return (if isSurrogate a then 4 else 2)
        peekCharLenRev enc p = do  a <- peekWord16 enc (p `plusPtr` (-1))
                                   return (if isSurrogate a then 4 else 2)
        
        peekCharSafe enc l p = do  a <- peekWord16 enc p
                                   if isSurrogate a
                                    then do
                                       when (l < 4) (failMessage "decode UTF16" "Not enough input")
                                       b <- peekWord16 enc (p `plusPtr` 2)
                                       decodeUTF16Safe a b
                                    else
                                       return (2, unsafeChr $ fromIntegral a)
        validateLength _ len
         | len `mod` 2 /= 0 = failMessage "decode UTF16" "Not enough input"
         | otherwise        = return ()
        
        containsASCII  _ = False
        validEquality  _ = False -- there are two ways to encode a character
        validOrdering  _ = False
        validSubstring _ = False
        
        charCount _ n = n `div` 2 -- at least 2 bytes per char
        byteCount _ n = n * 4     -- at most  4 bytes per char
        newSize   _ n = n * 2

isSurrogate :: (Num a, Ord a) => a -> Bool
isSurrogate c = c >= 0xD800 && c <= 0xDFFF

-- | Decode an UTF16 surrogate pair, does no error checking
decodeUTF16 :: Word16 -> Word16 -> IO (Int,Char)
decodeUTF16 a b
 | a <= 0xDBFF = return (4, unsafeChr $ (((fromIntegral a .&. 0x3FF) `shiftL` 10) .|. (fromIntegral b .&. 0x3FF)) + 0x10000)
 | otherwise   = return (4, unsafeChr $ (((fromIntegral b .&. 0x3FF) `shiftL` 10) .|. (fromIntegral a .&. 0x3FF)) + 0x10000)

decodeUTF16Safe :: Word16 -> Word16 -> IO (Int,Char)
decodeUTF16Safe a b
 | ahi && blo = returnChr 4 $ (((fromIntegral a .&. 0x3FF) `shiftL` 10) .|. (fromIntegral b .&. 0x3FF)) + 0x10000
 | alo && bhi = returnChr 4 $ (((fromIntegral b .&. 0x3FF) `shiftL` 10) .|. (fromIntegral a .&. 0x3FF)) + 0x10000
 | otherwise  = failMessage "decode UTF16" "Unpaired surrogate"
 where  ahi = a >= 0xD800 && a <= 0xDBFF
        alo = a >= 0xDC00 && a <= 0xDFFF
        bhi = b >= 0xD800 && b <= 0xDBFF
        blo = b >= 0xDC00 && b <= 0xDFFF

-- -----------------------------------------------------------------------------
--
-- Encoding : UTF32
--

-- | Tag representing the UTF-32 encoding
data UTF32 endianness = UTF32 endianness

-- | Tag representing the big endian UTF-32 encoding, aka. UTF-32BE.
type UTF32BE = UTF32 BE

-- | Tag representing the little endian UTF-32 encoding, aka. UTF-32LE.
type UTF32LE = UTF32 LE

-- | Tag representing the platform native UTF-32 encoding.
type UTF32Native = UTF32 Native

instance Endian e => Encoding (UTF32 e) where
        pokeCharFun enc c = (4, \p -> pokeWord32 enc p (fromIntegral (ord c)))
        
        pokeCharLen _ _ = 4
        
        peekChar       enc p = do  a <- peekWord32 enc p
                                   return (4, unsafeChr $ fromIntegral a)
        peekCharRev    enc p = do  a <- peekWord32 enc (p `plusPtr` (-3))
                                   return (4, unsafeChr $ fromIntegral a)
        peekCharLen    _   _ = return 4
        peekCharLenRev _   _ = return 4
        
        peekCharSafe enc _ p = do  a <- peekWord32 enc p
                                   returnChr 4 a
        validateLength _ len
         | len `mod` 4 /= 0 = failMessage "decode UTF32" "Not enough input"
         | otherwise        = return ()
        
        containsASCII  _   = False
        validOrdering  enc = isBE enc
        validSubstring _   = True
        
        charCount _ n = n `div` 4 -- at least 4 bytes per char
        byteCount _ n = n * 4     -- at most  4 bytes per char
        newSize   _ n = n

-- -----------------------------------------------------------------------------
--
-- Encoding : ASCII
--

-- | Tag representing the ASCII encoding.
data ASCII = ASCII

instance Encoding ASCII where
        pokeCharFun _ c
         | c <= '\x7F' = (1, \p -> poke p (fromIntegral (ord c)))
         | otherwise   = (1, \_ -> failMessage "encode ASCII" "Not an ASCII character")
        
        pokeCharLen _ _ = 1
        
        peekChar       _ p = do  a <- peek p
                                 return (1, unsafeChr $ fromIntegral a)
        peekCharRev    _ p = do  a <- peek p
                                 return (1, unsafeChr $ fromIntegral a)
        peekCharLen    _ _ = return 1
        peekCharLenRev _ _ = return 1
        
        peekCharSafe _ _ p = do  a <- peek p
                                 unless (a <= 0x7F) (failMessage "decode ASCII" "Not an ASCII character")
                                 returnChr 1 (fromIntegral a)
        
        containsASCII  _ = True
        validOrdering  _ = True
        validSubstring _ = True
        
        charCount _ n = n
        byteCount _ n = n
        newSize   _ n = n

-- -----------------------------------------------------------------------------
--
-- Encoding : Latin1
--

-- | Tag representing the ISO 8859-1 encoding (latin 1).
data Latin1 = Latin1

instance Encoding Latin1 where
        pokeCharFun _ c
         | c <= '\xFF' = (1, \p -> poke p (fromIntegral (ord c)))
         | otherwise   = (1, \_ -> failMessage "encode Latin1" "Not a Latin1 character")
        
        pokeCharLen _ _ = 1
        
        peekChar       _ p = do  a <- peek p
                                 return (1, unsafeChr $ fromIntegral a)
        peekCharRev    _ p = do  a <- peek p
                                 return (1, unsafeChr $ fromIntegral a)
        peekCharLen    _ _ = return 1
        peekCharLenRev _ _ = return 1
        
        peekCharSafe _ _ p = do  a <- peek p
                                 unless (a <= 0xFF) (failMessage "decode Latin1" "Not a Latin1 character")
                                 returnChr 1 (fromIntegral a)
        
        containsASCII  _ = True
        validOrdering  _ = True
        validSubstring _ = True
        
        charCount _ n = n
        byteCount _ n = n
        newSize   _ n = n

-- -----------------------------------------------------------------------------
--
-- Encoding : Compact
--

-- | Tag representing a custom encoding optimized for memory usage.
--
-- This encoding looks like UTF-8, but is slightly more efficient.
-- It requires at most 3 byes per character, as opposed to 4 for UTF-8.
--
-- Encoding looks like:
--
-- >                   0zzzzzzz -> 0zzzzzzz
-- >          00yyyyyy yzzzzzzz -> 1xxxxxxx 1yyyyyyy
-- > 000xxxxx xxyyyyyy yzzzzzzz -> 1xxxxxxx 0yyyyyyy 1zzzzzzz
--
-- The reasoning behind the tag bits is that this allows the char to be read both forwards
-- and backwards.
data Compact = Compact

instance Encoding Compact where
        pokeCharFun _ c = case ord c of
         x | x < 0x80   -> (1, \p ->    poke        p   $ fromIntegral  x )
           | x < 0x4000 -> (2, \p -> do poke        p   $ fromIntegral (x `shiftR`  7) .|. 0x80
                                        pokeByteOff p 1 $ fromIntegral  x              .|. 0x80 )
           | otherwise  -> (3, \p -> do poke        p   $ fromIntegral (x `shiftR` 14) .|. 0x80
                                        pokeByteOff p 1 $ fromIntegral (x `shiftR`  7) .&. 0x7F
                                        pokeByteOff p 2 $ fromIntegral  x              .|. 0x80 )
        {-# INLINE pokeCharFun #-}
        
        pokeCharLen _ c = case ord c of
         x | x < 0x80   -> 1
           | x < 0x4000 -> 2
           | otherwise  -> 3
        {-# INLINE pokeCharLen #-}
        
        
        peekChar _ p = do
                aw <- peek p
                let a = fromIntegral aw
                if a `testBit` 7
                 then do
                    bw <- peekByteOff p 1
                    let b = fromIntegral bw
                    if b `testBit` 7
                     then                            -- 2 bytes
                        return (2, unsafeChr $ (a `shiftL` 7) `xor` b `xor` 0x4080 )
                     else do                         -- 3 bytes
                        cw <- peekByteOff p 2
                        let c = fromIntegral cw
                        return (3, unsafeChr $ (a `shiftL` 14) `xor` (b `shiftL` 7) `xor` c `xor` 0x200080 )
                 else return (1, unsafeChr a)        -- 1 byte
        
        peekCharLen _ p = do
                a <- peek p
                if a `testBit` 7
                 then do
                    b <- peekByteOff p 1
                    return (if b `testBit` 7 then 2 else 3)
                 else return 1
        {-# INLINE peekCharLen #-}
        
        peekCharRev _ p = do
                cw <- peek p
                let c = fromIntegral cw
                if c `testBit` 7
                 then do
                    bw <- peekByteOff p (-1)
                    let b = fromIntegral bw
                    if b `testBit` 7
                     then                            -- 2 bytes
                        return (2, unsafeChr $ (b `shiftL` 7) `xor` c `xor` 0x4080 )
                     else do                         -- 3 bytes
                        aw <- peekByteOff p (-2)
                        let a = fromIntegral aw
                        return (3, unsafeChr $ (a `shiftL` 14) `xor` (b `shiftL` 7) `xor` c `xor` 0x200080 )
                 else return (1, unsafeChr c)        -- 1 byte
        
        peekCharLenRev _ p = do
                a <- peek p
                if a `testBit` 7
                 then do
                    b <- peekByteOff p (-1)
                    return (if b `testBit` 7 then 2 else 3)
                 else return 1
        {-# INLINE peekCharLenRev #-}
        
        peekCharSafe _ l p = do
                aw <- peek p
                let a = fromIntegral aw
                if a `testBit` 7
                 then do
                    require 2
                    bw <- peekByteOff p 1
                    let b = fromIntegral bw
                    if b `testBit` 7
                     then do                         -- 2 bytes
                        let x = ((a `shiftL` 7) `xor` b `xor` 0x4080)
                        tooShort (x < 0x80)
                        returnChr 2 x
                     else do                         -- 3 bytes
                        require 3
                        cw <- peekByteOff p 2
                        let c = fromIntegral cw
                        unless (c `testBit` 7) (failMessage "decode Compact" "Invalid byte sequence")
                        let x = ((a `shiftL` 14) `xor` (b `shiftL` 7) `xor` c `xor` 0x200080)
                        tooShort (x < 0x4000)
                        returnChr 3 x
                 else returnChr 1 a        -- 1 byte
         where  require len = when (l < len) (failMessage "decode Compact" "Not enough input")
                tooShort b  = when b         (failMessage "decode UTF8" "Shorter encoding possible")
        
        copyChar _ src dst = do
                aw <- peek src
                poke dst aw
                if aw `testBit` 7
                 then do
                     bw <- peekByteOff src 1
                     pokeByteOff dst 1 bw
                     if bw `testBit` 7
                      then do -- 2 bytes
                         return 2
                      else do -- 3 bytes
                         peekByteOff src 2 >>= pokeByteOff dst 2
                         return 3
                 else do      -- 1 byte
                         return 1
        
        copyCharRev _ src dst = do
                aw <- peek src
                if aw `testBit` 7
                 then do
                     bw <- peekByteOff src (-1)
                     if bw `testBit` 7
                      then do -- 2 bytes
                         poke        dst   bw
                         pokeByteOff dst 1 aw
                         return 2
                      else do -- 3 bytes
                         cw <- peekByteOff src (-2)
                         poke        dst   cw
                         pokeByteOff dst 1 bw
                         pokeByteOff dst 2 aw
                         return 3
                 else do      -- 1 byte
                         poke dst aw
                         return 1
        
        
        containsASCII  _ = True
        validOrdering  _ = False
        validSubstring _ = False
        byteCount _ n = 3 * n -- A char is at most 3 bytes