{-# LANGUAGE CPP, MagicHash #-}

-- |
-- Module      : Data.Text.Internal.Unsafe.Char
-- Copyright   : (c) 2008, 2009 Tom Harper,
--               (c) 2009, 2010 Bryan O'Sullivan,
--               (c) 2009 Duncan Coutts
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- /Warning/: this is an internal module, and does not have a stable
-- API or name. Functions in this module may not check or enforce
-- preconditions expected by public modules. Use at your own risk!
--
-- Fast character manipulation functions.
module Data.Text.Internal.Unsafe.Char
    (
      ord
    , unsafeChr
    , unsafeChr8
    , unsafeChr32
    , unsafeWrite
    -- , unsafeWriteRev
    ) where

#ifdef ASSERTS
import Control.Exception (assert)
#endif
import Control.Monad.ST (ST)
import Data.Bits ((.&.))
import Data.Text.Internal.Unsafe.Shift (shiftR)
import GHC.Exts (Char(..), Int(..), chr#, ord#, word2Int#)
import GHC.Word (Word8(..), Word16(..), Word32(..))
import qualified Data.Text.Array as A

ord :: Char -> Int
ord (C# c#) = I# (ord# c#)
{-# INLINE ord #-}

unsafeChr :: Word16 -> Char
unsafeChr (W16# w#) = C# (chr# (word2Int# w#))
{-# INLINE unsafeChr #-}

unsafeChr8 :: Word8 -> Char
unsafeChr8 (W8# w#) = C# (chr# (word2Int# w#))
{-# INLINE unsafeChr8 #-}

unsafeChr32 :: Word32 -> Char
unsafeChr32 (W32# w#) = C# (chr# (word2Int# w#))
{-# INLINE unsafeChr32 #-}

-- | Write a character into the array at the given offset.  Returns
-- the number of bytes written.
unsafeWrite :: A.MArray s -> Int -> Char -> ST s Int
unsafeWrite marr i c
    -- One-byte character
    | n < 0x80 = do
#if defined(ASSERTS)
        assert (i >= 0) . assert (i < A.length marr) $ return ()
#endif
        writeAt i n
        return 1

    -- Two-byte character
    | n < 0x0800 = do
#if defined(ASSERTS)
        assert (i >= 0) . assert (i + 1 < A.length marr) $ return ()
#endif
        writeAt i       $ (n `shiftR` 6) + 0xC0
        writeAt (i + 1) $ (n .&. 0x3F)   + 0x80
        return 2

    -- Three-byte character
    | n < 0x10000 = do
#if defined(ASSERTS)
        assert (i >= 0) . assert (i + 2 < A.length marr) $ return ()
#endif
        writeAt i       $ (n `shiftR` 12)           + 0xE0
        writeAt (i + 1) $ ((n `shiftR` 6) .&. 0x3F) + 0x80
        writeAt (i + 2) $ (n .&. 0x3F)              + 0x80
        return 3

    -- Four-byte character
    | otherwise = do
#if defined(ASSERTS)
        assert (i >= 0) . assert (i + 3 < A.length marr) $ return ()
#endif
        writeAt i       $ (n `shiftR` 18)            + 0xF0
        writeAt (i + 1) $ ((n `shiftR` 12) .&. 0x3F) + 0x80
        writeAt (i + 2) $ ((n `shiftR` 6)  .&. 0x3F) + 0x80
        writeAt (i + 3) $ (n .&. 0x3F)               + 0x80
        return 4
  where 
    n = ord c
    writeAt i' n' = A.unsafeWrite marr i' (fromIntegral n')
    {-# INLINE writeAt #-}
{-# INLINE unsafeWrite #-}

{-
unsafeWriteRev :: A.MArray s Word16 -> Int -> Char -> ST s Int
unsafeWriteRev marr i c
    | n < 0x10000 = do
        assert (i >= 0) . assert (i < A.length marr) $
          A.unsafeWrite marr i (fromIntegral n)
        return (i-1)
    | otherwise = do
        assert (i >= 1) . assert (i < A.length marr) $
          A.unsafeWrite marr (i-1) lo
        A.unsafeWrite marr i hi
        return (i-2)
    where n = ord c
          m = n - 0x10000
          lo = fromIntegral $ (m `shiftR` 10) + 0xD800
          hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
{-# INLINE unsafeWriteRev #-}
-}