{-# INCLUDE "StringTable_cbits.h" #-}
{-# LINE 1 "src/StringTable/Atom.hsc" #-}
module StringTable.Atom(
{-# LINE 2 "src/StringTable/Atom.hsc" #-}
    Atom(),
    ToAtom(..),
    FromAtom(..),
    HasHash(..),
    intToAtom,
    isValidAtom,
    unsafeIntToAtom,
    atomCompare,
    unsafeByteIndex,
    dumpTable,
    dumpToFile,
    dumpStringTableStats
    ) where


{-# LINE 17 "src/StringTable/Atom.hsc" #-}

import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS
import Control.Monad
import Data.Binary
import Data.Binary.Get
import Data.Binary.Put
import Foreign
import Foreign.Marshal
import Data.Word
import Data.Char
import Foreign.C
import Data.Monoid
import Data.Dynamic
import Data.Bits
import Data.Generics (Data)

newtype Atom = Atom (Word32)
{-# LINE 36 "src/StringTable/Atom.hsc" #-}
    deriving(Typeable,Eq,Ord,Data)

class FromAtom a where
    fromAtom :: Atom -> a
    fromAtomIO :: Atom -> IO a

    fromAtomIO a = return (fromAtom a)
    fromAtom a = unsafePerformIO (fromAtomIO a)

class ToAtom a where
    toAtom :: a -> Atom
    toAtomIO :: a -> IO Atom

    toAtomIO a = return (toAtom a)
    toAtom a = unsafePerformIO (toAtomIO a)

class HasHash a where
    hash32 :: a -> Word32

instance HasHash Atom where
    hash32 a = let (x,y) = fromAtom a :: CStringLen in unsafePerformIO $ hash2 0 x (fromIntegral y)

instance HasHash BS.ByteString where
    hash32 bs = unsafePerformIO $ do
        BS.unsafeUseAsCStringLen bs $ \ (x,y) -> hash2 0 x (fromIntegral y)

instance HasHash String where
    hash32 s = unsafePerformIO $ withCStringLen s $ \ (x,y) -> hash2 0 x (fromIntegral y)

instance FromAtom (String -> String) where
    fromAtom x = shows (fromAtom x :: String)

instance ToAtom Atom where
    toAtom x = x

instance FromAtom Atom where
    fromAtom x = x

instance ToAtom Char where
    toAtom x = toAtom [x]

instance ToAtom CStringLen where
    toAtomIO (cs,len) = do
        if (len > (256))
{-# LINE 80 "src/StringTable/Atom.hsc" #-}
            then fail "StringTable: atom is too big"
            else stAdd cs (fromIntegral len)



instance ToAtom CString where
    toAtomIO cs = do
        len <- BS.c_strlen cs
        toAtomIO (cs,fromIntegral len :: Int)

instance ToAtom String where
    toAtomIO s = toAtomIO (BS.pack (toUTF s))

instance FromAtom String where
    fromAtom = fromUTF . BS.unpack . fromAtom

instance ToAtom BS.ByteString where
    toAtomIO bs = BS.unsafeUseAsCStringLen bs toAtomIO

instance FromAtom CStringLen where
    fromAtom a@(Atom v) = (stPtr a,fromIntegral $ (v `shiftR` (1)) .&. (255))
{-# LINE 101 "src/StringTable/Atom.hsc" #-}

instance FromAtom Word where
    fromAtom (Atom i) = fromIntegral i

instance FromAtom Int where
    fromAtom (Atom i) = fromIntegral i

instance FromAtom BS.ByteString where
    fromAtomIO a = do
        sl <- fromAtomIO a :: IO CStringLen
        BS.unsafePackCStringLen sl

instance Monoid Atom where
    mempty = toAtom BS.empty
    mappend x y = unsafePerformIO $ atomAppend x y

instance Show Atom where
    showsPrec _ atom = (fromAtom atom ++)

instance Read Atom where
    readsPrec _ s = [ (toAtom s,"") ]

intToAtom :: Monad m => Int -> m Atom
intToAtom i = if isValidAtom i then return (Atom $ fromIntegral i) else fail $ "intToAtom: " ++ show i

isValidAtom :: Int -> Bool
isValidAtom i = odd i

unsafeIntToAtom :: Int -> Atom
unsafeIntToAtom x = Atom (fromIntegral x)

unsafeByteIndex :: Atom -> Int -> Word8
unsafeByteIndex atom off = fromIntegral (unsafePerformIO $ peek (stPtr atom `advancePtr` off))

foreign import ccall unsafe "stringtable_lookup" stAdd :: CString -> CInt -> IO Atom
foreign import ccall unsafe "stringtable_ptr" stPtr :: Atom -> CString
foreign import ccall unsafe "stringtable_stats" dumpStringTableStats :: IO ()
foreign import ccall unsafe "dump_table" dumpTable :: IO ()
foreign import ccall unsafe "atom_append" atomAppend :: Atom -> Atom -> IO Atom
foreign import ccall unsafe "lexigraphic_compare" c_atomCompare :: Atom -> Atom -> CInt
foreign import ccall unsafe "dump_to_file" dumpToFile :: IO ()
foreign import ccall unsafe hash2  :: Word32 -> CString -> CInt -> IO Word32

atomCompare a b = if c == 0 then EQ else if c > 0 then GT else LT where
    c = c_atomCompare a b



-- | Convert Unicode characters to UTF-8.
toUTF :: String -> [Word8]
toUTF [] = []
toUTF (x:xs) | ord x<=0x007F = (fromIntegral $ ord x):toUTF xs
	     | ord x<=0x07FF = fromIntegral (0xC0 .|. ((ord x `shift` (-6)) .&. 0x1F)):
			       fromIntegral (0x80 .|. (ord x .&. 0x3F)):
			       toUTF xs
	     | otherwise     = fromIntegral (0xE0 .|. ((ord x `shift` (-12)) .&. 0x0F)):
			       fromIntegral (0x80 .|. ((ord x `shift` (-6)) .&. 0x3F)):
			       fromIntegral (0x80 .|. (ord x .&. 0x3F)):
			       toUTF xs

-- | Convert UTF-8 to Unicode.

fromUTF :: [Word8] -> String
fromUTF xs = fromUTF' (map fromIntegral xs) where
    fromUTF' [] = []
    fromUTF' (all@(x:xs))
	| x<=0x7F = (chr (x)):fromUTF' xs
	| x<=0xBF = err
	| x<=0xDF = twoBytes all
	| x<=0xEF = threeBytes all
	| otherwise   = err
    twoBytes (x1:x2:xs) = chr  ((((x1 .&. 0x1F) `shift` 6) .|.
			       (x2 .&. 0x3F))):fromUTF' xs
    twoBytes _ = error "fromUTF: illegal two byte sequence"

    threeBytes (x1:x2:x3:xs) = chr ((((x1 .&. 0x0F) `shift` 12) .|.
				    ((x2 .&. 0x3F) `shift` 6) .|.
				    (x3 .&. 0x3F))):fromUTF' xs
    threeBytes _ = error "fromUTF: illegal three byte sequence"

    err = error "fromUTF: illegal UTF-8 character"

instance Binary Atom where
    get = do
        x <- getWord8
        bs <- getBytes (fromIntegral x)
        return $ toAtom bs
    put a = do
        let bs = fromAtom a
        putWord8 $ fromIntegral $ BS.length bs
        putByteString bs