module StringTable.Atom(
Atom(),
ToAtom(..),
FromAtom(..),
HasHash(..),
intToAtom,
isValidAtom,
unsafeIntToAtom,
atomCompare,
unsafeByteIndex,
dumpTable,
dumpToFile,
dumpStringTableStats
) where
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS
import qualified System.IO.Unsafe as Unsafe
import Control.Monad
import Data.Binary
import Data.Binary.Get
import Data.Binary.Put
import Foreign
import Foreign.Marshal
import Data.Word
import Data.Char
import Foreign.C
import Data.Monoid
import Data.Dynamic
import Data.Bits
import Data.Generics (Data)
newtype Atom = Atom (Word32)
deriving(Typeable,Eq,Ord,Data)
class FromAtom a where
fromAtom :: Atom -> a
fromAtomIO :: Atom -> IO a
fromAtomIO a = return (fromAtom a)
fromAtom a = Unsafe.unsafePerformIO (fromAtomIO a)
class ToAtom a where
toAtom :: a -> Atom
toAtomIO :: a -> IO Atom
toAtomIO a = return (toAtom a)
toAtom a = Unsafe.unsafePerformIO (toAtomIO a)
class HasHash a where
hash32 :: a -> Word32
instance HasHash Atom where
hash32 a = let (x,y) = fromAtom a :: CStringLen in Unsafe.unsafePerformIO $ hash2 0 x (fromIntegral y)
instance HasHash BS.ByteString where
hash32 bs = Unsafe.unsafePerformIO $ do
BS.unsafeUseAsCStringLen bs $ \ (x,y) -> hash2 0 x (fromIntegral y)
instance HasHash String where
hash32 s = Unsafe.unsafePerformIO $ withCStringLen s $ \ (x,y) -> hash2 0 x (fromIntegral y)
instance FromAtom (String -> String) where
fromAtom x = shows (fromAtom x :: String)
instance ToAtom Atom where
toAtom x = x
instance FromAtom Atom where
fromAtom x = x
instance ToAtom Char where
toAtom x = toAtom [x]
instance ToAtom CStringLen where
toAtomIO (cs,len) = do
if (len > (256))
then fail "StringTable: atom is too big"
else stAdd cs (fromIntegral len)
instance ToAtom CString where
toAtomIO cs = do
len <- BS.c_strlen cs
toAtomIO (cs,fromIntegral len :: Int)
instance ToAtom String where
toAtomIO s = toAtomIO (BS.pack (toUTF s))
instance FromAtom String where
fromAtom = fromUTF . BS.unpack . fromAtom
instance ToAtom BS.ByteString where
toAtomIO bs = BS.unsafeUseAsCStringLen bs toAtomIO
instance FromAtom CStringLen where
fromAtom a@(Atom v) = (stPtr a,fromIntegral $ (v `shiftR` (1)) .&. (255))
instance FromAtom Word where
fromAtom (Atom i) = fromIntegral i
instance FromAtom Int where
fromAtom (Atom i) = fromIntegral i
instance FromAtom BS.ByteString where
fromAtomIO a = do
sl <- fromAtomIO a :: IO CStringLen
BS.unsafePackCStringLen sl
instance Monoid Atom where
mempty = toAtom BS.empty
mappend x y = Unsafe.unsafePerformIO $ atomAppend x y
instance Show Atom where
showsPrec _ atom = (fromAtom atom ++)
instance Read Atom where
readsPrec _ s = [ (toAtom s,"") ]
intToAtom :: Monad m => Int -> m Atom
intToAtom i = if isValidAtom i then return (Atom $ fromIntegral i) else fail $ "intToAtom: " ++ show i
isValidAtom :: Int -> Bool
isValidAtom i = odd i
unsafeIntToAtom :: Int -> Atom
unsafeIntToAtom x = Atom (fromIntegral x)
unsafeByteIndex :: Atom -> Int -> Word8
unsafeByteIndex atom off = fromIntegral (Unsafe.unsafePerformIO $ peek (stPtr atom `advancePtr` off))
foreign import ccall unsafe "stringtable_lookup" stAdd :: CString -> CInt -> IO Atom
foreign import ccall unsafe "stringtable_ptr" stPtr :: Atom -> CString
foreign import ccall unsafe "stringtable_stats" dumpStringTableStats :: IO ()
foreign import ccall unsafe "dump_table" dumpTable :: IO ()
foreign import ccall unsafe "atom_append" atomAppend :: Atom -> Atom -> IO Atom
foreign import ccall unsafe "lexigraphic_compare" c_atomCompare :: Atom -> Atom -> CInt
foreign import ccall unsafe "dump_to_file" dumpToFile :: IO ()
foreign import ccall unsafe hash2 :: Word32 -> CString -> CInt -> IO Word32
atomCompare a b = if c == 0 then EQ else if c > 0 then GT else LT where
c = c_atomCompare a b
toUTF :: String -> [Word8]
toUTF [] = []
toUTF (x:xs) | ord x<=0x007F = (fromIntegral $ ord x):toUTF xs
| ord x<=0x07FF = fromIntegral (0xC0 .|. ((ord x `shift` (6)) .&. 0x1F)):
fromIntegral (0x80 .|. (ord x .&. 0x3F)):
toUTF xs
| otherwise = fromIntegral (0xE0 .|. ((ord x `shift` (12)) .&. 0x0F)):
fromIntegral (0x80 .|. ((ord x `shift` (6)) .&. 0x3F)):
fromIntegral (0x80 .|. (ord x .&. 0x3F)):
toUTF xs
fromUTF :: [Word8] -> String
fromUTF xs = fromUTF' (map fromIntegral xs) where
fromUTF' [] = []
fromUTF' (all@(x:xs))
| x<=0x7F = (chr (x)):fromUTF' xs
| x<=0xBF = err
| x<=0xDF = twoBytes all
| x<=0xEF = threeBytes all
| otherwise = err
twoBytes (x1:x2:xs) = chr ((((x1 .&. 0x1F) `shift` 6) .|.
(x2 .&. 0x3F))):fromUTF' xs
twoBytes _ = error "fromUTF: illegal two byte sequence"
threeBytes (x1:x2:x3:xs) = chr ((((x1 .&. 0x0F) `shift` 12) .|.
((x2 .&. 0x3F) `shift` 6) .|.
(x3 .&. 0x3F))):fromUTF' xs
threeBytes _ = error "fromUTF: illegal three byte sequence"
err = error "fromUTF: illegal UTF-8 character"
instance Binary Atom where
get = do
x <- getWord8
bs <- getBytes (fromIntegral x)
return $ toAtom bs
put a = do
let bs = fromAtom a
putWord8 $ fromIntegral $ BS.length bs
putByteString bs