module Data.Atom.Simple ( Symbol, intern ) where import Data.Char ( ord ) import qualified Data.Map as M import System.IO.Unsafe ( unsafePerformIO ) import Data.IORef ( newIORef, atomicModifyIORef ) -- ------------------------------------------------------------------- -- Public Interface -- | A 'Symbol'. This is essentially a 'String', but with different -- performance characteristics: -- -- * @O(n)@ creation time (using 'insert') -- -- * @O(1)@ equality comparison. -- -- * @O(1)@ comparison (in practice). The result of 'compare' is -- independent of evaluation order. -- -- It is currently implemented as follows. -- -- * Each symbol contains a unique integer, which allows @O(1)@ -- comparison. -- -- * Each symbol contains an infinite chain of hashes, these are used -- for comparison. In practice, it is very rare that more than the -- first of those hashes is ever evaluated. The first hash is -- cached, so that most comparisons will not need any indirections. -- -- * The 'String' representation of the symbol. Use 'show' to return -- it. At any time, there will be only one symbol of a given name -- in memory. -- data Symbol = MkSymbol {-# UNPACK #-} !Int -- identity {-# UNPACK #-} !Int -- 1st hash [Int] -- other hashes String -- name instance Show Symbol where show (MkSymbol i h _ s) = "<" ++ show i ++ "," ++ show h ++ ">" ++ s instance Eq Symbol where MkSymbol i1 _ _ _ == MkSymbol i2 _ _ _ = i1 == i2 instance Ord Symbol where MkSymbol i1 c1 cs1 _ `compare` MkSymbol i2 c2 cs2 _ | i1 == i2 = EQ | otherwise = case c1 `compare` c2 of EQ -> cs1 `compare` cs2 ans -> ans -- | Turn a 'String' into a 'Symbol'. -- -- Note, however, that this function contains a space leak. It has -- internal state (the symbol table) but is referentially transparent. -- Unfortunately, there is no way to delete items from the symbol -- table. -- -- (This function is, of course, thread-safe.) intern :: String -> Symbol intern = unsafePerformIO $ do tab <- newIORef (SymTbl 0 M.empty) return $ \s -> unsafePerformIO $ atomicModifyIORef tab $ \tbl@(SymTbl n t) -> case M.lookup s t of Just sym -> (tbl, sym) Nothing -> let n' = n + 1 sym = mkSymbol n s in (SymTbl n' (M.insert s sym t), sym) {-# NOINLINE intern #-} -- ------------------------------------------------------------------- -- TODO: Use a trie? Or a hash table? data SymTbl = SymTbl !Int !(M.Map String Symbol) mkSymbol :: Int -> String -> Symbol mkSymbol sym_identity str = MkSymbol sym_identity h hashes str where ints = map ord str (h:hashes) = [ hash p ints | p <- bigprimes ] --------------------------------------------------------------------------- -- hash stuff hash :: Int -> [Int] -> Int hash p [] = 0 hash p (i:is) = i + p * hash p is -- Note: While these aren't very efficient it is unlikely that more -- than the first couple of elements are ever evaluated during the -- whole run of a program. primes, bigprimes :: [Int] primes = 2 : [ n | n <- [3..], all (n !/) (takeWhile (<= sqr n) primes) ] where a !/ b = a `mod` b /= 0 sqr = floor . sqrt . fromIntegral bigprimes = dropWhile (<= 258) primes