module OldHash (hashInt, hashString) where import Data.Bits (shiftR) import Data.Char (ord) import Data.Int (Int32, Int64) import Data.List (foldl') golden :: Int32 golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32 -- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32 -- but that has bad mulHi properties (even adding 2^32 to get its inverse) -- Whereas the above works well and contains no hash duplications for -- [-32767..65536] hashInt32 :: Int32 -> Int32 hashInt32 x = mulHi x golden + x hashInt :: Int -> Int32 hashInt x = hashInt32 (fromIntegral x) -- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply mulHi :: Int32 -> Int32 -> Int32 mulHi a b = fromIntegral (r `shiftR` 32) where r :: Int64 r = fromIntegral a * fromIntegral b -- | A sample hash function for Strings. We keep multiplying by the -- golden ratio and adding. The implementation is: -- -- > hashString = foldl' f golden -- > where f m c = fromIntegral (ord c) * magic + hashInt32 m -- > magic = 0xdeadbeef -- -- Where hashInt32 works just as hashInt shown above. -- -- Knuth argues that repeated multiplication by the golden ratio -- will minimize gaps in the hash space, and thus it's a good choice -- for combining together multiple keys to form one. -- -- Here we know that individual characters c are often small, and this -- produces frequent collisions if we use ord c alone. A -- particular problem are the shorter low ASCII and ISO-8859-1 -- character strings. We pre-multiply by a magic twiddle factor to -- obtain a good distribution. In fact, given the following test: -- -- > testp :: Int32 -> Int -- > testp k = (n - ) . length . group . sort . map hs . take n $ ls -- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']] -- > hs = foldl' f golden -- > f m c = fromIntegral (ord c) * k + hashInt32 m -- > n = 100000 -- -- We discover that testp magic = 0. hashString :: String -> Int32 hashString = foldl' f golden where f m c = fromIntegral (ord c) * magic + hashInt32 m magic = 0xdeadbeef