Data/Generics/Fixplate/Util/Hash/Table.hs


-- | Hash tables, implemented as a structure similar to @Map hash (Map key value)]@.
--
-- What this data structure can also give you is a unique value (a @(hash,Int)@ pair)
-- for each key, even during building the table: It is guaranteed to be unique 
-- in the past and future lifetime of a single hashtable (that is, one realization 
-- of the world-line), among all the keys appearing in that history.
--
-- Set operations (union, intersection) clearly break this principle; this is
-- resolved by declaring these operations to be /left-biased/, in the sense that
-- they retain the unique values of the left table (so @union t1 t2@ belongs to
-- to @t1@'s world-line, but not to @t2@'s one).
--
-- If a key is first removed then added back again, it will get a new value.
--
-- To be Haskell98 compatible (no multi-param type classes), when constructing 
-- a new hash table, we have to support the function computing (or just fetching, if
-- it is cached) the hash value. This function is then stored in the data type.
--

{-# LANGUAGE CPP #-}
module Data.Generics.Fixplate.Util.Hash.Table
  ( HashTable , Bucket(..) , Leaf(..)
  , getHashValue , unHashTable
    -- * Construction and deconstruction
  , empty , singleton
  , fromList , toList 
  , null 
  , bag
    -- * Membership
  , lookup , member 
    -- * Insertion / deletion
  , insert , insertWith
  , delete
    -- * Union
  , union , unionWith         
  , unionsWith , unionsWith'
    -- * Intersection
  , intersection, intersectionWith 
  , intersectionsWith , intersectionsWith'
    -- * Difference
  , difference , differenceWith
    -- * Unique indices
  , getUniqueIndex
  , keysWith
  , mapWithUniqueIndices
#ifdef WITH_QUICKCHECK
    -- * Tests
  , runtests_HashTable  
  , prop_insert       , prop_delete
  , prop_insertDelete , prop_deleteInsert
  , prop_insertInsert , prop_deleteDelete
  , prop_fromListToList
  , prop_intersection , prop_intersectionWith
  , prop_union        , prop_unionWith
  , prop_difference   , prop_differenceWith
  , prop_uniqueValues
#endif
  ) 
  where

--------------------------------------------------------------------------------

import Prelude hiding ( lookup , null )

import Data.List ( foldl' )

import qualified Data.Map as Map ; import Data.Map (Map) 
-- import qualified Data.Set as Set ; import Data.Set (Set) 

#ifdef WITH_QUICKCHECK
import Test.QuickCheck
import Test.QuickCheck.Modifiers
import Data.Generics.Fixplate.Misc
import Data.List ( sort , group , nubBy , nub , (\\) , foldl' , scanl )
import Control.Monad
import Control.Applicative ( (<$>) )
import Debug.Trace
#endif

--------------------------------------------------------------------------------
-- helper functions

mapInsertWith :: Ord k => (a -> v) -> (a -> v -> v) -> k -> a -> Map k v ->  Map k v
mapInsertWith f g k x = x `seq` Map.alter worker k where
  worker Nothing   =          Just $! (f x)
  worker (Just y)  = y `seq` (Just $! (g x y))

{-
mapIsSingleton :: Map k v -> Maybe (k,v)
mapIsSingleton table = if Map.size table == 1 
  then let [(k,v)] = Map.toList table in Just (k,v)
  else Nothing

mapIsSingleton_ :: Map k v -> Maybe v
mapIsSingleton_ table = if Map.size table == 1 
  then let [(_,v)] = Map.toList table in Just v
  else Nothing
-}

--------------------------------------------------------------------------------
-- buckets

data Leaf v     = Leaf   {-# UNPACK #-} !Int v                    -- the index of the key, plus a value
data Bucket k v = Bucket {-# UNPACK #-} !Int !(Map k (Leaf v))    -- the next free index, plus the elements in the bucket

fromLeaf :: Leaf v -> v
fromLeaf (Leaf _ x) = x

emptyBucket :: Bucket k v
emptyBucket = Bucket 0 (Map.empty)

bucketSingleton :: k -> v -> Bucket k v
bucketSingleton k x = Bucket 1 (Map.singleton k (Leaf 0 x))

bucketInsert :: Ord k => k -> v -> Bucket k v -> Bucket k v
bucketInsert = bucketInsertWith id const    -- not (flip const), since a -> v -> v !!!

bucketInsertWith :: Ord k => (a -> v) -> (a -> v -> v) -> k -> a -> Bucket k v -> Bucket k v
bucketInsertWith f g k x (Bucket n table) = x `seq` new where
  new = Bucket (n+1) (Map.alter worker k table)
  worker Nothing            =          Just $! (Leaf n (f x))
  worker (Just (Leaf j y))  = y `seq` (Just $! (Leaf j (g x y)))

{-
bucketIsSingleton :: Bucket k v -> Maybe (k,v)
bucketIsSingleton (Bucket _ table) = if Map.size table == 1 
  then let [(k,Leaf _ v)] = Map.toList table in Just (k,v)
  else Nothing

bucketIsSingleton_ :: Bucket k v -> Maybe v
bucketIsSingleton_ (Bucket _ table) = if Map.size table == 1 
  then let [Leaf _ v] = Map.elems table in Just v
  else Nothing
-}

--------------------------------------------------------------------------------

data HashTable hash k v = HashTable 
  { getHashValue :: k -> hash
  , unHashTable  :: Map hash (Bucket k v) 
  }

empty :: (Ord hash, Ord k) => (k -> hash) -> HashTable hash k v
empty gethash = HashTable gethash (Map.empty)

singleton :: (Ord hash, Ord k) => (k -> hash) -> k -> v -> HashTable hash k v
singleton gethash k v = HashTable gethash $ Map.singleton h (bucketSingleton k v) where
  h = gethash k

fromList :: (Ord hash, Ord k) => (k -> hash) -> [(k,v)] -> HashTable hash k v
fromList gethash = foldl' (\old (k,v) -> insert k v old) (empty gethash)

-- | Note that the returned list is ordered by hash, /not/ by keys like 'Data.Map'!
toList :: Ord k => HashTable hash k v -> [(k,v)]
toList (HashTable _ table) = 
  [ (k,v) 
  | Bucket _ sub  <- Map.elems table 
  , (k, Leaf _ v) <- Map.toList sub 
  ]

null :: (Ord hash, Ord k) => HashTable hash k v -> Bool
null t = case toList t of
  [] -> True
  _  -> False

-- | Keys together with their associated unique values
keysWith :: Ord k => (k -> hash -> Int -> a) -> HashTable hash k v -> [a]
keysWith f (HashTable _ table) = 
  [ f k hash j 
  | (hash, Bucket _ sub) <- Map.toList table 
  , (k, Leaf j _) <- Map.toList sub 
  ]

--------------------------------------------------------------------------------

lookup :: (Ord hash, Ord k) => k -> HashTable hash k v -> Maybe v
lookup key (HashTable gethash table) = 
  case Map.lookup h table of
    Just (Bucket n sub) -> case Map.lookup key sub of
      Just (Leaf _ v) -> Just v
      Nothing         -> Nothing
    Nothing  -> Nothing      
  where
    h = gethash key

-- | Look up a unique index, in the form of a @(hash,Int)@ pair, for any key.
-- If the user-supplied function is /injective/, then the result is guaranteed to be uniquely
-- associated to the given key in the past and future history of this table (but of
-- course not unique among different future histories).
--
getUniqueIndex :: (Ord hash, Ord k) => (hash -> Int -> a) -> k -> HashTable hash k v -> Maybe a
getUniqueIndex f key (HashTable gethash table) = 
  case Map.lookup h table of
    Just bucket@(Bucket _ sub) -> case Map.lookup key sub of
      Just (Leaf j _) -> Just (f h j)
      Nothing         -> Nothing       
    Nothing  -> Nothing      
  where
    h = gethash key

member :: (Ord hash, Ord k) => k -> HashTable hash k v -> Bool
member key table = case lookup key table of
  Just _  -> True
  Nothing -> False

--------------------------------------------------------------------------------

insert :: (Ord hash, Ord k) => k -> v -> HashTable hash k v -> HashTable hash k v
insert k v (HashTable gethash table) = HashTable gethash $ mapInsertWith f g h v table where
  h = gethash k
  f v     = bucketSingleton k v
  g v sub = bucketInsert    k v sub

insertWith :: (Ord hash, Ord k) => (a -> v) -> (a -> v -> v) -> k -> a -> HashTable hash k v -> HashTable hash k v
insertWith ff gg k x (HashTable gethash table) = HashTable gethash $ mapInsertWith f g h x table where
  h = gethash k
  f x     = bucketSingleton k (ff x)
  g x sub = bucketInsertWith ff gg k x sub

delete :: (Ord hash, Ord k) => k -> HashTable hash k v -> HashTable hash k v
delete k (HashTable gethash table) = HashTable gethash $ Map.alter worker h table where
  h = gethash k
  worker Nothing               = Nothing
  worker (Just (Bucket n sub)) = Just $ Bucket n (Map.delete k sub)

--------------------------------------------------------------------------------
-- union

-- | > union == unionWith const
union :: (Ord hash, Ord k) => HashTable hash k a -> HashTable hash k a -> HashTable hash k a
union = unionWith const

-- | This is unsafe in the sense that the two @getHash@ functions 
-- (supplied when the hash tables were created) must agree. The same applies for all the set operations.
--
-- It is also left-biased in the sense that the unique indices from the left hashtable are retained,
-- while the unique indices from the right hashtable are /changed/.
unionWith :: (Ord hash, Ord k) => (v -> v -> v) -> HashTable hash k v -> HashTable hash k v -> HashTable hash k v 
unionWith g (HashTable gethash table1) (HashTable _ table2) = HashTable gethash (Map.unionWith worker table1 table2)
  where
    worker (Bucket n sub1) (Bucket m sub2) = Bucket (n+m) (Map.unionWith h sub1 $ Map.map offset sub2) where
      h (Leaf i x) (Leaf _ y) = Leaf i (g x y)
      offset       (Leaf j y) = Leaf (n+j) y

-- | This is unsafe both in the above sense and also that it does not accepts the empty list (for the same reason).
-- The result belongs to the world-line of the first table.
unionsWith :: (Ord hash, Ord k) => (v -> v -> v) -> [HashTable hash k v] -> HashTable hash k v 
unionsWith g tables = case tables of
  [x]    -> x
  []     -> error "HashTable/unionsWith: empty list"
  xs     -> foldl1 (unionWith g) xs

-- | This one accepts the empty list. The empty imput creates a new world-line.
unionsWith' :: (Ord hash, Ord k) => (k -> hash) -> (v -> v -> v) -> [HashTable hash k v] -> HashTable hash k v 
unionsWith' gethash g tables = case tables of
  [x]    -> x
  []     -> empty gethash
  xs     -> foldl1 (unionWith g) xs

--------------------------------------------------------------------------------
-- intersection

-- | > intersection == intersectionWith const
intersection :: (Ord hash, Ord k) => HashTable hash k a -> HashTable hash k b -> HashTable hash k a
intersection = intersectionWith const
 
-- NOTE the `Map.union` and `Map.difference` here!!!!!
-- This is necessary so that the world-line property remains true: if there is a hash present in the left table
-- but not in the right table, then we have to put an empty bucket in the resulting table while retaining the 
-- next unique index value). Unfortunately "Data.Map" does not have a flexible enough set operation to be used here...
intersectionWith :: (Ord hash, Ord k) => (a -> b -> c) -> HashTable hash k a -> HashTable hash k b -> HashTable hash k c
intersectionWith g (HashTable gethash table1) (HashTable _ table2) = 
  HashTable gethash (Map.union a_minus_b a_cap_b) {- disjoint union -} where
    a_cap_b   = Map.intersectionWith cap_worker table1 table2
    a_minus_b = Map.map empty_worker (Map.difference table1 table2)
  
    cap_worker (Bucket n sub1) (Bucket _ sub2) = Bucket n (Map.intersectionWith h sub1 sub2) where
      h (Leaf i x) (Leaf _ y) = Leaf i (g x y)

    -- empty_worker :: Bucket k a -> Bucket k c
    empty_worker (Bucket n sub1) = Bucket n (Map.empty)

intersectionsWith :: (Ord hash, Ord k) => (v -> v -> v) -> [HashTable hash k v] -> HashTable hash k v 
intersectionsWith g tables = case tables of
  [x]    -> x
  []     -> error "HashTable/intersectionWith: empty list"
  xs     -> foldl1 (intersectionWith g) xs

intersectionsWith' :: (Ord hash, Ord k) => (k -> hash) -> (v -> v -> v) -> [HashTable hash k v] -> HashTable hash k v 
intersectionsWith' gethash g tables = case tables of
  [x]    -> x
  []     -> empty gethash
  xs     -> foldl1 (intersectionWith g) xs

--------------------------------------------------------------------------------
-- difference

difference :: (Ord hash, Ord k) => HashTable hash k a -> HashTable hash k b -> HashTable hash k a
difference = differenceWith (\_ _ -> Nothing)

differenceWith :: (Ord hash, Ord k) => (a -> b -> Maybe a) -> HashTable hash k a -> HashTable hash k b -> HashTable hash k a
differenceWith g (HashTable gethash table1) (HashTable _ table2) = HashTable gethash (Map.differenceWith worker table1 table2) 
  where
    worker (Bucket n sub1) (Bucket _ sub2) = Just (Bucket n (Map.differenceWith h sub1 sub2)) where
      h (Leaf i x) (Leaf _ y) = case g x y of
        Just z  -> Just (Leaf i z)
        Nothing -> Nothing
  
--------------------------------------------------------------------------------

-- | Creates a multi-set from a list.
bag :: (Ord hash, Ord k) => (k -> hash) -> [k] -> HashTable hash k Int
bag gethash = foldl' (\old k -> insertWith id (+) k 1 old) (empty gethash)

--------------------------------------------------------------------------------

mapWithUniqueIndices :: (Ord hash, Ord k) => (hash -> Int -> a -> b) -> HashTable hash k a -> HashTable hash k b
mapWithUniqueIndices user (HashTable gethash table) = HashTable gethash (Map.mapWithKey worker table) where
  worker hash (Bucket n sub) = Bucket n (Map.map g sub) where
    g (Leaf j x) = Leaf j (user hash j x)

--------------------------------------------------------------------------------
#ifdef WITH_QUICKCHECK
-- * tests

runtests_HashTable :: IO ()
runtests_HashTable = do
  quickCheck prop_insert
  quickCheck prop_delete
  quickCheck prop_insertDelete
  quickCheck prop_deleteInsert
  quickCheck prop_insertInsert
  quickCheck prop_deleteDelete
  quickCheck prop_fromListToList
  quickCheck prop_intersection
  quickCheck prop_intersectionWith
  quickCheck prop_union
  quickCheck prop_unionWith
  quickCheck prop_difference
  quickCheck prop_differenceWith
  replicateM_ 5 $ quickCheck prop_uniqueValues
--  quickCheck prop_
--  quickCheck prop_

-------------------------

debug x y = trace ("-- " ++ show x ++ " --") y

newtype Key = Key Int deriving (Eq,Ord,Show)

instance (Ord k, Ord hash, Show k, Show v) => Show (HashTable hash k v) where
  show t = "HashTable<< " ++ show (toList t) ++ " >>"

instance Arbitrary Key where
  arbitrary = do
    n <- choose (0, 255)
    return (Key n)

newtype Hash = Hash Int deriving (Eq,Ord,Show)

calcHash :: Key -> Hash 
calcHash (Key k) = Hash (mod k 17)

newtype Table v = Table (HashTable Hash Key v) deriving Show 

instance Arbitrary v => Arbitrary (Table v) where
  arbitrary = do
    xs <- arbitrary
    let t = fromList calcHash xs
    {- debug (length xs) $ -}
    return (Table t)

newtype NonEmptyTable v = NonEmptyTable (HashTable Hash Key v) deriving Show 

instance Arbitrary v => Arbitrary (NonEmptyTable v) where
  arbitrary = do
    NonEmpty xs <- arbitrary
    let t = fromList calcHash xs
    {- debug (length xs) $ -}
    return (NonEmptyTable t)

data Pointed v = Pointed (HashTable Hash Key v) (Key,v) deriving Show

instance Arbitrary v => Arbitrary (Pointed v) where
  arbitrary = do
    NonEmptyTable t <- arbitrary
    let list = toList t
        n = length list
    i <- choose (0,n-1)
    let kv =list!!i
    return (Pointed t kv)

sortedToList :: Ord a => HashTable Hash Key a -> [(Key,a)]
sortedToList = sort . toList

-------------------------

data Step v
  = Insert     Key v 
  | InsertWith Key v
  | Delete     Key  
  | Union      (Table v)
  | Intersect  (Table v)
  | Difference (Table v)
  deriving Show

instance Arbitrary v => Arbitrary (Step v) where
  arbitrary = do
    frequency
      [ ( 10 , do { k<-arbitrary ; v<-arbitrary ; return (Insert     k v) } )
      , (  5 , do { k<-arbitrary ; v<-arbitrary ; return (InsertWith k v) } )
      , ( 10 , do { k<-arbitrary ; return (Delete k)       } )
      , (  3 , do { t<-arbitrary ; return (Union      t  ) } )
      , (  2 , do { t<-arbitrary ; return (Difference t  ) } )
      , (  1 , do { t<-arbitrary ; return (Intersect  t  ) } )
      ]

newtype NoDeleteStep v = NoDeleteStep (Step v)

instance Arbitrary v => Arbitrary (NoDeleteStep v) where
  arbitrary = NoDeleteStep <$> do
    frequency
      [ ( 10 , do { k<-arbitrary ; v<-arbitrary ; return (Insert     k v) } )
      , (  5 , do { k<-arbitrary ; v<-arbitrary ; return (InsertWith k v) } )
      , (  3 , do { t<-arbitrary ;                return (Union      t  ) } )
      ]

step :: (v -> v -> v) -> Step v -> HashTable Hash Key v -> HashTable Hash Key v 
step f step old = case step of
  Insert     k v       -> insert          k v old
  InsertWith k v       -> insertWith id f k v old
  Delete     k         -> delete          k   old
  Union      (Table t) -> union        old t
  Intersect  (Table t) -> intersection old t
  Difference (Table t) -> difference   old t

type History v = [Step v]

runHistory :: (v -> v -> v) -> History v -> HashTable Hash Key v -> [HashTable Hash Key v]
runHistory f steps ini = scanl (flip (step f)) ini steps

data U = U Hash Int deriving (Eq,Ord,Show)

-------------------------

prop_insert :: Key -> Char -> Table Char -> Bool
prop_insert k v (Table table) = lookup k (insert k v table) == Just v

prop_delete :: Pointed Char -> Bool
prop_delete (Pointed table (k,_)) = lookup k (delete k table) == Nothing 

prop_insertInsert :: Key -> Char -> Table Char -> Bool
prop_insertInsert k v (Table table) = toList (insert k v table) == toList (insert k v (insert k v table))

prop_deleteDelete :: Pointed Char -> Bool
prop_deleteDelete (Pointed table (k,_)) = toList (delete k table) == toList (delete k (delete k table))

prop_insertDelete :: Key -> Char -> Table Char -> Bool
prop_insertDelete k v (Table table) = lookup k (delete k $ insert k v table) == Nothing

prop_deleteInsert :: Pointed Char -> Bool
prop_deleteInsert (Pointed table (k,v)) = lookup k (insert k v $ delete k table) == Just v 

prop_fromListToList :: [(Key,Char)] -> Bool
prop_fromListToList xs = sortedToList (fromList calcHash xs) == Map.toList (Map.fromList xs)

prop_intersection :: [(Key,Char)] -> [(Key,Bool)] -> Bool
prop_intersection xs ys = sortedToList (intersection t1 t2) == Map.toList (Map.intersection m1 m2) where
  t1 = fromList calcHash xs
  t2 = fromList calcHash ys
  m1 = Map.fromList xs
  m2 = Map.fromList ys

prop_intersectionWith :: [(Key,Char)] -> [(Key,String)] -> Bool
prop_intersectionWith xs ys = sortedToList (intersectionWith (:) t1 t2) == Map.toList (Map.intersectionWith (:) m1 m2) where
  t1 = fromList calcHash xs
  t2 = fromList calcHash ys
  m1 = Map.fromList xs
  m2 = Map.fromList ys

prop_union :: [(Key,Char)] -> [(Key,Char)] -> Bool
prop_union xs ys = sortedToList (union t1 t2) == Map.toList (Map.union m1 m2) where
  t1 = fromList calcHash xs
  t2 = fromList calcHash ys
  m1 = Map.fromList xs
  m2 = Map.fromList ys

prop_unionWith :: [(Key,String)] -> [(Key,String)] -> Bool
prop_unionWith xs ys = sortedToList (unionWith (++) t1 t2) == Map.toList (Map.unionWith (++) m1 m2) where
  t1 = fromList calcHash xs
  t2 = fromList calcHash ys
  m1 = Map.fromList xs
  m2 = Map.fromList ys

prop_difference :: [(Key,Char)] -> [(Key,Bool)] -> Bool
prop_difference xs ys = sortedToList (difference t1 t2) == Map.toList (Map.difference m1 m2) where
  t1 = fromList calcHash xs
  t2 = fromList calcHash ys
  m1 = Map.fromList xs
  m2 = Map.fromList ys

prop_differenceWith :: [(Key,Char)] -> [(Key,Bool)] -> Bool
prop_differenceWith xs ys = sortedToList (differenceWith f t1 t2) == Map.toList (Map.differenceWith f m1 m2) where
  t1 = fromList calcHash xs
  t2 = fromList calcHash ys
  m1 = Map.fromList xs
  m2 = Map.fromList ys
  f x b = if b then Just x else Nothing

-- we try to test whether values are really unique and really constant during a wordline
prop_uniqueValues :: History Float -> Table Float -> Bool
prop_uniqueValues history (Table initial) = areUnique && areInjective {- && ... -} where
  worldline = runHistory (\x y -> x-y) history initial :: [HashTable Hash Key Float]
  lists = ((flip map) worldline $ \table -> keysWith (\k h j -> (U h j, k)) table) :: [[(U,Key)]]

  -- at each point in time, a single value must appear only once in the table
  areUnique = and [ isUnique xs | xs <- lists ]
  isUnique uks = let us = map fst uks in sort us == sort (nub us)

  -- taking the whole wordline, it must be true that to a single unique value there is only a single key associated
  -- (the opposite is not true, since a key can be deleted then reinserted, gaining a new value)
  areInjective = and $ map test $ groupSortOn fst $ concat lists where
    test :: [(U,Key)] -> Bool
    test xs = (length (groupSortOn fst xs) == 1)     -- this is redundant, but hey, we are also testing the test :)
           && (length (groupSortOn snd xs) == 1)
 

#endif
--------------------------------------------------------------------------------