{- |
Module      :  GiST
Copyright   :  (c) Mihail Bogojeski, Alexander Svozil
License     :  GPL

Maintainer  :  mihailbogojeski@gmail.com
Stability   :  experimental
Portability :  portable

The implementation of the basic GiST operations. The behaviour
of the operations is largely influenced by the predicate used, allowing the GiST to behave
like a different type of balanced search tree for a different predicate. Although the operations
are influenced by the predicate, it is always ensured that the tree stays balanced after an 
insertion or deletion, regardless of the predicate used. It is also recommended that the minimum
and maximum fill factor for the tree are constant throughout the whole program to ensure optimal
behaviour
-}


{-# LANGUAGE MultiParamTypeClasses
    ,FlexibleInstances
    ,FlexibleContexts
    #-}

module Data.GiST.GiST
    (  
        -- ** Types 
        GiST
        ,Entry(..)
        ,Predicates(..)
        ,LeafEntry,NodeEntry,Penalty
        ,entryPredicate
        -- ** GiST operations
        ,search, insert, delete, empty, save, load, getData, size
    ) where

import Data.GiST.Types
import qualified Data.Text.IO as TIO
import qualified Data.Text as T

   
-- | Searches the GiST for leaf nodes that satisfy the given search predicate
search  :: Predicates p a => p a -> GiST p a -> [a] 
search  p (Leaf es)     = [fst e | e <- es, consistent p (LeafEntry e)] 
search  _ (Node [])     = []
search  p (Node (e:es))
    |consistent p (NodeEntry e) = (search p (fst e)) ++ (search p (Node es))
    |otherwise                  = search p (Node es)
  
-- | Inserts an entry into the tree, rebalancing the tree if necessary.
-- Rebalancing is done to satisfy the minimum and maximum fill factor
-- of the tree (represented as an integer tuple)
insert  :: Predicates p a => LeafEntry p a -> (Int, Int) -> GiST p a -> GiST p a 
insert (toIns, pred) (min,max) (Node es)
        |search pred (Node es) /= [] = Node es
        |length newEs <= max   =  Node newEs
        |otherwise              = Node [(Node $ map unNodeEntry es1, union $ map entryPredicate es1)
                                    ,(Node $ map unNodeEntry es2, union $ map entryPredicate es2)] 
            -- The new entries after inserting
    where   newEs = case insertSubtree of
                        Right newSub -> [if (e == minSubtree)
                                            then newSub
                                            else e
                                        |e <- es]
                        Left split -> (filter (/=minSubtree) es)++[fst split,snd split] 
            -- The optimal subtree to insert into
            minSubtree = chooseSubtree es (toIns,pred)
            -- The changed (and additional) subtree after insert
            insertSubtree = insertAndSplit minSubtree (min,max) (toIns,pred)
            -- The split of the node entries (in case of overpopulation)
            (es1,es2) =  pickSplit $ map NodeEntry newEs
         
insert (toIns, p) (min,max) (Leaf es)
        |search p (Leaf es) /= [] = Leaf es
        |length newEs <= max    = Leaf newEs
        |otherwise              = Node [(Leaf $ map unLeafEntry es1,union $ map entryPredicate es1)
                                    ,(Leaf $ map unLeafEntry es2, union $ map entryPredicate es2)] 
            -- The new entries after insert
    where   newEs = (toIns, p):es
            -- The split of the node entries (in case of overpopulation)
            (es1,es2) =  pickSplit $ map LeafEntry newEs 

-- | Deletes a leaf entry from the tree, rebalancing the tree if necessary. 
-- Rebalancing is done to satisfy the minimum and maximum fill factor
-- of the tree (represented as an integer tuple)
delete  :: Predicates p a => LeafEntry p a -> (Int, Int) -> GiST p a -> GiST p a 
delete (toDel, p) (min,max) (Node es) 
        |length newEs == 1  = insertMultiple toAdd (makeRoot $ head newEs) (min,max) 
        |otherwise          = insertMultiple toAdd (Node newEs) (min, max)
            -- The new entries after delete without Null entries
    where   newEs = filter (not.isNull) (map fst delNodes)  
            -- The propagated entries to add
            toAdd = concat (map snd delNodes)
            -- The entries after delete
            delNodes =  [if (consistent p (NodeEntry e)) 
                            then (deleteAndCondense e (min,max) (toDel,p))
                            else (e,[])
                        |e <- es]
                        
delete (toDel, p) (min,max) (Leaf es) = Leaf [e | e <- es, not $ consistent p (LeafEntry e)]
    
-- | Create a new empty GiST
empty :: GiST p a 
empty = Leaf []

-- | Loads a GiST from file
load :: (Read a, Read (p a)) => FilePath -> IO (GiST p a) 
load f = do s <- TIO.readFile f
            return (read $ T.unpack s)

-- | Saves the GiST to file
save :: (Show a, Show (p a)) => GiST p a -> FilePath -> IO ()
save gist f = TIO.writeFile f $ T.pack (show gist)




-- | A helper function that propagates insertion through the subtrees and splits when necessary.
-- If the node is overpopulated after the insertion, the node is split into
-- two smaller nodes which are then added to the parent        
insertAndSplit :: (Predicates p a) => NodeEntry p a -> (Int,Int) -> LeafEntry p a -> Either (NodeEntry p a, NodeEntry p a) (NodeEntry p a)
insertAndSplit (Node es,p) (min,max) (toIns,pred)
            |length newEs <= max  =  Right (Node newEs,union $ map snd newEs)
            |otherwise = Left ((Node  (map unNodeEntry es1), union $ map entryPredicate es1)
                        ,(Node (map unNodeEntry es2), union $ map entryPredicate es2)) 
                -- The new entries after insert
        where   newEs = case insertSubtree of
                            Right newSub -> [if (e == minSubtree)
                                                then newSub
                                                else e
                                            |e <- es]
                            Left split -> (filter (/=minSubtree) es)++[fst split,snd split] 
                -- The optimal subtree to insert into
                minSubtree = chooseSubtree es (toIns,pred)
                -- The changed (and additional) subtree after insert
                insertSubtree = insertAndSplit minSubtree (min,max) (toIns,pred)
                -- The split of the node entries (in case of overpopulation)
                (es1,es2) =  pickSplit $ map NodeEntry newEs

insertAndSplit (Leaf es,p) (min,max) (toIns,pred)
            |length newEs <= max  = Right (Leaf newEs,union $ map snd newEs)
            |otherwise = Left ((Leaf (map unLeafEntry es1), union $ map entryPredicate es1)
                        ,(Leaf (map unLeafEntry es2), union $ map entryPredicate es2)) 
            -- The optimal subtree to insert into
    where   newEs = ((toIns,pred) : es)
            -- The split of the node entries (in case of overpopulation)
            (es1,es2) =  pickSplit $ map LeafEntry newEs 


-- A helper function that propagates deletion through the subtrees and condenses.
-- If an internal node is underpopulated after deletion, the node and all it's subnodes are removed
-- and all their leaf entries are stored for reinsertion. The deletion is then propagated to the parent
-- of the node
deleteAndCondense :: (Predicates p a) => NodeEntry p a -> (Int,Int) -> LeafEntry p a -> (NodeEntry p a, [LeafEntry p a])
deleteAndCondense (Node es, pred) (min, max) (toDel, p)
        |length newEs < min = ((Null, pred), toAdd ++ getEntries (Node es))   
        |otherwise          = ((Node newEs, union $ map snd newEs), toAdd)
            -- The new entries after delete without Null entries
    where   newEs = filter (not.isNull) (map fst delNodes)  
            -- The propagated entries to add
            toAdd = concat (map snd delNodes) 
            -- The entries after delete
            delNodes =  [if (consistent p (NodeEntry e)) 
                            then (deleteAndCondense e (min,max) (toDel,p))
                            else (e,[])
                        |e <- es]
-- If a leaf is underpopulated after deletion, the leaf is removed and all its entries are
-- stored for reinsertion. The deletion is then propagated to the parent of the node
deleteAndCondense ((Leaf es),pred) (min, max) (toDel, p) 
    |length newEs < min = ((Null,pred), newEs)
    |otherwise          = ((Leaf newEs, union $ map snd newEs),[])        
                -- The new entries after delete without Null entries
        where   newEs = [e | e <- es, not $ consistent p (LeafEntry e)] 



-- Inserts multiple entries into a GiST   
insertMultiple :: (Predicates p a) => [LeafEntry p a] -> GiST p a -> (Int,Int) -> GiST p a
insertMultiple [] gist _ = gist
insertMultiple (e:es) gist (min,max) = insertMultiple es afterInsert (min,max)
    where afterInsert = insert e (min,max) gist



-- Chooses the most appropriate subtree to insert the entry into
chooseSubtree   :: (Predicates p a )=>[(NodeEntry p a)] -> LeafEntry p a -> (NodeEntry p a) 
chooseSubtree subtrees e    = minPenalty penalties (head penalties)
        where   penalties = [(ne, penalty (snd e) (snd ne))|ne <- subtrees]


-- Return the minimum penalty and corresponding entry from a list od entries and penalties
minPenalty :: [(NodeEntry p a, Penalty)]->(NodeEntry p a, Penalty) -> NodeEntry p a
minPenalty [] p = fst p 
minPenalty ((ne, pen):ps) (nemin, minpen) 
    |pen < minpen   = minPenalty ps (ne, pen) 
    |otherwise      = minPenalty ps (nemin, minpen) 


-- Takes an entry and extracts the GiST from it
makeRoot :: NodeEntry p a -> GiST p a
makeRoot (Node es, p) = Node es
makeRoot (Leaf es, p) = Leaf es


-- Checks if an entry contains a Null GiST
isNull :: NodeEntry p a -> Bool
isNull (Null, p) = True
isNull _ = False


-- Returns all the entries stored in a GiST
getEntries :: GiST p a -> [LeafEntry p a]
getEntries (Node es) = concat [getEntries sub | (sub,_) <- es]
getEntries (Leaf es) = es


-- | Returns all the data stored in a GiST
getData :: GiST p a -> [a]
getData gist = map fst $ getEntries gist

-- | Return the number of values in a GiST
size :: GiST p a -> Int
size gist = length $ getEntries gist