-- | Materializes tiles which are reachable through multiple root tiles as
-- temporary tables and everything else by using common tables expressions.
-- It is possible to choose the binding strategy for common table expressions:
--
--     * Bind in lowest possible CTE, results in toughest possible scoping,
--       tiles are only bound where they are actually used.
--
--     * Bind in highest possible CTE, tiles are bound at the highest
--       possible CTE, results in very few common table expressions.
--
module Database.Algebra.SQL.Materialization.Combined
    ( BindingStrategy(Lowest, Highest)
    , materialize
    , materializeByBindingStrategy
    ) where

import Control.Monad (when)
import Control.Monad.State.Strict
    ( State
    , gets
    , modify
    , execState
    )
import qualified Data.IntMap.Lazy as IntMap
    ( IntMap
    , alter
    , empty
    , foldrWithKey
    , insert
    , lookup
    )
import qualified Data.List as L (intersect)
import Data.Maybe
    ( fromMaybe
    , isJust
    )

import Database.Algebra.SQL.Materialization
import qualified Database.Algebra.SQL.Materialization.Graph as G
import qualified Database.Algebra.SQL.Query as Q
    ( DefinitionQuery(DQTemporaryTable)
    , FromExpr(FEVariable, FETableReference)
    , Query(QValueQuery, QDefinitionQuery)
    , ValueQuery(VQSelect, VQWith)
    )
import Database.Algebra.SQL.Query.Substitution
import Database.Algebra.SQL.Tile.Flatten
import Database.Algebra.SQL.Materialization.Util

-- TODO maybe replace lists with sets? because difference should be faster
-- TODO since all root tiles are enumerated with negative numbers, the root
-- vertex check could be simply: v < 0 (O(n) -> O(1))
-- TODO in addition to the previous point add this information to the definiton
-- of transform
-- TODO maybe add schemata to the value query builder function

{-
    Definition: Single parent ancestor
        Every vertex v for which |parents(v)| <= 1 is a single parent ancestor
        for every vertex w which can only be reached through paths containing v.

        The single parent ancestors of a vertex u are called spa(u).

    Lemma:
        Let p_1, ..., p_n be parents of v, then the set of single parent
        ancestors is defined as:
            spa(v) = \bigcap_{i = 1, ..., n} spa'(p_i)

            spa'(v) = spa(v) \cup {v}

    Proof: Induction over the vertices

        Hypothesis:

                spa(v) contains all single parent ancestors of v
            <=> \forall u \in spa(v): u is in every path to v

        Basis:
            v has no parents:
                spa(v) = {}


                   spa(v) = {}
                => \forall u \in spa(v): u is in every path to v

        Inductive step:
            v has parents p_1, ..., p_n:
                spa(v) = spa'(p_1) \cap spa'(p_2) \cap ... \cap spa'(p_n)
                       = (spa(p_1) \cup {p_1}) \cap ...
                                               \cap (spa(p_n) \cup {p_n})

                    u \in spa(v)
                => u \in ((spa(p_1) \cup {p_1}) \cap ...
                                                \cap (spa(p_n) \cup {p_n}))

                =>      u \in (\bigcap_{i = 1, ..., n} {p_i})
                   \lor u \in (\bigcap_{i = 1, ..., n} spa(p_i))

                \overline{Inductive hypothesis}{=>}
                    \forall u \in spa(v): u is in every path to v

                If n = 1, then the first part of the disjunction will occur,
                and/or the inductive hypothesis is used for the second part.

    Note that vertices contained in maps are always topologically sorted,
    because the transform algorithm generates those by an in-order traversal.
-}

-- | Describes the binding behaviour within a dependency tree.
data BindingStrategy = Lowest | Highest

-- | Merges all tiles reachable by a single root tile into nested common table
-- expressions (depending on their scope) and all tiles reachable by multiple
-- root tiles into a temporary table. The binding strategy determines whether it
-- is merged in the highest possible CTE or in the lowest.
materializeByBindingStrategy :: BindingStrategy -> MatFun
materializeByBindingStrategy bs =
    materializeByFunction $ case bs of
        Lowest  -> chooseLowestSPA
        Highest -> chooseHighestSPA

-- | Same as 'materializeByBehaviour' with 'Lowest' as behaviour.
materialize :: MatFun
materialize = materializeByFunction chooseLowestSPA

-- | Merges all tiles reachable by a single root tile into nested common table
-- expressions (depending on their scope) and all tiles reachable by multiple
-- root tiles into a temporary table.
materializeByFunction :: (IntMap.IntMap [G.Vertex] -> IntMap.IntMap [G.Vertex])
                      -> MatFun
materializeByFunction chooseSingleSPA transformResult =
    queriesFromSPA graph rootVertices tmpVertices reversedSpaMap
  where reversedSpaMap         = inToOutAdjMap chosenSpaMap
        chosenSpaMap           = chooseSingleSPA iSpaMap
        tmpVertices            = IntMap.foldrWithKey f [] iSpaMap
        f k l r                = case l of
            [] -> if k `elem` rootVertices
                  then r
                  else k : r
            _  -> r
        iSpaMap                = findSPA graph rootVertices
        (rootTiles, enumDeps)  = flattenTransformResultWith id
                                                            Q.FEVariable
                                                            transformResult
        graph                  = graphFromFlatResult $ enumRootTiles ++ enumDeps
        -- Enumerated root tiles.
        enumRootTiles          = zip [-1, -2 ..] rootTiles
        rootVertices           = map fst enumRootTiles

-- | The lowest single parent ancestor state contains:
--     * A map of vertices mapping to their single parent ancestors.
--
type SState = IntMap.IntMap [G.Vertex]

-- | The state monad used to find single parent ancestors.
type SFinder = State SState

-- | Returns the list of single parent ancestors for a vertex or the empty list
-- if the vertex has not been processed yet.
sfGetSingleParentAncestors :: G.Vertex -> SFinder [G.Vertex]
sfGetSingleParentAncestors v = do
    result <- gets $ IntMap.lookup v

    return $ case result of
                 -- Already calculated, return the spas and v itself.
                 Just spas -> v : spas
                 -- No entry yet. (Won't be called.)
                 Nothing   -> []

-- | Take a list of vertices and intersect their single parent ancestors with
-- each other, effectively calculating the single parent ancestors for this
-- vertex.
sfComputeSingleParentAncestors :: G.Vertex -> [G.Vertex] -> SFinder ()
sfComputeSingleParentAncestors v (pv:pvs) = do
    spa <- sfGetSingleParentAncestors pv
    spas <- mapM sfGetSingleParentAncestors pvs

    modify $ IntMap.insert v $ foldr L.intersect spa spas

sfComputeSingleParentAncestors v []       =
    -- v is a top level vertex.
    modify $ IntMap.insert v []

sfVertexProcessed :: G.Vertex -> SFinder Bool
sfVertexProcessed v = gets $ isJust . IntMap.lookup v

traverseGraph :: Graph    -- ^ The used graph.
              -> G.Vertex -- ^ The current vertex.
              -> SFinder ()
traverseGraph graph v = do
    processedList <- mapM sfVertexProcessed parents

    -- Check whether all parents have been processed.
    when (and processedList) $ do
        sfComputeSingleParentAncestors v parents

        -- Recurse over its children.
        mapM_ (traverseGraph graph) $ G.children v graph

  where parents = G.parents v graph

-- | This function descends the given root vertices and returns the single
-- parent ancestors for each vertex, reachable by any of the given root
-- vertices.
-- A vertex with parents, which are not reachable through the given root nodes
-- can and will not be computed.
findSPA :: Graph
        -> [G.Vertex]
        -> IntMap.IntMap [G.Vertex]
findSPA graph rootVertices =
    -- Collect the results with the SFinder MonadState.
    execState (mapM_ (traverseGraph graph) rootVertices) IntMap.empty

-- | Chooses the lowest single parent ancestor for each vertex.
chooseLowestSPA :: IntMap.IntMap [G.Vertex] -> IntMap.IntMap [G.Vertex]
chooseLowestSPA = fmap $ take 1

-- | Chooses the highest single parent ancestor for each vertex.
chooseHighestSPA :: IntMap.IntMap [G.Vertex] -> IntMap.IntMap [G.Vertex]
chooseHighestSPA = fmap f
  where f l = case l of
            [] -> []
            _  -> [last l]

-- | Reverses an out-adjacency map into an in-adjacency map.
inToOutAdjMap :: IntMap.IntMap [G.Vertex] -> IntMap.IntMap [G.Vertex]
inToOutAdjMap = IntMap.foldrWithKey f IntMap.empty
  where f key vertices rMap = foldr (g key) rMap vertices
        g key               = IntMap.alter (h key)
        h v (Just x)        = Just $ v : x
        h v Nothing         = Just [v]

-- | Constructs a list of queries from the given arguments.
-- Takes an out-adjacency map of the single parent ancestors (which means the
-- child vertices are mapped from their corresponding single parent ancestor).
-- The map should resemble a tree structure (i.e. no vertex has multiple
-- parents), otherwise queries are executed multiple times.
queriesFromSPA :: Graph                    -- ^ Labeled graph.
               -> [G.Vertex]               -- ^ Root vertices.
               -> [G.Vertex]               -- ^ Temporary vertices.
               -> IntMap.IntMap [G.Vertex] -- ^ Out adjacency list.
               -> ([Q.Query], [Q.Query])
queriesFromSPA graph rootVertices tmpVertices reversedSpaMap =
    (tmpQueries, rootQueries)
  where tmpQueries  = map tmpFun tmpVertices
        tmpFun v    = Q.QDefinitionQuery . Q.DQTemporaryTable (build v)
                                                              $ mat v
        rootQueries = map (Q.QValueQuery . build) rootVertices
        -- The materializer: t0, t1, t2, ...
        mat vertex  = 't' : show vertex
        build       = buildValueQuery graph reversedSpaMap mat

-- | Traverses the reversed SPA map like a tree, building a tree of CTEs,
-- starting at the given vertex.
buildValueQuery :: Graph                    -- ^ The corresponding graph.
                -> IntMap.IntMap [G.Vertex] -- ^ The reversed spa map.
                -> (G.Vertex -> String)     -- ^ The materializer.
                -> G.Vertex                 -- ^ Vertex to build the query for.
                -> Q.ValueQuery
buildValueQuery graph reversedSpaMap mat v =
    if null bindings
    then body
    else Q.VQWith bindings body
  where childVertices = fromMaybe [] $ IntMap.lookup v reversedSpaMap
        childQueries  = map (buildValueQuery graph reversedSpaMap mat)
                            childVertices
        bindings      = zip3 (map mat childVertices)
                             (repeat Nothing)
                             childQueries
        body          = Q.VQSelect
                        $ replaceReferencesSelectStmt (Q.FETableReference . mat)
                                                      select
        select        = fromMaybe (error "missing node label") $ G.node v graph