{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
-- | This module defines the evaluation strategy of the library.
--
-- It currently uses a bottom-up semi-naive evaluator.
module Database.Datalog.Evaluate (
  applyRuleSet,
  select
  ) where

import Control.Applicative
import qualified Control.Monad.Catch as E
import Control.Monad ( foldM, liftM )
import Control.Monad.ST.Strict
import Data.Graph
import Data.Hashable
import Data.HashMap.Strict ( HashMap )
import qualified Data.HashMap.Strict as HM
import Data.Maybe ( fromMaybe )
import Data.Monoid
import Data.Vector.Mutable ( STVector )
import qualified Data.Vector.Mutable as V

import Database.Datalog.Database
import Database.Datalog.Rules

-- | Bindings are vectors of values.  Each variable in a rule is
-- assigned an index in the Bindings during the adornment process.
-- When evaluating a rule, if a free variable is encountered, all of
-- its possible values are entered at the index for that variable in a
-- Bindings vector.  When a bound variable is encountered, its current
-- value is looked up from the Bindings.  If that value does not match
-- the concrete tuple being examined, that tuple is rejected.
--
-- The mapping of variable to index into the bindings vector is stored
-- in the Rule data structure.
newtype Bindings s a = Bindings (STVector s a)


-- | Apply a set of rules.  All of the rules must have the same head
-- relation.  This is what implements the semi-naive evaluation
-- strategy.  For each rule of the form
--
-- > T(x,y) |- G(x,z), T(z,y).
--
-- simulate the rule
--
-- > ΔT(x,y) |- G(x,z), ΔT(z,y).
--
-- That is, at each step only look at the *new* tuples for each
-- recursive relation.  The intuition is that, if a new tuple is to be
-- generated on the next step, it must reference a new tuple from this
-- step (otherwise it would have already been generated) If a relation
-- appears twice in a body:
--
-- > T(x,y) |- T(x,z), T(z,y).
--
-- we have to substitute ΔT once for *each* occurrence of T in the
-- body, with the other occurrences referencing the non-Δ table:
--
-- > ΔT(x,y) |- ΔT(x,z), T(z,y).
-- > ΔT(x,y) |- T(x,z), ΔT(z,y).
--
-- While collecting all of the new tuples (see projectLiteral), a new
-- Δ table is generated.
applyRuleSet :: (E.MonadThrow m, Eq a, Hashable a, Show a)
                => Database a -> [Rule a] -> m (Database a)
applyRuleSet _ [] = error "applyRuleSet: Empty rule set not possible"
applyRuleSet db rss@(r:_) = return $ runST $ do
  bss <- concat <$> mapM (applyRules db) (orderRules rss)
  db' <- projectLiterals db h bss
  return db' -- `debug` show db'
  where
    h = ruleHead r

-- | Each of the lists of generated bindings has its own
-- ruleVariableMap, so zip them together so that project has them
-- paired up and ready to use.
--
-- Apply a set of rules
applyRules :: (Eq a, Hashable a, Show a)
              => Database a
              -> [Rule a]
              -> ST s [(Rule a, [Bindings s a])]
applyRules db rs = do
  bs <- mapM (applyRule db) rs
  return $ zip rs bs

-- | Toplogically sort rules (with SCCs treated as a unit).  This
-- means that dependency rules will be fired before the rules that
-- depend on them, which is the best evaluation order we can hope for.
orderRules :: forall a . (Eq a, Hashable a) => [Rule a] -> [[Rule a]]
orderRules rs = map toList (stronglyConnComp deps)
  where
    toList (AcyclicSCC r) = [r]
    toList (CyclicSCC rss) = rss
    toKeyM = HM.fromList (zip rs [0..])
    toKey :: Rule a -> Int
    toKey r = fromMaybe (error "Missing toKeyM entry") $ HM.lookup r toKeyM

    deps = foldr toContext [] rs
    toContext r@(Rule _ b _) acc =
      -- All of the rules for a given relation are in the same SCC
      -- stratum, so we will see them all in @rs@
      let brules = concatMap relationToRules b
      in (r, toKey r, map toKey brules) : acc
    relationToRules rel = filter (hasRelHead rel) rs
    hasRelHead c (Rule h _ _) =
      case c of
        Literal ac -> adornedClauseRelation h == adornedClauseRelation ac
        -- This should probably be impossible since negated terms
        -- would be in a different stratum.
        NegatedLiteral ac -> adornedClauseRelation h == adornedClauseRelation ac
        _ -> False

-- | A worker to apply a single rule to the database (producing a new
-- database).  This handles deciding if we need to do any Δ-table
-- substitutions.  If not, it just does a simple fold with
-- joinLiteral.
applyRule :: (Eq a, Hashable a, Show a)
             => Database a -> Rule a -> ST s [Bindings s a]
applyRule db r = do
  -- We need to substitute the ΔT table in for *one* occurrence of the
  -- T relation in the rule body at a time.  It must be substituted in
  -- at *each* position where T appears.
  case any (referencesRelation hr) b of
    -- If the relation does not appear in the body at all, we don't
    -- need to do the delta substitution.
    False -> do
      v0 <- V.new (HM.size m)
      foldM (joinLiteral db) [Bindings v0] b
    -- Otherwise, swap the delta table in for each each occurrence of
    -- the relation in the body.
    True -> concat <$> foldM (joinWithDeltaAt db hr b m) [] b
  where
    h = ruleHead r
    hr = adornedClauseRelation h
    b = ruleBody r
    m = ruleVariableMap r

-- | Return True if the given literal references the given Relation
referencesRelation:: Relation -> Literal AdornedClause a -> Bool
referencesRelation hrel rel =
  case rel of
    Literal l -> adornedClauseRelation l == hrel
    NegatedLiteral l -> adornedClauseRelation l == hrel
    _ -> False

-- | The worker that substitutes a Δ-table for each clause referencing
-- the relation @hr@.
joinWithDeltaAt :: (Eq a, Hashable a)
                   => Database a
                   -> Relation
                   -> [Literal AdornedClause a]
                   -> HashMap k v
                   -> [[Bindings s a]]
                   -> Literal AdornedClause a
                   -> ST s [[Bindings s a]]
joinWithDeltaAt db hr b m acc c =
  case referencesRelation hr c of
    -- This clause doesn't reference the relation so don't do anything
    False -> return acc
    -- This clause does reference it, so we need to evaluate the
    -- entire rule here.  swapJoin handles substituting the Δ table
    -- for the relation in this clause (see withDeltaRelation - it
    -- makes a new database with the Δ swapped for the data of this
    -- relation).
    True -> do
      v0 <- V.new (HM.size m)
      bs <- foldM swapJoin [Bindings v0] b
      return (bs : acc)
  where
    swapJoin bs thisClause =
      case thisClause == c of
        False -> joinLiteral db bs thisClause
        True -> withDeltaRelation db hr $ \db' -> joinLiteral db' bs thisClause

-- | Ensure that the relation named by the clause argument is in the
-- database.  Get the DBRelation.  Then fold over the Bindings,
-- constructing a tuple for each one (that is inserted into the
-- relation).  Then build a new database with that relation replaced
-- with the new one.
projectLiterals :: (Eq a, Hashable a, Show a)
                   => Database a
                   -> AdornedClause a
                   -> [(Rule a, [Bindings s a])]
                   -> ST s (Database a)
projectLiterals db c bssMaps = do
  let r = adornedClauseRelation c
      rel = ensureDatabaseRelation db r (length (adornedClauseTerms c))
      rel' = resetRelationDelta rel
  -- We reset the delta since we are computing the new delta for the
  -- next iteration.  The act of adding tuples to the relation
  -- automatically computes the delta.
  rel'' <- foldM (\irel (rule, bs) -> foldM (project rule) irel bs) rel' bssMaps
  return $ replaceRelation db rel''
  where
    project rule !r b = do
      t <- bindingsToTuple (ruleHead rule) (ruleVariableMap rule) b
      return $ addTupleToRelation r t

-- | Determine if a PartialTuple and a concrete Tuple from the
-- database match.  Walks the partial tuple (which is sorted by index)
-- and the current tuple in parallel and tries to avoid allocations as
-- much as possible.
tupleMatches :: (Eq a) => PartialTuple a -> Tuple a -> Bool
tupleMatches (PartialTuple pvs) (Tuple vs) =
  parallelTupleWalk pvs vs

parallelTupleWalk :: (Eq a) => [Maybe a] -> [a] -> Bool
parallelTupleWalk [] [] = True
parallelTupleWalk (p:ps) (v:vs) =
  case p of
    Nothing -> parallelTupleWalk ps vs
    Just pv -> pv == v && parallelTupleWalk ps vs
parallelTupleWalk _ _ = error "Partial tuple length mismatch"

{-# INLINE scanSpace #-}
-- | The common worker for 'select' and 'matchAny'
scanSpace :: (Eq a)
             => ((Tuple a -> Bool) -> [Tuple a] -> b)
             -> Database a
             -> Relation
             -> PartialTuple a
             -> b
scanSpace f db p pt = f (tupleMatches pt) space
  where
    -- FIXME: This is where we use the index, if available.  If not,
    -- we have to fall back to a table scan.  Instead of computing
    -- indices up front, it may be best to only compute them on the
    -- fly (and then only if they will be referenced again later).
    -- They can be thrown away as soon as they can't be referenced
    -- again.  This will save storage and up-front costs.

    -- Note that the relation might not exist in the database here
    -- because this is the first time data is being inferred for the
    -- EDB.  In that case, just start with empty data and the project
    -- step will insert the table into the database for the next step.
    space = fromMaybe mempty (dataForRelation db p)

-- | Return all of the tuples in the given relation that match the
-- given PartialTuple
select :: (Eq a) => Database a -> Relation -> PartialTuple a -> [Tuple a]
select = scanSpace filter

-- | Return true if any tuples in the given relation match the given
-- 'PartialTuple'
anyMatch :: (Eq a) => Database a -> Relation -> PartialTuple a -> Bool
anyMatch = scanSpace any

{-# INLINE joinLiteralWith #-}
-- | The common worker for the non-conditional clause join functions.
joinLiteralWith :: AdornedClause a
                   -> [Bindings s a]
                   -> (Bindings s a -> PartialTuple a -> ST s [Bindings s a])
                   -> ST s [Bindings s a]
joinLiteralWith c bs f = concatMapM (apply c f) bs
  where
    apply cl fn b = do
      pt <- buildPartialTuple cl b
      fn b pt

-- | Join a literal with the current set of bindings.  This can
-- increase the number of bindings (for a non-negated clause) or
-- decrease the number of bindings (for a negated or conditional
-- clause).
joinLiteral :: (Eq a, Hashable a)
               => Database a
               -> [Bindings s a]
               -> Literal AdornedClause a
               -> ST s [Bindings s a]
joinLiteral db bs (Literal c) = joinLiteralWith c bs (normalJoin db c)
joinLiteral db bs (NegatedLiteral c) = joinLiteralWith c bs (negatedJoin db c)
joinLiteral _ bs (ConditionalClause _ p vs m) =
  foldM (applyJoinCondition p vs m) [] bs

-- | Extract the values that the predicate requires from the current
-- bindings.  Apply the predicate and if it returns True, retain the
-- set of bindings; otherwise, discard it.
applyJoinCondition :: (Eq a, Hashable a)
                      => ([a] -> Bool)
                      -> [Term a]
                      -> HashMap (Term a) Int
                      -> [Bindings s a]
                      -> Bindings s a
                      -> ST s [Bindings s a]
applyJoinCondition p vs m acc b@(Bindings binds) = do
  vals <- mapM extractBinding vs
  case p vals of
    True -> return $! b : acc
    False -> return acc
  where
    extractBinding t =
      let Just ix = HM.lookup t m
      in V.read binds ix

-- | Non-negated join; it works by selecting all of the tuples
-- matching the input PartialTuple and then recording all of the newly
-- bound variable values (i.e., the free variables in the rule).  This
-- produces one set of bindings for each possible value of the free
-- variables in the rule (or could be empty if there are no matching
-- tuples).
normalJoin :: (Eq a, Hashable a) => Database a -> AdornedClause a -> Bindings s a
              -> PartialTuple a -> ST s [Bindings s a]
normalJoin db c binds pt = mapM (projectTupleOntoLiteral c binds) ts
  where
    ts = select db (adornedClauseRelation c) pt

-- | Retain the input binding if there are no matches in the database
-- for the input PartialTuple.  Otherwise reject it.
negatedJoin :: (Eq a, Hashable a) => Database a -> AdornedClause a -> Bindings s a
               -> PartialTuple a -> ST s [Bindings s a]
negatedJoin db c binds pt =
  case anyMatch db (adornedClauseRelation c) pt of
    True -> return []
    False -> return [binds]

-- | For each term in the clause, take it as a literal if it is bound
-- or is an atom.  Otherwise, leave it as free (not mentioned in the
-- partial tuple).
buildPartialTuple :: AdornedClause a -> Bindings s a -> ST s (PartialTuple a)
buildPartialTuple c (Bindings bs) =
  PartialTuple <$> mapM toPartial (adornedClauseTerms c)
  where
    toPartial ta =
      case ta of
        (Atom a, BoundAtom) -> return $! Just a
        (_, Bound slot) -> do
          b <- V.read bs slot
          return $! Just b
        _ -> return Nothing


-- | For each free variable in the tuple (according to the adorned
-- clause), enter its value into the input bindings
projectTupleOntoLiteral :: AdornedClause a -> Bindings s a -> Tuple a -> ST s (Bindings s a)
projectTupleOntoLiteral c (Bindings binds) (Tuple t) = do
  -- We need a copy here because the input bindings are shared among
  -- many calls to this function
  b <- V.clone binds
  let atoms = zip (adornedClauseTerms c) t
  mapM_ (bindFreeVariable b) atoms
  return $! Bindings b
  where
    bindFreeVariable b ((_, adornment), val) =
      case adornment of
        Free ix -> V.write b ix val
        _ -> return ()

-- | Convert a set of variable bindings to a tuple that matches the
-- input clause (which should have all variables).  This is basically
-- unifying variables with the head of the rule.
bindingsToTuple :: (Eq a, Hashable a, Show a)
                   => AdornedClause a
                   -> HashMap (Term a) Int
                   -> Bindings s a
                   -> ST s (Tuple a)
bindingsToTuple c vmap (Bindings bs) = do
  vals <- mapM variableTermToValue (adornedClauseTerms c)
  return $ Tuple vals
  where
    variableTermToValue (t, _) =
      case HM.lookup t vmap of
        Nothing -> error ("NonVariableInRuleHead " ++ show c ++ " " ++ show t ++ " " ++ show vmap)
        Just ix -> V.read bs ix


-- Helpers

{-# INLINE mapM' #-}
-- | This is an alternative definition of mapM that accumulates its
-- results on the heap instead of the stack.  This should avoid some
-- stack overflows when processing some million+ element lists..
mapM' :: (Monad m) => (a -> m b) -> [a] -> m [b]
mapM' f = go []
  where
    go acc [] = return (reverse acc)
    go acc (a:as) = do
      x <- f a
      go (x:acc) as

{-# INLINE concatMapM #-}
concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
concatMapM f xs = liftM concat (mapM' f xs)