{-# language StandaloneKindSignatures #-}

module Rel8.Query
  ( Query( Query )
  )
where

-- base
import Control.Applicative ( liftA2 )
import Control.Monad ( liftM2 )
import Data.Kind ( Type )
import Data.Monoid ( Any( Any ) )
import Prelude

-- opaleye
import qualified Opaleye.Internal.HaskellDB.PrimQuery as Opaleye
import qualified Opaleye.Internal.PackMap as Opaleye
import qualified Opaleye.Internal.PrimQuery as Opaleye hiding (lateral)
import qualified Opaleye.Internal.QueryArr as Opaleye
import qualified Opaleye.Internal.Tag as Opaleye

-- rel8
import Rel8.Query.Set ( unionAll )
import Rel8.Query.Opaleye ( fromOpaleye )
import Rel8.Query.Values ( values )
import Rel8.Table ( fromColumns, toColumns )
import Rel8.Table.Alternative
  ( AltTable, (<|>:)
  , AlternativeTable, emptyTable
  )
import Rel8.Table.Projection ( Projectable, apply, project )

-- semigroupoids
import Data.Functor.Apply ( Apply, (<.>) )
import Data.Functor.Bind ( Bind, (>>-) )


-- | The @Query@ monad allows you to compose a @SELECT@ query. This monad has
-- semantics similar to the list (@[]@) monad.
type Query :: Type -> Type
newtype Query a =
  Query (
    -- This is based on Opaleye's Select monad, but with two addtions. We
    -- maintain a stack of PrimExprs from parent previous subselects. In
    -- practice, these are always the results of dummy calls to random().
    --
    -- We also return a Bool that indicates to the parent subselect whether
    -- or not that stack of PrimExprs were used at any point. If they weren't,
    -- then the call to random() is never added to the query.
    --
    -- This is all needed to implement evaluate. Consider the following code:
    --
    -- do
    --   x <- values [lit 'a', lit 'b', lit 'c']
    --   y <- evaluate $ nextval "user_id_seq"
    --   pure (x, y)
    --
    -- If we just used Opaleye's Select monad directly, the SQL would come out
    -- like this:
    --
    -- SELECT
    --   a, b
    -- FROM
    --   (VALUES ('a'), ('b'), ('c')) Q1(a),
    --   LATERAL (SELECT nextval('user_id_seq')) Q2(b);
    --
    -- From the Haskell code, you would intuitively expect to get back the
    -- results of three different calls to nextval(), but from Postgres' point
    -- of view, because the Q2 subquery doesn't reference anything from the Q1
    -- query, it thinks it only needs to call nextval() once. This is actually
    -- exactly the same problem you get with the deprecated ListT IO monad from
    -- the transformers package — *> behaves differently to >>=, so
    -- using ApplicativeDo can change the results of a program. ApplicativeDo
    -- is exactly the optimisation Postgres does on a "LATERAL" query that
    -- doesn't make any references to previous subselects.
    --
    -- Rel8's solution is generate the following SQL instead:
    --
    -- SELECT
    --   a, b
    -- FROM
    --   (SELECT
    --      random() AS dummy,
    --      *
    --    FROM
    --      (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1,
    --   LATERAL (SELECT
    --     CASE
    --       WHEN dummy IS NOT NULL
    --       THEN nextval('user_id_seq')
    --     END) Q2(b);
    --
    -- We use random() here as the dummy value (and not some constant) because
    -- Postgres will again optimize if it sees that a value is constant
    -- (and thus only call nextval() once), but because random() is marked as
    -- VOLATILE, this inhibits Postgres from doing that optimisation.
    --
    -- Why not just reference the a column from the previous query directly
    -- instead of adding a dummy value? Basically, even if we extract out all
    -- the bindings introduced in a PrimQuery, we can't always be sure which
    -- ones refer to constant values, so if we end up laterally referencing a
    -- constant value, then all of this would be for nothing.
    --
    -- Why not just add the call to the previous subselect directly, like so:
    --
    -- SELECT
    --   a, b
    -- FROM
    --   (SELECT
    --      nextval('user_id_seq') AS eval,
    --      *
    --    FROM
    --      (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1,
    --   LATERAL (SELECT eval) Q2(b);
    --
    -- That would work in this case. But consider the following Rel8 code:
    --
    -- do
    --   x <- values [lit 'a', lit 'b', lit 'c']
    --   y <- values [lit 'd', lit 'e', lit 'f']
    --   z <- evaluate $ nextval "user_id_seq"
    --   pure (x, y, z)
    --
    -- How many calls to nextval should there be? Our Haskell intuition says
    -- nine. But that's not what you would get if you used the above
    -- technique. The problem is, which VALUES query should the nextval be
    -- added to? You can choose one or the other to get three calls to
    -- nextval, but you still need to make a superfluous LATERAL references to
    -- the other if you want nine calls. So for the above Rel8 code we generate
    -- the following SQL:
    --
    -- SELECT
    --   a, b, c
    -- FROM
    --   (SELECT
    --      random() AS dummy,
    --      *
    --    FROM
    --      (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1,
    --   (SELECT
    --      random() AS dummy,
    --      *
    --    FROM
    --      (VALUES ('d'), ('e'), ('f')) Q2(b)) Q2,
    --   LATERAL (SELECT
    --     CASE
    --       WHEN Q1.dummy IS NOT NULL AND Q2.dummy IS NOT NULL
    --       THEN nextval('user_id_seq')
    --     END) Q3(c);
    --
    -- This gives nine calls to nextval() as we would expect.
    [Opaleye.PrimExpr] -> Opaleye.Select (Any, a)
  )


instance Projectable Query where
  project f = fmap (fromColumns . apply f . toColumns)


instance Functor Query where
  fmap f (Query a) = Query (fmap (fmap (fmap f)) a)


instance Apply Query where
  (<.>) = (<*>)


instance Applicative Query where
  pure = fromOpaleye . pure
  liftA2 = liftM2


instance Bind Query where
  (>>-) = (>>=)


instance Monad Query where
  Query q >>= f = Query $ \dummies -> Opaleye.stateQueryArr $ \_ tag ->
    let
      qa = q dummies
      ((m, a), query, tag') = Opaleye.runStateQueryArr qa () tag
      Query q' = f a
      (dummies', query', tag'') =
        ( dummy : dummies
        , query <> Opaleye.aRebind bindings
        , Opaleye.next tag'
        )
        where
          (dummy, bindings) = Opaleye.run $ name random
            where
              random = Opaleye.FunExpr "random" []
              name = Opaleye.extractAttr "dummy" tag'
      qa' = Opaleye.lateral $ \_ -> q' dummies'
      ((m'@(Any needsDummies), b), query'', tag''') = Opaleye.runStateQueryArr qa' () tag''
      query'''
        | needsDummies = query' <> query''
        | otherwise = query <> query''
      m'' = m <> m'
    in
      ((m'', b), query''', tag''')


-- | '<|>:' = 'unionAll'.
instance AltTable Query where
  (<|>:) = unionAll


-- | 'emptyTable' = 'values' @[]@.
instance AlternativeTable Query where
  emptyTable = values []