{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
-----------------------------------------------------------------------------
-- |
-- Copyright   :  (c) Edward Kmett 2010-2021
-- License     :  BSD3
-- Maintainer  :  ekmett@gmail.com
-- Stability   :  experimental
-- Portability :  GHC only
--
-- This module provides reverse-mode Automatic Differentiation using post-hoc linear time
-- topological sorting.
--
-- For reverse mode AD we use 'System.Mem.StableName.StableName' to recover sharing information from
-- the tape to avoid combinatorial explosion, and thus run asymptotically faster
-- than it could without such sharing information, but the use of side-effects
-- contained herein is benign.
--
-----------------------------------------------------------------------------

MODULE
  ( AD_EXPORT
  , auto
  -- * Gradient
  , grad
  , grad'
  , gradWith
  , gradWith'
  -- * Jacobian
  , jacobian
  , jacobian'
  , jacobianWith
  , jacobianWith'
  -- * Hessian
  , hessian
  , hessianF
  -- * Derivatives
  , diff
  , diff'
  , diffF
  , diffF'
  -- * Unsafe Variadic Gradient
  -- $vgrad
  , vgrad, vgrad'
  , Grad
  ) where

import Data.Functor.Compose
import Numeric.AD.Internal.On
import Numeric.AD.Mode
IMPORTS

-- $setup
--
-- >>> import Numeric.AD.Internal.Doctest

-- | The 'grad' function calculates the gradient of a non-scalar-to-scalar function with kahn-mode AD in a single pass.
--
-- >>> grad (\[x,y,z] -> x*y+z) [1.0,2.0,3.0]
-- [2.0,1.0,1.0]
grad :: BASE1_1(Traversable f, Num a) => (f AD_TYPE -> AD_TYPE) -> f SCALAR_TYPE -> f SCALAR_TYPE
grad f as = unbind vs (partialArray bds $ f vs) where
  (vs,bds) = bind as
{-# INLINE grad #-}

-- | The 'grad'' function calculates the result and gradient of a non-scalar-to-scalar function with kahn-mode AD in a single pass.
--
-- >>> grad' (\[x,y,z] -> x*y+z) [1.0,2.0,3.0]
-- (5.0,[2.0,1.0,1.0])
grad' :: BASE1_1(Traversable f, Num a) => (f AD_TYPE -> AD_TYPE) -> f SCALAR_TYPE -> (SCALAR_TYPE, f SCALAR_TYPE)
grad' f as = (primal r, unbind vs $ partialArray bds r) where
  (vs, bds) = bind as
  r = f vs
{-# INLINE grad' #-}

-- | @'gradWith' g f@ calculates the gradient of a non-scalar-to-scalar function @f@ with kahn-mode AD in a single pass.
-- The gradient is combined element-wise with the argument using the function @g@.
--
-- @
-- 'grad' = 'gradWith' (\_ dx -> dx)
-- 'id' = 'gradWith' const
-- @
gradWith :: BASE1_1(Traversable f, Num a) => (SCALAR_TYPE -> SCALAR_TYPE -> b) -> (f AD_TYPE -> AD_TYPE) -> f SCALAR_TYPE -> f b
gradWith g f as = UNBINDWITH g vs (partialArray bds $ f vs) where
  (vs,bds) = bind as
{-# INLINE gradWith #-}

-- | @'gradWith'' g f@ calculates the result and gradient of a non-scalar-to-scalar function @f@ with kahn-mode AD in a single pass.
-- The gradient is combined element-wise with the argument using the function @g@.
--
-- @'grad'' == 'gradWith'' (\_ dx -> dx)@
gradWith' :: BASE1_1(Traversable f, Num a) => (SCALAR_TYPE -> SCALAR_TYPE -> b) -> (f AD_TYPE -> AD_TYPE) -> f SCALAR_TYPE -> (SCALAR_TYPE, f b)
gradWith' g f as = (primal r, UNBINDWITH g vs $ partialArray bds r) where
  (vs, bds) = bind as
  r = f vs
{-# INLINE gradWith' #-}
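
-- An informal sketch of how the combining function passed to 'gradWith' and
-- 'gradWith'' behaves (not a doctest; the results are what GHCi would be
-- expected to print): using the pairing function (,) tags each input with its
-- partial derivative.
--
--   gradWith (,) (\[x,y,z] -> x*y+z) [1.0,2.0,3.0]
--     == [(1.0,2.0),(2.0,1.0),(3.0,1.0)]
--
--   gradWith' (,) (\[x,y,z] -> x*y+z) [1.0,2.0,3.0]
--     == (5.0,[(1.0,2.0),(2.0,1.0),(3.0,1.0)])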
-- | The 'jacobian' function calculates the jacobian of a non-scalar-to-non-scalar function with kahn AD lazily in @m@ passes for @m@ outputs.
--
-- >>> jacobian (\[x,y] -> [y,x,x*y]) [2.0,1.0]
-- [[0.0,1.0],[1.0,0.0],[1.0,2.0]]
jacobian :: BASE2_1(Traversable f, Functor g, Num a) => (f AD_TYPE -> g AD_TYPE) -> f SCALAR_TYPE -> g (f SCALAR_TYPE)
jacobian f as = unbind vs . partialArray bds <$> f vs where
  (vs, bds) = bind as
{-# INLINE jacobian #-}

-- | The 'jacobian'' function calculates both the result and the Jacobian of a non-scalar-to-non-scalar function, using @m@ invocations of kahn AD,
-- where @m@ is the output dimensionality. Applying @fmap snd@ to the result will recover the result of 'jacobian'.
--
-- >>> jacobian' (\[x,y] -> [y,x,x*y]) [2.0,1.0]
-- [(1.0,[0.0,1.0]),(2.0,[1.0,0.0]),(2.0,[1.0,2.0])]
jacobian' :: BASE2_1(Traversable f, Functor g, Num a) => (f AD_TYPE -> g AD_TYPE) -> f SCALAR_TYPE -> g (SCALAR_TYPE, f SCALAR_TYPE)
jacobian' f as = row <$> f vs where
  (vs, bds) = bind as
  row a = (primal a, unbind vs (partialArray bds a))
{-# INLINE jacobian' #-}

-- | @'jacobianWith' g f@ calculates the Jacobian of a non-scalar-to-non-scalar function @f@ with kahn AD lazily in @m@ passes for @m@ outputs.
--
-- Instead of returning the Jacobian matrix, the elements of the matrix are combined with the input using the function @g@.
--
-- @
-- 'jacobian' = 'jacobianWith' (\_ dx -> dx)
-- 'jacobianWith' 'const' = (\f x -> 'const' x '<$>' f x)
-- @
jacobianWith :: BASE2_1(Traversable f, Functor g, Num a) => (SCALAR_TYPE -> SCALAR_TYPE -> b) -> (f AD_TYPE -> g AD_TYPE) -> f SCALAR_TYPE -> g (f b)
jacobianWith g f as = UNBINDWITH g vs . partialArray bds <$> f vs where
  (vs, bds) = bind as
{-# INLINE jacobianWith #-}

-- | @'jacobianWith'' g f@ calculates both the result and the Jacobian of a non-scalar-to-non-scalar function @f@, using @m@ invocations of kahn AD,
-- where @m@ is the output dimensionality. Applying @fmap snd@ to the result will recover the result of 'jacobianWith'.
--
-- Instead of returning the Jacobian matrix, the elements of the matrix are combined with the input using the function @g@.
--
-- @'jacobian'' == 'jacobianWith'' (\_ dx -> dx)@
jacobianWith' :: BASE2_1(Traversable f, Functor g, Num a) => (SCALAR_TYPE -> SCALAR_TYPE -> b) -> (f AD_TYPE -> g AD_TYPE) -> f SCALAR_TYPE -> g (SCALAR_TYPE, f b)
jacobianWith' g f as = row <$> f vs where
  (vs, bds) = bind as
  row a = (primal a, UNBINDWITH g vs (partialArray bds a))
{-# INLINE jacobianWith' #-}
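
-- An informal sketch of 'jacobianWith' (not a doctest): combining each matrix
-- element with the corresponding input via (,) yields, for each output, a list
-- of (input, partial derivative) pairs.
--
--   jacobianWith (,) (\[x,y] -> [y,x,x*y]) [2.0,1.0]
--     == [[(2.0,0.0),(1.0,1.0)],[(2.0,1.0),(1.0,0.0)],[(2.0,1.0),(1.0,2.0)]]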
-- | Compute the derivative of a function.
--
-- >>> diff sin 0
-- 1.0
--
-- >>> cos 0
-- 1.0
diff :: BASE0_1(Num a) (AD_TYPE -> AD_TYPE) -> SCALAR_TYPE -> SCALAR_TYPE
diff f a = derivative $ f (var a 0)
{-# INLINE diff #-}

-- | The 'diff'' function calculates the value and derivative, as a
-- pair, of a scalar-to-scalar function.
--
-- >>> diff' sin 0
-- (0.0,1.0)
diff' :: BASE0_1(Num a) (AD_TYPE -> AD_TYPE) -> SCALAR_TYPE -> (SCALAR_TYPE, SCALAR_TYPE)
diff' f a = derivative' $ f (var a 0)
{-# INLINE diff' #-}

-- | Compute the derivatives of a function that returns a vector with regards to its single input.
--
-- >>> diffF (\a -> [sin a, cos a]) 0
-- [1.0,0.0]
diffF :: BASE1_1(Functor f, Num a) => (AD_TYPE -> f AD_TYPE) -> SCALAR_TYPE -> f SCALAR_TYPE
diffF f a = derivative <$> f (var a 0)
{-# INLINE diffF #-}

-- | Compute the derivatives of a function that returns a vector with regards to its single input
-- as well as the primal answer.
--
-- >>> diffF' (\a -> [sin a, cos a]) 0
-- [(0.0,1.0),(1.0,0.0)]
diffF' :: BASE1_1(Functor f, Num a) => (AD_TYPE -> f AD_TYPE) -> SCALAR_TYPE -> f (SCALAR_TYPE, SCALAR_TYPE)
diffF' f a = derivative' <$> f (var a 0)
{-# INLINE diffF' #-}

-- | Compute the 'hessian' via the 'jacobian' of the gradient. The gradient is computed in 'Kahn' mode and then the 'jacobian' is computed in 'Kahn' mode.
--
-- However, since @'grad' f :: f a -> f a@ is square this is not as fast as using the forward-mode 'jacobian' of a reverse-mode gradient provided by 'Numeric.AD.hessian'.
--
-- >>> hessian (\[x,y] -> x*y) [1.0,2.0]
-- [[0.0,1.0],[1.0,0.0]]
hessian :: BASE1_1(Traversable f, Num a) => (f (On (Kahn AD_TYPE)) -> On (Kahn AD_TYPE)) -> f SCALAR_TYPE -> f (f SCALAR_TYPE)
hessian f = jacobian (GRAD (off . f . fmap On))

-- | Compute the order 3 Hessian tensor on a non-scalar-to-non-scalar function via the 'Kahn'-mode Jacobian of the 'Kahn'-mode Jacobian of the function.
--
-- Less efficient than 'Numeric.AD.Mode.Mixed.hessianF'.
hessianF :: BASE2_1(Traversable f, Functor g, Num a) => (f (On (Kahn AD_TYPE)) -> g (On (Kahn AD_TYPE))) -> f SCALAR_TYPE -> g (f (f SCALAR_TYPE))
hessianF f = getCompose . jacobian (Compose . JACOBIAN (fmap off . f . fmap On))

-- $vgrad
--
-- Variadic combinators for variadic mixed-mode automatic differentiation.
--
-- Unfortunately, variadicity comes at the expense of being able to use
-- quantification to avoid sensitivity confusion, so be careful when
-- counting the number of 'auto' calls you use when taking the gradient
-- of a function that takes gradients!
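
-- An informal sketch of the variadic interface (not a doctest; the concrete
-- types depend on how this file is included): 'vgrad' takes its arguments
-- directly rather than in a container, and returns one partial derivative per
-- argument, e.g.
--
--   vgrad (\x y -> x * y) 3 2 :: [Double]
--     -- expected to yield [2.0,3.0]
--
-- while 'vgrad'' would additionally return the primal result.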