Portability  GHC only 

Stability  experimental 
Maintainer  ekmett@gmail.com 
Safe Haskell  None 
 AD modes
 Gradients (Reverse Mode)
 Higher Order Gradients (Sparse-on-Reverse)
 Variadic Gradients (Sparse or Kahn)
 Jacobians (Sparse or Reverse)
 Higher Order Jacobian (Sparse-on-Reverse)
 Transposed Jacobians (Forward Mode)
 Hessian (Sparse-On-Reverse)
 Hessian Tensors (Sparse or Sparse-On-Reverse)
 Hessian Tensors (Sparse)
 Hessian Vector Products (Forward-On-Reverse)
 Derivatives (Forward Mode)
 Derivatives (Tower)
 Directional Derivatives (Forward Mode)
 Directional Derivatives (Tower)
 Taylor Series (Tower)
 Maclaurin Series (Tower)
 Gradient Descent
Mixed-Mode Automatic Differentiation.
Each combinator exported from this module chooses an appropriate AD mode. The following basic operations are supported, modified as appropriate by the suffixes below:

grad
computes the gradient (partial derivatives) of a function at a point 
jacobian
computes the Jacobian matrix of a function at a point 
diff
computes the derivative of a function at a point 
du
computes a directional derivative of a function at a point 
hessian
computes the Hessian matrix (matrix of second partial derivatives) of a function at a point
The suffixes have the following meanings:

'
 also return the answer 
With
lets the user supply a function to blend the input with the output 
F
is a version of the base function lifted to return a Traversable (or Functor) result 
s
means the function returns all higher derivatives in a list or f-branching Stream

T
means the result is transposed with respect to the traditional formulation. 
0
means that the resulting derivative list is padded with 0s at the end.
 data AD s a
 class (Num t, Num (Scalar t)) => Mode t where
 grad :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> f a
 grad' :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> (a, f a)
 gradWith :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> f b
 gradWith' :: (Traversable f, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> (a, f b)
 grads :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> Cofree f a
 class Num a => Grad i o o' a | i -> a o o', o -> a i o', o' -> a i o
 vgrad :: Grad i o o' a => i -> o
 vgrad' :: Grad i o o' a => i -> o'
 class Num a => Grads i o a | i -> a o, o -> a i
 vgrads :: Grads i o a => i -> o
 jacobian :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (f a)
 jacobian' :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (a, f a)
 jacobianWith :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (f b)
 jacobianWith' :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. Reifies s Tape => f (Reverse s a) -> g (Reverse s a)) -> f a -> g (a, f b)
 jacobians :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (Cofree f a)
 jacobianT :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> f (g a)
 jacobianWithT :: (Traversable f, Functor g, Num a) => (a -> a -> b) -> (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f a -> f (g b)
 hessian :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Sparse a))) -> On (Reverse s (Sparse a))) -> f a -> f (f a)
 hessian' :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) -> AD s (Sparse a)) -> f a -> (a, f (a, f a))
 hessianF :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Sparse a))) -> g (On (Reverse s (Sparse a)))) -> f a -> g (f (f a))
 hessianF' :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) -> g (AD s (Sparse a))) -> f a -> g (a, f (a, f a))
 hessianProduct :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Forward a))) -> On (Reverse s (Forward a))) -> f (a, a) -> f a
 hessianProduct' :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Forward a))) -> On (Reverse s (Forward a))) -> f (a, a) -> f (a, a)
 diff :: Num a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> a
 diffF :: (Functor f, Num a) => (forall s. AD s (Forward a) -> f (AD s (Forward a))) -> a -> f a
 diff' :: Num a => (forall s. AD s (Forward a) -> AD s (Forward a)) -> a -> (a, a)
 diffF' :: (Functor f, Num a) => (forall s. AD s (Forward a) -> f (AD s (Forward a))) -> a -> f (a, a)
 diffs :: Num a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
 diffsF :: (Functor f, Num a) => (forall s. AD s (Tower a) -> f (AD s (Tower a))) -> a -> f [a]
 diffs0 :: Num a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
 diffs0F :: (Functor f, Num a) => (forall s. AD s (Tower a) -> f (AD s (Tower a))) -> a -> f [a]
 du :: (Functor f, Num a) => (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f (a, a) -> a
 du' :: (Functor f, Num a) => (forall s. f (AD s (Forward a)) -> AD s (Forward a)) -> f (a, a) -> (a, a)
 duF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f (a, a) -> g a
 duF' :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Forward a)) -> g (AD s (Forward a))) -> f (a, a) -> g (a, a)
 dus :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) -> AD s (Tower a)) -> f [a] -> [a]
 dus0 :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) -> AD s (Tower a)) -> f [a] -> [a]
 dusF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) -> g (AD s (Tower a))) -> f [a] -> g [a]
 dus0F :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) -> g (AD s (Tower a))) -> f [a] -> g [a]
 taylor :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a -> [a]
 taylor0 :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> a -> [a]
 maclaurin :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
 maclaurin0 :: Fractional a => (forall s. AD s (Tower a) -> AD s (Tower a)) -> a -> [a]
 gradientDescent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> [f a]
 gradientAscent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) -> Reverse s a) -> f a -> [f a]
 conjugateGradientDescent :: (Traversable f, Ord a, Fractional a) => (forall s. Chosen s => f (Or s (On (Forward (Forward a))) (Kahn a)) -> Or s (On (Forward (Forward a))) (Kahn a)) -> f a -> [f a]
 conjugateGradientAscent :: (Traversable f, Ord a, Fractional a) => (forall s. Chosen s => f (Or s (On (Forward (Forward a))) (Kahn a)) -> Or s (On (Forward (Forward a))) (Kahn a)) -> f a -> [f a]
Documentation
Typeable2 AD  
Bounded a => Bounded (AD s a)  
Enum a => Enum (AD s a)  
Eq a => Eq (AD s a)  
Floating a => Floating (AD s a)  
Fractional a => Fractional (AD s a)  
Num a => Num (AD s a)  
Ord a => Ord (AD s a)  
Read a => Read (AD s a)  
Real a => Real (AD s a)  
RealFloat a => RealFloat (AD s a)  
RealFrac a => RealFrac (AD s a)  
Show a => Show (AD s a)  
Erf a => Erf (AD s a)  
InvErf a => InvErf (AD s a)  
Mode a => Mode (AD s a) 
AD modes
class (Num t, Num (Scalar t)) => Mode t whereSource
Mode Double  
Mode Float  
Mode Int  
Mode Int8  
Mode Int16  
Mode Int32  
Mode Int64  
Mode Integer  
Mode Word  
Mode Word8  
Mode Word16  
Mode Word32  
Mode Word64  
Mode Natural  
Mode ForwardDouble  
Integral a => Mode (Ratio a)  
RealFloat a => Mode (Complex a)  
Num a => Mode (Id a)  
Num a => Mode (Tower a)  
Num a => Mode (Sparse a)  
(Mode t, Mode (Scalar t)) => Mode (On t)  
Num a => Mode (Kahn a)  
Num a => Mode (Forward a)  
(Num a, Traversable f) => Mode (Dense f a)  
Mode a => Mode (AD s a)  
(Reifies * s Tape, Num a) => Mode (Reverse s a)  
(Mode a, Mode b, Chosen s, ~ * (Scalar a) (Scalar b)) => Mode (Or s a b) 
Gradients (Reverse Mode)
grad :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (Reverse s a) > Reverse s a) > f a > f aSource
The grad
function calculates the gradient of a non-scalar-to-scalar function with reverse-mode AD in a single pass.
>>>
grad (\[x,y,z] -> x*y+z) [1,2,3]
[2,1,1]
grad' :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (Reverse s a) > Reverse s a) > f a > (a, f a)Source
The grad'
function calculates the result and gradient of a non-scalar-to-scalar function with reverse-mode AD in a single pass.
>>>
grad' (\[x,y,z] -> x*y+z) [1,2,3]
(5,[2,1,1])
gradWith :: (Traversable f, Num a) => (a > a > b) > (forall s. Reifies s Tape => f (Reverse s a) > Reverse s a) > f a > f bSource
gradWith' :: (Traversable f, Num a) => (a > a > b) > (forall s. Reifies s Tape => f (Reverse s a) > Reverse s a) > f a > (a, f b)Source
Higher Order Gradients (Sparse-on-Reverse)
grads :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) > AD s (Sparse a)) > f a > Cofree f aSource
Variadic Gradients (Sparse or Kahn)
Variadic combinators for variadic mixed-mode automatic differentiation.
Unfortunately, variadicity comes at the expense of being able to use
quantification to avoid sensitivity confusion, so be careful when
counting the number of auto
calls you use when taking the gradient
of a function that takes gradients!
Jacobians (Sparse or Reverse)
jacobian :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (Reverse s a) > g (Reverse s a)) > f a > g (f a)Source
The jacobian
function calculates the Jacobian of a non-scalar-to-non-scalar function with reverse AD lazily in m
passes for m
outputs.
>>>
jacobian (\[x,y] -> [y,x,x*y]) [2,1]
[[0,1],[1,0],[1,2]]
jacobian' :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (Reverse s a) > g (Reverse s a)) > f a > g (a, f a)Source
The jacobian'
function calculates both the result and the Jacobian of a non-scalar-to-non-scalar function, using m
invocations of reverse AD,
where m
is the output dimensionality. Applying fmap snd
to the result will recover the result of jacobian.
An alias for gradF'.
>>>
jacobian' (\[x,y] -> [y,x,x*y]) [2,1]
[(1,[0,1]),(2,[1,0]),(2,[1,2])]
jacobianWith :: (Traversable f, Functor g, Num a) => (a > a > b) > (forall s. Reifies s Tape => f (Reverse s a) > g (Reverse s a)) > f a > g (f b)Source
jacobianWith g f calculates the Jacobian of a non-scalar-to-non-scalar function f
with reverse AD lazily in m
passes for m
outputs.
Instead of returning the Jacobian matrix, the elements of the matrix are combined with the input using the function g.
jacobian == jacobianWith (\_ dx -> dx)
jacobianWith const == (\f x -> const x <$> f x)
jacobianWith' :: (Traversable f, Functor g, Num a) => (a > a > b) > (forall s. Reifies s Tape => f (Reverse s a) > g (Reverse s a)) > f a > g (a, f b)Source
jacobianWith' g f calculates both the result and the Jacobian of a non-scalar-to-non-scalar function f, using m
invocations of reverse AD,
where m
is the output dimensionality. Applying fmap snd
to the result will recover the result of jacobianWith.
Instead of returning the Jacobian matrix, the elements of the matrix are combined with the input using the function g.
jacobian' == jacobianWith' (\_ dx -> dx)
Higher Order Jacobian (Sparse-on-Reverse)
jacobians :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) > g (AD s (Sparse a))) > f a > g (Cofree f a)Source
Transposed Jacobians (Forward Mode)
jacobianT :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Forward a)) > g (AD s (Forward a))) > f a > f (g a)Source
A fast, simple, transposed Jacobian computed with forwardmode AD.
jacobianWithT :: (Traversable f, Functor g, Num a) => (a > a > b) > (forall s. f (AD s (Forward a)) > g (AD s (Forward a))) > f a > f (g b)Source
Hessian (Sparse-On-Reverse)
hessian :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Sparse a))) > On (Reverse s (Sparse a))) > f a > f (f a)Source
Compute the Hessian via the Jacobian of the gradient. The gradient is computed in reverse mode and then the Jacobian is computed in sparse (forward) mode.
>>>
hessian (\[x,y] -> x*y) [1,2]
[[0,1],[1,0]]
hessian' :: (Traversable f, Num a) => (forall s. f (AD s (Sparse a)) > AD s (Sparse a)) > f a > (a, f (a, f a))Source
Hessian Tensors (Sparse or Sparse-On-Reverse)
hessianF :: (Traversable f, Functor g, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Sparse a))) > g (On (Reverse s (Sparse a)))) > f a > g (f (f a))Source
Compute the order 3 Hessian tensor on a non-scalar-to-non-scalar function using Sparse-on-Reverse.
>>>
hessianF (\[x,y] -> [x*y,x+y,exp x*cos y]) [1,2]
[[[0.0,1.0],[1.0,0.0]],[[0.0,0.0],[0.0,0.0]],[[1.1312043837568135,2.4717266720048188],[2.4717266720048188,1.1312043837568135]]]
Hessian Tensors (Sparse)
hessianF' :: (Traversable f, Functor g, Num a) => (forall s. f (AD s (Sparse a)) > g (AD s (Sparse a))) > f a > g (a, f (a, f a))Source
Hessian Vector Products (Forward-On-Reverse)
hessianProduct :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Forward a))) > On (Reverse s (Forward a))) > f (a, a) > f aSource
hessianProduct f wv computes the product of the Hessian H
of a non-scalar-to-scalar function f
at w = fst <$> wv
with a vector v = snd <$> wv
using "Pearlmutter's method" from http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.29.6143, which states:
H v = (d/dr) grad_w (w + r v) | r = 0
Or in other words, we take the directional derivative of the gradient. The gradient is calculated in reverse mode, then the directional derivative is calculated in forward mode.
hessianProduct' :: (Traversable f, Num a) => (forall s. Reifies s Tape => f (On (Reverse s (Forward a))) > On (Reverse s (Forward a))) > f (a, a) > f (a, a)Source
hessianProduct' f wv computes both the gradient of a non-scalar-to-scalar function f
at w = fst <$> wv
and the product of the Hessian H
at w
with a vector v = snd <$> wv
using "Pearlmutter's method". The outputs are returned wrapped in the same functor.
H v = (d/dr) grad_w (w + r v) | r = 0
Or in other words, we return the gradient and the directional derivative of the gradient. The gradient is calculated in reverse mode, then the directional derivative is calculated in forward mode.
Derivatives (Forward Mode)
diffF :: (Functor f, Num a) => (forall s. AD s (Forward a) > f (AD s (Forward a))) > a > f aSource
diffF' :: (Functor f, Num a) => (forall s. AD s (Forward a) > f (AD s (Forward a))) > a > f (a, a)Source
Derivatives (Tower)
diffsF :: (Functor f, Num a) => (forall s. AD s (Tower a) > f (AD s (Tower a))) > a > f [a]Source
diffs0F :: (Functor f, Num a) => (forall s. AD s (Tower a) > f (AD s (Tower a))) > a > f [a]Source
Directional Derivatives (Forward Mode)
du :: (Functor f, Num a) => (forall s. f (AD s (Forward a)) > AD s (Forward a)) > f (a, a) > aSource
Compute the directional derivative of a function given a zipped up Functor
of the input values and their derivatives
du' :: (Functor f, Num a) => (forall s. f (AD s (Forward a)) > AD s (Forward a)) > f (a, a) > (a, a)Source
Compute the answer and directional derivative of a function given a zipped up Functor
of the input values and their derivatives
duF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Forward a)) > g (AD s (Forward a))) > f (a, a) > g aSource
Compute a vector of directional derivatives for a function given a zipped up Functor
of the input values and their derivatives.
duF' :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Forward a)) > g (AD s (Forward a))) > f (a, a) > g (a, a)Source
Compute a vector of answers and directional derivatives for a function given a zipped up Functor
of the input values and their derivatives.
Directional Derivatives (Tower)
dus0 :: (Functor f, Num a) => (forall s. f (AD s (Tower a)) > AD s (Tower a)) > f [a] > [a]Source
dusF :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) > g (AD s (Tower a))) > f [a] > g [a]Source
dus0F :: (Functor f, Functor g, Num a) => (forall s. f (AD s (Tower a)) > g (AD s (Tower a))) > f [a] > g [a]Source
Taylor Series (Tower)
Maclaurin Series (Tower)
maclaurin0 :: Fractional a => (forall s. AD s (Tower a) > AD s (Tower a)) > a > [a]Source
Gradient Descent
gradientDescent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) > Reverse s a) > f a > [f a]Source
The gradientDescent
function performs a multivariate
optimization, based on the naive-gradient-descent in the file
stalingrad/examples/flowtests/presaddle1a.vlad
from the
VLAD compiler Stalingrad sources. Its output is a stream of
increasingly accurate results. (Modulo the usual caveats.)
It uses reverse mode automatic differentiation to compute the gradient.
gradientAscent :: (Traversable f, Fractional a, Ord a) => (forall s. Reifies s Tape => f (Reverse s a) > Reverse s a) > f a > [f a]Source
Perform a gradient ascent using reverse mode automatic differentiation to compute the gradient.
conjugateGradientDescent :: (Traversable f, Ord a, Fractional a) => (forall s. Chosen s => f (Or s (On (Forward (Forward a))) (Kahn a)) > Or s (On (Forward (Forward a))) (Kahn a)) > f a > [f a]Source
Perform a conjugate gradient descent using reverse mode automatic differentiation to compute the gradient, and using forward-on-forward mode for computing extrema.
>>>
let sq x = x * x
>>>
let rosenbrock [x,y] = sq (1 - x) + 100 * sq (y - sq x)
>>>
rosenbrock [0,0]
1
>>>
rosenbrock (conjugateGradientDescent rosenbrock [0, 0] !! 5) < 0.1
True
conjugateGradientAscent :: (Traversable f, Ord a, Fractional a) => (forall s. Chosen s => f (Or s (On (Forward (Forward a))) (Kahn a)) > Or s (On (Forward (Forward a))) (Kahn a)) > f a > [f a]Source
Perform a conjugate gradient ascent using reverse mode automatic differentiation to compute the gradient.