-------------------------------------------------------------------------------
-- |
-- Module    :  Torch.Indef.Static.NN.Conv1d
-- Copyright :  (c) Sam Stites 2017
-- License   :  BSD3
-- Maintainer:  sam@stites.io
-- Stability :  experimental
-- Portability: non-portable
--
-- Temporal (1D) Convolutions
-------------------------------------------------------------------------------
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeInType #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE CPP #-}

#if MIN_VERSION_base(4,12,0)
{-# LANGUAGE NoStarIsType #-}
#endif

{-# OPTIONS_GHC -fplugin GHC.TypeLits.Normalise #-}
{-# OPTIONS_GHC -fno-cse #-}
module Torch.Indef.Static.NN.Conv1d
  ( Conv1d(..)
  , weights
  , bias
  , featureSize
  , kernelWidth
  , stepSize

  , conv1d
  , conv1d_forward
  , conv1d_backwardGradInput
  , conv1d_updGradParams

  , conv1dBatch
  , conv1d_forwardBatch
  , conv1d_backwardGradInputBatch
  , conv1d_updGradParamsBatch

  -- still need work:
  , _temporalRowConvolution_updateOutput
  , _temporalRowConvolution_updateGradInput
  , _temporalRowConvolution_updGradParameters
  ) where

import Numeric.Backprop
import Numeric.Dimensions
import Data.Kind (Type)
import Data.List (intercalate)
import Data.Singletons.Prelude (type (>), type (<))
import Data.Singletons.TypeLits
import System.IO.Unsafe
import Torch.Indef.Types
import Torch.Indef.Static.Tensor
import Torch.Indef.Static.Tensor.Copy
import Torch.Indef.Static.Tensor.Math
import Torch.Indef.Static.Tensor.Math.CompareT
import Torch.Indef.Static.NN.Backprop ()

import qualified Torch.Indef.Dynamic.NN as Dynamic


-- | ADT representation of a convolutional 1d layer.
--
-- FIXME: the type is a bit of a hiccup: can we remove the kernel dimensions or
-- move pad/stride into the phantoms?
--
-- See 'Conv2d' for ideas.
newtype Conv1d f o kW dW
  = Conv1d { getTensors :: (Tensor '[o, f*kW], Tensor '[o]) }

instance (KnownDim f, KnownDim o, KnownDim kW, KnownDim dW)
  => Show (Conv1d f o kW dW) where
  show c = intercalate ","
    [ "Conv1d ("
    ++ "features: " ++ show (featureSize c)
    , " output: "   ++ show (outputSize c)
    , " kernel: "   ++ show (kernelWidth c)
    , " step: "     ++ show (stepSize c)
    ++ ")"
    ]

instance (KnownDim (f*kW), KnownDim o) => Backprop (Conv1d f o kW dW) where
  zero = const . Conv1d $ (constant 0, constant 0)
  one  = const . Conv1d $ (constant 1, constant 1)
  add c0 c1 = Conv1d (weights c0 + weights c1, bias c0 + bias c1)

-- | get the weights from a 'Conv1d' ADT
weights :: Conv1d f o kW dW -> Tensor '[o, f*kW]
weights (Conv1d (w, _)) = w

-- | get the bias from a 'Conv1d' ADT
bias :: Conv1d f o kW dW -> Tensor '[o]
bias (Conv1d (_, b)) = b

-- | get the featureSize from a 'Conv1d' ADT
featureSize :: forall f o kW dW . KnownDim f => Conv1d f o kW dW -> Int
featureSize _ = fromIntegral (dimVal (dim :: Dim f))

-- | get the outputSize from a 'Conv1d' ADT
outputSize :: forall f o kW dW . KnownDim o => Conv1d f o kW dW -> Int
outputSize _ = fromIntegral (dimVal (dim :: Dim o))

-- | kW: The kernel width of the convolution
kernelWidth :: forall f o kW dW . KnownDim kW => Conv1d f o kW dW -> Int
kernelWidth _ = fromIntegral (dimVal (dim :: Dim kW))

-- | dW: The step of the convolution. Default is 1 in C.
stepSize :: forall f o kW dW . KnownDim dW => Conv1d f o kW dW -> Int
stepSize _ = fromIntegral (dimVal (dim :: Dim dW))

-- ========================================================================= --

-- | Type constraints required for temporal convolution
type TemporalConvC s f kW dW o =
  ( All KnownDim '[s,f,kW,dW,o]
  , (s > kW) ~ 'True
  , (kW > 0) ~ 'True
  , (dW > 0) ~ 'True
  -- , o ~ ((Div (s - kW) dW) + 1)
  )

-- | Backprop convolution function
conv1d
  :: forall s seq f kW dW o
  .  Reifies s W
  => KnownDim (f*kW)
  => TemporalConvC seq f kW dW o
  => Double
  -> BVar s (Conv1d f o kW dW)
  -> BVar s (Tensor '[seq, f])
  -> BVar s (Tensor '[seq, o])
conv1d = _conv1d

-- | Backprop convolution function with batching
conv1dBatch
  :: forall s seq f kW dW o b
  .  Reifies s W
  => KnownDim b
  => KnownDim (f*kW)
  => TemporalConvC seq f kW dW o
  => Double
  -> BVar s (Conv1d f o kW dW)
  -> BVar s (Tensor '[b, seq, f])
  -> BVar s (Tensor '[b, seq, o])
conv1dBatch = _conv1d

-- | Backprop convolution function without specifying constraints or dimensions
_conv1d
  -- :: forall s seq f kW dW o d d'
  :: Reifies s W
  => KnownDim (f*kW)
  => All KnownDim '[f,kW,dW,o]
  => All Dimensions '[d,d']
  => Double
  -> BVar s (Conv1d f o kW dW)
  -> BVar s (Tensor d)
  -> BVar s (Tensor d')
_conv1d lr = liftOp2 . op2 $ \c inp ->
  (unsafePerformIO $ _conv1d_forward c inp, \gout ->
      ( unsafePerformIO $ _conv1d_updGradParams c inp gout lr
      , unsafePerformIO $ _conv1d_backwardGradInput c inp gout
      ) )

-------------------------------------------------------------------------------
-- * Functions for temporal convolution

-- | If the input sequence is a 2D tensor of dimension
-- (nInputFrame x inputFrameSize), the output sequence will be
-- (nOutputFrame x outputFrameSize) where
--
--    nOutputFrame = (nInputFrame - kW) / dW + 1
--
conv1d_forward
  :: TemporalConvC s f kW dW o
  => Conv1d f o kW dW
  -> Tensor '[s, f]
  -> IO (Tensor '[s, o])
conv1d_forward = _conv1d_forward

-- | backward pass, computing the gradient input
conv1d_backwardGradInput
  :: TemporalConvC seq f kW dW o
  => Conv1d f o kW dW             -- ^ conv1d state
  -> Tensor '[seq, f]             -- ^ input: s for 'sequence dimension', f for 'feature dimension'
  -> Tensor '[seq, o]             -- ^ grad output
  -> IO (Tensor '[seq, f])        -- ^ grad input
conv1d_backwardGradInput = _conv1d_backwardGradInput

-- | backward pass, computing the weight and bias parameters
--
-- WARNING: this is _pure_ which may be slow for large tensors.
-- Speeding this up will be in active development as the need arises
-- (see issue hasktorch/hasktorch#85)
conv1d_updGradParams
  :: TemporalConvC s f kW dW o
  => Conv1d f o kW dW       -- ^ input state of conv1d (which includes weights and bias)
  -> Tensor '[s, f]         -- ^ input tensor
  -> Tensor '[s, o]         -- ^ output gradient
  -> Double                 -- ^ scale
  -> IO (Conv1d f o kW dW)  -- ^ gradient of (weights, bias)
conv1d_updGradParams = _conv1d_updGradParams

-------------------------------------------------------------------------------
-- * Functions for temporal convolution with a batch dimension

-- | Applies a 1D convolution over an input sequence composed of nInputFrame
-- frames. The input tensor in forward(input) is expected to be a 2D tensor
-- (nInputFrame x inputFrameSize) or a 3D tensor
-- (nBatchFrame x nInputFrame x inputFrameSize).
conv1d_forwardBatch
  :: TemporalConvC s f kW dW o
  => Conv1d f o kW dW
  -> Tensor '[b,s,f]
  -> IO (Tensor '[b,s,o])
conv1d_forwardBatch = _conv1d_forward

-- | 'conv1d_backwardGradInput' with a batch dimension
conv1d_backwardGradInputBatch
  :: TemporalConvC s f kW dW o
  => KnownDim b
  => Conv1d f o kW dW             -- ^ conv1d state
  -> Tensor '[b, s, f]            -- ^ input: s for 'sequence dimension', f for 'feature dimension'
  -> Tensor '[b, s, o]            -- ^ grad output
  -> IO (Tensor '[b, s, f])       -- ^ output
conv1d_backwardGradInputBatch = _conv1d_backwardGradInput

-- | 'conv1d_updGradParams' with a batch dimension
conv1d_updGradParamsBatch
  :: TemporalConvC s f kW dW o
  => KnownDim b
  => Conv1d f o kW dW             -- ^ conv1d state
  -> Tensor '[b, s, f]            -- ^ input: s for 'sequence dimension', f for 'feature dimension'
  -> Tensor '[b, s, o]            -- ^ grad output
  -> Double                       -- ^ scale
  -> IO (Conv1d f o kW dW)        -- ^ output
conv1d_updGradParamsBatch = _conv1d_updGradParams


-------------------------------------------------------------------------------
-- * helper functions

-- | forward pass without locking down the tensor dimensions
{-# NOINLINE _conv1d_forward #-}
_conv1d_forward :: All KnownDim '[f,kW,dW,o] => Conv1d f o kW dW -> Tensor d -> IO (Tensor d')
_conv1d_forward conv inp = do
  let out = empty
  Dynamic._temporalConvolution_updateOutput
    (asDynamic inp) (asDynamic out)
    (asDynamic (weights conv)) (asDynamic (bias conv))
    (kernelWidth conv) (stepSize conv)
    (featureSize conv) (outputSize conv)
  pure out

-- | backward pass, computing the gradient input, without locking down the tensor dimensions
{-# NOINLINE _conv1d_backwardGradInput #-}
_conv1d_backwardGradInput
  :: forall f o kW dW inputDim goutDim
  .  All KnownDim '[f,kW,dW,o]
  => All Dimensions '[inputDim,goutDim]
  => Conv1d f o kW dW
  -> Tensor inputDim
  -> Tensor goutDim
  -> IO (Tensor inputDim)
_conv1d_backwardGradInput conv input gradOut = do
  let gradIn = empty
  Dynamic._temporalConvolution_updateGradInput
    (asDynamic input)  (asDynamic gradOut)
    (asDynamic gradIn) (asDynamic (weights conv))
    (kernelWidth conv) (stepSize conv)
  pure gradIn

{-# NOINLINE _conv1d_updGradParams #-}
-- | backward pass, computing the weight updates, without locking down the tensor dimensions
_conv1d_updGradParams
  :: forall f o kW dW inputDim gOutDim
  .  All KnownDim '[f,kW,dW,o]
  => All Dimensions '[inputDim,gOutDim]
  => Conv1d f o kW dW       -- ^ input state of conv1d (which includes weights and bias)
  -> Tensor inputDim        -- ^ input tensor
  -> Tensor gOutDim         -- ^ output gradient
  -> Double                 -- ^ scale
  -> IO (Conv1d f o kW dW)  -- ^ gradient of (weights, bias)
_conv1d_updGradParams c@(Conv1d (w, b)) input gout scale = do
  -- FIXME: this is _not going to scale well_ and coming up with a mutable version of Conv layers will be nessecary
  let w' = copy w
  let b' = copy b
  Dynamic._temporalConvolution_accGradParameters
    (asDynamic input) (asDynamic gout) (asDynamic w') (asDynamic b')
    (kernelWidth c) (stepSize c) scale
  pure $ Conv1d (w', b')

-- ========================================================================= --

-- |  temporalRowConvolution forward pass (updates the output tensor)
_temporalRowConvolution_updateOutput :: Tensor d -> Tensor d' -> Tensor d'' -> Tensor d''' -> Tensor d -> Tensor d -> Int -> Int -> Int -> Bool -> IO ()
_temporalRowConvolution_updateOutput t0 t1 t2 t3 t4 t5 = Dynamic._temporalRowConvolution_updateOutput (asDynamic t0) (asDynamic t1) (asDynamic t2) (asDynamic t3) (asDynamic t4) (asDynamic t5)

-- |  temporalRowConvolution backward-update (updates the layer and bias tensors)
_temporalRowConvolution_updateGradInput :: Tensor d -> Tensor d' -> Tensor d'' -> Tensor d''' -> Tensor d -> Tensor d -> Int -> Int -> Int -> Bool -> IO ()
_temporalRowConvolution_updateGradInput t0 t1 t2 t3 t4 t5 = Dynamic._temporalRowConvolution_updateGradInput (asDynamic t0) (asDynamic t1) (asDynamic t2) (asDynamic t3) (asDynamic t4) (asDynamic t5)

-- |  temporalRowConvolution backward-update (updates the layer and bias tensors). Called 'accGradParameters' in C to indicate accumulating the gradient parameters.
_temporalRowConvolution_updGradParameters :: Tensor d -> Tensor d' -> Tensor d'' -> Tensor d''' -> Tensor d -> Tensor d -> Int -> Int -> Int -> Bool -> Double -> IO ()
_temporalRowConvolution_updGradParameters t0 t1 t2 t3 t4 t5 = Dynamic._temporalRowConvolution_accGradParameters (asDynamic t0) (asDynamic t1) (asDynamic t2) (asDynamic t3) (asDynamic t4) (asDynamic t5)