{-# LANGUAGE RecordWildCards #-}


-- | Feature extraction module for DAG-aware CRFs.


module Data.CRF.Chain2.Tiers.DAG.Feature
(
-- * Feature
  Feat (..)

-- * Featre extraction
-- ** Present features
, presentFeats
-- ** Hidden features
, EdgeIx (..)
, hiddenFeats
, obFeatsOn
, trFeatsOn

-- * Feature selection
, FeatSel
, selectPresent
, selectHidden

-- * Indexing
, lbNum
, lbIxs
, edgeIxs
, prevEdgeIxs
, nextEdgeIxs
, initialEdgeIxs
, finalEdgeIxs
) where


import           Control.Applicative ((<$>))

import qualified Data.Number.LogFloat as L
import qualified Data.Vector as V
import           Data.Maybe (maybeToList)

import           Data.DAG (DAG, EdgeID)
import qualified Data.DAG as DAG

import           Data.CRF.Chain2.Tiers.Core (X, Y, Ob, Cb, CbIx, Feat)
import qualified Data.CRF.Chain2.Tiers.Core as C


----------------------------------------------------
-- Present features
----------------------------------------------------


-- | Observation features with probabilities for a given edge.
obFeats :: EdgeID -> DAG a (X, Y) -> [(Feat, L.LogFloat)]
obFeats edgeID dag =
  [ (ft, px)
  | let edgeLabel = DAG.edgeLabel edgeID dag
  , (x, px) <- C.unY (snd edgeLabel)
  , o       <- C.unX (fst edgeLabel)
  , ft      <- C.obFeats o x ]


-- | Zero-order transition features with probabilities for a given edge.
trFeats1 :: EdgeID -> DAG a (X, Y) -> [(Feat, L.LogFloat)]
trFeats1 i dag =
  [ (ft, px)
  | null (prevEdges i) -- TODO: see ticket on Trello
  , (x, px) <- edgeLabel i
  , ft <- C.trFeats1 x ]
  where
    edgeLabel = C.unY . snd . flip DAG.edgeLabel dag
    prevEdges = flip DAG.prevEdges dag


-- | First-order transition features with probabilities for a given edge.
trFeats2 :: EdgeID -> DAG a (X, Y) -> [(Feat, L.LogFloat)]
trFeats2 i dag =
  [ (ft, px * py)
  | (x, px) <- edgeLabel i
  , j <- prevEdges i
  , null (prevEdges j) -- TODO: see ticket on Trello
  , (y, py) <- edgeLabel j
  , ft <- C.trFeats2 x y ]
  where
    edgeLabel = C.unY . snd . flip DAG.edgeLabel dag
    prevEdges = flip DAG.prevEdges dag


-- | Second-order transition features with probabilities for a given edge.
trFeats3 :: EdgeID -> DAG a (X, Y) -> [(Feat, L.LogFloat)]
trFeats3 i dag =
  [ (ft, px * py * pz)
  | (x, px) <- edgeLabel i
  , j <- prevEdges i
  , (y, py) <- edgeLabel j
  , k <- prevEdges j
  , (z, pz) <- edgeLabel k
  , ft <- C.trFeats3 x y z ]
  where
    edgeLabel = C.unY . snd . flip DAG.edgeLabel dag
    prevEdges = flip DAG.prevEdges dag


-- | Present 'Feat'ures of all kinds occurring w.r.t. to the given edge.
presentFeatsOn :: EdgeID -> DAG a (X, Y) -> [(Feat, L.LogFloat)]
presentFeatsOn edgeID dag
  =  obFeats  edgeID dag
  ++ trFeats1 edgeID dag
  ++ trFeats2 edgeID dag
  ++ trFeats3 edgeID dag


-- | Present 'Feat'ures of all kinds occurring in the given DAG.
presentFeats :: DAG a (X, Y) -> [(Feat, L.LogFloat)]
presentFeats dag = concat
  [ presentFeatsOn edgeID dag
  | edgeID <- DAG.dagEdges dag ]


---------------------------------------------
-- Indexing
---------------------------------------------


-- | List of observations on the given edge of the sentence.
obList :: DAG a X -> EdgeID -> [Ob]
obList dag i = C.unX $ DAG.edgeLabel i dag
{-# INLINE obList #-}


-- | Vector of potential labels on the given edge of the sentence.
lbVec :: DAG a X -> EdgeID -> V.Vector Cb
lbVec dag i = C._unR $ DAG.edgeLabel i dag
{-# INLINE lbVec #-}


-- | Number of potential labels at the given position of the sentence.
lbNum :: DAG a X -> EdgeID -> Int
lbNum dag = V.length . lbVec dag
{-# INLINE lbNum #-}


-- | Potential label at the given position and at the given index.
lbOn :: DAG a X -> EdgeID -> CbIx -> Maybe Cb
lbOn dag = (V.!?) . lbVec dag
{-# INLINE lbOn #-}


-- | List of label indices at the given edge.
lbIxs :: DAG a X -> EdgeID -> [CbIx]
lbIxs dag i = [0 .. lbNum dag i - 1]
{-# INLINE lbIxs #-}


-- | The list of EdgeIx's corresponding to the given edge.
edgeIxs :: DAG a X -> EdgeID -> [EdgeIx]
edgeIxs dag i =
  [ EdgeIx {edgeID=i, lbIx=u}
  | u <- lbIxs dag i ]


-- | The list of EdgeIx's corresponding to the previous edges.
-- If the argument edgeID is `Nothing` or if the list of previous
-- edges is empty, the result will be a singleton list containing
-- `Nothing` (which represents a special out-of-bounds EdgeIx).
prevEdgeIxs :: DAG a X -> Maybe EdgeID -> [Maybe EdgeIx]
prevEdgeIxs _ Nothing = [Nothing]
prevEdgeIxs dag (Just i)
  | null js = [Nothing]
  | otherwise = Just <$>
    [ EdgeIx {edgeID=j, lbIx=u}
    | j <- js, u <- lbIxs dag j ]
  where js = DAG.prevEdges i dag


-- | Similar to `prevEdgeIxs` but returns the succeeding edges.
nextEdgeIxs :: DAG a X -> Maybe EdgeID -> [Maybe EdgeIx]
nextEdgeIxs _ Nothing = [Nothing]
nextEdgeIxs dag (Just i)
  | null js = [Nothing]
  | otherwise = Just <$>
    [ EdgeIx {edgeID=j, lbIx=u}
    | j <- js, u <- lbIxs dag j ]
  where js = DAG.nextEdges i dag


-- | Obtain the list of initial EdgeIxs.
initialEdgeIxs :: DAG a X -> [EdgeIx]
initialEdgeIxs dag = concat
  [ edgeIxs dag i
  | i <- DAG.dagEdges dag
  , DAG.isInitialEdge i dag ]


-- | Obtain the list of final EdgeIxs.
finalEdgeIxs :: DAG a X -> [EdgeIx]
finalEdgeIxs dag = concat
  [ edgeIxs dag i
  | i <- DAG.dagEdges dag
  , DAG.isFinalEdge i dag ]


----------------------------------------------------
-- Hidden features
----------------------------------------------------


-- | Edge with the corresponding label index.
data EdgeIx = EdgeIx
  { edgeID :: {-# UNPACK #-} !EdgeID
    -- ^ ID of an edge
  , lbIx   :: {-# UNPACK #-} !CbIx
    -- ^ Index of the corresponding complex label
  }
  deriving (Show, Eq, Ord)


-- | Observation features on a given position and with respect
-- to a given label (determined by index).
obFeatsOn :: DAG a X -> EdgeIx -> [Feat]
obFeatsOn dag EdgeIx{..} = concat
  [ C.obFeats o e
  | e <- maybeToList $ lbOn dag edgeID lbIx
  , o <- obList dag edgeID ]
-- obFeatsOn :: DAG a X -> EdgeID -> CbIx -> [Feat]
-- obFeatsOn dag edgeID lbIx = concat
--   [ C.obFeats o e
--   | e <- maybeToList $ lbOn dag edgeID lbIx
--   , o <- obList dag edgeID ]
{-# INLINE obFeatsOn #-}


-- | Transition features on a given position and with respect
-- to given labels (determined by indexes).
trFeatsOn
  :: DAG a X
  -> Maybe EdgeIx -- ^ Current EdgeIx
  -> Maybe EdgeIx -- ^ Previous EdgeIx
  -> Maybe EdgeIx -- ^ One before the previous EdgeIx
  -> [Feat]
trFeatsOn dag u' v' w' = doit
  (lbOn' =<< u')
  (lbOn' =<< v')
  (lbOn' =<< w')
  where
    lbOn' EdgeIx{..} = lbOn dag edgeID lbIx
    doit (Just u) (Just v) (Just w) = C.trFeats3 u v w
    doit (Just u) (Just v) _        = C.trFeats2 u v
    doit (Just u) _ _               = C.trFeats1 u
    doit _ _ _                      = []
{-# INLINE trFeatsOn #-}


-- | Features hidden in the dataset element.
hiddenFeats :: DAG a X -> [Feat]
hiddenFeats dag =
  obFs ++ trFs
  where
    obFs = concat
      [ obFeatsOn dag u
      | i <- DAG.dagEdges dag
      , u <- edgeIxs dag i ]
    trFs = concat
      [ trFeatsOn dag u v w
      | i <- DAG.dagEdges dag
      , u <- Just <$> edgeIxs dag i
      , v <- prevEdgeIxs dag (edgeID <$> u)
      , w <- prevEdgeIxs dag (edgeID <$> v) ]


----------------------------------------------------
-- Feature selection
----------------------------------------------------


-- | A feature selection function type.
type FeatSel a = DAG a (X, Y) -> [Feat]


-- | The 'presentFeats' adapted to fit feature selection specs.
selectPresent :: FeatSel a
selectPresent = map fst . presentFeats


-- | The 'hiddenFeats' adapted to fit feature selection specs.
selectHidden :: FeatSel a
selectHidden = hiddenFeats . fmap fst