-- | Various data-sciencey functions.
module Analyze.Ops
  ( oneHot
  ) where

import           Analyze.Common      (Data)
import           Analyze.RFrame      (RFrame (..), RFrameUpdate (..), col, splitCols, update)
import           Control.Monad.Catch (MonadThrow (..))
import qualified Data.HashSet        as HS
import           Data.Vector         (Vector)
import qualified Data.Vector         as V

-- | Remove duplicate elements from a vector, keeping only the first
-- occurrence of each element and preserving first-seen order.
--
-- A 'HS.HashSet' tracks the elements already encountered, so membership
-- checks do not rescan the output built so far.
uniq :: Data k => Vector k -> Vector k
uniq ks = V.fromList (reverse newKsR)
  where
    -- Accumulator: (elements seen so far, unique elements in reverse order).
    acc (hs, uks) k =
      if HS.member k hs
        then (hs, uks)
        else (HS.insert k hs, k:uks)
    -- Strict left fold: the accumulator is forced at every step, so no
    -- chain of unevaluated thunks proportional to the input length builds up.
    (_, newKsR) = V.foldl' acc (HS.empty, []) ks

-- | Build an indicator row for the target key @tk@: the result has one entry
-- per element of @ks@, holding @yesVal@ at positions whose key equals @tk@
-- and @noVal@ everywhere else.
match :: Eq k => Vector k -> v -> v -> k -> Vector v
match ks yesVal noVal tk = V.map indicator ks
  where
    indicator k
      | k == tk   = yesVal
      | otherwise = noVal

-- | One-hot encode the column named @key@. (See tests for usage.)
--
-- Every value read from the @key@ column is turned into a new column key
-- with @combine key@. The distinct derived keys, in first-seen order, become
-- the new columns; each row holds @yesVal@ under its own derived key and
-- @noVal@ under all the others. The new columns are then applied with
-- 'update' to the second component returned by 'splitCols'.
--
-- NOTE(review): presumably 'splitCols' separates the columns satisfying the
-- predicate from the rest, and 'col' fails in @m@ when the column is absent;
-- confirm against "Analyze.RFrame".
oneHot :: (Data k, MonadThrow m) => (k -> v -> k) -> k -> v -> v -> RFrame k v -> m (RFrame k v)
oneHot combine key yesVal noVal frame =
  col key selected >>= \values ->
    let derivedKs = V.map (combine key) values
        hotKs = uniq derivedKs
        hotRows = V.map (match hotKs yesVal noVal) derivedKs
    in update (RFrameUpdate hotKs hotRows) remainder
  where
    -- Split the frame on the column being encoded.
    (selected, remainder) = splitCols (== key) frame