module RL.Utils where

import qualified Control.Monad.Rnd as Rnd

import RL.Imports

-- | Return @eps@-greedy action for some state of problem @pr@. The state is
-- described with assosiated list of weighted actions @as@
eps_greedy_action :: (Fractional num, Ord num, Real num, Eq a, MonadRnd g m)
  => num -> (Bool -> a -> a) -> [(a,num)] -> m (a,num)
eps_greedy_action eps greedy as = do
  let (abest, qbest) = maximumBy (compare`on`snd) as
  let arest = filter (\x -> fst x /= abest) as
  join $ Rnd.fromList [
    swap (toRational (1.0-eps), do
      -- traceM "greedy"
      return (greedy True abest, qbest)),
    swap (toRational eps, do
      -- traceM "random"
      (r,q) <- Rnd.uniform arest
      return (greedy False r, q))
    ]