module RL.Utils where
import qualified Control.Monad.Rnd as Rnd
import RL.Imports
-- | Choose an action epsilon-greedily from a list of @(action, value)@ pairs.
--
-- The pair with the largest value (per 'maximumBy' on the second component)
-- is the greedy choice. A weighted draw via 'Rnd.fromList' then decides:
--
-- * with weight @1 - eps@: return @(greedy True best, bestValue)@;
-- * with weight @eps@: draw one pair via 'Rnd.uniform' from the candidates
--   whose action differs from the best one and return
--   @(greedy False action, value)@.
--
-- The callback receives 'True' when the greedy action was taken and 'False'
-- when an exploratory one was taken; its result replaces the action in the
-- returned pair.
--
-- If no candidate differs from the best action (e.g. a single-element list),
-- the exploratory branch falls back to the greedy choice rather than
-- sampling from an empty list.
--
-- The candidate list must be non-empty, since 'maximumBy' has no result for
-- an empty list.
-- NOTE(review): @eps@ is presumably expected to lie in @[0, 1]@; this is not
-- checked here - confirm against callers.
eps_greedy_action :: (Fractional num, Ord num, Real num, Eq a, MonadRnd g m)
  => num -> (Bool -> a -> a) -> [(a,num)] -> m (a,num)
eps_greedy_action eps greedy as = do
  let (abest, qbest) = maximumBy (compare `on` snd) as
      -- Every candidate whose action is not the greedy one.
      arest = filter (\x -> fst x /= abest) as
      exploit = return (greedy True abest, qbest)
      explore = case arest of
        -- Nothing to explore: behave greedily instead of sampling from [].
        [] -> exploit
        _ -> do
          (r, q) <- Rnd.uniform arest
          return (greedy False r, q)
  -- 'Rnd.fromList' picks one of the two monadic actions; 'join' runs it.
  join $ Rnd.fromList
    [ swap (toRational (1.0 - eps), exploit)
    , swap (toRational eps, explore)
    ]