Safe Haskell | None |
---|---|
Language | Haskell2010 |
- data MC_Opts = MC_Opts {}
- defaultOpts :: MC_Opts
- type MC_Number = Double
- type Q s a = M s a MC_Number
- type V s = HashMap s MC_Number
- emptyQ :: MC_Number -> Q s a
- q2v :: (Bounded a, Enum a, Eq a, Hashable a, Eq s, Hashable s) => Q s a -> V s
- diffV :: (Eq s, Hashable s) => V s -> V s -> MC_Number
- toV :: (Bounded a, Enum a, Eq a, Hashable a, Eq s, Hashable s) => Q s a -> V s
- class (Fractional num, Ord s, Ord a, Show s, Show a, Bounded a, Enum a) => MC_Problem pr s a num | pr -> s, pr -> a, pr -> num where
- queryQ :: (Hashable s, Hashable k, MonadState (M s k v) f, Eq s, Eq k, Enum k, Bounded k) => s -> f [(k, v)]
- modifyQ :: (Hashable a, Hashable s, MonadState (M s a num) m, Eq a, Eq s, Enum a, Bounded a) => s -> a -> (num -> num) -> m ()
- data MC pr m s a = MC {
- mc_pr :: pr
- mc_transition :: s -> a -> m s
- mc_es_learn :: (Monad m, Hashable s, Hashable a, MC_Problem pr s a MC_Number) => MC_Opts -> Q s a -> s -> a -> MC pr m s a -> m (Q s a)
Documentation
class (Fractional num, Ord s, Ord a, Show s, Show a, Bounded a, Enum a) => MC_Problem pr s a num | pr -> s, pr -> a, pr -> num where Source #
queryQ :: (Hashable s, Hashable k, MonadState (M s k v) f, Eq s, Eq k, Enum k, Bounded k) => s -> f [(k, v)] Source #
modifyQ :: (Hashable a, Hashable s, MonadState (M s a num) m, Eq a, Eq s, Enum a, Bounded a) => s -> a -> (num -> num) -> m () Source #
mc_es_learn :: (Monad m, Hashable s, Hashable a, MC_Problem pr s a MC_Number) => MC_Opts -> Q s a -> s -> a -> MC pr m s a -> m (Q s a) Source #
MC-ES learning algorithm, pg 5.4. An alpha learning rate is used instead of total averaging, and the maximum episode length is limited to make sure the policy terminates.