RL.DP

type Probability

type P s a

type V s num

diffV

class DP_Problem pr s a num

action

initV

invariant_probable_actions

invariant_closed_transition

invariant_no_dead_states

invariant_terminal

invariant_policy_actions

invariant_policy_prob

invariant

policy_eq

uniformPolicy

data Opts num s a

defaultOpts

data EvalState num s

es_v'

es_v

es_iter

es_delta

initEvalState

policy_eval

policy_action_value

policy_improve

data DP pr m s a num

policy_iteration