{-| - This module relies on older version of library and thus couldn't be - compailed. - - FIXME: update the Gambler module --} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE RecordWildCards #-} module Examples.Ch4_Gambler where import Control.Applicative import Control.Monad import Control.Monad.Trans import qualified Data.List as List import Data.Map.Strict (Map, (!)) import qualified Data.Map.Strict as Map import Data.Set (Set) import qualified Data.Set as Set import Data.Ratio import Text.Printf import Debug.Trace import Types as RL import DP as RL data Bet = Bet { bet_amount :: Int } deriving(Show, Eq, Ord) data Game num = Game { game_win_score :: Int } deriving(Show) data Gambler = Gambler { g_pocket :: Int } deriving(Show, Eq, Ord) instance (Fractional num, Ord num) => DP_Problem num Game Gambler Bet where rl_states Game{..} = Set.fromList [Gambler pocket | pocket <- [0..game_win_score]] rl_actions Game{..} Gambler{..} = if g_pocket >= game_win_score || g_pocket <= 0 then Set.empty else let border = g_pocket `min` (game_win_score - g_pocket) in Set.fromList [Bet x | x <- [1..border]] rl_transitions Game{..} Gambler{..} Bet{..} = Set.fromList [ (Gambler (g_pocket - bet_amount), 6%10) , (Gambler (g_pocket + bet_amount), 4%10)] rl_reward Game{..} _ _ Gambler{..} = if g_pocket >= game_win_score then 1 else 0 rl_terminal_states Game{..} = Set.fromList [ Gambler 0, Gambler game_win_score ] thegame :: Game Rational thegame = Game 100 example_4_3 :: forall num . (Fractional num, Ord num, Real num) => Game num -> IO () example_4_3 thegame = let showValPolicy :: (StateVal num Gambler, GenericPolicy Gambler Bet) -> String showValPolicy (v@StateVal{..}, GenericPolicy{..}) = unlines $ flip map (Map.toAscList v_map `zip` Map.toAscList _p_map) $ \((_, vs) , (s@Gambler{..}, set)) -> let actmap = List.sortOn (\(p,a)-> a) $ Set.toList set show_actmap = let d :: Bet -> Double d a = fromRational $ toRational $ stateval v s a in List.intercalate "," $ flip map actmap $ \(a@Bet{..},p) -> show bet_amount ++ " (" ++ (printf "%2.5f" (d a)) ++ ")" (Bet{..}, mx) | null actmap = (Bet 0, 0) | otherwise = head $ actmap show_vs = printf "% 2.5f" (fromRational $ toRational vs :: Double) in (printf ("%02d: ") g_pocket) ++ (replicate bet_amount '#') ++ show bet_amount ++ " " ++ show_vs ++ " " ++ show_actmap debugState :: (StateVal num Gambler, GenericPolicy Gambler Bet) -> IO () debugState = putStrLn . showValPolicy opts :: EvalOpts num s a opts = defaultOpts { eo_max_iter = 1 , eo_gamma = 1.0 , eo_etha = 0.00001 -- , eo_debug = const $ return () } stateval :: (DP_Problem num Game s a) => StateVal num s -> s-> a -> num stateval v s a = policy_action_value thegame s a opts v in do (v,p) <- policy_iteraton thegame (uniformGenericPolicy thegame) (zero_sate_values thegame) opts debugState (v,p)