-- | A continuous-time Markov decision process (CTMDP) is an MDP where
-- transitions between states take a random amount of time. Each
-- transition time is assumed to be exponentially distributed with an
-- action- and state-dependent transition rate.
--
-- The record accessors of the 'CTMDP' type conflict with those of the
-- 'MDP' type, so either import only the 'mkCTMDP' and 'uniformize'
-- functions or import this module qualified.
module Algorithms.MDP.CTMDP
( CTMDP (..)
, mkCTMDP
, Rates
, uniformize
) where
import qualified Data.Vector as V
import Algorithms.MDP (MDP(MDP))
import Algorithms.MDP hiding (MDP (..))
-- | A Continuous-time Markov decision process.
--
-- A CTMDP is a continuous-time analog of an MDP. In a CTMDP each
-- stage takes a variable amount of time. Each stage lasts an
-- expontially distributed amount of time characterized by a state-
-- and action-dependent rate parameter. Instead of simply having costs
-- associated with a state and an action, the costs of a CTMDP are
-- broken up into fixed and rate costs. Fixed costs are incured as an
-- action are chosen, while rate costs are paid for the duration of
-- the stage.
--
-- Here the type variable 'a' represents the type of the states, 'b'
-- represents the type of the actions, and 't' represents the numeric
-- type used in computations. Generally choosing 't' to be a Double is
-- fine, although there is no reason a higher-precision type cannot be
-- used.
--
-- This type should not be constructed directly; use the 'mkCTMDP'
-- constructor instead.
data CTMDP a b t = CTMDP
{ _states :: V.Vector a
, _actions :: V.Vector b
, _fixedCosts :: V.Vector (V.Vector t)
, _rateCosts :: V.Vector (V.Vector t)
, _rates :: V.Vector (V.Vector t)
, _trans :: V.Vector (V.Vector (V.Vector t))
, _discount :: t
, _actionSet :: V.Vector (V.Vector Int)
}
-- | A function mapping an action and a state to a transition rate.
type Rates a b t = b -> a -> t
-- | Create a CTMDP.
mkCTMDP :: (Eq b) =>
[a] -- ^ The state space
-> [b] -- ^ The action space
-> Transitions a b t -- ^ The transition probabilities
-> Rates a b t -- ^ The transition rates
-> Costs a b t -- ^ The action-dependent fixed costs
-> Costs a b t -- ^ The action-dependent rate costs
-> ActionSet a b -- ^ The state-dependent actions
-> t -- ^ The discount factor in (0, 1]
-> CTMDP a b t -- ^ The resulting CTMDP
mkCTMDP states actions trans rates fixedCost rateCost actionSet discount =
let
_states = V.fromList states
_actions = V.fromList actions
_states' = V.fromList [0..length states - 1]
_actions' = V.fromList [0..length actions - 1]
mkCostVecFor cf ac = V.fromList $ map (cf ac) states
_fixedCosts = V.fromList $ map (mkCostVecFor fixedCost) actions
_rateCosts = V.fromList $ map (mkCostVecFor rateCost) actions
mkProbAS a s = V.fromList $ map (trans a s) states
mkProbA a = V.fromList $ map (mkProbAS a) states
_trans = V.fromList $ map mkProbA actions
mkTransVec ac = V.fromList $ map (rates ac) states
_rates = V.fromList $ map mkTransVec actions
actionPairs = zip [0..] actions
actionSet' st = V.fromList $ map fst $ filter ((`elem` acs) . snd) actionPairs
where
acs = actionSet st
_actionSet = V.fromList $ map actionSet' states
in
CTMDP
{ _states = _states
, _actions = _actions
, _fixedCosts = _fixedCosts
, _rateCosts = _rateCosts
, _rates = _rates
, _trans = _trans
, _discount = discount
, _actionSet = _actionSet
}
-- | Convert a CTMDP into an MDP.
uniformize :: (Ord t, Fractional t) => CTMDP a b t -> MDP a b t
uniformize ctmdc =
let
states = _states ctmdc
actions = _actions ctmdc
trans = _trans ctmdc
rateCosts = _rateCosts ctmdc
fixedCosts = _fixedCosts ctmdc
rates = _rates ctmdc
actionSet = _actionSet ctmdc
discount = _discount ctmdc
nStates = length states
nActions = length actions
-- The fastest transition rate
nu = maximum (fmap maximum rates)
-- The discount factor for the continuous-time problem
beta = nu * (1 / discount - 1)
-- We rescale the probabilities by increasing the probability of a
-- self-transition
rescaleProb ac s v = V.imap (\t z -> newP t z) v
where
newP t z = if s == t
then (nu - r + z * r) / (beta + nu)
else r * z / (beta + nu)
r = rates V.! ac V.! s
trans' = V.imap (\a vv -> V.imap (\s v -> rescaleProb a s v) vv) trans
-- We create costs that combine fixed and rate costs
costFor ac s = nu * ((beta + r) * f + rc) / (beta + nu)
where
f = fixedCosts V.! ac V.! s
rc = rateCosts V.! ac V.! s
r = rates V.! ac V.! s
costs' = V.generate nActions (\ac -> V.generate nStates (costFor ac))
discount' = nu / (beta + nu)
in
MDP states actions costs' trans' discount' actionSet