-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/

-- | A library for fast, easy-to-use Q-learning.
@package QLearn
@version 0.1.0.0


module Data.QLearn

-- | Data type specifying the parameters and Q table for a particular Q
--   learner. qAlpha is the learning rate applied at each iterative
--   update. qGamma is the discount rate on rewards. qGrid is a matrix
--   (dimensions: number of states by number of actions) that specifies the
--   Q(s,a) function learned by this Q learner. qEpsilon is a function
--   that maps the number of iterations left to the epsilon used by the
--   epsilon-greedy strategy (it can return 1 uniformly if an
--   epsilon-greedy strategy is not wanted).
data QLearner

-- | Wrapper around Int, specifying a state index.
data State
State :: Int -> State
Stop :: State

-- | Wrapper around Int, specifying an action index.
data Action

-- | Wrapper around Double, specifying a reward value.
data Reward

-- | Data type specifying the environment in which the Q learner operates.
--   envExecute is the function used to execute an action at a particular
--   state, returning the new state and the reward associated with the
--   (state, action) pair. envPossible returns the actions possible at any
--   given state.
data Environment

-- | Given alpha, gamma, the epsilon schedule (a function from the number
--   of time steps left to epsilon), the number of states and the maximum
--   number of actions possible at any state, returns a QLearner
--   initialized with a zero Q table.
initQLearner :: Double -> Double -> (Int -> Double) -> Int -> Int -> QLearner

-- | Given the envExecute and envPossible functions, constructs an
--   Environment. This is purely for uniformity of the API. You are
--   welcome to use the data constructor Environment instead, since the
--   two are equivalent.
initEnvironment :: (State -> Action -> (State, Reward)) -> (State -> [Action]) -> Environment

-- | Given an Environment, a Q learner and the state the Q learner is on,
--   returns the Q learner with an updated Q table and the new state of
--   the Q learner within the Environment. Also takes the number of time
--   steps left, which is used for the epsilon computation.
moveLearner :: Int -> StdGen -> Environment -> QLearner -> State -> ((QLearner, State), StdGen)

-- | Same as moveLearner, but prints out the Q table and the current
--   state after moving the QLearner.
moveLearnerAndPrint :: Int -> StdGen -> Environment -> QLearner -> State -> IO ((QLearner, State), StdGen)

-- | A 4x4 grid of numbers used primarily for examples. Here is what it
--   looks like:
--
--   [[1.0,2.0,3.0,4.0],
--    [5.0,6.0,7.0,8.0],
--    [12.0,11.0,10.0,9.0],
--    [13.0,14.0,15.0,16.0]]
testGrid :: Vector (Vector Double)

-- | An envPossible function for use in the Environment data type,
--   specifically for environments that look like grids.
possibleGrid :: Vector (Vector Double) -> State -> [Action]

-- | Takes a grid describing reward values (for environments that look
--   like grids), a state and an action, and returns the new state and the
--   new reward.
executeGrid :: Vector (Vector Double) -> State -> Action -> (State, Reward)

-- | Repeatedly moves the Q learner (i.e. moves it the given number of
--   times) and prints the Q table at every move, until a stop state is
--   encountered.
moveLearnerPrintRepeat :: Int -> StdGen -> Environment -> QLearner -> State -> IO ()

-- | Creates a V.Vector (V.Vector Double) from a [[Double]]. Used to
--   create grid-based environments for the agent.
gridFromList :: [[Double]] -> Vector (Vector Double)

instance GHC.Show.Show Data.QLearn.State
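
-- Usage example (an editor's sketch, not generated by Haddock). It wires
-- the built-in testGrid into an Environment via executeGrid and
-- possibleGrid, then trains on it. The hyperparameters (alpha = 0.1,
-- gamma = 0.9), the epsilon schedule eps, and the step count are
-- illustrative choices, not library defaults; the state and action counts
-- (16 and 4) are assumptions based on testGrid being a 4x4 grid.
--
-- > import Data.QLearn
-- > import System.Random (getStdGen)
-- >
-- > main :: IO ()
-- > main = do
-- >   gen <- getStdGen
-- >   -- Build the environment from the example grid.
-- >   let env = initEnvironment (executeGrid testGrid) (possibleGrid testGrid)
-- >       -- Epsilon schedule: explore heavily while many steps remain.
-- >       eps stepsLeft = if stepsLeft > 50 then 0.5 else 0.1
-- >       -- alpha = 0.1, gamma = 0.9, 16 states, at most 4 actions per state.
-- >       learner = initQLearner 0.1 0.9 eps 16 4
-- >   -- Take up to 100 steps from state 0, printing the Q table after
-- >   -- each move, stopping early if a stop state is reached.
-- >   moveLearnerPrintRepeat 100 gen env learner (State 0)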
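
-- A second sketch: building a custom 3x3 reward grid with gridFromList
-- and taking a single epsilon-greedy step with moveLearnerAndPrint. The
-- reward values, the constant epsilon, and the state/action counts
-- (9 states, at most 4 actions) are assumptions for illustration. Note
-- that Action values never need to be constructed by hand here: they are
-- produced by possibleGrid and consumed by executeGrid.
--
-- > import Data.QLearn
-- > import System.Random (newStdGen)
-- >
-- > customStep :: IO ()
-- > customStep = do
-- >   gen <- newStdGen
-- >   let grid = gridFromList [ [0.0,  0.0,  0.0]
-- >                           , [0.0, -1.0,  0.0]
-- >                           , [0.0,  0.0, 10.0] ]
-- >       env = initEnvironment (executeGrid grid) (possibleGrid grid)
-- >       learner = initQLearner 0.2 0.95 (const 0.2) 9 4
-- >   -- 10 time steps left; this count feeds qEpsilon (here a constant).
-- >   ((_learner', newState), _gen') <- moveLearnerAndPrint 10 gen env learner (State 0)
-- >   print newState  -- State has a Show instance.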