-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/

-- | A library for fast, easy-to-use Q-learning.
@package QLearn
@version 0.1.0.0


module Data.QLearn

-- | Data type specifying the parameters and Q table for a particular Q
--   learner. qAlpha is the learning rate applied at each iterative
--   update. qGamma is the discount rate on rewards. qGrid is a matrix
--   (dimensions: number of states by number of actions) that specifies the
--   Q(s,a) function learned by this Q learner. qEpsilon is a function
--   that maps the number of iterations left to the epsilon used by the
--   epsilon-greedy strategy (it can return 1 uniformly if an
--   epsilon-greedy strategy is not wanted).
data QLearner

-- | Wrapper around Int, specifying a state index.
data State
State :: Int -> State
Stop :: State

-- | Wrapper around Int, specifying an action index.
data Action

-- | Wrapper around Double, specifying a reward value.
data Reward

-- | Data type specifying the environment in which the Q learner operates.
--   envExecute is the function used to execute an action at a particular
--   state, returning the new state and the reward associated with the
--   (state, action) pair. envPossible returns the actions possible at any
--   given state.
data Environment

-- | Given alpha, gamma, the epsilon schedule (a function from the number
--   of time steps left to epsilon), the number of states and the maximum
--   number of actions possible at any state, returns a QLearner
--   initialized with a zero Q table.
initQLearner :: Double -> Double -> (Int -> Double) -> Int -> Int -> QLearner

-- | Given the envExecute and envPossible functions, constructs an
--   Environment. This is purely for uniformity of the API. You are
--   welcome to use the data constructor Environment instead, since the
--   two are equivalent.
initEnvironment :: (State -> Action -> (State, Reward)) -> (State -> [Action]) -> Environment

-- | Given an Environment, a Q learner and the state the Q learner is on,
--   returns the Q learner with an updated Q table and the new state of
--   the Q learner within the Environment. Also takes the number of time
--   steps left, which is used for the epsilon computation.
moveLearner :: Int -> StdGen -> Environment -> QLearner -> State -> ((QLearner, State), StdGen)

-- | Same as moveLearner, but prints out the Q table and the current
--   state after moving the QLearner.
moveLearnerAndPrint :: Int -> StdGen -> Environment -> QLearner -> State -> IO ((QLearner, State), StdGen)

-- | A 4x4 grid of numbers used primarily for examples. Here is what it
--   looks like:
--
--   [[1.0,2.0,3.0,4.0],
--    [5.0,6.0,7.0,8.0],
--    [12.0,11.0,10.0,9.0],
--    [13.0,14.0,15.0,16.0]]
testGrid :: Vector (Vector Double)

-- | An envPossible function for use in the Environment data type,
--   specifically for environments that look like grids.
possibleGrid :: Vector (Vector Double) -> State -> [Action]

-- | Takes a grid describing reward values (for environments that look
--   like grids), a state and an action, and returns the new state and the
--   new reward.
executeGrid :: Vector (Vector Double) -> State -> Action -> (State, Reward)

-- | Repeatedly moves the Q learner (i.e. moves it the given number of
--   times) and prints the Q table at every move, until a stop state is
--   encountered.
moveLearnerPrintRepeat :: Int -> StdGen -> Environment -> QLearner -> State -> IO ()

-- | Creates a V.Vector (V.Vector Double) from a [[Double]]. Used to
--   create grid-based environments for the agent.
gridFromList :: [[Double]] -> Vector (Vector Double)

instance GHC.Show.Show Data.QLearn.State
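
-- Usage example (an editor's sketch, not generated by Haddock). It wires
-- the built-in testGrid into an Environment via executeGrid and
-- possibleGrid, then trains on it. The hyperparameters (alpha = 0.1,
-- gamma = 0.9), the epsilon schedule eps, and the step count are
-- illustrative choices, not library defaults; the state and action counts
-- (16 and 4) are assumptions based on testGrid being a 4x4 grid.
--
-- > import Data.QLearn
-- > import System.Random (getStdGen)
-- >
-- > main :: IO ()
-- > main = do
-- >   gen <- getStdGen
-- >   -- Build the environment from the example grid.
-- >   let env = initEnvironment (executeGrid testGrid) (possibleGrid testGrid)
-- >       -- Epsilon schedule: explore heavily while many steps remain.
-- >       eps stepsLeft = if stepsLeft > 50 then 0.5 else 0.1
-- >       -- alpha = 0.1, gamma = 0.9, 16 states, at most 4 actions per state.
-- >       learner = initQLearner 0.1 0.9 eps 16 4
-- >   -- Take up to 100 steps from state 0, printing the Q table after
-- >   -- each move, stopping early if a stop state is reached.
-- >   moveLearnerPrintRepeat 100 gen env learner (State 0)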
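
-- A second sketch: building a custom 3x3 reward grid with gridFromList
-- and taking a single epsilon-greedy step with moveLearnerAndPrint. The
-- reward values, the constant epsilon, and the state/action counts
-- (9 states, at most 4 actions) are assumptions for illustration. Note
-- that Action values never need to be constructed by hand here: they are
-- produced by possibleGrid and consumed by executeGrid.
--
-- > import Data.QLearn
-- > import System.Random (newStdGen)
-- >
-- > customStep :: IO ()
-- > customStep = do
-- >   gen <- newStdGen
-- >   let grid = gridFromList [ [0.0,  0.0,  0.0]
-- >                           , [0.0, -1.0,  0.0]
-- >                           , [0.0,  0.0, 10.0] ]
-- >       env = initEnvironment (executeGrid grid) (possibleGrid grid)
-- >       learner = initQLearner 0.2 0.95 (const 0.2) 9 4
-- >   -- 10 time steps left; this count feeds qEpsilon (here a constant).
-- >   ((_learner', newState), _gen') <- moveLearnerAndPrint 10 gen env learner (State 0)
-- >   print newState  -- State has a Show instance.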