module Data.QLearn
( QLearner
, State(State, Stop)
, Action
, Reward
, Environment
, initQLearner
, initEnvironment
, moveLearner
, moveLearnerAndPrint
, testGrid
, possibleGrid
, executeGrid
, moveLearnerPrintRepeat
, gridFromList
) where
import qualified Data.Vector as V
import Numeric
import Data.List
import System.Random
data QLearner = QLearner {qAlpha::Double, qGamma::Double, qEpsilon::(Int -> Double),
qGrid::V.Vector (V.Vector Double)}
data State = State {getStateValue::Int} | Stop deriving (Show)
data Action = Action {getActionValue::Int}
data Reward = Reward {getRewardValue::Double}
data Environment = Environment {envExecute::(State -> Action -> (State, Reward)),
envPossible::(State -> [Action])}
initQLearner :: Double -> Double -> (Int -> Double) -> Int -> Int -> QLearner
initQLearner alpha gamma epsilon numStates numActions =
QLearner alpha gamma epsilon $ createZeroQ numStates numActions
initEnvironment :: (State -> Action -> (State, Reward)) -> (State -> [Action]) -> Environment
initEnvironment execute possible = Environment execute possible
unwrapExecute :: (State -> Action -> (State, Reward)) -> Int -> Int -> (Int, Double)
unwrapExecute execute state action = let execRet = execute (State state) (Action action)
in (getStateValue $ fst execRet, getRewardValue $ snd execRet)
unwrapPossible :: (State -> [Action]) -> Int -> [Int]
unwrapPossible possible state = let possibRet = possible (State state)
in map (\x -> getActionValue x) possibRet
moveLearner :: Int -> StdGen -> Environment -> QLearner -> State -> ((QLearner, State), StdGen)
moveLearner times g env qlearner Stop = ((qlearner, Stop), g)
moveLearner times g (Environment execute' possible') (QLearner alpha gamma epsilon qtable) (State s) =
let epRet = checkEpsilon g epsilon times
execute = unwrapExecute execute'
possible = unwrapPossible possible'
doRandom = fst $ epRet
g' = snd $ epRet in
if doRandom then let randRet = qRandomIter g execute possible s qtable
iter = fst randRet
g'' = snd randRet
qtable' = fst iter
state' = snd iter in
((QLearner alpha gamma epsilon qtable', State state'), g'')
else let iter = qLearnIter execute possible s qtable
qtable' = fst iter
state' = snd iter in
((QLearner alpha gamma epsilon qtable', State state'), g')
moveLearnerAndPrint :: Int -> StdGen -> Environment -> QLearner -> State -> IO ((QLearner, State), StdGen)
moveLearnerAndPrint times g env qlearner Stop = do
putStrLn "Stop state."
return ((qlearner, Stop), g)
moveLearnerAndPrint times g env qlearner state = do
let iter = moveLearner times g env qlearner state
g' = snd iter
qlearner' = fst $ fst iter
state' = snd $ fst iter
putStrLn $ (++) "Reached: " $ show state'
putStrLn $ prettyPrintQ $ qGrid qlearner'
return ((qlearner', state'), g')
moveLearnerPrintRepeat :: Int -> StdGen -> Environment -> QLearner -> State -> IO ()
moveLearnerPrintRepeat _ _ _ _ Stop = putStrLn "Stopped repeating due to stop state."
moveLearnerPrintRepeat 0 g env qlearner state = putStrLn "Done."
moveLearnerPrintRepeat numTimes g env qlearner state = do
moveRet <- moveLearnerAndPrint numTimes g env qlearner state
let g' = snd moveRet
qlearner' = fst $ fst moveRet
state' = snd $ fst moveRet
moveLearnerPrintRepeat (numTimes 1) g' env qlearner' state'
maxSpaceRow :: V.Vector Double -> Int
maxSpaceRow vec = if V.null vec
then 0
else max (length $ showGFloat (Just 2) (V.head vec) "") (maxSpaceRow $ V.tail vec)
maxSpaceMat :: V.Vector (V.Vector Double) -> Int
maxSpaceMat mat = if V.null mat
then 0
else max (maxSpaceRow $ V.head mat) (maxSpaceMat $ V.tail mat)
padSpaces :: Int -> String -> String
padSpaces space str = str ++ replicate (space (length str)) ' '
prettyPrintRow :: Int -> V.Vector Double -> String
prettyPrintRow space row = if V.null row
then ""
else (padSpaces space $ showGFloat (Just 2) (V.head row) "") ++ " " ++ (prettyPrintRow space $ V.tail row)
prettyPrintQ' :: Int -> V.Vector (V.Vector Double) -> String
prettyPrintQ' space mat = if V.null mat
then ""
else (prettyPrintRow space $ V.head mat) ++ "\n" ++ (prettyPrintQ' space $ V.tail mat)
prettyPrintQ :: V.Vector (V.Vector Double) -> String
prettyPrintQ mat = let space = maxSpaceMat mat in prettyPrintQ' space mat
createZeroQ :: Int -> Int -> V.Vector (V.Vector Double)
createZeroQ s a = V.generate s (\n -> V.replicate a 0.0)
updateQRow :: Int -> Double -> V.Vector Double -> V.Vector Double
updateQRow index value q_row = q_row V.// [(index, value)]
indexQ :: Int -> Int -> V.Vector (V.Vector Double) -> Double
indexQ s a q = q V.! s V.! a
multIndex row (index:indices) = (row V.! index) : []
unwrapMaybe (Just a) = a
unwrapMaybe Nothing = 0
maxAction :: (Int -> [Int]) -> Int -> V.Vector (V.Vector Double) -> Int
maxAction possible s q = let possibleActions = possible s
possibleValues = map (\action -> q V.! s V.! action) possibleActions
in possibleActions !! (unwrapMaybe $ elemIndex (maximum possibleValues) possibleValues)
randomAction :: StdGen -> (Int -> [Int]) -> Int -> V.Vector (V.Vector Double) -> (Int, StdGen)
randomAction g possible s q = let possibleActions = possible s
randomRet = randomR (0, length possibleActions 1) g in
(possibleActions !! (fst randomRet), snd randomRet)
maxActionValue :: Int -> V.Vector (V.Vector Double) -> Double
maxActionValue s q = V.maximum (q V.! s)
updatedQ :: Int -> Int -> Double -> Int -> Double -> Double -> V.Vector (V.Vector Double) -> V.Vector (V.Vector Double)
updatedQ s a r s' gamma alpha q = q V.// [(s, updateQRow a updatedValue $ q V.! s)] where
updatedValue = (indexQ s a q) + alpha * (r + gamma * (maxActionValue s' q) (indexQ s a q))
createRewardTable :: Int -> Int -> V.Vector (V.Vector Double)
createRewardTable s a = V.generate s (\n -> V.replicate a 0.0)
createGrid :: Int -> V.Vector (V.Vector Double)
createGrid s = createRewardTable s s
qLearnIter :: (Int -> Int -> (Int, Double)) -> (Int -> [Int]) -> Int -> V.Vector (V.Vector Double) -> (V.Vector (V.Vector Double), Int)
qLearnIter execute possible state q = let action = maxAction possible state q
retExec = execute state action
state' = fst retExec
reward = snd retExec in (updatedQ state action reward state' 0.8 0.4 q, state')
qRandomIter :: StdGen -> (Int -> Int -> (Int, Double)) -> (Int -> [Int]) -> Int -> V.Vector (V.Vector Double) -> ((V.Vector (V.Vector Double), Int), StdGen)
qRandomIter g execute possible state q = let randomRet = randomAction g possible state q
action = fst randomRet
g' = snd randomRet
retExec = execute state action
reward = snd retExec
state' = fst retExec in ((updatedQ state action reward state' 0.8 0.4 q, state'), g')
linearTo2D :: Int -> Int -> Int -> (Int, Int)
linearTo2D rows cols lin_index = (lin_index `div` cols, (lin_index `mod` cols))
twoDToLinear :: Int -> Int -> (Int, Int) -> Int
twoDToLinear rows cols (r, c) = (r * cols) + c
applyGridAction :: Int -> Int -> Int -> Int -> Int
applyGridAction rows cols state 0 = let state2DIndex = linearTo2D rows cols state
state2DIndex' = (fst state2DIndex 1, (snd state2DIndex))
in twoDToLinear rows cols state2DIndex'
applyGridAction rows cols state 1 = let state2DIndex = linearTo2D rows cols state
state2DIndex' = (fst state2DIndex + 1, snd state2DIndex)
in twoDToLinear rows cols state2DIndex'
applyGridAction rows cols state 2 = let state2DIndex = linearTo2D rows cols state
state2DIndex' = (fst state2DIndex, snd state2DIndex 1)
in twoDToLinear rows cols state2DIndex'
applyGridAction rows cols state 3 = let state2DIndex = linearTo2D rows cols state
state2DIndex' = (fst state2DIndex, snd state2DIndex + 1)
in twoDToLinear rows cols state2DIndex'
applyGridAction rows cols state _ = 1
executeGrid :: V.Vector (V.Vector Double) -> State -> Action -> (State, Reward)
executeGrid grid (State state) (Action action) = let exRet = executeOnGrid grid state action
in (State $ fst exRet, Reward $ snd exRet)
executeOnGrid :: V.Vector (V.Vector Double) -> Int -> Int -> (Int, Double)
executeOnGrid grid state action = let rows = V.length $ grid
cols = V.length $ (grid V.! 0)
coord = linearTo2D rows cols state
reward = grid V.! (fst coord) V.! (snd coord)
state' = applyGridAction rows cols state action
in (state', reward)
gridFromList :: [[Double]] -> V.Vector (V.Vector Double)
gridFromList (list:[]) = V.fromList [V.fromList list]
gridFromList (list:lists) = V.cons (V.fromList list) (gridFromList lists)
testGrid :: V.Vector (V.Vector Double)
testGrid = gridFromList [[1.0,2.0,3.0,4.0],
[5.0,6.0,7.0,8.0],
[12.0,11.0,10.0,9.0],
[13.0,14.0,15.0,16.0]]
gridPossibleX i j rows cols
| j <= 0 = [3]
| j >= rows1 = [2]
| otherwise = [2,3]
gridPossibleY i j rows cols
| i <= 0 = [1]
| i >= cols1 = [0]
| otherwise = [0, 1]
possibleGrid :: V.Vector (V.Vector Double) -> State -> [Action]
possibleGrid grid (State state) = map (\x -> Action x) $ gridPossible grid state
gridPossible :: V.Vector (V.Vector Double) -> Int -> [Int]
gridPossible grid state = let rows = V.length grid
cols = V.length $ (grid V.! 0)
i = fst $ linearTo2D rows cols state
j = snd $ linearTo2D rows cols state
in (gridPossibleX i j rows cols) ++ (gridPossibleY i j rows cols)
qPrint grid times s q = do
putStrLn $ (++) "Original state: " $ show $ s
let iter = qLearnIter (executeOnGrid grid) (gridPossible grid) s q
let qgrid = fst $ iter
let state = snd $ iter
putStrLn $ prettyPrintQ $ qgrid
putStrLn $ (++) "State: " $ show $ state
qPrint grid (times 1) state qgrid
checkEpsilon :: StdGen -> (Int -> Double) -> Int -> (Bool, StdGen)
checkEpsilon g epsilon times = let randRet = randomR (0, 1) g
randVal = fst randRet
g' = snd randRet in
if randVal < (epsilon times) then (True, g') else (False, g')
pick (x, y) v = if v then x else y
qEpsilonPrint :: StdGen -> (Int -> Double) -> V.Vector (V.Vector Double) -> Int -> Int -> V.Vector (V.Vector Double) -> IO ()
qEpsilonPrint g epsilon grid 0 s q = putStrLn "Done!"
qEpsilonPrint g epsilon grid times s q = do
let execute = executeOnGrid grid
possible = gridPossible grid
epRet = checkEpsilon g epsilon times
doRandom = fst $ epRet
g' = snd $ epRet in
if doRandom
then do
putStrLn "Doing a random action!"
let randomRet = qRandomIter g' execute possible s q
let iter = fst randomRet
let g'' = snd randomRet
let qgrid = fst $ iter
let state = snd $ iter
putStrLn $ prettyPrintQ $ qgrid
putStrLn $ (++) "State: " $ show $ state
qEpsilonPrint g'' epsilon grid (times 1) state qgrid
else do
putStrLn "Doing a normal action"
putStrLn $ (++) "Original state: " $ show $ s
let iter = qLearnIter (executeOnGrid grid) (gridPossible grid) s q
let qgrid = fst $ iter
let state = snd $ iter
putStrLn $ prettyPrintQ $ qgrid
putStrLn $ (++) "State: " $ show $ state
qEpsilonPrint g' epsilon grid (times 1) state qgrid
epsilon :: Int -> Int -> Double
epsilon totalTimes timesLeft = 1