{- Copyright (C) 2015 Leon Medvinsky This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} {-| Module : Neet.Genome Description : Encodings NEAT genomes Copyright : (c) Leon Medvinsky, 2015 License : GPL-3 Maintainer : lmedvinsky@hotmail.com Stability : experimental Portability : ghc -} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DefaultSignatures #-} {-# LANGUAGE MultiWayIf #-} module Neet.Genome ( -- * Genes NodeId(..) , NodeType(..) , NodeGene(..) , ConnGene(..) , InnoId(..) , ConnSig -- * Genome , Genome(..) -- ** Construction , fullConn , sparseConn -- ** Stats , genomeComplexity -- ** Breeding , mutateAdd , mutateSub , crossover , breed -- ** Distance , distance -- ** Fitness , GenScorer(..) -- ** Visualization , renderGenome , printGenome -- ** Debugging , validateGenome ) where import Control.Applicative import Control.Monad import Control.Monad.Random import Control.Arrow (first) import Data.Map.Strict (Map) import qualified Data.Traversable as T import qualified Data.Foldable as F import qualified Data.Map.Strict as M import qualified Data.IntSet as IS import qualified Data.Set as S import qualified Data.IntMap as IM import Data.IntMap (IntMap) import Data.Maybe import Control.Monad.Fresh.Class import Neet.Parameters import Data.GraphViz import Data.GraphViz.Attributes.Complete import GHC.Generics (Generic) import Data.Serialize (Serialize) import Text.Printf -- | The IDs node genes use to refer to nodes. newtype NodeId = NodeId { getNodeId :: Int } deriving (Show, Eq, Ord, PrintDot, Serialize) -- | Types of nodes data NodeType = Input | Hidden | Output deriving (Show, Eq, Generic) instance Serialize NodeType -- | Node genes data NodeGene = NodeGene { nodeType :: NodeType , yHint :: Rational -- ^ A hint for recurrency } deriving (Show, Generic) instance Serialize NodeGene -- | Connection genes data ConnGene = ConnGene { connIn :: NodeId , connOut :: NodeId , connWeight :: Double , connEnabled :: Bool , connRec :: Bool -- ^ A hint for recurrency } deriving (Show, Generic) instance Serialize ConnGene -- | Innovation IDs newtype InnoId = InnoId { getInnoId :: Int } deriving (Show, Eq, Ord) -- | A NEAT genome. The innovation numbers are stored in here, and not the genes, -- to prevent data duplication. data Genome = Genome { ioCount :: Int , nodeGenes :: IntMap NodeGene , connGenes :: IntMap ConnGene , nextNode :: NodeId } deriving (Show, Generic) instance Serialize Genome -- | Takes the number of inputs, the number of outputs, and gives a genome with -- the inputs fully connected to the outputs with random weights. The order of -- the connections are deterministic, so when generating a population, you -- can just start the innovation number at (iSize + 1) * oSize, since the network -- includes an additional input for the bias. fullConn :: MonadRandom m => MutParams -> Int -> Int -> m Genome fullConn MutParams{..} iSize oSize = do let inCount = iSize + 1 inIDs = [1..inCount] outIDs = [inCount + 1..oSize + inCount] inputGenes = zip inIDs $ repeat (NodeGene Input 0) outputGenes = zip outIDs $ repeat (NodeGene Output 1) nodeGenes = IM.fromList $ inputGenes ++ outputGenes nextNode = NodeId $ inCount + oSize + 1 nodePairs = (,) <$> inIDs <*> outIDs ioCount = inCount + oSize conns <- zipWith (\(inN, outN) w -> ConnGene (NodeId inN) (NodeId outN) w True False) nodePairs `liftM` getRandomRs (-weightRange,weightRange) let connGenes = IM.fromList $ zip [1..] conns return $ Genome{..} -- | Like 'fullConn', but with only some input-outputs connected. First integer -- parameters is the max number of connections to start with. sparseConn :: MonadRandom m => MutParams -> Int -> Int -> Int -> m Genome sparseConn MutParams{..} cons iSize oSize = do let inCount = iSize + 1 inIDs = [1..inCount] outIDs = [inCount + 1..oSize + inCount] inputGenes = zip inIDs $ repeat (NodeGene Input 0) outputGenes = zip outIDs $ repeat (NodeGene Output 1) nodeGenes = IM.fromList $ inputGenes ++ outputGenes nextNode = NodeId $ inCount + oSize + 1 nodePairs = (,) <$> inIDs <*> outIDs idNodePairs = zip [1..] nodePairs ioCount = inCount + oSize conPairs <- replicateM cons (uniform idNodePairs) conns <- zipWith (\(inno,(inN, outN)) w -> (inno, ConnGene (NodeId inN) (NodeId outN) w True False)) conPairs `liftM` getRandomRs (-weightRange, weightRange) let connGenes = IM.fromList conns return $ Genome{..} -- | Mutate the weights - perturb or make entirely new weights mutateWeights :: MonadRandom m => MutParams -> Genome -> m Genome mutateWeights MutParams{..} gen@Genome{..} = do roll <- getRandomR (0,1) if roll > mutWeightRate then return gen else setConns gen `liftM` T.mapM mutOne connGenes where setConns g cs = g { connGenes = cs } mutOne conn = do roll <- getRandomR (0,1) let newWeight | roll <= newWeightRate = getRandomR (-weightRange,weightRange) | otherwise = do pert <- getRandomR (-pertAmount,pertAmount) return $ connWeight conn + pert w <- newWeight return $ conn { connWeight = w } -- | Signature of a connection, used in matching innovations fromthe same generation. data ConnSig = ConnSig NodeId NodeId deriving (Show, Eq, Ord) -- | Get a 'ConnSig' toConnSig :: ConnGene -> ConnSig toConnSig gene = ConnSig (connIn gene) (connOut gene) -- | Adds a single connection, updating the innovation context addConn :: MonadFresh InnoId m => ConnGene -> (Map ConnSig InnoId, IntMap ConnGene) -> m (Map ConnSig InnoId, IntMap ConnGene) addConn conn (innos, conns) = case M.lookup siggy innos of Just inno -> return (innos, IM.insert (getInnoId inno) conn conns) Nothing -> do nI@(InnoId newInno) <- fresh return (M.insert siggy nI innos, IM.insert newInno conn conns) where siggy = toConnSig conn -- | Mutation of additional connection. 'Map' parameter is context of previous -- innovations. This could be global, or per species generation. mutateConn :: (MonadFresh InnoId m, MonadRandom m) => MutParams -> Map ConnSig InnoId -> Genome -> m (Map ConnSig InnoId, Genome) mutateConn params innos g = do roll <- getRandomR (0,1) if roll > addConnRate params then return (innos, g) else case allowed of [] -> return (innos, g) _ -> do (innos', conns') <- addRandConn innos (connGenes g) return $ (innos', g { connGenes = conns' }) where -- | Which connections are already filled up by genes. Value is a dummy -- value because taken is only used in difference anyway. taken :: Map ConnSig Bool taken = M.fromList . map (\c -> (toConnSig c, True)) . IM.elems . connGenes $ g -- | Whether a gene is an input gene notInput (NodeGene Input _) = False notInput _ = True -- | The genome's nodes, in an assoc list nodes = IM.toList $ nodeGenes g -- | Nodes that are not input nonInputs = filter (notInput . snd) nodes -- | Make a pair of 'ConnSig' and the recurrentness makePair (n1,g1) (n2,g2) = (ConnSig (NodeId n1) (NodeId n2), yHint g2 <= yHint g1) -- | Possible input -> output pairs candidates = M.fromList $ if recurrencies params then makePair <$> nodes <*> nonInputs else filter nonRec $ makePair <$> nodes <*> nonInputs nonRec (_,reccy) = not reccy -- | Which pairs are not taken allowed = M.toList $ M.difference candidates taken -- | Picks one of the available pairs pickOne :: MonadRandom m => m (ConnSig, Bool) pickOne = uniform allowed pickWeight :: MonadRandom m => m Double pickWeight = let r = weightRange params in getRandomR (-r,r) -- | Randomly chooses one of the available connections and creates a -- gene for it addRandConn :: (MonadRandom m, MonadFresh InnoId m) => Map ConnSig InnoId -> IntMap ConnGene -> m (Map ConnSig InnoId, IntMap ConnGene) addRandConn innos' conns = do (ConnSig inNode outNode, recc) <- pickOne w <- pickWeight let newConn = ConnGene inNode outNode w True recc addConn newConn (innos',conns) -- | Mutation of additional node. mutateNode :: (MonadRandom m, MonadFresh InnoId m) => MutParams -> Map ConnSig InnoId -> Genome -> m (Map ConnSig InnoId, Genome) mutateNode params innos g = do roll <- getRandomR (0,1) if roll <= addNodeRate params then addRandNode else return (innos, g) where conns = connGenes g nodes = nodeGenes g -- | Pick one of the 'InnoId' 'ConnGene' pairs from conns pickConn :: MonadRandom m => m (Int, ConnGene) pickConn = uniform $ IM.toList conns -- | What will the new node's ID be newId = nextNode g -- | What should 'nextNode' be updated to newNextNode = case newId of NodeId x -> NodeId (x + 1) -- | Takes a connection gene and its associated InnoID, and splits -- it with a node addNode :: MonadFresh InnoId m => InnoId -> ConnGene -> m (Map ConnSig InnoId, Genome) addNode inno gene = do let ConnSig (NodeId inId) (NodeId outId) = toConnSig gene -- | Gene of the input node of this connection inGene = nodes IM.! inId -- | Gene of the output node of this connection outGene = nodes IM.! outId -- | The new node gene newGene = NodeGene Hidden ((yHint inGene + yHint outGene) / 2) -- | The new map of nodes, after inserting the new one newNodes = IM.insert (getNodeId newId) newGene nodes -- | The disabled version of the old connection disabledConn = gene { connEnabled = False } -- | The gene for the connection between the input and the new node backGene = ConnGene (NodeId inId) newId 1 True (connRec gene) -- | The gene for the connection between the new node and the output forwardGene = ConnGene newId (NodeId outId) (connWeight gene) True (connRec gene) (innos', newConns) <- addConn backGene >=> addConn forwardGene $ (innos, conns) return $ (innos', g { nodeGenes = newNodes , connGenes = IM.insert (getInnoId inno) disabledConn newConns , nextNode = newNextNode }) -- | Pick an available connection randomly and make a gene for it addRandNode :: (MonadRandom m, MonadFresh InnoId m) => m (Map ConnSig InnoId, Genome) addRandNode = pickConn >>= uncurry (addNode . InnoId) isOrphanNode :: NodeId -> IntMap ConnGene -> Bool isOrphanNode nId imap = F.all doesntContain imap where doesntContain ConnGene{..} = nId /= connIn && nId /= connOut -- | Mutation that can delete a connection mutDelConn :: MonadRandom m => MutParams -> Genome -> m Genome mutDelConn MutParams{..} genome@Genome{..} = do roll <- getRandomR (0,1) if IM.size connGenes <= 1 || roll > delConnChance then return genome else do (connId, deleteThis) <- uniform $ IM.toList connGenes let newConns = IM.delete connId connGenes inOfDeleted = connIn deleteThis outOfDeleted = connOut deleteThis inIsHidden = nodeType (nodeGenes IM.! (getNodeId inOfDeleted)) == Hidden outIsHidden = nodeType (nodeGenes IM.! (getNodeId outOfDeleted)) == Hidden possiblyRemoved | inIsHidden && isOrphanNode inOfDeleted newConns = IM.delete (getNodeId inOfDeleted) nodeGenes | otherwise = nodeGenes possiblyRemoved2 | outIsHidden && isOrphanNode outOfDeleted newConns = IM.delete (getNodeId outOfDeleted) possiblyRemoved | otherwise = possiblyRemoved return genome { nodeGenes = possiblyRemoved2, connGenes = newConns } doesntContainNode :: NodeId -> ConnGene -> Bool doesntContainNode nId cg = connIn cg /= nId && connOut cg /= nId -- | Mutation that may delete a node mutDelNode :: MonadRandom m => MutParams -> Genome -> m Genome mutDelNode MutParams{..} genome@Genome{..} = do roll <- getRandomR (0,1) if | IM.size nodeGenes == ioCount -> return genome | roll > delNodeChance -> return genome | otherwise -> do deleteThis <- uniform delCandidates let newNodes = IM.delete deleteThis nodeGenes newConns = IM.filter (doesntContainNode (NodeId deleteThis)) connGenes return $ genome { nodeGenes = newNodes, connGenes = newConns } where delCandidates = drop ioCount $ IM.keys nodeGenes -- | Mutates the genome, but uses subtractive mutations instead of additive. mutateSub :: MonadRandom m => MutParams -> Genome -> m Genome mutateSub params = mutDelNode params >=> mutDelConn params >=> mutateWeights params -- | Mutates the genome, using the specified parameters and innovation context. mutateAdd :: (MonadRandom m, MonadFresh InnoId m) => MutParams -> Map ConnSig InnoId -> Genome -> m (Map ConnSig InnoId, Genome) mutateAdd params innos g = do g' <- mutateWeights params g uncurry (mutateNode params) >=> uncurry (mutateConn params) $ (innos, g') -- | Super left biased merge -- loners on the right map don't get in superLeft :: (a -> b -> c) -> (a -> c) -> IntMap a -> IntMap b -> IntMap c superLeft comb mk = IM.mergeWithKey (\_ a b -> Just $ comb a b) (IM.map mk) (const IM.empty) -- | Choose between two alternatives with coin chance flipCoin :: MonadRandom m => a -> a -> m a flipCoin a1 a2 = do roll <- getRandom return $ if roll then a1 else a2 -- | Crossover on just the connections. Put the fittest map first. crossConns :: MonadRandom m => MutParams -> IntMap ConnGene -> IntMap ConnGene -> m (IntMap ConnGene) crossConns params m1 m2 = T.sequence $ superLeft flipConn return m1 m2 where flipConn c1 c2 = do if connEnabled c1 && connEnabled c2 then flipCoin c1 c2 else do c <- flipCoin c1 c2 roll <- getRandomR (0,1) let enabled | roll <= disableChance params = False | otherwise = True return c { connEnabled = enabled } -- | Crossover on just nodes crossNodes :: IntMap NodeGene -> IntMap NodeGene -> IntMap NodeGene crossNodes m1 m2 = superLeft (\a _ -> a) id m1 m2 -- | Crossover. The first argument is the fittest genome. crossover :: MonadRandom m => MutParams -> Genome -> Genome -> m Genome crossover params g1 g2 = Genome (ioCount g1) newNodes `liftM` newConns `ap` return newNextNode where newNextNode = max (nextNode g1) (nextNode g2) newConns = crossConns params (connGenes g1) (connGenes g2) newNodes = crossNodes (nodeGenes g1) (nodeGenes g2) -- | Breed two genomes together breed :: (MonadRandom m, MonadFresh InnoId m) => MutParams -> Map ConnSig InnoId -> Genome -> Genome -> m (Map ConnSig InnoId, Genome) breed params innos g1 g2 = crossover params g1 g2 >>= mutateAdd params innos -- | Gets differences where they exist differences :: IntMap ConnGene -> IntMap ConnGene -> IntMap Double differences = IM.mergeWithKey (\_ c1 c2 -> Just $ oneDiff c1 c2) (const IM.empty) (const IM.empty) where oneDiff c1 c2 = abs $ connWeight c1 - connWeight c2 -- | Genetic distance between two genomes distance :: Parameters -> Genome -> Genome -> Double distance params g1 g2 = c1 * exFactor + c2 * disFactor + c3 * weightFactor where DistParams c1 c2 c3 _ = distParams . specParams $ params conns1 = connGenes g1 conns2 = connGenes g2 weightDiffs = differences conns1 conns2 weightsSize = IM.size weightDiffs weightFactor | weightsSize > 0 = IM.foldl (+) 0 weightDiffs / fromIntegral weightsSize | otherwise = 0 ids1 = IM.keysSet conns1 ids2 = IM.keysSet conns2 -- | The lower of the top bounds of innovation numbers edge = min (IS.findMax ids1) (IS.findMax ids2) -- | Excess and Disjoint exJoints = (ids1 `IS.difference` ids2) `IS.union` (ids2 `IS.difference` ids1) (excess, disjoint) = IS.partition (>= edge) exJoints exFactor = fromIntegral $ IS.size excess disFactor = fromIntegral $ IS.size disjoint graphParams :: GraphvizParams NodeId NodeGene Double Rational Rational graphParams = Params { isDirected = True , globalAttributes = [ GraphAttrs [ RankDir FromLeft , Splines LineEdges ] , NodeAttrs [ FixedSize SetNodeSize ] ] , clusterBy = categorizer , isDotCluster = const True , clusterID = iderizer , fmtCluster = clusterizer , fmtNode = const [] , fmtEdge = \(_,_,w) -> [ toLabel $ (printf "%.2f" w :: String) ] } where categorizer (nId, ng) = C (yHint ng) (N (nId, yHint ng)) iderizer 0 = Str "Input Layer" iderizer 1 = Str "Output Layer" iderizer rat = Num (Dbl $ fromRational rat) whiteAttr = Color [WC (X11Color White) Nothing] blueAttr = Color [WC (X11Color Blue4) Nothing ] redAttr = Color [WC (X11Color Red2) Nothing ] greenAttr = Color [WC (X11Color SeaGreen) Nothing ] solidAttr = Style [ SItem Solid [] ] circAttr = Shape Circle clusterizer 0 = [ GraphAttrs [ whiteAttr, rank MinRank ] , NodeAttrs [ solidAttr, blueAttr, circAttr ] ] clusterizer 1 = [ GraphAttrs [ whiteAttr, rank MaxRank ] , NodeAttrs [ solidAttr, redAttr, circAttr ] ] clusterizer _ = [ GraphAttrs [ whiteAttr ] , NodeAttrs [ solidAttr, greenAttr, circAttr ] ] -- | This graph produced is ugly and janky and will have bugs, like hidden nodes -- occasionally appearing with output nodes, and weird clustering overall. If you -- see some problems in the graph, confirm with the Show instance or something -- else that there really is a problem. renderGenome :: Genome -> IO () renderGenome g = runGraphvizCanvas Dot graph Xlib where nodes = map (first NodeId) . IM.toList . nodeGenes $ g edges = mapMaybe mkEdge . IM.elems . connGenes $ g mkEdge ConnGene{..} = if connEnabled then Just (connIn, connOut, connWeight) else Nothing graph = graphElemsToDot graphParams nodes edges -- | A nicer way to display a 'Genome' than the Show instance. printGenome :: Genome -> IO () printGenome g = putStrLn $ unlines stuff where unwrap (NodeId x) = x eText True = "" eText False = "(Disabled)" stuff = [header, nHeader] ++ nInfo ++ [cHeader] ++ cInfo header = "Genetic Info:" nHeader = "Nodes:" nInfo = map mkNInfo . IM.toList $ nodeGenes g mkNInfo (x, NodeGene t _) = show x ++ "(" ++ show t ++ ")" cHeader = "\n\nConnections:" cInfo = map mkCInfo . IM.toList $ connGenes g mkCInfo (i, ConnGene{..}) = "\nInnovation " ++ show i ++ "\nConnection from " ++ show (unwrap connIn) ++ " to " ++ show (unwrap connOut) ++ " " ++ eText connEnabled ++ " with weight " ++ show connWeight -- | Parameters for search. The type parameter determines the intermediate -- type for determining if a solution is valid. data GenScorer score = GS { gScorer :: Genome -> score -- ^ Scoring function , fitnessFunction :: score -> Double -- ^ Convert the score to a fitness , winCriteria :: score -> Bool -- ^ Determines if a result is win } uniq :: Ord a => [a] -> Bool uniq = go S.empty where go _ [] = True go set (x:xs) = not (S.member x set) && go (S.insert x set) xs -- | Validates a 'Genome', returning Nothing on success. validateGenome :: Genome -> Maybe [String] validateGenome Genome{..} = case errRes of [] -> Nothing xs -> Just xs where nodeOk = case IM.maxViewWithKey nodeGenes of Nothing -> Nothing Just ((nid,_), _) | nid < getNodeId nextNode -> Nothing | otherwise -> Just "NodeId too low" connOk (ConnSig (NodeId n1) (NodeId n2)) | IM.member n1 nodeGenes && IM.member n2 nodeGenes = Nothing | otherwise = Just "Connection gene between nonexistent nodes" connsOk = join . listToMaybe $ map connOk sigList sigList = map toConnSig . IM.elems $ connGenes nonDup | uniq sigList = Nothing | otherwise = Just "Non unique connection signatures" ioCountGood | IM.size (IM.filter (\n -> nodeType n == Input || nodeType n == Output) nodeGenes) == ioCount = Nothing | otherwise = Just "ioCount bad" errRes = catMaybes [nodeOk, connsOk, nonDup, ioCountGood] -- | Total number of links and nodes. genomeComplexity :: Genome -> Int genomeComplexity gen = IM.size (nodeGenes gen) + IM.size (nodeGenes gen)