-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Gene Expression Programming evolutionary algorithm in Haskell -- -- Gene Expression Programming evolutionary algorithm implemented in -- Haskell. @package HSGEP @version 0.1.4 -- | GEP parameters. These are related to both population management, -- selection, and rates of genetic operators. The rates are a set of -- probabilities of each operator being applied during each step of the -- selection and reproduction phase. -- -- Author: mjsottile@computer.org module GEP.Params -- | The Rates structure is used to hold the probability of various events -- occurring during the evolution of the GEP algorithm. data Rates Rates :: Double -> Double -> Double -> Double -> Double -> Double -> Double -> Rates -- | Probability of any single symbol being mutated per individual pMutate :: Rates -> Double -- | Probability of an individual experiencing insertion sequence -- transposition pIS :: Rates -> Double -- | Probability of an individual experiencing root insertion sequence -- transposition pRIS :: Rates -> Double -- | Probability of an individual experiencing gene transposition pGT :: Rates -> Double -- | Probability of a 1pt recombination event p1R :: Rates -> Double -- | Probability of a 2pt recombination event p2R :: Rates -> Double -- | Probability of a gene recombination event pGR :: Rates -> Double -- | The SimParams structure reprents the parameters for a run of the GEP -- algorithm. This includes gross parameters unrelated to individuals -- such as the population size, parameters related to selection, and -- parameters related to specific genetic operators. data SimParams SimParams :: Int -> Double -> Double -> Int -> Double -> Int -> Int -> SimParams -- | Population size popSize :: SimParams -> Int -- | Exponent for defining the roulette wheel bin sizes rouletteExponent :: SimParams -> Double -- | Fitness of the ideal individual maxFitness :: SimParams -> Double -- | Number of generations to run the algorithm for numGenerations :: SimParams -> Int -- | Parameter m for fitness value computation from the GEP paper. selectionRange :: SimParams -> Double -- | Maximum length of an IS transpose seq. maxISLen :: SimParams -> Int -- | Maximum length of an RIS transpose seq. maxRISLen :: SimParams -> Int instance Show Rates instance Show SimParams -- | Monad based on state for passing random number state around for GEP. -- The choice of Mersenne.Pure64 was for performance, and the pure -- version will play nicely with threading. -- -- Author: mjsottile@computer.org module GEP.Rmonad type GEPMonad a = Rand a -- | Generate a random number as a Double between 0.0 and the given upper -- bound. nextF :: Double -> Rand Double -- | Generate a random integer between 1 and the upper bound (inclusive). nextR :: Int -> Rand Int -- | Generate a random integer in the specified range that is NOT equal to -- the integer provided. nextRDifferent :: Int -> Int -> Rand Int -- | Generate a list of random integers. nextRList :: Int -> Int -> Rand [Int] -- | Generate a list of n random integers such that each entry occurs at -- most once. Each number in the list must be unique. nextRListUnique :: Int -> [Int] -> Int -> Rand [Int] nextRListPairs :: Int -> Int -> Rand [(Int, Int)] -- | Document me! generatePairs :: Int -> Rand [(Int, Int)] runRmonad :: Rand a -> PureMT -> (a, PureMT) -- | This module defines the types used for implementing GEP problems and -- operations. A few functions are also provided for convenience here for -- performing common operations. module GEP.Types -- | Data type representing a genome. The genome contains all necessary -- parameters to interpret a chromosome. These include the alphabet -- (split between terminal and nonterminal characters), connective -- characters for multi-gene chromosomes, the maximum arity of any -- nonterminal, the length of the head of a gene, and the number of genes -- per chromosome. data Genome Genome :: [Symbol] -> [Symbol] -> Symbol -> Int -> Int -> Int -> Genome -- | Set of terminal symbols terminals :: Genome -> [Symbol] -- | Set of nonterminal symbols nonterminals :: Genome -> [Symbol] -- | Symbol connecting genes in a chromosome geneConnector :: Genome -> Symbol -- | Highest arity nonterminal function maxArity :: Genome -> Int -- | Length of gene head sequence headLength :: Genome -> Int -- | Number of genes per chromosome numGenes :: Genome -> Int -- | A symbol in a chromosome type Symbol = Char -- | A sequence of symbols not neccessaryly a gene or chromosome. Used in -- gene operations. type Sequence = [Char] -- | A gene in a chromosome is a list of symbols type Gene = Sequence -- | A chromosome is a list of symbols. We avoided using a list of genes to -- maintain the view of a chromosome as nothing more than a flattened, -- linear sequence of genes. type Chromosome = Sequence -- | Symbol table used for fitness tests. We assume that there is exactly -- one pair per symbol. If there are symbols missing, fitness testing may -- fail (the library does not have facilities yet to allow for default -- values). If a symbol occurs multiple times in the symbol table, no -- guarantee is provided for which value will be chosen. type SymTable a = [(Symbol, a)] -- | Function to express an individual into a list of ET structures type ExpressionFunction a = Chromosome -> Genome -> a -- | Return the length of the tail of a gene for a given genome tailLength :: Genome -> Int -- | Return length of a gene (tail + head) for a given genome geneLength :: Genome -> Int -- | Given a genome, provide the list of all symbols possible in a -- chromosome. This is just nonterminals ++ terminals. allsymbols :: Genome -> [Symbol] -- | Fracture a chromosome into a set of genes chromToGenes :: Chromosome -> Int -> [Gene] -- | Assemble a chromosome from a set of genes genesToChrom :: [Gene] -> Chromosome -- | Test if a symbol is a nonterminal isNonterminal :: Symbol -> Genome -> Bool instance Show Genome -- | Operations on the chromosomes of individuals. The following -- assumptions are made. -- -- -- -- The functions provided in this module are purely functional. See -- GEP.MonadicGeneOperations for code that invokes these from -- within the GEP.Rmonad monad. module GEP.GeneOperations -- | One-point crossover crossover1pt :: (Chromosome, Chromosome) -> Int -> (Chromosome, Chromosome) -- | Two-point crossover crossover2pt :: (Chromosome, Chromosome) -> Int -> Int -> (Chromosome, Chromosome) -- | Gene crossover crossoverGene :: (Sequence, Sequence) -> Int -> Int -> (Sequence, Sequence) -- | Gene transposition. transposeGene :: Chromosome -> Genome -> Int -> Chromosome -- | Insertion sequence transposition. transposeIS :: Chromosome -> Genome -> Int -> Int -> Int -> Int -> Chromosome -- | Root insertion sequence transposition. transposeRIS :: Sequence -> Genome -> Int -> Int -> Int -> Sequence -- | This module contains wrappers around the purely functional gene -- operations in GEP.GeneOperations in order to string the random -- number generation state through via the GEP.Rmonad. These -- helper functions are responsible for sampling the random number -- generator to determine the parameters for applying the genetic -- operators. -- -- The reasoning behind using a specialized Random monad instead of the -- system generator provided by IO is that this allows independent -- generators to be used should we support multiple threads of execution. -- Parallel random number generation requires distinct generators, not a -- shared one. -- -- Author: mjsottile@computer.org module GEP.MonadicGeneOperations -- | IS Transposition helper isTransposer :: Genome -> SimParams -> Chromosome -> GEPMonad Chromosome -- | RIS Transposition helper risTransposer :: Genome -> SimParams -> Chromosome -> GEPMonad Chromosome -- | Gene transposition helper geneTransposer :: Genome -> Chromosome -> GEPMonad Chromosome -- | One-point crossover helper. Takes a genome, a pair of individuals, and -- selects the crossover point before generating the new pair of -- resulting individuals after crossover. x1PHelper :: Genome -> (Chromosome, Chromosome) -> GEPMonad (Chromosome, Chromosome) -- | Two-point crossover helper. Takes a genome, a pair of individuals, and -- selects the crossover points before generating the new pair of -- resulting individuals after crossover. x2PHelper :: Genome -> (Chromosome, Chromosome) -> GEPMonad (Chromosome, Chromosome) -- | Gene crossover helper. Takes a genome, a pair of individuals, and -- selects the crossover gene before generating the new pair of -- individuals resulting after crossover. xGHelper :: Genome -> (Chromosome, Chromosome) -> GEPMonad (Chromosome, Chromosome) -- | Randomized functions for GEP applications. Attempting to isolate all -- code that needs to be run under the Rmonad here. -- -- Author: mjsottile@computer.org module GEP.Random -- | Select a random symbol from the provided list. randomSymbol :: [a] -> GEPMonad a -- | Select a sequence of random symbols from the provided list. randomSymbolList :: [a] -> Int -> GEPMonad [a] -- | Generate a new individual given a genome specification. newIndividual :: Genome -> Int -> GEPMonad Chromosome -- | Create a population of fresh random individuals given a genome -- |specification. newPopulation :: Genome -> Int -> GEPMonad [Chromosome] mutate :: Genome -> Rates -> Chromosome -> GEPMonad Chromosome -- | Routines for selection after fitness evaluation. Selection is the -- process of taking some input population P, a set of fitness values -- such that each p in P has a fitness score f(p,X) under some fitness -- test X, and selecting which members of P participate in the creation -- of the next population P'. -- -- A common technique is roulette wheel selection. In essence, this means -- that we create a roulette wheel with one slot per individual where the -- width of each slot is a function of the fitness of the individuals. -- So, those individuals with very good fitness will have wide slots and -- a correspondingly high likelihood of selection, while poor fitness -- individuals will have tiny slots and a low probability of being -- selected. -- -- Fitness testing takes place outside this module. This module is only -- concerned with the selection process (ie: generating the roulette -- wheel). -- -- Author: mjsottile@computer.org module GEP.Selection -- | Generate n roulette weights with a generator exponent e. A helper -- function weight_function is used to generate the actual weights. For -- example, w = (k^e)^(-1) for k from 1 to n leads to a set of weights -- such that the size of the slots decreases exponentially as fitness -- decreases. When e=1, this decrease is linear. The list that is -- returned is the width of each slot such that the total of the weights -- adds to 1.0. generate_roulette_weights :: Double -> Double -> [Double] -- | Given a set of roulette weights and a number of spins of the wheel, -- return a list of indices corresponding to the winning slot for each -- spin. This is used to perform the actual selection after a set of -- roulette weights are generated. roulette :: [Double] -> Int -> GEPMonad [Int] -- | Given a list of indices and a list of data elements, create a new list -- of data elements composed of the elements listed in the index list. -- The output list may contain duplicates. selector :: [Int] -> [a] -> [a] -- | Given a set of pairs (f,i) where f is the fitness of the individual i, -- return the pair representing the individual with the best fitness. We -- may return nothing if an empty set is passed in to begin with, so the -- return type is a Maybe pair. getBest :: [(Double, Chromosome)] -> Maybe (Double, Chromosome) -- | Code to read configuration files. -- -- Author: mjsottile@computer.org module GEP.Util.ConfigurationReader readParameters :: String -> IO (Rates, Genome, SimParams) -- | This module contains code related to fitness evaluation. The main -- purpose of the code is to both evaluate fitnesses of individuals and -- to sort individuals by fitness. These are intended to all be higher -- order functions that assume nothing about the purpose of the -- individuals or the types of inputs being used for fitness testing. The -- only assumption made currently is that the outputs for test cases are -- floating point numbers. That likely should change for general purpose -- usage. -- -- mjsottile@computer.org module GEP.Fitness -- | Fitness function type type FitnessFunction a b = a -> TestCase b -> Double -> Double -> Double -- | A test case maps a list of terminals to float values type TestCase a = SymTable a -- | A test dictionary is a set of test cases type TestDict a = [TestCase a] -- | The set of outputs expected for each entry in the test dictionary type TestOuts = [Double] -- | Fitness evaluator for generic individuals. This needs to go away and -- use a more general approach like evaluateFitness above. fitness_tester :: a -> FitnessFunction a b -> TestDict b -> TestOuts -> Double -> Double -- | Given a list of fitness values and a corresponding list of -- individuals, return a list of tuples pairing the fitness value with -- the individuals for only those individuals that have a valid fitness -- value. This means those that are +/- infinity or NaN are removed. fitness_filter :: [Double] -> [Chromosome] -> [(Double, Chromosome)] -- | Sort a set of individuals with fitness values by their fitness sortByFitness :: [(Double, Chromosome)] -> [(Double, Chromosome)] -- | Code representing a single step of the GEP algorithm resides here. -- -- single step of fitness evaluation, selection and reproduction to make -- a new population -- -- process includes: -- --
    --
  1. expression of individuals
  2. --
  3. fitness evaluation
  4. --
  5. filtration to eliminate individuals yielding impossible fitness -- values (infinite or NaN)
  6. --
  7. preservation of best individual
  8. --
  9. generation of roulette selection weights
  10. --
  11. roulette selection of individuals
  12. --
  13. perform mutation operator
  14. --
  15. IS transposition
  16. --
  17. RIS transposition
  18. --
  19. Gene transposition
  20. --
  21. 1Pt recombination
  22. --
  23. 2Pt recombination
  24. --
  25. Gene recombination
  26. --
-- -- Author: mjsottile@computer.org module GEP.TimeStep multiStep :: [Chromosome] -> Genome -> SimParams -> Rates -> ExpressionFunction a -> FitnessFunction a b -> TestDict b -> TestOuts -> Int -> Double -> GEPMonad (Double, [Chromosome]) module GEP.GenericDriver -- | Generic driver to be called from specific GEP program instances in -- their main routine. gepDriver :: SimParams -> Rates -> Genome -> TestDict b -> TestOuts -> FitnessFunction a b -> ExpressionFunction a -> IO (Double, [Chromosome])