-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Gene Expression Programming evolutionary algorithm in Haskell
--
-- Gene Expression Programming evolutionary algorithm implemented in
-- Haskell.
@package HSGEP
@version 0.1.4
-- | GEP parameters. These are related to both population management,
-- selection, and rates of genetic operators. The rates are a set of
-- probabilities of each operator being applied during each step of the
-- selection and reproduction phase.
--
-- Author: mjsottile@computer.org
module GEP.Params
-- | The Rates structure is used to hold the probability of various events
-- occurring during the evolution of the GEP algorithm.
data Rates
Rates :: Double -> Double -> Double -> Double -> Double -> Double -> Double -> Rates
-- | Probability of any single symbol being mutated per individual
pMutate :: Rates -> Double
-- | Probability of an individual experiencing insertion sequence
-- transposition
pIS :: Rates -> Double
-- | Probability of an individual experiencing root insertion sequence
-- transposition
pRIS :: Rates -> Double
-- | Probability of an individual experiencing gene transposition
pGT :: Rates -> Double
-- | Probability of a 1pt recombination event
p1R :: Rates -> Double
-- | Probability of a 2pt recombination event
p2R :: Rates -> Double
-- | Probability of a gene recombination event
pGR :: Rates -> Double
-- | The SimParams structure reprents the parameters for a run of the GEP
-- algorithm. This includes gross parameters unrelated to individuals
-- such as the population size, parameters related to selection, and
-- parameters related to specific genetic operators.
data SimParams
SimParams :: Int -> Double -> Double -> Int -> Double -> Int -> Int -> SimParams
-- | Population size
popSize :: SimParams -> Int
-- | Exponent for defining the roulette wheel bin sizes
rouletteExponent :: SimParams -> Double
-- | Fitness of the ideal individual
maxFitness :: SimParams -> Double
-- | Number of generations to run the algorithm for
numGenerations :: SimParams -> Int
-- | Parameter m for fitness value computation from the GEP paper.
selectionRange :: SimParams -> Double
-- | Maximum length of an IS transpose seq.
maxISLen :: SimParams -> Int
-- | Maximum length of an RIS transpose seq.
maxRISLen :: SimParams -> Int
instance Show Rates
instance Show SimParams
-- | Monad based on state for passing random number state around for GEP.
-- The choice of Mersenne.Pure64 was for performance, and the pure
-- version will play nicely with threading.
--
-- Author: mjsottile@computer.org
module GEP.Rmonad
type GEPMonad a = Rand a
-- | Generate a random number as a Double between 0.0 and the given upper
-- bound.
nextF :: Double -> Rand Double
-- | Generate a random integer between 1 and the upper bound (inclusive).
nextR :: Int -> Rand Int
-- | Generate a random integer in the specified range that is NOT equal to
-- the integer provided.
nextRDifferent :: Int -> Int -> Rand Int
-- | Generate a list of random integers.
nextRList :: Int -> Int -> Rand [Int]
-- | Generate a list of n random integers such that each entry occurs at
-- most once. Each number in the list must be unique.
nextRListUnique :: Int -> [Int] -> Int -> Rand [Int]
nextRListPairs :: Int -> Int -> Rand [(Int, Int)]
-- | Document me!
generatePairs :: Int -> Rand [(Int, Int)]
runRmonad :: Rand a -> PureMT -> (a, PureMT)
-- | This module defines the types used for implementing GEP problems and
-- operations. A few functions are also provided for convenience here for
-- performing common operations.
module GEP.Types
-- | Data type representing a genome. The genome contains all necessary
-- parameters to interpret a chromosome. These include the alphabet
-- (split between terminal and nonterminal characters), connective
-- characters for multi-gene chromosomes, the maximum arity of any
-- nonterminal, the length of the head of a gene, and the number of genes
-- per chromosome.
data Genome
Genome :: [Symbol] -> [Symbol] -> Symbol -> Int -> Int -> Int -> Genome
-- | Set of terminal symbols
terminals :: Genome -> [Symbol]
-- | Set of nonterminal symbols
nonterminals :: Genome -> [Symbol]
-- | Symbol connecting genes in a chromosome
geneConnector :: Genome -> Symbol
-- | Highest arity nonterminal function
maxArity :: Genome -> Int
-- | Length of gene head sequence
headLength :: Genome -> Int
-- | Number of genes per chromosome
numGenes :: Genome -> Int
-- | A symbol in a chromosome
type Symbol = Char
-- | A sequence of symbols not neccessaryly a gene or chromosome. Used in
-- gene operations.
type Sequence = [Char]
-- | A gene in a chromosome is a list of symbols
type Gene = Sequence
-- | A chromosome is a list of symbols. We avoided using a list of genes to
-- maintain the view of a chromosome as nothing more than a flattened,
-- linear sequence of genes.
type Chromosome = Sequence
-- | Symbol table used for fitness tests. We assume that there is exactly
-- one pair per symbol. If there are symbols missing, fitness testing may
-- fail (the library does not have facilities yet to allow for default
-- values). If a symbol occurs multiple times in the symbol table, no
-- guarantee is provided for which value will be chosen.
type SymTable a = [(Symbol, a)]
-- | Function to express an individual into a list of ET structures
type ExpressionFunction a = Chromosome -> Genome -> a
-- | Return the length of the tail of a gene for a given genome
tailLength :: Genome -> Int
-- | Return length of a gene (tail + head) for a given genome
geneLength :: Genome -> Int
-- | Given a genome, provide the list of all symbols possible in a
-- chromosome. This is just nonterminals ++ terminals.
allsymbols :: Genome -> [Symbol]
-- | Fracture a chromosome into a set of genes
chromToGenes :: Chromosome -> Int -> [Gene]
-- | Assemble a chromosome from a set of genes
genesToChrom :: [Gene] -> Chromosome
-- | Test if a symbol is a nonterminal
isNonterminal :: Symbol -> Genome -> Bool
instance Show Genome
-- | Operations on the chromosomes of individuals. The following
-- assumptions are made.
--
--
-- - Symbols are numbered 1 through n for a chromosome of length
-- n.
-- - Genes are numbered 0 through m-1 for a chromosome with m
-- genes.
--
--
-- The functions provided in this module are purely functional. See
-- GEP.MonadicGeneOperations for code that invokes these from
-- within the GEP.Rmonad monad.
module GEP.GeneOperations
-- | One-point crossover
crossover1pt :: (Chromosome, Chromosome) -> Int -> (Chromosome, Chromosome)
-- | Two-point crossover
crossover2pt :: (Chromosome, Chromosome) -> Int -> Int -> (Chromosome, Chromosome)
-- | Gene crossover
crossoverGene :: (Sequence, Sequence) -> Int -> Int -> (Sequence, Sequence)
-- | Gene transposition.
transposeGene :: Chromosome -> Genome -> Int -> Chromosome
-- | Insertion sequence transposition.
transposeIS :: Chromosome -> Genome -> Int -> Int -> Int -> Int -> Chromosome
-- | Root insertion sequence transposition.
transposeRIS :: Sequence -> Genome -> Int -> Int -> Int -> Sequence
-- | This module contains wrappers around the purely functional gene
-- operations in GEP.GeneOperations in order to string the random
-- number generation state through via the GEP.Rmonad. These
-- helper functions are responsible for sampling the random number
-- generator to determine the parameters for applying the genetic
-- operators.
--
-- The reasoning behind using a specialized Random monad instead of the
-- system generator provided by IO is that this allows independent
-- generators to be used should we support multiple threads of execution.
-- Parallel random number generation requires distinct generators, not a
-- shared one.
--
-- Author: mjsottile@computer.org
module GEP.MonadicGeneOperations
-- | IS Transposition helper
isTransposer :: Genome -> SimParams -> Chromosome -> GEPMonad Chromosome
-- | RIS Transposition helper
risTransposer :: Genome -> SimParams -> Chromosome -> GEPMonad Chromosome
-- | Gene transposition helper
geneTransposer :: Genome -> Chromosome -> GEPMonad Chromosome
-- | One-point crossover helper. Takes a genome, a pair of individuals, and
-- selects the crossover point before generating the new pair of
-- resulting individuals after crossover.
x1PHelper :: Genome -> (Chromosome, Chromosome) -> GEPMonad (Chromosome, Chromosome)
-- | Two-point crossover helper. Takes a genome, a pair of individuals, and
-- selects the crossover points before generating the new pair of
-- resulting individuals after crossover.
x2PHelper :: Genome -> (Chromosome, Chromosome) -> GEPMonad (Chromosome, Chromosome)
-- | Gene crossover helper. Takes a genome, a pair of individuals, and
-- selects the crossover gene before generating the new pair of
-- individuals resulting after crossover.
xGHelper :: Genome -> (Chromosome, Chromosome) -> GEPMonad (Chromosome, Chromosome)
-- | Randomized functions for GEP applications. Attempting to isolate all
-- code that needs to be run under the Rmonad here.
--
-- Author: mjsottile@computer.org
module GEP.Random
-- | Select a random symbol from the provided list.
randomSymbol :: [a] -> GEPMonad a
-- | Select a sequence of random symbols from the provided list.
randomSymbolList :: [a] -> Int -> GEPMonad [a]
-- | Generate a new individual given a genome specification.
newIndividual :: Genome -> Int -> GEPMonad Chromosome
-- | Create a population of fresh random individuals given a genome
-- |specification.
newPopulation :: Genome -> Int -> GEPMonad [Chromosome]
mutate :: Genome -> Rates -> Chromosome -> GEPMonad Chromosome
-- | Routines for selection after fitness evaluation. Selection is the
-- process of taking some input population P, a set of fitness values
-- such that each p in P has a fitness score f(p,X) under some fitness
-- test X, and selecting which members of P participate in the creation
-- of the next population P'.
--
-- A common technique is roulette wheel selection. In essence, this means
-- that we create a roulette wheel with one slot per individual where the
-- width of each slot is a function of the fitness of the individuals.
-- So, those individuals with very good fitness will have wide slots and
-- a correspondingly high likelihood of selection, while poor fitness
-- individuals will have tiny slots and a low probability of being
-- selected.
--
-- Fitness testing takes place outside this module. This module is only
-- concerned with the selection process (ie: generating the roulette
-- wheel).
--
-- Author: mjsottile@computer.org
module GEP.Selection
-- | Generate n roulette weights with a generator exponent e. A helper
-- function weight_function is used to generate the actual weights. For
-- example, w = (k^e)^(-1) for k from 1 to n leads to a set of weights
-- such that the size of the slots decreases exponentially as fitness
-- decreases. When e=1, this decrease is linear. The list that is
-- returned is the width of each slot such that the total of the weights
-- adds to 1.0.
generate_roulette_weights :: Double -> Double -> [Double]
-- | Given a set of roulette weights and a number of spins of the wheel,
-- return a list of indices corresponding to the winning slot for each
-- spin. This is used to perform the actual selection after a set of
-- roulette weights are generated.
roulette :: [Double] -> Int -> GEPMonad [Int]
-- | Given a list of indices and a list of data elements, create a new list
-- of data elements composed of the elements listed in the index list.
-- The output list may contain duplicates.
selector :: [Int] -> [a] -> [a]
-- | Given a set of pairs (f,i) where f is the fitness of the individual i,
-- return the pair representing the individual with the best fitness. We
-- may return nothing if an empty set is passed in to begin with, so the
-- return type is a Maybe pair.
getBest :: [(Double, Chromosome)] -> Maybe (Double, Chromosome)
-- | Code to read configuration files.
--
-- Author: mjsottile@computer.org
module GEP.Util.ConfigurationReader
readParameters :: String -> IO (Rates, Genome, SimParams)
-- | This module contains code related to fitness evaluation. The main
-- purpose of the code is to both evaluate fitnesses of individuals and
-- to sort individuals by fitness. These are intended to all be higher
-- order functions that assume nothing about the purpose of the
-- individuals or the types of inputs being used for fitness testing. The
-- only assumption made currently is that the outputs for test cases are
-- floating point numbers. That likely should change for general purpose
-- usage.
--
-- mjsottile@computer.org
module GEP.Fitness
-- | Fitness function type
type FitnessFunction a b = a -> TestCase b -> Double -> Double -> Double
-- | A test case maps a list of terminals to float values
type TestCase a = SymTable a
-- | A test dictionary is a set of test cases
type TestDict a = [TestCase a]
-- | The set of outputs expected for each entry in the test dictionary
type TestOuts = [Double]
-- | Fitness evaluator for generic individuals. This needs to go away and
-- use a more general approach like evaluateFitness above.
fitness_tester :: a -> FitnessFunction a b -> TestDict b -> TestOuts -> Double -> Double
-- | Given a list of fitness values and a corresponding list of
-- individuals, return a list of tuples pairing the fitness value with
-- the individuals for only those individuals that have a valid fitness
-- value. This means those that are +/- infinity or NaN are removed.
fitness_filter :: [Double] -> [Chromosome] -> [(Double, Chromosome)]
-- | Sort a set of individuals with fitness values by their fitness
sortByFitness :: [(Double, Chromosome)] -> [(Double, Chromosome)]
-- | Code representing a single step of the GEP algorithm resides here.
--
-- single step of fitness evaluation, selection and reproduction to make
-- a new population
--
-- process includes:
--
--
-- - expression of individuals
-- - fitness evaluation
-- - filtration to eliminate individuals yielding impossible fitness
-- values (infinite or NaN)
-- - preservation of best individual
-- - generation of roulette selection weights
-- - roulette selection of individuals
-- - perform mutation operator
-- - IS transposition
-- - RIS transposition
-- - Gene transposition
-- - 1Pt recombination
-- - 2Pt recombination
-- - Gene recombination
--
--
-- Author: mjsottile@computer.org
module GEP.TimeStep
multiStep :: [Chromosome] -> Genome -> SimParams -> Rates -> ExpressionFunction a -> FitnessFunction a b -> TestDict b -> TestOuts -> Int -> Double -> GEPMonad (Double, [Chromosome])
module GEP.GenericDriver
-- | Generic driver to be called from specific GEP program instances in
-- their main routine.
gepDriver :: SimParams -> Rates -> Genome -> TestDict b -> TestOuts -> FitnessFunction a b -> ExpressionFunction a -> IO (Double, [Chromosome])