-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | HMEP Multi Expression Programming – -- a genetic programming variant -- -- A multi expression programming implementation with focus on speed. -- -- https://en.wikipedia.org/wiki/Multi_expression_programming @package hmep @version 0.1.1 -- |

Core MEP data structures

module AI.MEP.Types -- | A chromosome is a vector of genes type Chromosome a = Vector (Gene a Int) -- | Either a terminal symbol or a three-address code (a function and two -- pointers) data Gene a i C :: a -> Gene a i Var :: Int -> Gene a i Op :: F a -> i -> i -> Gene a i -- | Eq instance for Gene -- | Show instance for Gene -- | A function and its symbolic representation type F a = (Char, a -> a -> a) -- | List of chromosomes type Population a = [Chromosome a] -- | Evaluated population type Generation a = [Phenotype a] -- | Loss value, chromosome, and the best expression indices vector type Phenotype a = (Double, Chromosome a, Vector Int) instance (GHC.Classes.Eq a, GHC.Classes.Eq i) => GHC.Classes.Eq (AI.MEP.Types.Gene a i) instance (GHC.Show.Show a, GHC.Show.Show i) => GHC.Show.Show (AI.MEP.Types.Gene a i) -- |

Various utilities for running MEP algorithm

module AI.MEP.Run -- | Generate code for the functions with a single output generateCode :: Phenotype Double -> String -- | Evaluate each subexpression in a chromosome evaluateChromosome :: Num a => Chromosome a -> Vector a -> Vector a -- | Loss function for regression problems with one input and one output. -- Not normalized with respect to the dataset size. regressionLoss1 :: (Num result, Ord result) => (b -> b -> result) -> [(a, b)] -> (Vector a -> Vector b) -> (Vector Int, result) -- | Average population loss avgLoss :: Generation Double -> Double -- | Copyright Bogdan Penkovsky (c) 2017 -- --

Multiple Expression Programming

-- --

Example application: trigonometry cheating

-- -- Suppose, you forgot certain trigonometric identities. For instance, -- you want to express cos^2(x) using sin(x). No problem, set the target -- function cos^2(x) in the dataset and add sin to the arithmetic set of -- operators {+,-,*,/}. See app/Main.hs. -- -- After running -- --
--   $ stack build && stack exec hmep-demo
--   
--   
-- -- We obtain -- --
--   Average loss in the initial population 15.268705681244962
--   Population 10: average loss 14.709728527360586
--   Population 20: average loss 13.497114190675477
--   Population 30: average loss 8.953185872653737
--   Population 40: average loss 8.953185872653737
--   Population 50: average loss 3.3219954564955856e-15
--   
--   
-- -- The average value of 3.3e-15 is close to zero, indicating that the -- exact expression was found! -- -- The produced output was: -- --
--   Interpreted expression:
--   v1 = sin x0
--   v2 = v1 * v1
--   result = 1 - v2
--   
--   
-- -- From here we can infer that cos^2(x) = 1 - v2 = 1 - v1 * v1 = 1 - -- sin^2(x). -- -- Sweet! module AI.MEP -- | A chromosome is a vector of genes type Chromosome a = Vector (Gene a Int) -- | Either a terminal symbol or a three-address code (a function and two -- pointers) data Gene a i -- | List of chromosomes type Population a = [Chromosome a] -- | Loss value, chromosome, and the best expression indices vector type Phenotype a = (Double, Chromosome a, Vector Int) -- | MEP configuration data Config a Config :: Double -> Double -> Double -> Double -> Int -> Int -> Int -> Vector (F a) -> Int -> Config a -- | Probability of constant generation [p'const] :: Config a -> Double -- | Probability of variable generation. The probability of operator -- generation is inferred automatically as 1 - p'const - p'var. [p'var] :: Config a -> Double -- | Mutation probability [p'mutation] :: Config a -> Double -- | Crossover probability [p'crossover] :: Config a -> Double -- | The chromosome length [c'length] :: Config a -> Int -- | A (sub)population size [c'popSize] :: Config a -> Int -- | Number of subpopulations (1 or more) [not implemented] [c'popN] :: Config a -> Int -- | Functions pool with their symbolic representations [c'ops] :: Config a -> Vector (F a) -- | The input dimensionality [c'vars] :: Config a -> Int -- |
--   defaultConfig = Config
--     {
--       p'const = 0.1
--     , p'var = 0.4
--     , p'mutation = 0.1
--     , p'crossover = 0.9
--   
--     , c'length = 50
--     , c'popSize = 100
--     , c'popN = 1
--     , c'ops = V.empty  -- <-- To be overridden
--     , c'vars = 1
--     }
--   
defaultConfig :: Config Double -- | A function to minimize. -- -- The argument is a vector evaluation function whose input is a vector -- (length c'vars) and ouput is a vector with a different length -- c'length. -- -- The result is a vector of the best indices and a scalar loss value. type LossFunction a = (Vector a -> Vector a) -> (Vector Int, Double) -- | Randomly generate a new population initialize :: PrimMonad m => Config Double -> RandT m (Population Double) -- | Using LossFunction, find how fit is each chromosome in the -- population evaluatePopulation :: Num a => LossFunction a -> Population a -> Generation a -- | Loss function for regression problems with one input and one output. -- Not normalized with respect to the dataset size. regressionLoss1 :: (Num result, Ord result) => (b -> b -> result) -> [(a, b)] -> (Vector a -> Vector b) -> (Vector Int, result) -- | Average population loss avgLoss :: Generation Double -> Double -- | The best phenotype in the generation best :: Generation a -> Phenotype a -- | The worst phenotype in the generation worst :: Generation a -> Phenotype a -- | Selection operator that produces the next evaluated population. -- -- Standard algorithm: the best offspring O replaces the worst individual -- W in the current population if O is better than W. evolve :: PrimMonad m => Config Double -> LossFunction Double -> (Chromosome Double -> RandT m (Chromosome Double)) -> (Chromosome Double -> Chromosome Double -> RandT m (Chromosome Double, Chromosome Double)) -> (Generation Double -> RandT m (Chromosome Double)) -> Generation Double -> RandT m (Generation Double) -- | Binary tournament selection binaryTournament :: (PrimMonad m, Ord a) => Generation a -> RandT m (Chromosome a) -- | Uniform crossover operator crossover :: PrimMonad m => Chromosome a -> Chromosome a -> RandT m (Chromosome a, Chromosome a) -- | Mutation operator with up to three mutations per chromosome mutation3 :: PrimMonad m => Config Double -> Chromosome Double -> RandT m (Chromosome Double) -- | Mutation operator with a fixed mutation probability of each gene smoothMutation :: PrimMonad m => Double -> Config Double -> Chromosome Double -> RandT m (Chromosome Double) -- | Randomly initialize a new chromosome. By definition, the first gene is -- terminal (a constant or a variable). newChromosome :: PrimMonad m => Config Double -> RandT m (Chromosome Double) -- | Generate code for the functions with a single output generateCode :: Phenotype Double -> String data RandT (m :: * -> *) a :: (* -> *) -> * -> * -- | Alias for mwc: Take a RandT value and run it in -- IO, generating all the random values described by the -- RandT. -- -- It initializes the random number generator. For performance reasons, -- it is recommended to minimize the number of calls to runRandIO. runRandIO :: RandT IO a -> IO a