-- |
--  Operations on the chromosomes of individuals.  The following assumptions
--  are made.
-- 
--   * Symbols are numbered 1 through n for a chromosome of length n.
-- 
--   * Genes are numbered 0 through m-1 for a chromosome with m genes.
--   
--  The functions provided in this module are purely functional.  See
--  "GEP.MonadicGeneOperations" for code that invokes these from within the
--  "GEP.Rmonad" monad.
--  

module GEP.GeneOperations (
  crossover1pt,
  crossover2pt,
  crossoverGene,
  transposeGene,
  transposeIS,
  transposeRIS
) where

import GEP.Types

-- There is a set of basic (not GA) operations on Sequences, Genes and
-- Chromosomes, mainly composition and splitting.
-- These should be encapsulated to allow flexible transition from one type of
-- sequences---e.g. [Char]---to any other---e.g. ByteString---wo affecting the
-- GA operators.


-- | Splits a sequence into three by given positions. Similar to the splitAt but
--   for two positions. The positions must be in a non-descending order. This is
--   not checked.
splitThirds :: (Int, Int) -> Sequence -> (Sequence, Sequence, Sequence)
splitThirds (l1, l2) x = (fx,mx,bx)
  where
    (fx,tmp) = splitAt l1 x
    (mx,bx) = splitAt (l2-l1) tmp


--  The rest of the code covers the GA operators.
--


-- | 
--  One-point crossover
crossover1pt :: (Chromosome, Chromosome) -- ^ Pair of individuals before crossover
             -> Int                  -- ^ Crossover point
             -> (Chromosome, Chromosome)  -- ^ Pair of individuals after crossover
crossover1pt (x,y) loc = (x', y')
  where
    (fx, bx) = splitAt (loc-1) x
    (fy, by) = splitAt (loc-1) y
    x' = fx++by
    y' = fy++bx

-- |
--  Two-point crossover
crossover2pt :: (Chromosome, Chromosome) -- ^ Pair of individuals before crossover
             -> Int                  -- ^ Crossover point 1
             -> Int                  -- ^ Crossover point 2
             -> (Chromosome, Chromosome)  -- ^ Pair of individuals after crossover
crossover2pt (x,y) loc1 loc2 = (x',y')
  where
    -- make sure we know which location is lower than the other
    minLoc = min loc1 loc2
    maxLoc = max loc1 loc2
    (fx,mx,bx) = splitThirds (minLoc-1, maxLoc-1) x
    (fy,my,by) = splitThirds (minLoc-1, maxLoc-1) y
    x' = fx++my++bx
    y' = fy++mx++by

--
-- Helper to extract a gene from a sequence and return the sequence
-- before the gene, the gene itself, and the sequence after the gene.
--
geneExtract :: Chromosome -> Int -> Int -> (Sequence, Gene, Sequence)
geneExtract x gene geneLen = (before, theGene, after)
  where
    geneStart = geneLen * gene
    geneEnd   = geneStart + geneLen
    (before,theGene,after) = splitThirds (geneStart, geneEnd) x

-- |
--  Gene crossover
crossoverGene :: (Sequence, Sequence) -- ^ Pair of individuals before crossover
              -> Int                  -- ^ Gene number for crossover
              -> Int                  -- ^ Gene length in symbols
              -> (Sequence, Sequence) -- ^ Pair of individuals after crossover
crossoverGene (x,y) gene geneLen = (x',y')
  where
    (fx,mx,bx) = geneExtract x gene geneLen
    (fy,my,by) = geneExtract y gene geneLen
    x' = fx++my++bx
    y' = fy++mx++by

--
-- Find a root insertion sequence within a sequence.  This means looking
-- for the first subsequence that starts with a nonterminal. If no such
-- subsequence exists, return an empty list.
--
findRIS :: Genome -> Sequence -> Sequence
findRIS g = dropWhile isT
    where isT x = not $ isNonterminal x g

-- |
--  Root insertion sequence transposition.
transposeRIS :: Sequence -- ^ Sequence to perform RIS transposition on
             -> Genome   -- ^ Genome information
             -> Int      -- ^ Gene to perform RIS transposition within
             -> Int      -- ^ Position within gene to start search for
                         --   RIS for transposition
             -> Int      -- ^ Length of RIS
             -> Sequence -- ^ Sequence after RIS transposition performed
transposeRIS x genome gene pos len = 
    fx ++ risSeq ++ keepHead ++ geneTail ++ bx
  where
    -- pull the gene out that we want
    geneLen = (geneLength genome)
    (fx,theGene,bx) = geneExtract x gene geneLen

    -- separate into head and tail
    (geneHead, geneTail) = splitAt (headLength genome) theGene

    -- find the root insertion sequence within the candidate region given
    -- by the search start position
    risCandidateRegion = drop pos theGene
    risSeq = take len (findRIS genome risCandidateRegion)

    -- determine how much of the head to preserve based on the length of
    -- the root insertion sequence
    keepHeadlen = (headLength genome) - (length risSeq)

    -- extract the parts of the head and tail of the original gene that
    -- are preserved after transposition
    keepHead    = take keepHeadlen geneHead

insertIntoGene :: Gene -> Sequence -> Int -> Int -> Gene
insertIntoGene x ins hl pos = (take hl (pre++ins++post))++tX
  where
    hX = take hl x
    tX = drop hl x
    pre = take pos x
    post = drop pos hX

-- |
--  Insertion sequence transposition.
transposeIS :: Chromosome  -- ^ Chromosome
            -> Genome    -- ^ Genome
            -> Int       -- ^ Gene number
            -> Int       -- ^ Position to take from within a gene
            -> Int       -- ^ Length to take
            -> Int       -- ^ Position to put within a gene
            -> Chromosome  -- ^ Resulting chromosome
transposeIS x genome genenum takepos len putpos = 
    genesBefore ++ gene' ++ genesAfter
  where
    geneLen = (geneLength genome)
    (genesBefore, gene, genesAfter) = geneExtract x genenum geneLen
    iseq = take len (drop takepos gene)
    gene' = insertIntoGene gene iseq (headLength genome) putpos

-- |
--  Gene transposition.
transposeGene :: Chromosome -- ^ Chromosome
              -> Genome   -- ^ Genome
              -> Int      -- ^ Gene number
              -> Chromosome -- ^ Resulting chromosome
transposeGene x genome gnum = concat [gene, pregene, postgene]
  where
    geneLen = (headLength genome) + (tailLength genome)
    gene = take geneLen (drop (geneLen * gnum) x)
    pregene = take (geneLen * gnum) x
    postgene = drop (geneLen * (gnum+1)) x