-- GenerateDiversity module.
-- By G.W. Schwartz
--
{- | Collection of functions for the collection of fragments for the
diversity calculations.
-}

{-# LANGUAGE BangPatterns #-}

module Math.Diversity.GenerateDiversity ( fragmentPos
                                        , generatePositionMap ) where

-- Built in
import qualified Data.Map.Strict as Map
import Data.List
import Data.Fasta.String
import qualified Data.Sequence as Seq
import Debug.Trace

-- Cabal
import qualified Data.List.Split as Split

-- Local
import Math.Diversity.Types

-- | Get the sample ID of a sequence
getSample :: Int -> FastaSequence -> Sample
getSample x = (!! (x - 1)) . Split.splitOn "|" . fastaHeader

-- | Generates fragment list from string of "win" length. This version
-- differs from normal as it takes a tuple with the position as the first
-- entry. Is in tail recursive form
fragmentPos :: Bool -> Int -> [(Position, Char)] -> [(Position, Fragment)]
fragmentPos whole win ls = fragmentPosLoop ls []
  where
    fragmentPosLoop [] !acc = acc
    fragmentPosLoop !xs !acc
        | whole && null xs = error "Empty line in file!!"
        | whole            = [combine xs]
        | length xs < win  = acc
        | otherwise        = fragmentPosLoop
                             (tail xs)
                             (combine (take win xs) : acc)
    combine ((!x, !y):ys)  = (x, y:(map snd ys))

-- | Generate the frequency from a FastaSequence
generatePositionMap :: Bool
                    -> Int
                    -> Bool
                    -> Window
                    -> FastaSequence
                    -> PositionMap
generatePositionMap !sample !sampleField !whole !win = posSeqList
  where
    posSeqList !x       = Map.fromList
                        . map (\(!p, !f) -> (p, Map.singleton (sampleIt sample x f) 1))
                        . fragmentPos whole win
                        . filter (noGaps . snd)
                        . zip [1..]
                        . fastaSeq
                        $ x
    noGaps y            = y /= '-' && y /= '.'
    sampleIt True !s !f = (getSample sampleField s, f)
    sampleIt False _ !f = ("Sample", f)