module Math.Diversity.GenerateDiversity ( fragmentPos
, generatePositionMap ) where
import qualified Data.Map.Strict as Map
import Data.List
import Data.Fasta.String
import qualified Data.Sequence as Seq
import Debug.Trace
import qualified Data.List.Split as Split
import Math.Diversity.Types
-- | Extract the sample name from the header of a FASTA entry.
--
-- The header is split on @|@ and the field at the 1-based index @x@ is
-- returned. Calls 'error' when the header has no field with that index
-- (including any @x@ below 1).
getSample :: Int -> FastaSequence -> Sample
getSample x fs =
    -- Fields are numbered from 1, so skip @x - 1@ of them to reach the target.
    case drop (x - 1) fields of
        (field : _) | x >= 1 -> field
        _ -> error ( "getSample: header has no field number "
                  ++ show x ++ ": " ++ header )
  where
    header = fastaHeader fs
    fields = Split.splitOn "|" header
-- | Cut a list of positioned characters into fragments.
--
-- * @whole@ - when 'True' the entire input becomes a single fragment and
--   @win@ is ignored.
-- * @win@ - window length used when @whole@ is 'False'.
-- * @ls@ - the characters, each paired with its position.
--
-- Every fragment is paired with the position of its first character. In
-- windowed mode the window advances one element at a time, only full-length
-- windows are emitted, and the result lists the last window first. An empty
-- input yields no fragments in either mode.
fragmentPos :: Bool -> Int -> [(Position, Char)] -> [(Position, Fragment)]
fragmentPos whole win ls
    | null ls   = []
    | whole     = [combine ls]
    | otherwise = slide ls []
  where
    -- Windows are consed onto the accumulator, hence the reversed order.
    slide [] acc = acc
    slide xs@(_ : rest) acc
        -- Measure the window rather than the remaining input: the outcome is
        -- the same, but each step costs O(win) instead of re-walking the tail.
        | length window < win = acc
        | otherwise           = slide rest (combine window : acc)
      where
        window = take win xs

    -- An empty window can only occur with a non-positive window size.
    combine [] = error "fragmentPos: window size must be positive"
    combine ((!pos, !c) : rest) = (pos, c : map snd rest)
-- | Build the position map contributed by a single FASTA entry.
--
-- * @sample@ - when 'True' each fragment is labelled with the value of
--   @getSample sampleField@ applied to the entry; otherwise the literal
--   label @"Sample"@ is used.
-- * @sampleField@ - field index handed to 'getSample'.
-- * @whole@, @win@ - forwarded unchanged to 'fragmentPos'.
--
-- Characters are numbered from 1 before the gap characters @-@ and @.@ are
-- dropped, so positions refer to the original sequence. Each resulting
-- fragment is stored under its position as a singleton map with count 1.
generatePositionMap :: Bool
                    -> Int
                    -> Bool
                    -> Window
                    -> FastaSequence
                    -> PositionMap
generatePositionMap !sample !sampleField !whole !win = build
  where
    build !fs = Map.fromList (map toEntry fragments)
      where
        -- Number first, filter second: gaps still consume a position.
        residues  = filter (isResidue . snd) (zip [1..] (fastaSeq fs))
        fragments = fragmentPos whole win residues

        toEntry (!pos, !frag) = (pos, Map.singleton (label frag) 1)

        label !frag
            | sample    = (getSample sampleField fs, frag)
            | otherwise = ("Sample", frag)

    isResidue c = not (c == '-' || c == '.')