-- GenerateDiversity module. -- By G.W. Schwartz -- {- | Collection of functions for the collection of fragments for the diversity calculations. -} {-# LANGUAGE BangPatterns #-} module Math.Diversity.GenerateDiversity ( fragmentPos , generatePositionMap ) where -- Built in import qualified Data.Map as M import Data.List import Data.Fasta.String import qualified Data.Sequence as Seq -- Cabal import qualified Data.List.Split as Split -- Local import Math.Diversity.Types -- | Get the sample ID of a sequence getSample :: Int -> FastaSequence -> Sample getSample x = (!! (x - 1)) . Split.splitOn "|" . fastaHeader -- | Generates fragment list from string of "win" length. This version -- differs from normal as it takes a tuple with the position as the first -- entry fragmentPos :: Bool -> Int -> [(Position, Fragment)] -> [(Position, Fragment)] fragmentPos whole win xs | whole && null xs = error "Empty line in file!!" | whole = [combine xs] | length xs < win = [] | otherwise = combine (take win xs) : fragmentPos whole win (tail xs) where combine = foldl1' (\(!x, !ys) (_, y) -> (x, ys Seq.>< y)) -- | Generate the PositionMap from a list of FastaSequences generatePositionMap :: Bool -> Int -> Bool -> Window -> [FastaSequence] -> PositionMap generatePositionMap sample sampleField whole win = M.fromListWith (++) . posSeqList where posSeqList = map toList . concatMap (\x -> map (\(!p, !f) -> (p, sampleIt sample x f)) . fragmentPos whole win . map (\(!p, !f) -> (p, Seq.singleton f)) . filter (\(_, !f) -> noGaps f) . zip [1..] . fastaSeq $ x) toList (x, y) = (x, [y]) noGaps y = y /= '-' && y /= '.' sampleIt True s f = (getSample sampleField s, f) sampleIt False _ f = ("Sample", f)