{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Merge
( mergeAbundance
, mergeMates
, bamToFasta
) where
import Data.Function (on)
import Data.List
import Data.Maybe
import Data.Monoid
import qualified Data.IntMap.Strict as IMap
import qualified Data.Map.Strict as Map
import Control.Lens
import qualified Data.Text as T
import qualified Data.Text.Read as T
import Safe
import TextShow
import Types
import Utility
mergeAbundance :: FrequencyMap -> DuplicationRow -> DuplicationRow
mergeAbundance (FrequencyMap m) (DuplicationRow row) =
DuplicationRow
. Map.insert "frequency" (showt . findWithError rowAccession $ m)
$ row
where
rowAccession =
flip (Safe.at) 1 . T.splitOn "|" . findWithError "fHeader" $ row
getPositionMap :: Position -> Sequence -> PositionMap
getPositionMap (Position p) =
PositionMap . IMap.fromList . zip [p..] . T.unpack . unSequence
fillGapsWith :: Fill
-> [(Position, Char)]
-> [(Position, Char)]
-> [(Position, Char)]
fillGapsWith _ !acc [] = reverse acc
fillGapsWith fill [] (x:xs) = fillGapsWith fill [x] xs
fillGapsWith
fill
acc@((Position !previousPos, _):_)
allNext@((Position !currentPos, !currentChar):xs) =
if abs (currentPos - previousPos) > 1
then fillGapsWith
fill
((Position (previousPos + 1), unFill fill) : acc)
allNext
else fillGapsWith
fill
((Position currentPos, currentChar) : acc)
xs
mergeSequences :: Fill
-> (Position, Sequence)
-> (Position, Sequence)
-> Sequence
mergeSequences fill left =
Sequence
. T.pack
. fmap snd
. fillGapsWith fill []
. fmap (over _1 Position)
. IMap.toAscList
. IMap.union (unPositionMap . uncurry getPositionMap $ left)
. unPositionMap
. uncurry getPositionMap
mergePair :: Fill -> [BamRow] -> Maybe BamRow
mergePair fill xs =
case filter (not . isSupplementary) xs of
[] -> Nothing
[x] -> Just x
[BamRow left, BamRow right] ->
Just
. BamRow
. set (ix 9) ( unSequence
$ mergeSequences fill (posSeq left) (posSeq right)
)
$ left
xs -> error ("Too many mate pairs (even after supplementary removal) for: " <> show xs)
where
posSeq xs = ( either error (Position . fst) . T.decimal $ xs !! 3
, Sequence $ xs !! 9
)
isSupplementary =
(== "1") . drop 11 . decodeSAMFlag . read . T.unpack . (!! 1) . unBamRow
mergeMates :: Fill -> [BamRow] -> [BamRow]
mergeMates fill =
catMaybes . fmap (mergePair fill) . groupBy ((==) `on` (head . unBamRow))