module Bio.Location.ContigLocation ( ContigLoc(..), fromStartEnd, fromPosLen
, bounds, startPos, endPos
, slide, extend, posInto, posOutof
, seqData, seqDataPadded, isWithin, overlaps
, display
)
where
import Prelude hiding (length)
import Control.Monad.Error
import qualified Data.ByteString.Lazy.Char8 as LBS
import Bio.Sequence.SeqData
import qualified Bio.Location.Position as Pos
import Bio.Location.Strand
-- | A contiguous region of a sequence on one strand.  The region is
-- stored as the lowest zero-based offset it covers, the number of
-- positions it spans, and the strand it lies on.
data ContigLoc
  = ContigLoc
      { offset5 :: !Offset -- ^ Lowest sequence offset covered by the region
      , length  :: !Offset -- ^ Number of positions covered by the region
      , strand  :: !Strand -- ^ Strand on which the region lies
      }
  deriving (Eq, Ord, Show)
-- | Reverse-complementing a location flips its strand only; the
-- covered offsets and the length are left untouched.
instance Stranded ContigLoc where
  revCompl cloc = cloc { strand = revCompl (strand cloc) }
-- | Build a location from inclusive start and end offsets.
--
-- When @start < end@ the result lies on the forward strand.  Otherwise
-- (including @start == end@) it lies on the reverse-complement strand.
-- Both endpoints are counted, so the length is one more than the
-- distance between them.
fromStartEnd :: Offset -> Offset -> ContigLoc
fromStartEnd start end
  | start < end = ContigLoc start (1 + end - start) Fwd
  | otherwise   = ContigLoc end (1 + start - end) RevCompl
-- | Build a location of the given length that starts at the given
-- stranded position.
--
-- For a forward position the offset is the lowest offset of the region.
-- For a reverse-complement position the offset is the highest offset of
-- the region, so the lowest offset is @len - 1@ positions below it.
fromPosLen :: Pos.Pos -> Offset -> ContigLoc
fromPosLen (Pos.Pos off5 Fwd) len = ContigLoc off5 len Fwd
fromPosLen (Pos.Pos off3 RevCompl) len = ContigLoc (off3 - (len - 1)) len RevCompl
-- | The inclusive @(lowest, highest)@ offsets covered by the location,
-- regardless of its strand.
bounds :: ContigLoc -> (Offset, Offset)
bounds (ContigLoc seq5 len _) = (seq5, seq5 + len - 1)
-- | The stranded position at which the location begins: the lowest
-- offset on the forward strand, the highest offset on the
-- reverse-complement strand.
startPos :: ContigLoc -> Pos.Pos
startPos (ContigLoc seq5 len str)
  = case str of
      Fwd      -> Pos.Pos seq5 str
      RevCompl -> Pos.Pos (seq5 + len - 1) str
-- | The stranded position at which the location ends: the highest
-- offset on the forward strand, the lowest offset on the
-- reverse-complement strand.
endPos :: ContigLoc -> Pos.Pos
endPos (ContigLoc seq5 len str)
  = case str of
      Fwd      -> Pos.Pos (seq5 + len - 1) str
      RevCompl -> Pos.Pos seq5 str
-- | Shift a location by the given number of positions (negative values
-- shift towards lower offsets).  Length and strand are unchanged.
slide :: Offset -> ContigLoc -> ContigLoc
slide dpos cloc = cloc { offset5 = offset5 cloc + dpos }
-- | Extract the subsequence covered by a location, oriented according
-- to the location's strand via 'stranded'.
--
-- Fails with 'throwError' when the location starts before offset 0 or
-- when the sequence is too short to supply all @len@ positions.  The
-- error message reports the inclusive bounds that were requested.
seqData :: (Error e, MonadError e m) => SeqData -> ContigLoc -> m SeqData
seqData sequ (ContigLoc seq5 len str)
  | seq5 < 0 = outOfBounds
  | otherwise = case LBS.take len $ LBS.drop seq5 sequ of
      fwdseq
        -- A short result means the location runs past the end of the data.
        | LBS.length fwdseq == len -> return $ stranded str fwdseq
        | otherwise -> outOfBounds
  where
    outOfBounds = throwError $ strMsg $
      "contig seq loc " ++ show (seq5, seq5 + len - 1) ++ " out of SeqData bounds"
-- | Extract the subsequence covered by a location, oriented according
-- to the location's strand via 'stranded'.  Any position that falls
-- outside the sequence data is filled with @\'N\'@, so the result always
-- has the location's length instead of failing.
seqDataPadded :: SeqData -> ContigLoc -> SeqData
seqDataPadded sequ (ContigLoc seq5 len str) = stranded str fwdseq
  where
    fwdseq
      -- Entirely before the start of the sequence.
      | seq5 + len <= 0 = LBS.replicate len 'N'
      -- Entirely after the end of the sequence.
      | seq5 >= LBS.length sequ = LBS.replicate len 'N'
      -- Overhangs the start: pad the overhang, then take the remainder.
      | seq5 < 0 = LBS.replicate (negate seq5) 'N' `LBS.append` takePadded (len + seq5) sequ
      | otherwise = takePadded len $ LBS.drop seq5 sequ
    -- Take exactly @sublen@ characters, padding on the right when the
    -- data runs out.
    takePadded sublen subsequ
      | sublen <= LBS.length subsequ = LBS.take sublen subsequ
      | otherwise = subsequ `LBS.append` LBS.replicate (sublen - LBS.length subsequ) 'N'
-- | Convert a position on the enclosing sequence into a position
-- relative to the location.
--
-- Returns 'Nothing' when the position lies outside the location.  For a
-- forward location the result is the distance from the lowest offset.
-- For a reverse-complement location it is the distance from the highest
-- offset, and the position's strand is flipped.
posInto :: Pos.Pos -> ContigLoc -> Maybe Pos.Pos
posInto (Pos.Pos pos pStrand) (ContigLoc seq5 len cStrand)
  | pos < seq5 || pos >= seq5 + len = Nothing
  | otherwise = Just $ case cStrand of
      Fwd      -> Pos.Pos (pos - seq5) pStrand
      RevCompl -> Pos.Pos (seq5 + len - (pos + 1)) (revCompl pStrand)
-- | Convert a position relative to the location into a position on the
-- enclosing sequence.
--
-- Returns 'Nothing' when the relative offset is negative or not less
-- than the location's length.  For a forward location the lowest offset
-- is added.  For a reverse-complement location the offset is counted
-- down from the highest offset, and the position's strand is flipped.
posOutof :: Pos.Pos -> ContigLoc -> Maybe Pos.Pos
posOutof (Pos.Pos pos pStrand) (ContigLoc seq5 len cStrand)
  | pos < 0 || pos >= len = Nothing
  | otherwise = Just $ case cStrand of
      Fwd      -> Pos.Pos (pos + seq5) pStrand
      RevCompl -> Pos.Pos (seq5 + len - (pos + 1)) (revCompl pStrand)
-- | Lengthen a location by @ext5@ positions before its start and
-- @ext3@ positions after its end, in the location's own orientation.
--
-- The length always grows by @ext5 + ext3@.  The lowest offset moves
-- down by @ext5@ on the forward strand and by @ext3@ on the
-- reverse-complement strand, where the end of the location is its
-- lowest offset.
extend :: (Offset, Offset) -> ContigLoc -> ContigLoc
extend (ext5, ext3) (ContigLoc seq5 len str)
  = case str of
      Fwd      -> ContigLoc (seq5 - ext5) (len + ext5 + ext3) str
      RevCompl -> ContigLoc (seq5 - ext3) (len + ext5 + ext3) str
-- | True when the position's offset falls inside the location and the
-- position is on the same strand as the location.
isWithin :: Pos.Pos -> ContigLoc -> Bool
isWithin (Pos.Pos pos pStrand) (ContigLoc seq5 len cStrand)
  = atOrAfterStart && beforeEnd && sameStrand
  where
    atOrAfterStart = pos >= seq5
    beforeEnd      = pos < seq5 + len
    sameStrand     = cStrand == pStrand
-- | True when two locations are on the same strand and their inclusive
-- offset ranges (as given by 'bounds') share at least one position.
overlaps :: ContigLoc -> ContigLoc -> Bool
overlaps contig1 contig2 = sameStrand && low1 <= high2 && low2 <= high1
  where
    sameStrand    = strand contig1 == strand contig2
    (low1, high1) = bounds contig1
    (low2, high2) = bounds contig2
-- | Render a location as its start offset and end offset (in the
-- location's own orientation) joined by the literal text @to@.
display :: ContigLoc -> String
display cloc = concat [offsetText (startPos cloc), "to", offsetText (endPos cloc)]
  where
    offsetText :: Pos.Pos -> String
    offsetText = show . Pos.offset