-- |Data types for working with locations in a sequence.  Zero-based
--  'Int64' indices are used throughout, to facilitate direct use of
--  indexing functions on 'SeqData'.

module Bio.Location.Location ( Loc(..), bounds, length, startPos, endPos
                             , extend, posInto, posOutof, isWithin, overlaps
                             , seqData, seqDataPadded
                             , display
                             )
    where 

import Prelude hiding (length)

import Control.Arrow ((***))
import Control.Monad.Error
import qualified Data.ByteString.Lazy.Char8 as LBS
import Data.List (intercalate)
--import Data.Maybe

import qualified Bio.Location.ContigLocation as CLoc
import qualified Bio.Location.Position as Pos
import Bio.Location.Strand
import Bio.Sequence.SeqData

-- | General (disjoint) sequence region consisting of a concatenated
--   set of contiguous regions
newtype Loc = Loc [CLoc.ContigLoc] deriving (Eq, Ord, Show)

instance Stranded Loc where
    revCompl (Loc contigs) = Loc $ reverse $ map revCompl contigs

-- | Length of the region
length :: Loc -> Offset
length (Loc contigs) = sum $ map CLoc.length contigs

-- | The bounds of a 'Loc', consisting of the lowest & highest
--   sequence indices lying within the region.  The first element of
--   the pair will always be lower than the second.
bounds :: Loc -> (Offset, Offset)
bounds (Loc []) = error "locBounds on zero-contig Loc"
bounds (Loc contigs) = (minimum *** maximum) $ unzip $ map CLoc.bounds contigs

-- | 0-based starting (5' in the region orientation) offset of the
--   region on its sequence.
startPos :: Loc -> Pos.Pos
startPos (Loc [])      = error "startPos: zero-contig Loc"
startPos (Loc contigs) = CLoc.startPos $ head contigs

-- | 0-based ending (3' in the region orientation) offset of the
--   region on its sequence.
endPos :: Loc -> Pos.Pos
endPos (Loc [])      = error "endPos: zero-contig Loc"
endPos (Loc contigs) = CLoc.endPos $ last contigs

-- | Subsequence 'SeqData' for a 'Loc', provided that the region is
--   entirely within the sequence.
seqData :: (Error e, MonadError e m) => SeqData -> Loc -> m SeqData
seqData sequ (Loc contigs)
    = liftM LBS.concat $ mapM (CLoc.seqData sequ) contigs

seqDataPadded :: SeqData -> Loc -> SeqData
seqDataPadded sequ (Loc contigs)
    = LBS.concat $ map (CLoc.seqDataPadded sequ) contigs

-- | For a 'Pos' and a 'Loc' region on the same sequence, find the
--   corresponding 'Pos' relative to the region, if the 'Pos' is
--   within the region.  If the 'Loc' region has redundant positions
--   for a given sequence position, the first is returned.
posInto :: Pos.Pos -> Loc -> Maybe Pos.Pos
posInto seqpos (Loc contigs) = posIntoContigs seqpos contigs

posIntoContigs :: Pos.Pos -> [CLoc.ContigLoc] -> Maybe Pos.Pos
posIntoContigs _ [] = Nothing
posIntoContigs seqpos (contig@(CLoc.ContigLoc _ len _):rest)
    = case CLoc.posInto seqpos contig of
        just@(Just _) -> just
        Nothing -> liftM (flip Pos.slide len) $ posIntoContigs seqpos rest

-- | For a 'Loc' region on a sequence and a 'Pos' relative to the
--   region, find the corresponding 'Pos' on the sequence, provided
--   that the position is within the bounds of the region.
posOutof :: Pos.Pos -> Loc -> Maybe Pos.Pos
posOutof pos (Loc contigs) = posOutofContigs pos contigs

posOutofContigs :: Pos.Pos -> [CLoc.ContigLoc] -> Maybe Pos.Pos
posOutofContigs _ [] = Nothing
posOutofContigs seqpos (contig@(CLoc.ContigLoc _ len _):rest)
    = case CLoc.posOutof seqpos contig of
        just@(Just _) -> just
        Nothing -> posOutofContigs (Pos.slide seqpos $ negate len) rest

-- | Extend a 'Loc' region by incorporating contigous nucleotide
--   regions of the specified lengths on the 5' and 3' ends
extend :: (Offset, Offset) -- ^ (5' extension, 3' extension)
       -> Loc -> Loc
extend _ (Loc []) = error "extendLoc on zero-contig Loc"
extend (ext5, ext3) (Loc contigs) = Loc $ case extendContigs3 contigs of
                                            [] -> error "Empty contigs after extendContigs3"
                                            (cfirst:crest) -> (CLoc.extend (ext5, 0) cfirst):crest
    where extendContigs3 [] = error "Empty contigs in extendContigs3"
          extendContigs3 [clast] = [CLoc.extend (0, ext3) clast]
          extendContigs3 (contig:crest) = contig : extendContigs3 crest

-- | For a 'Pos' and a 'Loc' on the same sequence, does the position
--   fall within the 'Loc' region?
isWithin :: Pos.Pos -> Loc -> Bool
isWithin seqpos (Loc contigs) = or $ map (CLoc.isWithin seqpos) contigs

overlappingContigs :: Loc -> Loc -> [(CLoc.ContigLoc, CLoc.ContigLoc)]
overlappingContigs (Loc contigs1) (Loc contigs2) 
    = filter (uncurry CLoc.overlaps) [(c1, c2) | c1 <- contigs1, c2 <- contigs2 ]

-- | For a pair of 'Loc' regions on the same sequence, do they overlap
--   at all?
overlaps :: Loc -> Loc -> Bool
overlaps l1 l2 = not $ null $ overlappingContigs l1 l2

display :: Loc -> String
display (Loc contigs) = intercalate ";" $ map CLoc.display contigs