module Bio.Bam.Regions
    ( Region(..)
    , Regions(..)
    , Subsequence(..)

    , toList
    , fromList
    , overlaps
    ) where

import Bio.Bam.Header ( Refseq(..) )
import Bio.Prelude hiding ( toList )

import qualified Data.IntMap.Strict as IM

data Region = Region { Region -> Refseq
refseq :: !Refseq, Region -> Int
start :: !Int, Region -> Int
end :: !Int }
  deriving (Region -> Region -> Bool
(Region -> Region -> Bool)
-> (Region -> Region -> Bool) -> Eq Region
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Region -> Region -> Bool
$c/= :: Region -> Region -> Bool
== :: Region -> Region -> Bool
$c== :: Region -> Region -> Bool
Eq, Eq Region
Eq Region =>
(Region -> Region -> Ordering)
-> (Region -> Region -> Bool)
-> (Region -> Region -> Bool)
-> (Region -> Region -> Bool)
-> (Region -> Region -> Bool)
-> (Region -> Region -> Region)
-> (Region -> Region -> Region)
-> Ord Region
Region -> Region -> Bool
Region -> Region -> Ordering
Region -> Region -> Region
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Region -> Region -> Region
$cmin :: Region -> Region -> Region
max :: Region -> Region -> Region
$cmax :: Region -> Region -> Region
>= :: Region -> Region -> Bool
$c>= :: Region -> Region -> Bool
> :: Region -> Region -> Bool
$c> :: Region -> Region -> Bool
<= :: Region -> Region -> Bool
$c<= :: Region -> Region -> Bool
< :: Region -> Region -> Bool
$c< :: Region -> Region -> Bool
compare :: Region -> Region -> Ordering
$ccompare :: Region -> Region -> Ordering
$cp1Ord :: Eq Region
Ord, Int -> Region -> ShowS
[Region] -> ShowS
Region -> String
(Int -> Region -> ShowS)
-> (Region -> String) -> ([Region] -> ShowS) -> Show Region
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Region] -> ShowS
$cshowList :: [Region] -> ShowS
show :: Region -> String
$cshow :: Region -> String
showsPrec :: Int -> Region -> ShowS
$cshowsPrec :: Int -> Region -> ShowS
Show)

-- | A subset of a genome.  The idea is to map the reference sequence
-- (represented by its number) to a 'Subseqeunce'.
newtype Regions = Regions (IntMap Subsequence) deriving Int -> Regions -> ShowS
[Regions] -> ShowS
Regions -> String
(Int -> Regions -> ShowS)
-> (Regions -> String) -> ([Regions] -> ShowS) -> Show Regions
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Regions] -> ShowS
$cshowList :: [Regions] -> ShowS
show :: Regions -> String
$cshow :: Regions -> String
showsPrec :: Int -> Regions -> ShowS
$cshowsPrec :: Int -> Regions -> ShowS
Show

-- | A mostly contiguous subset of a sequence, stored as a set of
-- non-overlapping intervals in an 'IntMap' from start position to end
-- position (half-open intervals, naturally).
newtype Subsequence = Subsequence (IntMap Int) deriving Int -> Subsequence -> ShowS
[Subsequence] -> ShowS
Subsequence -> String
(Int -> Subsequence -> ShowS)
-> (Subsequence -> String)
-> ([Subsequence] -> ShowS)
-> Show Subsequence
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Subsequence] -> ShowS
$cshowList :: [Subsequence] -> ShowS
show :: Subsequence -> String
$cshow :: Subsequence -> String
showsPrec :: Int -> Subsequence -> ShowS
$cshowsPrec :: Int -> Subsequence -> ShowS
Show

toList :: Regions -> [(Refseq, Subsequence)]
toList :: Regions -> [(Refseq, Subsequence)]
toList (Regions m :: IntMap Subsequence
m) = [ (Word32 -> Refseq
Refseq (Word32 -> Refseq) -> Word32 -> Refseq
forall a b. (a -> b) -> a -> b
$ Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k, Subsequence
v) | (k :: Int
k,v :: Subsequence
v) <- IntMap Subsequence -> [(Int, Subsequence)]
forall a. IntMap a -> [(Int, a)]
IM.toList IntMap Subsequence
m ]

fromList :: [Region] -> Regions
fromList :: [Region] -> Regions
fromList = (Regions -> Region -> Regions) -> Regions -> [Region] -> Regions
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Region -> Regions -> Regions) -> Regions -> Region -> Regions
forall a b c. (a -> b -> c) -> b -> a -> c
flip Region -> Regions -> Regions
add) (IntMap Subsequence -> Regions
Regions IntMap Subsequence
forall a. IntMap a
IM.empty)

add :: Region -> Regions -> Regions
add :: Region -> Regions -> Regions
add (Region (Refseq r :: Word32
r) b :: Int
b e :: Int
e) (Regions m :: IntMap Subsequence
m) =
    let single :: Maybe Subsequence
single = Subsequence -> Maybe Subsequence
forall a. a -> Maybe a
Just (Subsequence -> Maybe Subsequence)
-> (IntMap Int -> Subsequence) -> IntMap Int -> Maybe Subsequence
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. IntMap Int -> Subsequence
Subsequence (IntMap Int -> Maybe Subsequence)
-> IntMap Int -> Maybe Subsequence
forall a b. (a -> b) -> a -> b
$ Int -> Int -> IntMap Int
forall a. Int -> a -> IntMap a
IM.singleton Int
b Int
e
    in IntMap Subsequence -> Regions
Regions (IntMap Subsequence -> Regions) -> IntMap Subsequence -> Regions
forall a b. (a -> b) -> a -> b
$ (Maybe Subsequence -> Maybe Subsequence)
-> Int -> IntMap Subsequence -> IntMap Subsequence
forall a. (Maybe a -> Maybe a) -> Int -> IntMap a -> IntMap a
IM.alter (Maybe Subsequence
-> (Subsequence -> Maybe Subsequence)
-> Maybe Subsequence
-> Maybe Subsequence
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Maybe Subsequence
single (Subsequence -> Maybe Subsequence
forall a. a -> Maybe a
Just (Subsequence -> Maybe Subsequence)
-> (Subsequence -> Subsequence) -> Subsequence -> Maybe Subsequence
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Int -> Subsequence -> Subsequence
addInt Int
b Int
e)) (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
r) IntMap Subsequence
m


addInt :: Int -> Int -> Subsequence -> Subsequence
addInt :: Int -> Int -> Subsequence -> Subsequence
addInt b :: Int
b e :: Int
e (Subsequence m0 :: IntMap Int
m0) = IntMap Int -> Subsequence
Subsequence (IntMap Int -> Subsequence) -> IntMap Int -> Subsequence
forall a b. (a -> b) -> a -> b
$ Int -> Int -> IntMap Int -> IntMap Int
merge_into Int
b Int
e IntMap Int
m0
  where
    merge_into :: Int -> Int -> IntMap Int -> IntMap Int
merge_into x :: Int
x y :: Int
y m :: IntMap Int
m = case Int -> IntMap Int -> Maybe (Int, Int)
forall a. Int -> IntMap a -> Maybe (Int, a)
IM.lookupLT Int
y IntMap Int
m of
        Just (u :: Int
u,v :: Int
v) | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
u Bool -> Bool -> Bool
&& Int
y Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
v -> Int -> Int -> IntMap Int -> IntMap Int
merge_into Int
x Int
v (IntMap Int -> IntMap Int) -> IntMap Int -> IntMap Int
forall a b. (a -> b) -> a -> b
$ Int -> IntMap Int -> IntMap Int
forall a. Int -> IntMap a -> IntMap a
IM.delete Int
u IntMap Int
m    -- extend to the left
                   | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
u           -> Int -> Int -> IntMap Int -> IntMap Int
merge_into Int
x Int
y (IntMap Int -> IntMap Int) -> IntMap Int -> IntMap Int
forall a b. (a -> b) -> a -> b
$ Int -> IntMap Int -> IntMap Int
forall a. Int -> IntMap a -> IntMap a
IM.delete Int
u IntMap Int
m    -- subsume
                   | Int
y Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
v          -> IntMap Int
m                                 -- subsumed
                   | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
v          -> Int -> Int -> IntMap Int -> IntMap Int
merge_into Int
u Int
y (IntMap Int -> IntMap Int) -> IntMap Int -> IntMap Int
forall a b. (a -> b) -> a -> b
$ Int -> IntMap Int -> IntMap Int
forall a. Int -> IntMap a -> IntMap a
IM.delete Int
u IntMap Int
m    -- extend to the right
        _                            -> Int -> Int -> IntMap Int -> IntMap Int
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert  Int
x Int
y IntMap Int
m                  -- no overlap

overlaps :: Int -> Int -> Subsequence -> Bool
overlaps :: Int -> Int -> Subsequence -> Bool
overlaps b :: Int
b e :: Int
e (Subsequence m :: IntMap Int
m) = case Int -> IntMap Int -> Maybe (Int, Int)
forall a. Int -> IntMap a -> Maybe (Int, a)
IM.lookupLT Int
e IntMap Int
m of
        Just (_,v :: Int
v) -> Int
b Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
v
        Nothing    -> Bool
False