{-|
Module : Data.Interval
Description : Closed intervals of totally ordered types, e.g. time intervals
Copyright : (c) Lackmann Phymetric
License : GPL-3
Maintainer : olaf.klinke@phymetric.de
Stability : experimental
This module provides the two-parameter type class 'Interval' of types
that represent closed intervals (meaning the end-points are included)
possibly with some extra annotation.
This approach is shared by the Data.IntervalMap.Generic.Interval module of the
package.
A particular use case are time intervals annotated with event data.
The simplest example of an interval type @i@ with end points of type @e@
is the type @i = (e,e)@.
The functions exported from this module are mainly concerned with overlap queries,
that is, to identify which intervals in a collection overlap a given interval
and if so, to what extent.
This functionality is encapsuled in the class 'IntersectionQuery'.
If the collection of intervals is known to overlap in end-points only,
one can simply use a sequence ordered by left end-point as the search structure.
For arbitrary collections we provide the 'ITree' structure
(centered interval tree) which stores intervals in subtrees and bins
that are annotated with their convex hull, so that it can be decided
easily whether there is an interval inside which overlaps a given interval.
The behaviour of the functions is undefined for intervals that
violate the implicit assumption that the left end-point is less than or equal to
the right end-point.
The functionality provided is similar to the Interval data type in the
package
but we focus on closed intervals and let the user decide which
concrete data type to use.
Most functions are property-checked for correctness.
Checks were implemented by Henning Thielemann.
-}
{-# LANGUAGE FlexibleInstances,FlexibleContexts,FunctionalDependencies,MultiParamTypeClasses,CPP #-}
{-# LANGUAGE DeriveFoldable, DeriveFunctor, DeriveTraversable #-}
module Data.Interval (
-- * Types and type classes
Interval(..),
IntersectionQuery(..),
Adjust(..),
TimeDifference(..),
NonNestedSeq(..),
-- * Comparing intervals
intersects,properlyIntersects,contains,properlyContains,
covered,coveredBy,
overlap,properOverlap,
-- * Time intervals
overlapTime,
fractionCovered,
prevailing,
intervalDuration,
-- * Operations on intervals
maybeUnion,maybeIntersection,
hull,
hullSeq,
hullSeqNonNested,
without,
contiguous,components,componentsSeq,
sortByRight,
fromEndPoints,
-- * Streaming intervals
splitIntersecting,
splitProperlyIntersecting,
-- * Interval search tree
ITree,
itree,
emptyITree,
insert,
hullOfTree,
-- ** Debug
invariant, toTree,
-- * Testing
intersecting,intersectingProperly,
filterM,
joinSeq,
propSplit,
splitSeq
) where
import Data.Tree (Tree)
import qualified Data.Tree as Tree
import qualified Data.Sequence as Seq
import qualified Data.Monoid ((<>))
import Data.Filtrable (Filtrable(..))
import Data.Traversable (Traversable)
import Data.Foldable (toList, maximumBy, foldl', foldr')
import Data.Sequence (Seq, ViewL(EmptyL,(:<)), ViewR(EmptyR,(:>)), (><), (<|))
import Data.Function (on)
import Data.Functor.Identity (Identity(Identity, runIdentity))
import Data.Time (UTCTime, addUTCTime, diffUTCTime, utc, NominalDiffTime)
#if MIN_VERSION_time(1,9,0)
import Data.Time (LocalTime, utcToLocalTime, zonedTimeToLocalTime, diffLocalTime, addLocalTime)
#else
import Data.Time (LocalTime, utcToLocalTime, zonedTimeToLocalTime)
#endif
import Data.Time (ZonedTime, localTimeToUTC, zonedTimeToUTC)
import Control.Arrow ((***))
import Control.Applicative (Alternative, empty, (<|>))
-- $setup
-- >>> import Data.IntervalTest
-- >>> import qualified Data.Sequence as Seq
-- >>> import qualified Data.List as List
-- >>> import Data.Function (on)
-- >>> import Data.Maybe (isJust, fromJust, catMaybes)
-- >>> import Data.Foldable (toList)
-- >>> import Test.QuickCheck ((==>))
-- >>> without' :: (Int,Int) -> (Int,Int) -> [(Int,Int)]; without' = without
-- | class of intervals with end points in a totally ordered type
class (Ord e) => Interval e i | i -> e where
lb :: i -> e -- ^ lower bound
lb = fst.endPoints
ub :: i -> e -- ^ upper bound
ub = snd.endPoints
endPoints :: i -> (e,e) -- ^ end points (inclusive)
endPoints i = (lb i,ub i)
{-# MINIMAL (lb,ub) | endPoints #-}
instance (Ord e) => Interval e (e,e) where
endPoints = id
instance (Ord e) => Interval e (Identity e) where
lb = runIdentity
ub = runIdentity
-- | class of search structures for interval intersection queries,
-- returning a 'Foldable' of intervals.
class Foldable f => IntersectionQuery t e f | t -> f where
getIntersects :: (Interval e i, Interval e j) => i -> t j -> f j
-- ^ all intervalls touching the first one
getProperIntersects :: (Interval e i, Interval e j) => i -> t j -> f j
-- ^ all intervals properly intersecting the first one
someIntersects :: (Interval e i, Interval e j) => i -> t j -> Bool
-- ^ does any interval touch the first one?
someProperlyIntersects :: (Interval e i, Interval e j) => i -> t j -> Bool
-- ^ does any interval properly intersect the first one?
maybeBounds :: Interval e i => t i -> Maybe (e,e)
-- ^ the convex hull of the contents
storedIntervals :: Interval e i => t i -> f i
-- ^ dump the entire search structure's content
instance Ord e => IntersectionQuery (ITree e) e Seq where
getIntersects = getIntersectsIT
getProperIntersects = getProperIntersectsIT
someIntersects = someIntersectsIT
someProperlyIntersects = someProperlyIntersectsIT
maybeBounds = hullOfTree
storedIntervals = iTreeContents
instance Ord e => IntersectionQuery NonNestedSeq e Seq where
getIntersects = (.getSeq) . findSeq intersects
getProperIntersects = (.getSeq) . findSeq properlyIntersects
someIntersects = (.getSeq) . existsSeq intersects
someProperlyIntersects = (.getSeq) . existsSeq properlyIntersects
maybeBounds = hullSeqNonNested . getSeq
storedIntervals = getSeq
-- | 'Seq'uences support 'IntersectionQuery' efficiently only in the case
-- when the sequence has the property that for
-- any split @xs = ys <> zs@ into non-empty parts the convex hull
-- of each part is the 'lb' and 'ub' of the leftmost and rightmost element,
-- respectively.
-- This property is guaranteed by 'fromEndPoints'
-- but does not hold in the case where the sequence contains
-- nested intervals:
--
-- >>> propSplit (\xs -> hullSeqNonNested xs == hullSeq xs) . splitSeq . sortByRight $ Seq.fromList ([(1,3),(2,4),(4,5),(3,6)] :: [(Int,Int)])
-- False
--
-- Thus, when querying against a set of intervals with nesting, you must use an 'ITree' instead.
--
-- prop> forevery genNonNestedIntervalSeq $ \xs -> propSplit (\subseq -> hullSeqNonNested subseq == hullSeq subseq) (splitSeq xs)
newtype NonNestedSeq a = FromSortedSeq {getSeq :: Seq a} deriving (Eq,Ord,Show,Functor,Foldable,Traversable)
instance Semigroup (NonNestedSeq a) where
(FromSortedSeq xs) <> (FromSortedSeq ys) = FromSortedSeq (xs <> ys)
instance Monoid (NonNestedSeq a) where
mempty = FromSortedSeq mempty
mappend = (<>)
instance Applicative NonNestedSeq where
pure = FromSortedSeq . pure
(FromSortedSeq fs) <*> (FromSortedSeq xs) = FromSortedSeq (fs <*> xs)
-- | Beware that using @<*>@ may destroy non-nestedness.
instance Alternative NonNestedSeq where
empty = mempty
(<|>) = (<>)
-- | Beware that using @>>=@ may destroy non-nestedness.
instance Monad NonNestedSeq where
return = pure
(FromSortedSeq xs) >>= k = FromSortedSeq (xs >>= (getSeq.k))
instance Filtrable NonNestedSeq where
mapMaybe f (FromSortedSeq xs) = FromSortedSeq (mapMaybe f xs)
-- | Time types supporting differences
class TimeDifference t where
diffTime :: t -> t -> NominalDiffTime
addTime :: NominalDiffTime -> t -> t
instance TimeDifference UTCTime where
diffTime = diffUTCTime
addTime = addUTCTime
#if MIN_VERSION_time(1,9,0)
instance TimeDifference LocalTime where
diffTime = diffLocalTime
addTime = addLocalTime
#else
instance TimeDifference LocalTime where
diffTime x y = diffUTCTime (localTimeToUTC utc x) (localTimeToUTC utc y)
addTime x = utcToLocalTime utc . addUTCTime x . localTimeToUTC utc
#endif
-- | 'addTime' preserves the 'TimeZone'
instance TimeDifference ZonedTime where
diffTime x y = diffUTCTime (zonedTimeToUTC x) (zonedTimeToUTC y)
addTime x z = z {zonedTimeToLocalTime = addTime x (zonedTimeToLocalTime z)}
-- | Convenience function, the 'diffTime' between the 'endPoints'.
intervalDuration :: (TimeDifference t, Interval t i) => i -> NominalDiffTime
intervalDuration i = diffTime (ub i) (lb i)
-- | Find out the overlap of two time intervals.
--
-- prop> forevery genInterval $ \i -> overlapTime i i == intervalDuration i
-- prop> foreveryPair genInterval $ \i j -> not (i `properlyIntersects` j) ==> overlapTime i j == 0
-- prop> foreveryPair genInterval $ \i j -> overlapTime i j == (sum $ fmap intervalDuration $ maybeIntersection i j)
overlapTime :: (TimeDifference t, Interval t i, Interval t j) =>
i -> j -> NominalDiffTime
overlapTime i j = let
x = max (lb i) (lb j)
y = min (ub i) (ub j)
in if x < y then diffTime y x else 0
-- | Prevailing annotation in the first time interval
--
-- prop> forevery genInterval $ \i c -> prevailing i (Seq.singleton (c,i)) == Just (c::Char)
-- prop> foreveryPairOf genInterval genLabeledSeq $ \i js -> isJust (prevailing i js) == any (intersects i . snd) js
-- prop> forevery genInterval $ \i -> foreveryPair genLabeledSeq $ \js ks -> all (flip elem $ catMaybes [prevailing i js, prevailing i ks]) $ prevailing i (js<>ks)
prevailing :: (Interval t i, Interval t j, TimeDifference t) =>
i -> Seq (a,j) -> Maybe a
prevailing i js =
let ks = Seq.filter (intersects i . snd) js
in if Seq.null ks
then Nothing
else Just $ fst $ maximumBy (compare `on` (overlapTime i . snd)) ks
-- ExtPkg: non-empty - partial maximumBy -> NonEmpty.maximumBy
-- | class of Intervals whose bounds can be adjusted
class Interval e i => Adjust e i | i -> e where
adjustBounds :: (e -> e) -> (e -> e) -> i -> i -- ^ adjust lower and upper bound
shift :: (e -> e) -> i -> i -- ^ change both bounds using the same function
shift f = adjustBounds f f
{-# MINIMAL (adjustBounds) #-}
instance Ord e => Adjust e (e,e) where
adjustBounds f g (x,y) = (f x,g y)
-- | the union of two intervals is an interval if they intersect.
--
-- prop> foreveryPair genInterval $ \i j -> isJust (maybeUnion i j) ==> fromJust (maybeUnion i j) `contains` i && fromJust (maybeUnion i j) `contains` j
-- prop> foreveryPair genInterval $ \i j -> i `intersects` j ==> (maybeUnion i j >>= maybeIntersection i) == Just i
maybeUnion :: (Interval e j, Interval e i, Adjust e i) => j -> i -> Maybe i
maybeUnion j i = if j `intersects` i
then Just (adjustBounds (min (lb j)) (max (ub j)) i)
else Nothing
-- | the intersection of two intervals is either empty or an interval.
--
-- prop> foreveryPair genInterval $ \i j -> i `intersects` j ==> i `contains` fromJust (maybeIntersection i j)
maybeIntersection :: (Interval e j, Interval e i, Adjust e i) => j -> i -> Maybe i
maybeIntersection j i = if j `intersects` i
then Just (adjustBounds (max (lb j)) (min (ub j)) i)
else Nothing
-- | /O(n)/ convex hull
--
-- prop> \xs -> isJust (hull xs) ==> all (\x -> fromJust (hull xs) `contains` x) (xs :: [(Int,Int)])
hull :: (Interval e i,Foldable f,Functor f) => f i -> Maybe (e,e)
hull xs = if null xs
then Nothing
else Just (minimum (fmap lb xs), maximum (fmap ub xs))
-- | Set difference. The resulting list has zero, one or two elements.
--
-- >>> without' (1,5) (4,5)
-- [(1,4)]
-- >>> without' (1,5) (2,3)
-- [(1,2),(3,5)]
-- >>> without' (1,5) (1,5)
-- []
-- >>> without' (1,5) (0,1)
-- [(1,5)]
--
-- prop> foreveryPair genInterval $ \i j -> length (i `without` j) <= 2
-- prop> forevery genInterval $ \i -> i `without` i == []
-- prop> foreveryPair genInterval $ \i j -> all (contains i) (i `without` j)
-- prop> foreveryPair genInterval $ \i j -> not $ any (properlyIntersects j) (i `without` j)
without :: (Adjust e i,Interval e j) => i -> j -> [i]
without i j = if j `contains` i then [] else
if ub j <= lb i || lb j >= ub i
then [i] -- intervals don't overlap
else if i `properlyContains` j
then [adjustBounds id (const (lb j)) i,adjustBounds (const (ub j)) id i] -- slashed in half
else if lb j <= lb i
then [adjustBounds (const (ub j)) id i] -- j overhangs on the left
else [adjustBounds id (const (lb j)) i] -- j overhangs on the right
-- | 'intersects' is not an equivalence relation, because it is not transitive.
-- Hence 'groupBy' 'intersects' does not do what one might expect.
-- This function does the expected and groups overlapping intervals
-- into contiguous blocks.
--
-- prop> forevery genSortedIntervals $ all (\xs -> and $ List.zipWith intersects xs (tail xs)) . contiguous
-- prop> forevery genSortedIntervals $ all ((1==).length.components) . contiguous
contiguous :: Interval e i => [i] -> [[i]]
contiguous [] = []
contiguous (i:is) = (i:js) : contiguous ks where
(js,ks) = go (endPoints i) is
go :: Interval e i => (e,e) -> [i] -> ([i],[i])
go j@(x,_y) ls@(l:ls') = if j `intersects` l
then let (foo,bar) = go (x,ub l) ls' in (l:foo,bar)
else ([],ls)
go _ [] = ([],[])
-- | Connected components of a list sorted by 'sortByRight',
-- akin to 'groupBy' 'intersects'.
-- The precondition is not checked.
--
-- prop> forevery genSortedIntervals $ \xs -> all (\i -> any (flip contains i) (components xs)) xs
-- prop> forevery genSortedIntervals $ \xs -> let cs = components xs in all (\(i,j) -> i == j || not (i `intersects` j)) [(c1,c2) | c1 <- cs, c2 <- cs]
components :: (Interval e i, Adjust e i) => [i] -> [i]
components [] = []
-- right-to-left union
components (x:xs) = let cs = components xs in case cs of
[] -> [x]
(c:cs') -> case maybeUnion x c of
Nothing -> x:cs
Just c' -> c':cs'
{-- left-to-right union
components (i:is) = c i is where
c x [] = [x]
c x (y:ys) = case maybeUnion x y of
Nothing -> x : c y ys
Just z -> c z ys
--}
-- | same as 'components'. Is there a way to unify both?
--
-- prop> forevery genSortedIntervals $ \xs -> componentsSeq (Seq.fromList xs) == Seq.fromList (components xs)
-- prop> forevery genSortedIntervalSeq $ \xs -> let cs = componentsSeq xs in all (\(i,j) -> i == j || not (i `intersects` j)) $ do {c1 <- cs; c2 <- cs; return (c1,c2)}
componentsSeq :: (Interval e i, Adjust e i) => Seq i -> Seq i
componentsSeq ys = case Seq.viewr ys of
EmptyR -> empty
xs :> x -> c xs x where
c bs a = case Seq.viewr bs of
EmptyR -> Seq.singleton a
bs' :> b -> case maybeUnion b a of
Nothing -> c bs' b Seq.|> a
Just ab -> c bs' ab
-- | compute the components of the part of @i@ covered by the intervals.
--
-- prop> foreveryPairOf genInterval genIntervalSeq $ \i js -> all (contains i) (covered i js)
-- prop> foreveryPairOf genInterval genIntervalSeq $ \i js -> covered i (covered i js) == covered i js
covered :: (Interval e i,Interval e j,Adjust e j) => i -> Seq j -> Seq j
covered i = componentsSeq . sortByRight . mapMaybe (maybeIntersection i)
-- | 'True' if the first interval is completely covered by the given intervals
--
-- prop> foreveryPair genInterval $ \i j -> j `contains` i == i `coveredBy` [j]
-- prop> foreveryPairOf genInterval genSortedIntervals $ \i js -> i `coveredBy` js ==> any (flip contains i) (components js)
coveredBy :: (Interval e i, Interval e j, Foldable f) => i -> f j -> Bool
i `coveredBy` js = null $ foldl (\remains j -> flip without j =<< remains) [endPoints i] js
-- | percentage of coverage of the first interval by the second sequence of intervals
--
-- prop> foreveryPairOf genNonEmptyInterval genIntervalSeq $ \i js -> i `coveredBy` js == (fractionCovered i js >= (1::Rational))
-- prop> foreveryPairOf genNonEmptyInterval genNonEmptyIntervalSeq $ \i js -> any (properlyIntersects i) js == (fractionCovered i js > (0::Rational))
fractionCovered :: (TimeDifference t, Interval t i, Interval t j, Fractional a) =>
j -> Seq i -> a
fractionCovered i xs = let
totalTime = intervalDuration i
coveredTime = foldl' (\s j -> s + intervalDuration j) 0 $ covered i $ fmap endPoints xs
-- ^ sum of the lengths of the interections with i
in if totalTime==0 then 1 else (fromRational.toRational) (coveredTime/totalTime) -- (fromInteger (round (coveredTime*100/totalTime)))/100
-- | Overlap ordering. Returns 'LT' or 'GT' if the intervals are disjoint,
-- 'EQ' if the intervals overlap.
-- Note that this violates the following property:
--
-- @
-- 'overlap' x y == 'EQ' && 'overlap' y z == 'EQ' => 'overlap' x z == 'EQ'
-- @
--
-- i.e., 'overlap' is not transitive.
--
-- prop> foreveryPair genInterval $ \i j -> i `intersects` j == (overlap i j == EQ)
overlap :: (Interval e i, Interval e j) => i -> j -> Ordering
overlap i j = case (compare (ub i) (lb j),compare (ub j) (lb i)) of
(LT,_) -> LT
(_,LT) -> GT
_ -> EQ
-- | Overlap ordering. Returns 'LT' or 'GT' if the intervals
-- are disjoint or touch in end point(s) only,
-- 'EQ' if the intervals properly overlap.
-- Note that this violates the following property:
--
-- @
-- 'properOverlap' x y == 'EQ' && 'properOverlap' y z == 'EQ' => 'properOverlap' x z == 'EQ'
-- @
--
-- i.e., 'properOverlap' is not transitive.
--
-- prop> foreveryPair genInterval $ \i j -> i `properlyIntersects` j == (properOverlap i j == EQ)
properOverlap :: (Interval e i, Interval e j) => i -> j -> Ordering
properOverlap i j = case ((ub i) <= (lb j),(ub j) <= (lb i)) of
(True,_) -> LT
(_,True) -> GT
_ -> EQ
-- | intersection query.
--
-- >>> ((1,2)::(Int,Int)) `intersects` ((2,3)::(Int,Int))
-- True
--
-- prop> foreveryPair genInterval $ \i j -> (lb i <= ub i && lb j <= ub j && i `intersects` j) == (max (lb i) (lb j) <= min (ub i) (ub j))
intersects :: (Interval e i,Interval e j) => i -> j -> Bool
i `intersects` j = not (ub i < lb j || ub j < lb i)
-- The definition of 'intersects' yields the following algorithm
-- for intersection queries.
-- Given the query interval i, sort the list of possible intersecting intervals
-- by 'ub' and consider the suffix of intervals j with lb i <= ub j.
-- Sort that suffix by 'lb' and take the prefix with lb j <= ub i.
-- | proper intersection.
--
-- prop> foreveryPair genInterval $ \i j -> ((i `intersects` j) && not (i `properlyIntersects` j)) == (ub i == lb j || ub j == lb i)
properlyIntersects :: (Interval e i,Interval e j) => i -> j -> Bool
i `properlyIntersects` j = not (ub i <= lb j || ub j <= lb i)
-- | subset containment
--
-- prop> forevery genInterval $ \i -> i `contains` i
-- prop> foreveryPair genInterval $ \i j -> (i `contains` j && j `contains` i) == (i==j)
-- prop> foreveryPair genInterval $ \i j -> i `contains` j == (maybeUnion i j == Just i)
contains :: (Interval e i,Interval e j) => i -> j -> Bool
i `contains` j = lb i <= lb j && ub j <= ub i
-- | proper subset containment
properlyContains :: (Interval e i,Interval e j) => i -> j -> Bool
i `properlyContains` j = lb i < lb j && ub i > ub j
-- | construct a sorted 'contiguous' sequence of intervals
-- from a sorted sequence of bounds.
-- Fails if the input sequence is not sorted.
--
-- prop> forevery genSortedList $ \xs -> (components $ toList $ fromEndPoints xs) == if length xs < 2 then [] else [(head xs, last xs)]
-- prop> forevery genSortedList $ \xs -> hullSeqNonNested (fromEndPoints xs) == if length xs < 2 then Nothing else Just (head xs,last xs)
fromEndPoints :: (Ord e) => [e] -> Seq (e,e)
fromEndPoints [] = empty
fromEndPoints [_] = empty
fromEndPoints [x,y] = if x <= y then Seq.singleton (x,y) else error "unsorted list"
fromEndPoints (x:xs) = let s = fromEndPoints xs in case Seq.viewl s of
(y,_) :< _ -> (x,y) <| s
EmptyL -> error "Intervals.fromEndPoints: this should never happen"
-- | lexicographical sort by 'ub', then inverse 'lb'.
-- In the resulting list, the intervals intersecting
-- a given interval form a contiguous sublist.
--
-- prop> foreveryPairOf genInterval genSortedIntervalSeq $ \i js -> toList (getIntersects i (FromSortedSeq js)) `List.isSubsequenceOf` toList js
-- prop> forevery genSortedIntervalSeq $ \xs -> propSplit (\subseq -> subseq == sortByRight subseq) (splitSeq xs)
sortByRight :: (Interval e i) => Seq i -> Seq i
sortByRight = Seq.sortBy (\i j -> compare (ub i) (ub j) <> compare (lb j) (lb i))
-- | /O(n)/ Extract all intervals intersecting a given one.
intersecting :: (Interval e i,Interval e j) => j -> Seq i -> Seq i
intersecting j = Seq.filter (intersects j)
-- | /O(n)/ Extract all intervals properly intersecting a given one.
intersectingProperly :: (Interval e i,Interval e j) => j -> Seq i -> Seq i
intersectingProperly j = Seq.filter (properlyIntersects j)
--intersectingProperly j = (takeWhileL (properlyIntersects j)).(dropWhileL (not.(properlyIntersects j)))
-- | /O(n)/ convex hull of a sorted ('sortByRight') sequence of intervals.
-- the upper bound is guaranteed to be in the rightmost interval,
-- but we have no guarantee of the lower bound.
--
-- prop> forevery genSortedIntervalSeq $ \xs -> hullSeq xs == if Seq.null xs then Nothing else Just (minimum (fmap lb xs),maximum (fmap ub xs))
-- prop> forevery genSortedIntervalSeq $ \xs -> hullSeq xs == hull (toList xs)
hullSeq :: Interval e i => Seq i -> Maybe (e,e)
hullSeq xs = case Seq.viewr xs of
EmptyR -> Nothing
_others :> rightmost -> Just (minimum (fmap lb xs),ub rightmost)
-- | When you face the problem of matching two series of intervals against each other,
-- a streaming approach might be more efficient than transforming
-- one of the streams into a search structure.
-- This function drops intervals from the list until
-- the (contiguous) block of intersecting intervals
-- is found. This block (except intervals containing the 'ub' of the query)
-- is removed from the stream.
-- When used as a state transformer on a stream @[i]@ of non-properly overlapping intervals,
-- then one obtains the stream of blocks intersecting the stream of queries.
--
-- >>> splitIntersecting ((2,5) :: (Int,Int)) ([(0,1),(2,2),(2,3),(3,6),(6,7)] :: [(Int,Int)])
-- ([(2,2),(2,3),(3,6)],[(3,6),(6,7)])
--
-- prop> foreveryPairOf genInterval genNonNestedIntervalSeq $ \i js' -> let js = toList js' in fst (splitIntersecting i js) == filter (intersects i) js
-- prop> foreveryPairOf genInterval genNonNestedIntervalSeq $ \i js' -> let js = toList js' in all (\j -> not (ub j < ub i)) (snd (splitIntersecting i js))
splitIntersecting :: (Interval e i, Interval e j) => i -> [j] -> ([j],[j])
splitIntersecting _ [] = ([],[])
splitIntersecting i js@(j:js') = case i `overlap` j of
GT -> splitIntersecting i js'
LT -> ([],js)
EQ -> let
keep = ub j >= ub i
(block,notIntersecting) = splitIntersecting i js'
in (j:block,if keep then j:notIntersecting else notIntersecting)
-- | Like 'splitIntersecting' but disregards those intervals
-- that merely touch the query.
-- Retains overlapping intervals properly containing the 'ub' of the query.
-- When used as a state transformer on an ascending stream @[i]@ of non-properly overlapping intervals,
-- then one obtains the stream of blocks properly intersecting the stream of queries.
--
-- >>> splitProperlyIntersecting ((2,5) :: (Int,Int)) ([(0,1),(2,3),(2,2),(3,5),(5,6),(6,7)] :: [(Int,Int)])
-- ([(2,3),(3,5)],[(5,6),(6,7)])
--
-- prop> foreveryPairOf genInterval genNonNestedIntervalSeq $ \i js' -> let js = toList js' in fst (splitProperlyIntersecting i js) == filter (properlyIntersects i) js
-- prop> foreveryPairOf genInterval genNonNestedIntervalSeq $ \i js' -> let js = toList js' in all (not.properlyContains i) (snd (splitProperlyIntersecting i js))
splitProperlyIntersecting :: (Interval e i, Interval e j) => i -> [j] -> ([j],[j])
splitProperlyIntersecting _ [] = ([],[])
splitProperlyIntersecting i js@(j:js') = case i `properOverlap` j of
GT -> splitProperlyIntersecting i js'
LT -> ([],js)
EQ -> let
keep = ub j > ub i
(block,notIntersecting) = splitProperlyIntersecting i js'
in (j:block,if keep then j:notIntersecting else notIntersecting)
-- | Search tree of intervals.
data ITree e i = Bin (Seq i) | Split (Seq i) e e e (ITree e i) (ITree e i)
-- Internal nodes store the convex hull of its subtrees.
-- Each bin contains a sorted sequence of intervals.
-- In the node @Split top x y z left right@
-- the convex hull of @left@ is @(x,y)@,
-- the convex hull of @right@ is @(y,z)@
-- and the intervals in @top@ are those straddling the split point @y@.
instance Functor (ITree e) where
fmap f (Bin xs) = Bin (fmap f xs)
fmap f (Split up x y z left right) = Split (fmap f up) x y z (fmap f left) (fmap f right)
instance Foldable (ITree e) where
foldMap f (Bin xs) = foldMap f xs
foldMap f (Split up _ _ _ left right) = foldMap f left <> foldMap f up <> foldMap f right
-- | the empty 'ITree'
emptyITree :: ITree e i
emptyITree = Bin empty
-- | smallest interval covering the entire tree. 'Nothing' if the tree is empty.
--
-- prop> forevery genSortedIntervalSeq $ \xs -> hullSeq xs == hullOfTree (itree 4 xs)
hullOfTree :: (Interval e i) => ITree e i -> Maybe (e,e)
hullOfTree (Bin xs) = if Seq.null xs then Nothing else Just (minimum (fmap lb xs),maximum (fmap ub xs))
hullOfTree (Split _ x _ y _ _) = Just (x,y)
iTreeContents :: ITree e i -> Seq i
iTreeContents (Bin xs) = xs
iTreeContents (Split cross _ _ _ left right) = (iTreeContents left) <> cross <> (iTreeContents right)
-- | invariant to be maintained for proper intersection queries
--
-- prop> forevery genIntervalSeq $ \xs -> invariant . itree 4 $ xs
invariant :: Interval e i => ITree e i -> Bool
invariant (Bin _) = True
invariant (Split up x y z left right) = x <= y && y <= z && invUp && invLeft && invRight where
invUp = all (intersects (Identity y)) up && all (contains (x,z)) up
invLeft = all (contains (x,y)) left && invariant left
invRight = all (contains (y,z)) right && invariant right
-- | Intersection query. O(binsize+log(n/binsize)).
--
-- prop> foreveryPairOf genInterval genIntervalSeq $ \i t -> on (==) sortByRight (getIntersects i $ itree 2 t) (i `intersecting` t)
getIntersectsIT :: (Interval e i, Interval e j) => i -> ITree e j -> Seq j
getIntersectsIT i (Bin bin) = i `intersecting` bin
getIntersectsIT i (Split up x y z left right) = let
m = i `intersecting` up
l = if i `intersects` (x,y) then getIntersectsIT i left else empty
r = if i `intersects` (y,z) then getIntersectsIT i right else empty
in if i `intersects` (x,z) then m >< l >< r else empty
-- | Intersection query. O(binsize+log(n/binsize)).
--
-- prop> foreveryPairOf genInterval genIntervalSeq $ \i t -> on (==) sortByRight (getProperIntersects i $ itree 2 t) (i `intersectingProperly` t)
getProperIntersectsIT :: (Interval e i, Interval e j) => i -> ITree e j -> Seq j
getProperIntersectsIT i (Bin bin) = i `intersectingProperly` bin
getProperIntersectsIT i (Split up x y z left right) = let
m = i `intersectingProperly` up
l = if i `properlyIntersects` (x,y) then getProperIntersectsIT i left else empty
r = if i `properlyIntersects` (y,z) then getProperIntersectsIT i right else empty
in if i `properlyIntersects` (x,z) then m >< l >< r else empty
-- | When the actual result of 'getIntersectsIT' is not important,
-- only whether there are intersections.
someIntersectsIT :: (Interval e i, Interval e j) => i -> ITree e j -> Bool
someIntersectsIT i = not . null . getIntersectsIT i
-- | When the actual result of 'getIntersectsIT' is not important,
-- only whether there are intersections.
someProperlyIntersectsIT :: (Interval e i, Interval e j) => i -> ITree e j -> Bool
someProperlyIntersectsIT i = not . null . getProperIntersectsIT i
{-- | retrieve the left-most interval from the tree, or 'Nothing' if it is empty.
leftmostInterval :: (Interval e i) => ITree e i -> Maybe i
leftmostInterval (Bin bin) = case Seq.viewl bin of
EmptyL -> Nothing
i :< _ -> Just i
leftmostInterval (Split up _ _ _ left right) = let
headl xs = case Seq.viewl xs of
EmptyL -> Nothing
i :< _ -> Just i
in (headl . sortByRight . Seq.fromList . catMaybes) [leftmostInterval left,headl up,leftmostInterval right]
--}
-- | transform the interval tree into the tree of hulls
toTree :: Interval e i => ITree e i -> Tree (e,e)
toTree (Bin _) = error "Interval.toTree: just a bin"
toTree (Split _ x y z left right) = Tree.Node {Tree.rootLabel = (x,z), Tree.subForest = [l,r]} where
l = case left of
(Bin _) -> Tree.Node {Tree.rootLabel = (x,y), Tree.subForest = []}
_ -> toTree left
r = case right of
(Bin _) -> Tree.Node {Tree.rootLabel = (y,z), Tree.subForest = []}
_ -> toTree right
-- The only invariant required of a Block is that
-- its leftmost interval has the same 'lb' as the
-- convex hull of the entire Block
newtype Block e i = Block (Seq i)
-- ExtPkg: non-empty allows NonEmpty Seq - makes blockstart total
blockstart :: Interval e i => Block e i -> e
blockstart (Block xs) = case Seq.viewl xs of
EmptyL -> error "blockstart: empty Block"
x :< _ -> lb x
blocknull :: Block e i -> Bool
blocknull (Block xs) = null xs
instance Interval e i => Eq (Block e i) where
(==) = (==) `on` blockstart
instance Interval e i => Ord (Block e i) where
compare = compare `on` (blockstart)
instance Functor (Block e) where
fmap f (Block xs) = Block (fmap f xs)
instance Foldable (Block e) where
foldMap f (Block xs) = foldMap f xs
instance Semigroup (Block e i) where
(Block xs) <> (Block ys) = Block (xs >< ys)
instance Monoid (Block e i) where
mempty = Block empty
mappend = (<>)
instance Interval e i => Interval e (Block e i) where
lb = blockstart
ub = maximum . fmap ub
instance Show i => Show (Block e i) where
show (Block xs) = "Block "++(show (toList xs))
-- | generalises Control.Monad.filterM
filterM :: (Applicative f, Traversable t, Alternative m) => (a -> f Bool) -> t a -> f (m a)
filterM f = (fmap (foldr (<|>) empty)) . traverse (\a -> fmap (\b -> if b then pure a else empty) (f a))
crossesAny :: (Interval e i, Foldable f) => i -> f (Block e i) -> Bool
crossesAny i = any (((ub i) >).blockstart)
removeCrossers :: Interval e i => Block e i -> Seq (Block e i) -> (Seq i,Block e i)
removeCrossers (Block xs) blocks = let (crossers,xs') = filterM f xs in (crossers,Block xs') where
f i = if i `crossesAny` blocks
then (Seq.singleton i,False)
else return True
-- fold over the list of blocks and gather all intervals
-- overlapping block boundaries. Remove blocks that are rendered empty by this.
gatherCrossers :: Interval e i => Seq (Block e i) -> (Seq i,Seq (Block e i))
gatherCrossers blks = case Seq.viewl blks of
(block :< blocks) -> let
(crossers,blocks') = gatherCrossers blocks
(crossers',block') = removeCrossers block blocks'
cons = if blocknull block' then id else ((<|) block')
in (crossers' >< crossers,cons blocks')
EmptyL -> (empty,empty)
{-- after applying gatherCrossers to a sorted list of sorted blocks,
-- all intervals within the blocks are contained in the interval
-- from the blockstart to the blockstart of the next block.
-- Hence we can use these blocks to build an interval tree,
-- where the crossers go into certain 'up' components.
--
-- prop> forevery genIntervalSeq $ invariantCrossers . snd . gatherCrossers . blocksOf 4 . blocksort
invariantCrossers :: Interval e i => Seq (Block e i) -> Bool
invariantCrossers blocks = all inv (Seq.zip blocks (Seq.drop 1 blocks)) where
inv :: Interval e j => (Block e j,Block e j) -> Bool
inv (this,next) = let h = (blockstart this,blockstart next) in all (contains h) this
-- We checked this property individually but it is subsumed by 'invariant'.
--}
blocksOf :: Int -> Seq i -> Seq (Block e i)
blocksOf n = fmap Block . Seq.chunksOf n
-- | The result of 'splitSeq', either the empty sequence,
-- a singleton
-- or two subsequences of roughly the same size
data SplitSeq a = EmptySeq | SingletonSeq a | TwoSeqs (Seq a) (Seq a) deriving (Show)
-- | re-assemble a split into a sequence
joinSeq :: SplitSeq a -> Seq a
joinSeq EmptySeq = empty
joinSeq (SingletonSeq a) = pure a
joinSeq (TwoSeqs xs ys) = xs <> ys
-- | test if a sequence property holds for each sequence in the split.
propSplit :: (Seq a -> Bool) -> SplitSeq a -> Bool
propSplit p (TwoSeqs xs ys) = p xs && p ys
propSplit p s = p (joinSeq s)
-- | Split a Sequence in half, needed for the 'IntersectionQuery' instance.
-- prop> forevery genIntervalSeq $ \is -> joinSeq (splitSeq is) == is
splitSeq :: Seq a -> SplitSeq a
splitSeq xs = let (l,r) = Seq.splitAt (length xs `div` 2) xs in case (null l,null r) of
(_,True) -> EmptySeq
(True,False) -> let (x :< _) = Seq.viewl r in SingletonSeq x
(False,False) -> TwoSeqs l r
-- build a tree from a sequence of mutually non-overlapping blocks
buildFromSeq :: Interval e i => Seq (Block e i) -> ITree e i
buildFromSeq blocks = case splitSeq blocks of
EmptySeq -> emptyITree
SingletonSeq (Block bin) -> Bin bin
TwoSeqs lblocks rblocks -> let
y = let b :< _ = Seq.viewl rblocks in blockstart b
left = buildFromSeq lblocks
right = buildFromSeq rblocks
x = maybe y fst (hullOfTree left)
z = maybe y snd (hullOfTree right)
in Split empty x y z left right
-- | insert the interval at the deepest possible location into the tree.
-- Does not change the overall structure, in particular no re-balancing is performed.
insert :: Interval e i => i -> ITree e i -> ITree e i
insert i (Bin xs) = Bin (i <| xs)
insert i (Split up x y z left right) = if ub i <= y
then let
left' = (insert i left)
x' = maybe x (min x.fst) (hullOfTree left')
in Split up x' y z left' right
else if lb i < y
then Split (i <| up) (min x (lb i)) y (max z (ub i)) left right
else let
right' = insert i right
z' = maybe z (max z.snd) (hullOfTree right')
in Split up x y z' left right'
-- | Construct an interval tree with bins of maximal given size.
-- The function first sorts the intervals,
-- then splits into chunks of given size.
-- The leftmost endpoints of the chunks define boundary points.
-- Next, all intervals properly overlapping a boundary are removed
-- from the chunks and kept separately.
-- The chunks are arranged as the leaves of a binary search tree.
-- Then the intervals overlapping boundaries are placed
-- at internal nodes of the tree.
-- Hence if all intervals are mutually non-overlapping properly,
-- the resulting tree is a pure binary search tree with bins of
-- given size as leaves.
itree :: Interval e i => Int -> Seq i -> ITree e i
itree n = uncurry ($).(f *** buildFromSeq).gatherCrossers.blocksOf n.blocksort where
f = flip (foldr' insert)
-- We must sort so that 'blockstart'
-- yields the 'lb' of the convex hull of the block.
-- The rest is not important.
blocksort :: Interval e i => Seq i -> Seq i
blocksort = Seq.unstableSortBy (compare `on` lb)
-- * Non-overlapping intervals
-- | /O(1)/ bounds of an ordered, non-nested sequence of intervals. 'Nothing', if empty.
--
-- prop> forevery genNonNestedIntervalSeq $ \xs -> hullSeqNonNested xs == hullSeq xs
hullSeqNonNested :: Interval e i => Seq i -> Maybe (e,e)
hullSeqNonNested xs = case Seq.viewl xs of
EmptyL -> Nothing
leftmost :< others -> Just (lb leftmost, case Seq.viewr others of
_ :> rightmost -> ub rightmost
EmptyR -> ub leftmost)
-- | Query an ordered 'Seq'uence of non-nested intervals
-- for a predicate @p@ that has the property
--
-- @
-- j `contains` k && p i k ==> p i j
-- @
--
-- and return all elements satisfying the predicate.
--
-- prop> foreveryPairOf genInterval genNonNestedIntervalSeq $ \i js -> getIntersects i (FromSortedSeq js) == intersecting i js
findSeq :: (Interval e i, Interval e j) => (i -> (e,e) -> Bool) -> i -> Seq j -> Seq j
findSeq p i js = case hullSeqNonNested js of
Nothing -> empty
Just h -> if p i h
then case splitSeq js of
SingletonSeq _j -> js
TwoSeqs l r -> findSeq p i l >< findSeq p i r
EmptySeq -> empty -- should never happen
else empty
-- | Query an ordered 'Seq'uence of non-nested intervals
-- for a predicate @p@ that has the property
--
-- @
-- j `contains` k && p i k ==> p i j
-- @
existsSeq :: (Interval e i, Interval e j) => (i -> (e,e) -> Bool) -> i -> Seq j -> Bool
existsSeq p i js = case hullSeqNonNested js of
Nothing -> False
Just h -> if p i h
then case splitSeq js of
SingletonSeq _j -> True
TwoSeqs l r -> existsSeq p i l || existsSeq p i r
EmptySeq -> False -- should never happen
else False