{-# LANGUAGE DeriveGeneric   #-}
{-# LANGUAGE RecordWildCards #-}

module Epidemic.Types.Observations
  ( Observation(..)
  , ReconstructedTree(..)
  , maybeReconstructedTree
  , PointProcessEvents(..)
  , pointProcessEvents
  , reconstructedTreeEvents
  , observedEvents
  , aggregated
  ) where

import qualified Data.Aeson                as Json
import qualified Data.List                 as List
import           Epidemic.Types.Events     (EpidemicEvent (..),
                                            EpidemicTree (..),
                                            maybeEpidemicTree)
import           Epidemic.Types.Population (asPeople)
import           Epidemic.Types.Time       (TimeInterval (..), TimeStamp (..),
                                            inInterval)
import           GHC.Generics

-- | A wrapper for an 'EpidemicEvent' to indicate that this is an even that was
-- observed rather than just an event of the epidemic process.
newtype Observation =
  Observation EpidemicEvent
  deriving (Int -> Observation -> ShowS
[Observation] -> ShowS
Observation -> String
(Int -> Observation -> ShowS)
-> (Observation -> String)
-> ([Observation] -> ShowS)
-> Show Observation
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Observation] -> ShowS
$cshowList :: [Observation] -> ShowS
show :: Observation -> String
$cshow :: Observation -> String
showsPrec :: Int -> Observation -> ShowS
$cshowsPrec :: Int -> Observation -> ShowS
Show, Eq Observation
Eq Observation
-> (Observation -> Observation -> Ordering)
-> (Observation -> Observation -> Bool)
-> (Observation -> Observation -> Bool)
-> (Observation -> Observation -> Bool)
-> (Observation -> Observation -> Bool)
-> (Observation -> Observation -> Observation)
-> (Observation -> Observation -> Observation)
-> Ord Observation
Observation -> Observation -> Bool
Observation -> Observation -> Ordering
Observation -> Observation -> Observation
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Observation -> Observation -> Observation
$cmin :: Observation -> Observation -> Observation
max :: Observation -> Observation -> Observation
$cmax :: Observation -> Observation -> Observation
>= :: Observation -> Observation -> Bool
$c>= :: Observation -> Observation -> Bool
> :: Observation -> Observation -> Bool
$c> :: Observation -> Observation -> Bool
<= :: Observation -> Observation -> Bool
$c<= :: Observation -> Observation -> Bool
< :: Observation -> Observation -> Bool
$c< :: Observation -> Observation -> Bool
compare :: Observation -> Observation -> Ordering
$ccompare :: Observation -> Observation -> Ordering
$cp1Ord :: Eq Observation
Ord, Observation -> Observation -> Bool
(Observation -> Observation -> Bool)
-> (Observation -> Observation -> Bool) -> Eq Observation
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Observation -> Observation -> Bool
$c/= :: Observation -> Observation -> Bool
== :: Observation -> Observation -> Bool
$c== :: Observation -> Observation -> Bool
Eq, (forall x. Observation -> Rep Observation x)
-> (forall x. Rep Observation x -> Observation)
-> Generic Observation
forall x. Rep Observation x -> Observation
forall x. Observation -> Rep Observation x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Observation x -> Observation
$cfrom :: forall x. Observation -> Rep Observation x
Generic)

instance Json.FromJSON Observation

instance Json.ToJSON Observation

instance TimeStamp Observation where
  absTime :: Observation -> AbsoluteTime
absTime (Observation EpidemicEvent
ee) = EpidemicEvent -> AbsoluteTime
forall a. TimeStamp a => a -> AbsoluteTime
absTime EpidemicEvent
ee

-- | A representation of the events that can be observed in an epidemic but
-- which are not included in the reconstructed tree, ie the unsequenced
-- observations.
newtype PointProcessEvents =
  PointProcessEvents [Observation]

-- | Extract the events from an epidemic tree which are observed but not part of
-- the reconstructed tree, ie the ones that are not sequenced.
pointProcessEvents :: EpidemicTree -> PointProcessEvents
pointProcessEvents :: EpidemicTree -> PointProcessEvents
pointProcessEvents Shoot {} = [Observation] -> PointProcessEvents
PointProcessEvents []
pointProcessEvents (Leaf EpidemicEvent
e) =
  case EpidemicEvent
e of
    IndividualSample {Bool
Person
AbsoluteTime
indSampSeq :: EpidemicEvent -> Bool
indSampPerson :: EpidemicEvent -> Person
indSampTime :: EpidemicEvent -> AbsoluteTime
indSampSeq :: Bool
indSampPerson :: Person
indSampTime :: AbsoluteTime
..} ->
      [Observation] -> PointProcessEvents
PointProcessEvents [EpidemicEvent -> Observation
Observation EpidemicEvent
e | Bool -> Bool
not Bool
indSampSeq]
    PopulationSample {Bool
People
AbsoluteTime
popSampSeq :: EpidemicEvent -> Bool
popSampPeople :: EpidemicEvent -> People
popSampTime :: EpidemicEvent -> AbsoluteTime
popSampSeq :: Bool
popSampPeople :: People
popSampTime :: AbsoluteTime
..} ->
      [Observation] -> PointProcessEvents
PointProcessEvents [EpidemicEvent -> Observation
Observation EpidemicEvent
e | Bool -> Bool
not Bool
popSampSeq]
    EpidemicEvent
_ -> [Observation] -> PointProcessEvents
PointProcessEvents []
pointProcessEvents (Branch EpidemicEvent
_ EpidemicTree
lt EpidemicTree
rt) =
  let (PointProcessEvents [Observation]
lEs) = EpidemicTree -> PointProcessEvents
pointProcessEvents EpidemicTree
lt
      (PointProcessEvents [Observation]
rEs) = EpidemicTree -> PointProcessEvents
pointProcessEvents EpidemicTree
rt
      allEs :: [Observation]
allEs = [Observation] -> [Observation]
forall a. Ord a => [a] -> [a]
List.sort ([Observation] -> [Observation]) -> [Observation] -> [Observation]
forall a b. (a -> b) -> a -> b
$ [Observation]
lEs [Observation] -> [Observation] -> [Observation]
forall a. [a] -> [a] -> [a]
++ [Observation]
rEs
   in [Observation] -> PointProcessEvents
PointProcessEvents [Observation]
allEs

-- | A representation of the reconstructed tree, ie the tree where the leaves
-- correspond to sequenced observations.
data ReconstructedTree
  = RBranch Observation ReconstructedTree ReconstructedTree
  | RLeaf Observation
  deriving (Int -> ReconstructedTree -> ShowS
[ReconstructedTree] -> ShowS
ReconstructedTree -> String
(Int -> ReconstructedTree -> ShowS)
-> (ReconstructedTree -> String)
-> ([ReconstructedTree] -> ShowS)
-> Show ReconstructedTree
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ReconstructedTree] -> ShowS
$cshowList :: [ReconstructedTree] -> ShowS
show :: ReconstructedTree -> String
$cshow :: ReconstructedTree -> String
showsPrec :: Int -> ReconstructedTree -> ShowS
$cshowsPrec :: Int -> ReconstructedTree -> ShowS
Show, ReconstructedTree -> ReconstructedTree -> Bool
(ReconstructedTree -> ReconstructedTree -> Bool)
-> (ReconstructedTree -> ReconstructedTree -> Bool)
-> Eq ReconstructedTree
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ReconstructedTree -> ReconstructedTree -> Bool
$c/= :: ReconstructedTree -> ReconstructedTree -> Bool
== :: ReconstructedTree -> ReconstructedTree -> Bool
$c== :: ReconstructedTree -> ReconstructedTree -> Bool
Eq)

-- | The reconstructed phylogeny obtained by pruning an 'EpidemicTree' which
-- contains represents the transmission tree of the epidemic. In the case where
-- there are no sequenced samples in the epidemic then there is no tree to
-- reconstruct which is why this function is in the either monad.
maybeReconstructedTree :: EpidemicTree -> Either String ReconstructedTree
maybeReconstructedTree :: EpidemicTree -> Either String ReconstructedTree
maybeReconstructedTree Shoot {} = String -> Either String ReconstructedTree
forall a b. a -> Either a b
Left String
"EpidemicTree is only a Shoot"
maybeReconstructedTree (Leaf EpidemicEvent
e) =
  case EpidemicEvent
e of
    IndividualSample {Bool
Person
AbsoluteTime
indSampSeq :: Bool
indSampPerson :: Person
indSampTime :: AbsoluteTime
indSampSeq :: EpidemicEvent -> Bool
indSampPerson :: EpidemicEvent -> Person
indSampTime :: EpidemicEvent -> AbsoluteTime
..} ->
      if Bool
indSampSeq
        then ReconstructedTree -> Either String ReconstructedTree
forall a b. b -> Either a b
Right (ReconstructedTree -> Either String ReconstructedTree)
-> ReconstructedTree -> Either String ReconstructedTree
forall a b. (a -> b) -> a -> b
$ Observation -> ReconstructedTree
RLeaf (EpidemicEvent -> Observation
Observation EpidemicEvent
e)
        else String -> Either String ReconstructedTree
forall a b. a -> Either a b
Left String
"Leaf with non-sequenced event individual sample"
    PopulationSample {Bool
People
AbsoluteTime
popSampSeq :: Bool
popSampPeople :: People
popSampTime :: AbsoluteTime
popSampSeq :: EpidemicEvent -> Bool
popSampPeople :: EpidemicEvent -> People
popSampTime :: EpidemicEvent -> AbsoluteTime
..} ->
      if Bool
popSampSeq
        then ReconstructedTree -> Either String ReconstructedTree
forall a b. b -> Either a b
Right (ReconstructedTree -> Either String ReconstructedTree)
-> ReconstructedTree -> Either String ReconstructedTree
forall a b. (a -> b) -> a -> b
$ Observation -> ReconstructedTree
RLeaf (EpidemicEvent -> Observation
Observation EpidemicEvent
e)
        else String -> Either String ReconstructedTree
forall a b. a -> Either a b
Left String
"Leaf with non-sequenced event population sample"
    EpidemicEvent
_ -> String -> Either String ReconstructedTree
forall a b. a -> Either a b
Left String
"Bad leaf in the EpidemicTree"
maybeReconstructedTree (Branch e :: EpidemicEvent
e@Infection {} EpidemicTree
lt EpidemicTree
rt)
  | EpidemicTree -> Bool
hasSequencedLeaf EpidemicTree
lt Bool -> Bool -> Bool
&& EpidemicTree -> Bool
hasSequencedLeaf EpidemicTree
rt = do
    ReconstructedTree
rlt <- EpidemicTree -> Either String ReconstructedTree
maybeReconstructedTree EpidemicTree
lt
    ReconstructedTree
rrt <- EpidemicTree -> Either String ReconstructedTree
maybeReconstructedTree EpidemicTree
rt
    ReconstructedTree -> Either String ReconstructedTree
forall a b. b -> Either a b
Right (ReconstructedTree -> Either String ReconstructedTree)
-> ReconstructedTree -> Either String ReconstructedTree
forall a b. (a -> b) -> a -> b
$ Observation
-> ReconstructedTree -> ReconstructedTree -> ReconstructedTree
RBranch (EpidemicEvent -> Observation
Observation EpidemicEvent
e) ReconstructedTree
rlt ReconstructedTree
rrt
  | EpidemicTree -> Bool
hasSequencedLeaf EpidemicTree
lt = EpidemicTree -> Either String ReconstructedTree
maybeReconstructedTree EpidemicTree
lt
  | EpidemicTree -> Bool
hasSequencedLeaf EpidemicTree
rt = EpidemicTree -> Either String ReconstructedTree
maybeReconstructedTree EpidemicTree
rt
  | Bool
otherwise = String -> Either String ReconstructedTree
forall a b. a -> Either a b
Left String
"Neither subtree has a sequenced leaf"
maybeReconstructedTree Branch {} = String -> Either String ReconstructedTree
forall a b. a -> Either a b
Left String
"EpidemicTree is a bad branch"

-- | Predicate for whether an 'EpidemicTree' has any leaf which corresponds to a
-- sequenced observation and hence should be included in a @ReconstructedTree@.
hasSequencedLeaf :: EpidemicTree -> Bool
hasSequencedLeaf :: EpidemicTree -> Bool
hasSequencedLeaf Shoot {} = Bool
False
hasSequencedLeaf (Leaf EpidemicEvent
e) =
  case EpidemicEvent
e of
    IndividualSample {Bool
Person
AbsoluteTime
indSampSeq :: Bool
indSampPerson :: Person
indSampTime :: AbsoluteTime
indSampSeq :: EpidemicEvent -> Bool
indSampPerson :: EpidemicEvent -> Person
indSampTime :: EpidemicEvent -> AbsoluteTime
..} -> Bool
indSampSeq
    PopulationSample {Bool
People
AbsoluteTime
popSampSeq :: Bool
popSampPeople :: People
popSampTime :: AbsoluteTime
popSampSeq :: EpidemicEvent -> Bool
popSampPeople :: EpidemicEvent -> People
popSampTime :: EpidemicEvent -> AbsoluteTime
..} -> Bool
popSampSeq
    EpidemicEvent
_                     -> Bool
False
hasSequencedLeaf (Branch EpidemicEvent
_ EpidemicTree
lt EpidemicTree
rt) = EpidemicTree -> Bool
hasSequencedLeaf EpidemicTree
lt Bool -> Bool -> Bool
|| EpidemicTree -> Bool
hasSequencedLeaf EpidemicTree
rt

-- | The events that were observed during the epidemic, ie those in the
-- reconstructed tree and any unsequenced samples. If this is not possible an
-- error message will be returned.
observedEvents :: [EpidemicEvent] -> Either String [Observation]
observedEvents :: [EpidemicEvent] -> Either String [Observation]
observedEvents [EpidemicEvent]
epiEvents = do
  EpidemicTree
epiTree <- [EpidemicEvent] -> Either String EpidemicTree
maybeEpidemicTree [EpidemicEvent]
epiEvents
  let (PointProcessEvents [Observation]
unseqObss) = EpidemicTree -> PointProcessEvents
pointProcessEvents EpidemicTree
epiTree
  [Observation]
reconTreeEvents <-
    if EpidemicTree -> Bool
hasSequencedLeaf EpidemicTree
epiTree
      then ReconstructedTree -> [Observation]
reconstructedTreeEvents (ReconstructedTree -> [Observation])
-> Either String ReconstructedTree -> Either String [Observation]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> EpidemicTree -> Either String ReconstructedTree
maybeReconstructedTree EpidemicTree
epiTree
      else [Observation] -> Either String [Observation]
forall a b. b -> Either a b
Right []
  [Observation] -> Either String [Observation]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Observation] -> Either String [Observation])
-> [Observation] -> Either String [Observation]
forall a b. (a -> b) -> a -> b
$ [Observation] -> [Observation]
forall a. Ord a => [a] -> [a]
List.sort ([Observation] -> [Observation])
-> ([Observation] -> [Observation])
-> [Observation]
-> [Observation]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Observation] -> [Observation]
forall a. Eq a => [a] -> [a]
List.nub ([Observation] -> [Observation]) -> [Observation] -> [Observation]
forall a b. (a -> b) -> a -> b
$ [Observation]
unseqObss [Observation] -> [Observation] -> [Observation]
forall a. [a] -> [a] -> [a]
++ [Observation]
reconTreeEvents

-- | A sorted list of all of the observations in the reconstructed tree.
reconstructedTreeEvents :: ReconstructedTree -> [Observation]
reconstructedTreeEvents :: ReconstructedTree -> [Observation]
reconstructedTreeEvents ReconstructedTree
rt =
  case ReconstructedTree
rt of
    RBranch Observation
obs ReconstructedTree
rtl ReconstructedTree
rtr ->
      [Observation] -> [Observation]
forall a. Ord a => [a] -> [a]
List.sort ([Observation] -> [Observation]) -> [Observation] -> [Observation]
forall a b. (a -> b) -> a -> b
$
      Observation
obs Observation -> [Observation] -> [Observation]
forall a. a -> [a] -> [a]
: (ReconstructedTree -> [Observation]
reconstructedTreeEvents ReconstructedTree
rtl [Observation] -> [Observation] -> [Observation]
forall a. [a] -> [a] -> [a]
++ ReconstructedTree -> [Observation]
reconstructedTreeEvents ReconstructedTree
rtr)
    RLeaf Observation
obs -> [Observation
obs]

-- | Aggregate the sequenced and unsequenced individual level samples
aggregated :: [TimeInterval] -> [TimeInterval] -> [Observation] -> [Observation]
aggregated :: [TimeInterval] -> [TimeInterval] -> [Observation] -> [Observation]
aggregated [TimeInterval]
seqAggInts [TimeInterval]
unseqAggInts = [Observation] -> [Observation]
forall a. Ord a => [a] -> [a]
List.sort ([Observation] -> [Observation])
-> ([Observation] -> [Observation])
-> [Observation]
-> [Observation]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Observation] -> [Observation]
aggUnsequenced ([Observation] -> [Observation])
-> ([Observation] -> [Observation])
-> [Observation]
-> [Observation]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Observation] -> [Observation]
aggSequenced
  where
    aggUnsequenced :: [Observation] -> [Observation]
aggUnsequenced = [TimeInterval] -> Bool -> [Observation] -> [Observation]
_aggregate [TimeInterval]
unseqAggInts Bool
False
    aggSequenced :: [Observation] -> [Observation]
aggSequenced = [TimeInterval] -> Bool -> [Observation] -> [Observation]
_aggregate [TimeInterval]
seqAggInts Bool
True

-- | Aggregate observations in each of the intervals given the correct
-- sequencing status.
_aggregate :: [TimeInterval] -> Bool -> [Observation] -> [Observation]
_aggregate :: [TimeInterval] -> Bool -> [Observation] -> [Observation]
_aggregate [TimeInterval]
intervals Bool
onlySequenced [Observation]
obs = ([Observation] -> TimeInterval -> [Observation])
-> [Observation] -> [TimeInterval] -> [Observation]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' [Observation] -> TimeInterval -> [Observation]
f [Observation]
obs [TimeInterval]
intervals
  where
    f :: [Observation] -> TimeInterval -> [Observation]
f [Observation]
os TimeInterval
i = TimeInterval -> Bool -> [Observation] -> [Observation]
_aggregateInInterval TimeInterval
i Bool
onlySequenced [Observation]
os

-- | Aggregate all the observations that fall in the interval and have the
-- correct sequencing status.
_aggregateInInterval :: TimeInterval -> Bool -> [Observation] -> [Observation]
_aggregateInInterval :: TimeInterval -> Bool -> [Observation] -> [Observation]
_aggregateInInterval interval :: TimeInterval
interval@TimeInterval {(AbsoluteTime, AbsoluteTime)
TimeDelta
timeIntDuration :: TimeInterval -> TimeDelta
timeIntEndPoints :: TimeInterval -> (AbsoluteTime, AbsoluteTime)
timeIntDuration :: TimeDelta
timeIntEndPoints :: (AbsoluteTime, AbsoluteTime)
..} Bool
onlySequenced [Observation]
obs =
  let asPopulationSample :: [Observation] -> AbsoluteTime -> Observation
asPopulationSample [Observation]
os AbsoluteTime
absT =
        EpidemicEvent -> Observation
Observation (EpidemicEvent -> Observation) -> EpidemicEvent -> Observation
forall a b. (a -> b) -> a -> b
$
        AbsoluteTime -> People -> Bool -> EpidemicEvent
PopulationSample
          AbsoluteTime
absT
          ([Person] -> People
asPeople [EpidemicEvent -> Person
indSampPerson EpidemicEvent
ee | Observation EpidemicEvent
ee <- [Observation]
os])
          Bool
onlySequenced
      (AbsoluteTime
_, AbsoluteTime
aggTime) = (AbsoluteTime, AbsoluteTime)
timeIntEndPoints
      toBeAggregated :: Observation -> Bool
toBeAggregated Observation
o =
        case Observation
o of
          Observation (IndividualSample {Bool
Person
AbsoluteTime
indSampSeq :: Bool
indSampPerson :: Person
indSampTime :: AbsoluteTime
indSampSeq :: EpidemicEvent -> Bool
indSampPerson :: EpidemicEvent -> Person
indSampTime :: EpidemicEvent -> AbsoluteTime
..}) ->
            TimeInterval -> Observation -> Bool
forall a. TimeStamp a => TimeInterval -> a -> Bool
inInterval TimeInterval
interval Observation
o Bool -> Bool -> Bool
&&
            (if Bool
onlySequenced
               then Bool
indSampSeq
               else Bool -> Bool
not Bool
indSampSeq)
          Observation
_ -> Bool
False
      ([Observation]
obs2Agg, [Observation]
otherObs) = (Observation -> Bool)
-> [Observation] -> ([Observation], [Observation])
forall a. (a -> Bool) -> [a] -> ([a], [a])
List.partition Observation -> Bool
toBeAggregated [Observation]
obs
      newPopSample :: Observation
newPopSample = [Observation] -> AbsoluteTime -> Observation
asPopulationSample [Observation]
obs2Agg AbsoluteTime
aggTime
   in Observation
newPopSample Observation -> [Observation] -> [Observation]
forall a. a -> [a] -> [a]
: [Observation]
otherObs