{-# LANGUAGE FlexibleInstances #-}
module Distributed.Failure.Phi (
Phi(..), phi
) where
import Data.Foldable
import Data.List.NonEmpty (NonEmpty)
import Data.Sequence (Seq, (<|))
import qualified Data.Sequence as Seq
import Data.Time
import Distributed.Failure.Class
import Statistics.Distribution
import Statistics.Distribution.Normal
data Phi =
Phi {
_pThresh :: Double
, _pWindow :: Int
, _pLog :: Seq Double
}
deriving (Show, Eq, Ord)
{- | Start a phi-acrual failure detector, given a Φ, a window size, and the
- starting messurements.
-}
phi :: Double -> Int -> NonEmpty DiffTime -> Phi
phi t w l = Phi t w (Seq.fromList . fmap realToFrac . toList $ l)
instance FailureDetector Phi where
observe (Phi t w l) d = Phi t w (Seq.take w $ (realToFrac d) <| l)
suspected (Phi t _ l) d =
t <= negate (logBase 10 pLater)
where
s = realToFrac . length $ l
m = sum l/s
sd = sqrt $ (sum . fmap (\i -> (i - m)^(2::Int)) $ l)/(s-1)
-- Our effective standard deviation, is the calculated sd as above when in range.
-- Sadly, during startup, or when our message regularity is higher then our
-- clock precision, our sd diverges. In these cases we just take the mean,
-- or, when the mean is also divergent, one.
-- This means that in edge cases we do not respect the requested phi fully.
-- The values of phi are still related to each other in the same ways though.
-- Addtionally we still satisfy the failure detector requirements in that
-- we still will eventually suspect any process that fails to communicate,
-- and that we will return to trusting a correct process.
esd = if (sd > 0) && (sd < (1/0))
then sd
else if m>0 then m else 1
dist = normalDistr m esd
pLater = complCumulative dist (realToFrac d)