-- | This module provides log-domain functionality. Ed Kmett provides, with -- @log-domain@, a generic way to handle numbers in the log-domain, some which -- is used under the hood here. We want some additional type safety and also -- connect with the 'SemiRing' module. module Numeric.LogDomain where import Control.Monad.Except import Numeric.Log as NL import qualified Data.Vector.Fusion.Stream.Monadic as SM import qualified Data.Vector.Fusion.Util as SM import Debug.Trace import Numeric -- | Instances for @LogDomain x@ should be for specific types. class LogDomain x where -- | The type family to connect a type @x@ with the type @Ln x@ in the -- log-domain. type Ln x ∷ * -- | Transport a value in @x@ into the log-domain. @logdom@ should throw an -- exception if @log x@ is not valid. logdom ∷ (MonadError String m) ⇒ x → m (Ln x) -- | Unsafely transport x into the log-domain. unsafelogdom ∷ x → Ln x -- | Transport a value @Ln x@ back into the linear domain @x@. lindom ∷ Ln x → x instance LogDomain Double where type Ln Double = Log Double {-# Inline logdom #-} logdom x | x < 0 = throwError "log of negative number" | otherwise = return $ unsafelogdom x {-# Inline unsafelogdom #-} unsafelogdom = Exp . log {-# Inline lindom #-} lindom = exp . ln -- | This is similar to 'Numeric.Log.sum' but requires only one pass over the -- data. It will be useful if the first two elements in the stream are large. -- If the user has some control over how the stream is generated, this function -- might show better performance than 'Numeric.Log.sum' and better numeric -- stability than 'fold 0 (+)' -- -- TODO this needs to be benchmarked against @fold 0 (+)@, since in -- @DnaProteinAlignment@ @sumS@ seems to be slower! sumS ∷ (Monad m, Ord a, RealFloat a, Show a) ⇒ Log a → SM.Stream m (Log a) → m (Log a) {-# Inline sumS #-} sumS zero (SM.Stream step s0) = sLoop1 SM.SPEC zero s0 where -- we need to find the first @x@ that is not @(-1/0)@ to handle @x-m@ -- correctly. We loop @sLoop1@ until we have the first finite @y@ and use -- that as the @m@ for @sLoop2@. sLoop1 SM.SPEC (Exp x) s = step s >>= \case SM.Done → return $ Exp x SM.Skip s1 → sLoop1 SM.SPEC (Exp x) s1 SM.Yield (Exp y) s2 | isInfinite y → sLoop1 SM.SPEC (Exp $ max x y) s2 -- either (1/0) or (-1/0) are handled correctly | otherwise → sLoop2 SM.SPEC m (1∷Int) (expm1 (x-m) + expm1 (y-m)) s2 where m = max x y -- from here on we are fine sLoop2 SM.SPEC m cnt acc s = step s >>= \case SM.Done → return $ Exp $ m + log1p (acc + fromIntegral cnt) SM.Skip s2 → sLoop2 SM.SPEC m cnt acc s2 SM.Yield (Exp x) s2 → sLoop2 SM.SPEC m (cnt+1) (acc + expm1 (x-m)) s2 -- | @log-sum-exp@ for streams, without incurring examining the stream twice, -- but with the potential for numeric problems. In pricinple, the numeric error -- of this function should be better than individual binary function -- application and worse than an optimized @sum@ function. -- -- Needs to be written in direct style, as otherwise any constructors (to tell -- us if we collected two elements already) remain. logsumexpS ∷ (Monad m, Ord a, Num a, Floating a) ⇒ SM.Stream m a → m a {-# Inline logsumexpS #-} logsumexpS (SM.Stream step s0) = lseLoop0 SM.SPEC s0 where lseLoop0 SM.SPEC s = step s >>= \case SM.Done → return 0 SM.Skip s0' → lseLoop0 SM.SPEC s0' SM.Yield x s1 → lseLoop1 SM.SPEC x s1 lseLoop1 SM.SPEC x s = step s >>= \case SM.Done → return x SM.Skip s1' → lseLoop1 SM.SPEC x s1' SM.Yield y sA → let !m = max x y in lseLoopAcc SM.SPEC m (exp (x-m) + exp (y-m)) sA lseLoopAcc SM.SPEC !m !acc s = step s >>= \case SM.Done → return $ m + log acc SM.Skip sA' → lseLoopAcc SM.SPEC m acc sA' SM.Yield z sA' → lseLoopAcc SM.SPEC m (acc + exp (z-m)) sA'