Data/RandProc.hs

{- |RandProc.hs - a Haskell library for working with random processes in a mathematically rigorous way
   
   (Concepts taken from /Random Processes - a Mathematical Approach for Engineers/ by:

                          - Robert M. Gray

                          - Lee D. Davisson
                          
                          Prentice-Hall Information and System Sciences Series, Thomas Kailath, Series Editor)

  $Id: RandProc.hs 31 2011-06-22 13:49:48Z dbanas $

  David Banas

  12 March 2011

  Copyright (c) 2011 by David Banas; All rights reserved World wide.

  /Revision History:/

  [@Date      SVN #@] Description

  [@2011-03-13    3@] Data structures stabilized. 'isSigma' working under minimal,
                      discrete sample testing.

  [@2011-03-18    4@] Added 'isProbMeas', as well as monadic debugging versions of
                      both it and 'isSigma'.
                      Added an example probability space representing a fair die.

  [@2011-03-29    7@] Custom intersection functions added and briefly tested.

  [@2011-04-02    8@] Custom union functions added and briefly tested.
                      Solution is crude: it is O(N^2), and requires 2 passes over
                      the sample list every time a join is successful. Perhaps, a
                      pre-sort?

  [@2011-06-06    9@] Attempted fix of 'getCompEvent'
                      Added 'smplComp' function, as helper to revised 'getCompEvent'.
                      Changed 'Point' to accept Double.
                      Moved all sample spaces to new file, 'Main.hs'.
                      Added input sorting to 'range'.
                      Changed Ranges to be open intervals, in order to allow for
                      complementing out a Point from them.

  [@2011-06-11   10@] Major re-write.
                      'getCompEvent' fixed. All 5 test spaces checking out ok.

  [@2011-06-18   21@] Removed sample set order dependency from 'checkSigma'.
                      All 7 test spaces checking out ok.

  [@2011-06-19   22@] Added 'union of events is an event' test to 'checkSigma'.

  [@2011-06-20   23@] Changed 'Event' from data constructor to type alias, in order
                      to eliminate many instances of 'Event . f . getSamps' code.

  [@2011-06-20   25@] Modified 'smpsSetInt' to use a fold.

  [@2011-06-20   26@] Defined public interface.

  [@2011-06-21   27@] Modified comments for Haddock, and generated HTML docs.

  [@2011-06-22   31@] Moved into 'Data' directory.

  [End of Subversion revision history] This source has been moved to darcs.

  [@2011-06-27@] Made `smplSetUnion` more efficient, and tuned remaining
                 performance bottlenecks.

  /To Do:/

-}

{-# LANGUAGE FlexibleInstances, TypeSynonymInstances, OverlappingInstances, NoMonomorphismRestriction #-}

module Data.RandProc (
    ProbSpace(ProbSpace)
    ,Measure(Measure)
    ,Sample(Empty)
    ,TestResult(..)
    ,ErrType(..)
    ,ProbSpaceTest(ProbSpaceTest)
    ,point
    ,range
    ,makeProbSpace 
    ,checkSpace
    ,getRsltStr 
    ,checkProbMeas
    ,checkSigma
    ,rangeBegin
    ,rangeEnd
    ,getProb
    ,getEvent
    ,getCompEvent
    ,eventInt
    ,smplComp
    ,isElem
    ,noDupEvents
    ,smplInt
    ,smplSetInt
    ,smplUnion
    ,smplSetUnion
    ,subs
) where

import Data.List

eps :: Double
eps = 0.000001 -- Used in floating point "equality" tests.

{- |We take a probability space to consist of the following:

     - an 'abstract space' composed of either discrete or continuous (or a mix) samples

     - an 'event space', which must be a Sigma field defined over the abstract space

     - a 'probability measure' defined over the event space

   [Note:] For the sake of efficient coding, the /event space/ and the
         /probability measure/ are combined in the Haskell data structure,
         below. This is permissable, because there has to be a 1:1
         correspondance between them anyway. And it is preferable, because it:

         - keeps the probabilities more closely associated w/ the events, and

         - avoids duplication of code (i.e. - the list of events).
-}
data ProbSpace = ProbSpace {
    space :: [Sample]
    ,measure :: [Measure]
} deriving (Show)

{- |This is our abstract data type, which represents a sample in the abstract space.

It has a constructor representing every possible element in the abstract space
we're modeling. (Currently, just points and ranges of /Double/s.)

Normally, none of the constructors of this type will be called directly.
Instead, helper functions are provided, such as 'point' and 'range', which
hide the implementation details from the user, and present a stable interface.

Currently, the sole exception to the above is the /Empty/ constructor, which is
really just a hack intended to put off the job of making the functions in this
library more intelligent, with regard to their handling of empty lists.
-}
data Sample  = Point Double
             | Range (Double, Double)
             | Empty
             | Full
    deriving (Eq, Show)
-- [Note:] When constructing a range, it is the user's responsibility to ensure
--       that the first element of the couple is less than the second.
instance Ord Sample where
    -- (<)
    _                < Empty = False
    Empty            < _     = True
    Full             < _     = False
    _                < Full  = True
    (Point p)        < (Point p') = p < p'
    (Point p)        < (Range (r1, _)) = p <= r1
    (Range (r1, r2)) < (Point p) = Point p >= Range (r1, r2)
    (Range (r1, r2)) < (Range (r3, r4))
        | r1 == r3 = r2 < r4
        | otherwise = r1 < r3

    -- (>)
    Empty            > _     = False
    _                > Empty = True
    _                > Full  = False
    Full             > _     = True
    (Point p)        > (Point p') = p > p'
    (Point p)        > (Range (r1, r2)) = Point p >= Range (r1, r2)
    (Range (r1, r2)) > (Point p) = Point p < Range (r1, r2)
    (Range (r1, r2)) > (Range (r3, r4))
        | r1 == r3 = r2 > r4
        | otherwise = r1 > r3

    -- (<=), (>=)
    (<=) s1 = not . (s1 >)
    (>=) s1 = not . (s1 <)

-- custom data type and helper function for communicating sample types:
-- (Helpful in places where pattern matching can't be used.)
data SampleType = STPoint
                | STRange
                | STEmpty
                | STFull
    deriving (Eq, Show)

sampleType :: Sample -> SampleType
sampleType (Point _) = STPoint
sampleType (Range _) = STRange
sampleType Empty     = STEmpty
sampleType Full      = STFull

-- |The custom type /Event/ is just an alias for a list of Samples.
type Event = [Sample]

-- |/Measure/ has 2 fields:
--
-- * /event/ - a list of samples from the space, and
--
-- * /prob/  - a number between 0 and 1 giving the events probability of
--             occurence.
data Measure = Measure {
    event :: Event
    ,prob :: Double
}
    deriving (Eq, Ord, Show)

-- |This is the helper function intended to be used for constructing a point sample.
point :: Double -> Sample
point = Point

-- |This is the helper function intended to be used for constructing a range sample.
-- The range is considered /open/. That is, its end points are not included.
range       :: (Double, Double) -> Sample
range (a, b)
  | a == b    = point a
  | a < b     = Range (a, b)
  | otherwise = Range (b, a)

-- |This helper function generates a complete and valid probability space,
--  given a discrete sample space and set of probabilities.
makeProbSpace :: [(Sample, Double)] -> ProbSpace
makeProbSpace [] = ProbSpace {
                       space = [
                           Empty
                           ,Full
                       ]
                       ,measure = [
                           Measure {
                               event = [Empty]
                               ,prob = 0
                           }
                           ,Measure {
                               event = [Full]
                               ,prob = 1
                           }
                       ]
                   }
makeProbSpace ps = ProbSpace {
                       space = [fst p | p <- ps]
                       ,measure = Measure {
                               event = [Empty]
                               ,prob = 0
                           }
                        : [Measure e (sum [snd p | p <- ps, fst p `elem` e]) | e <- ss]
                   }
    where ss = filter (not . null) $ subs $ map fst ps

-- Helpful info getters:

-- |Gets the beginning point of a range, which is /not/ included in the range,
-- since ranges are considered to be open.
rangeBegin :: Sample -> Double
rangeBegin (Point _)     = undefined
rangeBegin (Range (a,_)) = a
rangeBegin Empty         = undefined
rangeBegin Full          = undefined

-- |Gets the ending point of a range, which is /not/ included in the range,
rangeEnd :: Sample -> Double
rangeEnd (Point _)     = undefined
rangeEnd (Range (_,b)) = b
rangeEnd Empty         = undefined
rangeEnd Full          = undefined

-- |Extracts the probability from a Measure.
getProb :: Measure -> Double
getProb = prob

-- |Extracts the Event from a Measure.
getEvent :: Measure -> Event
getEvent = event

-- |Get the complement of an event from the sample space.
getCompEvent :: [Sample] -> Event -> Event
getCompEvent [] _ = [Empty]
getCompEvent (s:ss) e = smplSetUnion $ foldl eventInt [s] (map (smplComp s) e) ++ getCompEvent ss e

-- |Calculates the intersection of 2 events (i.e. - list of samples).
eventInt :: Event -> Event -> Event
eventInt s1 s2
    | null (filter (/= Empty) s1) = [Empty]
    | null (filter (/= Empty) s2) = [Empty]
    | otherwise = smplSetUnion $ concatMap (\s -> (map (smplInt s) s1)) s2

-- |Returns that portion of the first sample that is disjoint from the second.
smplComp :: Sample -> Sample -> [Sample]
smplComp Empty _ = [Empty]
smplComp s Empty = [s]
smplComp (Point n) (Point m)
  | m == n                                              = [Empty]
  | otherwise                                           = [Point n]
smplComp (Point n) (Range (a, b))
  | (n > a) && (n < b)                                  = [Empty]
  | otherwise                                           = [Point n]
smplComp (Range (a, b)) (Point n)
  | (n > a) && (n < b)                                  = [Range (a, n), Range (n, b)]
  | otherwise                                           = [Range (a, b)]
smplComp (Range (a, b)) (Range (c, d))
  | (c >= b) || (a >= d)                                = [Range (a, b)]
  | (c <= a) && (d >= b)                                = [Empty]
  | (c > a) && (d < b)                                  = [Range (a, c), Range (d, b), Point c, Point d]
  | a < c                                               = [Range (a, c), Point c]
  | otherwise                                           = [Range (d, b), Point d]
smplComp _ Full = [Empty]
smplComp Full _ = undefined

-- |Determine if a sample is an element of a space.
--
-- (Need this, as opposed to just using `elem`, in order to accomodate ranges.)
isElem :: [Sample] -> Sample -> Bool
isElem [] _    = False
isElem _ Empty = True
isElem (s:ss) s' = testElem s s' || isElem ss s'

-- testElem : Test whether the second sample is an element of the first.
testElem :: Sample -> Sample -> Bool
testElem Empty _                       = False
testElem _ Empty                       = True
testElem (Point x) (Point y)           = x == y
testElem (Point _) (Range _)           = False
testElem (Range (x, y)) (Point z)      = (z > x) && (z < y)
testElem (Range (x, y)) (Range (w, z)) = (w >= x) && (z <= y)
testElem Full _                        = True
testElem _ Full                        = False

-- |Checks a list of measures against duplicate events.
noDupEvents :: [Measure] -> Bool
noDupEvents []     = True
noDupEvents (m:ms) = notElem (event m) es && noDupEvents ms
    where es = map event ms

-- custom union and intersection functions for our 'Sample' data type

-- |Returns the intersection between 2 samples.
smplInt :: Sample -> Sample -> Sample
smplInt Empty _ = Empty
smplInt _ Empty = Empty
smplInt Full s  = s
smplInt s Full  = s
smplInt (Point n) (Point m)
  | m == n                = Point n
  | otherwise             = Empty
smplInt (Point n) (Range (a, b))
  | (n > a) && (n < b)    = Point n
  | otherwise             = Empty
smplInt (Range (a, b)) (Point n) = smplInt (Point n) (Range (a, b))
smplInt (Range (a, b)) (Range (c, d))
  | (c >= b) || (a >= d)  = Empty
  | otherwise             = Range (max a c, min b d)

-- |Reduces a list of samples to a single sample representing their intersection.
smplSetInt :: [Sample] -> Sample
smplSetInt = foldl smplInt Full

-- |Returns the union of 2 samples.
--
-- Unlike 'smplInt', /smplUnion/ must return a list since, if the 2 input
-- samples aren't adjacent or overlapping, the union of them is a list
-- containing both.
smplUnion :: Sample -> Sample -> [Sample]
smplUnion Empty s = [s]
smplUnion s Empty = [s]
smplUnion (Point n) (Point m)
  | m == n = [Point n]
  | m < n     = [Point m, Point n]
  | otherwise = [Point n, Point m]
smplUnion (Point n) (Range (a, b))
  -- Ranges are considered open, in order to allow complementing a point from them.
  | (n > a) && (n < b)  = [Range (a, b)]
  | n >= b              = [Range (a, b), Point n]
  | otherwise           = [Point n, Range (a, b)]
-- The union operation is commutative.
smplUnion (Range (a, b)) (Point n) = smplUnion (Point n) (Range (a, b))
smplUnion (Range (a, b)) (Range (c, d))
  | c >= b = [Range (a, b), Range (c, d)]
  | a >= d = [Range (c, d), Range (a, b)]
  | otherwise             = [Range (min a c, max b d)]
smplUnion Full _ = [Full]
smplUnion _ Full = [Full]

-- |Absorbs a single input sample into a reverse-sorted list, as far as possible,
--  via unioning. Assumes the single sample to be > the first list entry.
smplUnionRecursRev :: [Sample] -> Sample -> [Sample]
smplUnionRecursRev [] s = [s]
smplUnionRecursRev (Empty : ss) s = smplUnionRecursRev ss s
smplUnionRecursRev (s : ss) Empty = smplUnionRecursRev ss s
smplUnionRecursRev (Point m : ss) (Point n) 
  | m == n    = smplUnionRecursRev ss (Point n)
  | otherwise = Point n : Point m : ss
smplUnionRecursRev ((Range (a, b)) : ss) (Point n) 
  | (n > a) && (n < b)  = smplUnionRecursRev ss (Range (a, b))
  | otherwise           = Point n : Range (a, b) : ss
smplUnionRecursRev ((Point n) : ss) (Range (a, b)) = Range (a, b) : Point n : ss
smplUnionRecursRev ((Range (c, d)) : ss) (Range (a, b)) 
  | a >= d    = Range (a, b) : Range (c, d) : ss
  | otherwise = smplUnionRecursRev ss (Range (c, max b d))
smplUnionRecursRev _ Full       = [Full]
smplUnionRecursRev (Full : _) _ = [Full]

-- |Collapses a list of samples down to the maximally reduced set, which still
-- composes a proper union of the input.
smplSetUnion :: [Sample] -> [Sample]
smplSetUnion = consolidateRPR . foldl smplUnionRecursRev [] . sort

-- consolidateRPR : Reduces all occurences of:
--                  Range (a,b), Point (b), Range (b,c) into:
--                  Range (a,c).
consolidateRPR :: [Sample] -> [Sample]
consolidateRPR = scanRPR . sort

-- scanRPR : Scans through a list of samples looking for the pattern:
--           Range (a,b), Point (b), Range (b,c) and consolidates those into:
--           Range (a,c).
--
-- Note) This function depends upon receiving a SORTED sample list!
scanRPR :: [Sample] -> [Sample]
scanRPR []             = []
scanRPR (s:ss)
    | sampleType s == STRange =
        if headIsPoint (rangeEnd s) ss &&
           headIsRange (tail ss) &&
           rangeBegin (head (tail ss)) == rangeEnd s
        then
            scanRPR $ Range (rangeBegin s, rangeEnd (head (tail ss))) : tail (tail ss)
        else s : scanRPR ss
    | otherwise = s : scanRPR ss

-- headIsPoint : Tests whether the head of the incoming list is a particular
--               Point value.
headIsPoint :: Double -> [Sample] -> Bool
headIsPoint _ []     = False
headIsPoint a (s:_) = s == Point a

-- headIsRange : Tests whether the head of the incoming list is a
--               Range.
headIsRange :: [Sample] -> Bool
headIsRange []     = False
headIsRange (s:_) = sampleType s == STRange

-- |Custom data type used for test results and error reporting.
data TestResult = Fail {err :: ErrType}
                | Pass
    deriving (Show)
instance Eq TestResult where
    Pass == Pass      = True
    Fail e == Fail e' = e == e'
    _ == _            = False

-- |Custom data type for reporting different errors
data ErrType = UnknownErr
             | EmptySampleSpace
             | EmptyEventSpace
             | MissingNullEvent
             | MissingCertainEvent
             | BadEventSamples
             | MissingCompEvent
             | MissingUnionEvent
             | EventMeasLenMismatch
             | DupEventsInMeas
             | MissingEventsInMeas
             | NullEventNonZeroProb
             | CertainEventNonUnityProb
             | EventAndCompNoSumOne
    deriving (Eq, Show)

-- |getErrStr : Turns a value of type /ErrType/ into a more readable string.
getErrStr :: ErrType -> String
getErrStr UnknownErr               = "Unknown error"
getErrStr EmptySampleSpace         = "Empty sample space"
getErrStr EmptyEventSpace          = "Empty event space"
getErrStr MissingNullEvent         = "The null event is missing from the event space."
getErrStr MissingCertainEvent      = "The certain event is missing from the event space."
getErrStr BadEventSamples          = "At least one event contains samples not in the sample space."
getErrStr MissingCompEvent         = "At least one event's compliment is missing from the event space."
getErrStr MissingUnionEvent        = "At least one union of events is missing from the event space."
getErrStr EventMeasLenMismatch     = "Lengths of event and measure lists don't match."
getErrStr DupEventsInMeas          = "There are duplicate events in the measure list."
getErrStr MissingEventsInMeas      = "Some events aren't covered in the measure list."
getErrStr NullEventNonZeroProb     = "The null event has been assigned a non-zero probability."
getErrStr CertainEventNonUnityProb = "The certain event has been assigned a probability other than 1."
getErrStr EventAndCompNoSumOne     = "At least one pair of event and compliment have probabillities that don't add to 1."

-- |Turns a value of type /TestResult/ into a human readable string.
getRsltStr :: TestResult -> String
getRsltStr Pass = "Ok"
getRsltStr tr   = getErrStr $ err tr

-- |Checks whether event space is actually a Sigma field over the sample space.
checkSigma :: ProbSpace -> TestResult
checkSigma ps
    | null (filter (/= Empty) sp)           = Fail EmptySampleSpace
    | null es                               = Fail EmptyEventSpace
    | notElem [Empty] es                    = Fail MissingNullEvent
    | notElem sp es && notElem [Full] es    = Fail MissingCertainEvent
    | not $ all (all (\ s -> isElem sp s || (s == Empty))) es
                                            = Fail BadEventSamples
    | not $ all (\e -> getCompEvent sp e `elem` es) es
                                            = Fail MissingCompEvent
    | not $ all (`elem` es) (eventUnions es)
                                            = Fail MissingUnionEvent
    | otherwise                             = Pass
    where es = map (sort . event) (measure ps)
          ss = filter (not . null) $ map (filter (/= Empty) . event) (measure ps)
          sp = space ps
         
-- |Power set generator, specific to a list of `Event`s
--
-- Made necessary by the fact that generating all possible unions of all
-- possible events grows as 2^(2^N), N = # of samples, due to much redundancy.
eventUnions :: [Event] -> [Event]
eventUnions es = concat $ foldl' (\xs x -> removeDups (map (smplSetUnion . concat . (x:)) xs : xs )) [[]] es'
    where es' = filter (not . null) $ map (filter (/= Empty)) es

-- |Power set generator
subs :: [a] -> [[a]]
subs = foldl' (\xs x -> xs ++ map (x:) xs) [[]]

-- |Remove duplicates from a list.
removeDups :: (Eq a) => [a] -> [a]
removeDups [] = []
removeDups (x:xs) = x : removeDups ys
    where ys = [y | y <- xs, y /= x]

-- |Checks a value of type 'ProbSpace' for correctness, and returns a value of
-- type 'TestResult'.
checkProbMeas :: ProbSpace -> TestResult
checkProbMeas ps
    | cs /= Pass                                      = cs
    | not (noDupEvents (measure ps))                  = Fail DupEventsInMeas
    | not $ all (\m -> getProb m == 0.0) (filter (\m -> getEvent m == [] || getEvent m == [Empty]) (measure ps))
                                                      = Fail NullEventNonZeroProb
    | not $ all (\m -> getProb m == 1.0) (filter (\m -> getEvent m == space ps) (measure ps))
                                                      = Fail CertainEventNonUnityProb
    | not $ all (\m -> 1.0 - getProb m - getProb (head (filter (\m' -> sort (getEvent m') == getCompEvent (space ps) (getEvent m)) (measure ps))) < eps) (measure ps)
                                                      = Fail EventAndCompNoSumOne
    | otherwise                                       = Pass
  where cs = checkSigma ps

{- |Custom data structure, used for constructing individual test cases.

/Fields:/

* ps - a pointer to the ProbSpace being tested

* res - the expected result

* name - a label for identifying this test

-}
data ProbSpaceTest = ProbSpaceTest {
    ps    :: ProbSpace
    ,res  :: TestResult -- expected test result
    ,name :: String
}

-- |Takes a test case and returns a string indicating the result of the test.
checkSpace :: ProbSpaceTest -> IO Bool
checkSpace pst = do
    putStr ("Checking  " ++ name pst ++ "... ")
    if result == res pst then do
        putStrLn "Ok."
        return True
      else do
        putStrLn $ "FAILED!: Expected " ++ show (res pst) ++ "; Got " ++ show result
        return False
    where result = checkProbMeas (ps pst)