{- |RandProc.hs - a Haskell library for working with random processes in a mathematically rigorous way (Concepts taken from /Random Processes - a Mathematical Approach for Engineers/ by: - Robert M. Gray - Lee D. Davisson Prentice-Hall Information and System Sciences Series, Thomas Kailath, Series Editor) $Id: RandProc.hs 31 2011-06-22 13:49:48Z dbanas $ David Banas 12 March 2011 Copyright (c) 2011 by David Banas; All rights reserved World wide. /Revision History:/ [@Date SVN #@] Description [@2011-03-13 3@] Data structures stabilized. 'isSigma' working under minimal, discrete sample testing. [@2011-03-18 4@] Added 'isProbMeas', as well as monadic debugging versions of both it and 'isSigma'. Added an example probability space representing a fair die. [@2011-03-29 7@] Custom intersection functions added and briefly tested. [@2011-04-02 8@] Custom union functions added and briefly tested. Solution is crude: it is O(N^2), and requires 2 passes over the sample list every time a join is successful. Perhaps, a pre-sort? [@2011-06-06 9@] Attempted fix of 'getCompEvent' Added 'smplComp' function, as helper to revised 'getCompEvent'. Changed 'Point' to accept Double. Moved all sample spaces to new file, 'Main.hs'. Added input sorting to 'range'. Changed Ranges to be open intervals, in order to allow for complementing out a Point from them. [@2011-06-11 10@] Major re-write. 'getCompEvent' fixed. All 5 test spaces checking out ok. [@2011-06-18 21@] Removed sample set order dependency from 'checkSigma'. All 7 test spaces checking out ok. [@2011-06-19 22@] Added 'union of events is an event' test to 'checkSigma'. [@2011-06-20 23@] Changed 'Event' from data constructor to type alias, in order to eliminate many instances of 'Event . f . getSamps' code. [@2011-06-20 25@] Modified 'smpsSetInt' to use a fold. [@2011-06-20 26@] Defined public interface. [@2011-06-21 27@] Modified comments for Haddock, and generated HTML docs. [@2011-06-22 31@] Moved into 'Data' directory. [End of Subversion revision history] This source has been moved to darcs. [@2011-06-27@] Made `smplSetUnion` more efficient, and tuned remaining performance bottlenecks. /To Do:/ -} {-# LANGUAGE FlexibleInstances, TypeSynonymInstances, OverlappingInstances, NoMonomorphismRestriction #-} module Data.RandProc ( ProbSpace(ProbSpace) ,Measure(Measure) ,Sample(Empty) ,TestResult(..) ,ErrType(..) ,checkProbMeas ,point ,range ,makeProbSpace ,rangeBegin ,rangeEnd ,getProb ,getEvent ,getCompEvent ,eventInt ,smplComp ,isElem ,noDupEvents ,smplInt ,smplSetInt ,smplUnion ,smplSetUnion ,checkSigma ,getRsltStr ) where import Data.List eps :: Double eps = 0.000001 -- Used in floating point "equality" tests. {- |We take a probability space to consist of the following: - an 'abstract space' composed of either discrete or continuous (or a mix) samples - an 'event space', which must be a Sigma field defined over the abstract space - a 'probability measure' defined over the event space [Note:] For the sake of efficient coding, the /event space/ and the /probability measure/ are combined in the Haskell data structure, below. This is permissable, because there has to be a 1:1 correspondance between them anyway. And it is preferable, because it: - keeps the probabilities more closely associated w/ the events, and - avoids duplication of code (i.e. - the list of events). -} data ProbSpace = ProbSpace { space :: [Sample] ,measure :: [Measure] } deriving (Show) {- |This is our abstract data type, which represents a sample in the abstract space. It has a constructor representing every possible element in the abstract space we're modeling. (Currently, just points and ranges of /Double/s.) Normally, none of the constructors of this type will be called directly. Instead, helper functions are provided, such as 'point' and 'range', which hide the implementation details from the user, and present a stable interface. Currently, the sole exception to the above is the /Empty/ constructor, which is really just a hack intended to put off the job of making the functions in this library more intelligent, with regard to their handling of empty lists. -} data Sample = Point Double | Range (Double, Double) | Empty | Full deriving (Eq, Show) -- [Note:] When constructing a range, it is the user's responsibility to ensure -- that the first element of the couple is less than the second. instance Ord Sample where -- (<) _ < Empty = False Empty < _ = True Full < _ = False _ < Full = True (Point p) < (Point p') = p < p' (Point p) < (Range (r1, _)) = p <= r1 (Range (r1, r2)) < (Point p) = Point p >= Range (r1, r2) (Range (r1, r2)) < (Range (r3, r4)) | r1 == r3 = r2 < r4 | otherwise = r1 < r3 -- (>) Empty > _ = False _ > Empty = True _ > Full = False Full > _ = True (Point p) > (Point p') = p > p' (Point p) > (Range (r1, r2)) = Point p >= Range (r1, r2) (Range (r1, r2)) > (Point p) = Point p < Range (r1, r2) (Range (r1, r2)) > (Range (r3, r4)) | r1 == r3 = r2 > r4 | otherwise = r1 > r3 -- (<=), (>=) (<=) s1 = not . (s1 >) (>=) s1 = not . (s1 <) -- custom data type and helper function for communicating sample types: -- (Helpful in places where pattern matching can't be used.) data SampleType = STPoint | STRange | STEmpty | STFull deriving (Eq, Show) sampleType :: Sample -> SampleType sampleType (Point _) = STPoint sampleType (Range _) = STRange sampleType Empty = STEmpty sampleType Full = STFull -- |The custom type /Event/ is just an alias for a list of Samples. type Event = [Sample] -- |/Measure/ has 2 fields: -- -- * /event/ - a list of samples from the space, and -- -- * /prob/ - a number between 0 and 1 giving the events probability of -- occurence. data Measure = Measure { event :: Event ,prob :: Double } deriving (Eq, Ord, Show) -- |This is the helper function intended to be used for constructing a point sample. point :: Double -> Sample point = Point -- |This is the helper function intended to be used for constructing a range sample. -- The range is considered /open/. That is, its end points are not included. range :: (Double, Double) -> Sample range (a, b) | a == b = point a | a < b = Range (a, b) | otherwise = Range (b, a) -- |This helper function generates a complete and valid probability space, -- given a discrete sample space and set of probabilities. makeProbSpace :: [(Sample, Double)] -> ProbSpace makeProbSpace [] = ProbSpace { space = [ Empty ,Full ] ,measure = [ Measure { event = [Empty] ,prob = 0 } ,Measure { event = [Full] ,prob = 1 } ] } makeProbSpace ps = ProbSpace { space = [fst p | p <- ps] ,measure = Measure { event = [Empty] ,prob = 0 } : [Measure e (sum [snd p | p <- ps, fst p `elem` e]) | e <- ss] } where ss = filter (not . null) $ subs $ map fst ps -- Helpful info getters: -- |Gets the beginning point of a range, which is /not/ included in the range, -- since ranges are considered to be open. rangeBegin :: Sample -> Double rangeBegin (Point _) = undefined rangeBegin (Range (a,_)) = a rangeBegin Empty = undefined rangeBegin Full = undefined -- |Gets the ending point of a range, which is /not/ included in the range, rangeEnd :: Sample -> Double rangeEnd (Point _) = undefined rangeEnd (Range (_,b)) = b rangeEnd Empty = undefined rangeEnd Full = undefined -- |Extracts the probability from a Measure. getProb :: Measure -> Double getProb = prob -- |Extracts the Event from a Measure. getEvent :: Measure -> Event getEvent = event -- |Get the complement of an event from the sample space. getCompEvent :: [Sample] -> Event -> Event getCompEvent [] _ = [Empty] getCompEvent (s:ss) e = smplSetUnion $ foldl eventInt [s] (map (smplComp s) e) ++ getCompEvent ss e -- |Calculates the intersection of 2 events (i.e. - list of samples). eventInt :: Event -> Event -> Event eventInt s1 s2 | null (filter (/= Empty) s1) = [Empty] | null (filter (/= Empty) s2) = [Empty] | otherwise = smplSetUnion $ concatMap (\s -> (map (smplInt s) s1)) s2 -- |Returns that portion of the first sample that is disjoint from the second. smplComp :: Sample -> Sample -> [Sample] smplComp Empty _ = [Empty] smplComp s Empty = [s] smplComp (Point n) (Point m) | m == n = [Empty] | otherwise = [Point n] smplComp (Point n) (Range (a, b)) | (n > a) && (n < b) = [Empty] | otherwise = [Point n] smplComp (Range (a, b)) (Point n) | (n > a) && (n < b) = [Range (a, n), Range (n, b)] | otherwise = [Range (a, b)] smplComp (Range (a, b)) (Range (c, d)) | (c >= b) || (a >= d) = [Range (a, b)] | (c <= a) && (d >= b) = [Empty] | (c > a) && (d < b) = [Range (a, c), Range (d, b), Point c, Point d] | a < c = [Range (a, c), Point c] | otherwise = [Range (d, b), Point d] smplComp _ Full = [Empty] smplComp Full _ = undefined -- |Determine if a sample is an element of a space. -- -- (Need this, as opposed to just using `elem`, in order to accomodate ranges.) isElem :: [Sample] -> Sample -> Bool isElem [] _ = False isElem _ Empty = True isElem (s:ss) s' = testElem s s' || isElem ss s' -- testElem : Test whether the second sample is an element of the first. testElem :: Sample -> Sample -> Bool testElem Empty _ = False testElem _ Empty = True testElem (Point x) (Point y) = x == y testElem (Point _) (Range _) = False testElem (Range (x, y)) (Point z) = (z > x) && (z < y) testElem (Range (x, y)) (Range (w, z)) = (w >= x) && (z <= y) testElem Full _ = True testElem _ Full = False -- |Checks a list of measures against duplicate events. noDupEvents :: [Measure] -> Bool noDupEvents [] = True noDupEvents (m:ms) = notElem (event m) es && noDupEvents ms where es = map event ms -- custom union and intersection functions for our 'Sample' data type -- |Returns the intersection between 2 samples. smplInt :: Sample -> Sample -> Sample smplInt Empty _ = Empty smplInt _ Empty = Empty smplInt Full s = s smplInt s Full = s smplInt (Point n) (Point m) | m == n = Point n | otherwise = Empty smplInt (Point n) (Range (a, b)) | (n > a) && (n < b) = Point n | otherwise = Empty smplInt (Range (a, b)) (Point n) = smplInt (Point n) (Range (a, b)) smplInt (Range (a, b)) (Range (c, d)) | (c >= b) || (a >= d) = Empty | otherwise = Range (max a c, min b d) -- |Reduces a list of samples to a single sample representing their intersection. smplSetInt :: [Sample] -> Sample smplSetInt = foldl smplInt Full -- |Returns the union of 2 samples. -- -- Unlike 'smplInt', /smplUnion/ must return a list since, if the 2 input -- samples aren't adjacent or overlapping, the union of them is a list -- containing both. smplUnion :: Sample -> Sample -> [Sample] smplUnion Empty s = [s] smplUnion s Empty = [s] smplUnion (Point n) (Point m) | m == n = [Point n] | m < n = [Point m, Point n] | otherwise = [Point n, Point m] smplUnion (Point n) (Range (a, b)) -- Ranges are considered open, in order to allow complementing a point from them. | (n > a) && (n < b) = [Range (a, b)] | n >= b = [Range (a, b), Point n] | otherwise = [Point n, Range (a, b)] -- The union operation is commutative. smplUnion (Range (a, b)) (Point n) = smplUnion (Point n) (Range (a, b)) smplUnion (Range (a, b)) (Range (c, d)) | c >= b = [Range (a, b), Range (c, d)] | a >= d = [Range (c, d), Range (a, b)] | otherwise = [Range (min a c, max b d)] smplUnion Full _ = [Full] smplUnion _ Full = [Full] -- |Absorbs a single input sample into a reverse-sorted list, as far as possible, -- via unioning. Assumes the single sample to be > the first list entry. smplUnionRecursRev :: [Sample] -> Sample -> [Sample] smplUnionRecursRev [] s = [s] smplUnionRecursRev (Empty : ss) s = smplUnionRecursRev ss s smplUnionRecursRev (s : ss) Empty = smplUnionRecursRev ss s smplUnionRecursRev (Point m : ss) (Point n) | m == n = smplUnionRecursRev ss (Point n) | otherwise = Point n : Point m : ss smplUnionRecursRev ((Range (a, b)) : ss) (Point n) | (n > a) && (n < b) = smplUnionRecursRev ss (Range (a, b)) | otherwise = Point n : Range (a, b) : ss smplUnionRecursRev ((Point n) : ss) (Range (a, b)) = Range (a, b) : Point n : ss smplUnionRecursRev ((Range (c, d)) : ss) (Range (a, b)) | a >= d = Range (a, b) : Range (c, d) : ss | otherwise = smplUnionRecursRev ss (Range (c, max b d)) smplUnionRecursRev _ Full = [Full] smplUnionRecursRev (Full : _) _ = [Full] -- |Collapses a list of samples down to the maximally reduced set, which still -- composes a proper union of the input. smplSetUnion :: [Sample] -> [Sample] smplSetUnion = consolidateRPR . foldl smplUnionRecursRev [] . sort -- consolidateRPR : Reduces all occurences of: -- Range (a,b), Point (b), Range (b,c) into: -- Range (a,c). consolidateRPR :: [Sample] -> [Sample] consolidateRPR = scanRPR . sort -- scanRPR : Scans through a list of samples looking for the pattern: -- Range (a,b), Point (b), Range (b,c) and consolidates those into: -- Range (a,c). -- -- Note) This function depends upon receiving a SORTED sample list! scanRPR :: [Sample] -> [Sample] scanRPR [] = [] scanRPR (s:ss) | sampleType s == STRange = if headIsPoint (rangeEnd s) ss && headIsRange (tail ss) && rangeBegin (head (tail ss)) == rangeEnd s then scanRPR $ Range (rangeBegin s, rangeEnd (head (tail ss))) : tail (tail ss) else s : scanRPR ss | otherwise = s : scanRPR ss -- headIsPoint : Tests whether the head of the incoming list is a particular -- Point value. headIsPoint :: Double -> [Sample] -> Bool headIsPoint _ [] = False headIsPoint a (s:_) = s == Point a -- headIsRange : Tests whether the head of the incoming list is a -- Range. headIsRange :: [Sample] -> Bool headIsRange [] = False headIsRange (s:_) = sampleType s == STRange -- |Custom data type used for test results and error reporting. data TestResult = Fail {err :: ErrType} | Pass deriving (Show) instance Eq TestResult where Pass == Pass = True Fail e == Fail e' = e == e' _ == _ = False -- |Custom data type for reporting different errors data ErrType = UnknownErr | EmptySampleSpace | EmptyEventSpace | MissingNullEvent | MissingCertainEvent | BadEventSamples | MissingCompEvent | MissingUnionEvent | EventMeasLenMismatch | DupEventsInMeas | MissingEventsInMeas | NullEventNonZeroProb | CertainEventNonUnityProb | EventAndCompNoSumOne deriving (Eq, Show) -- |getErrStr : Turns a value of type /ErrType/ into a more readable string. getErrStr :: ErrType -> String getErrStr UnknownErr = "Unknown error" getErrStr EmptySampleSpace = "Empty sample space" getErrStr EmptyEventSpace = "Empty event space" getErrStr MissingNullEvent = "The null event is missing from the event space." getErrStr MissingCertainEvent = "The certain event is missing from the event space." getErrStr BadEventSamples = "At least one event contains samples not in the sample space." getErrStr MissingCompEvent = "At least one event's compliment is missing from the event space." getErrStr MissingUnionEvent = "At least one union of events is missing from the event space." getErrStr EventMeasLenMismatch = "Lengths of event and measure lists don't match." getErrStr DupEventsInMeas = "There are duplicate events in the measure list." getErrStr MissingEventsInMeas = "Some events aren't covered in the measure list." getErrStr NullEventNonZeroProb = "The null event has been assigned a non-zero probability." getErrStr CertainEventNonUnityProb = "The certain event has been assigned a probability other than 1." getErrStr EventAndCompNoSumOne = "At least one pair of event and compliment have probabillities that don't add to 1." -- |Turns a value of type /TestResult/ into a human readable string. getRsltStr :: TestResult -> String getRsltStr Pass = "Ok" getRsltStr tr = getErrStr $ err tr -- |Checks whether event space is actually a Sigma field over the sample space. checkSigma :: ProbSpace -> TestResult checkSigma ps | null (filter (/= Empty) sp) = Fail EmptySampleSpace | null es = Fail EmptyEventSpace | notElem [Empty] es = Fail MissingNullEvent | notElem sp es && notElem [Full] es = Fail MissingCertainEvent | not $ all (all (\ s -> isElem sp s || (s == Empty))) es = Fail BadEventSamples | not $ all (\e -> getCompEvent sp e `elem` es) es = Fail MissingCompEvent | not $ all (`elem` es) (eventUnions es) = Fail MissingUnionEvent | otherwise = Pass where es = map (sort . event) (measure ps) ss = filter (not . null) $ map (filter (/= Empty) . event) (measure ps) sp = space ps -- |Power set generator, specific to a list of `Event`s -- -- Made necessary by the fact that generating all possible unions of all -- possible events grows as 2^(2^N), N = # of samples, due to much redundancy. eventUnions :: [Event] -> [Event] eventUnions es = concat $ foldl' (\xs x -> removeDups (map (smplSetUnion . concat . (x:)) xs : xs )) [[]] es' where es' = filter (not . null) $ map (filter (/= Empty)) es -- |Power set generator, which removes duplicates as it generates subs :: [a] -> [[a]] subs = foldl' (\xs x -> xs ++ map (x:) xs) [[]] -- |Remove duplicates from a list. removeDups :: (Eq a) => [a] -> [a] removeDups [] = [] removeDups (x:xs) = x : removeDups ys where ys = [y | y <- xs, y /= x] -- |Checks a value of type 'ProbSpace' for correctness, and returns a value of -- type 'TestResult'. checkProbMeas :: ProbSpace -> TestResult checkProbMeas ps | cs /= Pass = cs | not (noDupEvents (measure ps)) = Fail DupEventsInMeas | not $ all (\m -> getProb m == 0.0) (filter (\m -> getEvent m == [] || getEvent m == [Empty]) (measure ps)) = Fail NullEventNonZeroProb | not $ all (\m -> getProb m == 1.0) (filter (\m -> getEvent m == space ps) (measure ps)) = Fail CertainEventNonUnityProb | not $ all (\m -> 1.0 - getProb m - getProb (head (filter (\m' -> sort (getEvent m') == getCompEvent (space ps) (getEvent m)) (measure ps))) < eps) (measure ps) = Fail EventAndCompNoSumOne | otherwise = Pass where cs = checkSigma ps