----------------------------------------------------------------------------- -- Copyright 2020, Ideas project team. This file is distributed under the -- terms of the Apache License 2.0. For more information, see the files -- "LICENSE.txt" and "NOTICE.txt", which are included in the distribution. ----------------------------------------------------------------------------- module Domain.Statistics.Data ( Data (..) , isSingleSample , sampleSize , sampleSum , sampleMean , sampleVariance ) where import Ideas.Common.Rewriting import Domain.Statistics.Symbols -- | Data component data Data = SingleSample [Double] -- Single sample | TwoSample [Double] [Double] Bool -- Two samples, either paired or not deriving (Eq, Show, Read) instance IsTerm Data where toTerm (SingleSample xs) = unary singleSampleSymbol (toTerm xs) toTerm (TwoSample xs ys b) = ternary twoSampleSymbol (toTerm xs) (toTerm ys) (toTermBool b) fromTerm (TCon s [x]) | s == singleSampleSymbol = SingleSample <$> fromTerm x fromTerm (TCon s [xs, ys, b]) | s == twoSampleSymbol = TwoSample <$> fromTerm xs <*> fromTerm ys <*> fromTermBool b fromTerm t = fail $ "Invalid term: " ++ show t ++ " not Data" -- | Utility functions isSingleSample :: Data -> Bool isSingleSample (SingleSample _) = True isSingleSample _ = False -- | sampleSize :: Data -> Int sampleSize (SingleSample xs) = length xs sampleSize (TwoSample xs _ True) = length xs -- Both samples should have the same sample size sampleSize _ = error "two sample sizes for unpaired samples" -- | sampleSum :: Data -> [Double] sampleSum (SingleSample xs) = [sum xs] sampleSum (TwoSample xs ys _) = [sum xs, sum ys] -- | sampleMean :: Data -> [Double] sampleMean (SingleSample xs) = [ sum xs / fromIntegral (length xs) ] sampleMean (TwoSample xs ys _) = [ sum xs / fromIntegral (length xs) , sum ys / fromIntegral (length ys) ] -- | sampleVariance :: Bool -> Data -> [Double] sampleVariance True = sampleVarianceT sampleVariance False = sampleVarianceZ -- | sampleVarianceZ :: Data -> [Double] sampleVarianceZ (SingleSample xs) = let mean = head $ sampleMean (SingleSample xs) in [foldr (\x r -> r + (x - mean) ** 2) 0 xs / fromIntegral (length xs)] sampleVarianceZ (TwoSample xs ys _) = let (mean1:mean2:_) = sampleMean (TwoSample xs ys False) in [ foldr (\x r -> r + (x - mean1) ** 2) 0 xs / fromIntegral (length xs) , foldr (\x r -> r + (x - mean2) ** 2) 0 ys / fromIntegral (length ys) ] -- | sampleVarianceT :: Data -> [Double] sampleVarianceT (SingleSample xs) = let mean = head $ sampleMean (SingleSample xs) in [foldr (\x r -> r + (x - mean) ** 2) 0 xs / fromIntegral (length xs - 1)] sampleVarianceT (TwoSample xs ys _) = let (mean1:mean2:_) = sampleMean (TwoSample xs ys False) in [ foldr (\x r -> r + (x - mean1) ** 2) 0 xs / fromIntegral (length xs - 1) , foldr (\x r -> r + (x - mean2) ** 2) 0 ys / fromIntegral (length ys - 1) ]