----------------------------------------------------------------------------- -- Copyright 2020, Ideas project team. This file is distributed under the -- terms of the Apache License 2.0. For more information, see the files -- "LICENSE.txt" and "NOTICE.txt", which are included in the distribution. ----------------------------------------------------------------------------- {-# LANGUAGE FlexibleInstances #-} module Domain.Statistics.Component ( Choice (..), Component (..), ComponentId (..) , Sided (..), TestType (..), RejectionHypotheses(..) , choices, getCId, fromCId, sidedRelation , isData, isExpr, isRelation , isTestType, isSided, isRejectionHypotheses, isConclusion ) where import Control.Applicative ((<|>)) import Control.Monad import Data.Char import Data.Function import Data.List import Domain.Statistics.Symbols import Domain.Math.Data.Relation import Domain.Math.Expr import Domain.Statistics.Data import Ideas.Common.Rewriting hiding (trueSymbol,falseSymbol) import Prelude hiding (lookup) data Component = CData Data -- Data component type (sample data) | CExpr Expr -- Expression component type | CRelation (Relation Expr) | CChoice Choice -- Expressing various choices deriving Eq instance Show Component where show (CRelation r) = show r show (CExpr e) = show e show (CChoice c) = show c show (CData d) = show d instance IsTerm Component where toTerm (CData x) = unary cDataSymbol (toTerm x) toTerm (CChoice x) = toTerm x toTerm (CRelation x) = toTerm x toTerm (CExpr x) = toTerm x -- Check to see if the whole x can be parsed as an expression fromTerm (TCon s [val]) | s == cDataSymbol = CData <$> fromTerm val -- First try to convert it to a relation because relation is more specific -- than an Expression (IMPORTANT) fromTerm t = CChoice <$> fromTerm t <|> CRelation <$> fromTerm t <|> CExpr <$> fromTerm t data Choice = TestType TestType | Sided Sided | RejectionHypotheses RejectionHypotheses | Conclusion Bool deriving Eq instance Show Choice where show (TestType tt) = show tt show (Sided sd) = show sd show (RejectionHypotheses rh) = show rh show (Conclusion c) = map toLower (show c) instance IsTerm Choice where toTerm (TestType x) = toTerm x toTerm (Sided x) = toTerm x toTerm (RejectionHypotheses rh) = toTerm rh toTerm (Conclusion x) = toTermBool x fromTerm t = Sided <$> fromTerm t <|> TestType <$> fromTerm t <|> RejectionHypotheses <$> fromTerm t <|> Conclusion <$> fromTermBool t choices :: [Choice] choices = [ TestType tt | tt <- [minBound .. maxBound] ] ++ [ Sided sd | sd <- [minBound .. maxBound] ] ++ [ RejectionHypotheses rj | rj <- [minBound .. maxBound] ] ++ [ Conclusion c | c <- [minBound .. maxBound] ] data TestType = TTestOne | TTestTwo | TTestPaired | ZTest | RPearson | Anova | ChiSquared deriving (Eq, Enum, Bounded) instance Show TestType where -- to do: use same string as corresponding symbols (without dashes) show TTestOne = "t-test-one-sample" show TTestTwo = "t-test-two-sample" show TTestPaired = "t-test-paired" show ZTest = "z-test" show RPearson = "r-pearson" show Anova = "anova" show ChiSquared = "chi-squared" data Sided = TwoSided | LeftSided | RightSided deriving (Eq, Enum, Bounded) instance Show Sided where show TwoSided = "two" show LeftSided = "left" show RightSided = "right" sidedRelation :: Sided -> Expr -> Expr -> Relation Expr sidedRelation LeftSided lhs rhs = makeType LessThan lhs rhs sidedRelation RightSided lhs rhs = makeType GreaterThan lhs rhs sidedRelation TwoSided lhs rhs = makeType GreaterThan (abs lhs) rhs data RejectionHypotheses = RejectH0 | AcceptH0 | DontRejectH0 | RejectH1 | AcceptH1 | DontRejectH1 deriving (Eq, Enum, Bounded) instance Show RejectionHypotheses where show RejectH0 = "rejecth0" show AcceptH0 = "accepth0" show DontRejectH0 = "dontrejecth0" show RejectH1 = "rejecth1" show AcceptH1 = "accepth1" show DontRejectH1 = "dontrejecth1" data ComponentId = DataSet | SampleSize -- n | SampleMean -- M | SampleVariance -- s^2 | SampleSdev -- s | ObservedFrequencies -- fo (two-dimensional matrix), for chi squared | ObservedRowTotals | ObservedColumnTotals | ObservedTotal | ExpectedFrequencies -- fe (two-dimensional matrix), for chi squared | PopulationMean -- mu | PopulationSdev -- sigma | StandardError -- sigmaM or SEM | SignificanceLevel -- alpha | NullHypothesis | AlternativeHypothesis | PooledVariance -- S_p | Df -- df | DfBetween -- for Anova | DfWithin -- for Anova | Groups | Sidedness | TestChoice | Critical | PValue | RejectionCritical | ConclusionPValue | ConclusionCritical | ConclusionHypotheses | TestFormula | TestValue | Proportion -- P | Correlation -- r | One ComponentId -- Belonging to the first sample (in case of multiple samples) | Two ComponentId -- Belonging to the second sample | Other String deriving (Eq, Show) instance Ord ComponentId where compare = compare `on` getCId instance IsTerm ComponentId where toTerm = toTerm . getCId fromTerm t = fromCId <$> fromTerm t getCId :: ComponentId -> String getCId (Other nm) = nm getCId (One nm) = getCId nm ++ "1" getCId (Two nm) = getCId nm ++ "2" getCId cId = case lookup cId cIdTable of Just nm -> nm _ -> error "Missing name for ComponentId" fromCId :: String -> ComponentId fromCId s | suffix == "1" = One (fromCId $ init s) | suffix == "2" = Two (fromCId $ init s) | otherwise = case [ x | (x, y) <- cIdTable, y == s ] of [x] -> x _ -> Other s where suffix = take 1 (reverse s) cIdTable :: [(ComponentId, String)] cIdTable = [ (DataSet, "data") , (SampleSize, "n") , (SampleMean, "samplemean") -- was: "mean" , (SampleVariance, "samplevariance") -- was: "variance" , (SampleSdev, "samplesd") -- was: "sd" , (ObservedFrequencies, "fo") , (ObservedRowTotals, "forowtotal") , (ObservedColumnTotals, "focolumntotal") , (ObservedTotal, "fototal") , (ExpectedFrequencies, "fe") , (PopulationMean, "populationmean") -- was: "pmean" , (PopulationSdev, "populationsd") -- was "psd" , (StandardError, "standarderror") -- was: "se" , (SignificanceLevel, "alpha") , (NullHypothesis, "h0") , (AlternativeHypothesis, "ha") , (PooledVariance, "pooledvariance") -- was "sp2" , (Df, "df") , (DfWithin, "dfwithin") , (DfBetween, "dfbetween") , (Groups, "groups") , (Sidedness, "sided") , (TestChoice, "test") , (Critical, "critical") , (PValue, "pvalue") , (RejectionCritical, "rejectioncritical") -- was: "rejection" , (ConclusionPValue, "conclusionpvalue") , (ConclusionCritical, "conclusioncritical") -- was: "conclusion" , (ConclusionHypotheses, "conclusionhypotheses") , (TestFormula, "testformula") , (TestValue, "testvalue") , (Proportion, "p") , (Correlation, "r") ] ---------------------------------------------------------- -- Component utilities isData :: Monad m => Component -> m Data isData (CData d) = return d isData _ = failMsg "data" isExpr :: Monad m => Component -> m Expr isExpr (CExpr e) = return e isExpr _ = failMsg "expr" isRelation :: Monad m => Component -> m (Relation Expr) isRelation (CRelation r) = return r isRelation _ = failMsg "relation" isTestType :: Monad m => Component -> m TestType isTestType (CChoice (TestType tt)) = return tt isTestType _ = failMsg "testtype" isSided :: Monad m => Component -> m Sided isSided (CChoice (Sided s)) = return s isSided _ = failMsg "sided" isRejectionHypotheses :: Monad m => Component -> m RejectionHypotheses isRejectionHypotheses (CChoice (RejectionHypotheses h)) = return h isRejectionHypotheses _ = failMsg "rejectionhypotheses" isConclusion :: Monad m => Component -> m Bool isConclusion (CChoice (Conclusion b)) = return b isConclusion _ = failMsg "conclusion" failMsg :: Monad m => String -> m a failMsg s = fail ("component is not of type " ++ s) ---------------------------------------------------------- -- Component utilities -- | Types that have only atomic constructors are easy and follow a pattern for -- their implementation instance IsTerm TestType where toTerm TTestOne = symbol tTestOneSymbol toTerm TTestTwo = symbol tTestTwoSymbol toTerm TTestPaired = symbol tTestPairedSymbol toTerm ZTest = symbol zTestSymbol toTerm RPearson = symbol rPearsonSymbol toTerm Anova = symbol anovaSymbol toTerm ChiSquared = symbol chiSquaredSymbol fromTerm (TCon s []) | s == tTestOneSymbol = return TTestOne | s == tTestTwoSymbol = return TTestTwo | s == tTestPairedSymbol = return TTestPaired | s == zTestSymbol = return ZTest | s == rPearsonSymbol = return RPearson | s == anovaSymbol = return Anova | s == chiSquaredSymbol = return ChiSquared fromTerm (TVar "rpearson") = return RPearson -- temporary fix!!! fromTerm (TVar "anova") = return Anova -- temporary fix!!! fromTerm (TVar "chisquared") = return ChiSquared -- temporary fix!!! fromTerm t = fail $ "Invalid term: " ++ show t ++ " not TestType" instance IsTerm Sided where toTerm TwoSided = symbol twoSidedSymbol toTerm LeftSided = symbol leftSidedSymbol toTerm RightSided = symbol rightSidedSymbol fromTerm (TCon s []) | s == twoSidedSymbol = return TwoSided | s == leftSidedSymbol = return LeftSided | s == rightSidedSymbol = return RightSided fromTerm t = fail $ "Invalid term: " ++ show t ++ " not Sided" instance IsTerm RejectionHypotheses where toTerm RejectH0 = symbol rejectH0Symbol toTerm AcceptH0 = symbol acceptH0Symbol toTerm DontRejectH0 = symbol dontRejectH0Symbol toTerm RejectH1 = symbol rejectH1Symbol toTerm AcceptH1 = symbol acceptH1Symbol toTerm DontRejectH1 = symbol dontRejectH1Symbol fromTerm (TCon s []) | s == rejectH0Symbol = return RejectH0 | s == acceptH0Symbol = return AcceptH0 | s == dontRejectH0Symbol = return DontRejectH0 | s == rejectH1Symbol = return RejectH1 | s == acceptH1Symbol = return AcceptH1 | s == dontRejectH1Symbol = return DontRejectH1 fromTerm t = fail $ "Invalid term: " ++ show t ++ " not RejectionHypotheses"