----------------------------------------------------------------------------- -- Copyright 2020, Ideas project team. This file is distributed under the -- terms of the Apache License 2.0. For more information, see the files -- "LICENSE.txt" and "NOTICE.txt", which are included in the distribution. ----------------------------------------------------------------------------- -- to do: split constraints in check against initial value, and inferred value module Domain.Hypothesis.Constraints ( hypothesisConstraints, equalDouble ) where import Control.Monad import Control.Applicative import Data.Maybe import Domain.Hypothesis.Common import Domain.Hypothesis.Rules import Domain.Math.Data.Relation import Domain.Math.Numeric.Views import Domain.Statistics.ComponentSet import Ideas.Text.OpenMath.Dictionary.Arith1 import Ideas.Common.Constraint import Domain.Statistics.Views import Ideas.Common.Library hiding (Predicate) import Domain.Math.Expr.Data import Prelude hiding (until) hypothesisConstraints :: [Constraint ComponentSet] hypothesisConstraints = [ checkHA , checkH0 , checkAlpha , checkSided -- check this before checkCritical , checkTest -- check this before checkCritical , checkDf -- check this before checkCritical , checkDfAnova -- check this before checkCritical , checkGroups , checkCritical, checkRejectionCritical , checkSampleMean, checkSampleSdev, checkPopulationSdev , checkPopulationMean, checkSampleSize, checkCorrelation , checkStandardError , checkTestFormula, checkTestValue , checkPValue -- checking conclusions should be last , checkConclusionPValue, checkConclusionCritical, checkConclusionHypotheses , checkObservedTotals, checkExpectedFrequencies ] checkSampleMean, checkSampleSdev, checkPopulationSdev, checkPopulationMean, checkSampleSize, checkCorrelation :: Constraint ComponentSet checkSampleMean = checkForInitial "samplemean" SampleMean checkSampleSdev = checkForInitial "samplesdev" SampleSdev checkPopulationSdev = checkForInitial "populationsdev" PopulationSdev checkPopulationMean = checkForInitial "populationmean" PopulationMean checkSampleSize = checkForInitial "samplesize" SampleSize checkCorrelation = checkForInitial "correlation" Correlation checkForInitial :: String -> ComponentId -> Constraint ComponentSet checkForInitial s cid = makeConstraint ("check" # s) $ \cs -> do relevance $ guard $ isInitial cid cs && isDerived cid cs x <- get cid (initials cs) y <- get cid (derived cs) guard $ x == y checkAlpha :: Constraint ComponentSet checkAlpha = makeConstraint "check.alpha" $ \cs -> do relevance $ guard $ isDerived SignificanceLevel cs if isInitial SignificanceLevel cs then do -- initial alpha overrules the default value 0.05 x <- get SignificanceLevel (initials cs) y <- get SignificanceLevel (derived cs) guard $ x == y else do -- use the default value 0.05 alpha <- getExpr SignificanceLevel cs val <- matchM doubleView alpha guard $ val == 0.05 checkH0 :: Constraint ComponentSet checkH0 = makeConstraint "check.h0" $ \cs -> do relevance $ guard $ isDerived NullHypothesis cs test <- inferTestChoice cs case test of ChiSquared -> do h0 <- getExpr NullHypothesis cs guard (h0 == independent) _ -> do h0 <- getRelation NullHypothesis cs h0' <- h0FromHA <$> getRelation AlternativeHypothesis (initials cs) unless (leftHandSide h0 == leftHandSide h0') $ fail "parameter mismatch" unless (rightHandSide h0 == rightHandSide h0') $ fail "value mismatch" unless (relationType h0 == relationType h0' || relationType h0 == EqualTo) $ fail "sign mismatch" checkHA :: Constraint ComponentSet checkHA = makeConstraint "check.ha" $ \cs -> do relevance $ guard $ isDerived AlternativeHypothesis cs test <- inferTestChoice cs case test of ChiSquared -> do ha <- getExpr AlternativeHypothesis cs guard (ha == dependent) _ -> do ha <- getRelation AlternativeHypothesis cs ha' <- case get AlternativeHypothesis (initials cs) of Just (CRelation x) -> return x _ -> empty unless (leftHandSide ha == leftHandSide ha') $ fail "parameter mismatch" unless (rightHandSide ha == rightHandSide ha') $ fail "value mismatch" unless (relationType ha == relationType ha') $ fail "sign mismatch" checkConclusionPValue :: Constraint ComponentSet checkConclusionPValue = makeConstraint "check.conclusion.p-value" $ \cs -> do relevance $ guard $ isDerived ConclusionPValue cs conc <- getRelation ConclusionPValue cs let cs' = substitute cs p <- matchM doubleView =<< getExpr PValue cs' alpha <- matchM doubleView =<< getExpr SignificanceLevel cs' guard $ if p <= alpha then relationType conc == LessThanOrEqualTo else relationType conc == GreaterThan checkSided :: Constraint ComponentSet checkSided = makeConstraint "check.sided" $ \cs -> do relevance $ guard $ isDerived Sidedness cs -- TODO: make order of checking constraints explicit -- && isNothing (isViolated checkHA cs) sided <- getSided Sidedness cs case getTestType TestChoice (initials cs) of Just Anova -> unless (sided == RightSided) (fail "anova") Just ChiSquared -> unless (sided == RightSided) (fail "chi-squared") _ -> do ha <- getRelation AlternativeHypothesis cs guard $ sided == sidedFromHA ha checkTest :: Constraint ComponentSet checkTest = makeConstraint "check.test" $ \cs -> do relevance $ guard $ isDerived TestChoice cs if isInitial TestChoice cs then do -- initial testType overrules the validTests function x <- get TestChoice (initials cs) y <- get TestChoice (derived cs) guard $ x == y else do test <- getTestType TestChoice cs guard $ test `elem` validTests cs checkTestValue, checkTestFormula :: Constraint ComponentSet checkTestFormula = checkTestValueFor "test-formula" TestFormula checkTestValue = checkTestValueFor "test-value" TestValue checkTestValueFor :: String -> ComponentId -> Constraint ComponentSet checkTestValueFor s cid = makeConstraint ("check" # s) $ \cs -> do relevance $ guard $ isDerived cid cs rel <- getRelation cid cs let testTypes = inferTestChoices cs testType <- case leftHandSide rel of Var "z" | ZTest `elem` testTypes -> return ZTest Var "r" | RPearson `elem` testTypes -> return RPearson Var "F" | Anova `elem` testTypes -> return Anova Var "chisq" | ChiSquared `elem` testTypes -> return ChiSquared Var "t" -> case filter isTTest testTypes of [] -> fail "test mismatch" hd:_ -> return hd _ -> fail "test mismatch" let tv = rightHandSide rel sub = getSubstitution cs tv' <- matchM doubleView (sub |-> tv) -- special case: when checking test value, first check wether an initial -- test value is present case getRhsExpr TestValue (initials cs) of Just initialTv | cid == TestValue -> do val <- matchM doubleView initialTv testDoubles tv' val _ -> do target <- case getRhsExpr TestFormula (initials cs) of Just rhs -> return rhs _ | testType == ChiSquared -> rightHandSide <$> chiSquaredTestValue cs _ -> testFormulaFromTest testType (chooseTypeOfTest cs) target' <- matchM doubleView (sub |-> target) testDoubles tv' target' checkDf :: Constraint ComponentSet checkDf = makeConstraint "check.df" $ \cs -> do relevance $ guard $ isDerived Df cs let cs' = substitute cs df <- matchM doubleView =<< getExpr Df cs' case getExpr Df (initials cs) of Just initialDf -> do val <- matchM doubleView initialDf guard $ equalDouble df val _ -> do test <- inferTestChoice cs target <- case test of ChiSquared -> toExpr <$> chiSquaredDf cs _ -> degreesOfFreedomFromTest test (chooseTypeOfTest cs) let sub = getSubstitution cs target' <- matchM doubleView (sub |-> target) guard $ equalDouble df target' checkDfAnova :: Constraint ComponentSet checkDfAnova = makeConstraint "check.df-anova" $ \cs -> do relevance $ guard $ isDerived DfBetween cs || isDerived DfWithin cs let cs' = substitute cs (between, within) <- inferDfBetweenWithin cs' -- check df-between dfB <- getExpr DfBetween (derived cs) >>= matchM doubleView unless (equalDouble dfB between) $ fail "df-between" -- check df-within dfW <- getExpr DfWithin (derived cs) >>= matchM doubleView unless (equalDouble dfW within) $ fail "df-within" checkGroups :: Constraint ComponentSet checkGroups = makeConstraint "check.groups" $ \cs -> do relevance $ guard $ isDerived Groups cs groups <- getExpr Groups (derived cs) >>= matchM doubleView -- for now, only support for two groups guard (groups == 2) checkCritical :: Constraint ComponentSet checkCritical = makeConstraint "check.critical" $ \cs -> do relevance $ guard $ isDerived Critical cs unless (derived cs `contains` AlternativeHypothesis) $ fail "alternative hypothesis missing" let cs' = substitute cs critical <- getRelation Critical cs criticalValue <- matchM doubleView (rightHandSide critical) let testTypes = inferTestChoices cs testType <- case leftHandSide critical of Var "zcrit" | ZTest `elem` testTypes -> return ZTest Var "rcrit" | RPearson `elem` testTypes -> return RPearson Var "Fcrit" | Anova `elem` testTypes -> return Anova Var "chicrit" | ChiSquared `elem` testTypes -> return ChiSquared Var "tcrit" -> case filter isTTest testTypes of [] -> fail "test mismatch" hd:_ -> return hd this -> fail $ "test mismatch " ++ show this sided <- inferSidedness cs alpha <- matchM doubleView =<< getExpr SignificanceLevel cs value' <- case testType of Anova -> do (dfBetween, dfWithin) <- inferDfBetweenWithin cs computeCriticalF dfBetween dfWithin alpha ChiSquared -> do df <- chiSquaredDf cs computeCriticalChi sided alpha (fromIntegral df) _ -> do -- TO DO: use computeCriticalF for Anova let cs'' = case inferDf cs' of Just df -> substitute (append Df (CExpr df) cs') Nothing -> cs' let df = matchM doubleView =<< getExpr Df cs'' computeCritical testType sided alpha df testDoubles value' criticalValue checkRejectionCritical :: Constraint ComponentSet checkRejectionCritical = makeConstraint "check.rejectioncritical" $ \cs -> do relevance $ guard $ isDerived RejectionCritical cs unless (derived cs `contains` AlternativeHypothesis) $ fail "alternative hypothesis missing" crit <- getRelation RejectionCritical cs sided <- inferSidedness cs let testTypes = inferTestChoices cs (testType, rel) <- case withoutAbs (leftHandSide crit) of Var "z" | ZTest `elem` testTypes -> return (ZTest, sidedRelation sided (Var "z") (Var "zcrit")) Var "r" | RPearson `elem` testTypes -> return (RPearson, sidedRelation sided (Var "r") (Var "rcrit")) Var "F" | Anova `elem` testTypes -> return (Anova, sidedRelation sided (Var "F") (Var "Fcrit")) expr | expr == chiSquared && ChiSquared `elem` testTypes -> return (ChiSquared, sidedRelation sided chiSquared (Var "chicrit")) Var "t" -> case filter isTTest testTypes of [] -> fail "test mismatch" hd:_ -> return (hd, sidedRelation sided (Var "t") (Var "tcrit")) expr -> fail $ "test mismatch" ++ show expr unless (withoutAbs (leftHandSide rel) == withoutAbs (leftHandSide crit) && rightHandSide rel == rightHandSide crit) $ fail "test mismatch" unless (leftHandSide rel == leftHandSide crit && relationType rel == relationType crit) $ if testType == Anova then fail "anova mismatch" else if testType == ChiSquared then fail "chi-squared mismatch" else fail "sidedness mismatch" withoutAbs :: Expr -> Expr withoutAbs (Sym s [a]) | s == newSymbol absSymbol = a withoutAbs expr = expr checkPValue :: Constraint ComponentSet checkPValue = makeConstraint "check.p-value" $ \cs -> do relevance $ guard $ isDerived PValue cs unless (derived cs `contains` AlternativeHypothesis) $ fail "alternative hypothesis missing" let cs' = substitute cs t <- matchM doubleView =<< fmap rightHandSide (getRelation TestValue cs') test <- inferTestChoice cs pvalue <- matchM doubleView =<< getExpr PValue cs when (pvalue < 0 || pvalue > 1) $ fail "value not a probability" sided <- inferSidedness cs let cs'' = case inferDf cs' of Just df -> substitute (append Df (CExpr df) cs') Nothing -> cs' let df = matchM doubleView =<< getExpr Df cs'' pvalue' <- computePValue test sided t df -- compare with 3 decimals testDoublesWith 0.00055 pvalue' pvalue $ if derived cs `contains` TestValue then empty else fail "TestValue missing" checkConclusionCritical :: Constraint ComponentSet checkConclusionCritical = makeConstraint "check.conclusion-critical" $ \cs -> do relevance $ guard $ isDerived ConclusionCritical cs rej <- inferRejectionCritical cs -- to do: rejection critical is added to component set only to get the substituted relation let cs' = substitute (append RejectionCritical (CRelation rej) cs) rejection <- getRelation RejectionCritical cs' lhs <- matchM doubleView $ leftHandSide rejection rhs <- matchM doubleView $ rightHandSide rejection let result = eval (relationType rejection) lhs rhs conclusion <- getConclusion ConclusionCritical cs guard $ result == conclusion checkConclusionHypotheses :: Constraint ComponentSet checkConclusionHypotheses = makeConstraint "check.conclusion-hypotheses" $ \cs -> do relevance $ guard $ isDerived ConclusionHypotheses cs concl <- getRejectionHypotheses ConclusionHypotheses cs b1 <- case fmap relationType (inferConclusionPValue cs) of Just LessThanOrEqualTo | concl == RejectH0 -> return True Just GreaterThan | concl == DontRejectH0 -> return True Just _ -> fail "conclusion mismatch pvalue" Nothing -> return False b2 <- case inferConclusionCritical cs of Just False | concl == DontRejectH0 -> return True | concl `elem` [AcceptH0, RejectH1] -> fail "convention DontRejectH0" Just True | concl == RejectH0 -> return True | concl `elem` [AcceptH1, DontRejectH1] -> fail "convention RejectH0" Just _ -> fail "conclusion mismatch critical" Nothing -> return False guard (b1 || b2) checkStandardError :: Constraint ComponentSet checkStandardError = makeConstraint "check.standard-error" $ \cs -> do relevance $ guard $ isDerived StandardError cs rel <- getRelation StandardError cs n <- matchM doubleView =<< getExpr SampleSize cs case (leftHandSide rel, getRhsExpr PopulationSdev cs, getRhsExpr SampleSdev cs) of (Var "sigmaM", Just expr, Nothing) -> do psdev <- matchM doubleView expr se <- matchM doubleView (rightHandSide rel) testDoubles se (psdev/sqrt n) (Var "SEM", Nothing, Just expr) -> do sdev <- matchM doubleView expr se <- matchM doubleView (rightHandSide rel) testDoubles se (sdev/sqrt n) _ -> fail "standard error mismatch" checkObservedTotals :: Constraint ComponentSet checkObservedTotals = makeConstraint "check.observed-totals" $ \cs -> do relevance $ guard $ all (derived cs `contains`) [ObservedRowTotals, ObservedColumnTotals, ObservedTotal] rowTotals <- getExpr ObservedRowTotals cs >>= fromExpr columnTotals <- getExpr ObservedColumnTotals cs >>= fromExpr total <- getExpr ObservedTotal cs >>= fromExpr table <- getTable ObservedFrequencies cs let (expRowTotals, expColumnTotals, expTotal) = computeTotals table unless (rowTotals == expRowTotals) $ fail "rows" unless (columnTotals == expColumnTotals) $ fail "columns" unless (total == expTotal) $ fail "total" checkExpectedFrequencies :: Constraint ComponentSet checkExpectedFrequencies = makeConstraint "check.expected-frequencies" $ \cs -> do relevance $ guard $ derived cs `contains` ExpectedFrequencies expectedFrequencies <- getExpr ExpectedFrequencies cs >>= matchDoubleTable observed <- getTable ObservedFrequencies cs let totals = computeTotals observed computedFrequencies = computeExpectedFrequencies totals guard (equalTableDouble expectedFrequencies computedFrequencies) matchDoubleTable :: MonadPlus m => Expr -> m [[Double]] matchDoubleTable = fromExpr >=> mapM (mapM (matchM doubleView)) equalTableDouble :: [[Double]] -> [[Double]] -> Bool equalTableDouble = equalListBy (equalListBy equalDouble) equalListBy :: (a -> a -> Bool) -> [a] -> [a] -> Bool equalListBy eq xs ys = length xs == length ys && and (zipWith eq xs ys) ---------------------------------------------------------- testDoubles :: Double -> Double -> Result () testDoubles value target = testDoublesWith defaultDelta value target empty testDoublesWith :: Double -> Double -> Double -> Result () -> Result () testDoublesWith delta value target resultNotEqual = unless (equalDoubleWith delta value target) $ if equalDoubleWith (delta*10) value target then fail "almost equal" else resultNotEqual equalDouble :: Double -> Double -> Bool equalDouble = equalDoubleWith defaultDelta equalDoubleWith :: Double -> Double -> Double -> Bool equalDoubleWith delta x y = abs (x - y) < delta defaultDelta :: Double defaultDelta = 0.0055 -- afronden op 3 cijfers achter de komma -- rounded :: Double -> Double -- rounded x = fromInteger (round (x * 1000)) / 1000.0