------------------------------------------------------------------------ -- | -- Module : Data.Datamining.Clustering.SGMQC -- Copyright : (c) Amy de Buitléir 2012-2018 -- License : BSD-style -- Maintainer : amy@nualeargais.ie -- Stability : experimental -- Portability : portable -- -- Tests -- ------------------------------------------------------------------------ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TypeFamilies #-} {-# OPTIONS_GHC -fno-warn-type-defaults -fno-warn-orphans #-} module Data.Datamining.Clustering.SGMQC ( test ) where import Control.DeepSeq (deepseq) import Data.Datamining.Clustering.SGMInternal import Data.Datamining.Pattern (absDifference, adjustNum) import Data.List (minimumBy, (\\)) import qualified Data.Map.Strict as M import Data.Ord (comparing) import Data.Word (Word16) import System.Random (Random) import Test.Framework as TF (Test, testGroup) import Test.Framework.Providers.QuickCheck2 (testProperty) import Test.QuickCheck (Arbitrary, Gen, Positive, Property, arbitrary, choose, getPositive, property, shrink, sized, suchThat, vectorOf, (==>)) newtype UnitInterval a = UnitInterval {getUnitInterval :: a} deriving ( Eq, Ord, Show, Read) instance Functor UnitInterval where fmap f (UnitInterval x) = UnitInterval (f x) instance (Num a, Ord a, Random a, Arbitrary a) => Arbitrary (UnitInterval a) where arbitrary = fmap UnitInterval $ choose (0,1) shrink (UnitInterval x) = [ UnitInterval x' | x' <- shrink x, x' >= 0, x' <= 1] prop_Exponential_starts_at_r0 :: UnitInterval Double -> Positive Double -> Property prop_Exponential_starts_at_r0 r0 d = property $ abs (exponential r0' d' 0 - r0') < 0.01 where r0' = getUnitInterval r0 d' = getPositive d prop_Exponential_ge_0 :: UnitInterval Double -> Positive Double -> Positive Int -> Property prop_Exponential_ge_0 r0 d t = property $ exponential r0' d' t' >= 0 where r0' = getUnitInterval r0 d' = getPositive d t' = getPositive t positive :: (Num a, Ord a, Arbitrary a) => Gen a positive = arbitrary `suchThat` (> 0) data TestSGM = TestSGM (SGM Int Double Word16 Double) String instance Show TestSGM where show (TestSGM _ desc) = desc buildTestSGM :: Double -> Double -> Int -> Double -> Bool -> [Double] -> TestSGM buildTestSGM r0 d maxSz dt ad ps = TestSGM s' desc where lrf = exponential r0 d s = makeSGM lrf maxSz dt ad absDifference adjustNum desc = "buildTestSGM " ++ show r0 ++ " " ++ show d ++ " " ++ show maxSz ++ " " ++ show dt ++ " " ++ show ad ++ " " ++ show ps s' = trainBatch s ps sizedTestSGM :: Int -> Gen TestSGM sizedTestSGM n = do maxSz <- choose (1, n+1) let numPatterns = n r0 <- choose (0, 1) d <- positive dt <- choose (0, 1) ad <- arbitrary ps <- vectorOf numPatterns arbitrary return $ buildTestSGM r0 d maxSz dt ad ps instance Arbitrary TestSGM where arbitrary = sized sizedTestSGM prop_classify_chooses_best_fit :: TestSGM -> Double -> Property prop_classify_chooses_best_fit (TestSGM s _) x = property $ bmu == bmu2 where (bmu, _, report, _) = trainAndClassify s x bmu2 = fst (minimumBy (comparing f) . M.toList $ report) f (_, (_, d)) = d prop_classify_never_creates_model :: TestSGM -> Double -> Property prop_classify_never_creates_model (TestSGM s _) x = not (isEmpty s) ==> bmu `elem` (labels s) where (bmu, _, _) = classify s x prop_classify_never_causes_error_unless_som_empty :: TestSGM -> Double -> Property prop_classify_never_causes_error_unless_som_empty (TestSGM s _) p = not (isEmpty s) ==> property $ deepseq x True where x = classify s p prop_trainNode_reduces_diff :: TestSGM -> Double -> Property prop_trainNode_reduces_diff (TestSGM s _) x = not (isEmpty s) ==> diffAfter < diffBefore || diffBefore == 0 || learningRate s (time s) < 1e-10 where (bmu, diffBefore, _) = classify s x s2 = trainNode s bmu x (_, diffAfter, _) = classify s2 x prop_diff_lt_threshold_after_training :: TestSGM -> Double -> Property prop_diff_lt_threshold_after_training (TestSGM s _) x = numModels s < maxSize s ==> diffAfter < diffThreshold s where (_, _, _, s') = trainAndClassify s x (_, diffAfter, _) = classify s' x prop_training_reduces_diff :: TestSGM -> Double -> Property prop_training_reduces_diff (TestSGM s _) x = not (isEmpty s) ==> diffAfter < diffBefore || diffBefore == 0 || learningRate s (time s) < 1e-10 where (_, diffBefore, _) = classify s x s2 = train s x (_, diffAfter, _) = classify s2 x -- TODO prop: map will never exceed maxSize prop_train_only_modifies_one_model :: TestSGM -> Double -> Property prop_train_only_modifies_one_model (TestSGM s _) p = numModels s < maxSize s ==> otherModelsBefore == otherModelsAfter where (bmu, _, _, s2) = trainAndClassify s p otherModelsBefore = M.delete bmu . M.map fst . toMap $ s otherModelsAfter = M.delete bmu . M.map fst . toMap $ s2 prop_train_increments_counter :: TestSGM -> Double -> Property prop_train_increments_counter (TestSGM s _) x = numModels s < maxSize s ==> countAfter == countBefore + 1 -- We have to check if the SGM is full, otherwise we'll replace an -- existing model (and its counter), which means that the total -- count could change by an arbitrary amount. where countBefore = time s countAfter = time $ train s x -- | The training set consists of the same vectors in the same order, -- several times over. So the resulting classifications should consist -- of the same integers in the same order, over and over. prop_batch_training_works :: TestSGM -> [Double] -> Property prop_batch_training_works (TestSGM s _) ps -- = maxSize s > length ps -- ==> classifications == (concat . replicate 5) firstSet = property $ classifications == (concat . replicate 5) firstSet where trainingSet = (concat . replicate 5) ps sRightSize = if maxSize s >= length ps then s else s { maxSize=length ps + 1} s' = trainBatch sRightSize trainingSet classifications = map (justBMU . classify s') trainingSet justBMU = \(bmu, _, _) -> bmu firstSet = take (length ps) classifications -- | WARNING: This can fail when two nodes are close enough in -- value so that after training they become identical. prop_classification_is_consistent :: TestSGM -> Double -> Property prop_classification_is_consistent (TestSGM s _) x = property $ bmu == bmu' where (bmu, _, _, s2) = trainAndClassify s x (bmu', _, _) = classify s2 x prop_classification_results_are_consistent :: TestSGM -> Double -> Property prop_classification_results_are_consistent (TestSGM s _) x = property $ bmuDiff == minimum diffs where (_, bmuDiff, report, _) = trainAndClassify s x diffs = map (\(_, (_, d)) -> d) . M.toList $ report prop_classification_stabilises :: TestSGM -> [Double] -> Property prop_classification_stabilises (TestSGM s _) ps = (not . null $ ps) && maxSize s > length ps ==> k2 == k1 where sStable = trainBatch s . concat . replicate 10 $ ps (k1, _, _, sStable2) = trainAndClassify sStable (head ps) sStable3 = trainBatch sStable2 ps (k2, _, _) = classify sStable3 (head ps) prop_models_not_deleted_unless_allowed :: TestSGM -> Double -> Property prop_models_not_deleted_unless_allowed (TestSGM s _) x = (not . allowDeletion $ s) ==> null (labelsBefore \\ labelsAfter) where labelsBefore = M.keys $ modelMap s labelsAfter = M.keys $ modelMap s' (_, _, _, s') = trainAndClassify s x prop_models_not_deleted_unless_allowed2 :: TestSGM -> Double -> Property prop_models_not_deleted_unless_allowed2 (TestSGM s _) x = (not . allowDeletion $ s) ==> null (labelsBefore \\ labelsAfter) where labelsBefore = M.keys $ modelMap s labelsAfter = M.keys $ modelMap s' s' = train s x test :: Test test = testGroup "QuickCheck Data.Datamining.Clustering.SGM" [ testProperty "prop_Exponential_starts_at_r0" prop_Exponential_starts_at_r0, testProperty "prop_Exponential_ge_0" prop_Exponential_ge_0, testProperty "prop_classify_chooses_best_fit" prop_classify_chooses_best_fit, testProperty "prop_classify_never_creates_model" prop_classify_never_creates_model, testProperty "prop_classify_never_causes_error_unless_som_empty" prop_classify_never_causes_error_unless_som_empty, testProperty "prop_trainNode_reduces_diff" prop_trainNode_reduces_diff, testProperty "prop_diff_lt_threshold_after_training" prop_diff_lt_threshold_after_training, testProperty "prop_training_reduces_diff" prop_training_reduces_diff, testProperty "prop_train_only_modifies_one_model" prop_train_only_modifies_one_model, testProperty "prop_train_increments_counter" prop_train_increments_counter, testProperty "prop_batch_training_works" prop_batch_training_works, testProperty "prop_classification_is_consistent" prop_classification_is_consistent, testProperty "prop_classification_results_are_consistent" prop_classification_results_are_consistent, testProperty "prop_classification_stabilises" prop_classification_stabilises, testProperty "prop_models_not_deleted_unless_allowed" prop_models_not_deleted_unless_allowed, testProperty "prop_models_not_deleted_unless_allowed2" prop_models_not_deleted_unless_allowed2 ]