------------------------------------------------------------------------ -- | -- Module : Data.Datamining.Clustering.SGMQC -- Copyright : (c) Amy de Buitléir 2012-2018 -- License : BSD-style -- Maintainer : amy@nualeargais.ie -- Stability : experimental -- Portability : portable -- -- Tests -- ------------------------------------------------------------------------ {-# LANGUAGE MultiParamTypeClasses, TypeFamilies, FlexibleInstances, FlexibleContexts #-} {-# OPTIONS_GHC -fno-warn-type-defaults -fno-warn-orphans #-} module Data.Datamining.Clustering.SGMQC ( test ) where import Control.DeepSeq (deepseq) import Data.Datamining.Pattern (adjustNum, absDifference) import Data.Datamining.Clustering.SGMInternal import Data.List ((\\), minimumBy) import qualified Data.Map.Strict as M import Data.Ord (comparing) import Data.Word (Word16) import System.Random (Random) import Test.Framework as TF (Test, testGroup) import Test.Framework.Providers.QuickCheck2 (testProperty) import Test.QuickCheck ((==>), Gen, Arbitrary, Property, Positive, arbitrary, shrink, choose, property, sized, suchThat, vectorOf, getPositive) newtype UnitInterval a = UnitInterval {getUnitInterval :: a} deriving ( Eq, Ord, Show, Read) instance Functor UnitInterval where fmap f (UnitInterval x) = UnitInterval (f x) instance (Num a, Ord a, Random a, Arbitrary a) => Arbitrary (UnitInterval a) where arbitrary = fmap UnitInterval $ choose (0,1) shrink (UnitInterval x) = [ UnitInterval x' | x' <- shrink x, x' >= 0, x' <= 1] prop_Exponential_starts_at_r0 :: UnitInterval Double -> Positive Double -> Property prop_Exponential_starts_at_r0 r0 d = property $ abs (exponential r0' d' 0 - r0') < 0.01 where r0' = getUnitInterval r0 d' = getPositive d prop_Exponential_ge_0 :: UnitInterval Double -> Positive Double -> Positive Int -> Property prop_Exponential_ge_0 r0 d t = property $ exponential r0' d' t' >= 0 where r0' = getUnitInterval r0 d' = getPositive d t' = getPositive t positive :: (Num a, Ord a, Arbitrary a) => Gen a positive = arbitrary `suchThat` (> 0) data TestSGM = TestSGM (SGM Int Double Word16 Double) String instance Show TestSGM where show (TestSGM _ desc) = desc buildTestSGM :: Double -> Double -> Int -> Double -> Bool -> [Double] -> TestSGM buildTestSGM r0 d maxSz dt ad ps = TestSGM s' desc where lrf = exponential r0 d s = makeSGM lrf maxSz dt ad absDifference adjustNum desc = "buildTestSGM " ++ show r0 ++ " " ++ show d ++ " " ++ show maxSz ++ " " ++ show dt ++ " " ++ show ad ++ " " ++ show ps s' = trainBatch s ps sizedTestSGM :: Int -> Gen TestSGM sizedTestSGM n = do maxSz <- choose (1, n+1) let numPatterns = n r0 <- choose (0, 1) d <- positive dt <- choose (0, 1) ad <- arbitrary ps <- vectorOf numPatterns arbitrary return $ buildTestSGM r0 d maxSz dt ad ps instance Arbitrary TestSGM where arbitrary = sized sizedTestSGM prop_classify_chooses_best_fit :: TestSGM -> Double -> Property prop_classify_chooses_best_fit (TestSGM s _) x = property $ bmu == bmu2 where (bmu, _, report, _) = trainAndClassify s x bmu2 = fst (minimumBy (comparing f) . M.toList $ report) f (_, (_, d)) = d prop_classify_never_creates_model :: TestSGM -> Double -> Property prop_classify_never_creates_model (TestSGM s _) x = not (isEmpty s) ==> bmu `elem` (labels s) where (bmu, _, _) = classify s x prop_classify_never_causes_error_unless_som_empty :: TestSGM -> Double -> Property prop_classify_never_causes_error_unless_som_empty (TestSGM s _) p = not (isEmpty s) ==> property $ deepseq x True where x = classify s p prop_trainNode_reduces_diff :: TestSGM -> Double -> Property prop_trainNode_reduces_diff (TestSGM s _) x = not (isEmpty s) ==> diffAfter < diffBefore || diffBefore == 0 || learningRate s (time s) < 1e-10 where (bmu, diffBefore, _) = classify s x s2 = trainNode s bmu x (_, diffAfter, _) = classify s2 x prop_diff_lt_threshold_after_training :: TestSGM -> Double -> Property prop_diff_lt_threshold_after_training (TestSGM s _) x = numModels s < maxSize s ==> diffAfter < diffThreshold s where (_, _, _, s') = trainAndClassify s x (_, diffAfter, _) = classify s' x prop_training_reduces_diff :: TestSGM -> Double -> Property prop_training_reduces_diff (TestSGM s _) x = not (isEmpty s) ==> diffAfter < diffBefore || diffBefore == 0 || learningRate s (time s) < 1e-10 where (_, diffBefore, _) = classify s x s2 = train s x (_, diffAfter, _) = classify s2 x -- TODO prop: map will never exceed maxSize prop_train_only_modifies_one_model :: TestSGM -> Double -> Property prop_train_only_modifies_one_model (TestSGM s _) p = numModels s < maxSize s ==> otherModelsBefore == otherModelsAfter where (bmu, _, _, s2) = trainAndClassify s p otherModelsBefore = M.delete bmu . M.map fst . toMap $ s otherModelsAfter = M.delete bmu . M.map fst . toMap $ s2 prop_train_increments_counter :: TestSGM -> Double -> Property prop_train_increments_counter (TestSGM s _) x = numModels s < maxSize s ==> countAfter == countBefore + 1 -- We have to check if the SGM is full, otherwise we'll replace an -- existing model (and its counter), which means that the total -- count could change by an arbitrary amount. where countBefore = time s countAfter = time $ train s x -- | The training set consists of the same vectors in the same order, -- several times over. So the resulting classifications should consist -- of the same integers in the same order, over and over. prop_batch_training_works :: TestSGM -> [Double] -> Property prop_batch_training_works (TestSGM s _) ps -- = maxSize s > length ps -- ==> classifications == (concat . replicate 5) firstSet = property $ classifications == (concat . replicate 5) firstSet where trainingSet = (concat . replicate 5) ps sRightSize = if maxSize s >= length ps then s else s { maxSize=length ps + 1} s' = trainBatch sRightSize trainingSet classifications = map (justBMU . classify s') trainingSet justBMU = \(bmu, _, _) -> bmu firstSet = take (length ps) classifications -- | WARNING: This can fail when two nodes are close enough in -- value so that after training they become identical. prop_classification_is_consistent :: TestSGM -> Double -> Property prop_classification_is_consistent (TestSGM s _) x = property $ bmu == bmu' where (bmu, _, _, s2) = trainAndClassify s x (bmu', _, _) = classify s2 x prop_classification_results_are_consistent :: TestSGM -> Double -> Property prop_classification_results_are_consistent (TestSGM s _) x = property $ bmuDiff == minimum diffs where (_, bmuDiff, report, _) = trainAndClassify s x diffs = map (\(_, (_, d)) -> d) . M.toList $ report prop_classification_stabilises :: TestSGM -> [Double] -> Property prop_classification_stabilises (TestSGM s _) ps = (not . null $ ps) && maxSize s > length ps ==> k2 == k1 where sStable = trainBatch s . concat . replicate 10 $ ps (k1, _, _, sStable2) = trainAndClassify sStable (head ps) sStable3 = trainBatch sStable2 ps (k2, _, _) = classify sStable3 (head ps) prop_models_not_deleted_unless_allowed :: TestSGM -> Double -> Property prop_models_not_deleted_unless_allowed (TestSGM s _) x = (not . allowDeletion $ s) ==> null (labelsBefore \\ labelsAfter) where labelsBefore = M.keys $ modelMap s labelsAfter = M.keys $ modelMap s' (_, _, _, s') = trainAndClassify s x prop_models_not_deleted_unless_allowed2 :: TestSGM -> Double -> Property prop_models_not_deleted_unless_allowed2 (TestSGM s _) x = (not . allowDeletion $ s) ==> null (labelsBefore \\ labelsAfter) where labelsBefore = M.keys $ modelMap s labelsAfter = M.keys $ modelMap s' s' = train s x test :: Test test = testGroup "QuickCheck Data.Datamining.Clustering.SGM" [ testProperty "prop_Exponential_starts_at_r0" prop_Exponential_starts_at_r0, testProperty "prop_Exponential_ge_0" prop_Exponential_ge_0, testProperty "prop_classify_chooses_best_fit" prop_classify_chooses_best_fit, testProperty "prop_classify_never_creates_model" prop_classify_never_creates_model, testProperty "prop_classify_never_causes_error_unless_som_empty" prop_classify_never_causes_error_unless_som_empty, testProperty "prop_trainNode_reduces_diff" prop_trainNode_reduces_diff, testProperty "prop_diff_lt_threshold_after_training" prop_diff_lt_threshold_after_training, testProperty "prop_training_reduces_diff" prop_training_reduces_diff, testProperty "prop_train_only_modifies_one_model" prop_train_only_modifies_one_model, testProperty "prop_train_increments_counter" prop_train_increments_counter, testProperty "prop_batch_training_works" prop_batch_training_works, testProperty "prop_classification_is_consistent" prop_classification_is_consistent, testProperty "prop_classification_results_are_consistent" prop_classification_results_are_consistent, testProperty "prop_classification_stabilises" prop_classification_stabilises, testProperty "prop_models_not_deleted_unless_allowed" prop_models_not_deleted_unless_allowed, testProperty "prop_models_not_deleted_unless_allowed2" prop_models_not_deleted_unless_allowed2 ]