{- | Emping 0.6 (provisional) Tue 19 May 2009 05:52:26 PM CEST Module DefRules contains functions to transform a coded table of attribute value rows into nominal rules of the form: antecedent implies consequent. The consequent is a value of a (user selected) attribute, and the antecedent is a conjunction of values of (other) attributes A coded fact list can be checked for duplicates, though duplicate rows in a table do not affect the results. Rules are called ambiguous, if they have the same antecedent but a different consequent. Ambiguous rules do have an effect. Any sub set of the duplicate antecedent would be a contradiction, and is therefore excluded as a reduction. If the table is not fully normalized, ALL rules for some consequent value may be ambiguous. Then there are no reduced rules for that consequent! -} module DefRules (getDups, cleanFacts, getAmbiguousRules,factsToPartition, factsTo_NB_Partition, hasBlankValues, Antec, Rule, partitionToSets ) where import Data.List (partition, elemIndex ) import Data.Array (Array, (!), elems ) import Data.Set (Set, fromList ) import Codec (AVp ) import CsvParse ( blankId ) -- partDups partitions an list of AVp lists into duplicates and their counts. partDups :: [[AVp]] -> [([[AVp]], Int)] partDups tb = [(x, length x) | x <- parts ] where parts = partitionBy (\x y -> x == y ) tb -- | get a representative of each duplicate AVp list, with its frequency, or [] getDups :: [[AVp]] -> [([AVp],Int)] getDups tb | dbls == [] = [] | otherwise = [ ((head . fst) x, snd x) | x <- dbls ] where dbls = [ x | x <- dupfacs, (snd x) > 1 ] dupfacs = partDups tb -- removes all duplicates from list of AVp lists, also works if there are none remDups :: [[AVp]] -> [[AVp]] remDups tb = map (head . fst) dupfacs where dupfacs = partDups tb -- split an AVP list into an antecedent consequent pair -- works because only one value of an attribute can be in a conjunction f2r :: Int -> [AVp] -> ([AVp],AVp) f2r att fact = (ant, cons) where (ant, [cons]) = partition (\u -> (fst u) /= att) fact -- split all rows in the coded table in antecedent consequent pairs factsToRules :: Int -> [[AVp]] -> [([AVp],AVp)] factsToRules att fls = map (f2r att) fls -- remove rules with a blank consequent value remBlankConseq :: Array Int (String, [String]) -> [([AVp],AVp)] -> [([AVp],AVp)] remBlankConseq namearr ruls = case blankix of Nothing -> ruls Just blank -> filter (\u -> (snd $ snd u) /= blank) ruls where att = (fst .snd . head) ruls blankix = elemIndex blankId (snd (namearr ! att)) -- partitions list of antecedent consequent pairs to consequents partitionRules :: [([AVp],AVp)] -> [[([AVp], AVp)]] partitionRules ruls = partitionBy (\x y -> (snd x) == (snd y)) ruls -- | get all ambiguous rules and fact duplicates (!!!!) in a rule partition getAmbiguousRules :: [[([AVp], AVp)]] -> [[([AVp], AVp)]] getAmbiguousRules grp = filter (\x -> (length x) > 1) anteqs where anteqs = partitionBy eq (concat grp) eq (a1, _) (a2,_) = a1 == a2 -- | if the user decides to check, duplicates are removed cleanFacts :: Bool -> [[AVp]] -> [[AVp]] cleanFacts clean tb | clean = remDups tb | otherwise = tb -- | gets rules (from clean facts or not) and partitions them factsToPartition :: Int -> [[AVp]] -> [[([AVp],AVp)]] factsToPartition att tb = partitionRules $ factsToRules att tb factsTo_NB_Partition :: Array Int (String, [String]) -> Int -> [[AVp]] -> [[([AVp],AVp)]] factsTo_NB_Partition namearr att tb = partitionRules $ remBlankConseq namearr $ factsToRules att tb -- | checks for any blank values in data, attributes are not checked hasBlankValues :: Array Int (String, [String]) -> Bool hasBlankValues namearr = any hasbl values where values = (snd . unzip . elems) namearr hasbl ls = case elemIndex blankId ls of Nothing -> False _ -> True -- | type synonym for an antecedent as a set of attribute-value pairs type Antec = Set AVp -- | type synonym for a rule with an antecedent as a set type Rule = (Antec, AVp) -- transform the antecedent of a rule into a Set r2set :: ([AVp], AVp) -> Rule r2set (a,c) = (fromList a, c) -- | transform the antecedents of all rules to Sets partitionToSets :: [[([AVp],AVp)]] -> [[Rule]] partitionToSets prt = map (map r2set) prt --------------- general helper function ------------------------------- -- partitions a list according to an equivalence relation partitionBy :: (a -> a -> Bool) -> [a] -> [[a]] partitionBy _ [] = [] partitionBy eq ls = x:(partitionBy eq y) where (x,y) = partition ((head ls) `eq`) ls ------------------------------------------------------------------------