--- --- Yu Hirate, Hayato Yamana: Generalized Sequential Pattern Mining with Item Interval, Journal of Computer Vol. 1, No 3, June 2006 --- -- -- Configuration -- items := [a, b, c, d, e, f] ISDB := [[[(0, [a]), (86400, [a, b, c]), (259200, [a, c])]] ,[[(0, [a, d]), (259200, [c])]] ,[[(0, [a, e, f]), (172800, [a, b])]]] N := length ISDB minSup := ceiling (0.5 * N) C1 := 0 -- min_interval C2 := 172800 -- max_interval C3 := 0 -- min_whole_interval C4 := 300000 -- max_whole_interval I t := floor (rtof (t / (60 * 60 * 24))) -- -- Utils -- query := list (integer, eq) sequence := list (time, list eq) time := matcher | interval $ $ as (integer, integer) with | $t -> [(I t, t)] | $ as something with | $tgt -> [tgt] -- -- Algorithm -- -- calculate ISDB|α project α ISDB := match α as query with | (#0, $x) :: $α' -> project' α' (map (\xss -> matchAllDFS xss as set sequence with | (_ ++ ($t, _ ++ #x :: $cs) :: $ls) :: _ -> (0, cs) :: (map (\t' xs -> (t' - t, xs)) ls)) ISDB) project' α ISDB := match α as query with | [] -> ISDB | ($a, $x) :: $α' -> project' (map (\b y -> (b - a, y)) α') (map (\xss -> matchAllDFS xss as set sequence with | (_ ++ (interval #a $t, _ ++ #x :: $cs) :: $ls) :: _ -> (0, cs) :: (map (\t' xs -> (t' - t, xs)) ls)) ISDB) -- main function gspm items ISDB I minSup C1 C2 C3 C4 := let φ := [] in let R := [] in let fs := filter (\α ISDB' -> match ISDB' as multiset sequence with | loop $i (1, minSup) (![] :: ...) _ -> True | _ -> False) (map (\α -> (α, project α ISDB)) (map (\x -> [(0, x)]) items)) in let iss := map (\α ISDB' -> α) fs in iss ++ concat (map (\α ISDB' -> projection α ISDB' I minSup C1 C2 C3 C4) fs) projection α ISDB' I minSup C1 C2 C3 C4 := let fs := filter (\a t x -> C1 <= t && t <= C2) (freqItem ISDB' minSup C1 C2 C3 C4) in let iss' := map (\a t x -> α ++ [(a, x)]) fs in -- TODO: apply C4 -- TODO: apply C3 iss' ++ concat (map (\α' -> projection α' (project α' ISDB) I minSup C1 C2 C3 C4) iss') freqItem ISDB minSup C1 C2 C3 C4 := matchAll ISDB as list (list sequence) with | first (interval $a $t) $x (loop $i (2, minSup) (first (interval #a _) #x ...) !(first (interval #a _) #x _)) -> (a, t, x) first := \pt px ps => {@ ++ (@ ++ (@ ++ ((interval $t _ & ~pt), _ ++ ($x & ~px) :: _) :: _) :: _) :: @, (!(_ ++ (_ ++ (_ ++ (interval #t _, _ ++ #x :: _) :: _) :: _) :: _), !(_ ++ (_ ++ (interval #t _, _ ++ #x :: _) :: _) :: _), !(_ ++ (interval #t _, _ ++ #x :: _) :: _), ~ps)} -- -- Execute -- --gspm items ISDB I minSup C1 C2 C3 C4 -- -- Test -- assertEqual "project (level 1)" (project [(0, a)] ISDB) [[[(0, []), (86400, [a, b, c]), (259200, [a, c])], [(0, [b, c]), (172800, [a, c])], [(0, [c])]], [[(0, [d]), (259200, [c])]], [[(0, [e, f]), (172800, [a, b])], [(0, [b])]]] assertEqual "project (level 2)" (project [(0, a),(0, b)] ISDB) [[[(0, [c]), (172800, [a, c])]], [], [[(0, [])]]] assertEqual "project (level 2)" (project [(0, a),(2, a)] ISDB) [[[(0, [c])]], [], [[(0, [b])]]] assertEqual "freqItem" (freqItem [[[(0, []), (86400, [a, b, c]), (259200, [a, c])], [(0, [b, c]), (172800, [a, c])], [(0, [c])]], [[(0, [d]), (259200, [c])]], [[(0, [e, f]), (172800, [a, b])], [(0, [b])]]] minSup C1 C2 C3 C4) [(0, 0, b), (3, 259200, c), (2, 172800, a)] (filter (\a t x -> C1 <= t && t <= C2) (freqItem [[[(0, []), (86400, [a, b, c]), (259200, [a, c])], [(0, [b, c]), (172800, [a, c])], [(0, [c])]], [[(0, [d]), (259200, [c])]], [[(0, [b])]]] minSup C1 C2 C3 C4)) --[(0, 0, b)] gspm items ISDB I minSup C1 C2 C3 C4 [[(0, a)], [(0, b)], [(0, c)], [(0, a), (0, b)], [(0, a), (2, a)]]