module Shrub where import Data.List import Data.Char data Shrub a = Branch !(Shrub a) !(Shrub a) !(Shrub a) !(Shrub a) | Leaf !a | Empty deriving (Show) addShrub :: Shrub a -> String -> a -> Shrub a addShrub _ [] v = Leaf v addShrub sb@(Branch a c g t) ('A':sq) v = Branch (addShrub a sq v) c g t addShrub sb@(Branch a c g t) ('C':sq) v = Branch a (addShrub c sq v) g t addShrub sb@(Branch a c g t) ('G':sq) v = Branch a c (addShrub g sq v) t addShrub sb@(Branch a c g t) ('T':sq) v = Branch a c g (addShrub t sq v) addShrub Empty ('A':sq) v = Branch (addShrub Empty sq v) Empty Empty Empty addShrub Empty ('C':sq) v = Branch Empty (addShrub Empty sq v) Empty Empty addShrub Empty ('G':sq) v = Branch Empty Empty (addShrub Empty sq v) Empty addShrub Empty ('T':sq) v = Branch Empty Empty Empty (addShrub Empty sq v) addShrub (Leaf _) l v = addShrub Empty l v addShrubDegen :: Shrub a -> String -> a -> Shrub a addShrubDegen _ [] v = Leaf v addShrubDegen sb@(Branch a c g t) (b:sq) v | isUpper b = branch b | isLower b = branch $ toUpper b where addA = (addShrubDegen a sq v) addC = (addShrubDegen c sq v) addG = (addShrubDegen g sq v) addT = (addShrubDegen t sq v) branch b = case b of 'A' -> Branch addA c g t 'C' -> Branch a addC g t 'G' -> Branch a c addG t 'T' -> Branch a c g addT 'M' -> Branch addA addC g t 'Y' -> Branch a addC g addT 'R' -> Branch addA c addG t 'K' -> Branch a c addG addT 'S' -> Branch a addC addG t 'W' -> Branch addA c g addT 'B' -> Branch a addC addG addT 'V' -> Branch addA addC addG t 'D' -> Branch addA c addG addT 'H' -> Branch addA addC g addT 'I' -> Branch addA addC addG addT 'N' -> Branch addA addC addG addT addShrubDegen Empty (b:sq) v | isUpper b = branch b | isLower b = branch $ toUpper b where rest = addShrubDegen Empty sq v branch b = case b of 'A' -> Branch rest Empty Empty Empty 'C' -> Branch Empty rest Empty Empty 'G' -> Branch Empty Empty rest Empty 'T' -> Branch Empty Empty Empty rest 'M' -> Branch rest rest Empty Empty 'Y' -> Branch Empty rest Empty rest 'R' -> Branch rest Empty rest Empty 'K' -> Branch Empty Empty rest rest 'S' -> Branch Empty rest rest Empty 'W' -> Branch rest Empty Empty rest 'B' -> Branch Empty rest rest rest 'H' -> Branch rest rest Empty rest 'V' -> Branch rest rest rest Empty 'D' -> Branch rest Empty rest rest 'N' -> Branch rest rest rest rest 'I' -> Branch rest rest rest rest addShrubDegen (Leaf _) l v = addShrubDegen Empty l v subtractMultiple :: Shrub a -> [String] -> Shrub a subtractMultiple sb [] = sb subtractMultiple sb (x:xs) = subtractMultiple (subtractShrubDegen sb x) xs subtractShrubDegen :: Shrub a -> String -> Shrub a subtractShrubDegen Empty _ = Empty subtractShrubDegen v@(Leaf _) _ = Empty subtractShrubDegen sb@(Branch _ _ _ _) [] = sb subtractShrubDegen sb@(Branch a c g t) (b:xs) = let empt :: Shrub a -> Shrub a empt (Branch Empty Empty Empty Empty) = Empty empt oth = oth in case b of 'A' -> empt $ Branch (subtractShrubDegen a xs) c g t 'C' -> empt $ Branch a (subtractShrubDegen c xs) g t 'G' -> empt $ Branch a c (subtractShrubDegen g xs) t 'T' -> empt $ Branch a c g (subtractShrubDegen t xs) 'I' -> empt $ Branch (subtractShrubDegen a xs) (subtractShrubDegen c xs) (subtractShrubDegen g xs) (subtractShrubDegen t xs) fromListDegen :: [(String, a)] -> Shrub a fromListDegen = loop Empty where loop sb [] = sb loop sb ((sq, v):xs) = loop (addShrubDegen sb sq v) xs fromList :: [(String, a)] -> Shrub a fromList = loop Empty where loop sb [] = sb loop sb ((sq, v):xs) = loop (addShrub sb sq v) xs searchShrub sb sq m = nubMax $ searchShrub' sb sq m searchShrub' :: Shrub a -> String -> Int -> [(Int,a)] searchShrub' Empty _ _ = [] searchShrub' (Leaf v) [] n = [(n,v)] searchShrub' (Branch a c g t) [] n = concatMap (\q -> searchShrub' q [] n) [a,c,g,t] searchShrub' (Leaf v) l n = [(n,v)] searchShrub' sb@(Branch a c g t) l@(x:xs) n = let perfmatch = case x of 'A' -> searchShrub' a xs n 'C' -> searchShrub' c xs n 'G' -> searchShrub' g xs n 'T' -> searchShrub' t xs n otherwise -> [] indels = concatMap (\q -> searchShrub' q l (n-1)) [a,c,g,t] ++ case l of (_:_) -> searchShrub' sb xs (n-1) _ -> [] subs = case x of 'A' -> concatMap (\q -> searchShrub' q xs (n-1)) [c,g,t] 'C' -> concatMap (\q -> searchShrub' q xs (n-1)) [a,g,t] 'G' -> concatMap (\q -> searchShrub' q xs (n-1)) [a,c,t] 'T' -> concatMap (\q -> searchShrub' q xs (n-1)) [a,c,g] otherwise -> concatMap (\q -> searchShrub' q xs (n-1)) [a,c,g,t] in perfmatch ++ if n > 0 then subs else [] perfMatch' :: Shrub a -> String -> Int -> [(Int,a)] perfMatch' (Branch a c g t) (x:xs) n = case x of 'A' -> searchShrub' a xs n 'C' -> searchShrub' c xs n 'G' -> searchShrub' g xs n 'T' -> searchShrub' t xs n otherwise -> [] searchShrub'' :: Shrub a -> String -> Int -> Int -> [(Int,a)] searchShrub'' Empty _ _ _ = [] searchShrub'' (Leaf v) [] n _ = [(n,v)] searchShrub'' (Branch a c g t) [] n nid = concatMap (\q -> searchShrub'' q [] n nid) [a,c,g,t] searchShrub'' (Leaf v) l n _ = [(n,v)] searchShrub'' sb@(Branch a c g t) l@(x:xs) n nid = let perfmatch = case x of 'A' -> searchShrub'' a xs n nid 'C' -> searchShrub'' c xs n nid 'G' -> searchShrub'' g xs n nid 'T' -> searchShrub'' t xs n nid otherwise -> [] indels = concatMap (\q -> searchShrub'' q l (n-1) (nid-1)) [a,c,g,t] ++ case l of (_:_) -> searchShrub'' sb xs (n-1) (nid-1) _ -> [] subs = case x of 'A' -> concatMap (\q -> searchShrub'' q xs (n-1) nid) [c,g,t] 'C' -> concatMap (\q -> searchShrub'' q xs (n-1) nid) [a,g,t] 'G' -> concatMap (\q -> searchShrub'' q xs (n-1) nid) [a,c,t] 'T' -> concatMap (\q -> searchShrub'' q xs (n-1) nid) [a,c,g] otherwise -> concatMap (\q -> searchShrub'' q xs (n-1) nid) [a,c,g,t] in perfmatch ++ if n > 0 then subs ++ if (nid > 0) then indels else [] else [] -- | Search Shrub with constraint to ignore a result searchShrubNE :: Eq a => Shrub a -> String -> a -> Int -> Int -> [(Int,a)] searchShrubNE Empty _ _ _ _ = [] searchShrubNE (Leaf v) [] om n _ | v == om = [] |otherwise = [(n,v)] searchShrubNE (Branch a c g t) [] om n nid = concatMap (\q -> searchShrubNE q [] om n nid) [a,c,g,t] searchShrubNE (Leaf v) l om n _ | v == om = [] |otherwise = [(n,v)] searchShrubNE sb@(Branch a c g t) l@(x:xs) om n nid = let perfmatch = case x of 'A' -> searchShrubNE a xs om n nid 'C' -> searchShrubNE c xs om n nid 'G' -> searchShrubNE g xs om n nid 'T' -> searchShrubNE t xs om n nid otherwise -> [] freeindel = nid > 0 n' = if freeindel then n-1 else n-2 nid' = if freeindel then nid-1 else nid' indels = if not freeindel && n < 2 then [] else if n' < 0 then [] else case l of (_:_:xss) -> concatMap (\q -> searchShrubNE q l om n' nid') [a, c, g, t] ++ concatMap (\q -> searchShrubNE q xss om n' nid') [a, c, g, t] _ -> concatMap (\q -> searchShrubNE q l om n' nid') [a, c, g, t] subs = case x of 'A' -> concatMap (\q -> searchShrubNE q xs om (n-1) nid) [c,g,t] 'C' -> concatMap (\q -> searchShrubNE q xs om (n-1) nid) [a,g,t] 'G' -> concatMap (\q -> searchShrubNE q xs om (n-1) nid) [a,c,t] 'T' -> concatMap (\q -> searchShrubNE q xs om (n-1) nid) [a,c,g] otherwise -> concatMap (\q -> searchShrubNE q xs om (n-1) nid) [a,c,g,t] in if n == 0 then perfmatch else perfmatch ++ subs ++ indels where -- | Give you nid 1 cost indel, then remaining 2 cost indels searchShrub3 :: Shrub a -> String -> Int -> Int -> [(Int,a)] searchShrub3 Empty _ _ _ = [] searchShrub3 (Leaf v) [] n _ = [(n,v)] searchShrub3 (Branch a c g t) [] n nid = concatMap (\q -> searchShrub3 q [] n nid) [a,c,g,t] searchShrub3 (Leaf v) l n _ = [(n,v)] searchShrub3 sb@(Branch a c g t) l@(x:xs) n nid = let perfmatch = case x of 'A' -> searchShrub3 a xs n nid 'C' -> searchShrub3 c xs n nid 'G' -> searchShrub3 g xs n nid 'T' -> searchShrub3 t xs n nid 'I' -> concatMap (\q -> searchShrub3 q xs n nid) [a,c,g,t] otherwise -> [] freeindel = nid > 0 n' = if freeindel then n-1 else n-2 nid' = if freeindel then nid-1 else nid indels = if not freeindel && n < 2 then [] else if n' < 0 then [] else concatMap (\q -> searchShrub3 q l n' nid') [a,c,g,t] ++ case l of (_:_:xss) -> searchShrub3 sb xss n' nid' _ -> concatMap (\q -> searchShrub3 q l n' nid') [a,c,g,t] subs = case x of 'A' -> concatMap (\q -> searchShrub3 q xs (n-1) nid) [c,g,t] 'C' -> concatMap (\q -> searchShrub3 q xs (n-1) nid) [a,g,t] 'G' -> concatMap (\q -> searchShrub3 q xs (n-1) nid) [a,c,t] 'T' -> concatMap (\q -> searchShrub3 q xs (n-1) nid) [a,c,g] 'I' -> [] otherwise -> concatMap (\q -> searchShrub3 q xs (n-1) nid) [a,c,g,t] in if n == 0 then perfmatch else perfmatch ++ subs ++ indels -- | Melting point shrub search searchShrubTM :: Int -> Shrub a -> String -> Int -> [(Int,a)] searchShrubTM _ Empty _ _ = [] searchShrubTM thr (Leaf v) [] n | n >= thr = [(n,v)] | otherwise = [] searchShrubTM thr (Branch a c g t) [] n = concatMap (\q -> searchShrubTM thr q [] n) [a,c,g,t] searchShrubTM thr (Leaf v) l n | n >= thr = [(n,v)] | otherwise = [] searchShrubTM thr sb@(Branch a c g t) l@(x:xs) n | n >= thr = concatMap (\q -> searchShrubTM thr q [] n) [a,c,g,t] | otherwise = let ssbt = searchShrubTM thr perfmatch = case x of 'A' -> ssbt a xs (n+1) 'C' -> ssbt c xs (n+2) 'G' -> ssbt g xs (n+2) 'T' -> ssbt t xs (n+1) otherwise -> [] indels = concatMap (\q -> ssbt q l (n-3)) [a,c,g,t] ++ case l of (_:_:xss) -> ssbt sb xss (n-3) _ -> [] subs = case x of 'A' -> concatMap (\q -> ssbt q xs (n-2)) [c,g,t] 'C' -> concatMap (\q -> ssbt q xs (n-2)) [a,g,t] 'G' -> concatMap (\q -> ssbt q xs (n-2)) [a,c,t] 'T' -> concatMap (\q -> ssbt q xs (n-2)) [a,c,g] 'I' -> concatMap (\q -> ssbt q xs (n-1)) [a,c,g,t] otherwise -> concatMap (\q -> ssbt q xs (n-2)) [a,c,g,t] in perfmatch ++ subs ++ indels searchShrubPN :: Int -> Shrub a -> String -> Int -> [(Int,a)] searchShrubPN _ Empty _ _ = [] searchShrubPN _ (Leaf v) [] n = [(n,v)] searchShrubPN _ (Branch a c g t) [] n = concatMap (\q -> searchShrubPN 0 q [] n) [a,c,g,t] searchShrubPN _ (Leaf v) l n = [(n,v)] searchShrubPN pr sb@(Branch a c g t) l@(x:xs) n = let ssp = searchShrubPN pr' pr' = if pr > 0 then pr-1 else pr indels = concatMap (\q -> ssp q l (n-1)) [a,c,g,t] ++ case l of (_:_) -> ssp sb xs (n-1) _ -> [] subs = case x of 'A' -> concatMap (\q -> ssp q xs (n-1)) [c,g,t] 'C' -> concatMap (\q -> ssp q xs (n-1)) [a,g,t] 'G' -> concatMap (\q -> ssp q xs (n-1)) [a,c,t] 'T' -> concatMap (\q -> ssp q xs (n-1)) [a,c,g] 'I' -> concatMap (\q -> ssp q xs n) [a,c,g,t] otherwise -> concatMap (\q -> ssp q xs (n-1)) [a,c,g,t] perfmatch = case x of 'A' -> ssp a xs n 'C' -> ssp c xs n 'G' -> ssp g xs n 'T' -> ssp t xs n otherwise -> [] in perfmatch ++ if n == 0 || pr > 0 then [] else subs ++ indels searchShrubTK :: Int -> Shrub a -> String -> Int -> [(Int,a)] searchShrubTK _ Empty _ _ = [] searchShrubTK _ (Leaf v) [] n = [(n,v)] searchShrubTK _ (Branch a c g t) [] n = concatMap (\q -> searchShrubTK 0 q [] n) [a,c,g,t] searchShrubTK 0 (Branch a c g t) _ n = concatMap (\q -> searchShrubTK 0 q [] n) [a,c,g,t] searchShrubTK _ (Leaf v) l n = [(n,v)] searchShrubTK tk sb@(Branch a c g t) l@(x:xs) n = let ssp = searchShrubTK tk' tk' = tk - 1 indels = concatMap (\q -> ssp q l (n-1)) [a,c,g,t] ++ case xs of (_:_) -> searchShrubTK tk sb xs (n-1) _ -> [] subs = case x of 'A' -> concatMap (\q -> ssp q xs (n-1)) [c,g,t] 'C' -> concatMap (\q -> ssp q xs (n-1)) [a,g,t] 'G' -> concatMap (\q -> ssp q xs (n-1)) [a,c,t] 'T' -> concatMap (\q -> ssp q xs (n-1)) [a,c,g] 'I' -> concatMap (\q -> ssp q xs n) [a,c,g,t] otherwise -> concatMap (\q -> ssp q xs (n-1)) [a,c,g,t] perfmatch = case x of 'A' -> ssp a xs n 'C' -> ssp c xs n 'G' -> ssp g xs n 'T' -> ssp t xs n otherwise -> [] in perfmatch ++ if n > 0 then subs ++ indels else [] searchShrubTK2 :: Int -> Shrub a -> String -> Int -> Int -> [(Int,a)] searchShrubTK2 _ Empty _ _ _ = [] searchShrubTK2 _ (Leaf v) [] n _ = [(n,v)] searchShrubTK2 _ (Branch a c g t) [] n nid = concatMap (\q -> searchShrubTK2 0 q [] n nid) [a,c,g,t] searchShrubTK2 0 (Branch a c g t) _ n nid = concatMap (\q -> searchShrubTK2 0 q [] n nid) [a,c,g,t] searchShrubTK2 _ (Leaf v) _ n _ = [(n,v)] searchShrubTK2 tk sb@(Branch a c g t) l@(x:xs) n nid = let ssp = searchShrubTK2 tk' tk' = tk - 1 indels = if nid < 1 then [] else concatMap (\q -> searchShrubTK2 tk q l (n-1) (nid-1)) [a,c,g,t] ++ case l of (_:_) -> searchShrubTK2 tk sb xs (n-1) (nid-1) _ -> [] subs = case x of 'A' -> concatMap (\q -> ssp q xs (n-1) nid) [c,g,t] 'C' -> concatMap (\q -> ssp q xs (n-1) nid) [a,g,t] 'G' -> concatMap (\q -> ssp q xs (n-1) nid) [a,c,t] 'T' -> concatMap (\q -> ssp q xs (n-1) nid) [a,c,g] 'I' -> concatMap (\q -> ssp q xs n nid) [a,c,g,t] otherwise -> concatMap (\q -> ssp q xs (n-1) nid) [a,c,g,t] perfmatch = case x of 'A' -> ssp a xs n nid 'C' -> ssp c xs n nid 'G' -> ssp g xs n nid 'T' -> ssp t xs n nid otherwise -> [] in perfmatch ++ if n > 0 then subs ++ indels else [] searchShrubTK3 :: Int -> Shrub a -> String -> Int -> Int -> [(Int,a)] searchShrubTK3 _ Empty _ _ _ = [] searchShrubTK3 _ (Leaf v) [] n _ = [(n,v)] searchShrubTK3 _ (Branch a c g t) [] n nid = concatMap (\q -> searchShrubTK3 0 q [] n nid) [a,c,g,t] searchShrubTK3 0 (Branch a c g t) _ n nid = concatMap (\q -> searchShrubTK3 0 q [] n nid) [a,c,g,t] searchShrubTK3 _ (Leaf v) _ n _ = [(n,v)] searchShrubTK3 tk sb@(Branch a c g t) l@(x:xs) n nid = let ssp = searchShrubTK3 tk' tk' = tk - 1 freeindel = nid > 0 n' = if freeindel then n-1 else n-2 nid' = if freeindel then nid-1 else nid indels = if n' < 0 then [] else concatMap (\q -> searchShrubTK3 tk' q l n' nid') [a,c,g,t] ++ case l of (_:_) -> searchShrubTK3 tk sb xs n' nid' _ -> [] subs = case x of 'A' -> concatMap (\q -> ssp q xs (n-1) nid) [c,g,t] 'C' -> concatMap (\q -> ssp q xs (n-1) nid) [a,g,t] 'G' -> concatMap (\q -> ssp q xs (n-1) nid) [a,c,t] 'T' -> concatMap (\q -> ssp q xs (n-1) nid) [a,c,g] 'I' -> concatMap (\q -> ssp q xs n nid) [a,c,g,t] otherwise -> concatMap (\q -> ssp q xs (n-1) nid) [a,c,g,t] perfmatch = case x of 'A' -> ssp a xs n nid 'C' -> ssp c xs n nid 'G' -> ssp g xs n nid 'T' -> ssp t xs n nid otherwise -> [] in perfmatch ++ if n > 0 then subs ++ indels else [] searchShrubXF :: Shrub a -> String -> Int -> Bool -> [(Int,a)] searchShrubXF Empty _ _ _ = [] searchShrubXF (Leaf v) _ n bul = if bul then [(n,v)] else [] searchShrubXF (Branch a c g t) [] n _ = [] searchShrubXF sb@(Branch a c g t) l@(x:xs) n _ = let perfmatch = case x of 'A' -> searchShrubXF a xs n True 'C' -> searchShrubXF c xs n True 'G' -> searchShrubXF g xs n True 'T' -> searchShrubXF t xs n True otherwise -> [] indels = concatMap (\q -> searchShrubXF q l (n-1) False) [a,c,g,t] ++ case l of (_:_) -> searchShrubXF sb xs (n-1) False _ -> [] subs = case x of 'A' -> concatMap (\q -> searchShrubXF q xs (n-1) False) [c,g,t] 'C' -> concatMap (\q -> searchShrubXF q xs (n-1) False) [a,g,t] 'G' -> concatMap (\q -> searchShrubXF q xs (n-1) False) [a,c,t] 'T' -> concatMap (\q -> searchShrubXF q xs (n-1) False) [a,c,g] otherwise -> concatMap (\q -> searchShrubXF q xs (n-1) False) [a,c,g,t] in perfmatch ++ if n > 0 then subs else [] -- | IonTorrent Shrub search searchShrubIT :: Shrub a -> String -> Int -> [(Int, (a, String))] searchShrubIT Empty _ _ = [] searchShrubIT (Leaf v) s n = [(n, (v, s))] searchShrubIT (Branch a c g t) [] n = [] --concatMap (\q -> searchShrubIT q [] n) [a,c,g,t] searchShrubIT sb@(Branch a c g t) l@(x:xs) n | n > 0 = perfmatch ++ subs ++ indels | otherwise = perfmatch where perfmatch = case x of 'A' -> searchShrubIT a xs n 'C' -> searchShrubIT c xs n 'G' -> searchShrubIT g xs n 'T' -> searchShrubIT t xs n otherwise -> [] indels = concatMap (\q -> searchShrubIT q l (n-1)) [a,c,g,t] ++ searchShrubIT sb xs (n-1) subs = case x of 'A' -> concatMap (\q -> searchShrubIT q xs (n-3)) [c,g,t] 'C' -> concatMap (\q -> searchShrubIT q xs (n-3)) [a,g,t] 'G' -> concatMap (\q -> searchShrubIT q xs (n-3)) [a,c,t] 'T' -> concatMap (\q -> searchShrubIT q xs (n-3)) [a,c,g] otherwise -> concatMap (\q -> searchShrubIT q xs (n-3)) [a,c,g,t] nubMax :: (Ord a, Eq b) => [(a,b)] -> [(a,b)] nubMax = foldl' go [] where go l i@(m, iv) = maybe (i:l) (\fn -> if m > fst (l !! fn) then i:(take fn l ++ drop (fn+1) l) else l) (findIndex (\(p, q) -> iv == q) l) --nubMax' :: (Ord a, Eq b) => [(a,b)] -> [(a,b)] --nubMax' = map (uncurry (flip (,))) . HM.toList $ foldl' go HM.empty where --go hm i@(m, iv) = maybe (HM.insert iv m hm) (\fn -> if m > fn then HM.insert iv m hm else hm) (HM.lookup iv hm) searchShrubCount :: Shrub a -> String -> Int -> [(Int,Int,a)] searchShrubCount sb sq mm = sshrub sb sq 0 mm where sshrub Empty _ _ _ = [] sshrub (Leaf v) [] cnt n = [(n,cnt,v)] sshrub (Branch a c g t) [] cnt n = concatMap (\q -> sshrub q [] cnt n) [a,c,g,t] sshrub (Leaf v) l cnt n = [(n,cnt,v)] sshrub sb@(Branch a c g t) l@(x:xs) cnt n = let cnt' = cnt + 1 perfmatch = case x of 'A' -> sshrub a xs cnt' n 'C' -> sshrub c xs cnt' n 'G' -> sshrub g xs cnt' n 'T' -> sshrub t xs cnt' n otherwise -> [] indels = concatMap (\q -> sshrub q l cnt (n-1)) [a,c,g,t] ++ case l of (_:_) -> sshrub sb xs cnt' (n-1) _ -> [] subs = case x of 'A' -> concatMap (\q -> sshrub q xs cnt' (n-1)) [c,g,t] 'C' -> concatMap (\q -> sshrub q xs cnt' (n-1)) [a,g,t] 'G' -> concatMap (\q -> sshrub q xs cnt' (n-1)) [a,c,t] 'T' -> concatMap (\q -> sshrub q xs cnt' (n-1)) [a,c,g] otherwise -> concatMap (\q -> sshrub q xs cnt' (n-1)) [a,c,g,t] in perfmatch ++ if n > 0 then subs else []