{-# Language PackageImports #-} module Main where import "biopsl" Bio.Alignment.PSL -- import Bio.Sequence (SeqData) import qualified Data.HashSet as S import Data.List (partition,foldl',sortBy) import qualified Data.ByteString.Lazy.Char8 as B import Data.Hashable import Options main :: IO () main = do opts <- getArgs pss <- mapM readPSL $ files opts mapM_ (if long_output opts then print_cluster else print_list) . cluster_sl qname tname . concat $ pss print_list :: (S.HashSet B.ByteString, [PSL]) -> IO () print_list (s,_) = B.putStrLn . B.unwords . S.toList $ s print_cluster :: (S.HashSet B.ByteString, [PSL]) -> IO () print_cluster (s,b) = do B.putStrLn . B.unwords . S.toList $ s B.putStrLn $ unparsePSL $ sortOn tname $ sortOn tstarts $ b sortOn :: Ord a1 => (a -> a1) -> [a] -> [a] sortOn f = sortBy (\x y -> compare (f x) (f y)) cluster_sl :: (Hashable a, Ord a) => (b->a) -> (b->a) -> [b] -> [(S.HashSet a,[b])] cluster_sl f1 f2 = foldl' csl [] where csl cs b = -- can be short circuited for more performance let (acs,tmp) = partition (S.member (f1 b) . fst) cs (bcs,rest) = partition (S.member (f2 b) . fst) tmp in case (acs,bcs) of ([(ao,as)],[(bo,bs)]) -> (S.union ao bo,b:as++bs):rest ([(ao,as)],[]) -> (S.insert (f2 b) ao, b:as):rest ([],[(bo,bs)]) -> (S.insert (f1 b) bo, b:bs):rest ([],[]) -> (S.fromList [f1 b,f2 b],[b]):rest _ -> error "Grave mistake" select :: (t -> Bool) -> [t] -> ([t],[t]) select p = go [] where go acc (x:xs) | p x = ([x],acc++xs) | otherwise = go (x:acc) xs go acc [] = ([],acc)