module Data.BBI.BigBed ( BBedFile , openBBedFile , closeBBedFile , query ) where import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC import qualified Data.Map as M import Conduit import Data.BBI import Data.BBI.Utils newtype BBedFile = BBedFile BbiFile openBBedFile :: FilePath -> IO BBedFile openBBedFile fl = do bbi <- openBbiFile fl case (_filetype . _header) bbi of BigBed -> return $ BBedFile bbi _ -> error "not a bigbed file" closeBBedFile :: BBedFile -> IO () closeBBedFile (BBedFile fl) = closeBbiFile fl -- | Find all regions that are overlapped with the query in the bigbed file. query :: (B.ByteString, Int, Int) -- ^ (Chr, start, end) -> BBedFile -> ConduitT () (B.ByteString, Int, Int, B.ByteString) IO () query (chr, s, e) (BBedFile fl) = case getChromId fl chr of Just i -> do blks <- lift $ overlappingBlocks fl (i, s, e) readBlocks fl blks .| concatMapC (map f . decodeBlock endi) .| filterC (\(_,s',e',_) -> isOverlapped (s',e') (s,e)) Nothing -> return () where endi = _endian $ _header fl id2Chr = M.fromList $ map (\(a,(b,_)) -> (b,a) ) $ M.toList $ _chromTree fl f (a,b,c,d) = (M.findWithDefault undefined a id2Chr, b, c, d) -- | Whether two intervals are overlapped. isOverlapped :: (Int, Int) -> (Int, Int) -> Bool isOverlapped (lo1, hi1) (lo2, hi2) = not (lo2 > hi1 || lo1 > hi2) {-# INLINE isOverlapped #-} -- | Decode blocks in BigBed. decodeBlock :: Endianness -> B.ByteString -> [(Int, Int, Int, B.ByteString)] decodeBlock e blk | B.null blk = [] | otherwise = (chr, start, end, rest) : decodeBlock e (B.tail remain) where chr = readInt32 e $ B.take 4 blk start = readInt32 e $ B.take 4 $ B.drop 4 blk end = readInt32 e $ B.take 4 $ B.drop 8 blk (rest, remain) = BC.break (=='\0') $ B.drop 12 blk {-# INLINE decodeBlock #-}