module Bio.Alignment.BED ( BED(..),Dir(..)
, readBED, writeBED
) where
import Bio.Sequence.SeqData (Offset)
import Data.Word
import qualified Data.ByteString.Lazy.Char8 as B
import Data.ByteString.Lazy.Char8 (ByteString)
data BED = BED { chrom :: ByteString
, chromStart, chromEnd :: Offset
, name :: ByteString
, score :: Int
, strand :: Dir
, thickStart, thickEnd :: Offset
, itemRGB :: (Word8,Word8,Word8)
, blockSizeStart :: [(Offset,Offset)]
}
data Dir = Fwd | Rev deriving Eq
instance Show Dir where
show Fwd = "+"
show Rev = "-"
instance Read Dir where
readsPrec _ ('+':rest) = [(Fwd,rest)]
readsPrec _ ('-':rest) = [(Rev,rest)]
readsPrec _ x = error ("Can't parse '"++x++"' as a Dir")
readBED :: FilePath -> IO [BED]
readBED f = (map (unpack1 . myWords) . B.lines) `fmap` B.readFile f
where unpack1 [c,cs,ce,nm,sc,str,ts,te,_rgb,_bc,bsz,bst] =
BED c (i cs) (i ce) nm (i sc) (i str) (i ts) (i te) (0,0,0) (zip (i' bsz) (i' bst))
unpack1 _x = error ("incorrect number of fields in BED record ("++show (length _x)++"):\n"++show _x)
i :: Read a => ByteString -> a
i = read . B.unpack
i' :: Read a => ByteString -> [a]
i' = read . (++"]") . ('[':) . B.unpack
myWords = map rmtabs . B.groupBy (const (/='\t'))
rmtabs x = if not (B.null x) && B.head x == '\t' then B.drop 1 x else x
instance Show BED where
show = B.unpack . pack1
writeBED :: FilePath -> [BED] -> IO ()
writeBED f = B.writeFile f . B.unlines . map pack1
pack1 :: BED -> ByteString
pack1 (BED c cs ce nm sc str ts te _rgb bszst) =
let bl = length bszst
(bsz,bst) = unzip bszst
b :: Show a => a -> ByteString
b = B.pack . show
b':: Show a => [a] -> ByteString
b'= B.pack . drop 1 . init . show
in B.intercalate (B.pack "\t") [c, b cs, b ce, nm, b sc, b str, b ts, b te, b (0::Int), b bl, b' bsz, b' bst]