-- | Model the BED format, according to the spec at -- http://genome.ucsc.edu/FAQ/FAQformat#format1 module Bio.Alignment.BED ( BED(..),Dir(..) , readBED, writeBED ) where import Bio.Sequence.SeqData (Offset) import Data.Word import qualified Data.ByteString.Lazy.Char8 as B import Data.ByteString.Lazy.Char8 (ByteString) -- | The BED data type Note that the specification allows a variable number of fields, with -- only the three first required. This definition requires all fields to be present. data BED = BED { chrom :: ByteString , chromStart, chromEnd :: Offset , name :: ByteString , score :: Int -- ^ Range 0..1000 , strand :: Dir , thickStart, thickEnd :: Offset , itemRGB :: (Word8,Word8,Word8) -- ^ Available BED files appear to not -- support this format. RGB is therefore -- ignored (read and written as '0') , blockSizeStart :: [(Offset,Offset)] -- ^ Lists of lenght blockCount, blockStarts -- are relative to chromStart } -- | Yet another direction data structure. data Dir = Fwd | Rev deriving Eq instance Show Dir where show Fwd = "+" show Rev = "-" instance Read Dir where readsPrec _ ('+':rest) = [(Fwd,rest)] readsPrec _ ('-':rest) = [(Rev,rest)] readsPrec _ x = error ("Can't parse '"++x++"' as a Dir") readBED :: FilePath -> IO [BED] readBED f = (map (unpack1 . myWords) . B.lines) `fmap` B.readFile f where unpack1 [c,cs,ce,nm,sc,str,ts,te,_rgb,_bc,bsz,bst] = BED c (i cs) (i ce) nm (i sc) (i str) (i ts) (i te) (0,0,0) (zip (i' bsz) (i' bst)) unpack1 _x = error ("incorrect number of fields in BED record ("++show (length _x)++"):\n"++show _x) i :: Read a => ByteString -> a i = read . B.unpack i' :: Read a => ByteString -> [a] i' = read . (++"]") . ('[':) . B.unpack myWords = map rmtabs . B.groupBy (const (/='\t')) rmtabs x = if not (B.null x) && B.head x == '\t' then B.drop 1 x else x instance Show BED where show = B.unpack . pack1 writeBED :: FilePath -> [BED] -> IO () writeBED f = B.writeFile f . B.unlines . map pack1 pack1 :: BED -> ByteString pack1 (BED c cs ce nm sc str ts te _rgb bszst) = let bl = length bszst (bsz,bst) = unzip bszst b :: Show a => a -> ByteString b = B.pack . show b':: Show a => [a] -> ByteString b'= B.pack . drop 1 . init . show in B.intercalate (B.pack "\t") [c, b cs, b ce, nm, b sc, b str, b ts, b te, b (0::Int), b bl, b' bsz, b' bst]