-- | Model the BED format, according to the spec at
-- http://genome.ucsc.edu/FAQ/FAQformat#format1

module Bio.Alignment.BED ( BED(..),Dir(..)
                         , readBED, writeBED
                         ) where

import Bio.Sequence.SeqData (Offset)
import Data.Word
import qualified Data.ByteString.Lazy.Char8 as B
import Data.ByteString.Lazy.Char8 (ByteString)

-- | The BED data type Note that the specification allows a variable number of fields, with
-- only the three first required.  This definition requires all fields to be present.
data BED = BED { chrom                :: ByteString
               , chromStart, chromEnd :: Offset
               , name                 :: ByteString
               , score                :: Int                 -- ^ Range 0..1000
               , strand               :: Dir
               , thickStart, thickEnd :: Offset
               , itemRGB              :: (Word8,Word8,Word8) -- ^ Available BED files appear to not
                                                             -- support this format.  RGB is therefore
                                                             -- ignored (read and written as '0')
               , blockSizeStart :: [(Offset,Offset)]         -- ^ Lists of lenght blockCount, blockStarts
                                                             --  are relative to chromStart
               }

-- | Yet another direction data structure.
data Dir = Fwd | Rev deriving Eq

instance Show Dir where
    show Fwd = "+"
    show Rev = "-"

instance Read Dir where
    readsPrec _ ('+':rest) = [(Fwd,rest)]
    readsPrec _ ('-':rest) = [(Rev,rest)]
    readsPrec _ x = error ("Can't parse '"++x++"' as a Dir")

readBED :: FilePath -> IO [BED]
readBED f = (map (unpack1 . myWords) . B.lines) `fmap` B.readFile f
    where unpack1 [c,cs,ce,nm,sc,str,ts,te,_rgb,_bc,bsz,bst] =
              BED c (i cs) (i ce) nm (i sc) (i str) (i ts) (i te) (0,0,0) (zip (i' bsz) (i' bst))
          unpack1 _x = error ("incorrect number of fields in BED record ("++show (length _x)++"):\n"++show _x)

          i :: Read a => ByteString -> a
          i = read . B.unpack
          i' :: Read a => ByteString -> [a]
          i' = read . (++"]") . ('[':) . B.unpack

          myWords = map rmtabs . B.groupBy (const (/='\t'))
          rmtabs x = if not (B.null x) && B.head x == '\t' then B.drop 1 x else x

instance Show BED where
    show = B.unpack . pack1

writeBED :: FilePath -> [BED] -> IO ()
writeBED f = B.writeFile f . B.unlines . map pack1

pack1 :: BED -> ByteString
pack1 (BED c cs ce nm sc str ts te _rgb bszst) =
          let bl = length bszst
              (bsz,bst) = unzip bszst
              b :: Show a => a -> ByteString
              b = B.pack . show
              b':: Show a => [a] -> ByteString
              b'= B.pack . drop 1 . init . show
          in B.intercalate (B.pack "\t") [c, b cs, b ce, nm, b sc, b str, b ts, b te, b (0::Int), b bl, b' bsz, b' bst]