{-| This models the PSL format used by e.g. the alignment tool BLAT.  
    It is a simple, textual representation of (spliced) alignments,
    with tab-separated fields.

    See http://genome.ucsc.edu/FAQ/FAQformat#format2 for details.
-}
module Bio.Alignment.PSL where

import qualified Data.ByteString.Lazy.Char8 as B
import Data.ByteString.Lazy.Char8 (ByteString)

-- encodes the fields in a Blat PSL record
-- NB! laziness issues, if not fully evaluated, this will hang onto the input.
data PSL = PSL   { match, mismatch, repmatch, ncount
                 , qgapcount, qgaplength, tgapcount, tgaplength :: Int 
                 , strand :: ByteString
                 , qname :: ByteString, qsize, qstart, qend :: Int
                 , tname :: ByteString, tsize, tstart, tend :: Int
                 , blockcount :: Int, blocksizes, qstarts, tstarts :: [Int]
                 } deriving Show
{-
sread :: ByteString -> Strand
sread b = case B.uncons b of 
  Just ('-',_) -> Minus
  Just ('+',_) -> Plus
  _ -> error ("Cant't parse '"++B.unpack b++"' as strand")

swrite :: Strand -> ByteString
swrite Minus = B.pack "-"
swrite Plus  = B.pack "+"
-}

readPSL :: FilePath -> IO [PSL]
readPSL f = parsePSL `fmap` B.readFile f

writePSL :: FilePath -> [PSL] -> IO ()
writePSL f =  B.writeFile f . unparsePSL

parsePSL :: ByteString -> [PSL]
parsePSL s = map parseLine $ B.lines $ dropHeader
  where
    -- dropHeader requires strict adherence to BLAT output.  Perhaps a looser check is better?
    dropHeader | pslHeader `B.isPrefixOf` s = B.drop (B.length pslHeader) s
               | otherwise = error "PSL header mismatch: (Todo: insert specifics here)"
    parseLine l = let fs = B.split '\t' l
                      ri :: Int -> Int
                      ri i = maybe (error ("Can't parse field "++show i++" '"++B.unpack (fs!!i)++"' as an integer")) fst $ B.readInt (fs!!i)
                      rl j = map (\w -> maybe undefined fst (B.readInt w)) $ init $ B.split ',' (fs!!j)
                  in PSL (ri 0) (ri 1) (ri 2) (ri 3) (ri 4) (ri 5) (ri 6) (ri 7) (fs!!8) (fs!!9) (ri 10) (ri 11) (ri 12) (fs!!13) (ri 14) (ri 15) (ri 16) (ri 17) (rl 18) (rl 19) (rl 20)
                           


unparsePSL :: [PSL] -> ByteString
unparsePSL bs = addHeader $ B.unlines $ map format1 bs
  where addHeader = B.append pslHeader
        format1 :: PSL -> ByteString
        format1 = undefined

pslHeader = B.pack ("psLayout version 3\n\n"
                    ++"match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ        \tQ   \tQ    \tQ  \tT        \tT   \tT    \tT  \tblock\tblockSizes \tqStarts\t tStarts\n"
                    ++"     \tmatch\tmatch\t   \tcount\tbases\tcount\tbases\t      \tname     \tsize\tstart\tend\tname     \tsize\tstart\tend\tcount\n"
                    ++"---------------------------------------------------------------------------------------------------------------------------------------------------------------\n")