module Bio.Sequence.PhdData where

import Bio.Core.Sequence
import qualified Data.ByteString.Lazy as LB
import qualified Data.ByteString.Lazy.Char8 as LBC

{-- A .phd file consists of a DNA block with base and quality 
    values, followed by one or more (optional) tag blocks. --}
data Phd = Phd Comment DNABlock (Maybe [PhdTag]) deriving (Show)

{-- These types are subject to change if it improves functionality,
    but for now it's simplest to just call them String, Int etc.--}
data Comment = Comment
    { chromatFile        :: FilePath
    , abiThumbprint      :: String
    , phredVersion       :: String
    , callMethod         :: String
    , qualityLevels      :: Int
    , time               :: String
    , traceArrayMinIndex :: Int
    , traceArrayMaxIndex :: Int
    , trim               :: String
    , chem               :: String
    , dye                :: String
    } deriving (Eq)

instance Show Comment where
    show (Comment cf abit pv cm ql ti mintai maxtai tr ch dy) =
      ("\n" ++) $ unlines 
                    [ "CHROMAT_FILE: "   ++ cf
                    , "ABI_THUMBPRINT: " ++ abit
                    , "PHRED_VERSION: "  ++ pv 
                    , "CALL_METHOD: "    ++ cm
                    , "QUALITY_LEVELS: " ++ show ql 
                    , "TIME: "           ++ show ti
                    , "TRACE_ARRAY_MIN_INDEX: " ++ show mintai
                    , "TRACE_ARRAY_MAX_INDEX: " ++ show maxtai
                    , "TRIM: "           ++ tr
                    , "CHEM: "           ++ ch
                    , "DYE: "            ++ dy
                    ]

data DNABlock = DNABlock
    { label        :: String
    , bases        :: SeqData
    , qualities    :: QualData
    , traceIndices :: [Int]
    }

instance Show DNABlock where
  show = LBC.unpack . toFasta

data PhdTag = PhdTag
    { tagType              :: String
    , source               :: String
    , unpaddedReadPosition :: [Offset]
    , date                 :: String
    , comment              :: String  
    } deriving (Eq)

instance Show PhdTag where
  show (PhdTag tt so urp da co) =
    ("\n" ++) $ unlines $ map (" " ++)
      [ "TYPE: "              ++ show tt
      , "SOURCE: "            ++ show so
      , "UNPADDED_READ_POS: " ++ show (map unOff urp)
      , "DATE: "              ++ show da
      , "COMMENT: "           ++ show co ]

instance BioSeq DNABlock where
  seqlabel  db = SeqLabel $ LBC.pack $ label db
  seqdata   db = bases db
  seqlength db = Offset $ LBC.length $ unSD $ bases db

instance BioSeqQual DNABlock where
  seqqual = qualities

-- Some default values for the data types, useful for debugging in ghci

defaultComment = Comment { chromatFile        = ""
                         , abiThumbprint      = "0"
                         , phredVersion       = "0.980904.e"
                         , callMethod         = "phred"
                         , qualityLevels      = 99
                         , time               = "" 
                         , traceArrayMinIndex = 0
                         , traceArrayMaxIndex = 1
                         , trim               = ""
                         , chem               = "unknown"
                         , dye                = "unknown" }

defaultDNABlock = DNABlock { label        = "some_dna"
                           , bases        = SeqData  $ LBC.pack "aatgcatcta"
                           , qualities    = QualData $ LBC.pack "0000000000"
                           , traceIndices = [0,1,2,3,4,5,6,7,8,9,10] }

defaultPhdTag = PhdTag { tagType              = "polymorphism"
                       , source               = "polyphred"
                       , unpaddedReadPosition = [5, 5]
                       , date                 = "01/01/70 00:00:00"
                       , comment              = "" }