-- | MAF, the multiple alignment format, provides multiple alignments at
-- genome-wide scale. It is often used for n-way alignments, with n genomes
-- aligned against one reference genome.
--
-- We follow <http://genome.ucsc.edu/FAQ/FAQformat.html#format5>
--
-- TODO this is not a complete implementation of the MAF format

module Biobase.MAF where

import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as M



-- | A MAF file: the header (the initial lines beginning with '#') together
-- with the alignment blocks of the file.

data MAF = MAF
  { mafheader :: KVs
    -- ^ key-value pairs of the file-level header
  , comments  :: [BS.ByteString]
    -- ^ comment lines; NOTE(review): how these differ from 'metadata' is
    -- not visible in this module -- confirm against the parser
  , metadata  :: [BS.ByteString]
    -- ^ metadata lines; NOTE(review): see the note on 'comments'
  , blocks    :: [Alignment]
    -- ^ the alignment blocks of the file
  }
  deriving Show

-- | A single alignment block: its header key-value pairs and its aligned
-- sequences.

data Alignment = Alignment
  { header    :: KVs
    -- ^ key-value pairs attached to this alignment
  , sequences :: [Aligned]
    -- ^ the aligned sequences making up this alignment
  }
  deriving Show

-- | Some key-value pairs are defined; these are stored in a map from key to
-- value. The alias is used both for the file-level header ('mafheader') and
-- for the header of each alignment ('header').

type KVs = M.Map BS.ByteString BS.ByteString

-- | A single aligned sequence.
--
-- TODO this should later on be a "BioSeq" (cf. Ketil Malde's work) but that
-- change stays in an experimental repo until "BioSeq" is available on Hackage

data Aligned = Aligned
  { key        :: BS.ByteString
    -- ^ identifier of the sequence; presumably the @src@ column of a MAF
    -- @s@ line -- TODO confirm against the parser
  , start      :: Int
    -- ^ start position of the aligned region (base of the coordinate
    -- system is not visible here -- TODO confirm)
  , length     :: Int
    -- ^ length of the aligned region. NOTE: this selector shares its name
    -- with the Prelude function @length@, so code using either one next to
    -- this module typically needs a qualified import or a @hiding@ clause.
  , strand     :: Char
    -- ^ strand indicator; presumably @\'+\'@ or @\'-\'@ -- TODO confirm
  , genomesize :: Int
    -- ^ presumably the total size of the source sequence (@srcSize@ in the
    -- UCSC description) -- TODO confirm
  , value      :: BS.ByteString
    -- ^ the aligned sequence text itself
  }
  deriving Show