-- | MAF (multiple alignment format) provides multiple alignments at
-- genome-wide scale. It is often used for n-way alignments, with n genomes
-- aligned against one reference genome.
--
-- We follow an external description of the format.
-- NOTE(review): the reference this sentence originally pointed to is missing
-- from the comment; restore the link to the format specification.
--
-- TODO this is not a complete implementation of the MAF format

module Biobase.MAF where

import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as M



-- | A complete MAF file: the file-level header, the comment and metadata
-- lines, and the alignment blocks.
--
-- NOTE(review): the previous comment described this type as only "the header
-- of a MAF file", read from the initial lines beginning with @#@. The record
-- also carries the alignment 'blocks', so it models the whole file. Which
-- @#@ lines end up in 'mafheader', 'comments' and 'metadata' is decided by
-- the parser, which is not part of this module -- confirm there.

data MAF = MAF
  { mafheader :: KVs              -- ^ file-level key-value pairs
  , comments  :: [BS.ByteString]  -- ^ comment lines, kept as raw bytes
  , metadata  :: [BS.ByteString]  -- ^ metadata lines, kept as raw bytes
  , blocks    :: [Alignment]      -- ^ the alignment blocks of the file
  } deriving (Show)

-- | A single alignment block: its own key-value header together with the
-- sequences that are aligned to each other.

data Alignment = Alignment
  { header    :: KVs        -- ^ key-value pairs attached to this block
  , sequences :: [Aligned]  -- ^ the aligned sequences of this block
  } deriving (Show)

-- | Some key-value pairs are defined by the format; they are stored in a map
-- from key to value, both kept as raw bytes.

type KVs = M.Map BS.ByteString BS.ByteString

-- | A single aligned sequence.
--
-- The field 'length' has the same name as @Prelude.length@. Code that uses
-- both must import this module qualified, or hide one of the two names,
-- otherwise the unqualified name is ambiguous.
--
-- NOTE(review): the per-field notes below are inferred from the field names
-- and the usual layout of a MAF sequence line; verify them against the
-- format specification and the parser.
--
-- TODO this should later on be a @BioSeq@ (cf. Ketil Malde's work) but that
-- change stays in an experimental repo until @BioSeq@ is available on hackage

data Aligned = Aligned
  { key        :: BS.ByteString  -- ^ presumably the name of the source sequence
  , start      :: Int            -- ^ presumably the start of the aligned region in the source
  , length     :: Int            -- ^ presumably the size of the aligned region (clashes with @Prelude.length@)
  , strand     :: Char           -- ^ presumably @+@ or @-@
  , genomesize :: Int            -- ^ presumably the size of the entire source sequence
  , value      :: BS.ByteString  -- ^ presumably the aligned text, gaps included
  } deriving (Show)