-- | Mafft data. -- -- NOTE: For the "-" strand, the start position is given by ("genome size" - -- "start position" + "length") -- -- TODO mafft blocks contain "i" parts which i haven't seen yet. These have to -- be parsed, too. module Biobase.DataSource.Mafft where import qualified Data.ByteString.Lazy.Char8 as BS -- | A complete Mafft file. data Mafft = Mafft { comments :: [BS.ByteString] , blocks :: [Block] } deriving (Show) -- | One Mafft block. -- -- THERE is "a ", "i ", "s " data Block = Block { alignment :: BS.ByteString , sequences :: [Sequence] } deriving (Show) -- | Encodes one sequence entry. data Sequence = Sequence { species :: BS.ByteString , start :: Int , length :: Int , strand :: PlusMinus , genomeSize :: Int , alignedSequence :: CompressedBS } deriving (Show) newtype PlusMinus = PlusMinus Char deriving (Eq,Show) newtype CompressedBS = CompressedBS BS.ByteString deriving (Eq,Show)