module Bio.GB.Type ( GenBankSequence (..) , Meta (..) , Form (..) , Locus (..) , Version (..) , Source (..) , Reference (..) , Feature (..) ) where import Bio.Sequence (IsMarking, MarkedSequence) import Data.Text (Text) -- | Type that represents contents of .gb file that is used to store information about -- genetic constructions. -- data GenBankSequence = GenBankSequence { meta :: Meta -- ^ meta-information about the sequence , gbSeq :: MarkedSequence Feature Char -- ^ sequence that is marked by 'Feature's } deriving (Eq, Show) -------------------------------------------------------------------------------- -- Block with meta-information. -------------------------------------------------------------------------------- -- | Meta-information about sequence. -- data Meta = Meta { locus :: Locus -- ^ general info about sequence , definition :: Maybe Text -- ^ brief description of sequence , accession :: Maybe Text -- ^ the unique identifier for a sequence record , version :: Maybe Version -- ^ id of sequence in GenBank database , keywords :: Maybe Text -- ^ word or phrase describing the sequence , source :: Maybe Source -- ^ free-format information including an abbreviated form of the organism name, -- sometimes followed by a molecule type , references :: [Reference] -- ^ publications by the authors of the sequence that discuss the data reported in the record , comments :: [Text] -- ^ comments about the sequence (note that there can be (!!!) empty comments) } deriving (Eq, Show) -- | First line that should be present in every .gb file. Contains general info about sequence. -- data Locus = Locus { name :: Text -- ^ name of sequence , len :: Int -- ^ length of sequence , molType :: Text -- ^ type of molecule that is sequenced , form :: Maybe Form -- ^ form of sequence , gbDivision :: Maybe Text -- ^ GenBank division to which a record belongs , modificationDate :: Text -- ^ date of last modification of sequence } deriving (Eq, Show) -- | At this moment there are two known (to me) -- forms of seuqences that can be present in .gb file. -- data Form = Linear | Circular deriving (Eq, Show) -- | Id of sequence in GenBank database. -- data Version = Version { versionT :: Text -- ^ id itself , gbId :: Maybe Text -- ^ GenInfo Identifier that is assigned when sequence changes } deriving (Eq, Show) -- | Information about source of this sequence. -- data Source = Source { sourceT :: Text -- ^ free-format (as if all this format is not too much "free format") information -- including an abbreviated form of the organism name, -- sometimes followed by a molecule type , organism :: Maybe Text -- ^ the formal scientific name for the source organism } deriving (Eq, Show) -- | Publications by the authors of the sequence that discuss the data reported in the record. -- data Reference = Reference { referenceT :: Text -- ^ reference itself , authors :: Maybe Text -- ^ list of authors in the order in which they appear in the cited article , title :: Maybe Text -- ^ title of the published work , journal :: Maybe Text -- ^ MEDLINE abbreviation of the journal name , pubmed :: Maybe Text -- ^ PubMed Identifier } deriving (Eq, Show) -------------------------------------------------------------------------------- -- Block with FEATURES table. -- -- FEATURES table contains information about genes and gene products, as well as regions of biological -- significance reported in the sequence. These can include regions of the sequence -- that code for proteins and RNA molecules, as well as a number of other features. -- More about FEATURES table: http://www.insdc.org/documents/feature_table.html -------------------------------------------------------------------------------- -- | One single feature. -- data Feature = Feature { fName :: Text -- ^ main information about feature , fStrand53 :: Bool -- ^ set to True if sequence is contained on 5'-3' strand. -- Set to False otherwise , fProps :: [(Text, Text)] -- ^ properties of feature (such as "label", "gene", "note" etc.) } deriving (Eq, Show, Ord) instance IsMarking Feature