module Bio.GB.Type
  ( GenBankSequence (..)
  , Meta (..)
  , Form (..)
  , Locus (..)
  , Version (..)
  , Source (..)
  , Reference (..)
  , Feature (..)
  ) where

import           Bio.Sequence (IsMarking, MarkedSequence)
import           Data.Text    (Text)

-- | Type that represents contents of .gb file that is used to store information about
-- genetic constructions.
--
data GenBankSequence = GenBankSequence { GenBankSequence -> Meta
meta  :: Meta                        -- ^ meta-information about the sequence
                                       , GenBankSequence -> MarkedSequence Feature Char
gbSeq :: MarkedSequence Feature Char -- ^ sequence that is marked by 'Feature's
                                       }
  deriving (GenBankSequence -> GenBankSequence -> Bool
(GenBankSequence -> GenBankSequence -> Bool)
-> (GenBankSequence -> GenBankSequence -> Bool)
-> Eq GenBankSequence
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GenBankSequence -> GenBankSequence -> Bool
$c/= :: GenBankSequence -> GenBankSequence -> Bool
== :: GenBankSequence -> GenBankSequence -> Bool
$c== :: GenBankSequence -> GenBankSequence -> Bool
Eq, Int -> GenBankSequence -> ShowS
[GenBankSequence] -> ShowS
GenBankSequence -> String
(Int -> GenBankSequence -> ShowS)
-> (GenBankSequence -> String)
-> ([GenBankSequence] -> ShowS)
-> Show GenBankSequence
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [GenBankSequence] -> ShowS
$cshowList :: [GenBankSequence] -> ShowS
show :: GenBankSequence -> String
$cshow :: GenBankSequence -> String
showsPrec :: Int -> GenBankSequence -> ShowS
$cshowsPrec :: Int -> GenBankSequence -> ShowS
Show)

--------------------------------------------------------------------------------
-- Block with meta-information.
--------------------------------------------------------------------------------

-- | Meta-information about sequence.
--
data Meta = Meta { Meta -> Locus
locus      :: Locus         -- ^ general info about sequence
                 , Meta -> Maybe Text
definition :: Maybe Text    -- ^ brief description of sequence
                 , Meta -> Maybe Text
accession  :: Maybe Text    -- ^ the unique identifier for a sequence record
                 , Meta -> Maybe Version
version    :: Maybe Version -- ^ id of sequence in GenBank database
                 , Meta -> Maybe Text
keywords   :: Maybe Text    -- ^ word or phrase describing the sequence
                 , Meta -> Maybe Source
source     :: Maybe Source  -- ^ free-format information including an abbreviated form of the organism name,
                                               --   sometimes followed by a molecule type
                 , Meta -> [Reference]
references :: [Reference]   -- ^ publications by the authors of the sequence that discuss the data reported in the record
                 , Meta -> [Text]
comments   :: [Text]        -- ^ comments about the sequence (note that there can be (!!!) empty comments)
                 }
  deriving (Meta -> Meta -> Bool
(Meta -> Meta -> Bool) -> (Meta -> Meta -> Bool) -> Eq Meta
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Meta -> Meta -> Bool
$c/= :: Meta -> Meta -> Bool
== :: Meta -> Meta -> Bool
$c== :: Meta -> Meta -> Bool
Eq, Int -> Meta -> ShowS
[Meta] -> ShowS
Meta -> String
(Int -> Meta -> ShowS)
-> (Meta -> String) -> ([Meta] -> ShowS) -> Show Meta
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Meta] -> ShowS
$cshowList :: [Meta] -> ShowS
show :: Meta -> String
$cshow :: Meta -> String
showsPrec :: Int -> Meta -> ShowS
$cshowsPrec :: Int -> Meta -> ShowS
Show)

-- | First line that should be present in every .gb file. Contains general info about sequence.
--
data Locus = Locus { Locus -> Text
name             :: Text       -- ^ name of sequence
                   , Locus -> Int
len              :: Int        -- ^ length of sequence
                   , Locus -> Text
molType          :: Text       -- ^ type of molecule that is sequenced
                   , Locus -> Maybe Form
form             :: Maybe Form -- ^ form of sequence
                   , Locus -> Maybe Text
gbDivision       :: Maybe Text -- ^ GenBank division to which a record belongs
                   , Locus -> Text
modificationDate :: Text       -- ^ date of last modification of sequence
                   }
  deriving (Locus -> Locus -> Bool
(Locus -> Locus -> Bool) -> (Locus -> Locus -> Bool) -> Eq Locus
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Locus -> Locus -> Bool
$c/= :: Locus -> Locus -> Bool
== :: Locus -> Locus -> Bool
$c== :: Locus -> Locus -> Bool
Eq, Int -> Locus -> ShowS
[Locus] -> ShowS
Locus -> String
(Int -> Locus -> ShowS)
-> (Locus -> String) -> ([Locus] -> ShowS) -> Show Locus
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Locus] -> ShowS
$cshowList :: [Locus] -> ShowS
show :: Locus -> String
$cshow :: Locus -> String
showsPrec :: Int -> Locus -> ShowS
$cshowsPrec :: Int -> Locus -> ShowS
Show)

-- | At this moment there are two known (to me)
-- forms of seuqences that can be present in .gb file.
--
data Form = Linear | Circular
  deriving (Form -> Form -> Bool
(Form -> Form -> Bool) -> (Form -> Form -> Bool) -> Eq Form
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Form -> Form -> Bool
$c/= :: Form -> Form -> Bool
== :: Form -> Form -> Bool
$c== :: Form -> Form -> Bool
Eq, Int -> Form -> ShowS
[Form] -> ShowS
Form -> String
(Int -> Form -> ShowS)
-> (Form -> String) -> ([Form] -> ShowS) -> Show Form
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Form] -> ShowS
$cshowList :: [Form] -> ShowS
show :: Form -> String
$cshow :: Form -> String
showsPrec :: Int -> Form -> ShowS
$cshowsPrec :: Int -> Form -> ShowS
Show)

-- | Id of sequence in GenBank database.
--
data Version = Version { Version -> Text
versionT :: Text       -- ^ id itself
                       , Version -> Maybe Text
gbId     :: Maybe Text -- ^ GenInfo Identifier that is assigned when sequence changes
                       }
  deriving (Version -> Version -> Bool
(Version -> Version -> Bool)
-> (Version -> Version -> Bool) -> Eq Version
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Version -> Version -> Bool
$c/= :: Version -> Version -> Bool
== :: Version -> Version -> Bool
$c== :: Version -> Version -> Bool
Eq, Int -> Version -> ShowS
[Version] -> ShowS
Version -> String
(Int -> Version -> ShowS)
-> (Version -> String) -> ([Version] -> ShowS) -> Show Version
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Version] -> ShowS
$cshowList :: [Version] -> ShowS
show :: Version -> String
$cshow :: Version -> String
showsPrec :: Int -> Version -> ShowS
$cshowsPrec :: Int -> Version -> ShowS
Show)

-- | Information about source of this sequence.
--
data Source = Source { Source -> Text
sourceT  :: Text       -- ^ free-format (as if all this format is not too much "free format") information
                                              -- including an abbreviated form of the organism name,
                                              -- sometimes followed by a molecule type
                     , Source -> Maybe Text
organism :: Maybe Text -- ^ the formal scientific name for the source organism
                     }
  deriving (Source -> Source -> Bool
(Source -> Source -> Bool)
-> (Source -> Source -> Bool) -> Eq Source
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Source -> Source -> Bool
$c/= :: Source -> Source -> Bool
== :: Source -> Source -> Bool
$c== :: Source -> Source -> Bool
Eq, Int -> Source -> ShowS
[Source] -> ShowS
Source -> String
(Int -> Source -> ShowS)
-> (Source -> String) -> ([Source] -> ShowS) -> Show Source
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Source] -> ShowS
$cshowList :: [Source] -> ShowS
show :: Source -> String
$cshow :: Source -> String
showsPrec :: Int -> Source -> ShowS
$cshowsPrec :: Int -> Source -> ShowS
Show)

-- | Publications by the authors of the sequence that discuss the data reported in the record.
--
data Reference = Reference { Reference -> Text
referenceT :: Text       -- ^ reference itself
                           , Reference -> Maybe Text
authors    :: Maybe Text -- ^ list of authors in the order in which they appear in the cited article
                           , Reference -> Maybe Text
title      :: Maybe Text -- ^ title of the published work
                           , Reference -> Maybe Text
journal    :: Maybe Text -- ^ MEDLINE abbreviation of the journal name
                           , Reference -> Maybe Text
pubmed     :: Maybe Text -- ^ PubMed Identifier
                           }
  deriving (Reference -> Reference -> Bool
(Reference -> Reference -> Bool)
-> (Reference -> Reference -> Bool) -> Eq Reference
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Reference -> Reference -> Bool
$c/= :: Reference -> Reference -> Bool
== :: Reference -> Reference -> Bool
$c== :: Reference -> Reference -> Bool
Eq, Int -> Reference -> ShowS
[Reference] -> ShowS
Reference -> String
(Int -> Reference -> ShowS)
-> (Reference -> String)
-> ([Reference] -> ShowS)
-> Show Reference
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Reference] -> ShowS
$cshowList :: [Reference] -> ShowS
show :: Reference -> String
$cshow :: Reference -> String
showsPrec :: Int -> Reference -> ShowS
$cshowsPrec :: Int -> Reference -> ShowS
Show)

--------------------------------------------------------------------------------
-- Block with FEATURES table.
--
-- FEATURES table contains information about genes and gene products, as well as regions of biological
-- significance reported in the sequence. These can include regions of the sequence
-- that code for proteins and RNA molecules, as well as a number of other features.
-- More about FEATURES table: http://www.insdc.org/documents/feature_table.html
--------------------------------------------------------------------------------

-- | One single feature.
--
data Feature = Feature { Feature -> Text
fName     :: Text           -- ^ main information about feature
                       , Feature -> Bool
fStrand53 :: Bool           -- ^ set to True if sequence is contained on 5'-3' strand.
                                                     --   Set to False otherwise
                       , Feature -> [(Text, Text)]
fProps    :: [(Text, Text)] -- ^ properties of feature (such as "label", "gene", "note" etc.)
                       }
  deriving (Feature -> Feature -> Bool
(Feature -> Feature -> Bool)
-> (Feature -> Feature -> Bool) -> Eq Feature
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Feature -> Feature -> Bool
$c/= :: Feature -> Feature -> Bool
== :: Feature -> Feature -> Bool
$c== :: Feature -> Feature -> Bool
Eq, Int -> Feature -> ShowS
[Feature] -> ShowS
Feature -> String
(Int -> Feature -> ShowS)
-> (Feature -> String) -> ([Feature] -> ShowS) -> Show Feature
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Feature] -> ShowS
$cshowList :: [Feature] -> ShowS
show :: Feature -> String
$cshow :: Feature -> String
showsPrec :: Int -> Feature -> ShowS
$cshowsPrec :: Int -> Feature -> ShowS
Show, Eq Feature
Eq Feature
-> (Feature -> Feature -> Ordering)
-> (Feature -> Feature -> Bool)
-> (Feature -> Feature -> Bool)
-> (Feature -> Feature -> Bool)
-> (Feature -> Feature -> Bool)
-> (Feature -> Feature -> Feature)
-> (Feature -> Feature -> Feature)
-> Ord Feature
Feature -> Feature -> Bool
Feature -> Feature -> Ordering
Feature -> Feature -> Feature
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Feature -> Feature -> Feature
$cmin :: Feature -> Feature -> Feature
max :: Feature -> Feature -> Feature
$cmax :: Feature -> Feature -> Feature
>= :: Feature -> Feature -> Bool
$c>= :: Feature -> Feature -> Bool
> :: Feature -> Feature -> Bool
$c> :: Feature -> Feature -> Bool
<= :: Feature -> Feature -> Bool
$c<= :: Feature -> Feature -> Bool
< :: Feature -> Feature -> Bool
$c< :: Feature -> Feature -> Bool
compare :: Feature -> Feature -> Ordering
$ccompare :: Feature -> Feature -> Ordering
$cp1Ord :: Eq Feature
Ord)

instance IsMarking Feature