module Bio.GB.Type
  ( GenBankSequence (..)
  , Meta (..)
  , Form (..)
  , Locus (..)
  , Version (..)
  , Source (..)
  , Reference (..)
  , Feature (..)
  , Parser
  ) where

import Bio.Sequence    (IsMarking, MarkedSequence)
import Control.DeepSeq (NFData)
import Data.Text       (Text)
import Data.Void       (Void)
import GHC.Generics    (Generic)
import Text.Megaparsec (Parsec)

type Parser = Parsec Void Text

-- | Type that represents contents of .gb file that is used to store information about
-- genetic constructions.
--
data GenBankSequence
  = GenBankSequence
      { GenBankSequence -> Meta
meta  :: Meta
        -- ^ meta-information about the sequence
      , GenBankSequence -> MarkedSequence Feature Char
gbSeq :: MarkedSequence Feature Char
        -- ^ sequence that is marked by 'Feature's
      }
  deriving (GenBankSequence -> GenBankSequence -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GenBankSequence -> GenBankSequence -> Bool
$c/= :: GenBankSequence -> GenBankSequence -> Bool
== :: GenBankSequence -> GenBankSequence -> Bool
$c== :: GenBankSequence -> GenBankSequence -> Bool
Eq, Int -> GenBankSequence -> ShowS
[GenBankSequence] -> ShowS
GenBankSequence -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [GenBankSequence] -> ShowS
$cshowList :: [GenBankSequence] -> ShowS
show :: GenBankSequence -> String
$cshow :: GenBankSequence -> String
showsPrec :: Int -> GenBankSequence -> ShowS
$cshowsPrec :: Int -> GenBankSequence -> ShowS
Show, forall x. Rep GenBankSequence x -> GenBankSequence
forall x. GenBankSequence -> Rep GenBankSequence x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep GenBankSequence x -> GenBankSequence
$cfrom :: forall x. GenBankSequence -> Rep GenBankSequence x
Generic, GenBankSequence -> ()
forall a. (a -> ()) -> NFData a
rnf :: GenBankSequence -> ()
$crnf :: GenBankSequence -> ()
NFData)

--------------------------------------------------------------------------------
-- Block with meta-information.
--------------------------------------------------------------------------------

-- | Meta-information about sequence.
--
data Meta
  = Meta
      { Meta -> Locus
locus      :: Locus
        -- ^ general info about sequence
      , Meta -> Maybe Text
definition :: Maybe Text
        -- ^ brief description of sequence
      , Meta -> Maybe Text
accession  :: Maybe Text
        -- ^ the unique identifier for a sequence record
      , Meta -> Maybe Version
version    :: Maybe Version
        -- ^ id of sequence in GenBank database
      , Meta -> Maybe Text
keywords   :: Maybe Text
        -- ^ word or phrase describing the sequence
      , Meta -> Maybe Source
source     :: Maybe Source
        -- ^ free-format information including an abbreviated form of the organism name,
        --   sometimes followed by a molecule type
      , Meta -> [Reference]
references :: [Reference]
        -- ^ publications by the authors of the sequence that discuss the data reported in the record
      , Meta -> [Text]
comments   :: [Text]
        -- ^ comments about the sequence (note that there can be (!!!) empty comments)
      }
  deriving (Meta -> Meta -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Meta -> Meta -> Bool
$c/= :: Meta -> Meta -> Bool
== :: Meta -> Meta -> Bool
$c== :: Meta -> Meta -> Bool
Eq, Int -> Meta -> ShowS
[Meta] -> ShowS
Meta -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Meta] -> ShowS
$cshowList :: [Meta] -> ShowS
show :: Meta -> String
$cshow :: Meta -> String
showsPrec :: Int -> Meta -> ShowS
$cshowsPrec :: Int -> Meta -> ShowS
Show, forall x. Rep Meta x -> Meta
forall x. Meta -> Rep Meta x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Meta x -> Meta
$cfrom :: forall x. Meta -> Rep Meta x
Generic, Meta -> ()
forall a. (a -> ()) -> NFData a
rnf :: Meta -> ()
$crnf :: Meta -> ()
NFData)

-- | First line that should be present in every .gb file. Contains general info about sequence.
--
data Locus
  = Locus
      { Locus -> Text
name             :: Text
        -- ^ name of sequence
      , Locus -> Int
len              :: Int
        -- ^ length of sequence
      , Locus -> Text
molType          :: Text
        -- ^ type of molecule that is sequenced
      , Locus -> Maybe Form
form             :: Maybe Form
        -- ^ form of sequence
      , Locus -> Maybe Text
gbDivision       :: Maybe Text
        -- ^ GenBank division to which a record belongs
      , Locus -> Text
modificationDate :: Text
        -- ^ date of last modification of sequence
      }
  deriving (Locus -> Locus -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Locus -> Locus -> Bool
$c/= :: Locus -> Locus -> Bool
== :: Locus -> Locus -> Bool
$c== :: Locus -> Locus -> Bool
Eq, Int -> Locus -> ShowS
[Locus] -> ShowS
Locus -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Locus] -> ShowS
$cshowList :: [Locus] -> ShowS
show :: Locus -> String
$cshow :: Locus -> String
showsPrec :: Int -> Locus -> ShowS
$cshowsPrec :: Int -> Locus -> ShowS
Show, forall x. Rep Locus x -> Locus
forall x. Locus -> Rep Locus x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Locus x -> Locus
$cfrom :: forall x. Locus -> Rep Locus x
Generic, Locus -> ()
forall a. (a -> ()) -> NFData a
rnf :: Locus -> ()
$crnf :: Locus -> ()
NFData)

-- | At this moment there are two known (to me)
-- forms of seuqences that can be present in .gb file.
--
data Form
  = Linear
  | Circular
  deriving (Form -> Form -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Form -> Form -> Bool
$c/= :: Form -> Form -> Bool
== :: Form -> Form -> Bool
$c== :: Form -> Form -> Bool
Eq, Int -> Form -> ShowS
[Form] -> ShowS
Form -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Form] -> ShowS
$cshowList :: [Form] -> ShowS
show :: Form -> String
$cshow :: Form -> String
showsPrec :: Int -> Form -> ShowS
$cshowsPrec :: Int -> Form -> ShowS
Show, forall x. Rep Form x -> Form
forall x. Form -> Rep Form x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Form x -> Form
$cfrom :: forall x. Form -> Rep Form x
Generic, Form -> ()
forall a. (a -> ()) -> NFData a
rnf :: Form -> ()
$crnf :: Form -> ()
NFData)

-- | Id of sequence in GenBank database.
--
data Version
  = Version
      { Version -> Text
versionT :: Text
        -- ^ id itself
      , Version -> Maybe Text
gbId     :: Maybe Text
        -- ^ GenInfo Identifier that is assigned when sequence changes
      }
  deriving (Version -> Version -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Version -> Version -> Bool
$c/= :: Version -> Version -> Bool
== :: Version -> Version -> Bool
$c== :: Version -> Version -> Bool
Eq, Int -> Version -> ShowS
[Version] -> ShowS
Version -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Version] -> ShowS
$cshowList :: [Version] -> ShowS
show :: Version -> String
$cshow :: Version -> String
showsPrec :: Int -> Version -> ShowS
$cshowsPrec :: Int -> Version -> ShowS
Show, forall x. Rep Version x -> Version
forall x. Version -> Rep Version x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Version x -> Version
$cfrom :: forall x. Version -> Rep Version x
Generic, Version -> ()
forall a. (a -> ()) -> NFData a
rnf :: Version -> ()
$crnf :: Version -> ()
NFData)

-- | Information about source of this sequence.
--
data Source
  = Source
      { Source -> Text
sourceT  :: Text
        -- ^ free-format (as if all this format is not too much "free format") information
        -- including an abbreviated form of the organism name,
        -- sometimes followed by a molecule type
      , Source -> Maybe Text
organism :: Maybe Text
        -- ^ the formal scientific name for the source organism
      }
  deriving (Source -> Source -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Source -> Source -> Bool
$c/= :: Source -> Source -> Bool
== :: Source -> Source -> Bool
$c== :: Source -> Source -> Bool
Eq, Int -> Source -> ShowS
[Source] -> ShowS
Source -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Source] -> ShowS
$cshowList :: [Source] -> ShowS
show :: Source -> String
$cshow :: Source -> String
showsPrec :: Int -> Source -> ShowS
$cshowsPrec :: Int -> Source -> ShowS
Show, forall x. Rep Source x -> Source
forall x. Source -> Rep Source x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Source x -> Source
$cfrom :: forall x. Source -> Rep Source x
Generic, Source -> ()
forall a. (a -> ()) -> NFData a
rnf :: Source -> ()
$crnf :: Source -> ()
NFData)

-- | Publications by the authors of the sequence that discuss the data reported in the record.
--
data Reference
  = Reference
      { Reference -> Text
referenceT :: Text
        -- ^ reference itself
      , Reference -> Maybe Text
authors    :: Maybe Text
        -- ^ list of authors in the order in which they appear in the cited article
      , Reference -> Maybe Text
title      :: Maybe Text
        -- ^ title of the published work
      , Reference -> Maybe Text
journal    :: Maybe Text
        -- ^ MEDLINE abbreviation of the journal name
      , Reference -> Maybe Text
pubmed     :: Maybe Text
        -- ^ PubMed Identifier
      }
  deriving (Reference -> Reference -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Reference -> Reference -> Bool
$c/= :: Reference -> Reference -> Bool
== :: Reference -> Reference -> Bool
$c== :: Reference -> Reference -> Bool
Eq, Int -> Reference -> ShowS
[Reference] -> ShowS
Reference -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Reference] -> ShowS
$cshowList :: [Reference] -> ShowS
show :: Reference -> String
$cshow :: Reference -> String
showsPrec :: Int -> Reference -> ShowS
$cshowsPrec :: Int -> Reference -> ShowS
Show, forall x. Rep Reference x -> Reference
forall x. Reference -> Rep Reference x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Reference x -> Reference
$cfrom :: forall x. Reference -> Rep Reference x
Generic, Reference -> ()
forall a. (a -> ()) -> NFData a
rnf :: Reference -> ()
$crnf :: Reference -> ()
NFData)

--------------------------------------------------------------------------------
-- Block with FEATURES table.
--
-- FEATURES table contains information about genes and gene products, as well as regions of biological
-- significance reported in the sequence. These can include regions of the sequence
-- that code for proteins and RNA molecules, as well as a number of other features.
-- More about FEATURES table: http://www.insdc.org/documents/feature_table.html
--------------------------------------------------------------------------------

-- | One single feature.
--
data Feature
  = Feature
      { Feature -> Text
fName  :: Text
        -- ^ main information about feature
      , Feature -> [(Text, Text)]
fProps :: [(Text, Text)]
        -- ^ properties of feature (such as "label", "gene", "note" etc.)
      }
  deriving (Feature -> Feature -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Feature -> Feature -> Bool
$c/= :: Feature -> Feature -> Bool
== :: Feature -> Feature -> Bool
$c== :: Feature -> Feature -> Bool
Eq, Int -> Feature -> ShowS
[Feature] -> ShowS
Feature -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Feature] -> ShowS
$cshowList :: [Feature] -> ShowS
show :: Feature -> String
$cshow :: Feature -> String
showsPrec :: Int -> Feature -> ShowS
$cshowsPrec :: Int -> Feature -> ShowS
Show, Eq Feature
Feature -> Feature -> Bool
Feature -> Feature -> Ordering
Feature -> Feature -> Feature
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Feature -> Feature -> Feature
$cmin :: Feature -> Feature -> Feature
max :: Feature -> Feature -> Feature
$cmax :: Feature -> Feature -> Feature
>= :: Feature -> Feature -> Bool
$c>= :: Feature -> Feature -> Bool
> :: Feature -> Feature -> Bool
$c> :: Feature -> Feature -> Bool
<= :: Feature -> Feature -> Bool
$c<= :: Feature -> Feature -> Bool
< :: Feature -> Feature -> Bool
$c< :: Feature -> Feature -> Bool
compare :: Feature -> Feature -> Ordering
$ccompare :: Feature -> Feature -> Ordering
Ord, forall x. Rep Feature x -> Feature
forall x. Feature -> Rep Feature x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Feature x -> Feature
$cfrom :: forall x. Feature -> Rep Feature x
Generic, Feature -> ()
forall a. (a -> ()) -> NFData a
rnf :: Feature -> ()
$crnf :: Feature -> ()
NFData)

instance IsMarking Feature