module Bio.GB.Type
  ( GenBankSequence (..)
  , Meta (..)
  , Form (..)
  , Locus (..)
  , Version (..)
  , Source (..)
  , Reference (..)
  , Feature (..)
  , Parser
  ) where

import Bio.Sequence    (IsMarking, MarkedSequence)
import Control.DeepSeq (NFData)
import Data.Text       (Text)
import Data.Void       (Void)
import GHC.Generics    (Generic)
import Text.Megaparsec (Parsec)

-- | Megaparsec parser over 'Text' input that uses 'Void' as its custom error
-- type, i.e. it carries no custom error component.
type Parser = Parsec Void Text

-- | Contents of a .gb file, the format used to store information about
-- genetic constructions.
--
-- A value pairs the meta-information block with the sequence itself, where
-- the sequence is marked by 'Feature's.
--
data GenBankSequence
  = GenBankSequence
      { meta  :: Meta
        -- ^ meta-information about the sequence
      , gbSeq :: MarkedSequence Feature Char
        -- ^ sequence of 'Char's that is marked by 'Feature's
      }
  deriving (Eq, Show, Generic, NFData)

--------------------------------------------------------------------------------
-- Block with meta-information.
--------------------------------------------------------------------------------

-- | Meta-information about a sequence.
--
-- Only 'locus' is mandatory; every other section is either optional
-- ('Maybe') or may occur any number of times (list).
--
data Meta
  = Meta
      { locus      :: Locus
        -- ^ general info about the sequence
      , definition :: Maybe Text
        -- ^ brief description of the sequence
      , accession  :: Maybe Text
        -- ^ the unique identifier for a sequence record
      , version    :: Maybe Version
        -- ^ id of the sequence in the GenBank database
      , keywords   :: Maybe Text
        -- ^ word or phrase describing the sequence
      , source     :: Maybe Source
        -- ^ free-format information including an abbreviated form of the organism name,
        --   sometimes followed by a molecule type
      , references :: [Reference]
        -- ^ publications by the authors of the sequence that discuss the data reported in the record
      , comments   :: [Text]
        -- ^ comments about the sequence (note that an individual comment may be empty)
      }
  deriving (Eq, Show, Generic, NFData)

-- | First line that should be present in every .gb file. Contains general
-- info about the sequence.
--
data Locus
  = Locus
      { name             :: Text
        -- ^ name of the sequence
      , len              :: Int
        -- ^ length of the sequence
      , molType          :: Text
        -- ^ type of molecule that is sequenced
      , form             :: Maybe Form
        -- ^ form of the sequence, if given (see 'Form')
      , gbDivision       :: Maybe Text
        -- ^ GenBank division to which a record belongs
      , modificationDate :: Text
        -- ^ date of last modification of the sequence (kept as raw text)
      }
  deriving (Eq, Show, Generic, NFData)

-- | Form of a sequence. At this moment there are two known (to me)
-- forms of sequences that can be present in a .gb file.
--
data Form
  = Linear
  | Circular
  deriving (Eq, Show, Generic, NFData)

-- | Id of a sequence in the GenBank database.
--
data Version
  = Version
      { versionT :: Text
        -- ^ id itself
      , gbId     :: Maybe Text
        -- ^ GenInfo Identifier that is assigned when the sequence changes
      }
  deriving (Eq, Show, Generic, NFData)

-- | Information about the source of a sequence.
--
data Source
  = Source
      { sourceT  :: Text
        -- ^ free-format (as if all this format is not too much "free format") information
        -- including an abbreviated form of the organism name,
        -- sometimes followed by a molecule type
      , organism :: Maybe Text
        -- ^ the formal scientific name for the source organism
      }
  deriving (Eq, Show, Generic, NFData)

-- | Publication by the authors of the sequence that discusses the data
-- reported in the record.
--
data Reference
  = Reference
      { referenceT :: Text
        -- ^ reference itself
      , authors    :: Maybe Text
        -- ^ list of authors in the order in which they appear in the cited article
      , title      :: Maybe Text
        -- ^ title of the published work
      , journal    :: Maybe Text
        -- ^ MEDLINE abbreviation of the journal name
      , pubmed     :: Maybe Text
        -- ^ PubMed Identifier
      }
  deriving (Eq, Show, Generic, NFData)

--------------------------------------------------------------------------------
-- Block with FEATURES table.
--
-- FEATURES table contains information about genes and gene products, as well as regions of biological
-- significance reported in the sequence. These can include regions of the sequence
-- that code for proteins and RNA molecules, as well as a number of other features.
-- More about FEATURES table: http://www.insdc.org/documents/feature_table.html
--------------------------------------------------------------------------------

-- | One single feature of the FEATURES table.
--
-- The derived 'Ord' instance compares 'fName' first and then 'fProps'.
--
data Feature
  = Feature
      { fName  :: Text
        -- ^ main information about the feature
      , fProps :: [(Text, Text)]
        -- ^ properties of the feature (such as "label", "gene", "note" etc.)
      }
  deriving (Eq, Show, Ord, Generic, NFData)

-- | Declares 'Feature' as an 'IsMarking' instance; 'gbSeq' uses 'Feature' as
-- the marking type of its 'MarkedSequence'. No methods are defined here.
instance IsMarking Feature