module Biobase.Types.Accession where

import Data.Aeson
import Data.Binary
import Data.Hashable (Hashable)
import Data.Ix (Ix)
import Data.Serialize
import Data.Serialize.Text
import Data.String
import Data.Stringable
import Data.Text.Binary
import Data.Text (Text)
import GHC.Generics (Generic)



-- * 'Accession' with phantom types.
--
-- <http://www.ncbi.nlm.nih.gov/Sequin/acc.html>
--
-- <http://www.uniprot.org/help/accession_numbers>
--
-- <http://en.wikipedia.org/wiki/Accession_number_%28bioinformatics%29>

-- | The accession number is a unique identifier in bioinformatics.
--
-- Depending on the source, accession numbers follow different alphanumeric
-- formats! While letters-then-numbers is quite common, Swiss-Prot uses
-- a mix. Hence, we just use a text string as accession.
--
-- A phantom type parameter is provided so that accessions can be annotated
-- with their kind at the type level. The helper functions 'accession' and
-- 'tagAccession' construct and re-tag @Accession@ values.

newtype Accession t = Accession
  { _getAccession :: Text
    -- ^ The underlying accession text; the tag @t@ exists only at the type
    -- level and is not stored.
  }
  deriving (Eq, Ord, Read, Show, Generic)

-- | Build an 'Accession' from any 'Stringable' value by converting it to
-- 'Text' and wrapping the result. No validation is performed here; the
-- phantom tag @t@ is chosen by the caller or by type inference.
accession :: Stringable s => s -> Accession t
-- Written as a lambda rather than with a left-hand-side argument so that the
-- arity seen by the Inline pragma stays at zero, i.e. unsaturated uses such as
-- @map accession@ remain candidates for inlining.
accession = \raw -> Accession (toText raw)
{-# Inline accession #-}

-- | Replace the phantom tag @f@ of an 'Accession' with a different tag @t@.
-- The wrapped text is passed through unchanged.
tagAccession :: Accession f -> Accession t
-- Lambda form keeps the left-hand-side arity at zero, so the Inline pragma
-- still applies to unsaturated uses (e.g. @map tagAccession@).
tagAccession = \(Accession raw) -> Accession raw
{-# Inline tagAccession #-}

-- | Allows a 'String' (and therefore a string literal when
-- @OverloadedStrings@ is enabled) to be used as an 'Accession' of any tag.
-- Construction is delegated to 'accession', so both paths behave the same.
instance IsString (Accession t) where
  fromString = accession
  {-# Inline fromString #-}

-- Serialisation, JSON and hashing instances. None of them defines a method,
-- so each one uses its class's default implementations -- presumably the
-- 'Generic'-based defaults, relying on the derived 'Generic' instance of
-- 'Accession' (NOTE(review): confirm against the library versions in use).
-- The instances are available for every tag @t@.
instance Binary    (Accession t)
instance FromJSON  (Accession t)
instance Hashable  (Accession t)
instance Serialize (Accession t)
instance ToJSON    (Accession t)

-- ** NCBI phantom types

-- | Tag for accessions of nucleotide sequences. This type has no
-- constructors; it is only meant to be used as the @t@ in @Accession t@.

data Nucleotide

-- | Tag for accessions of protein sequences. This type has no constructors;
-- it is only meant to be used as the @t@ in @Accession t@.

data Protein

-- ** Rfam phantom types
--
-- The format is RFxxxxx, PFxxxxx, or CLxxxxx.

-- | Tag as being a clan. Has no constructors; used only as the @t@ in
-- @Accession t@.

data Clan

-- | Tag as being a Pfam model. Has no constructors; used only as the @t@ in
-- @Accession t@.

data Pfam

-- | Tag as being an Rfam model. Used for Stockholm and CM files. Has no
-- constructors; used only as the @t@ in @Accession t@.

data Rfam



-- | Species have an accession number, too. This tag marks such accessions;
-- it has no constructors and is used only as the @t@ in @Accession t@.

data Species