{-| Encodes a table of amino acid properties.

    Based on Livingstone & Barton, CABIOS, 9, 745-756, 1993, as seen at:
    <http://www.compbio.dundee.ac.uk/user/ws-dev1/jalview/latest/help/html/misc/aaproperties.html>

    NB: based on the graphic, not the table (in which P is polar, but T is not).
-}

module Bio.Sequence.AminoProperties where

import Prelude hiding (or)
import Bio.Sequence.SeqData

-- | A property of an amino acid, represented as a predicate on 'Amino':
--   'True' when the residue has the property.
type AAProp = Amino -> Bool

-- | Membership test with the arguments flipped relative to 'elem':
--   @oneOf xs@ is the predicate that holds for exactly the elements of @xs@.
--
--   The explicit signature keeps this polymorphic; without it the
--   monomorphism restriction would pin the type to whatever this module
--   happens to use it at.
oneOf :: Eq a => [a] -> a -> Bool
oneOf xs x = x `elem` xs
-- | Pointwise disjunction of two predicates: the result holds for @x@ when
--   either @f x@ or @g x@ does. @g@ is only consulted when @f x@ is 'False'.
--
--   This takes the name of the Prelude's @or@, which the module hides on
--   import.
or :: (a -> Bool) -> (a -> Bool) -> a -> Bool
or f g x = f x || g x
  
-- NB: Cysteine has two variants: C s-s (small, non-polar) and C s-h (tiny, polar).
-- See http://www.russell.embl-heidelberg.de/aas/aas.html

-- | Residues classed as aliphatic.
aliphatic :: AAProp
aliphatic = oneOf [Ile, Leu, Val]

-- | Residues classed as aromatic.
aromatic :: AAProp
aromatic = oneOf [Phe, Tyr, Trp, His]

-- | Hydrophobic residues: everything aromatic or aliphatic, plus the
--   residues listed explicitly.
hydrophobic :: AAProp
hydrophobic aa =
  aromatic aa || aliphatic aa || oneOf [Cys, Ala, Gly, Thr, Met, Lys] aa

-- | Polar residues: everything charged, plus the residues listed explicitly
--   (and C s-h).
polar :: AAProp
polar aa = charged aa || oneOf [Asn, Ser, Thr, Tyr, Trp, Gln] aa

-- | Small residues: everything tiny, plus the residues listed explicitly.
--   (+Asx?)
small :: AAProp
small aa = tiny aa || oneOf [Pro, Cys, Val, Thr, Asp, Asn] aa

-- | Tiny residues (C s-h would belong here too).
tiny :: AAProp
tiny = oneOf [Ala, Gly, Ser]

-- | Charged residues of either sign.
charged :: AAProp
charged aa = negative aa || positive aa

-- | Negatively charged residues.
negative :: AAProp
negative = oneOf [Asp, Glu]

-- | Positively charged residues.
positive :: AAProp
positive = oneOf [Lys, His, Arg]

-- | Hydropathy index of a residue.
--
--   Based on Kyte and Doolittle, according to Wikipedia.
--   Any 'Amino' value without an entry of its own yields 0.
hydropathy :: Amino -> Double
hydropathy Ala =  1.8
hydropathy Arg = -4.5
hydropathy Asn = -3.5
hydropathy Asp = -3.5
hydropathy Cys =  2.5
hydropathy Gln = -3.5
hydropathy Glu = -3.5
hydropathy Gly = -0.4
hydropathy His = -3.2
hydropathy Ile =  4.5
hydropathy Leu =  3.8
hydropathy Lys = -3.9
hydropathy Met =  1.9
hydropathy Phe =  2.8
hydropathy Pro = -1.6
hydropathy Ser = -0.8
hydropathy Thr = -0.7
hydropathy Trp = -0.9
hydropathy Tyr = -1.3
hydropathy Val =  4.2
hydropathy _   =  0
    
-- | Mass of a single residue.
--
--   NOTE(review): the figures look like monoisotopic residue masses
--   (amino acid minus water) in Daltons -- confirm against the original source.
--   Any 'Amino' value without an entry of its own yields 0.
mass :: Amino -> Double
-- tiny
mass Gly =  57.021464
mass Ala =  71.037114
mass Ser =  87.032029
-- small
mass Pro =  97.052764
mass Val =  99.068414
mass Thr = 101.04768
mass Cys = 103.00919
mass Asn = 114.04293
mass Asp = 115.02694
-- remaining residues
mass Leu = 113.08406
mass Ile = 113.08406
mass Gln = 128.05858
mass Lys = 128.09496
mass Glu = 129.04259
mass Met = 131.04048
mass His = 137.05891
mass Phe = 147.06841
mass Arg = 156.10111
mass Tyr = 163.06333
mass Trp = 186.07931
-- everything else
mass _   = 0

-- | Per-residue propensities for forming secondary structure
--   ('helixP' for helices, 'strandP' for strands).
--   Taken from Zvelebil and Baum: Understanding Bioinformatics, Chapter 11,
--   which cites Chou and Fasman.
--   Today, more complex methods like GOR are recommended instead.
helixP, strandP :: Amino -> Double
helixP Ala = 1.42
helixP Cys = 0.70
helixP Asp = 1.01
helixP Glu = 1.51
helixP Phe = 1.13
-- NOTE(review): Chou & Fasman is commonly tabulated with 0.57 for Gly;
-- confirm 0.61 against the cited chapter.
helixP Gly = 0.61
helixP His = 1.00
helixP Ile = 1.08
helixP Lys = 1.16
helixP Leu = 1.21
helixP Met = 1.45
helixP Asn = 0.67
helixP Pro = 0.57
helixP Gln = 1.11
helixP Arg = 0.98
helixP Ser = 0.77
helixP Thr = 0.83
helixP Val = 1.06
helixP Trp = 1.08
helixP Tyr = 0.69
-- Fallback for every other 'Amino' value -- is 0 sensible here?
helixP _   = 0
  
-- Strand propensity per residue; the type signature and source citation are
-- shared with helixP above. Any other Amino value falls through to 0.
strandP Ala = 0.83
strandP Cys = 1.19
strandP Asp = 0.54
strandP Glu = 0.37
strandP Phe = 1.38
strandP Gly = 0.75
strandP His = 0.87
strandP Ile = 1.60
strandP Lys = 0.74
strandP Leu = 1.30
strandP Met = 1.05
strandP Asn = 0.89
strandP Pro = 0.55
strandP Gln = 1.10
strandP Arg = 0.93
strandP Ser = 0.75
strandP Thr = 1.19
strandP Val = 1.70
strandP Trp = 1.37
-- NOTE(review): Chou & Fasman is commonly tabulated with 1.47 for Tyr;
-- confirm 1.40 against the cited chapter.
strandP Tyr = 1.40
strandP _   = 0