{-| Encodes a table of amino acid properties.

Based on Livingstone & Barton, CABIOS, 9, 745-756, 1993, as seen at:
http://www.compbio.dundee.ac.uk/user/ws-dev1/jalview/latest/help/html/misc/aaproperties.html

NB: based on the graphic, not the table (in which P is polar, but T is not)
-}
module Bio.Sequence.AminoProperties where

import Prelude hiding (or)
import Bio.Sequence.SeqData

-- | A property is a yes/no predicate over amino acids.
type AAProp = Amino -> Bool

-- | @oneOf xs@ is the predicate that holds exactly for the members of @xs@.
--
-- The explicit signature keeps the monomorphism restriction from pinning
-- this helper to whatever single type the module happens to use it at.
oneOf :: Eq a => [a] -> a -> Bool
oneOf = flip elem

-- | Pointwise disjunction of two predicates: the result holds for @x@ when
-- either argument holds for @x@.  Shadows the Prelude's @or@, which is
-- hidden in the import above.
or :: (a -> Bool) -> (a -> Bool) -> a -> Bool
or f g x = f x || g x

aliphatic, aromatic, hydrophobic, polar, small, tiny, charged, negative, positive :: AAProp

-- NB: Cysteine has two variants: C s-s (small, non-polar) and C s-h (tiny, polar)
-- http://www.russell.embl-heidelberg.de/aas/aas.html
aliphatic   = oneOf [Ile,Leu,Val]
aromatic    = oneOf [Phe,Tyr,Trp,His]
hydrophobic = aromatic `or` aliphatic `or` oneOf [Cys,Ala,Gly,Thr,Met,Lys]
polar       = charged `or` oneOf [Asn,Ser,Thr,Tyr,Trp,Gln] -- and C s-h
small       = tiny `or` oneOf [Pro,Cys,Val,Thr,Asp,Asn] -- +Asx?
tiny        = oneOf [Ala,Gly,Ser] -- C s-h
charged     = negative `or` positive
negative    = oneOf [Asp,Glu]
positive    = oneOf [Lys,His,Arg]

-- | Hydropathy value of a residue.
-- Based on Kyte and Doolittle, according to Wikipedia.
-- Any constructor without an entry in the table yields 0.
hydropathy :: Amino -> Double
hydropathy x = case x of
  Ala -> 1.8
  Arg -> -4.5
  Asn -> -3.5
  Asp -> -3.5
  Cys -> 2.5
  Gln -> -3.5
  Glu -> -3.5
  Gly -> -0.4
  His -> -3.2
  Ile -> 4.5
  Leu -> 3.8
  Lys -> -3.9
  Met -> 1.9
  Phe -> 2.8
  Pro -> -1.6
  Ser -> -0.8
  Thr -> -0.7
  Trp -> -0.9
  Tyr -> -1.3
  Val -> 4.2
  _   -> 0

-- | Mass of a residue.  Any constructor without an entry yields 0.
-- NOTE(review): the figures look like monoisotopic residue masses in
-- Daltons, but the source does not say so -- confirm before relying on units.
mass :: Amino -> Double
mass x = case x of
  -- tiny
  Gly -> 57.021464
  Ala -> 71.037114
  Ser -> 87.032029
  -- small
  Pro -> 97.052764
  Val -> 99.068414
  Thr -> 101.04768
  Cys -> 103.00919
  Asn -> 114.04293
  Asp -> 115.02694
  Leu -> 113.08406
  Ile -> 113.08406
  Gln -> 128.05858
  Lys -> 128.09496
  Glu -> 129.04259
  Met -> 131.04048
  His -> 137.05891
  Phe -> 147.06841
  Arg -> 156.10111
  Tyr -> 163.06333
  Trp -> 186.07931
  _   -> 0

-- | The propensities for forming secondary structures
-- From Zvelebil and Baum: Understanding Bioinformatics, Chapter 11
-- citing Chou and Fasman.
-- Today, more complex methods like GOR are recommended instead.
-- Any constructor without an entry in a table below gets a propensity of 0.
helixP, strandP :: Amino -> Double

-- Helix propensity, one equation per residue.
helixP Ala = 1.42
helixP Cys = 0.70
helixP Asp = 1.01
helixP Glu = 1.51
helixP Phe = 1.13
helixP Gly = 0.61
helixP His = 1.00
helixP Ile = 1.08
helixP Lys = 1.16
helixP Leu = 1.21
helixP Met = 1.45
helixP Asn = 0.67
helixP Pro = 0.57
helixP Gln = 1.11
helixP Arg = 0.98
helixP Ser = 0.77
helixP Thr = 0.83
helixP Val = 1.06
helixP Trp = 1.08
helixP Tyr = 0.69
helixP _   = 0 -- sensible?

-- Strand propensity, one equation per residue.
strandP Ala = 0.83
strandP Cys = 1.19
strandP Asp = 0.54
strandP Glu = 0.37
strandP Phe = 1.38
strandP Gly = 0.75
strandP His = 0.87
strandP Ile = 1.60
strandP Lys = 0.74
strandP Leu = 1.30
strandP Met = 1.05
strandP Asn = 0.89
strandP Pro = 0.55
strandP Gln = 1.10
strandP Arg = 0.93
strandP Ser = 0.75
strandP Thr = 1.19
strandP Val = 1.70
strandP Trp = 1.37
strandP Tyr = 1.40
strandP _   = 0