{-# LANGUAGE OverloadedStrings #-}
-- | Basic properties of chemical 'Element's as suggested by Cambridge Structural Database.
--
-- NOTE: This module contains functions that could use hash tables for the lookup instead.
-- See https://ghc.haskell.org/trac/ghc/ticket/10565
module Bio.PDB.Structure.Elements(Element(..),
                                  -- Finding element for a given atom
                                  assignElement,
                                  -- Guessing element name from atom name (for standard residues only.)
                                  guessElement,
                                  -- properties of elements
                                  atomicNumber, atomicMass,
                                  covalentRadius,    maxCovalentRadius,
                                  vanDerWaalsRadius, maxVanDerWaalsRadius) where

import Prelude hiding (error, String)
import qualified Prelude     as S (String)
import Data.ByteString.Char8 as BS
import System.IO.Unsafe(unsafePerformIO)
import System.IO(stderr)
import Bio.PDB.Structure(Atom(..))
import Data.HashMap.Strict as M
import Data.Maybe

-- | Type alias for 'Element' names.
type Element = BS.ByteString
-- TODO: May be better as a newtype, and make sure that other modules use this declaration

-- | Internal method that reports error to stderr, and return given default value.
defaultingLookup :: BS.ByteString -> v -> HashMap S.String v -> Element -> v
defaultingLookup msg defaultValue dict e =
      fromMaybe withWarning
    $ M.lookup (BS.unpack e) dict
  where
    withWarning = unsafePerformIO $ do BS.hPutStrLn stderr $ BS.concat [msg, BS.pack . show . BS.unpack $ e]
                                       return defaultValue

-- | Atomic number of a given element
atomicNumber :: Element -> Int
atomicNumber = defaultingLookup "Unknown atomic number for element:"
                                0 dict 
  where
    dict = M.fromList [
              ( "C",   6)
             ,( "N",   7)
             ,( "O",   8)
             ,( "P",  15)
             ,( "S",  16)
             ,( "H",   1)
             ,("AC",  89)
             ,("AG",  47)
             ,("AL",  13)
             ,("AM",  95)
             ,("AR",  18)
             ,("AS",  33)
             ,("AT",  85)
             ,("AU",  79)
             ,( "B",   5)
             ,("BA",  56)
             ,("BE",   4)
             ,("BH", 107)
             ,("BI",  83)
             ,("BK",  97)
             ,("BR",  35)
             ,("CA",  20)
             ,("CD",  48)
             ,("CE",  58)
             ,("CF",  98)
             ,("CL",  17)
             ,("CM",  96)
             ,("CO",  27)
             ,("CR",  24)
             ,("CS",  55)
             ,("CU",  29)
             ,("DB", 105)
             ,("DS", 110)
             ,("DY",  66)
             ,("ER",  68)
             ,("ES",  99)
             ,("EU",  63)
             ,( "F",   9)
             ,("FE",  26)
             ,("FM", 100)
             ,("FR",  87)
             ,("GA",  31)
             ,("GD",  64)
             ,("GE",  32)
             ,("HE",   2)
             ,("HF",  72)
             ,("HG",  80)
             ,("HO",  67)
             ,("HS", 108)
             ,( "I",  53)
             ,("IN",  49)
             ,("IR",  77)
             ,( "K",  19)
             ,("KR",  36)
             ,("LA",  57)
             ,("LI",   3)
             ,("LR", 103)
             ,("LU",  71)
             ,("MD", 101)
             ,("MG",  12)
             ,("MN",  25)
             ,("MO",  42)
             ,("MT", 109)
             ,("NA",  11)
             ,("NB",  41)
             ,("ND",  60)
             ,("NE",  10)
             ,("NI",  28)
             ,("NO", 102)
             ,("NP",  93)
             ,("OS",  76)
             ,("PA",  91)
             ,("PB",  82)
             ,("PD",  46)
             ,("PM",  61)
             ,("PO",  84)
             ,("PR",  59)
             ,("PT",  78)
             ,("PU",  94)
             ,("RA",  88)
             ,("RB",  37)
             ,("RE",  75)
             ,("RF", 104)
             ,("RH",  45)
             ,("RN",  86)
             ,("RU",  44)
             ,("SB",  51)
             ,("SC",  21)
             ,("SE",  34)
             ,("SG", 106)
             ,("SI",  14)
             ,("SM",  62)
             ,("SN",  50)
             ,("SR",  38)
             ,("TA",  73)
             ,("TB",  65)
             ,("TC",  43)
             ,("TE",  52)
             ,("TH",  90)
             ,("TI",  22)
             ,("TL",  81)
             ,("TM",  69)
             ,( "U",  92)
             ,( "V",  23)
             ,( "W",  74)
             ,("XE",  54)
             ,( "Y",  39)
             ,("YB",  70)
             ,("ZN",  30)
             ,("ZR",  40)
             ]

-- | Covalent radius of an element with a given name.
covalentRadius = defaultingLookup "Unknown covalent radius for element:"
                                  0.0 dict 
  where
    dict = M.fromList [
               ("AC", 2.15)
             , ("AG", 1.45)
             , ("AL", 1.21)
             , ("AM", 1.80)
             , ("AR", 1.51)
             , ("AS", 1.21)
             , ("AT", 1.21)
             , ("AU", 1.36)
             , ( "B", 0.83)
             , ("BA", 2.15)
             , ("BE", 0.96)
             , ("BH", 1.50)
             , ("BI", 1.48)
             , ("BK", 1.54)
             , ("BR", 1.21)
             , ( "C", 0.68)
             , ("CA", 1.76)
             , ("CD", 1.44)
             , ("CE", 2.04)
             , ("CF", 1.83)
             , ("CL", 0.99)
             , ("CM", 1.69)
             , ("CO", 1.26)
             , ("CR", 1.39)
             , ("CS", 2.44)
             , ("CU", 1.32)
             , ("DB", 1.50)
             , ("DS", 1.50)
             , ("DY", 1.92)
             , ("ER", 1.89)
             , ("ES", 1.50)
             , ("EU", 1.98)
             , ( "F", 0.64)
             , ("FE", 1.52)
             , ("FM", 1.50)
             , ("FR", 2.60)
             , ("GA", 1.22)
             , ("GD", 1.96)
             , ("GE", 1.17)
             , ( "H", 0.23)
             , ("HE", 1.50)
             , ("HF", 1.75)
             , ("HG", 1.32)
             , ("HO", 1.92)
             , ("HS", 1.50)
             , ( "I", 1.40)
             , ("IN", 1.42)
             , ("IR", 1.41)
             , ( "K", 2.03)
             , ("KR", 1.50)
             , ("LA", 2.07)
             , ("LI", 1.28)
             , ("LR", 1.50)
             , ("LU", 1.87)
             , ("MD", 1.50)
             , ("MG", 1.41)
             , ("MN", 1.61)
             , ("MO", 1.54)
             , ("MT", 1.50)
             , ( "N", 0.68)
             , ("NA", 1.66)
             , ("NB", 1.64)
             , ("ND", 2.01)
             , ("NE", 1.50)
             , ("NI", 1.24)
             , ("NO", 1.50)
             , ("NP", 1.90)
             , ( "O", 0.68)
             , ("OS", 1.44)
             , ( "P", 1.05)
             , ("PA", 2.00)
             , ("PB", 1.46)
             , ("PD", 1.39)
             , ("PM", 1.99)
             , ("PO", 1.40)
             , ("PR", 2.03)
             , ("PT", 1.36)
             , ("PU", 1.87)
             , ("RA", 2.21)
             , ("RB", 2.20)
             , ("RE", 1.51)
             , ("RF", 1.50)
             , ("RH", 1.42)
             , ("RN", 1.50)
             , ("RU", 1.46)
             , ( "S", 1.02)
             , ("SB", 1.39)
             , ("SC", 1.70)
             , ("SE", 1.22)
             , ("SG", 1.50)
             , ("SI", 1.20)
             , ("SM", 1.98)
             , ("SN", 1.39)
             , ("SR", 1.95)
             , ("TA", 1.70)
             , ("TB", 1.94)
             , ("TC", 1.47)
             , ("TE", 1.47)
             , ("TH", 2.06)
             , ("TI", 1.60)
             , ("TL", 1.45)
             , ("TM", 1.90)
             , ( "U", 1.96)
             , ( "V", 1.53)
             , ( "W", 1.62)
             , ("XE", 1.50)
             , ( "Y", 1.90)
             , ("YB", 1.87)
             , ("ZN", 1.22)
             , ("ZR", 1.75)
             ]

{-# INLINE maxCovalentRadius #-}
-- | Upper bound of @covalentRadius@.
maxCovalentRadius :: Double
maxCovalentRadius  = covalentRadius "FR"

-- | Atomic mass of a given element in g/mol
atomicMass :: Element -> Double
atomicMass = defaultingLookup "Unknown atomic mass for element:"
                              0.0 dict 
  where
    dict = M.fromList [
              ( "C",  12.011)
             ,( "N",  14.007)
             ,( "O",  15.999)
             ,( "P",  30.974)
             ,( "S",  32.066)
             ,( "H",   1.008)
             ,("AC", 227.000)
             ,("AG", 107.868)
             ,("AL",  26.982)
             ,("AM", 243.000)
             ,("AR",  39.948)
             ,("AS",  74.922)
             ,("AT", 210.000)
             ,("AU", 196.967)
             ,( "B",  10.811)
             ,("BA", 137.327)
             ,("BE",   9.012)
             ,("BH", 264.000)
             ,("BI", 208.980)
             ,("BK", 247.000)
             ,("BR",  79.904)
             ,("CA",  40.078)
             ,("CD", 112.411)
             ,("CE", 140.116)
             ,("CF", 251.000)
             ,("CL",  35.453)
             ,("CM", 247.000)
             ,("CO",  58.933)
             ,("CR",  51.996)
             ,("CS", 132.905)
             ,("CU",  63.546)
             ,("DB", 262.000)
             ,("DS", 271.000)
             ,("DY", 162.500)
             ,("ER", 167.260)
             ,("ES", 252.000)
             ,("EU", 151.964)
             ,( "F",  18.998)
             ,("FE",  55.845)
             ,("FM", 257.000)
             ,("FR", 223.000)
             ,("GA",  69.723)
             ,("GD", 157.250)
             ,("GE",  72.610)
             ,("HE",   4.003)
             ,("HF", 178.490)
             ,("HG", 200.590)
             ,("HO", 164.930)
             ,("HS", 269.000)
             ,( "I", 126.904)
             ,("IN", 114.818)
             ,("IR", 192.217)
             ,( "K",  39.098)
             ,("KR",  83.800)
             ,("LA", 138.906)
             ,("LI",   6.941)
             ,("LR", 262.000)
             ,("LU", 174.967)
             ,("MD", 258.000)
             ,("MG",  24.305)
             ,("MN",  54.938)
             ,("MO",  95.940)
             ,("MT", 268.000)
             ,("NA",  22.991)
             ,("NB",  92.906)
             ,("ND", 144.240)
             ,("NE",  20.180)
             ,("NI",  58.693)
             ,("NO", 259.000)
             ,("NP", 237.000)
             ,("OS", 190.230)
             ,("PA", 231.036)
             ,("PB", 207.200)
             ,("PD", 106.420)
             ,("PM", 145.000)
             ,("PO", 210.000)
             ,("PR", 140.908)
             ,("PT", 195.078)
             ,("PU", 244.000)
             ,("RA", 226.000)
             ,("RB",  85.468)
             ,("RE", 186.207)
             ,("RF", 261.000)
             ,("RH", 102.906)
             ,("RN", 222.000)
             ,("RU", 101.070)
             ,("SB", 121.760)
             ,("SC",  44.956)
             ,("SE",  78.960)
             ,("SG", 266.000)
             ,("SI",  28.086)
             ,("SM", 150.360)
             ,("SN", 118.710)
             ,("SR",  87.620)
             ,("TA", 180.948)
             ,("TB", 158.925)
             ,("TC",  98.000)
             ,("TE", 127.600)
             ,("TH", 232.038)
             ,("TI",  47.867)
             ,("TL", 204.383)
             ,("TM", 168.934)
             ,( "U", 238.029)
             ,( "V",  50.942)
             ,( "W", 183.840)
             ,("XE", 131.290)
             ,( "Y",  88.906)
             ,("YB", 173.040)
             ,("ZN",  65.390)
             ,("ZR",  91.224)
             ]

-- | Van der Waals radius of the given element
vanDerWaalsRadius :: Element -> Double
vanDerWaalsRadius = defaultingLookup "Do not know van der Waals radius of "
                                     0.0 dict 
  where
    dict = M.fromList [
             ( "C", 1.70)
            ,( "N", 1.55)
            ,( "O", 1.52)
            ,( "P", 1.80)
            ,( "S", 1.80)
            ,("AC", 2.00)
            ,("AG", 1.72)
            ,("AL", 2.00)
            ,("AM", 2.00)
            ,("AR", 1.88)
            ,("AS", 1.85)
            ,("AT", 2.00)
            ,("AU", 1.66)
            , ("B", 2.00)
            ,("BA", 2.00)
            ,("BE", 2.00)
            ,("BH", 2.00)
            ,("BI", 2.00)
            ,("BK", 2.00)
            ,("BR", 1.85)
            ,("CA", 2.00)
            ,("CD", 1.58)
            ,("CE", 2.00)
            ,("CF", 2.00)
            ,("CL", 1.75)
            ,("CM", 2.00)
            ,("CO", 2.00)
            ,("CR", 2.00)
            ,("CS", 2.00)
            ,("CU", 1.40)
            ,("DB", 2.00)
            ,("DS", 2.00)
            ,("DY", 2.00)
            ,("ER", 2.00)
            ,("ES", 2.00)
            ,("EU", 2.00)
            ,( "F", 1.47)
            ,("FE", 2.00)
            ,("FM", 2.00)
            ,("FR", 2.00)
            ,("GA", 1.87)
            ,("GD", 2.00)
            ,("GE", 2.00)
            ,( "H", 1.09)
            ,("HE", 1.40)
            ,("HF", 2.00)
            ,("HG", 1.55)
            ,("HO", 2.00)
            ,("HS", 2.00)
            ,( "I", 1.98)
            ,("IN", 1.93)
            ,("IR", 2.00)
            ,( "K", 2.75)
            ,("KR", 2.02)
            ,("LA", 2.00)
            ,("LI", 1.82)
            ,("LR", 2.00)
            ,("LU", 2.00)
            ,("MD", 2.00)
            ,("MG", 1.73)
            ,("MN", 2.00)
            ,("MO", 2.00)
            ,("MT", 2.00)
            ,("NA", 2.27)
            ,("NB", 2.00)
            ,("ND", 2.00)
            ,("NE", 1.54)
            ,("NI", 1.63)
            ,("NO", 2.00)
            ,("NP", 2.00)
            ,("OS", 2.00)
            ,("PA", 2.00)
            ,("PB", 2.02)
            ,("PD", 1.63)
            ,("PM", 2.00)
            ,("PO", 2.00)
            ,("PR", 2.00)
            ,("PT", 1.72)
            ,("PU", 2.00)
            ,("RA", 2.00)
            ,("RB", 2.00)
            ,("RE", 2.00)
            ,("RF", 2.00)
            ,("RH", 2.00)
            ,("RN", 2.00)
            ,("RU", 2.00)
            ,("SB", 2.00)
            ,("SC", 2.00)
            ,("SE", 1.90)
            ,("SG", 2.00)
            ,("SI", 2.10)
            ,("SM", 2.00)
            ,("SN", 2.17)
            ,("SR", 2.00)
            ,("TA", 2.00)
            ,("TB", 2.00)
            ,("TC", 2.00)
            ,("TE", 2.06)
            ,("TH", 2.00)
            ,("TI", 2.00)
            ,("TL", 1.96)
            ,("TM", 2.00)
            ,( "U", 1.86)
            ,( "V", 2.00)
            ,( "W", 2.00)
            ,("XE", 2.16)
            ,( "Y", 2.00)
            ,("YB", 2.00)
            ,("ZN", 1.39)
            ,("ZR", 2.00)
            ]

{-# INLINE maxVanDerWaalsRadius #-}
-- | Upper bound of @vanDerWaalsRadius@.
maxVanDerWaalsRadius :: Double
maxVanDerWaalsRadius  = vanDerWaalsRadius "K"

-- | Given a PDB 'Atom' extract or guess its 'Element' name.
assignElement :: Atom -> Element
assignElement at = case element at of
                     ""   -> guessElement . atName $ at
                     code -> code

-- | Guessing an 'Element' name from PDB 'Atom' name. 
-- Returns empty string, if 'Element' can't be guessed.
guessElement :: BS.ByteString -> BS.ByteString
guessElement = \e -> fromMaybe "" $ M.lookup (BS.unpack e) els
  where
    els = M.fromList
        [ ("C"   , "C"),
          ("C1'" , "C"),
          ("C2"  , "C"),
          ("C2'" , "C"),
          ("C3'" , "C"),
          ("C4"  , "C"),
          ("C4'" , "C"),
          ("C5"  , "C"),
          ("C5'" , "C"),
          ("C6"  , "C"),
          ("C8"  , "C"),
          ("CA"  , "C"),
          ("CB"  , "C"),
          ("CD"  , "C"),
          ("CD1" , "C"),
          ("CD2" , "C"),
          ("CE"  , "C"),
          ("CE1" , "C"),
          ("CE2" , "C"),
          ("CE3" , "C"),
          ("CG"  , "C"),
          ("CG1" , "C"),
          ("CG2" , "C"),
          ("CH2" , "C"),
          ("CZ"  , "C"),
          ("CZ2" , "C"),
          ("CZ3" , "C"),
          ("N"   , "N"),
          ("N1"  , "N"),
          ("N2"  , "N"),
          ("N3"  , "N"),
          ("N4"  , "N"),
          ("N6"  , "N"),
          ("N7"  , "N"),
          ("N9"  , "N"),
          ("ND1" , "N"),
          ("ND2" , "N"),
          ("NE"  , "N"),
          ("NE1" , "N"),
          ("NE2" , "N"),
          ("NH1" , "N"),
          ("NH2" , "N"),
          ("NZ"  , "N"),
          ("O"   , "O"),
          ("O2"  , "O"),
          ("O2'" , "O"),
          ("O3'" , "O"),
          ("O4"  , "O"),
          ("O4'" , "O"),
          ("O5'" , "O"),
          ("O6"  , "O"),
          ("OD1" , "O"),
          ("OD2" , "O"),
          ("OE1" , "O"),
          ("OE2" , "O"),
          ("OG"  , "O"),
          ("OG1" , "O"),
          ("OH"  , "O"),
          ("OP1" , "O"),
          ("OP2" , "O"),
          ("OXT" , "O"),
          ("P"   , "P"),
          ("SD"  , "S"),
          ("SG"  , "S")]