module Radium.Formats.Smiles ( Smiles(..)
, readSmiles
, writeSmiles ) where
import Text.ParserCombinators.Parsec
import qualified Data.Set as Set
import Data.Maybe
data Smiles = SmilesRing Atom
Int
Smiles
| Smiles Atom
deriving( Eq, Show )
data Atom = Atom String
Int
Int
Int
Int
| Aliphatic String
| Aromatic String
| Unknown
deriving( Eq, Show )
aliphatics :: Set.Set String
aliphatics = Set.fromList ["B", "C", "N", "O", "S", "P", "F", "Cl", "Br", "I" ]
aromatics :: Set.Set Char
aromatics = Set.fromList "bcnosp"
readSmiles :: String -> Either String Smiles
readSmiles xs = case parse smiles "" xs of
Left err -> Left (show err)
Right val -> Right val
smiles :: Parser Smiles
smiles = do
a <- atom
n <- optionMaybe bound
xs <- optionMaybe smiles
return $ if isJust xs then SmilesRing a (fromMaybe 1 n) (fromJust xs) else Smiles a
bound :: Parser Int
bound = do
s <- char '-' <|> char '=' <|> char '#' <|> char '$'
return $ case s of
'=' -> 2
'#' -> 3
'$' -> 4
_ -> 1
atom :: Parser Atom
atom = bracketAtom <|> aliphaticOrganic <|> aromaticOrganic <|> unknown
bracketAtom :: Parser Atom
bracketAtom = do
_ <- char '['
i <- optionMaybe number
s <- symbolOrUnknown
hc <- optionMaybe hcount
n <- optionMaybe charge
ac <- optionMaybe atomClass
_ <- char ']'
return $ Atom s (fromMaybe 0 i) (fromMaybe 0 hc) (fromMaybe 0 n) (fromMaybe 0 ac)
symbolOrUnknown :: Parser String
symbolOrUnknown = symbol <|> string "*"
hcount :: Parser Int
hcount = do
_ <- char 'H'
hc <- optionMaybe number
let n = fromMaybe 1 hc
return $ if n == 0 then 1 else n
charge :: Parser Int
charge = do
s <- char '-' <|> char '+'
n <- number
let m = if n == 0 then 1 else n
return $ if s == '-' then (m) else m
atomClass :: Parser Int
atomClass = do
_ <- char ':'
number
number :: Parser Int
number = do
ds <- many digit
return $ if null ds then 0 else read ds :: Int
aliphaticOrganic :: Parser Atom
aliphaticOrganic = do
ss <- symbol
if Set.member ss aliphatics then return (Aliphatic ss) else fail ""
aromaticOrganic :: Parser Atom
aromaticOrganic = do
ss <- lower
if Set.member ss aromatics then return (Aromatic [ss]) else fail ""
unknown :: Parser Atom
unknown = do
_ <- char '*'
return Unknown
symbol :: Parser String
symbol = do
s <- upper
ss <- many lower
return (s:ss)
writeSmiles :: Smiles -> String
writeSmiles (SmilesRing a n xs) = writeAtom a ++ writeBounds n ++ writeSmiles xs
writeSmiles (Smiles a) = writeAtom a
writeAtom :: Atom -> String
writeAtom (Atom xs ic hc n ac) = "[" ++ showIsotopes ++ xs ++ showHyrdogen ++ showCharge ++ showClass ++ "]"
where showIsotopes = if ic > 0 then show ic else ""
showHyrdogen | hc > 1 = "H" ++ show hc
| hc == 1 = "H"
| otherwise = ""
showCharge | n < (1) = show n
| n == (1) = "-"
| n == 1 = "+"
| n > 1 = "+" ++ show n
| otherwise = ""
showClass = if ac > 0 then ":" ++ show ac else ""
writeAtom (Aliphatic xs) = xs
writeAtom (Aromatic xs) = xs
writeAtom Unknown = "*"
writeBounds :: Int -> String
writeBounds 2 = "="
writeBounds 3 = "#"
writeBounds 4 = "$"
writeBounds _ = ""