{- |
Module      : Radium.FormulaParser
Copyright   : Copyright (C) 2014 Krzysztof Langner
License     : BSD3

Maintainer  : Krzysztof Langner
Stability   : alpha
Portability : portable

Formula parser.

Formula can be entered as H2O, 2H2O, SO4-2 (Sulfate) or (CH3)2CO (Acetone)
-}
module Radium.FormulaParser
    ( Molecule(..)
    , parseFormula
    ) where

import Text.ParserCombinators.Parsec


-- | Parsed representation of a chemical formula.
data Molecule
    = Ion Molecule Int        -- ^ Wrapped molecule together with its signed charge
    | Molecule [Molecule] Int -- ^ Group of parts together with its multiplier
    | Element String Int      -- ^ Element symbol together with its count
    deriving (Eq, Show)


-- | Parse formula
--
-- > parseFormula "C2H4" `shouldBe` Molecule [Element "C" 2, Element "H" 4] 1
--
-- When the parser reports an error the result is @Molecule [] 0@.
--
-- NOTE(review): the parser does not require end of input, so any text
-- following the longest parsable prefix is silently ignored (for example
-- @parseFormula "h2o"@ yields @Molecule [] 1@). Confirm with callers before
-- tightening this with @eof@.
parseFormula :: String -> Molecule
parseFormula xs = either (const (Molecule [] 0)) id (parse ion "" xs)


-- Parse ion: optional leading multiplier, the formula body and an optional
-- trailing charge. The result is wrapped in 'Ion' only if a charge is present.
ion :: Parser Molecule
ion = do
    c <- number
    m <- formula
    n <- optionMaybe ionNumber
    let molecule = Molecule m c
    return (maybe molecule (Ion molecule) n)


-- Parse formula: any number (possibly zero) of bracketed groups and elements
formula :: Parser [Molecule]
formula = many (subformula <|> element)


-- Parse subformula in brackets '(' ')' followed by an optional multiplier
subformula :: Parser Molecule
subformula = do
    s <- between (char '(') (char ')') formula
    n <- number
    return (Molecule s n)


-- Parse element
-- Element consists of name and number
element :: Parser Molecule
element = do
    s <- symbol
    n <- number
    return (Element s n)


-- Parse element symbol
-- Starts with upper case letter followed by any number of lower case letters.
-- The length of the symbol is not limited here and the symbol is not checked
-- against any list of known elements.
symbol :: Parser String
symbol = do
    s <- upper
    ss <- many lower
    return (s:ss)


-- Parse number of elements. If number not found then return 1
-- 'read' is only ever applied to a non-empty string of digits, so it cannot
-- fail to parse.
number :: Parser Int
number = option 1 (fmap read (many1 digit))


-- Parse ion number. Ion number starts with '+' or '-'
-- A sign without digits means a charge of magnitude 1.
ionNumber :: Parser Int
ionNumber = do
    s <- char '-' <|> char '+'
    n <- number
    return $ if s == '+' then n else negate n