{- |
Module : Radium.FormulaParser
Copyright : Copyright (C) 2014 Krzysztof Langner
License : BSD3

Maintainer : Krzysztof Langner <klangner@gmail.com>
Stability : alpha
Portability : portable

Formula parser.

Formulas can be entered as H2O, 2H2O, SO4-2 (Sulfate) or (CH3)2CO (Acetone)
-}

module Radium.FormulaParser ( Molecule(..)
                            , parseFormula
                            ) where

import Text.ParserCombinators.Parsec

-- | Parsed representation of a chemical formula.
data Molecule = Ion Molecule Int
              -- ^ A molecule together with the signed number that followed
              -- it after a @+@ or @-@ sign (for example @-2@ for @SO4-2@).
              | Molecule [Molecule] Int    
              -- ^ A list of parts with a count. The count is the leading
              -- coefficient of the whole formula or the number written
              -- after a bracketed group; it is 1 when no number is given.
              -- 'parseFormula' returns @Molecule [] 0@ on a parse error.
              | Element String Int
              -- ^ An element symbol and the number written after it
              -- (1 when no number is given).
              deriving (Eq, Show)

-- | Parse a chemical formula into a 'Molecule'.
--
-- > parseFormula "C2H4" `shouldBe` Molecule [Element "C" 2, Element "H" 4] 1
--
-- When the parser reports an error the sentinel value @Molecule [] 0@ is
-- returned instead of the error.
--
-- NOTE(review): no end-of-input check is performed, so text following the
-- recognised prefix is ignored rather than reported as an error.
parseFormula :: String -> Molecule
parseFormula = either (const failed) id . parse ion ""
  where
    -- Value handed back to the caller when parsing fails.
    failed = Molecule [] 0

-- Parse a whole formula: an optional leading count, the formula body and
-- an optional trailing ion number.
-- The result is wrapped in 'Ion' only when an ion number is present.
ion :: Parser Molecule
ion = do
    count <- number
    parts <- formula
    ionNo <- optionMaybe ionNumber
    let molecule = Molecule parts count
    return (maybe molecule (Ion molecule) ionNo)

-- Parse zero or more consecutive parts of a formula.
formula :: Parser [Molecule]
formula = many part
  where
    -- A part is either a bracketed group or a single element.
    part = subformula <|> element

-- Parse a bracketed group: '(' formula ')' followed by an optional count.
subformula :: Parser Molecule
subformula = do
    _ <- char '('
    parts <- formula
    _ <- char ')'
    count <- number
    return (Molecule parts count)
    
-- Parse a single element: its symbol followed by an optional count.
element :: Parser Molecule
element =
    symbol >>= \name ->
    fmap (Element name) number
       
-- Parse an element symbol: one upper-case letter followed by zero or more
-- lower-case letters. The number of lower-case letters is not limited.
symbol :: Parser String
symbol =
    upper >>= \firstLetter ->
    fmap (firstLetter :) (many lower)
    
       
-- Parse a count written as a run of decimal digits.
-- When no digit is present nothing is consumed and the count defaults to 1.
number :: Parser Int
number = option 1 (fmap read (many1 digit))

-- Parse an ion number: a '+' or '-' sign followed by an optional count.
-- The result is the count, negated when the sign is '-'.
ionNumber :: Parser Int
ionNumber = do
    applySign <- (char '-' >> return negate) <|> (char '+' >> return id)
    fmap applySign number