{-|
Module      : Isotope.Parsers
Description : Parsers for chemical and condensed formulae.
Copyright   : Michael Thomas
License     : GPL-3
Maintainer  : Michael Thomas
Stability   : Experimental

This module provides parsers for element symbols as well as molecular,
empirical and condensed formulae. In addition, QuasiQuoters are provided.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE FlexibleInstances #-}
module Isotope.Parsers (
    -- * Parsers
      elementSymbol
    , subFormula
    , molecularFormula
    , condensedFormula
    -- * Quasi-quoters
    , mol
    , emp
    , con
    ) where

import Isotope.Base
import Language.Haskell.TH (Exp, Q)
import Language.Haskell.TH.Quote
import Language.Haskell.TH.Lift
import Text.Megaparsec
import Text.Megaparsec.String
import qualified Text.Megaparsec.Lexer as L
import Data.String
import Data.List hiding (filter)
import Data.Map (Map)

-- | Parses an element symbol string.
--
-- Every symbol in 'elementSymbolList' is matched against its 'show'
-- representation. Candidates are tried longest-first, so a longer symbol is
-- always attempted before any shorter symbol that is a prefix of it.
elementSymbol :: Parser ElementSymbol
elementSymbol = choice (symbolParser <$> longestFirst)
  where
    -- Return the symbol the parser was built from, rather than calling
    -- 'read' on the text that was matched.
    symbolParser sym = try (sym <$ string (show sym))
    longestFirst = sortBy byDescendingLength elementSymbolList
    byDescendingLength x y = length (show y) `compare` length (show x)

-- | Parses a sub-formula (i.e., \"C2\"): an element symbol optionally
-- followed by an integer count. The count is 1 when no integer is present.
subFormula :: Parser MolecularFormula
subFormula = do
    sym      <- elementSymbol
    quantity <- optional L.integer
    return $ mkMolecularFormula [(sym, maybe 1 fromIntegral quantity)]

-- | Parses a molecular formula (i.e. \"C6H6\") as zero or more sub-formulae
-- combined with 'mconcat'.
molecularFormula :: Parser MolecularFormula
molecularFormula = mconcat <$> many subFormula

-- Helper function. Parses parenthesised sections in condensed formulae, i.e.,
-- the \"(CH3)3\" section of \"N(CH3)3\". The result is always a 'Right'
-- holding the enclosed sub-formulae and the multiplier that follows the
-- closing parenthesis (1 when no multiplier is present).
parenFormula :: Parser (Either MolecularFormula ([MolecularFormula], Int))
parenFormula = do
    formulas <- char '(' *> some subFormula <* char ')'
    quantity <- optional L.integer
    return $ Right (formulas, maybe 1 fromIntegral quantity)

-- Helper function. Parses non-parenthesised sections in condensed formulae,
-- i.e., the \"N\" section of \"N(CH3)3\". The result is always a 'Left'.
leftMolecularFormula :: Parser (Either MolecularFormula ([MolecularFormula], Int))
leftMolecularFormula = Left <$> subFormula

-- | Parses a condensed formula, i.e., \"N(CH3)3\", as a sequence of plain
-- and parenthesised sections.
condensedFormula :: Parser CondensedFormula
condensedFormula = CondensedFormula <$> many (leftMolecularFormula <|> parenFormula)

-- Helper function. Parses the whole quoted string as a condensed formula,
-- applies the given conversion and lifts the result into an expression.
-- A parse failure is reported through 'fail' in the 'Q' monad.
quoteFormulaWith :: Lift a => (CondensedFormula -> a) -> String -> Q Exp
quoteFormulaWith convert s =
    case parse (condensedFormula <* eof) "" s of
        Left err -> fail $ "Could not parse formula: " ++ show err
        Right v  -> lift (convert v)

-- Expression quoter behind 'mol'.
quoteMolecularFormula :: String -> Q Exp
quoteMolecularFormula = quoteFormulaWith toMolecularFormula

-- Expression quoter behind 'emp'.
quoteEmpiricalFormula :: String -> Q Exp
quoteEmpiricalFormula = quoteFormulaWith toEmpiricalFormula

-- Expression quoter behind 'con'. Uses 'fail' (via 'quoteFormulaWith'), the
-- same way the other two quoters report a parse failure.
quoteCondensedFormula :: String -> Q Exp
quoteCondensedFormula = quoteFormulaWith id

-- Helper function. Quoter for the contexts these quasi-quoters do not
-- support; ignores the quoted text and fails with an explanatory message.
notSupportedIn :: String -> String -> String -> Q a
notSupportedIn name context _ =
    fail $ "The " ++ name ++ " quasi-quoter cannot be used in "
        ++ context ++ " context."

-- Helper function. Builds a quasi-quoter that only works in expression
-- context; every other field fails explicitly instead of being left
-- undefined.
expressionQuoter :: String -> (String -> Q Exp) -> QuasiQuoter
expressionQuoter name quoter = QuasiQuoter
    { quoteExp  = quoter
    , quotePat  = notSupportedIn name "a pattern"
    , quoteType = notSupportedIn name "a type"
    , quoteDec  = notSupportedIn name "a declaration"
    }

-- | Quasi-quoter producing a 'MolecularFormula' expression from a condensed
-- formula string.
mol :: QuasiQuoter
mol = expressionQuoter "mol" quoteMolecularFormula

-- | Quasi-quoter producing an 'EmpiricalFormula' expression from a condensed
-- formula string.
emp :: QuasiQuoter
emp = expressionQuoter "emp" quoteEmpiricalFormula

-- | Quasi-quoter producing a 'CondensedFormula' expression from a condensed
-- formula string.
con :: QuasiQuoter
con = expressionQuoter "con" quoteCondensedFormula

-- 'Lift' instances for the types the quoters above pass to 'lift'.
$(deriveLift ''MolecularFormula)
$(deriveLift ''EmpiricalFormula)
$(deriveLift ''CondensedFormula)
$(deriveLift ''Map)
$(deriveLift ''ElementSymbol)