{-|
Module      : Isotope.Parsers
Description : Parsers for chemical and condensed formulae.
Copyright   : Michael Thomas
License     : GPL-3
Maintainer  : Michael Thomas <Michaelt293@gmail.com>
Stability   : Experimental

This module provides parsers for element symbols and elemental composition as
well molecular, condensed and empirical formulae. In addition, quasiquoters are
provided.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE FlexibleInstances #-}
module Isotope.Parsers (
    -- * Parsers
      elementSymbol
    , subFormula
    , elementalComposition
    , molecularFormula
    , condensedFormula
    , empiricalFormula
    , ele
    , mol
    , con
    , emp
    ) where

import Isotope.Base
import Language.Haskell.TH.Quote
import Language.Haskell.TH.Lift
import Text.Megaparsec
import Text.Megaparsec.String
import qualified Text.Megaparsec.Lexer as L
import Data.String
import Data.List hiding (filter)
import Data.Map (Map)
import Data.Monoid ((<>))

-- | Parses an element symbol string.
elementSymbol :: Parser ElementSymbol
elementSymbol = read <$> choice (try . string <$> elementSymbolStrList)
  where
    elementList = show <$> elementSymbolList
    reverseLengthSort x y = length y `compare` length x
    elementSymbolStrList = sortBy reverseLengthSort elementList

-- | Parses an sub-formula (i.e., \"C2\").
subFormula :: Parser (ElementSymbol, Int)
subFormula =
  (\sym num -> (sym, fromIntegral num)) <$> elementSymbol <*> option 1 L.integer

-- | Parses an elemental composition (i.e. \"C6H6\").
elementalComposition :: Parser ElementalComposition
elementalComposition = mkElementalComposition <$> many subFormula

-- | Parses a molecular formula (i.e. \"C6H6\").
molecularFormula :: Parser MolecularFormula
molecularFormula = mkMolecularFormula <$> many subFormula

-- | Parses a condensed formula, i.e., \"N(CH3)3\".
condensedFormula :: Parser CondensedFormula
condensedFormula =  CondensedFormula <$> many (leftCondensedFormula <|> rightCondensedFormula)
  where
    subMolecularFormula :: Parser MolecularFormula
    subMolecularFormula = mkMolecularFormula . pure <$> subFormula
    leftCondensedFormula :: Parser (Either MolecularFormula (CondensedFormula, Int))
    leftCondensedFormula = Left <$> subMolecularFormula
    rightCondensedFormula :: Parser (Either MolecularFormula (CondensedFormula, Int))
    rightCondensedFormula = do
       _ <- char '('
       formula <- condensedFormula
       _ <- char ')'
       num <- option 1 L.integer
       return $ Right (formula, fromIntegral num)

-- | Parses a empirical formula (i.e. \"CH\").
empiricalFormula :: Parser EmpiricalFormula
empiricalFormula = mkEmpiricalFormula <$> many subFormula

-- Helper function for `ElementalComposition` quasiquoter
quoteElementalComposition s =
  case parse (condensedFormula <* eof) "" s of
    Left err -> error $ "Could not parse formula: " <> show err
    Right v  -> lift $ toElementalComposition v

-- Helper function for `MolecularFormula` quasiquoter
quoteMolecularFormula s =
  case parse (condensedFormula <* eof) "" s of
    Left err -> fail $ "Could not parse formula: " <> show err
    Right v  -> lift $ toMolecularFormula v

-- Helper function for `CondensedFormula` quasiquoter
quoteCondensedFormula s =
  case parse (condensedFormula <* eof) "" s of
    Left err -> error $ "Could not parse formula: " <> show err
    Right v  -> lift v

-- Helper function for `EmpiricalFormula` quasiquoter
quoteEmpiricalFormula s =
  case parse (condensedFormula <* eof) "" s of
    Left err -> fail $ "Could not parse formula: " <> show err
    Right v  -> lift $ toEmpiricalFormula v

-- | Quasiquoter for `ElementalComposition`
ele :: QuasiQuoter
ele = QuasiQuoter
    { quoteExp = quoteElementalComposition }

-- | Quasiquoter for `MolecularFormula`
mol :: QuasiQuoter
mol = QuasiQuoter
    { quoteExp = quoteMolecularFormula }

-- | Quasiquoter for `CondensedFormula`
con :: QuasiQuoter
con = QuasiQuoter
    { quoteExp = quoteCondensedFormula }

-- | Quasiquoter for `EmpiricalFormula`
emp :: QuasiQuoter
emp = QuasiQuoter
    { quoteExp = quoteEmpiricalFormula }

$(deriveLift ''ElementSymbol)

$(deriveLift ''ElementalComposition)

$(deriveLift ''MolecularFormula)

$(deriveLift ''CondensedFormula)

$(deriveLift ''EmpiricalFormula)

$(deriveLift ''Map)