{-# LANGUAGE TupleSections #-} module Bio.MAE.Parser ( maeP , versionP , blockP , tableP ) where import Bio.MAE.Type (Block (..), Mae (..), MaeValue (..), Table (..)) import Control.Applicative ((<|>)) import Control.Monad (replicateM, when, zipWithM) import Data.Attoparsec.Text (Parser, anyChar, char, decimal, endOfInput, endOfLine, many', many1', peekChar, string, takeWhile, takeWhile1) import Data.Char (isSpace) import Data.List (transpose) import Data.Map.Strict (Map) import qualified Data.Map.Strict as M (fromList) import Data.Text (Text) import qualified Data.Text as T (cons, pack, uncons) import qualified Data.Text.Read as TR (decimal, rational, signed) import Prelude hiding (takeWhile) maeP :: Parser Mae maeP = Mae <$> versionP <*> many' blockP <* endOfInput versionP :: Parser Text versionP = many' tillEndOfLine *> inBrackets (lineP *> delimiterP *> lineP) blockP :: Parser Block blockP = uncurry <$> (Block <$> anyStringP <* many' oneSpaceP) <*> inBrackets ((,) <$> fieldsP <*> many' tableP) where fieldsP :: Parser (Map Text MaeValue) fieldsP = do fieldNames <- upToDelimiterP lineP fieldMaeValues <- replicateM (length fieldNames) lineP M.fromList <$> zipWithM (\k v -> (k,) <$> textToMaeValue k v) fieldNames fieldMaeValues textToMaeValue :: Text -> Text -> Parser MaeValue textToMaeValue k v = if v == absentMaeValue then pure Absent else case T.uncons k of Just (c, _) -> getMaeValueReader c v _ -> fail "Absent field name." where absentMaeValue :: Text absentMaeValue = "<>" getMaeValueReader :: Char -> Text -> Parser MaeValue getMaeValueReader 'i' = textToIntMaeValueReader getMaeValueReader 'r' = textToRealMaeValueReader getMaeValueReader 'b' = textToBoolMaeValueReader getMaeValueReader 's' = textToStringMaeValueReader getMaeValueReader _ = const $ fail "Unknown value type." textToIntMaeValueReader :: Text -> Parser MaeValue textToIntMaeValueReader = either fail (pure . IntMaeValue . fst) . TR.signed TR.decimal textToRealMaeValueReader :: Text -> Parser MaeValue textToRealMaeValueReader = either fail (pure . RealMaeValue . fst) . TR.signed TR.rational textToBoolMaeValueReader :: Text -> Parser MaeValue textToBoolMaeValueReader t = case t of "0" -> pure $ BoolMaeValue False "1" -> pure $ BoolMaeValue True _ -> fail "Can't parse bool value." textToStringMaeValueReader :: Text -> Parser MaeValue textToStringMaeValueReader = pure . StringMaeValue tableP :: Parser Table tableP = do name <- many' oneSpaceP *> takeWhile1 (/= leftSquareBracket) numberOfEntries <- char leftSquareBracket *> decimal <* char rightSquareBracket _ <- many' oneSpaceP contents <- inBrackets $ do fieldNames <- upToDelimiterP lineP let readers = fmap textToMaeValue fieldNames entries <- replicateM numberOfEntries $ entryP readers delimiterP pure $ M.fromList $ zip fieldNames $ transpose entries pure $ Table name contents where leftSquareBracket :: Char leftSquareBracket = '[' rightSquareBracket :: Char rightSquareBracket = ']' entryP :: [Text -> Parser MaeValue] -> Parser [MaeValue] entryP readers = do valuesT <- many1' (many' oneSpaceP *> valueTP <* many' oneSpaceP) <* tillEndOfLine when (length readers /= length valuesT - 1) $ fail "Wrong number of values in an entry." zipWithM ($) readers $ drop 1 valuesT -------------------------------------------------------------------------------- -- Utility functions. -------------------------------------------------------------------------------- inBrackets :: Parser a -> Parser a inBrackets p = char leftBracket *> many1' tillEndOfLine *> p <* many' oneSpaceP <* char rightBracket <* many1' tillEndOfLine where leftBracket :: Char leftBracket = '{' rightBracket :: Char rightBracket = '}' delimiterP :: Parser () delimiterP = many' oneSpaceP *> string delimiter *> tillEndOfLine where delimiter :: Text delimiter = ":::" upToDelimiterP :: Parser a -> Parser [a] upToDelimiterP p = ([] <$ delimiterP) <|> ((:) <$> p <*> upToDelimiterP p) oneSpaceP :: Parser Char oneSpaceP = char ' ' anyStringP :: Parser Text anyStringP = takeWhile1 (not . isSpace) valueTP :: Parser Text valueTP = ((<>) <$> string quoteT <*> ((<>) <$> notQuote <*> string quoteT)) <|> anyStringP where quote :: Char quote = '\"' quoteT :: Text quoteT = T.pack $ pure quote notQuote :: Parser Text notQuote = do curCharPeek <- peekChar case curCharPeek of Just '\\' -> do curChar <- anyChar nextCharPeek <- peekChar case nextCharPeek of Just '\"' -> anyChar >>= \x -> fmap (T.cons curChar . T.cons x) notQuote _ -> notQuote >>= pure . T.cons curChar Just '\"' -> pure mempty Just _ -> anyChar >>= \x -> fmap (T.cons x) notQuote Nothing -> pure mempty commentaryP :: Parser () commentaryP = () <$ many' (many' oneSpaceP *> char '#' *> takeWhile (`notElem` ['\n', '\r']) *> endOfLine) lineP :: Parser Text lineP = commentaryP *> many' oneSpaceP *> valueTP <* tillEndOfLine <* commentaryP tillEndOfLine :: Parser () tillEndOfLine = () <$ many' oneSpaceP <* endOfLine