{-# LANGUAGE TupleSections #-}
module Bio.MAE.Parser
( maeP
, versionP
, blockP
, tableP
) where
import Bio.MAE.Type (Block (..), Mae (..), MaeValue (..),
Table (..))
import Control.Applicative ((<|>))
import Control.Monad (replicateM, when, zipWithM)
import Data.Attoparsec.Text (Parser, anyChar, char, decimal,
endOfInput, endOfLine, many', many1',
peekChar, string, takeWhile, takeWhile1)
import Data.Char (isSpace)
import Data.List (transpose)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList)
import Data.Text (Text)
import qualified Data.Text as T (cons, pack, uncons)
import qualified Data.Text.Read as TR (decimal, rational, signed)
import Prelude hiding (takeWhile)
maeP :: Parser Mae
maeP = Mae <$> versionP
<*> many' blockP
<* endOfInput
versionP :: Parser Text
versionP = many' tillEndOfLine *> inBrackets (lineP *> delimiterP *> lineP)
blockP :: Parser Block
blockP = uncurry <$> (Block <$> anyStringP <* many' oneSpaceP)
<*> inBrackets ((,) <$> fieldsP <*> many' tableP)
where
fieldsP :: Parser (Map Text MaeValue)
fieldsP = do
fieldNames <- upToDelimiterP lineP
fieldMaeValues <- replicateM (length fieldNames) lineP
M.fromList <$> zipWithM (\k v -> (k,) <$> textToMaeValue k v) fieldNames fieldMaeValues
textToMaeValue :: Text -> Text -> Parser MaeValue
textToMaeValue k v = if v == absentMaeValue then pure Absent else
case T.uncons k of
Just (c, _) -> getMaeValueReader c v
_ -> fail "Absent field name."
where
absentMaeValue :: Text
absentMaeValue = "<>"
getMaeValueReader :: Char -> Text -> Parser MaeValue
getMaeValueReader 'i' = textToIntMaeValueReader
getMaeValueReader 'r' = textToRealMaeValueReader
getMaeValueReader 'b' = textToBoolMaeValueReader
getMaeValueReader 's' = textToStringMaeValueReader
getMaeValueReader _ = const $ fail "Unknown value type."
textToIntMaeValueReader :: Text -> Parser MaeValue
textToIntMaeValueReader = either fail (pure . IntMaeValue . fst) . TR.signed TR.decimal
textToRealMaeValueReader :: Text -> Parser MaeValue
textToRealMaeValueReader = either fail (pure . RealMaeValue . fst) . TR.signed TR.rational
textToBoolMaeValueReader :: Text -> Parser MaeValue
textToBoolMaeValueReader t =
case t of
"0" -> pure $ BoolMaeValue False
"1" -> pure $ BoolMaeValue True
_ -> fail "Can't parse bool value."
textToStringMaeValueReader :: Text -> Parser MaeValue
textToStringMaeValueReader = pure . StringMaeValue
tableP :: Parser Table
tableP = do
name <- many' oneSpaceP *> takeWhile1 (/= leftSquareBracket)
numberOfEntries <- char leftSquareBracket *> decimal <* char rightSquareBracket
_ <- many' oneSpaceP
contents <- inBrackets $ do
fieldNames <- upToDelimiterP lineP
let readers = fmap textToMaeValue fieldNames
entries <- replicateM numberOfEntries $ entryP readers
delimiterP
pure $ M.fromList $ zip fieldNames $ transpose entries
pure $ Table name contents
where
leftSquareBracket :: Char
leftSquareBracket = '['
rightSquareBracket :: Char
rightSquareBracket = ']'
entryP :: [Text -> Parser MaeValue] -> Parser [MaeValue]
entryP readers = do
valuesT <- many1' (many' oneSpaceP *> valueTP <* many' oneSpaceP) <* tillEndOfLine
when (length readers /= length valuesT - 1) $ fail "Wrong number of values in an entry."
zipWithM ($) readers $ drop 1 valuesT
inBrackets :: Parser a -> Parser a
inBrackets p = char leftBracket *> many1' tillEndOfLine
*> p
<* many' oneSpaceP <* char rightBracket <* many1' tillEndOfLine
where
leftBracket :: Char
leftBracket = '{'
rightBracket :: Char
rightBracket = '}'
delimiterP :: Parser ()
delimiterP = many' oneSpaceP *> string delimiter *> tillEndOfLine
where
delimiter :: Text
delimiter = ":::"
upToDelimiterP :: Parser a -> Parser [a]
upToDelimiterP p = ([] <$ delimiterP) <|> ((:) <$> p <*> upToDelimiterP p)
oneSpaceP :: Parser Char
oneSpaceP = char ' '
anyStringP :: Parser Text
anyStringP = takeWhile1 (not . isSpace)
valueTP :: Parser Text
valueTP = ((<>) <$> string quoteT <*> ((<>) <$> notQuote <*> string quoteT))
<|> anyStringP
where
quote :: Char
quote = '\"'
quoteT :: Text
quoteT = T.pack $ pure quote
notQuote :: Parser Text
notQuote = do
curCharPeek <- peekChar
case curCharPeek of
Just '\\' -> do
curChar <- anyChar
nextCharPeek <- peekChar
case nextCharPeek of
Just '\"' -> anyChar >>= \x -> fmap (T.cons curChar . T.cons x) notQuote
_ -> notQuote >>= pure . T.cons curChar
Just '\"' -> pure mempty
Just _ -> anyChar >>= \x -> fmap (T.cons x) notQuote
Nothing -> pure mempty
commentaryP :: Parser ()
commentaryP = () <$ many' (many' oneSpaceP *> char '#' *> takeWhile (`notElem` ['\n', '\r']) *> endOfLine)
lineP :: Parser Text
lineP = commentaryP *> many' oneSpaceP *> valueTP <* tillEndOfLine <* commentaryP
tillEndOfLine :: Parser ()
tillEndOfLine = () <$ many' oneSpaceP <* endOfLine