-- | -- -- The tables imported with these functions are from ENA: -- module Biobase.GeneticCodes.Import where import Control.Monad.Except import Data.ByteString.Char8 as BS hiding (unpack,map) import Data.ByteString.Char8 (ByteString) import Data.Text.Encoding (decodeUtf8) import Data.Text.IO as TIO import Data.Text (Text,unpack) import Data.Void import System.Exit import Text.Megaparsec import Text.Megaparsec.Char as MC import Text.Megaparsec.Char.Lexer as MCL import Biobase.Types.Codon import Biobase.GeneticCodes.Types type TTParser = Parsec Void Text -- | Import translation tables from a given file. In case of parse error, print -- the error and exit with a failure. fromFile :: (MonadIO m, MonadError String m) => FilePath -> m [TranslationTable Char Char] fromFile fp = (liftIO $ BS.readFile fp) >>= fromByteString -- | Parse a ByteString with translation tables. fromByteString :: (MonadError String m) => ByteString -> m [TranslationTable Char Char] fromByteString bs = case runParser (some parseTranslationTable) "" (decodeUtf8 bs) of -- Left err -> throwError $ parseErrorPretty err -- megaparsec 6.x Left err -> throwError $ errorBundlePretty err -- megaparsec 7.x Right rs -> return rs -- | Parses a single translation table. parseTranslationTable :: TTParser (TranslationTable Char Char) parseTranslationTable = do (i,hdr) ← parseHeader aas ← parseData "amino acids" starts' ← parseData "start codons" base1 ← parseData "Base 1" base2 ← parseData "Base 2" base3 ← parseData "Base 3" let triplets = zipWith3 Codon base1 base2 base3 let starts = map (=='M') starts' let translations = zipWith3 TranslationElement triplets starts aas return $ genTranslationTable i hdr translations -- | Parse the header, returning the Identifier and the name of the table. parseHeader :: TTParser (Int,Text) parseHeader = (,) <$> (fromIntegral <$> lexeme MC.space decimal) <* char ':' <* MC.space <*> takeWhileP Nothing (/= '\n') <* MC.space parseData :: Text -> TTParser String parseData t = string' t <* MC.space *> (unpack <$> takeP Nothing 64) <* MC.space