{-# LANGUAGE TemplateHaskell #-}

-- | This module provides functionality for translation between nucleotides
-- and amino acids.
--
-- NOTE 'aaDNAseq' is lossy. Might be a good idea to consider something
-- more involved?
--
-- TODO we need different functions, depending on if we have a part of
-- a genome in @DNA@ form, or some messenger @RNA@. It'll probably also be
-- useful to return @Either@, with @Left@ indicating error like partially
-- translated sequence due to intermediate stop codons, or so.
--
-- TODO 'dnaAAseq' and 'aaDNAseq' can be nicely optimized using 'flatten'
-- and friends.

module Biobase.Primary.Trans where

import           Control.Arrow ((***))
import           Data.ByteString.Char8 (ByteString,unpack)
import           Data.FileEmbed (embedFile)
import           Data.Map.Strict (Map)
import           Data.Tuple (swap)
import qualified Data.Map.Strict as M
import qualified Data.Vector.Unboxed as VU

import           Biobase.Primary.AA
import           Biobase.Primary.Nuc
import           Biobase.Primary.Letter



-- | Using the codon table, create an amino acid sequence from a @DNA@
-- sequence (encoded as 'Primary DNA'). Suffixed @seq@ as we deal with
-- sequences, not letters.
--
-- The input is consumed in chunks of three letters, each of which is looked
-- up in 'dnaAAmap'. Calls 'error' if a chunk is not a key of 'dnaAAmap';
-- this includes a trailing chunk of fewer than three letters, unless such a
-- short key happens to be present in the table.

dnaAAseq :: Primary DNA -> Primary AA
dnaAAseq = VU.fromList . go
  where
    go xs
      | VU.null xs = []
      | otherwise  =
          let (hs, ts) = VU.splitAt 3 xs
          in  case M.lookup hs dnaAAmap of
                Just aa -> aa : go ts
                -- report both the offending chunk and the untranslated rest
                Nothing -> error $ "dnaAAseq: " ++ show (hs, ts)

-- | Transform an amino acid sequence back into DNA.
--
-- WARNING: This is lossy! Every amino acid is replaced by the single codon
-- stored for it in 'aaDNAmap'. Calls 'error' for an amino acid that has no
-- entry in 'aaDNAmap'.

aaDNAseq :: Primary AA -> Primary DNA
aaDNAseq = VU.concatMap go
  where
    go aa = case M.lookup aa aaDNAmap of
      Just codon -> codon
      Nothing    -> error $ "aaDNAseq: " ++ show aa



-- * Embedded codon data

-- | Lossy backtransformation.
--
-- Built by inverting 'dnaAAmap'. Several codons may map to the same amino
-- acid; 'M.fromList' keeps the last pair for a duplicate key, and the pairs
-- are fed in ascending codon order ('M.assocs'), so the codon that compares
-- greatest is the one retained for each amino acid.

aaDNAmap :: M.Map (Letter AA) (Primary DNA)
aaDNAmap = M.fromList . map swap . M.assocs $ dnaAAmap
{-# NOINLINE aaDNAmap #-}

-- | Map from codons to amino acids, obtained from 'codonTable' by converting
-- each key with 'primary' and each value with 'charAA'.

dnaAAmap :: Map (Primary DNA) (Letter AA)
dnaAAmap = M.fromList . map (primary *** charAA) . M.assocs $ codonTable
{-# NOINLINE dnaAAmap #-}

-- | The codon table in textual form, parsed from 'codonListEmbedded'.
--
-- Every line of the embedded data must consist of exactly two
-- whitespace-separated words: the codon, followed by a single character for
-- the amino acid. Any other line shape (including an empty line) results in
-- a call to 'error'.

codonTable :: Map String Char
codonTable = M.fromList . map (go . words) . lines . unpack $ codonListEmbedded
  where
    go [cs,[c]] = (cs,c)
    go e        = error $ "codonTable:" ++ show e
{-# NOINLINE codonTable #-}

-- | Raw codon table, embedded at compile time from the file
-- @sources/codontable@.

codonListEmbedded :: ByteString
codonListEmbedded = $(embedFile "sources/codontable")