-- | This module provides functionality for translation between nucleotides -- and amino acids. -- -- NOTE 'aaDNAseq' is lossy. Might be a good idea to consider something -- more involved? -- -- TODO we need different functions, depending on if we have a part of -- a genome in @DNA@ form, or some messenger @RNA@. It'll probably also be -- useful to return @Either@, with @Left@ indicating error like partially -- translated sequence due to intermediate stop codons, or so. -- -- TODO 'dnaAAseq' and 'aaDNAseq' can be nicely optimized using 'flatten' -- and friends. module Biobase.Primary.Trans where import Control.Lens import Control.Arrow ((***)) import Data.ByteString.Char8 (ByteString,unpack) import Data.FileEmbed (embedFile) import Data.Map.Strict (Map) import Data.Tuple (swap) import qualified Data.Map.Strict as M import qualified Data.Vector.Unboxed as VU import Biobase.Types.BioSequence import Biobase.Types.Codon import Biobase.Primary.AA import Biobase.Primary.Nuc import Biobase.Primary.Letter import Biobase.GeneticCodes.Translation import Biobase.GeneticCodes.Types -- | Transform translation tables into the @Letter DNA/Letter AA@ format. letterTranslationTable ∷ TranslationTable Char Char → TranslationTable (Letter DNA n) (Letter AA n) letterTranslationTable tbl = TranslationTable { _codonToAminoAcid = M.fromList . map (ftriplet *** felement) . M.toList $ tbl^.codonToAminoAcid , _aminoAcidtoCodons = M.fromList . map (charAA *** map felement) . M.toList $ tbl^.aminoAcidtoCodons , _tableID = tbl^.tableID , _tableName = tbl^.tableName } where ftriplet ∷ Codon Char → Codon (Letter DNA n) ftriplet = over each charDNA felement ∷ TranslationElement Char Char → TranslationElement (Letter DNA n) (Letter AA n) felement = over (baseCodon.each) charDNA . over aminoAcid charAA instance Translation (Codon (Letter DNA n)) where type TargetType (Codon (Letter DNA n)) = Letter AA n type CodonType (Codon (Letter DNA n)) = Letter DNA n type AAType (Codon (Letter DNA n)) = Letter AA n translate tbl t = maybe Unknown _aminoAcid $ M.lookup t (tbl^.codonToAminoAcid) {-# Inline translate #-} instance Translation (Primary DNA n) where type TargetType (Primary DNA n) = Primary AA n type CodonType (Primary DNA n) = Letter DNA n type AAType (Primary DNA n) = Letter AA n -- | -- -- TODO we could consider returning @Nothing@ in case the input is not -- power-of-three. translate tbl xs = VU.unfoldrN (VU.length xs `div` 3) go xs where go (VU.splitAt 3 → (hs,ts)) | VU.length hs < 3 = Nothing | otherwise = Just (aa,ts) where [a,b,c] = VU.toList hs aa = translate tbl $ Codon a b c {-# Inline translate #-} {- -- | Using the codon table, create an amino acid sequence from a @DNA@ -- sequence (encoded as 'Primary DNA'). Suffixed @seq@ as we deal with -- sequences, not letters. dnaAAseq :: Primary DNA -> Primary AA dnaAAseq = VU.fromList . go where go (VU.length -> 0) = [] go (VU.splitAt 3 -> (hs,ts)) = case M.lookup hs dnaAAmap of Just aa -> aa : go ts _ -> error $ "dnaAAseq: " ++ show (hs,ts) -- | Transform an amino acid sequence back into DNA. -- -- WARNING: This is lossy! aaDNAseq :: Primary AA -> Primary DNA aaDNAseq = VU.concatMap go where go aa = case M.lookup aa aaDNAmap of Just codon -> codon Nothing -> error $ "aaDNAseq" ++ show aa -- * Embedded codon data -- | Lossy backtransformation. aaDNAmap :: M.Map (Letter AA) (Primary DNA) aaDNAmap = M.fromList . map swap . M.assocs $ dnaAAmap {-# NOINLINE aaDNAmap #-} dnaAAmap :: Map (Primary DNA) (Letter AA) dnaAAmap = M.fromList . map (primary *** charAA) . M.assocs $ codonTable where {-# NOINLINE dnaAAmap #-} codonTable :: Map String Char codonTable = M.fromList . map (go . words) . lines . unpack $ codonListEmbedded where go [cs,[c]] = (cs,c) go e = error $ "codonTable:" ++ show e {-# NOINLINE codonTable #-} -- | Raw codon table codonListEmbedded :: ByteString codonListEmbedded = $(embedFile "sources/codontable") -}