-- Translation Module -- By Gregory W. Schwartz {- | Collects all functions pertaining to the translation of nucleotides to amino acids for strings. -} module Data.Fasta.String.Translation where -- Built in import Data.Either import Data.Char -- Cabal import qualified Data.List.Split as Split -- Local import Data.Fasta.String.Types -- | Converts a codon to an amino acid -- Remember, if there is an "N" in that DNA sequence, then it is invalid codon2aa :: Codon -> Either String Char codon2aa x | codon `elem` ["GCT", "GCC", "GCA", "GCG"] = Right 'A' | codon `elem` ["CGT", "CGC", "CGA", "CGG", "AGA", "AGG"] = Right 'R' | codon `elem` ["AAT", "AAC"] = Right 'N' | codon `elem` ["GAT", "GAC"] = Right 'D' | codon `elem` ["TGT", "TGC"] = Right 'C' | codon `elem` ["CAA", "CAG"] = Right 'Q' | codon `elem` ["GAA", "GAG"] = Right 'E' | codon `elem` ["GGT", "GGC", "GGA", "GGG"] = Right 'G' | codon `elem` ["CAT", "CAC"] = Right 'H' | codon `elem` ["ATT", "ATC", "ATA"] = Right 'I' | codon `elem` ["ATG"] = Right 'M' | codon `elem` ["TTA", "TTG", "CTT", "CTC", "CTA", "CTG"] = Right 'L' | codon `elem` ["AAA", "AAG"] = Right 'K' | codon `elem` ["TTT", "TTC"] = Right 'F' | codon `elem` ["CCT", "CCC", "CCA", "CCG"] = Right 'P' | codon `elem` ["TCT", "TCC", "TCA", "TCG", "AGT", "AGC"] = Right 'S' | codon `elem` ["ACT", "ACC", "ACA", "ACG"] = Right 'T' | codon `elem` ["TGG"] = Right 'W' | codon `elem` ["TAT", "TAC"] = Right 'Y' | codon `elem` ["GTT", "GTC", "GTA", "GTG"] = Right 'V' | codon `elem` ["TAA", "TGA", "TAG"] = Right '*' | codon `elem` ["---", "..."] = Right '-' | codon == "~~~" = Right '-' | 'N' `elem` codon = Right '-' | '-' `elem` codon = Right '-' | '.' `elem` codon = Right '-' | otherwise = Left errorMsg where codon = map toUpper x errorMsg = "Unidentified codon: " ++ codon -- | Translates a string of nucleotides. Returns a string with the error if the -- codon is invalid. translate :: Int -> FastaSequence -> Either String FastaSequence translate pos x | any isLeft' translation = Left $ head . lefts $ translation | otherwise = Right $ x { fastaSeq = rights translation } where translation = map codon2aa . filter ((== 3) . length) . Split.chunksOf 3 . drop (pos - 1) . fastaSeq $ x isLeft' (Left _) = True isLeft' _ = False