-- | Constant lookup tables used by codon optimization: the codon <-> amino
-- acid mapping, per-organism codon frequencies, and motive scoring constants.
module Bio.Tools.Sequence.CodonOptimization.Constants
  ( ak2Codon
  , codon2ak
  , codonFrequencies
  , ak2MaxFrequCodon
  , motiveScoreWindow
  , forbiddenMotiveScore
  , defaultMotiveScore
  ) where

import Bio.NucleicAcid.Nucleotide.Type (DNA (..))
import Bio.Protein.AminoAcid.Type (AA (..))
import Bio.Tools.Sequence.CodonOptimization.Types (Organism (..))
import Data.Map as Map (Map, fromList)

-- | Window size used for motive scoring.
-- NOTE(review): the unit (nucleotides vs. codons) is not visible in this
-- module; confirm against the callers.
motiveScoreWindow :: Int
motiveScoreWindow = 100

-- | Score constant for forbidden motives.
-- NOTE(review): presumably a penalty applied when a forbidden motive is
-- found; confirm against the callers.
forbiddenMotiveScore :: Double
forbiddenMotiveScore = 50

-- | Score constant used when no special motive score applies.
-- NOTE(review): exact use is defined by the callers; confirm there.
defaultMotiveScore :: Double
defaultMotiveScore = 0

-- | Single source of truth for the amino acid <-> codon mapping.
-- Both 'ak2Codon' and 'codon2ak' are built from this list, so the two maps
-- cannot drift apart. The list holds 61 codons; TAA, TAG and TGA are not
-- assigned to any amino acid.
aminoAcidCodons :: [(AA, [[DNA]])]
aminoAcidCodons =
  [ (PHE, [[DT, DT, DT], [DT, DT, DC]])
  , (TYR, [[DT, DA, DT], [DT, DA, DC]])
  , (CYS, [[DT, DG, DT], [DT, DG, DC]])
  , (TRP, [[DT, DG, DG]])
  , (LEU, [[DT, DT, DA], [DT, DT, DG], [DC, DT, DT], [DC, DT, DC], [DC, DT, DA], [DC, DT, DG]])
  , (PRO, [[DC, DC, DT], [DC, DC, DC], [DC, DC, DA], [DC, DC, DG]])
  , (HIS, [[DC, DA, DT], [DC, DA, DC]])
  , (GLN, [[DC, DA, DA], [DC, DA, DG]])
  , (ILE, [[DA, DT, DT], [DA, DT, DC], [DA, DT, DA]])
  , (MET, [[DA, DT, DG]])
  , (THR, [[DA, DC, DT], [DA, DC, DC], [DA, DC, DA], [DA, DC, DG]])
  , (ASN, [[DA, DA, DT], [DA, DA, DC]])
  , (LYS, [[DA, DA, DA], [DA, DA, DG]])
  , (SER, [[DT, DC, DT], [DT, DC, DC], [DT, DC, DA], [DT, DC, DG], [DA, DG, DT], [DA, DG, DC]])
  , (ARG, [[DC, DG, DT], [DC, DG, DC], [DC, DG, DA], [DC, DG, DG], [DA, DG, DA], [DA, DG, DG]])
  , (VAL, [[DG, DT, DT], [DG, DT, DC], [DG, DT, DA], [DG, DT, DG]])
  , (ALA, [[DG, DC, DT], [DG, DC, DC], [DG, DC, DA], [DG, DC, DG]])
  , (ASP, [[DG, DA, DT], [DG, DA, DC]])
  , (GLU, [[DG, DA, DA], [DG, DA, DG]])
  , (GLY, [[DG, DG, DT], [DG, DG, DC], [DG, DG, DA], [DG, DG, DG]])
  ]

-- | All codons encoding each amino acid.
ak2Codon :: Map AA [[DNA]]
ak2Codon = fromList aminoAcidCodons

-- | Amino acid encoded by each codon (the inverse of 'ak2Codon').
-- TAA, TAG and TGA have no entry.
codon2ak :: Map [DNA] AA
codon2ak = fromList [ (codon, aa) | (aa, codons) <- aminoAcidCodons, codon <- codons ]

-- | For the given organism, a codon for every amino acid together with that
-- codon's relative frequency. Each frequency equals the corresponding entry
-- of 'codonFrequencies' and is the highest among the amino acid's codons
-- there. Where several codons tie for the highest value (Human ARG, CHO ARG
-- and SER) the codon listed here is one of the tied ones.
ak2MaxFrequCodon :: Organism -> Map AA ([DNA], Double)
-- taken from https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/blob/master/codon_usage_data/tables/h_sapiens_9606.csv
ak2MaxFrequCodon Human = fromList
  [ (ALA, ([DG, DC, DC], 0.4))
  , (CYS, ([DT, DG, DC], 0.54))
  , (ASP, ([DG, DA, DC], 0.54))
  , (GLU, ([DG, DA, DG], 0.58))
  , (PHE, ([DT, DT, DC], 0.54))
  , (GLY, ([DG, DG, DC], 0.34))
  , (HIS, ([DC, DA, DC], 0.58))
  , (ILE, ([DA, DT, DC], 0.47))
  , (LYS, ([DA, DA, DG], 0.57))
  , (LEU, ([DC, DT, DG], 0.4))
  , (MET, ([DA, DT, DG], 1.0))
  , (ASN, ([DA, DA, DC], 0.53))
  , (PRO, ([DC, DC, DC], 0.32))
  , (GLN, ([DC, DA, DG], 0.73))
  , (ARG, ([DA, DG, DG], 0.21))
  , (SER, ([DA, DG, DC], 0.24))
  , (THR, ([DA, DC, DC], 0.36))
  , (VAL, ([DG, DT, DG], 0.46))
  , (TRP, ([DT, DG, DG], 1.0))
  , (TYR, ([DT, DA, DC], 0.56))
  ]
-- taken from https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/blob/master/codon_usage_data/tables/e_coli_316407.csv
ak2MaxFrequCodon EColi = fromList
  [ (ALA, ([DG, DC, DG], 0.36))
  , (CYS, ([DT, DG, DC], 0.56))
  , (ASP, ([DG, DA, DT], 0.63))
    -- GLU and PHE previously read 0.68 and 0.58, which disagreed with
    -- 'codonFrequencies' EColi (0.69 and 0.57) built from the same table.
  , (GLU, ([DG, DA, DA], 0.69))
  , (PHE, ([DT, DT, DT], 0.57))
  , (GLY, ([DG, DG, DC], 0.41))
  , (HIS, ([DC, DA, DT], 0.57))
  , (ILE, ([DA, DT, DT], 0.51))
  , (LYS, ([DA, DA, DA], 0.76))
  , (LEU, ([DC, DT, DG], 0.50))
  , (MET, ([DA, DT, DG], 1.0))
  , (ASN, ([DA, DA, DC], 0.55))
  , (PRO, ([DC, DC, DG], 0.53))
  , (GLN, ([DC, DA, DG], 0.65))
  , (ARG, ([DC, DG, DC], 0.4))
  , (SER, ([DA, DG, DC], 0.28))
  , (THR, ([DA, DC, DC], 0.44))
  , (VAL, ([DG, DT, DG], 0.37))
  , (TRP, ([DT, DG, DG], 1.0))
  , (TYR, ([DT, DA, DT], 0.57))
  ]
-- taken from https://www.genscript.com/tools/codon-frequency-table
ak2MaxFrequCodon CHO = fromList
  [ (ALA, ([DG, DC, DC], 0.37))
  , (CYS, ([DT, DG, DC], 0.53))
  , (ASP, ([DG, DA, DC], 0.53))
  , (GLU, ([DG, DA, DG], 0.59))
  , (PHE, ([DT, DT, DC], 0.53))
  , (GLY, ([DG, DG, DC], 0.34))
  , (HIS, ([DC, DA, DC], 0.56))
  , (ILE, ([DA, DT, DC], 0.51))
  , (LYS, ([DA, DA, DG], 0.61))
  , (LEU, ([DC, DT, DG], 0.39))
  , (MET, ([DA, DT, DG], 1.0))
  , (ASN, ([DA, DA, DC], 0.55))
  , (PRO, ([DC, DC, DC], 0.32))
  , (GLN, ([DC, DA, DG], 0.76))
  , (ARG, ([DC, DG, DG], 0.19))
  , (SER, ([DA, DG, DC], 0.22))
  , (THR, ([DA, DC, DC], 0.37))
  , (VAL, ([DG, DT, DG], 0.46))
  , (TRP, ([DT, DG, DG], 1.0))
  , (TYR, ([DT, DA, DC], 0.56))
  ]

-- | Relative frequency of each codon for the given organism.
-- The Human and CHO tables also contain TAA, TAG and TGA, which have no entry
-- in 'codon2ak'.
-- NOTE(review): the EColi table has no entries for TAA, TAG and TGA, unlike
-- the other two organisms; confirm whether callers ever look those up.
codonFrequencies :: Organism -> Map [DNA] Double
-- taken from https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/blob/master/codon_usage_data/tables/h_sapiens_9606.csv
codonFrequencies Human = fromList
  [ ([DT, DA, DA], 0.30)
  , ([DT, DA, DG], 0.24)
  , ([DT, DG, DA], 0.47)
  , ([DG, DC, DA], 0.23)
  , ([DG, DC, DC], 0.40)
  , ([DG, DC, DG], 0.11)
  , ([DG, DC, DT], 0.27)
  , ([DT, DG, DC], 0.54)
  , ([DT, DG, DT], 0.46)
  , ([DG, DA, DC], 0.54)
  , ([DG, DA, DT], 0.46)
  , ([DG, DA, DA], 0.42)
  , ([DG, DA, DG], 0.58)
  , ([DT, DT, DC], 0.54)
  , ([DT, DT, DT], 0.46)
  , ([DG, DG, DA], 0.25)
  , ([DG, DG, DC], 0.34)
  , ([DG, DG, DG], 0.25)
  , ([DG, DG, DT], 0.16)
  , ([DC, DA, DC], 0.58)
  , ([DC, DA, DT], 0.42)
  , ([DA, DT, DA], 0.17)
  , ([DA, DT, DC], 0.47)
  , ([DA, DT, DT], 0.36)
  , ([DA, DA, DA], 0.43)
  , ([DA, DA, DG], 0.57)
  , ([DC, DT, DA], 0.07)
  , ([DC, DT, DC], 0.20)
  , ([DC, DT, DG], 0.40)
  , ([DC, DT, DT], 0.13)
  , ([DT, DT, DA], 0.08)
  , ([DT, DT, DG], 0.13)
  , ([DA, DT, DG], 1.00)
  , ([DA, DA, DC], 0.53)
  , ([DA, DA, DT], 0.47)
  , ([DC, DC, DA], 0.28)
  , ([DC, DC, DC], 0.32)
  , ([DC, DC, DG], 0.11)
  , ([DC, DC, DT], 0.29)
  , ([DC, DA, DA], 0.27)
  , ([DC, DA, DG], 0.73)
  , ([DA, DG, DA], 0.21)
  , ([DA, DG, DG], 0.21)
  , ([DC, DG, DA], 0.11)
  , ([DC, DG, DC], 0.18)
  , ([DC, DG, DG], 0.20)
  , ([DC, DG, DT], 0.08)
  , ([DA, DG, DC], 0.24)
  , ([DA, DG, DT], 0.15)
  , ([DT, DC, DA], 0.15)
  , ([DT, DC, DC], 0.22)
  , ([DT, DC, DG], 0.05)
  , ([DT, DC, DT], 0.19)
  , ([DA, DC, DA], 0.28)
  , ([DA, DC, DC], 0.36)
  , ([DA, DC, DG], 0.11)
  , ([DA, DC, DT], 0.25)
  , ([DG, DT, DA], 0.12)
  , ([DG, DT, DC], 0.24)
  , ([DG, DT, DG], 0.46)
  , ([DG, DT, DT], 0.18)
  , ([DT, DG, DG], 1.00)
  , ([DT, DA, DC], 0.56)
  , ([DT, DA, DT], 0.44)
  ]
-- taken from https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/blob/master/codon_usage_data/tables/e_coli_316407.csv
codonFrequencies EColi = fromList
  [ ([DG, DC, DA], 0.21)
  , ([DG, DC, DC], 0.27)
  , ([DG, DC, DG], 0.36)
  , ([DG, DC, DT], 0.16)
  , ([DT, DG, DC], 0.56)
  , ([DT, DG, DT], 0.44)
  , ([DG, DA, DC], 0.37)
  , ([DG, DA, DT], 0.63)
  , ([DG, DA, DA], 0.69)
  , ([DG, DA, DG], 0.31)
  , ([DT, DT, DC], 0.43)
  , ([DT, DT, DT], 0.57)
  , ([DG, DG, DA], 0.11)
  , ([DG, DG, DC], 0.41)
  , ([DG, DG, DG], 0.15)
  , ([DG, DG, DT], 0.34)
  , ([DC, DA, DC], 0.43)
  , ([DC, DA, DT], 0.57)
  , ([DA, DT, DA], 0.07)
  , ([DA, DT, DC], 0.42)
  , ([DA, DT, DT], 0.51)
  , ([DA, DA, DA], 0.76)
  , ([DA, DA, DG], 0.24)
  , ([DC, DT, DA], 0.04)
  , ([DC, DT, DC], 0.10)
  , ([DC, DT, DG], 0.50)
  , ([DC, DT, DT], 0.10)
  , ([DT, DT, DA], 0.13)
  , ([DT, DT, DG], 0.13)
  , ([DA, DT, DG], 1.00)
  , ([DA, DA, DC], 0.55)
  , ([DA, DA, DT], 0.45)
  , ([DC, DC, DA], 0.19)
  , ([DC, DC, DC], 0.12)
  , ([DC, DC, DG], 0.53)
  , ([DC, DC, DT], 0.16)
  , ([DC, DA, DA], 0.35)
  , ([DC, DA, DG], 0.65)
  , ([DA, DG, DA], 0.04)
  , ([DA, DG, DG], 0.02)
  , ([DC, DG, DA], 0.06)
  , ([DC, DG, DC], 0.40)
  , ([DC, DG, DG], 0.10)
  , ([DC, DG, DT], 0.38)
  , ([DA, DG, DC], 0.28)
  , ([DA, DG, DT], 0.15)
  , ([DT, DC, DA], 0.12)
  , ([DT, DC, DC], 0.15)
  , ([DT, DC, DG], 0.15)
  , ([DT, DC, DT], 0.15)
  , ([DA, DC, DA], 0.13)
  , ([DA, DC, DC], 0.44)
  , ([DA, DC, DG], 0.27)
  , ([DA, DC, DT], 0.16)
  , ([DG, DT, DA], 0.15)
  , ([DG, DT, DC], 0.22)
  , ([DG, DT, DG], 0.37)
  , ([DG, DT, DT], 0.26)
  , ([DT, DG, DG], 1.00)
  , ([DT, DA, DC], 0.43)
  , ([DT, DA, DT], 0.57)
  ]
-- taken from https://www.genscript.com/tools/codon-frequency-table
codonFrequencies CHO = fromList
  [ ([DT, DT, DT], 0.47)
  , ([DT, DT, DC], 0.53)
  , ([DT, DT, DA], 0.07)
  , ([DT, DT, DG], 0.14)
  , ([DT, DC, DT], 0.22)
  , ([DT, DC, DC], 0.22)
  , ([DT, DC, DA], 0.14)
  , ([DT, DC, DG], 0.05)
  , ([DT, DA, DT], 0.44)
  , ([DT, DA, DC], 0.56)
  , ([DT, DA, DA], 0.26)
  , ([DT, DA, DG], 0.22)
  , ([DT, DG, DT], 0.47)
  , ([DT, DG, DC], 0.53)
  , ([DT, DG, DA], 0.53)
  , ([DT, DG, DG], 1.00)
  , ([DC, DT, DT], 0.13)
  , ([DC, DT, DC], 0.19)
  , ([DC, DT, DA], 0.08)
  , ([DC, DT, DG], 0.39)
  , ([DC, DC, DT], 0.31)
  , ([DC, DC, DC], 0.32)
  , ([DC, DC, DA], 0.29)
  , ([DC, DC, DG], 0.08)
  , ([DC, DA, DT], 0.44)
  , ([DC, DA, DC], 0.56)
  , ([DC, DA, DA], 0.24)
  , ([DC, DA, DG], 0.76)
  , ([DC, DG, DT], 0.11)
  , ([DC, DG, DC], 0.18)
  , ([DC, DG, DA], 0.14)
  , ([DC, DG, DG], 0.19)
  , ([DA, DT, DT], 0.35)
  , ([DA, DT, DC], 0.51)
  , ([DA, DT, DA], 0.14)
  , ([DA, DT, DG], 1.00)
  , ([DA, DC, DT], 0.26)
  , ([DA, DC, DC], 0.37)
  , ([DA, DC, DA], 0.29)
  , ([DA, DC, DG], 0.08)
  , ([DA, DA, DT], 0.45)
  , ([DA, DA, DC], 0.55)
  , ([DA, DA, DA], 0.39)
  , ([DA, DA, DG], 0.61)
  , ([DA, DG, DT], 0.15)
  , ([DA, DG, DC], 0.22)
  , ([DA, DG, DA], 0.19)
  , ([DA, DG, DG], 0.19)
  , ([DG, DT, DT], 0.18)
  , ([DG, DT, DC], 0.24)
  , ([DG, DT, DA], 0.12)
  , ([DG, DT, DG], 0.46)
  , ([DG, DC, DT], 0.32)
  , ([DG, DC, DC], 0.37)
  , ([DG, DC, DA], 0.23)
  , ([DG, DC, DG], 0.07)
  , ([DG, DA, DT], 0.47)
  , ([DG, DA, DC], 0.53)
  , ([DG, DA, DA], 0.41)
  , ([DG, DA, DG], 0.59)
  , ([DG, DG, DT], 0.20)
  , ([DG, DG, DC], 0.34)
  , ([DG, DG, DA], 0.25)
  , ([DG, DG, DG], 0.21)
  ]