-- | Conversion between residue names and one-letter FASTA codes, and
-- rendering of residue sequences as FASTA sequences and FASTA records.
module Bio.PDB.Fasta(resname2fastacode, fastacode2resname ,
defaultResname, defaultFastaCode ,
fastaSequence, fastaGappedSequence,
fastaRecord, fastaGappedRecord ) where
import Bio.PDB.Iterable as Iter
import Bio.PDB.Structure as PDB
import Data.Map as Map
-- | Residue-name / one-letter-code pairs for nucleic acid residues.
-- Covers the plain names (A, C, G, U, I, T) and the D-prefixed names
-- (DA, DG, DC, DT, DI); each D-prefixed name maps to the same letter as
-- its unprefixed counterpart.
codebook_nucleic_acids =
  [ ("A",  'A')
  , ("C",  'C')
  , ("G",  'G')
  , ("U",  'U')
  , ("I",  'I')
  , ("DA", 'A')
  , ("DG", 'G')
  , ("DC", 'C')
  , ("DT", 'T')
  , ("DI", 'I')
  , ("T",  'T')
  ]
-- | Complete residue-name / one-letter-code table: the nucleic acid entries
-- followed by the protein entries.
codebook = concat [codebook_nucleic_acids, codebook_protein]
-- | Three-letter residue name / one-letter-code pairs for the twenty
-- amino acids listed here. Every code in this table is distinct, so the
-- table can be inverted without losing entries.
codebook_standard_protein =
  [ ("ALA", 'A')
  , ("CYS", 'C')
  , ("ASP", 'D')
  , ("GLU", 'E')
  , ("PHE", 'F')
  , ("GLY", 'G')
  , ("HIS", 'H')
  , ("ILE", 'I')
  , ("LYS", 'K')
  , ("LEU", 'L')
  , ("MET", 'M')
  , ("ASN", 'N')
  , ("PRO", 'P')
  , ("GLN", 'Q')
  , ("ARG", 'R')
  , ("SER", 'S')
  , ("THR", 'T')
  , ("VAL", 'V')
  , ("TRP", 'W')
  , ("TYR", 'Y')
  ]
-- | Protein residue table: the entries of 'codebook_standard_protein'
-- followed by one extra residue name.
codebook_protein = codebook_standard_protein ++ extraResidues
  where
    -- MSE is given the same one-letter code as MET.
    extraResidues = [("MSE", 'M')]
-- | Lookup table from residue name to one-letter code, built from every
-- entry of 'codebook'.
resname2fastacodeDictionary = Map.fromList codebook

-- | Reverse lookup table from one-letter code to residue name. It is built
-- from 'codebook_standard_protein' only, so nucleic acid names and MSE are
-- never produced by the reverse mapping.
fastacode2resnameDictionary =
  Map.fromList [(code, name) | (name, code) <- codebook_standard_protein]
-- | Residue name returned by 'fastacode2resname' for a code that is not in
-- its lookup table.
defaultResname = "UNK"

-- | One-letter code returned by 'resname2fastacode' for a residue name that
-- is not in its lookup table.
defaultFastaCode :: Char
defaultFastaCode = 'X'
-- | Translate a residue name into its one-letter code, falling back to
-- 'defaultFastaCode' when the name is not in 'resname2fastacodeDictionary'.
resname2fastacode resname =
  case Map.lookup resname resname2fastacodeDictionary of
    Just fastaCode -> fastaCode
    Nothing        -> defaultFastaCode

-- | Translate a one-letter code into a residue name, falling back to
-- 'defaultResname' when the code is not in 'fastacode2resnameDictionary'.
fastacode2resname code =
  case Map.lookup code fastacode2resnameDictionary of
    Just name -> name
    Nothing   -> defaultResname
-- | One-letter code of a residue, looked up from its 'resName' via
-- 'resname2fastacode'.
res2code :: Residue -> Char
res2code residue = resname2fastacode (resName residue)
-- | One-letter codes of all residues reachable from the argument, collected
-- with 'Iter.ifoldr'. No gap characters are inserted.
fastaSequence :: (Iterable a Residue) => a -> [Char]
fastaSequence structure = Iter.ifoldr prependCode [] structure
  where
    -- Put the residue's code in front of the codes collected so far.
    prependCode residue codes = res2code residue : codes
-- | One-letter codes of all residues reachable from the argument, with a
-- @\'-\'@ inserted for every 'resSeq' value skipped between two consecutive
-- residues.
--
-- Residues are collected with 'Iter.ifoldr' as @(resSeq, code)@ pairs. The
-- first residue is emitted as is; every later residue is preceded by
-- @j - i - 1@ gap characters, where @i@ and @j@ are the 'resSeq' values of
-- the previous and the current residue. When @j - i <= 1@ no gap is emitted.
fastaGappedSequence :: (Iterable a Residue) => a -> [Char]
fastaGappedSequence = concat . scan2 insertGaps projectAA . Iter.ifoldr (\a b -> (resSeq a, res2code a) : b) []
  where
    -- The first residue has no predecessor, so it contributes only its code.
    projectAA (_, aa) = [aa]
    -- The range [2 .. j - i] has one element per skipped residue number and
    -- is empty for adjacent (or non-increasing) numbers.
    insertGaps (i, _) (j, aa) = ['-' | _ <- [2 .. j - i]] ++ [aa]
    -- scan2 f g xs applies g to the first element and f to every pair of
    -- neighbouring elements, preserving order.
    scan2 _ _ [] = []
    scan2 f g (b:bs) = g b : go b bs
      where
        go prev (c:cs) = f prev c : go c cs
        go _ [] = []
-- | Build a FASTA record for a chain: a @>@ header line, a newline, and the
-- chain's sequence.
--
-- * @withGaps@ - when 'True' the sequence is produced by
--   'fastaGappedSequence', otherwise by 'fastaSequence'.
-- * @ident@ - identifier placed in the header line.
-- * @c@ - chain whose residues are rendered.
--
-- The header is @ident@ alone when the chain identifier is a blank, and
-- @ident|chainId@ otherwise.
fastaRecord' :: Bool -> [Char] -> Chain -> [Char]
fastaRecord' withGaps ident c = ">" ++ header ++ "\n" ++ fastaSeq c
  where
    header
      | chainId c == ' ' = ident
      | otherwise        = ident ++ "|" ++ [chainId c]
    -- The gapped renderer must be chosen exactly when gaps are requested.
    fastaSeq
      | withGaps  = fastaGappedSequence
      | otherwise = fastaSequence
-- | FASTA record for a chain, produced by 'fastaRecord'' with its
-- @withGaps@ flag set to 'False'.
fastaRecord :: [Char] -> Chain -> [Char]
fastaRecord ident chain = fastaRecord' False ident chain

-- | FASTA record for a chain, produced by 'fastaRecord'' with its
-- @withGaps@ flag set to 'True'.
fastaGappedRecord :: [Char] -> Chain -> [Char]
fastaGappedRecord ident chain = fastaRecord' True ident chain