module ELynx.Data.Sequence.Translate
( translateSeq
)
where
import qualified Data.Vector.Unboxed as V
import ELynx.Data.Alphabet.Alphabet
import qualified ELynx.Data.Alphabet.Character as C
import ELynx.Data.Character.Codon
import ELynx.Data.Sequence.Sequence
import ELynx.Tools.Vector
-- | Translate a nucleotide 'Sequence' into a protein 'Sequence'.
--
-- The first argument is the universal code handed to the codon translation
-- function, the second is the reading frame. The sequence name is carried
-- over unchanged. The alphabet of the input decides both the translation
-- function and the alphabet of the result:
--
--   * 'DNA'  is translated with 'translate'  and yields 'ProteinS';
--   * 'DNAX' is translated with 'translateX' and yields 'ProteinS';
--   * 'DNAI' is translated with 'translateI' and yields 'ProteinI'.
--
-- Calls 'error' for every other alphabet.
translateSeq :: UniversalCode -> Int -> Sequence -> Sequence
translateSeq uc rf (Sequence name alphabet chars) = case alphabet of
  DNA  -> protein ProteinS (translate uc)
  DNAX -> protein ProteinS (translateX uc)
  DNAI -> protein ProteinI (translateI uc)
  _    -> error "translate: can only translate DNA, DNAX, and DNAI."
  where
    -- Assemble the translated sequence: convert the stored characters to a
    -- character vector, translate it with the given codon function, and
    -- convert the result back before tagging it with the target alphabet.
    protein code f =
      Sequence name code (C.fromCVec (translateVecWith f rf (C.toCVec chars)))
-- | Translate a vector of characters with the given codon translation
-- function and reading frame.
--
-- The reading frame has to be 0, 1, or 2; any other value results in a call
-- to 'error'. The first @rf@ characters are dropped, the remainder is split
-- with @chop 3@, every resulting piece is turned into a 'Codon' with
-- 'unsafeFromVec', and the translation function is applied to each codon.
--
-- NOTE(review): what happens to a trailing piece of fewer than three
-- characters depends on 'chop' and 'unsafeFromVec', which are defined
-- elsewhere -- confirm against their definitions.
translateVecWith :: (V.Unbox a, Ord a, V.Unbox b)
                 => (Codon a -> b) -> Int -> V.Vector a -> V.Vector b
translateVecWith toAminoAcid rf characters
  | rf < 0    = error "translate: reading frame is negative."
  | rf > 2    = error "translate: reading frame is larger than 2."
  | otherwise = V.fromList
      [ toAminoAcid (unsafeFromVec triplet)
      | triplet <- chop 3 (V.drop rf characters)
      ]