module ELynx.Data.Sequence.Translate
( translateSeq,
)
where
import qualified Data.Vector.Unboxed as V
import ELynx.Data.Alphabet.Alphabet
import qualified ELynx.Data.Alphabet.Character as C
import ELynx.Data.Character.Codon
import ELynx.Data.Sequence.Sequence
-- | Split a vector into consecutive, non-overlapping chunks of length @n@.
--
-- Chunks are returned in order of appearance. A trailing remainder that is
-- shorter than @n@ is dropped, so every returned chunk has exactly @n@
-- elements.
--
-- Calls 'error' if @n@ is not positive: such a chunk length never consumes any
-- input, so the chopping would not terminate.
chopVec :: V.Unbox a => Int -> V.Vector a -> [V.Vector a]
chopVec n xs
  | n <= 0 = error "chopVec: chunk length must be positive."
  | otherwise = go xs
  where
    -- Peel off one full chunk at a time; stop as soon as fewer than @n@
    -- elements are left.
    go v
      | V.length v < n = []
      | otherwise = V.take n v : go (V.drop n v)
-- | Translate a nucleotide sequence into a protein sequence.
--
-- The code @uc@ is handed to the codon translation function that matches the
-- alphabet of the input, and @rf@ is the reading frame passed on to
-- 'translateVecWith' (which only accepts 0, 1, or 2).
--
-- * 'DNA' is translated with 'translate' and yields a 'ProteinS' sequence.
-- * 'DNAX' is translated with 'translateX' and yields a 'ProteinS' sequence.
-- * 'DNAI' is translated with 'translateI' and yields a 'ProteinI' sequence.
--
-- The first two fields of the input 'Sequence' are carried over unchanged.
-- Calls 'error' for any other alphabet.
translateSeq :: UniversalCode -> Int -> Sequence -> Sequence
translateSeq uc rf (Sequence n d alph chars) = case alph of
  DNA -> build ProteinS (translate uc)
  DNAX -> build ProteinS (translateX uc)
  DNAI -> build ProteinI (translateI uc)
  _ -> error "translate: can only translate DNA, DNAX, and DNAI."
  where
    -- Assemble the result for a target alphabet and a codon translation
    -- function. Kept as a function binding with explicit arguments so that it
    -- can be used at the different character types of the three cases above.
    build target codonToAa =
      Sequence n d target (C.fromCVec (translateVecWith codonToAa rf (C.toCVec chars)))
-- | Translate a vector of characters codon by codon.
--
-- The first @rf@ elements (the reading frame) are skipped, the remainder is
-- cut into consecutive triplets with 'chopVec', and the function @f@ is
-- applied to the codon built from each triplet. Trailing elements that do not
-- fill a complete triplet are ignored.
--
-- Calls 'error' if the reading frame is negative or larger than 2.
translateVecWith ::
  (V.Unbox a, Ord a, V.Unbox b) =>
  (Codon a -> b) ->
  Int ->
  V.Vector a ->
  V.Vector b
translateVecWith f rf cs
  | rf < 0 = error "translateVecWith: reading frame is negative."
  | rf > 2 = error "translateVecWith: reading frame is larger than 2."
  | otherwise =
      -- Every chunk produced by @chopVec 3@ has exactly three elements, which
      -- is presumably what 'unsafeFromVec' requires of its argument.
      V.fromList [f (unsafeFromVec triplet) | triplet <- chopVec 3 (V.drop rf cs)]