-- | Abstract translation functionality. Given a genetic code translation table
-- (as provided within this module), provide translation from the nucleotide to
-- the amino acid alphabet.
--
-- Limited "backtranslation" capabilities are provided. Since this process is
-- lossy, it should only be used in very specific circumstances.
--
-- TODO Translation from @BioSequence RNA is missing.

module Biobase.GeneticCodes.Translation where

import           Control.Lens
import qualified Data.Map.Strict as M
import qualified Data.ByteString.Char8 as BS

import           Biobase.Types.BioSequence
import           Biobase.Types.Codon

import           Biobase.GeneticCodes.Types



-- |

class Translation t where
  -- | Defines the target type for a given translation input.
  type TargetType t :: *
  -- | Type of the nucleotide characters.
  type CodonType t :: *
  -- | Type of the amino acid characters.
  type AAType t :: *
  -- | Translate from a given type of sequence @t@ into the target type.
  translate :: TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
  -- | This function works just like @translate@ but with an important difference of creating a
  -- target sequence that contains all possible frames. The index @mod 3@ yields the current frame.
  translateAllFrames :: TranslationTable (CodonType t) (AAType t) -> t -> TargetType t

-- | Very simple translation of individual base triplets.

instance Translation (Codon Char) where
  type TargetType  (Codon Char) = Char
  type CodonType (Codon Char) = Char
  type AAType      (Codon Char) = Char
  translate :: TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
translate TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
tbl Codon Char
t = Char
-> (TranslationElement Char Char -> Char)
-> Maybe (TranslationElement Char Char)
-> Char
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Char
'X' TranslationElement Char Char -> Char
forall c a. TranslationElement c a -> a
_aminoAcid (Maybe (TranslationElement Char Char) -> Char)
-> Maybe (TranslationElement Char Char) -> Char
forall a b. (a -> b) -> a -> b
$ Codon Char
-> Map (Codon Char) (TranslationElement Char Char)
-> Maybe (TranslationElement Char Char)
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Codon Char
t (TranslationTable Char Char
TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
tblTranslationTable Char Char
-> Getting
     (Map (Codon Char) (TranslationElement Char Char))
     (TranslationTable Char Char)
     (Map (Codon Char) (TranslationElement Char Char))
-> Map (Codon Char) (TranslationElement Char Char)
forall s a. s -> Getting a s a -> a
^.Getting
  (Map (Codon Char) (TranslationElement Char Char))
  (TranslationTable Char Char)
  (Map (Codon Char) (TranslationElement Char Char))
forall c a.
Lens'
  (TranslationTable c a) (Map (Codon c) (TranslationElement c a))
codonToAminoAcid)
  {-# Inline translate #-}
  translateAllFrames :: TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
translateAllFrames = TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
forall t.
Translation t =>
TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
translate
  {-# Inline translateAllFrames #-}

-- | Strings of characters are normally very inconvenient but useful in
-- backtracking cases. Fully assumes that the alphabet is DNA. Ignores
-- non-triplets at the end.

instance Translation String where
  type TargetType String = String
  type CodonType String = Char
  type AAType String = Char
  translate :: TranslationTable (CodonType String) (AAType String)
-> String -> TargetType String
translate TranslationTable (CodonType String) (AAType String)
tbl =
    let go :: String -> String
go String
xs | [Char
x,Char
y,Char
z]  String
hd = TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
forall t.
Translation t =>
TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
translate TranslationTable (CodonType String) (AAType String)
TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
tbl (Char -> Char -> Char -> Codon Char
forall c. c -> c -> c -> Codon c
Codon Char
x Char
y Char
z) Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
go String
tl
              | Bool
otherwise = []
              where (String
hd,String
tl) = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
3 String
xs
    in  String -> String
String -> TargetType String
go
  {-# Inlinable translate #-}
  translateAllFrames :: TranslationTable (CodonType String) (AAType String)
-> String -> TargetType String
translateAllFrames TranslationTable (CodonType String) (AAType String)
tbl = String -> String -> String
go []
    where go :: String -> String -> String
go String
_     []     = []
          -- first two AA are unknown @?@ since the codon has not been established at this point.
          go []    (Char
x:String
xs) = Char
'?' Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String -> String
go [Char
x] String
xs
          go [Char
p]   (Char
x:String
xs) = Char
'?' Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String -> String
go [Char
p,Char
x] String
xs
          go [Char
p,Char
q] (Char
x:String
xs) = TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
forall t.
Translation t =>
TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
translate TranslationTable (CodonType String) (AAType String)
TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
tbl (Char -> Char -> Char -> Codon Char
forall c. c -> c -> c -> Codon c
Codon Char
p Char
q Char
x) Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String -> String
go [Char
q,Char
x] String
xs
  {-# Inlinable translateAllFrames #-}

-- | Translation of @BioSequence DNA@. The translation tables assume DNA
-- triplets anyway. Biologically there should be a transcription step in
-- between. Ignores non-triplets at the end.

instance Translation (BioSequence DNA) where
  type TargetType (BioSequence DNA) = BioSequence AA
  type CodonType (BioSequence DNA) = Char
  type AAType (BioSequence DNA) = Char
  translate :: TranslationTable
  (CodonType (BioSequence DNA)) (AAType (BioSequence DNA))
-> BioSequence DNA -> TargetType (BioSequence DNA)
translate TranslationTable
  (CodonType (BioSequence DNA)) (AAType (BioSequence DNA))
tbl (BioSequence ByteString
xs) =
    let go :: Int -> Maybe (Char, Int)
go Int
k = (Char, Int) -> Maybe (Char, Int)
forall a. a -> Maybe a
Just (TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
forall t.
Translation t =>
TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
translate TranslationTable
  (CodonType (BioSequence DNA)) (AAType (BioSequence DNA))
TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
tbl (Codon Char -> TargetType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Char -> Codon Char
forall c. c -> c -> c -> Codon c
Codon (ByteString -> Int -> Char
BS.index ByteString
xs Int
k) (ByteString -> Int -> Char
BS.index ByteString
xs (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)) (ByteString -> Int -> Char
BS.index ByteString
xs (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2)) ,Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3)
    in  ByteString -> BioSequence AA
forall k (which :: k). ByteString -> BioSequence which
BioSequence (ByteString -> BioSequence AA)
-> ((ByteString, Maybe Int) -> ByteString)
-> (ByteString, Maybe Int)
-> BioSequence AA
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString, Maybe Int) -> ByteString
forall a b. (a, b) -> a
fst ((ByteString, Maybe Int) -> BioSequence AA)
-> (ByteString, Maybe Int) -> BioSequence AA
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Maybe (Char, Int)) -> Int -> (ByteString, Maybe Int)
forall a.
Int -> (a -> Maybe (Char, a)) -> a -> (ByteString, Maybe a)
BS.unfoldrN (ByteString -> Int
BS.length ByteString
xs Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
3) Int -> Maybe (Char, Int)
go Int
0
  {-# Inlinable translate #-}
  translateAllFrames :: TranslationTable
  (CodonType (BioSequence DNA)) (AAType (BioSequence DNA))
-> BioSequence DNA -> TargetType (BioSequence DNA)
translateAllFrames TranslationTable
  (CodonType (BioSequence DNA)) (AAType (BioSequence DNA))
tbl (BioSequence ByteString
xs) = ByteString -> BioSequence AA
forall k (which :: k). ByteString -> BioSequence which
BioSequence (ByteString -> BioSequence AA)
-> ((ByteString, Maybe Int) -> ByteString)
-> (ByteString, Maybe Int)
-> BioSequence AA
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString, Maybe Int) -> ByteString
forall a b. (a, b) -> a
fst ((ByteString, Maybe Int) -> BioSequence AA)
-> (ByteString, Maybe Int) -> BioSequence AA
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Maybe (Char, Int)) -> Int -> (ByteString, Maybe Int)
forall a.
Int -> (a -> Maybe (Char, a)) -> a -> (ByteString, Maybe a)
BS.unfoldrN (ByteString -> Int
BS.length ByteString
xs) Int -> Maybe (Char, Int)
go Int
0
    where go :: Int -> Maybe (Char, Int)
go Int
0 = (Char, Int) -> Maybe (Char, Int)
forall a. a -> Maybe a
Just (Char
'?', Int
1)
          go Int
1 = (Char, Int) -> Maybe (Char, Int)
forall a. a -> Maybe a
Just (Char
'?', Int
2)
          go Int
k = (Char, Int) -> Maybe (Char, Int)
forall a. a -> Maybe a
Just (TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
forall t.
Translation t =>
TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
translate TranslationTable
  (CodonType (BioSequence DNA)) (AAType (BioSequence DNA))
TranslationTable (CodonType (Codon Char)) (AAType (Codon Char))
tbl (Codon Char -> TargetType (Codon Char))
-> Codon Char -> TargetType (Codon Char)
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Char -> Codon Char
forall c. c -> c -> c -> Codon c
Codon (ByteString -> Int -> Char
BS.index ByteString
xs (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2)) (ByteString -> Int -> Char
BS.index ByteString
xs (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)) (ByteString -> Int -> Char
BS.index ByteString
xs Int
k), Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
  {-# Inlinable translateAllFrames #-}