module Biobase.AAseq where
import Control.Arrow ((***))
import Data.Ix (Ix(..))
import Data.Primitive.Types
import Data.Tuple (swap)
import GHC.Base (remInt,quotInt)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VGM
import qualified Data.Vector.Unboxed as VU
import Data.Array.Repa.ExtShape
import Data.Array.Repa.Index
import Data.Array.Repa.Shape
import Biobase.Primary
newtype AA = AA { unAA :: Int }
deriving (Eq,Ord,Ix)
(aStop:aA:aB:aC:aD:aE:aF:aG:aH:aI:aK:aL:aM:aN:aP:aQ:aR:aS:aT:aV:aW:aX:aY:aZ:aUndefined:_) = map AA [0..]
aaRange = [aStop .. pred aUndefined]
toAA :: Char -> AA
toAA ((`lookup` charAAList) -> Just aa) = aa
toAA c = error $ "unknown AA: " ++ show c
fromAA :: AA -> Char
fromAA ((`lookup` aACharList) -> Just c) = c
fromAA (AA aa) = error $ "unknown AA: " ++ (show aa)
class MkAAseq x where
mkAAseq :: x -> VU.Vector AA
type AAseq = VU.Vector AA
primaryToAAseq :: Primary -> AAseq
primaryToAAseq = mkAAseq . go where
go (VU.length -> 0) = []
go (VU.splitAt 3 -> (VU.toList -> hs,ts)) = case M.lookup hs nucCodonTable of
Just aa -> aa : go ts
_ -> error $ "primaryToAAseq: " ++ show (hs,ts)
charAAList =
[ ('/',aStop)
, ('A',aA)
, ('B',aB)
, ('C',aC)
, ('D',aD)
, ('E',aE)
, ('F',aF)
, ('G',aG)
, ('H',aH)
, ('I',aI)
, ('K',aK)
, ('L',aL)
, ('M',aM)
, ('N',aN)
, ('P',aP)
, ('Q',aQ)
, ('R',aR)
, ('S',aS)
, ('T',aT)
, ('V',aV)
, ('W',aW)
, ('X',aX)
, ('Y',aY)
, ('Z',aZ)
]
aACharList = map swap charAAList
codonTable = M.fromList
[ ("aaa",'K')
, ("aac",'N')
, ("aag",'K')
, ("aat",'N')
, ("aca",'T')
, ("acc",'T')
, ("acg",'T')
, ("act",'T')
, ("aga",'R')
, ("agc",'S')
, ("agg",'R')
, ("agt",'S')
, ("ata",'I')
, ("atc",'I')
, ("atg",'M')
, ("att",'I')
, ("caa",'Q')
, ("cac",'H')
, ("cag",'Q')
, ("cat",'H')
, ("cca",'P')
, ("ccc",'P')
, ("ccg",'P')
, ("cct",'P')
, ("cga",'R')
, ("cgc",'R')
, ("cgg",'R')
, ("cgt",'R')
, ("cta",'L')
, ("ctc",'L')
, ("ctg",'L')
, ("ctt",'L')
, ("gaa",'E')
, ("gac",'D')
, ("gag",'E')
, ("gat",'D')
, ("gca",'A')
, ("gcc",'A')
, ("gcg",'A')
, ("gct",'A')
, ("gga",'G')
, ("ggc",'G')
, ("ggg",'G')
, ("ggt",'G')
, ("gta",'V')
, ("gtc",'V')
, ("gtg",'V')
, ("gtt",'V')
, ("taa",'/')
, ("tac",'Y')
, ("tag",'/')
, ("tat",'Y')
, ("tca",'S')
, ("tcc",'S')
, ("tcg",'S')
, ("tct",'S')
, ("tga",'/')
, ("tgc",'C')
, ("tgg",'W')
, ("tgt",'C')
, ("tta",'L')
, ("ttc",'F')
, ("ttg",'L')
, ("ttt",'F')
]
nucCodonTable = M.fromList . map (map mkNuc *** toAA) . M.assocs $ codonTable
instance Show AA where
show n = [fromAA n]
instance Read AA where
readsPrec p [] = []
readsPrec p (x:xs)
| x==' ' = readsPrec p xs
| Just aa <- x `lookup` charAAList = [(aa,xs)]
| otherwise = []
deriving instance Prim AA
deriving instance VGM.MVector VU.MVector AA
deriving instance VG.Vector VU.Vector AA
deriving instance VU.Unbox AA
instance (Shape sh,Show sh) => Shape (sh :. AA) where
rank (sh:._) = rank sh + 1
zeroDim = zeroDim:.AA 0
unitDim = unitDim:.AA 1
intersectDim (sh1:.n1) (sh2:.n2) = intersectDim sh1 sh2 :. min n1 n2
addDim (sh1:.AA n1) (sh2:.AA n2) = addDim sh1 sh2 :. AA (n1+n2)
size (sh1:.AA n) = size sh1 * n
sizeIsValid (sh1:.AA n) = sizeIsValid (sh1:.n)
toIndex (sh1:.AA sh2) (sh1':.AA sh2') = toIndex (sh1:.sh2) (sh1':.sh2')
fromIndex (ds:.AA d) n = fromIndex ds (n `quotInt` d) :. AA r where
r | rank ds == 0 = n
| otherwise = n `remInt` d
inShapeRange (sh1:.n1) (sh2:.n2) (idx:.i) = i>=n1 && i<n2 && inShapeRange sh1 sh2 idx
listOfShape (sh:.AA n) = n : listOfShape sh
shapeOfList xx = case xx of
[] -> error "empty list in shapeOfList/Primary"
x:xs -> shapeOfList xs :. AA x
deepSeq (sh:.n) x = deepSeq sh (n `seq` x)
instance (Shape sh, Show sh, ExtShape sh) => ExtShape (sh :. AA) where
subDim (sh1:.AA n1) (sh2:.AA n2) = subDim sh1 sh2 :. AA (n1n2)
rangeList (sh1:.AA n1) (sh2:.AA n2) = [ sh:.AA n | sh <- rangeList sh1 sh2, n <- [n1 .. (n1+n2)]]
instance Enum AA where
toEnum = AA
fromEnum = unAA
instance MkAAseq [Char] where
mkAAseq = VU.fromList . map toAA
instance MkAAseq [AA] where
mkAAseq = VU.fromList
instance MkAAseq (VU.Vector Char) where
mkAAseq = VU.map toAA
instance MkAAseq BS.ByteString where
mkAAseq = VU.fromList . map toAA . BS.unpack
instance MkAAseq BSL.ByteString where
mkAAseq = VU.fromList . map toAA . BSL.unpack
instance MkAAseq T.Text where
mkAAseq = VU.fromList . map toAA . T.unpack