module Biobase.Types.BioSequence where
import Control.DeepSeq
import Control.Lens
import Data.ByteString.Char8 (ByteString)
import Data.Char (ord,chr,toUpper)
import Data.Data (Data)
import Data.Typeable (Typeable)
import Data.Void
import GHC.Exts (IsString(..))
import GHC.Generics (Generic)
import qualified Data.ByteString.Char8 as BS
import qualified Test.QuickCheck as TQ
import Test.QuickCheck (Arbitrary(..))
import qualified Data.ByteString.UTF8 as BSU
import Biobase.Types.Strand
import qualified Biobase.Types.Index as BTI
newtype SequenceIdentifier (which ∷ k) = SequenceIdentifier { _sequenceIdentifier ∷ ByteString }
deriving (Data, Typeable, Generic, Eq, Ord, Read, Show)
makeWrapped ''SequenceIdentifier
makePrisms ''SequenceIdentifier
instance NFData (SequenceIdentifier w)
instance IsString (SequenceIdentifier w) where
fromString = SequenceIdentifier . BSU.fromString
data RNA
data DNA
data XNA
data AA
newtype BioSequence (which ∷ k) = BioSequence {_bioSequence ∷ ByteString}
deriving (Data, Typeable, Generic, Eq, Ord, Read, Show, Semigroup)
makeWrapped ''BioSequence
makePrisms ''BioSequence
instance NFData (BioSequence w)
type instance Index (BioSequence w) = Int
type instance IxValue (BioSequence w) = Char
instance Ixed (BioSequence w) where
ix k = _BioSequence . ix k . iso (chr . fromIntegral) (fromIntegral . ord)
{-# Inline ix #-}
deriving instance Reversing (BioSequence w)
instance IsString (BioSequence Void) where
fromString = BioSequence . BS.pack
mkRNAseq ∷ ByteString → BioSequence RNA
mkRNAseq = BioSequence . BS.map go . BS.map toUpper
where go x | x `elem` acgu = x
| otherwise = 'N'
acgu ∷ String
acgu = "ACGU"
instance IsString (BioSequence RNA) where
fromString = mkRNAseq . BS.pack
instance Arbitrary (BioSequence RNA) where
arbitrary = do
k ← TQ.choose (0,100)
xs ← TQ.vectorOf k $ TQ.elements "ACGU"
return . BioSequence $ BS.pack xs
shrink = view (to shrink)
mkDNAseq ∷ ByteString → (BioSequence DNA)
mkDNAseq = BioSequence . BS.map go . BS.map toUpper
where go x | x `elem` acgt = x
| otherwise = 'N'
acgt ∷ String
acgt = "ACGT"
instance IsString (BioSequence DNA) where
fromString = mkDNAseq . BS.pack
instance Arbitrary (BioSequence DNA) where
arbitrary = do
k ← TQ.choose (0,100)
xs ← TQ.vectorOf k $ TQ.elements "ACGT"
return . BioSequence $ BS.pack xs
shrink = view (to shrink)
mkXNAseq ∷ ByteString → (BioSequence XNA)
mkXNAseq = BioSequence . BS.map go . BS.map toUpper
where go x | x `elem` acgtu = x
| otherwise = 'N'
acgtu ∷ String
acgtu = "ACGTU"
instance IsString (BioSequence XNA) where
fromString = mkXNAseq . BS.pack
instance Arbitrary (BioSequence XNA) where
arbitrary = do
k ← TQ.choose (0,100)
xs ← TQ.vectorOf k $ TQ.elements "ACGTU"
return . BioSequence $ BS.pack xs
shrink = view (to shrink)
mkAAseq ∷ ByteString → (BioSequence AA)
mkAAseq = BioSequence . BS.map go . BS.map toUpper
where go x | x `elem` aas = x
| otherwise = 'X'
aas ∷ String
aas = "ARNDCEQGHILKMFPSTWYVUO"
instance IsString (BioSequence AA) where
fromString = mkAAseq . BS.pack
instance Arbitrary (BioSequence AA) where
arbitrary = do
k ← TQ.choose (0,100)
xs ← TQ.vectorOf k $ TQ.elements "ARNDCEQGHILKMFPSTWYVUO"
return . BioSequence $ BS.pack xs
shrink = view (to shrink)
data BioSequenceWindow w ty k = BioSequenceWindow
{ _bswIdentifier ∷ !(SequenceIdentifier w)
, _bswPrefix ∷ !(BioSequence ty)
, _bswSequence ∷ !(BioSequence ty)
, _bswSuffix ∷ !(BioSequence ty)
, _bswStrand ∷ !Strand
, _bswIndex ∷ !(BTI.Index k)
}
deriving (Data, Typeable, Generic, Eq, Ord, Read, Show)
makeLenses ''BioSequenceWindow
instance Reversing (BioSequenceWindow w ty k) where
{-# Inlinable reversing #-}
reversing bsw = bsw
& bswPrefix .~ (bsw^.bswSuffix.reversed)
& bswSuffix .~ (bsw^.bswPrefix.reversed)
& bswSequence .~ (bsw^.bswSequence.reversed)
& bswStrand .~ (bsw^.bswStrand.reversed)
bswFullSequence ∷ Lens' (BioSequenceWindow w ty k) (BioSequence ty)
{-# Inlinable bswFullSequence #-}
bswFullSequence = lens f t
where f bsw = bsw^.bswPrefix <> bsw^.bswSequence <> bsw^.bswSuffix
t bsw (BioSequence s) =
let (pfx,ifxsfx) = BS.splitAt (bsw^.bswPrefix._BioSequence.to BS.length) s
(ifx,sfx) = BS.splitAt (bsw^.bswSequence._BioSequence.to BS.length) ifxsfx
in bsw & bswPrefix._BioSequence .~ pfx
& bswSequence._BioSequence .~ ifx
& bswSuffix._BioSequence .~ sfx
rna2dna ∷ Char → Char
rna2dna = \case
'U' → 'T'
'u' → 't'
x → x
{-# Inline rna2dna #-}
rnaComplement ∷ Char → Char
rnaComplement = \case
'A' → 'U'
'a' → 'u'
'C' → 'G'
'c' → 'g'
'G' → 'C'
'g' → 'c'
'U' → 'A'
'u' → 'a'
x → x
{-# Inline rnaComplement #-}
dna2rna ∷ Char → Char
dna2rna = \case
'T' → 'U'
't' → 'u'
x → x
{-# Inline dna2rna #-}
dnaComplement ∷ Char → Char
dnaComplement = \case
'A' → 'T'
'a' → 't'
'C' → 'G'
'c' → 'g'
'G' → 'C'
'g' → 'c'
'T' → 'A'
't' → 'a'
x → x
{-# Inline dnaComplement #-}
class Transcribe f where
type TranscribeTo f ∷ *
transcribe ∷ Iso' f (TranscribeTo f)
instance Transcribe (BioSequence DNA) where
type TranscribeTo (BioSequence DNA) = (BioSequence RNA)
transcribe = iso (over _BioSequence (BS.map dna2rna)) (over _BioSequence (BS.map rna2dna))
{-# Inline transcribe #-}
instance Transcribe (BioSequence RNA) where
type TranscribeTo (BioSequence RNA) = (BioSequence DNA)
transcribe = from transcribe
{-# Inline transcribe #-}
class Complement f where
complement ∷ Iso' f f
instance Complement (BioSequence DNA) where
complement = iso (over _BioSequence (BS.map dnaComplement)) (over _BioSequence (BS.map dnaComplement))
{-# Inline complement #-}
instance Complement (BioSequence RNA) where
complement = iso (over _BioSequence (BS.map rnaComplement)) (over _BioSequence (BS.map rnaComplement))
{-# Inline complement #-}