-- | Abstraction over bio sequences in which each symbol is encoded as a single
-- ASCII character. The exact bio-sequence type is tracked with a phantom type
-- parameter, and we provide type classes that act on the known types.
--
-- Unknown bio sequences should be tagged with @Void@.
--
-- TODO give (lens) usage examples

module Biobase.Types.BioSequence where

import           Control.DeepSeq
import           Control.Lens
import           Data.ByteString.Char8 (ByteString)
import           Data.Char (ord,chr,toUpper)
import           Data.Data (Data)
import           Data.Hashable
import           Data.Typeable (Typeable)
import           Data.Void
import           GHC.Exts (IsString(..))
import           GHC.Generics (Generic)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.UTF8 as BSU
import qualified Streaming.Prelude as SP
import qualified Streaming as S
import qualified Streaming.Internal as SI
import qualified Test.QuickCheck as TQ
import           Test.QuickCheck (Arbitrary(..))
import Data.Coerce
import Debug.Trace

import Biobase.Types.Strand
import qualified Biobase.Types.Index as BTI
import Data.Info



-- * Lens operations on biosequences

{-
class BioSeqLenses b where
  -- | Lens into the first @k@ characters.
  bsTake :: Int -> Lens' b b
  -- | Lens into the last @k@ characters
  bsTakeEnd :: Int -> Lens' b b
  -- | Lens into all but the first @k@ characters
  bsDrop :: Int -> Lens' b b
  -- | Lens into all but the last @k@ characters
  bsDropEnd :: Int -> Lens' b b
  -- | Lens that splits at a position
  bsSplitAt :: Int -> Lens' b (b,b)
  -- | length of this biosequence
  bsLength :: Getter b Int
-}

-- * Sequence identifiers

-- | Identifier of a sequence, stored as a raw 'ByteString'.
--
-- The poly-kinded phantom parameter @which@ does not influence the
-- representation; it only tags the identifier at the type level.
--
-- All listed classes are derived with the @stock@ strategy, so 'Eq' and 'Ord'
-- compare the wrapped 'ByteString', and 'Show' / 'Read' use the record syntax.
newtype SequenceIdentifier (which :: k) = SequenceIdentifier { _sequenceIdentifier :: ByteString }
  deriving stock (Data, Typeable, Generic, Eq, Ord, Read, Show)
-- Template Haskell (lens): 'makeWrapped' generates the @Wrapped@ / @Rewrapped@
-- instances, 'makePrisms' generates the @_SequenceIdentifier@ optic for the
-- single constructor.
makeWrapped ''SequenceIdentifier
makePrisms ''SequenceIdentifier

-- | Deep evaluation of a 'SequenceIdentifier'. The instance body is empty, so
-- the class's default method implementation is used (for 'NFData' that default
-- is 'Generic'-based, which is why the type derives 'Generic').
instance NFData (SequenceIdentifier w)

-- | Build a 'SequenceIdentifier' from a 'String' (and hence from string
-- literals when @OverloadedStrings@ is enabled), for any phantom tag @w@.
--
-- The conversion goes through @Data.ByteString.UTF8.fromString@, i.e. the
-- identifier text is UTF-8 encoded into the underlying 'ByteString'.
instance IsString (SequenceIdentifier w) where
  fromString = SequenceIdentifier . BSU.fromString



-- * Bio-Sequences

-- | Constructor-less tag type, intended as the phantom parameter of
-- 'BioSequence' for RNA sequences.
data RNA

-- | Constructor-less tag type, intended as the phantom parameter of
-- 'BioSequence' for DNA sequences.
data DNA

-- | Constructor-less tag type.
-- NOTE(review): presumably tags nucleotide sequences that may be either DNA or
-- RNA -- confirm against the instances that use it.
data XNA

-- | Constructor-less tag type.
-- NOTE(review): presumably tags amino-acid (protein) sequences -- confirm.
data AA



-- | A biological sequence stored as a strict 'ByteString', one byte per
-- symbol. The poly-kinded phantom parameter @which@ records the kind of
-- sequence at the type level and has no runtime representation.
--
-- The structural classes are derived with the @stock@ strategy. 'Semigroup' is
-- derived with the @newtype@ strategy, so '<>' is the '<>' of the underlying
-- 'ByteString' and is only available between sequences carrying the same tag.
--
-- TODO provide extended annotation information on biosequences, too!

newtype BioSequence (which :: k) = BioSequence {_bioSequence :: ByteString}
  deriving stock (Data, Typeable, Generic, Eq, Ord, Read, Show)
  deriving newtype (Semigroup)
-- Template Haskell (lens): 'makeWrapped' generates the @Wrapped@ / @Rewrapped@
-- instances, 'makePrisms' generates the @_BioSequence@ optic for the single
-- constructor, and 'makeLenses' generates the @bioSequence@ optic for the
-- record field.
makeWrapped ''BioSequence
makePrisms ''BioSequence
makeLenses ''BioSequence

-- | Hashing of a 'BioSequence' for any phantom tag. The instance body is empty,
-- so the class's default method implementation is used (for 'Hashable' that
-- default is 'Generic'-based, which is why the type derives 'Generic').
instance Hashable (BioSequence (which :: k))

-- | Deep evaluation of a 'BioSequence'. The instance body is empty, so the
-- class's default method implementation is used (for 'NFData' that default is
-- 'Generic'-based).
instance NFData (BioSequence w)

-- | Positions within a 'BioSequence' are addressed with an 'Int'.
type instance Index (BioSequence w) = Int

-- | The element found at each position of a 'BioSequence' is a 'Char'.
type instance IxValue (BioSequence w) = Char

-- | Traverse the character at a given position. The traversal goes through
-- the wrapped 'ByteString' (via '_BioSequence'), uses the 'ByteString' 'ix'
-- to reach the byte at that position, and finally presents the byte as a
-- 'Char'.

instance Ixed (BioSequence w) where
  {-# Inline ix #-}
  ix pos = _BioSequence . ix pos . iso byteToChar charToByte
    where
      -- 'chr' / 'ord' work on 'Int', so both directions pass through
      -- 'fromIntegral'.
      byteToChar w8 = chr (fromIntegral w8)
      charToByte c  = fromIntegral (ord c)

-- | 'Reversing' is derived with the @newtype@ strategy, i.e. the instance of
-- the wrapped representation type is reused unchanged.
deriving newtype instance Reversing (BioSequence w)

-- | String literals for untyped (@Void@-tagged) sequences: the characters are
-- packed into a 'ByteString' and wrapped without any validation.

instance IsString (BioSequence Void) where
  fromString str = BioSequence (BS.pack str)

-- | Short, human-readable rendering of a sequence. Sequences of at most 18
-- characters are shown in full; longer ones are abbreviated to their first 9
-- characters, the marker @..@, and their last 9 characters.

instance Info (BioSequence w) where
  info (BioSequence bytes)
    | total > 18 = BS.unpack front ++ ".." ++ BS.unpack back
    | otherwise  = BS.unpack bytes
    where
      total = BS.length bytes
      -- leading and trailing 9 characters of the sequence
      front = BS.take 9 bytes
      back  = BS.drop (total - 9) bytes

{-
instance BioSeqLenses (BioSequence w) where
  {-# Inline bsTake #-}
  bsTake k = lens (over _BioSequence (BS.take k)) (\old new -> new <> over _BioSequence (BS.drop k) old)
  {-# Inline bsTakeEnd #-}
  bsTakeEnd k = lens (over _BioSequence (\s -> BS.drop (BS.length s -k) s)) (\old new -> over _BioSequence (\s -> BS.take (BS.length s-k) s) old <> new)
  {-# Inline bsLength #-}
  bsLength = _BioSequence.to BS.length
  {-# Inline bsDrop #-}
  bsDrop k = lens (over _BioSequence (BS.drop k)) (\old new -> over _BioSequence (BS.take k) old <> new)
  {-# Inline bsDropEnd #-}
  bsDropEnd k = lens (over _BioSequence (\s -> BS.take (BS.length s -k) s)) (\old new -> over _BioSequence (\s -> BS.take (BS.length s-k) s) old <> new)
  {-# Inline bsSplitAt #-}
  bsSplitAt k = lens (\b -> (view (bsTake k) b, view (bsDrop k) b)) (\old (h,t) -> h <> t)
-}



-- * RNA

-- | Build an RNA sequence from raw characters. Each character is upper-cased;
-- if the result is not one of @A@, @C@, @G@, @U@ it is replaced by @N@.
--
-- TODO write that converts explicitly

mkRNAseq :: ByteString -> BioSequence RNA
mkRNAseq = BioSequence . BS.map (sanitize . toUpper)
  where
    -- Upper-casing and validation happen in a single 'BS.map', so no
    -- intermediate 'ByteString' is allocated.
    sanitize :: Char -> Char
    sanitize c
      | c `elem` acgu = c
      | otherwise     = 'N'
    acgu :: String
    acgu = "ACGU"

-- | String literals for RNA sequences are normalised through 'mkRNAseq'.

instance IsString (BioSequence RNA) where
  fromString str = mkRNAseq (BS.pack str)

-- | Random RNA sequences with a length between 0 and 30 (inclusive), drawn
-- from the alphabet @ACGU@. Shrinking is delegated to 'shrinkBioSequence'.

instance Arbitrary (BioSequence RNA) where
  arbitrary = do
    len <- TQ.choose (0, 30)
    BioSequence . BS.pack <$> TQ.vectorOf len (TQ.elements "ACGU")
  shrink = shrinkBioSequence

-- | All sequences obtained by deleting exactly one character, ordered by the
-- position of the deleted character. The empty sequence has no shrinks.
--
-- The signature is kept as general as the type of the body allows: the
-- phantom tag of the results is not tied to that of the input, because the
-- sequence is unwrapped and re-wrapped.

shrinkBioSequence :: BioSequence a -> [BioSequence b]
shrinkBioSequence (BioSequence b) =
  [ BioSequence (BS.take k b <> BS.drop (k + 1) b)
  | k <- [0 .. BS.length b - 1]
  ]


-- * DNA

-- | Build a DNA sequence from raw characters. Each character is upper-cased;
-- if the result is not one of @A@, @C@, @G@, @T@ it is replaced by @N@.

mkDNAseq :: ByteString -> (BioSequence DNA)
mkDNAseq = BioSequence . BS.map (sanitize . toUpper)
  where
    -- Upper-casing and validation happen in a single 'BS.map', so no
    -- intermediate 'ByteString' is allocated.
    sanitize :: Char -> Char
    sanitize c
      | c `elem` acgt = c
      | otherwise     = 'N'
    acgt :: String
    acgt = "ACGT"

-- | String literals for DNA sequences are normalised through 'mkDNAseq'.

instance IsString (BioSequence DNA) where
  fromString str = mkDNAseq (BS.pack str)

-- | Random DNA sequences with a length between 0 and 100 (inclusive), drawn
-- from the alphabet @ACGT@.

instance Arbitrary (BioSequence DNA) where
  arbitrary = do
    k  <- TQ.choose (0,100)
    xs <- TQ.vectorOf k $ TQ.elements "ACGT"
    return . BioSequence $ BS.pack xs
  -- Shrink by deleting single characters, exactly like the RNA instance. The
  -- former definition @view (to shrink)@ resolved to this very method and
  -- therefore never terminated once QuickCheck tried to shrink a
  -- counterexample.
  shrink = shrinkBioSequence



-- * XNA

-- | Build an XNA sequence from raw characters. Each character is upper-cased;
-- if the result is not one of @A@, @C@, @G@, @T@, @U@ it is replaced by @N@.

mkXNAseq :: ByteString -> (BioSequence XNA)
mkXNAseq = BioSequence . BS.map (sanitize . toUpper)
  where
    -- Upper-casing and validation happen in a single 'BS.map', so no
    -- intermediate 'ByteString' is allocated.
    sanitize :: Char -> Char
    sanitize c
      | c `elem` acgtu = c
      | otherwise      = 'N'
    acgtu :: String
    acgtu = "ACGTU"

-- | String literals for XNA sequences are normalised through 'mkXNAseq'.

instance IsString (BioSequence XNA) where
  fromString str = mkXNAseq (BS.pack str)

-- | Random XNA sequences with a length between 0 and 100 (inclusive), drawn
-- from the alphabet @ACGTU@.

instance Arbitrary (BioSequence XNA) where
  arbitrary = do
    k  <- TQ.choose (0,100)
    xs <- TQ.vectorOf k $ TQ.elements "ACGTU"
    return . BioSequence $ BS.pack xs
  -- Shrink by deleting single characters, exactly like the RNA instance. The
  -- former definition @view (to shrink)@ resolved to this very method and
  -- therefore never terminated once QuickCheck tried to shrink a
  -- counterexample.
  shrink = shrinkBioSequence



-- * Amino acid sequences

-- | Build an amino acid sequence from raw characters. Each character is
-- upper-cased; if the result is not one of the one-letter codes in
-- @ARNDCEQGHILKMFPSTWYVUO@ it is replaced by @X@.

mkAAseq :: ByteString -> (BioSequence AA)
mkAAseq = BioSequence . BS.map (sanitize . toUpper)
  where
    -- Upper-casing and validation happen in a single 'BS.map', so no
    -- intermediate 'ByteString' is allocated.
    sanitize :: Char -> Char
    sanitize c
      | c `elem` aas = c
      | otherwise    = 'X'
    aas :: String
    aas = "ARNDCEQGHILKMFPSTWYVUO"

-- | String literals for amino acid sequences are normalised through
-- 'mkAAseq'.

instance IsString (BioSequence AA) where
  fromString str = mkAAseq (BS.pack str)

-- | Random amino acid sequences with a length between 0 and 100 (inclusive),
-- drawn from the alphabet @ARNDCEQGHILKMFPSTWYVUO@.

instance Arbitrary (BioSequence AA) where
  arbitrary = do
    k  <- TQ.choose (0,100)
    xs <- TQ.vectorOf k $ TQ.elements "ARNDCEQGHILKMFPSTWYVUO"
    return . BioSequence $ BS.pack xs
  -- Shrink by deleting single characters, exactly like the RNA instance. The
  -- former definition @view (to shrink)@ resolved to this very method and
  -- therefore never terminated once QuickCheck tried to shrink a
  -- counterexample.
  shrink = shrinkBioSequence



{-

-- * A window into a longer sequence with prefix/suffix information.

-- | Phantom-typed over two types, the type @w@ of the identifier, which can be
-- descriptive ("FirstInput") and the second type, identifying what kind of
-- sequence types we are dealing with. Finally, the third type provides
-- location information and should be location or streamed location.

data BioSequenceWindow w ty loc = BioSequenceWindow
  { _bswIdentifier    :: !(SequenceIdentifier w)
    -- ^ Identifier for this window. Typically some fasta identifier
  , _bswPrefix        :: !(BioSequence ty)
  , _bswInfix         :: !(BioSequence ty)
  , _bswSuffix        :: !(BioSequence ty)
  , _bswInfixLocation :: !loc
    -- ^ Location of the infix sequence
  }
  deriving (Data, Typeable, Generic, Eq, Ord, Read, Show)
makeLenses ''BioSequenceWindow

-- | Lens into the full sequence. May not change the sequence length

bswSequence :: Lens (BioSequenceWindow w ty loc) (BioSequenceWindow w ty' loc) (BioSequence ty) (BioSequence ty')
{-# Inlinable bswSequence #-}
bswSequence = lens (\w -> _bswPrefix w <> _bswInfix w <> _bswSuffix w)
                   (\w bs -> let (p,is) = bs^.bsSplitAt (w^.bswPrefix.bsLength)
                                 (i,s ) = is^.bsSplitAt (w^.bswInfix.bsLength)
                             in w { _bswPrefix = p, _bswInfix = i, _bswSuffix = s } )

-- | Get the position of the whole sequence

bswLocation :: ModifyLocation loc => Getter (BioSequenceWindow w ty loc) loc
{-# Inlinable bswLocation #-}
bswLocation = to $ \w -> locMoveLeftEnd (w^.bswPrefix.bsLength.to negate)
                 . locMoveRightEnd (w^.bswSuffix.bsLength) $ w^.bswInfixLocation

bswRetagW :: BioSequenceWindow w ty loc -> BioSequenceWindow v ty loc
{-# Inlinable bswRetagW #-}
bswRetagW = over bswIdentifier coerce

instance NFData loc => NFData (BioSequenceWindow w ty loc)

instance (Reversing loc) => Reversing (BioSequenceWindow w ty loc) where
  {-# Inlinable reversing #-}
  reversing bsw = bsw
                & bswPrefix .~ (bsw^.bswSuffix.reversed)
                & bswSuffix .~ (bsw^.bswPrefix.reversed)
                & bswInfix  .~ (bsw^.bswInfix.reversed)
                & bswInfixLocation .~ (bsw^.bswInfixLocation.reversed)



-- | Provides an informative string indicating the current window being worked on. Requires length
-- of pretty string requested. Not for computers, but for logging what is being worked on. Should be
-- one line at most, not produce line breaks.
--
-- @...PFX [Start] IFX...IFX [End] SFX ...@
--
-- TODO possibly be better as a @Doc@ for prettier printing.

instance Info (BioSequenceWindow w ty loc) where
  info bsw = "todo: info bsw"

-}



-- * DNA/RNA

-- | Character-level translation from RNA to DNA: @U@ becomes @T@ and @u@
-- becomes @t@. Every other character is returned unchanged.

rna2dna :: Char -> Char
rna2dna 'U'   = 'T'
rna2dna 'u'   = 't'
rna2dna other = other
{-# Inline rna2dna #-}

-- | Complement of a single RNA character: @A@ and @U@ are exchanged, as are
-- @C@ and @G@, each in upper and lower case. Every other character is
-- returned unchanged.

rnaComplement :: Char -> Char
rnaComplement 'A'   = 'U'
rnaComplement 'a'   = 'u'
rnaComplement 'C'   = 'G'
rnaComplement 'c'   = 'g'
rnaComplement 'G'   = 'C'
rnaComplement 'g'   = 'c'
rnaComplement 'U'   = 'A'
rnaComplement 'u'   = 'a'
rnaComplement other = other
{-# Inline rnaComplement #-}

-- | Character-level translation from DNA to RNA: @T@ becomes @U@ and @t@
-- becomes @u@. Every other character is returned unchanged.

dna2rna :: Char -> Char
dna2rna 'T'   = 'U'
dna2rna 't'   = 'u'
dna2rna other = other
{-# Inline dna2rna #-}

-- | Complement of a single DNA character: @A@ and @T@ are exchanged, as are
-- @C@ and @G@, each in upper and lower case. Every other character is
-- returned unchanged.

dnaComplement :: Char -> Char
dnaComplement 'A'   = 'T'
dnaComplement 'a'   = 't'
dnaComplement 'C'   = 'G'
dnaComplement 'c'   = 'g'
dnaComplement 'G'   = 'C'
dnaComplement 'g'   = 'c'
dnaComplement 'T'   = 'A'
dnaComplement 't'   = 'a'
dnaComplement other = other
{-# Inline dnaComplement #-}



-- | Transcribes a DNA sequence into an RNA sequence. Note that 'transcribe' is
-- actually very generic. We just define its semantics to be that of
-- biomolecular transcription.
--
-- 'transcribe' makes the assumption that, given @DNA -> RNA@, we transcribe
-- the coding strand.
-- <http://hyperphysics.phy-astr.gsu.edu/hbase/Organic/transcription.html>
--
-- @
-- DNAseq "ACGT" ^. transcribe == RNAseq "ACGU"
-- RNAseq "ACGU" ^. transcribe == DNAseq "ACGT"
-- RNAseq "ACGU" ^. from transcribe :: DNAseq == DNAseq "ACGT"
-- @

class Transcribe f where
  -- | The type that @f@ is transcribed into.
  type TranscribeTo f :: *
  -- | Isomorphism between @f@ and its transcription target; use 'from' to go
  -- in the opposite direction.
  transcribe :: Iso' f (TranscribeTo f)

-- | DNA to RNA: every character is mapped with 'dna2rna', and with 'rna2dna'
-- in the reverse direction of the isomorphism. The order of the characters is
-- left untouched, i.e. the sequence is /not/ reversed.

instance Transcribe (BioSequence DNA) where
  type TranscribeTo (BioSequence DNA) = (BioSequence RNA)
  {-# Inline transcribe #-}
  transcribe = iso dnaToRna rnaToDna
    where
      dnaToRna :: BioSequence DNA -> BioSequence RNA
      dnaToRna (BioSequence dna) = BioSequence (BS.map dna2rna dna)
      rnaToDna :: BioSequence RNA -> BioSequence DNA
      rnaToDna (BioSequence rna) = BioSequence (BS.map rna2dna rna)

-- | RNA to DNA, defined as the inverse of the DNA-to-RNA isomorphism. The
-- order of the characters is left untouched, i.e. the sequence is /not/
-- reversed.

instance Transcribe (BioSequence RNA) where
  type TranscribeTo (BioSequence RNA) = (BioSequence DNA)
  {-# Inline transcribe #-}
  transcribe = from dnaToRna
    where
      -- the 'transcribe' of the DNA instance, with its direction spelled out
      dnaToRna :: Iso' (BioSequence DNA) (BioSequence RNA)
      dnaToRna = transcribe



-- | The complement of a biosequence.

class Complement f where
  -- | Isomorphism between a sequence and its complement. Both sides have the
  -- same type @f@.
  complement :: Iso' f f

-- | DNA complement: every character is replaced using 'dnaComplement'. The
-- same function serves as both directions of the isomorphism.

instance Complement (BioSequence DNA) where
  {-# Inline complement #-}
  complement = iso swapBases swapBases
    where
      swapBases :: BioSequence DNA -> BioSequence DNA
      swapBases = over _BioSequence (BS.map dnaComplement)
      {-# Inline swapBases #-}

-- | RNA complement: every character is replaced using 'rnaComplement'. The
-- same function serves as both directions of the isomorphism.

instance Complement (BioSequence RNA) where
  {-# Inline complement #-}
  complement = iso swapBases swapBases
    where
      swapBases :: BioSequence RNA -> BioSequence RNA
      swapBases = over _BioSequence (BS.map rnaComplement)
      {-# Inline swapBases #-}

{-
instance (Complement (BioSequence ty)) => Complement (BioSequenceWindow w ty k) where
  {-# Inline complement #-}
  complement = let f = over bswPrefix (view complement) . over bswInfix (view complement) . over bswSuffix (view complement)
                   {-# Inline f #-}
               in  iso f f
-}

-- | Reverse complement as a single isomorphism: the composition of 'reversed'
-- and 'complement'. Viewing through it reverses the sequence first and then
-- takes the complement.

reverseComplement :: (Complement f, Reversing f) => Iso' f f
reverseComplement = reversed . complement
{-# Inline reverseComplement #-}