{-# LANGUAGE UndecidableInstances #-}
module Bio.Sequence.Class
(
SequenceDecodable (..)
, Sequence
, WeightedSequence
, MarkedSequence
, BareSequence
, Range
, sequ
, markings
, weights
, bareSequ
, IsMarking
, IsWeight (..)
, IsSequence (..)
, IsWeightedSequence
, IsMarkedSequence
, IsBareSequence
, ContainsMarking
, ContainsNoMarking
, ContainsWeight
, ContainsNoWeight
, createSequence
, unsafeCreateSequence
, bareSequence
, weightedSequence
, unsafeWeightedSequence
, markedSequence
, unsafeMarkedSequence
, _sequenceInner
) where
import Bio.Sequence.Utilities (Range, checkRange, unsafeEither)
import Control.Lens
import Control.Monad.Except (MonadError, throwError)
import Data.Kind (Constraint)
import qualified Data.List as L (length, null)
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Vector as V (fromList, length)
import GHC.Generics (Generic)
import GHC.TypeLits (ErrorMessage (..), TypeError)
data Sequence mk w a = Sequence { _sequ :: Vector a
, _markings :: [(mk, Range)]
, _weights :: Vector w
}
deriving (Eq, Show, Generic, Functor)
instance Semigroup (Sequence mk w a) where
sequA <> sequB = res
where
newSequ = sequA ^. sequ <> sequB ^. sequ
newMarkings = sequA ^. markings <> fmap (fmap (bimap addInd addInd)) (sequB ^. markings)
newWeights = sequA ^. weights <> sequB ^. weights
res = Sequence newSequ newMarkings newWeights
addInd :: Int -> Int
addInd = (+ V.length (sequA ^. sequ))
instance Monoid (Sequence mk () a) where
mempty = Sequence mempty mempty mempty
instance Foldable (Sequence mk w) where
foldMap f = foldMap f . _sequ
length = V.length . _sequ
instance Traversable (Sequence mk w) where
traverse f s@Sequence{..} = (\newSeq -> s { _sequ = newSeq }) <$> traverse f _sequ
_sequenceInner :: Vector a -> [(mk, Range)] -> Vector w -> Sequence mk w a
_sequenceInner = Sequence
type WeightedSequence w a = Sequence () w a
type MarkedSequence mk a = Sequence mk () a
type BareSequence a = Sequence () () a
sequ :: Getter (Sequence mk w a) (Vector a)
sequ = to _sequ
markings :: Getter (Sequence mk w a) [(mk, Range)]
markings = to _markings
weights :: Getter (Sequence mk w a) (Vector w)
weights = to _weights
bareSequ :: Lens' (BareSequence a) (Vector a)
bareSequ = lens _sequ (\s v -> s { _sequ = v })
class (Eq mk, Ord mk) => IsMarking mk where
instance IsMarking ()
class IsWeight w where
toDouble :: w -> Double
instance IsWeight () where
toDouble = error "Bio.Sequence.Class: () can't be valid 'Weight'."
instance IsWeight Double where
toDouble = id
class (IsMarking (Marking s), IsWeight (Weight s)) => IsSequence s where
type Element s :: *
type Marking s :: *
type Weight s :: *
toSequence :: s -> Sequence (Marking s) (Weight s) (Element s)
fromSequence :: Sequence (Marking s) (Weight s) (Element s) -> s
instance (IsMarking mk, IsWeight w) => IsSequence (Sequence mk w a) where
type Element (Sequence mk w a) = a
type Marking (Sequence mk w a) = mk
type Weight (Sequence mk w a) = w
toSequence = id
fromSequence = id
class IsSequence s => SequenceDecodable a s where
sequenceDecode :: a -> Either Text s
type IsWeightedSequence s = (IsSequence s, Unit (Marking s), NotUnit (Weight s))
type IsMarkedSequence s = (IsSequence s, NotUnit (Marking s), Unit (Weight s))
type IsBareSequence s = (IsSequence s, Unit (Marking s), Unit (Weight s))
type ContainsMarking s = (IsSequence s, NotUnit (Marking s))
type ContainsNoMarking s = (IsSequence s, Unit (Marking s))
type ContainsWeight s = (IsSequence s, NotUnit (Weight s))
type ContainsNoWeight s = (IsSequence s, Unit (Weight s))
createSequence :: (ContainsMarking s, ContainsWeight s, MonadError Text m) => [Element s] -> [(Marking s, Range)] -> [Weight s] -> m s
createSequence = createSequenceInner True True
unsafeCreateSequence :: (ContainsMarking s, ContainsWeight s) => [Element s] -> [(Marking s, Range)] -> [Weight s] -> s
unsafeCreateSequence s markings' = unsafeEither . createSequence s markings'
bareSequence :: IsBareSequence s => [Element s] -> s
bareSequence s = fromSequence $ Sequence (V.fromList s) mempty mempty
markedSequence :: (IsMarkedSequence s, MonadError Text m) => [Element s] -> [(Marking s, Range)] -> m s
markedSequence s markings' = createSequenceInner True False s markings' []
unsafeMarkedSequence :: IsMarkedSequence s => [Element s] -> [(Marking s, Range)] -> s
unsafeMarkedSequence s = unsafeEither . markedSequence s
weightedSequence :: (IsWeightedSequence s, MonadError Text m) => [Element s] -> [Weight s] -> m s
weightedSequence s = createSequenceInner False True s []
unsafeWeightedSequence :: IsWeightedSequence s => [Element s] -> [Weight s] -> s
unsafeWeightedSequence s = unsafeEither . weightedSequence s
type family NotUnit a :: Constraint where
NotUnit () = TypeError ('Text "cobot-io: this function doesn't work with when parametrized by ().")
NotUnit _ = ()
type family Unit a :: Constraint where
Unit () = ()
Unit _ = TypeError ('Text "cobot-io: this function doesn't work with when not parametrized by ().")
createSequenceInner :: (IsSequence s, MonadError Text m) => Bool -> Bool -> [Element s] -> [(Marking s, Range)] -> [Weight s] -> m s
createSequenceInner checkMk checkW s markings' weights' | checkMk && not checkRanges = throwError rangesError
| checkW && not checkNullWeights = throwError weightsNullError
| checkW && not checkLenWeights = throwError weightsLenError
| otherwise = pure resSequence
where
seqVector = V.fromList s
weightsVector = V.fromList weights'
resSequence = fromSequence $ Sequence seqVector markings' weightsVector
checkRanges :: Bool
checkRanges = all (checkRange (L.length s)) $ fmap snd markings'
checkNullWeights :: Bool
checkNullWeights = not (L.null weights')
checkLenWeights :: Bool
checkLenWeights = L.length s == L.length weights'
rangesError :: Text
rangesError = "Bio.Sequence.Class: invalid 'Range' found in sequence's marking."
weightsNullError :: Text
weightsNullError = "Bio.Sequence.Class: weights are null for sequence."
weightsLenError :: Text
weightsLenError = "Bio.Sequence.Class: sequence and weights have different lengths."