{-# LANGUAGE ViewPatterns #-} module Bio.Sequence.Functions.Marking ( getMarking , unsafeGetMarking , toMarked , unsafeToMarked , addMarkings , unsafeAddMarkings , listMarkings ) where import Bio.Sequence.Class (ContainsMarking, IsBareSequence, IsMarkedSequence, IsSequence (..), markedSequence, markings, sequ, unsafeMarkedSequence, weights, _sequenceInner) import Bio.Sequence.Functions.Sequence (length, unsafeGetRange) import Bio.Sequence.Utilities (Range, checkRange, unsafeEither) import Control.Lens import Control.Monad.Except (MonadError, throwError) import Data.List (nub) import Data.List.NonEmpty (NonEmpty (..)) import Data.Text (Text) import qualified Data.Vector as V (toList) import Prelude hiding (drop, head, length, null, reverse, tail, take, (!!)) -- | Function that retrieves all elements in 'IsSequence' @s@ that are covered by given 'Marking'' @s@. -- Returns 'NonEmpty' list, because if 'Marking' is present in @s@, then list of -- all 'Marking's for @s@ can't be empty. If given 'Marking is not found in @s@, an -- error will be thrown. -- -- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty -- > getMarking sequ "Letter A" == ['a', 'a'] :| [['a']] -- getMarking :: (ContainsMarking s, MonadError Text m) => s -> Marking s -> m (NonEmpty [Element s]) getMarking (toSequence -> s) mk | not $ mk `member` (s ^. markings) = throwError markingNotFoundError | otherwise = pure $ res where res = foldl1 (<>) $ fmap ((:| []) . unsafeGetRange s) $ mk `lookupAll` (s ^. markings) markingNotFoundError :: Text markingNotFoundError = "Bio.Sequence.Functions.Marking: given marking not found in Sequence." unsafeGetMarking :: ContainsMarking s => s -> Marking s -> NonEmpty [Element s] unsafeGetMarking mk = unsafeEither . getMarking mk -- | Converts 'IsBareSequence' @s@ to 'IsMarkedSequence' @s'@ that is marked using provided list -- of 'Marking's. If at least one of ranges in given list of 'Marking's is out of -- bounds, an error will be thrown. -- -- > sequBare = Sequence ['a', 'a', 'b', 'a'] mempty mempty :: BareSequence Char -- > toMarked sequ [("Letter A", (0, 2)), ("Letter A", (3, 4))] :: MarkedSequence String Char -- toMarked :: (IsBareSequence s, IsMarkedSequence s', Marking s' ~ mk, Element s ~ Element s', MonadError Text m) => s -> [(mk, Range)] -> m s' toMarked (toSequence -> s) = markedSequence (V.toList $ s ^. sequ) unsafeToMarked :: (IsBareSequence s, IsMarkedSequence s', Marking s' ~ mk, Element s ~ Element s') => s -> [(mk, Range)] -> s' unsafeToMarked (toSequence -> s) = unsafeMarkedSequence (V.toList $ s ^. sequ) -- | Adds new 'Marking's to given 'IsSequence' @s@. Type of new 'Marking's must -- match type of 'Marking's that @s@ is already marked with. If at least one of ranges -- in given list of 'Marking's is out of bounds, an error will be thrown. -- -- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty -- > sequ' = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4))] mempty -- > addMarkings sequ' [("Letter B", (2, 3))] == sequ -- addMarkings :: (ContainsMarking s, Marking s ~ mk, MonadError Text m) => s -> [(mk, Range)] -> m s addMarkings (toSequence -> s) markings' | all (checkRange (length s) . snd) markings' = pure res | otherwise = throwError rangesError where res = fromSequence $ _sequenceInner (s ^. sequ) (s ^. markings <> markings') (s ^. weights) rangesError :: Text rangesError = "Bio.Sequence.Functions.Marking: can't add markings to Sequence, because some of them are out of range." unsafeAddMarkings :: (ContainsMarking s, Marking s ~ mk) => s -> [(mk, Range)] -> s unsafeAddMarkings s = unsafeEither . addMarkings s -- | Retrieves all 'Marking's from given sequence that 'ContainsMarking'. -- Result is list of 'Marking's without dublicates. -- -- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty -- > listMarkings sequ == ["Letter A", "Letter B"] -- listMarkings :: ContainsMarking s => s -> [Marking s] listMarkings (toSequence -> s) = nub $ fst <$> s ^. markings -------------------------------------------------------------------------------- -- Inner functions. -------------------------------------------------------------------------------- member :: Eq a => a -> [(a, b)] -> Bool member a = (a `elem`) . fmap fst lookupAll :: Eq a => a -> [(a, b)] -> [b] lookupAll a = fmap snd . filter ((== a) . fst)