{-# LANGUAGE ViewPatterns #-}

module Bio.Sequence.Functions.Marking
  ( getMarking
  , unsafeGetMarking
  , toMarked
  , unsafeToMarked
  , addMarkings
  , unsafeAddMarkings
  , listMarkings
  ) where

import           Bio.Sequence.Class              (ContainsMarking,
                                                  IsBareSequence,
                                                  IsMarkedSequence,
                                                  IsSequence (..),
                                                  markedSequence, markings,
                                                  sequ, unsafeMarkedSequence,
                                                  weights, _sequenceInner)
import           Bio.Sequence.Functions.Sequence (length, unsafeGetRange)
import           Bio.Sequence.Utilities          (Range, checkRange,
                                                  unsafeEither)
import           Control.Lens
import           Control.Monad.Except            (MonadError, throwError)
import           Data.List                       (nub)
import           Data.List.NonEmpty              (NonEmpty (..))
import           Data.Text                       (Text)
import qualified Data.Vector                     as V (toList)
import           Prelude                         hiding (drop, head, length,
                                                  null, reverse, tail, take,
                                                  (!!))

-- | Function that retrieves all elements in 'IsSequence' @s@ that are covered by given 'Marking'' @s@.
-- Returns 'NonEmpty' list, because if 'Marking' is present in @s@, then list of
-- all 'Marking's for @s@ can't be empty. If given 'Marking is not found in @s@, an
-- error will be thrown.
--
-- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty
-- > getMarking sequ "Letter A" == ['a', 'a'] :| [['a']]
--
getMarking :: (ContainsMarking s, MonadError Text m) => s -> Marking s -> m (NonEmpty [Element s])
getMarking :: s -> Marking s -> m (NonEmpty [Element s])
getMarking (s -> Sequence (Marking s) (Weight s) (Element s)
forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) Marking s
mk | Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Marking s
mk Marking s -> [(Marking s, Range)] -> Bool
forall a b. Eq a => a -> [(a, b)] -> Bool
`member` (Sequence (Marking s) (Weight s) (Element s)
s Sequence (Marking s) (Weight s) (Element s)
-> Getting
     [(Marking s, Range)]
     (Sequence (Marking s) (Weight s) (Element s))
     [(Marking s, Range)]
-> [(Marking s, Range)]
forall s a. s -> Getting a s a -> a
^. Getting
  [(Marking s, Range)]
  (Sequence (Marking s) (Weight s) (Element s))
  [(Marking s, Range)]
forall mk w a. Getter (Sequence mk w a) [(mk, Range)]
markings) = Text -> m (NonEmpty [Element s])
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError Text
markingNotFoundError
                                | Bool
otherwise                         = NonEmpty [Element s] -> m (NonEmpty [Element s])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (NonEmpty [Element s] -> m (NonEmpty [Element s]))
-> NonEmpty [Element s] -> m (NonEmpty [Element s])
forall a b. (a -> b) -> a -> b
$ NonEmpty [Element s]
res
  where
    res :: NonEmpty [Element s]
res = (NonEmpty [Element s]
 -> NonEmpty [Element s] -> NonEmpty [Element s])
-> [NonEmpty [Element s]] -> NonEmpty [Element s]
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldl1 NonEmpty [Element s]
-> NonEmpty [Element s] -> NonEmpty [Element s]
forall a. Semigroup a => a -> a -> a
(<>) ([NonEmpty [Element s]] -> NonEmpty [Element s])
-> [NonEmpty [Element s]] -> NonEmpty [Element s]
forall a b. (a -> b) -> a -> b
$ (Range -> NonEmpty [Element s])
-> [Range] -> [NonEmpty [Element s]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (([Element s] -> [[Element s]] -> NonEmpty [Element s]
forall a. a -> [a] -> NonEmpty a
:| []) ([Element s] -> NonEmpty [Element s])
-> (Range -> [Element s]) -> Range -> NonEmpty [Element s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Sequence (Marking s) (Weight s) (Element s)
-> Range -> [Element (Sequence (Marking s) (Weight s) (Element s))]
forall s. IsSequence s => s -> Range -> [Element s]
unsafeGetRange Sequence (Marking s) (Weight s) (Element s)
s) ([Range] -> [NonEmpty [Element s]])
-> [Range] -> [NonEmpty [Element s]]
forall a b. (a -> b) -> a -> b
$  Marking s
mk Marking s -> [(Marking s, Range)] -> [Range]
forall a b. Eq a => a -> [(a, b)] -> [b]
`lookupAll` (Sequence (Marking s) (Weight s) (Element s)
s Sequence (Marking s) (Weight s) (Element s)
-> Getting
     [(Marking s, Range)]
     (Sequence (Marking s) (Weight s) (Element s))
     [(Marking s, Range)]
-> [(Marking s, Range)]
forall s a. s -> Getting a s a -> a
^. Getting
  [(Marking s, Range)]
  (Sequence (Marking s) (Weight s) (Element s))
  [(Marking s, Range)]
forall mk w a. Getter (Sequence mk w a) [(mk, Range)]
markings)

    markingNotFoundError :: Text
    markingNotFoundError :: Text
markingNotFoundError = Text
"Bio.Sequence.Functions.Marking: given marking not found in Sequence."

unsafeGetMarking :: ContainsMarking s => s -> Marking s -> NonEmpty [Element s]
unsafeGetMarking :: s -> Marking s -> NonEmpty [Element s]
unsafeGetMarking s
mk = Either Text (NonEmpty [Element s]) -> NonEmpty [Element s]
forall a. Either Text a -> a
unsafeEither (Either Text (NonEmpty [Element s]) -> NonEmpty [Element s])
-> (Marking s -> Either Text (NonEmpty [Element s]))
-> Marking s
-> NonEmpty [Element s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> Marking s -> Either Text (NonEmpty [Element s])
forall s (m :: * -> *).
(ContainsMarking s, MonadError Text m) =>
s -> Marking s -> m (NonEmpty [Element s])
getMarking s
mk

-- | Converts 'IsBareSequence' @s@ to 'IsMarkedSequence' @s'@ that is marked using provided list
-- of 'Marking's. If at least one of ranges in given list of 'Marking's is out of
-- bounds, an error will be thrown.
--
-- > sequBare = Sequence ['a', 'a', 'b', 'a'] mempty mempty :: BareSequence Char
-- > toMarked sequ [("Letter A", (0, 2)), ("Letter A", (3, 4))] :: MarkedSequence String Char
--
toMarked :: (IsBareSequence s, IsMarkedSequence s', Marking s' ~ mk, Element s ~ Element s', MonadError Text m) => s -> [(mk, Range)] -> m s'
toMarked :: s -> [(mk, Range)] -> m s'
toMarked (s -> Sequence (Marking s) (Weight s) (Element s)
forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) = [Element s'] -> [(Marking s', Range)] -> m s'
forall s (m :: * -> *).
(IsMarkedSequence s, MonadError Text m) =>
[Element s] -> [(Marking s, Range)] -> m s
markedSequence (Vector (Element s') -> [Element s']
forall a. Vector a -> [a]
V.toList (Vector (Element s') -> [Element s'])
-> Vector (Element s') -> [Element s']
forall a b. (a -> b) -> a -> b
$ Sequence (Marking s) (Weight s) (Element s)
Sequence (Marking s) (Weight s) (Element s')
s Sequence (Marking s) (Weight s) (Element s')
-> Getting
     (Vector (Element s'))
     (Sequence (Marking s) (Weight s) (Element s'))
     (Vector (Element s'))
-> Vector (Element s')
forall s a. s -> Getting a s a -> a
^. Getting
  (Vector (Element s'))
  (Sequence (Marking s) (Weight s) (Element s'))
  (Vector (Element s'))
forall mk w a. Getter (Sequence mk w a) (Vector a)
sequ)

unsafeToMarked :: (IsBareSequence s, IsMarkedSequence s', Marking s' ~ mk, Element s ~ Element s') => s -> [(mk, Range)] -> s'
unsafeToMarked :: s -> [(mk, Range)] -> s'
unsafeToMarked (s -> Sequence (Marking s) (Weight s) (Element s)
forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) = [Element s'] -> [(Marking s', Range)] -> s'
forall s.
IsMarkedSequence s =>
[Element s] -> [(Marking s, Range)] -> s
unsafeMarkedSequence (Vector (Element s') -> [Element s']
forall a. Vector a -> [a]
V.toList (Vector (Element s') -> [Element s'])
-> Vector (Element s') -> [Element s']
forall a b. (a -> b) -> a -> b
$ Sequence (Marking s) (Weight s) (Element s)
Sequence (Marking s) (Weight s) (Element s')
s Sequence (Marking s) (Weight s) (Element s')
-> Getting
     (Vector (Element s'))
     (Sequence (Marking s) (Weight s) (Element s'))
     (Vector (Element s'))
-> Vector (Element s')
forall s a. s -> Getting a s a -> a
^. Getting
  (Vector (Element s'))
  (Sequence (Marking s) (Weight s) (Element s'))
  (Vector (Element s'))
forall mk w a. Getter (Sequence mk w a) (Vector a)
sequ)

-- | Adds new 'Marking's to given 'IsSequence' @s@. Type of new 'Marking's must
-- match type of 'Marking's that @s@ is already marked with. If at least one of ranges
-- in given list of 'Marking's is out of bounds, an error will be thrown.
--
-- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty
-- > sequ' = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4))] mempty
-- > addMarkings sequ' [("Letter B", (2, 3))] == sequ
--
addMarkings :: (ContainsMarking s, Marking s ~ mk, MonadError Text m) => s -> [(mk, Range)] -> m s
addMarkings :: s -> [(mk, Range)] -> m s
addMarkings (s -> Sequence (Marking s) (Weight s) (Element s)
forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) [(mk, Range)]
markings' | ((mk, Range) -> Bool) -> [(mk, Range)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Int -> Range -> Bool
checkRange (Sequence mk (Weight s) (Element s) -> Int
forall s. IsSequence s => s -> Int
length Sequence mk (Weight s) (Element s)
Sequence (Marking s) (Weight s) (Element s)
s) (Range -> Bool) -> ((mk, Range) -> Range) -> (mk, Range) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (mk, Range) -> Range
forall a b. (a, b) -> b
snd) [(mk, Range)]
markings' = s -> m s
forall (f :: * -> *) a. Applicative f => a -> f a
pure s
res
                                        | Bool
otherwise                                   = Text -> m s
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError Text
rangesError
  where
    res :: s
res = Sequence (Marking s) (Weight s) (Element s) -> s
forall s.
IsSequence s =>
Sequence (Marking s) (Weight s) (Element s) -> s
fromSequence (Sequence (Marking s) (Weight s) (Element s) -> s)
-> Sequence (Marking s) (Weight s) (Element s) -> s
forall a b. (a -> b) -> a -> b
$ Vector (Element s)
-> [(mk, Range)]
-> Vector (Weight s)
-> Sequence mk (Weight s) (Element s)
forall a mk w.
Vector a -> [(mk, Range)] -> Vector w -> Sequence mk w a
_sequenceInner (Sequence mk (Weight s) (Element s)
Sequence (Marking s) (Weight s) (Element s)
s Sequence mk (Weight s) (Element s)
-> Getting
     (Vector (Element s))
     (Sequence mk (Weight s) (Element s))
     (Vector (Element s))
-> Vector (Element s)
forall s a. s -> Getting a s a -> a
^. Getting
  (Vector (Element s))
  (Sequence mk (Weight s) (Element s))
  (Vector (Element s))
forall mk w a. Getter (Sequence mk w a) (Vector a)
sequ) (Sequence mk (Weight s) (Element s)
Sequence (Marking s) (Weight s) (Element s)
s Sequence mk (Weight s) (Element s)
-> Getting
     [(mk, Range)] (Sequence mk (Weight s) (Element s)) [(mk, Range)]
-> [(mk, Range)]
forall s a. s -> Getting a s a -> a
^. Getting
  [(mk, Range)] (Sequence mk (Weight s) (Element s)) [(mk, Range)]
forall mk w a. Getter (Sequence mk w a) [(mk, Range)]
markings [(mk, Range)] -> [(mk, Range)] -> [(mk, Range)]
forall a. Semigroup a => a -> a -> a
<> [(mk, Range)]
markings') (Sequence mk (Weight s) (Element s)
Sequence (Marking s) (Weight s) (Element s)
s Sequence mk (Weight s) (Element s)
-> Getting
     (Vector (Weight s))
     (Sequence mk (Weight s) (Element s))
     (Vector (Weight s))
-> Vector (Weight s)
forall s a. s -> Getting a s a -> a
^. Getting
  (Vector (Weight s))
  (Sequence mk (Weight s) (Element s))
  (Vector (Weight s))
forall mk w a. Getter (Sequence mk w a) (Vector w)
weights)

    rangesError :: Text
    rangesError :: Text
rangesError = Text
"Bio.Sequence.Functions.Marking: can't add markings to Sequence, because some of them are out of range."

unsafeAddMarkings :: (ContainsMarking s, Marking s ~ mk) => s -> [(mk, Range)] -> s
unsafeAddMarkings :: s -> [(mk, Range)] -> s
unsafeAddMarkings s
s = Either Text s -> s
forall a. Either Text a -> a
unsafeEither (Either Text s -> s)
-> ([(mk, Range)] -> Either Text s) -> [(mk, Range)] -> s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> [(mk, Range)] -> Either Text s
forall s mk (m :: * -> *).
(ContainsMarking s, Marking s ~ mk, MonadError Text m) =>
s -> [(mk, Range)] -> m s
addMarkings s
s

-- | Retrieves all 'Marking's from given sequence that 'ContainsMarking'.
--   Result is list of 'Marking's without dublicates.
--
-- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty
-- > listMarkings sequ == ["Letter A", "Letter B"]
--
listMarkings :: ContainsMarking s => s -> [Marking s]
listMarkings :: s -> [Marking s]
listMarkings (s -> Sequence (Marking s) (Weight s) (Element s)
forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) = [Marking s] -> [Marking s]
forall a. Eq a => [a] -> [a]
nub ([Marking s] -> [Marking s]) -> [Marking s] -> [Marking s]
forall a b. (a -> b) -> a -> b
$ (Marking s, Range) -> Marking s
forall a b. (a, b) -> a
fst ((Marking s, Range) -> Marking s)
-> [(Marking s, Range)] -> [Marking s]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Sequence (Marking s) (Weight s) (Element s)
s Sequence (Marking s) (Weight s) (Element s)
-> Getting
     [(Marking s, Range)]
     (Sequence (Marking s) (Weight s) (Element s))
     [(Marking s, Range)]
-> [(Marking s, Range)]
forall s a. s -> Getting a s a -> a
^. Getting
  [(Marking s, Range)]
  (Sequence (Marking s) (Weight s) (Element s))
  [(Marking s, Range)]
forall mk w a. Getter (Sequence mk w a) [(mk, Range)]
markings

--------------------------------------------------------------------------------
-- Inner functions.
--------------------------------------------------------------------------------

member :: Eq a => a -> [(a, b)] -> Bool
member :: a -> [(a, b)] -> Bool
member a
a = (a
a a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) ([a] -> Bool) -> ([(a, b)] -> [a]) -> [(a, b)] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((a, b) -> a) -> [(a, b)] -> [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (a, b) -> a
forall a b. (a, b) -> a
fst

lookupAll :: Eq a => a -> [(a, b)] -> [b]
lookupAll :: a -> [(a, b)] -> [b]
lookupAll a
a = ((a, b) -> b) -> [(a, b)] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (a, b) -> b
forall a b. (a, b) -> b
snd ([(a, b)] -> [b]) -> ([(a, b)] -> [(a, b)]) -> [(a, b)] -> [b]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((a, b) -> Bool) -> [(a, b)] -> [(a, b)]
forall a. (a -> Bool) -> [a] -> [a]
filter ((a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
a) (a -> Bool) -> ((a, b) -> a) -> (a, b) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a, b) -> a
forall a b. (a, b) -> a
fst)