{-# LANGUAGE NoMonomorphismRestriction #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE RecordWildCards #-} module BioInf.RNAdesign.CandidateChain where import Control.Arrow (first) import Control.Monad (foldM) import Control.Monad.Primitive import Control.Monad.Primitive.Class import Data.Function (on) import qualified Data.Vector as V import qualified Data.Vector.Fusion.Stream.Monadic as SM import qualified Data.Vector.Unboxed as VU import System.Random.MWC.Monad import Biobase.Primary import Biobase.Secondary.Diagrams import Biobase.Vienna import BioInf.RNAdesign.Assignment (Assignment(..)) -- | A single candidate, with its sequence and the score, this sequence -- receives. Candidates are ordered by their scores. data Candidate = Candidate { candidate :: Primary , score :: Score } deriving (Eq,Show) instance Ord Candidate where (<=) = (<=) `on` score -- | The likelihood score we get. -- -- TODO replace Score Likelihood / LogLikelihood (once we switch to the more -- generic MCMC library) newtype Score = Score { unScore :: Double } deriving (Eq,Ord,Show,Read) -- | This structure defines a "design problem" data DesignProblem = DesignProblem { structures :: [D1Secondary] , assignments :: [Assignment] } deriving (Eq,Read,Show) -- | Create an initial, legal, candidate. mkInitial :: (MonadPrim m, PrimMonad m) => (Primary -> Score) -> Int -> DesignProblem -> Rand m Candidate mkInitial scoring l dp = do let z = VU.replicate l nA foldM (mutateOneAssignmentWith scoring (\_ _ -> return True)) (Candidate z (scoring z)) $ assignments dp -- | Create a stream of 'Candidate's from an initial candidate. unfoldStream :: forall m . (MonadPrim m, PrimMonad m) => Int -> Int -> Int -> (Primary -> Score) -> (Candidate -> Candidate -> Rand m Bool) -> DesignProblem -> Candidate -> SM.Stream (Rand m) Candidate unfoldStream burnin number thin score f dp = go where go s = SM.map snd -- remove remaining indices from stream . SM.take number -- take the number of sequences we want . SM.filter ((==0) . flip mod thin . fst) -- keep only every thin'th sequence . SM.indexed -- add index . SM.drop burnin -- drop the burnin sequences . SM.drop 1 -- drop original input . SM.scanlM' (mutateOneAssignmentWith score f) s -- starting with 's', mutate s further and further using cycled assignments $ SM.unfoldr (Just . first head . splitAt 1) (cycle $ assignments dp) -- create inifinite cycled assignments -- | Mutate the current (or "old") sequence under the possible 'assignment's as -- prescribed by 'Assignment'. The modifying assignment is selected uniformly. -- The monadic @old -> new -> Rand m Bool@ function chooses between the old and -- the new candidate. It can be used to, e.g., allow always choosing "new" if -- it is better, but choosing "new" as well if some stochastic value (hence -- dependence on @Rand m@) indicates so. mutateOneAssignmentWith :: (MonadPrim m, PrimMonad m) => (Primary -> Score) -- ^ the likelihood function, gives a sequence a score -> (Candidate -> Candidate -> Rand m Bool) -- ^ choose between old and new sequence (monadic, stochastic) -> Candidate -- ^ "old" / current sequence -> Assignment -- ^ possible assignments for the sequence -> Rand m Candidate -- ^ the "new" sequence mutateOneAssignmentWith score f old Assignment{..} = do i <- uniformR (0,V.length assignment -1) -- inclusive range for Int let cs = VU.zip columns (assignment V.! i) let nw = VU.update (candidate old) cs let new = Candidate nw (score nw) b <- f old new return $ if b then new else old