{-# OPTIONS_GHC -XFlexibleContexts #-} {-# LANGUAGE BangPatterns #-} module Bio.GFF3.FeatureHierSequences ( FeatureHierSequences, features, sequences , fromLists, parse , lookupId, parents, children , seqData, getSequence, featureSequence , runGFF, runGFFIO, asksGFF ) where import Control.Monad.Error import Control.Monad.Reader import qualified Data.ByteString.Lazy.Char8 as LBS import qualified Data.Map as M import Data.Maybe import qualified Data.Set as S import Bio.Sequence.SeqData import Bio.Sequence.Fasta import qualified Bio.GFF3.Feature as F import qualified Bio.GFF3.FeatureHier as FH import qualified Bio.Location.Location as Loc import qualified Bio.Location.SeqLocation as SeqLoc import Bio.Location.OnSeq data FeatureHierSequences = FeatureHierSequences { featureHier :: !FH.FeatureHier , sequenceMap :: !(M.Map SeqName SeqData) } deriving (Show) parse :: (Error e, MonadError e m) => LBS.ByteString -> m FeatureHierSequences parse str = do (feats, seqlines) <- F.parseWithFasta str fromLists feats $ mkSeqs seqlines fromLists :: (Error e, MonadError e m) => [F.Feature] -> [Sequence a] -> m FeatureHierSequences fromLists feats seqs = let !seqmap = M.fromList $ map seqAssoc seqs !featSeqids = S.fromList $ map F.seqid feats !missingSeqids = featSeqids `S.difference` M.keysSet seqmap in if S.null missingSeqids then liftM (\fh -> FeatureHierSequences fh seqmap) $ FH.fromList feats else throwError $ strMsg $ "Missing sequences for seqids " ++ show (map LBS.unpack $ S.toList missingSeqids) where seqAssoc (Seq seqname sequ _) = (seqname, sequ) features :: FeatureHierSequences -> S.Set F.Feature features = FH.features . featureHier sequences :: FeatureHierSequences -> [Sequence a] sequences = M.foldWithKey mkSeqList [] . sequenceMap where mkSeqList seqname sequ = ((Seq seqname sequ Nothing) :) lookupId :: (Error e, MonadError e m) => FeatureHierSequences -> SeqName -> m F.Feature lookupId fhs = FH.lookupId $ featureHier fhs children :: FeatureHierSequences -> F.Feature -> [F.Feature] children = FH.children . featureHier parents :: FeatureHierSequences -> F.Feature -> [F.Feature] parents = FH.parents . featureHier seqData :: (Error e, MonadError e m) => FeatureHierSequences -> SeqLoc.SeqLoc -> m SeqData seqData fhs (OnSeq chrname loc) = do chrseq <- getSequence fhs chrname return $ Loc.seqDataPadded chrseq loc getSequence :: (Error e, MonadError e m) => FeatureHierSequences -> SeqName -> m SeqData getSequence fhs seqid = maybe seqidNotFound return $ M.lookup seqid $ sequenceMap fhs where seqidNotFound = throwError $ strMsg $ "SeqID " ++ show (LBS.unpack seqid) ++ " not found" featureSequence :: (Error e, MonadError e m) => FeatureHierSequences -> F.Feature -> m (Sequence a) featureSequence fhs f = do sequ <- seqData fhs $ F.seqLoc f let seqname = fromMaybe (LBS.pack $ SeqLoc.display $ F.seqLoc f) $ listToMaybe $ F.ids f return $ Seq seqname sequ Nothing catchIOErrors :: IO a -> ErrorT String IO a catchIOErrors m = ErrorT { runErrorT = liftM Right m `catch` (return . Left . show) } runGFF :: FilePath -> (ErrorT String (Reader FeatureHierSequences) a) -> ErrorT String IO a runGFF gffname m = do gff <- catchIOErrors $ LBS.readFile gffname fhs <- parse gff either throwError return $ runReader (runErrorT m) fhs runGFFIO :: FilePath -> (ErrorT String (ReaderT FeatureHierSequences IO) a) -> ErrorT String IO a runGFFIO gffname m = do gff <- catchIOErrors $ LBS.readFile gffname fhs <- parse gff mapErrorT (flip runReaderT fhs) m asksGFF :: (Error e, MonadError e m, MonadReader FeatureHierSequences m) => (FeatureHierSequences -> a -> m b) -> a -> m b asksGFF f x = do fhs <- ask; f fhs x