module Bio.GFF3.FeatureHierSequences ( FeatureHierSequences, features, sequences
, fromLists, parse
, lookupId, parents, children
, seqData, getSequence, featureSequence
, runGFF, runGFFIO, asksGFF
)
where
import Control.Monad.Error
import Control.Monad.Reader
import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Data.Map as M
import Data.Maybe
import qualified Data.Set as S
import Bio.Sequence.SeqData
import Bio.Sequence.Fasta
import qualified Bio.GFF3.Feature as F
import qualified Bio.GFF3.FeatureHier as FH
import qualified Bio.Location.Location as Loc
import qualified Bio.Location.SeqLocation as SeqLoc
import Bio.Location.OnSeq
data FeatureHierSequences = FeatureHierSequences { featureHier :: !FH.FeatureHier
, sequenceMap :: !(M.Map SeqName SeqData)
} deriving (Show)
parse :: (Error e, MonadError e m) => LBS.ByteString -> m FeatureHierSequences
parse str = do (feats, seqlines) <- F.parseWithFasta str
fromLists feats $ mkSeqs seqlines
fromLists :: (Error e, MonadError e m) => [F.Feature] -> [Sequence a] -> m FeatureHierSequences
fromLists feats seqs = let !seqmap = M.fromList $ map seqAssoc seqs
!featSeqids = S.fromList $ map F.seqid feats
!missingSeqids = featSeqids `S.difference` M.keysSet seqmap
in if S.null missingSeqids
then liftM (\fh -> FeatureHierSequences fh seqmap) $ FH.fromList feats
else throwError $ strMsg $ "Missing sequences for seqids " ++ show (map LBS.unpack $ S.toList missingSeqids)
where seqAssoc (Seq seqname sequ _) = (seqname, sequ)
features :: FeatureHierSequences -> S.Set F.Feature
features = FH.features . featureHier
sequences :: FeatureHierSequences -> [Sequence a]
sequences = M.foldWithKey mkSeqList [] . sequenceMap
where mkSeqList seqname sequ = ((Seq seqname sequ Nothing) :)
lookupId :: (Error e, MonadError e m) => FeatureHierSequences -> SeqName -> m F.Feature
lookupId fhs = FH.lookupId $ featureHier fhs
children :: FeatureHierSequences -> F.Feature -> [F.Feature]
children = FH.children . featureHier
parents :: FeatureHierSequences -> F.Feature -> [F.Feature]
parents = FH.parents . featureHier
seqData :: (Error e, MonadError e m) => FeatureHierSequences -> SeqLoc.SeqLoc -> m SeqData
seqData fhs (OnSeq chrname loc) = do chrseq <- getSequence fhs chrname
return $ Loc.seqDataPadded chrseq loc
getSequence :: (Error e, MonadError e m) => FeatureHierSequences -> SeqName -> m SeqData
getSequence fhs seqid = maybe seqidNotFound return $ M.lookup seqid $ sequenceMap fhs
where seqidNotFound = throwError $ strMsg $ "SeqID " ++ show (LBS.unpack seqid) ++ " not found"
featureSequence :: (Error e, MonadError e m) => FeatureHierSequences -> F.Feature -> m (Sequence a)
featureSequence fhs f = do sequ <- seqData fhs $ F.seqLoc f
let seqname = fromMaybe (LBS.pack $ SeqLoc.display $ F.seqLoc f) $ listToMaybe $ F.ids f
return $ Seq seqname sequ Nothing
catchIOErrors :: IO a -> ErrorT String IO a
catchIOErrors m = ErrorT { runErrorT = liftM Right m `catch` (return . Left . show) }
runGFF :: FilePath -> (ErrorT String (Reader FeatureHierSequences) a) -> ErrorT String IO a
runGFF gffname m = do gff <- catchIOErrors $ LBS.readFile gffname
fhs <- parse gff
either throwError return $ runReader (runErrorT m) fhs
runGFFIO :: FilePath -> (ErrorT String (ReaderT FeatureHierSequences IO) a) -> ErrorT String IO a
runGFFIO gffname m = do gff <- catchIOErrors $ LBS.readFile gffname
fhs <- parse gff
mapErrorT (flip runReaderT fhs) m
asksGFF :: (Error e, MonadError e m, MonadReader FeatureHierSequences m) => (FeatureHierSequences -> a -> m b) -> a -> m b
asksGFF f x = do fhs <- ask; f fhs x