{-# OPTIONS_GHC -XFlexibleContexts #-}

module Bio.GFF3.FeatureHierSequences ( FeatureHierSequences, features, sequences
                                     , fromLists, parse
                                     , lookupId, parents, children
                                     , seqData, getSequence, featureSequence
                                     , runGFF, runGFFIO, asksGFF
                                     )
    where 

import Control.Monad.Error
import Control.Monad.Reader
import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Data.Map as M
import Data.Maybe
import qualified Data.Set as S

import Bio.Sequence.SeqData
import Bio.Sequence.Fasta

import qualified Bio.GFF3.Feature as F
import qualified Bio.GFF3.FeatureHier as FH
import qualified Bio.Location.Location as Loc
import qualified Bio.Location.SeqLocation as SeqLoc
import Bio.Location.OnSeq

data FeatureHierSequences = FeatureHierSequences { featureHier :: !FH.FeatureHier
                                                 , sequenceMap :: !(M.Map SeqName SeqData)
                                                 } deriving (Show)

parse :: (Error e, MonadError e m) => LBS.ByteString -> m FeatureHierSequences
parse str = do (feats, seqlines) <- F.parseWithFasta str
               fromLists feats $ mkSeqs seqlines

fromLists :: (Error e, MonadError e m) => [F.Feature] -> [Sequence a] -> m FeatureHierSequences
fromLists feats seqs = let !seqmap = M.fromList $ map seqAssoc seqs
                           !featSeqids = S.fromList $ map F.seqid feats
                           !missingSeqids = featSeqids `S.difference` M.keysSet seqmap
                       in if S.null missingSeqids
                          then liftM (\fh -> FeatureHierSequences fh seqmap) $ FH.fromList feats
                          else throwError $ strMsg $ "Missing sequences for seqids " ++ show (map LBS.unpack $ S.toList missingSeqids)
    where seqAssoc (Seq seqname sequ _) = (seqname, sequ)

features :: FeatureHierSequences -> S.Set F.Feature
features = FH.features . featureHier

sequences :: FeatureHierSequences -> [Sequence a]
sequences = M.foldWithKey mkSeqList [] . sequenceMap
    where mkSeqList seqname sequ = ((Seq seqname sequ Nothing) :)

lookupId :: (Error e, MonadError e m) => FeatureHierSequences -> SeqName -> m F.Feature
lookupId fhs = FH.lookupId $ featureHier fhs

children :: FeatureHierSequences -> F.Feature -> [F.Feature]
children = FH.children . featureHier

parents :: FeatureHierSequences -> F.Feature -> [F.Feature]
parents = FH.parents . featureHier

seqData :: (Error e, MonadError e m) => FeatureHierSequences -> SeqLoc.SeqLoc -> m SeqData
seqData fhs (OnSeq chrname loc) = do chrseq <- getSequence fhs chrname
                                     return $ Loc.seqDataPadded chrseq loc

getSequence :: (Error e, MonadError e m) => FeatureHierSequences -> SeqName -> m SeqData
getSequence fhs seqid = maybe seqidNotFound return $ M.lookup seqid $ sequenceMap fhs
    where seqidNotFound = throwError $ strMsg $ "SeqID " ++ show (LBS.unpack seqid) ++ " not found"

featureSequence :: (Error e, MonadError e m) => FeatureHierSequences -> F.Feature -> m (Sequence a)
featureSequence fhs f = do sequ <- seqData fhs $ F.seqLoc f
                           let seqname = fromMaybe (LBS.pack $ SeqLoc.display $ F.seqLoc f) $ listToMaybe $ F.ids f
                           return $ Seq seqname sequ Nothing

catchIOErrors :: IO a -> ErrorT String IO a
catchIOErrors m = ErrorT { runErrorT = liftM Right m `catch` (return . Left . show) }

runGFF :: FilePath -> (ErrorT String (Reader FeatureHierSequences) a) -> ErrorT String IO a
runGFF gffname m = do gff <- catchIOErrors $ LBS.readFile gffname
                      fhs <- parse gff
                      either throwError return $ runReader (runErrorT m) fhs

runGFFIO :: FilePath -> (ErrorT String (ReaderT FeatureHierSequences IO) a) -> ErrorT String IO a
runGFFIO gffname m =  do gff <- catchIOErrors $ LBS.readFile gffname
                         fhs <- parse gff
                         mapErrorT (flip runReaderT fhs) m

asksGFF :: (Error e, MonadError e m, MonadReader FeatureHierSequences m) => (FeatureHierSequences -> a -> m b) -> a -> m b
asksGFF f x = do fhs <- ask; f fhs x