module Bio.GFF3.SGD ( chromosomes, genes, rRNAs
                    , sortExons
                    , geneSequence, geneSeqLoc, geneCDSes
                    , noncodingSequence, noncodingSeqLoc, noncodingExons
                    , namedSLM, geneCDS_SLM
                    )
    where

import Control.Arrow ((&&&))
import Control.Monad.Error
import Control.Monad.Reader
import qualified Data.ByteString.Lazy.Char8 as LBS
import Data.List
import Data.Maybe
import Data.Ord
import qualified Data.Set as S

import Bio.Sequence.SeqData

import qualified Bio.GFF3.Feature as F
import Bio.GFF3.FeatureHierSequences
-- import qualified Bio.Location.ContigLocation as CLoc
import qualified Bio.Location.Location as Loc
import Bio.Location.OnSeq
import qualified Bio.Location.SeqLocation as SeqLoc
import Bio.Location.Strand
import qualified Bio.Location.SeqLocMap as SLM

-- SGD feature types
cdsType, chromosomeType, geneType, noncodingExonType :: LBS.ByteString
chromosomeType = LBS.pack "chromosome"
cdsType = LBS.pack "CDS"
geneType = LBS.pack "gene"
noncodingExonType = LBS.pack "noncoding_exon"

chromosomes :: FeatureHierSequences -> [F.Feature]
chromosomes = filter isChromosome . S.toList . features
    where isChromosome = (== chromosomeType) . F.ftype

genes :: FeatureHierSequences -> [F.Feature]
genes = filter isGene . S.toList . features
    where isGene = (== geneType) . F.ftype

rRNAs :: FeatureHierSequences -> [F.Feature]
rRNAs = filter isRRNA . S.toList . features
    where isRRNA = any (flip S.member $ S.fromList rRNAFeatureIDs) . F.ids
          rRNAFeatureIDs = map LBS.pack ["RDN25-1", "RDN58-1", "RDN18-1", "RDN5-1"]

geneSequence :: (Error e, MonadError e m) => FeatureHierSequences -> F.Feature -> m (Sequence a)
geneSequence fhs g = do sequ <- geneSeqLoc fhs g >>= SeqLoc.seqData (getSequence fhs)
                        seqname <- maybe (throwError $ strMsg $ "No gene ID for " ++ show g) return $ listToMaybe $ F.ids g
                        return $ Seq seqname sequ Nothing

geneSeqLoc :: (Error e, MonadError e m) => FeatureHierSequences -> F.Feature -> m SeqLoc.SeqLoc
geneSeqLoc fhs g = exonSeqLoc $ geneCDSes fhs g

geneCDSes :: FeatureHierSequences -> F.Feature -> [F.Feature]
geneCDSes fhs g = filter isCDS $ children fhs g
    where isCDS = (== cdsType) . F.ftype          

noncodingSequence :: (Error e, MonadError e m) => FeatureHierSequences -> F.Feature -> m (Sequence a)
noncodingSequence fhs nc = do sequ <- noncodingSeqLoc fhs nc >>= SeqLoc.seqData (getSequence fhs)
                              seqname <- maybe (throwError $ strMsg $ "No gene ID for " ++ show nc) return $ listToMaybe $ F.ids nc
                              return $ Seq seqname sequ Nothing

noncodingSeqLoc :: (Error e, MonadError e m) => FeatureHierSequences -> F.Feature -> m SeqLoc.SeqLoc
noncodingSeqLoc fhs nc = exonSeqLoc $ noncodingExons fhs nc

noncodingExons :: FeatureHierSequences -> F.Feature -> [F.Feature]
noncodingExons fhs nc = filter isNoncodingExon $ children fhs nc
    where isNoncodingExon = (== noncodingExonType) . F.ftype

sortExons :: (Error e, MonadError e m) => [F.Feature] -> m [F.Feature]
sortExons exons | all fwdStrand exons = return $ sortBy (comparing F.start) exons
                | all rcStrand exons = return $ sortBy (comparing $ negate . F.start) exons
                | otherwise = throwError $ strMsg "sortExons: discordant strands"
    where fwdStrand = (== (Just Fwd)) . F.strand
          rcStrand = (== (Just RevCompl)) . F.strand 

exonSeqLoc :: (Error e, MonadError e m) => [F.Feature] -> m SeqLoc.SeqLoc
exonSeqLoc [] = throwError $ strMsg "exonSeqLoc: empty list of exons"
exonSeqLoc exons@(exon0:exonrest)
    = let seqid = F.seqid exon0
      in if all ((== seqid) . F.seqid) exonrest
         then liftM (OnSeq seqid . Loc.Loc . map F.contigLoc) $ sortExons exons
         else throwError $ strMsg "exonSeqLoc: discordant seqid amongst exons"

namedSLM :: FeatureHierSequences -> SLM.SeqLocMap F.Feature
namedSLM = SLM.fromList . map (F.seqLoc &&& id) . filter isNamed . S.toList . features
    where isNamed = not . null . F.ids

geneCDS_SLM :: (Error e, MonadError e m) => FeatureHierSequences -> m (SLM.SeqLocMap F.Feature)
geneCDS_SLM fhs = liftM SLM.fromList $ mapM cdsLocAndGene $ genes fhs
    where cdsLocAndGene g = liftM (flip (,) g) $ geneSeqLoc fhs g