-- | An efficient pipes-based parser for @RNAfold@ output.

module BioInf.ViennaRNA.Parsers.RNAfold where

{-
-- | Lazily read @RNAfold@ structures.
--
-- TODO use @pipes/machines@! we need lazy reading of files and live in an exceptt transformer stack!
-- TODO generalize transformer stack

readRNAfoldFiles
  ∷ FilePath
  → IO [RNAfoldResult]
readRNAfoldFiles fp = do
  let go [] = return []
      go (f:fs) = do
        bs ← unsafeInterleaveIO (putStrLn ("#" ++ f) >> decompress <$> BSL.readFile f)
        let rs = either error id $ runExcept (bslToRNAfoldResult bs)
        rss ← go fs
        return $ rs ++ rss
      go ∷ [FilePath] → IO [RNAfoldResult]
  FP.find FP.always (FP.extension FP.==? ".gz")  (fp </> "structures") >>= go
{-# NoInline readRNAfoldFiles #-}

-- |

bslToRNAfoldResult ∷ (Monad m) ⇒ BSL.ByteString → ExceptT String m [RNAfoldResult]
bslToRNAfoldResult bs = do
  case A.eitherResult $ A.parse pRNAfold bs of
    Left e  → throwE e
    Right r → return r
{-# Inline bslToRNAfoldResult #-}

-- | Parser for @RNAfold@ output. @RNAfold@ can have between 2 and 5 output lines.
--
-- TODO Extend the parser to deal with all cases. Our best hint is probably if
-- there is whitespace in a line.
--
-- @
-- echo "CCCAAAGGG\nCCCAAAGGG" | ./RNAfold -p
-- CCCAAAGGG
-- (((...))) ( -1.20)
-- (((...))) [ -1.41]
-- (((...))) { -1.20 d=1.06}
--  frequency of mfe structure in ensemble 0.707288; ensemble diversity 1.67  
-- CCCAAAGGG
-- (((...))) ( -1.20)
-- (((...))) [ -1.41]
-- (((...))) { -1.20 d=1.06}
--  frequency of mfe structure in ensemble 0.707288; ensemble diversity 1.67  
-- @

pRNAfold ∷ A.Parser [RNAfoldResult]
pRNAfold = A.many1' go <* A.endOfInput where
  go = do
    -- 1. sequence
    rnaFoldSequence       ← BS.copy <$> AC.takeWhile AC.isAlpha_ascii <* AC.skipSpace A.<?> "RNAfold sequence"
    -- 2. mfe
    rnaFoldMFEStruc       ← BS.copy <$> AC.takeTill AC.isSpace <* AC.skipSpace A.<?> "RNAfold MFE structure"
    rnaFoldMFEEner        ← AC.char '(' *> AC.skipSpace *> AC.double <* AC.char ')' <* AC.skipSpace A.<?> "RNAfold MFE energy"
    -- 3. ensemble
    rnaFoldEnsembleStruc  ← BS.copy <$> AC.takeTill AC.isSpace <* AC.skipSpace
    rnaFoldEnsembleEner   ← AC.char '[' *> AC.skipSpace *> AC.double <* AC.char ']' <* AC.skipSpace
    -- 4. centroid
    rnaFoldCentroidStruc  ← BS.copy <$> AC.takeTill AC.isSpace <* AC.skipSpace
    rnaFoldCentroidEner   ← AC.char '{' *> AC.skipSpace *> AC.double <* AC.skipSpace
    dequal                ← AC.string "d=" *> AC.double <* AC.char '}' <* AC.skipSpace
    -- 5.mfe frequency and diversity
    AC.string "frequency of mfe structure in ensemble" *> AC.skipSpace A.<?> "frequency"
    rnaFoldMfeFrequency ← AC.double
    AC.string "; ensemble diversity" *> AC.skipSpace
    rnaFoldDiversity ← AC.double
    AC.skipSpace
    return RNAfoldResult{..}
{-# Inline pRNAfold #-}
-}