{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE OverloadedStrings #-}

module BioInf.ViennaRNA.DotPlot.Import where

import Data.Text (Text(..))
import qualified Data.Array.IArray as A
import qualified Data.Text as T
import qualified Data.Text.Read as R

import BioInf.ViennaRNA.DotPlot



textToDotPlot :: Text -> DotPlot
textToDotPlot t = DotPlot (A.accumArray (const id) Nothing ((1,1),(l,l)) (map f ps)) True sq where
  l  = T.length sq
  ls = T.lines t
  sq = case (drop 1 . dropWhile (not . T.isPrefixOf "/sequence") $ ls) of
         (x:_)     -> T.init x
         otherwise -> error $ "lines do not contain sequence part: " ++ show ls
  ps = filter (T.isSuffixOf "box")
     . filter (not . T.isPrefixOf "%")
     . takeWhile (not . T.isPrefixOf "showpage")
     . drop 1
     . dropWhile (not . T.isPrefixOf "%start of base pair probability data")
     $ ls
  f p'
    | last p == "cbox" = ( (rdI $ p!!1, rdI $ p!!0), (Just (rdP $ p!!2, Just (rdP $ p!!3, rdP $ p!!4, rdP $ p!!5))) )
    | last p == "ubox" = ( (rdI $ p!!0, rdI $ p!!1), (Just (rdP $ p!!2, Nothing)) )
    | last p == "lbox" = ( (rdI $ p!!1, rdI $ p!!0), (Just (rdP $ p!!2, Nothing)) )
    where p = T.words p'
          rdI :: Text -> Int
          rdI = chk . R.decimal
          rdP :: Text -> Double
          rdP = chk . R.rational
          chk (Left e)      = error $ "dotplot/import read error: " ++ e ++ " in " ++ T.unpack p' ++ show p
          chk (Right (k,_)) = k