-- | Parse HMMCompare output

module Bio.HMMCompareResult
    (
     HMMCompareResult,
     parseHMMCompareResult,
     readHMMCompareResult,
     model1Name,
     model2Name,
     linkscore1, 
     linkscore2,
     linksequence,
     model1matchednodes,
     model2matchednodes,
     getHMMCompareResults,
     getModelsNames,
     getModelNames
    ) where

import Text.ParserCombinators.Parsec     
import Control.Monad

-- | One record of HMMCompare output: the result of comparing two models.
--
-- The field order matches the column order of a result line as read by the
-- record parser in this module.
data HMMCompareResult = HMMCompareResult
  { model1Name         :: String  -- ^ Name of the first model (first column)
  , model2Name         :: String  -- ^ Name of the second model (second column)
  , linkscore1         :: Double  -- ^ First score column (presumably the link score for model 1)
  , linkscore2         :: Double  -- ^ Second score column (presumably the link score for model 2)
  , linksequence       :: String  -- ^ Link sequence; the parser accepts the characters @AGTCUagtcu@
  , model1matchednodes :: [Int]   -- ^ Indices from the first bracketed list (matched nodes of model 1)
  , model2matchednodes :: [Int]   -- ^ Indices from the second bracketed list (matched nodes of model 2)
  } deriving (Show)

-- | Parse the complete text of an HMMCompare output into its result records.
--
-- Yields 'Left' with the Parsec error when the text is not a sequence of
-- well-formed records; errors are labelled with the source name
-- @HMMCompareResult@.
parseHMMCompareResult :: [Char] -> Either ParseError [HMMCompareResult]
parseHMMCompareResult = parse genParseHMMCompareResults "HMMCompareResult"

-- | Read the file at the given path and parse it as HMMCompare output.
--
-- The whole file must consist of result records; the outcome is the list of
-- records or the Parsec error describing the first failure.
readHMMCompareResult :: String -> IO (Either ParseError [HMMCompareResult])
readHMMCompareResult = parseFromFile genParseHMMCompareResults

-- | Parser for a whole HMMCompare output: one or more result records
-- followed by the end of the input.
genParseHMMCompareResults :: GenParser Char st [HMMCompareResult]
genParseHMMCompareResults =
    -- 'try' lets a partially matched record backtrack, so the failure is
    -- then reported by the 'eof' check.
    many1 (try genParseHMMCompareResult) >>= \records ->
        eof >> return records

-- | Convert a textual number to a 'Double'.
-- Partial: like 'read', this throws if the string is not a valid literal.
readDouble :: String -> Double
readDouble numeral = read numeral

-- | Convert a textual integer to an 'Int'.
-- Partial: like 'read', this throws if the string is not a valid literal.
readInt :: String -> Int
readInt numeral = read numeral

-- | Parse a single HMMCompare result record.
--
-- A record is one line of whitespace-separated columns: two model names,
-- two scores, the link sequence (characters from @AGTCUagtcu@) and two
-- bracketed lists of matched node indices. The record ends at a newline or
-- at the end of the input.
--
-- A score column that is not a valid 'Double' literal is reported as a
-- parse failure, so a successful parse never carries a value that throws
-- when it is inspected.
genParseHMMCompareResult :: GenParser Char st HMMCompareResult
genParseHMMCompareResult = do
    name1 <- rawField
    _ <- many1 space
    name2 <- rawField
    _ <- many1 space
    score1 <- scoreField
    _ <- many1 space
    score2 <- scoreField
    _ <- many1 space
    linkseq <- many1 (oneOf "AGTCUagtcu")
    _ <- many1 space
    nodes1 <- nodeList
    _ <- many1 space
    nodes2 <- nodeList
    -- Accept end of input as a terminator too: the last record of a file may
    -- lack a trailing line break, and 'lines'-split input never has one.
    (newline >> return ()) <|> eof
    return $ HMMCompareResult name1 name2 score1 score2 linkseq nodes1 nodes2
  where
    -- A column is everything up to the next space character.
    rawField :: GenParser Char st String
    rawField = many1 (noneOf " ")

    -- Bracketed list of node indices, e.g. @[1,2,3]@.
    nodeList :: GenParser Char st [Int]
    nodeList = between (char '[') (char ']') (many1 parseMatchedNodes)

    -- A column converted to 'Double'. This mirrors the acceptance rule of
    -- 'read' (exactly one complete parse, trailing whitespace allowed) but
    -- turns a bad literal into a parser error instead of an exception.
    scoreField :: GenParser Char st Double
    scoreField = do
        raw <- rawField
        case [v | (v, rest) <- reads raw, ("", "") <- lex rest] of
            [v] -> return v
            _   -> unexpected ("non-numeric score " ++ show raw)

-- | Parse one matched-node index: a run of decimal digits, together with
-- the comma that may follow it inside a node list.
parseMatchedNodes :: GenParser Char st Int
parseMatchedNodes =
    many1 digit >>= \digits ->
        -- the separator is optional so the last index of a list is accepted
        optional (char ',') >> return (readInt digits)

-- | Read a file of HMMCompare results and parse each line independently.
--
-- The result has one entry per line of the file, in order: 'Right' for a
-- line that is a well-formed result record and 'Left' with the parse error
-- otherwise, so one malformed line does not hide the remaining results.
getHMMCompareResults :: FilePath -> IO [Either ParseError HMMCompareResult]
getHMMCompareResults filePath = do
    fileContent <- liftM lines $ readFile filePath
    return $ map parseLine fileContent
  where
    -- 'lines' strips the line terminator; it is restored here so the record
    -- parser is handed a complete, newline-terminated line. Without it
    -- every line was rejected at the record's final newline.
    parseLine l =
        parse genParseHMMCompareResult "genParseHMMCompareResults" (l ++ "\n")

-- | Collect the model names of all given results, two per result, in the
-- order of the input list (first model before second model).
getModelsNames :: [HMMCompareResult] -> [String]
getModelsNames = concatMap getModelNames

-- | The names of the two models compared in a result, first model first.
getModelNames :: HMMCompareResult -> [String]
getModelNames model = [name model | name <- [model1Name, model2Name]]