-- | A simple parser for Stockholm data. As we do not interpret most stuff
-- right now, Parsec is not required.

module Biobase.Infernal.Stockholm.Import where

import Biobase.Infernal.Stockholm
import Data.List

-- | Read a Stockholm file from disk and sort its lines into categories.
--
-- Lines that contain no non-whitespace characters are dropped (this includes
-- empty lines and the bare @\\r@ left over from blank lines in CRLF files).
-- The remaining lines are classified, in this order:
--
-- * lines starting with @#=GC@ become 'colAnnotations'; the @#=GC@ tag is
--   dropped, the next word is the key and the remaining words the value;
--
-- * lines whose first character is neither @#@ nor @/@ become 'sequences';
--   the first word is the key and the remaining words the value;
--
-- * lines starting with @#=EX@ become 'exAnnotations', handled like @#=GC@;
--
-- * everything else is kept verbatim in 'unknown'.
--
-- Note that 'mkPair' calls 'error' for any classified line that has fewer
-- than two fields (after the tag has been dropped, where applicable).
fromFile :: String -> IO Stockholm
fromFile fname = do
  -- 'words' yields [] for whitespace-only lines; dropping those up front
  -- keeps them from reaching 'mkPair' as (invalid) sequence lines.
  ls <- fmap (filter (not . null . words) . lines) (readFile fname)
  let (colanno, rest1) = partition ("#=GC" `isPrefixOf`) ls
      (seqdata, rest2) = partition isSeqLine rest1
      (exdata , unk)   = partition ("#=EX" `isPrefixOf`) rest2
  return Stockholm
    { sequences      = map (mkPair . words) seqdata
    , colAnnotations = map (mkPair . drop 1 . words) colanno
    , exAnnotations  = map (mkPair . drop 1 . words) exdata
    , unknown        = unk
    }
  where
    -- A sequence line is any line not starting with a markup ('#') or
    -- terminator ('/') character. Total, even though empty lines have
    -- already been filtered out by the time this is called.
    isSeqLine (c:_) = c /= '#' && c /= '/'
    isSeqLine []    = False



-- | Turn a tokenised line into a @(key, value)@ pair: the first token is the
-- key, all remaining tokens are re-joined with single spaces as the value.
--
-- Calls 'error' when given fewer than two tokens.
mkPair :: [String] -> (String, String)
mkPair tokens = case tokens of
  []           -> error "empty list"
  [single]     -> error ("just one element: " ++ show single)
  (key : rest) -> (key, unwords rest)