module Biobase.Infernal.CM.Import where
import Control.Arrow
import Control.Monad (unless)
import Data.ByteString.Char8 as BS
import Data.Iteratee as I
import Data.Iteratee.Char as I
import Data.Iteratee.IO as I
import Data.Iteratee.Iteratee as I
import Data.Iteratee.ListLike as I
import Data.Iteratee.ZLib as IZ
import Data.Map as M
import Prelude as P
import Control.Monad.IO.Class (liftIO, MonadIO)
import Data.PrimitiveArray
import Data.PrimitiveArray.Ix
import Biobase.Infernal.CM
import Biobase.Infernal.Types
eneeCM :: (Monad m) => Enumeratee ByteString [CM] m a
eneeCM = enumLinesBS ><> convStream f where
f = do
hs' <- I.takeWhile (/="MODEL:")
let hs = M.fromList . P.map (second (BS.dropWhile (==' ')) . BS.break (==' ')) $ hs'
mb <- I.tryHead
unless (mb == Just "MODEL:") . error $ "model error: " ++ show (hs,mb,"head")
ns <- iterNodes
me <- I.tryHead
unless (me == Just "//") . error $ "model error: " ++ show (hs,me,"tail")
return . (:[]) $ CM
{ name = ModelIdentification $ hs M.! "NAME"
, accession = ModelAccession . bsRead . BS.drop 2 $ hs M.! "ACCESSION"
, gathering = BitScore . bsRead $ hs M.! "GA"
, trustedCutoff = BitScore . bsRead $ hs M.! "TC"
, noiseCutoff = let x = hs M.! "NC" in if x == "undefined" then Nothing else Just . BitScore . bsRead $ x
, transition = error "not implemented yet"
, emission = error "not implemented yet"
, paths = error "not implemented yet"
, localBegin = error "not implemented yet"
, begins = error "not implemented yet"
, localEnd = error "not implemented yet"
, nodes = error "not implemented yet"
} where bsRead = read . BS.unpack
iterNodes :: (Monad m) => Iteratee [ByteString] m [Node]
iterNodes = do
hdr' <- I.head
let (ishdr,(hdr,nidx)) = isNodeHeader hdr'
unless ishdr $ error $ show hdr'
xs <- I.takeWhile (fst . isState)
pk <- I.peek
let n = Node
{ nodeHeader = hdr
, nodeIndex = nidx
}
case pk of
Just "//" -> return []
Just x
| (True,_) <- isNodeHeader x -> do
ns <- iterNodes
return $ n:ns
e -> error $ show e
data Node = Node
{ nodeHeader :: ByteString
, nodeIndex :: Int
}
isNodeHeader :: ByteString -> (Bool,(ByteString,Int))
isNodeHeader xs = (isnh,(hdr,nidx)) where
isnh = BS.elem '[' xs && BS.elem ']' xs
[hdr,nidx'] = BS.words . BS.init . BS.takeWhile (/=']') . BS.drop 1 . BS.dropWhile (/='[') $ xs
nidx = read . BS.unpack $ nidx'
isState :: ByteString -> (Bool,ByteString)
isState xs'
| P.null xs = (False,"")
| P.head xs `P.elem` [ "[", "//" ] = (False,"")
| P.head xs `P.elem` [ "S", "IL", "IR", "MATR", "MR", "D", "MP", "ML", "B", "E" ] = (True,"")
| otherwise = error $ show xs
where
xs = BS.words xs'
fromFile :: FilePath -> IO (ID2CM, AC2CM)
fromFile fp = run =<< ( enumFile 8192 fp
. joinI
. eneeCM
$ I.zip (mkMap name) (mkMap accession)
)
fromFileZip :: FilePath -> IO (ID2CM, AC2CM)
fromFileZip fp = run =<< ( enumFile 8192 fp
. joinI
. enumInflate GZipOrZlib defaultDecompressParams
. joinI
. eneeCM
$ I.zip (mkMap name) (mkMap accession)
)
mkMap f = I.foldl' (\ !m x -> M.insert (f x) x m) M.empty