module Biobase.MAF.Import where
import Control.Arrow
import Control.Monad
import Data.ByteString.Char8 as BS
import Data.Either as E
import Data.Iteratee as I
import Data.Iteratee.Char as C
import Data.Iteratee.IO
import Data.Iteratee.ListLike as LL
import Data.List as L
import Data.Map as M
import Data.Maybe
import Prelude as P
import Biobase.MAF
-- | Enumeratee that turns a stream of raw MAF bytes into parsed records.
--
-- The input is first split into lines with 'enumLinesBS'. A leading run of
-- lines starting with @#@ is emitted as a single @Left@ header (via 'mkMAF').
-- The remaining lines are grouped into blocks that each start at a line
-- beginning with @a@, and every block is emitted as a @Right@ alignment
-- (via 'mkAlignment'). Blank lines and @#@ lines inside the alignment part
-- are dropped.
--
-- Whatever is still buffered when the stream ends is parsed and emitted
-- rather than discarded, so input that arrives as a single chunk is handled
-- the same way as input that arrives in many chunks.
eneeMAF :: (Monad m) => Enumeratee BS.ByteString [Either MAF Alignment] m a
eneeMAF = enumLinesBS ><> convStream f where
  -- Wait for the next chunk of lines.
  f = icont step Nothing

  -- Dispatch on the first line of a chunk: header mode or alignment mode.
  step (Chunk []) = f
  step (Chunk xs@(x:_))
    | BS.isPrefixOf "#" x = icont (mh xs) Nothing
    | otherwise = icont (md xs) Nothing
  step str = idone [] str

  -- Header mode: keep buffering until a non-@#@ line shows that the header
  -- is complete; the lines after it are handed back as leftover input.
  mh xs (Chunk ys)
    | P.null ts = icont (mh hs) Nothing
    | otherwise = idone [mkMAF hs] (Chunk ts)
    where (hs,ts) = P.span (BS.isPrefixOf "#") $ xs++ys
  -- End of stream while still in header mode: the buffer may hold the header
  -- followed by alignment lines (single-chunk input), so emit both.
  mh xs str = idone (mkMAF hs : P.map mkAlignment (blocksOf ts)) str
    where (hs,ts) = P.span (BS.isPrefixOf "#") xs

  -- Alignment mode: a block is only known to be complete once the next one
  -- has started, so emit all blocks but the last and hand the last one back.
  -- With zero or one block so far, keep buffering.
  md xs (Chunk ys) = case blocksOf ls of
      blks@(_:_:_) -> idone (P.map mkAlignment $ P.init blks) (Chunk $ P.last blks)
      _            -> icont (md ls) Nothing
    where ls = xs ++ ys
  -- End of stream: every buffered block is complete (possibly none).
  md xs str = idone (P.map mkAlignment $ blocksOf xs) str

  -- Drop blank and @#@ lines, then split into blocks, each starting at a
  -- line that begins with @a@.
  blocksOf = L.groupBy (\_ b -> not $ BS.isPrefixOf "a" b)
           . L.filter (not . BS.isPrefixOf "#")
           . L.filter (not . BS.null)
-- | Build the @Left@ header record from the header lines of a MAF file.
--
-- The first line is parsed into key/value pairs with 'mkKVs'; every further
-- line is stored as a copy. The last two fields of 'MAF' start out empty.
-- Calls 'error' when given no lines at all.
mkMAF ls = case ls of
  []       -> error "eneeMAF: empty stream or no header"
  (h : cs) -> Left (MAF (mkKVs h) (P.map BS.copy cs) [] [])
-- | Build a @Right@ alignment block from its lines.
--
-- The first line is parsed into key/value pairs with 'mkKVs'; each following
-- line is parsed with 'mkAligned'. The line list is only inspected lazily,
-- when a field of the result is demanded.
mkAlignment ls = Right (Alignment kvs rows)
  where
    kvs  = mkKVs (P.head ls)
    rows = P.map mkAligned (P.tail ls)
-- | Parse the @key=value@ words of a line into a map.
--
-- The line is split on whitespace and its first word is skipped; every
-- remaining word is split by 'mkKV'.
mkKVs l = M.fromList [ mkKV w | w <- P.drop 1 (BS.words l) ]
-- | Split a @key=value@ token at the first @=@.
--
-- The key is everything before the first @=@; the value is everything after
-- it (empty when there is no @=@). Both parts are copied with 'BS.copy'.
mkKV kv = (BS.copy k, BS.copy (BS.drop 1 rest))
  where
    (k, rest) = BS.span (/= '=') kv
-- | Parse one line of an alignment block into an 'Aligned' record.
--
-- The line must consist of exactly seven whitespace-separated words. The
-- first word is ignored; the others fill, in order, @key@, @start@,
-- @length@, @strand@ (first character of the word), @genomesize@ and
-- @value@. Numeric fields are parsed with 'read'. Any other word count
-- calls 'error' with the offending line in the message.
mkAligned x = case BS.words x of
  [_, k, s, l, d, g, v] -> Aligned
    { key        = BS.copy k
    , start      = read (BS.unpack s)
    , length     = read (BS.unpack l)
    , strand     = BS.head d
    , genomesize = read (BS.unpack g)
    , value      = BS.copy v
    }
  _ -> error $ "couldn't mkAligned from string: " ++ BS.unpack x
-- | Stream transformer that blanks the @value@ field of every aligned
-- sequence.
--
-- @Left@ elements pass through untouched. In each @Right@ element, every
-- entry of @sequences@ gets its @value@ replaced by the empty 'ByteString';
-- all other fields are kept.
eneeRemovePayload = mapStream (either Left (Right . stripBlock)) where
  stripBlock blk = blk { sequences = P.map blank (sequences blk) }
  blank a@Aligned{} = a { value = BS.empty }
-- | Iteratee that collects a whole parsed stream into one header record.
--
-- The first stream element must be a @Left@ header; the pattern bind fails
-- in the iteratee monad otherwise. The rest of the stream is read with
-- 'stream2list', and its @Right@ elements become the header's @blocks@.
iMAF = do
  Left hdr <- LL.head
  blks <- liftM rights stream2list
  return hdr { blocks = blks }