module Bio.Sequence.Stockholm.Stream
(
Event(..)
, parseEvents
, renderEvents
)
where
import Control.Applicative
import Data.Monoid (mappend)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.List as CL
import qualified Data.Attoparsec as A
import qualified Data.Attoparsec.Char8 as A8
import Data.Conduit.Attoparsec (sinkParser)
import qualified Blaze.ByteString.Builder as Blaze
import Data.Conduit.Blaze (builderToByteString)
data Event = EvHeader
| EvEnd
| EvComment L.ByteString
| EvSeqData B.ByteString L.ByteString
| EvGF B.ByteString L.ByteString
| EvGC B.ByteString L.ByteString
| EvGS B.ByteString B.ByteString L.ByteString
| EvGR B.ByteString B.ByteString L.ByteString
deriving (Eq, Ord, Show)
eventParser :: A.Parser Event
eventParser = hash *> (ann <|> comment)
<|> end
<|> seqdata
where
word = A.takeTill A8.isHorizontalSpace <* spaces
tillNextLine = A.takeLazyByteString
hash = A8.char '#'
comment = EvComment <$> tillNextLine
end = EvEnd <$ A8.string "//" <* spaces
seqdata = EvSeqData <$> word <*> tillNextLine
ann = A8.string "=G" *> (gf <|> gc <|> gs <|> gr)
where
gf = EvGF <$ A8.char 'F' <* spaces <*> word <*> tillNextLine
gc = EvGC <$ A8.char 'C' <* spaces <*> word <*> tillNextLine
gs = EvGS <$ A8.char 'S' <* spaces <*> word <*> word <*> tillNextLine
gr = EvGR <$ A8.char 'R' <* spaces <*> word <*> word <*> tillNextLine
headerParser :: A.Parser Event
headerParser = EvHeader <$ A8.char '#' <* spaces <* mystring "STOCKHOLM 1.0" <* spaces
where
mystring (x:xs) = A8.char x *> mystring xs
mystring [] = pure ()
spaces :: A.Parser ()
spaces = A.skipWhile A8.isHorizontalSpace
parseEvents :: C.ResourceThrow m => C.Conduit B.ByteString m Event
parseEvents = C.sequenceSink LookingForHeader go
where
go LookingForHeader = do
dropSpaces
let emit = C.Emit InsideStockholm . (:[])
insideLine C.=$ sinkParser $ C.Stop <$ A8.endOfInput
<|> emit <$> headerParser
go InsideStockholm = do
dropSpaces
event <- insideLine C.=$ sinkParser eventParser
let newState = case event of
EvEnd -> LookingForHeader
_ -> InsideStockholm
return $ C.Emit newState [event]
dropSpaces = CB.dropWhile A8.isSpace_w8
insideLine = CB.takeWhile (/= 10)
data ParseEvents = LookingForHeader | InsideStockholm
eventPrinter :: Event -> Blaze.Builder
eventPrinter ev =
case ev of
EvHeader -> bs "# STOCKHOLM 1.0\n"
EvEnd -> bs "//\n"
EvComment comment -> bs "#" <> lbs comment <> n
EvSeqData seqlabel seqdata -> bs seqlabel <> s <> lbs seqdata <> n
EvGF feature data_ -> bs "#=GF " <> bs feature <> s <> lbs data_ <> n
EvGC feature data_ -> bs "#=GC " <> bs feature <> s <> lbs data_ <> n
EvGS seqlabel feature data_ -> bs "#=GS " <> bs seqlabel <> s <> bs feature <> s <> lbs data_ <> n
EvGR seqlabel feature data_ -> bs "#=GR " <> bs seqlabel <> s <> bs feature <> s <> lbs data_ <> n
where bs = Blaze.fromByteString
lbs = Blaze.fromLazyByteString
(<>) = mappend
s = bs " "
n = bs "\n"
renderEvents :: C.ResourceUnsafeIO m => C.Conduit Event m B.ByteString
renderEvents = CL.map eventPrinter C.=$= builderToByteString