module Pdf.Toolbox.Content.Parser
(
parseContentStream,
readNextOperator,
parseContent
)
where
import Data.Int
import qualified Data.ByteString.Char8 as ByteString8
import Data.Attoparsec.ByteString.Char8 (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as Parser
import Data.IORef
import Control.Applicative
import Control.Exception
import System.IO.Streams (InputStream)
import qualified System.IO.Streams as Streams
import qualified System.IO.Streams.Attoparsec as Streams
import Pdf.Toolbox.Core
import Pdf.Toolbox.Core.Parsers.Object
import Pdf.Toolbox.Content.Ops
-- | Build a stream of content expressions from a list of content streams.
--
-- The given streams are first merged into a single byte stream by
-- 'combineStreams' (which decodes each one using the supplied filters and
-- the decryptor applied to that stream's 'Ref'), and the merged input is
-- then parsed with 'parseContent'.
--
-- Each list element carries the stream, its reference and an 'Int' that is
-- forwarded to the decoder (presumably the stream length -- confirm against
-- 'decodedStreamContent').
parseContentStream
  :: MonadIO m
  => RIS
  -> [StreamFilter]
  -> (Ref -> IS -> IO IS)
  -> [(Stream Int64, Ref, Int)]
  -> PdfE m (InputStream Expr)
parseContentStream ris filters decryptor streams =
  combineStreams ris filters decryptor streams
    >>= liftIO . Streams.parserToInputStream parseContent
-- | Read the next operator, together with its operands, from a stream of
-- content expressions.
--
-- Objects are collected as operands until an operator is seen; the result
-- pairs that operator with the operands in the order they were read.
-- Returns 'Nothing' when the stream ends with no pending operands, and fails
-- with 'UnexpectedError' when it ends while operands are still pending.
-- A 'Streams.ParseException' raised while reading is turned into an
-- 'UnexpectedError' as well.
readNextOperator :: MonadIO m => InputStream Expr -> PdfE m (Maybe Operator)
readNextOperator is =
  annotateError "reading the next operator from content stream" (collect [])
  where
    -- Operands are accumulated newest-first, so they are reversed on exit.
    collect operands = do
      next <- nextExpr
      case next of
        Just (Op op) -> return (Just (op, reverse operands))
        Just (Obj obj) -> collect (obj : operands)
        Nothing
          | null operands -> return Nothing
          | otherwise ->
              throwE (UnexpectedError ("Args without op: " ++ show operands))

    -- Pull one expression, mapping parser failures into the PDF error type.
    nextExpr = do
      outcome <- tryPdfIO $
        fmap Right (Streams.read is)
          `catch` (\err ->
            return (Left (UnexpectedError (show (err :: Streams.ParseException)))))
      either throwE return outcome
-- | Present several content streams as one continuous input stream.
--
-- An empty list yields an empty input. Otherwise the first stream is decoded
-- eagerly (so a decoding failure surfaces in 'PdfE'), and each remaining
-- stream is decoded lazily, only once the previous one is exhausted. A
-- failure to decode one of those later streams is raised as an 'IOError'
-- built with 'userError', because it happens inside the IO read callback.
--
-- A single newline chunk is emitted whenever the reader moves on to the next
-- stream. Content streams of a page are split on lexical token boundaries,
-- so without a separator the last token of one stream and the first token of
-- the next could fuse into a single bogus token (e.g. @Q@ followed by @q@
-- would parse as the unknown operator @Qq@). Extra whitespace between tokens
-- is harmless to the content parser.
combineStreams
  :: MonadIO m
  => RIS
  -> [StreamFilter]
  -> (Ref -> IS -> IO IS)
  -> [(Stream Int64, Ref, Int)]
  -> PdfE m IS
combineStreams _ _ _ [] = liftIO Streams.nullInput
combineStreams ris filters decryptor (x:xs) = do
  reader <- mkReader x xs
  ref <- liftIO $ newIORef reader
  liftIO $ Streams.makeInputStream (doRead ref)
  where
    -- Decode one stream and pair it with the streams still to be read.
    -- Kept as a function binding so it can be used both in 'PdfE m' (above)
    -- and in 'ExceptT' over IO (below).
    mkReader (s, ref, len) ss = do
      Stream _ is <- decodedStreamContent ris filters (decryptor ref) len s
      return (is, ss)

    -- Read callback: serve chunks from the current stream, switching to the
    -- next one when the current stream reports end of input.
    doRead ref = do
      (is, ss) <- readIORef ref
      chunk <- Streams.read is
      case chunk of
        Just _ -> return chunk
        Nothing ->
          case ss of
            [] -> return Nothing
            (h:t) -> do
              reader <- runExceptT $ mkReader h t
              case reader of
                Left e -> ioError $ userError $ show e
                Right r -> do
                  writeIORef ref r
                  -- Separate adjacent streams so their tokens cannot merge.
                  return $ Just $ ByteString8.pack "\n"
-- | Parse a single content stream expression.
--
-- Yields 'Nothing' when only whitespace and comments remain before the end
-- of input. Otherwise skips leading whitespace and comments and yields, in
-- order of preference: an object, an operator made of regular characters,
-- or a one-character 'UnknownOp' for any other character.
parseContent :: Parser (Maybe Expr)
parseContent = atEnd <|> expression
  where
    atEnd = Nothing <$ (skipSpace *> Parser.endOfInput)

    expression = skipSpace *> (Just <$> (object <|> operator <|> stray))

    object = Obj <$> parseObject

    operator = Op . toOp <$> Parser.takeWhile1 isRegularChar

    -- Fallback: consume one character so parsing always makes progress.
    stray = (\c -> Op (UnknownOp (ByteString8.pack [c]))) <$> Parser.anyChar
-- | Skip whitespace and any number of @%@ comments.
--
-- A comment starts at @%@ and extends up to (not including) the next line
-- feed or carriage return; whitespace following each comment is skipped too.
skipSpace :: Parser ()
skipSpace = Parser.skipSpace *> (() <$ many comment)
  where
    comment =
      Parser.char '%' *> Parser.skipWhile insideLine *> Parser.skipSpace

    insideLine c = not (c == '\n' || c == '\r')