module Pdf.Toolbox.Document.Pdf
(
Pdf,
Pdf',
runPdf,
runPdfWithHandle,
document,
flushObjectCache,
withoutObjectCache,
knownFilters,
isEncrypted,
setUserPassword,
defaultUserPassword,
decrypt,
MonadIO(..)
)
where
import Data.Monoid
import Data.Int
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Map (Map)
import qualified Data.Map as Map
import Control.Applicative
import Control.Monad
import Control.Monad.Trans.Class
import Control.Monad.Trans.State
import System.IO
import qualified System.IO.Streams as Streams
import Pdf.Toolbox.Core
import Pdf.Toolbox.Document.Monad
import Pdf.Toolbox.Document.Encryption
import Pdf.Toolbox.Document.Internal.Types
data PdfState = PdfState {
stRIS :: RIS,
stFilters :: [StreamFilter],
stLastXRef :: Maybe XRef,
stAddToObjectCache :: Bool,
stObjectCache :: Map Ref (Object Int64),
stXRefStreamCache :: Map Int64 [ByteString],
stDecryptor :: Maybe Decryptor
}
newtype Pdf' m a = Pdf' (StateT PdfState m a)
deriving (Monad, Functor, Applicative, MonadIO, MonadTrans)
type Pdf m = PdfE (Pdf' m)
instance MonadIO m => MonadPdf (Pdf' m) where
lookupObject ref = annotateError ("lookupObject: " ++ show ref) $ do
cached <- getFromCache ref
case cached of
Just o -> return o
Nothing -> do
xref <- getLastXRef
entry <- lookupEntryRec ref xref
o <- readObjectForEntry ref entry
addObjectToCache ref o
return o
streamContent ref s = do
decryptor <- do
dec <- getDecryptor
case dec of
Nothing -> return return
Just d -> return $ d ref
takeStreamContent decryptor s
getDecryptor = lift $ Pdf' $ gets stDecryptor
getRIS = lift $ Pdf' $ gets stRIS
getStreamFilters = lift $ Pdf' $ gets stFilters
readObjectForEntry :: MonadIO m => Ref -> XRefEntry -> Pdf m (Object Int64)
readObjectForEntry ref (XRefTableEntry entry)
| teIsFree entry = return ONull
| otherwise = do
ris <- getRIS
readObjectAtOffset ris (teOffset entry) (teGen entry) >>= decrypt ref
readObjectForEntry ref (XRefStreamEntry entry) =
case entry of
StreamEntryFree _ _ -> return ONull
StreamEntryUsed off gen -> do
ris <- getRIS
readObjectAtOffset ris off gen >>= decrypt ref
StreamEntryCompressed index num -> do
objStream <- lookupObject (Ref index 0) >>= toStream
Stream dict is <- streamContent (Ref index 0) objStream
first <- lookupDict "First" dict >>= fromObject >>= intValue
mapObject (error "readObjectForEntry: impossible") `liftM`
readCompressedObject is (fromIntegral first) num
getXRefStream :: MonadIO m => Stream Int64 -> Pdf m (Stream IS)
getXRefStream s@(Stream dict off) = do
cache <- lift $ Pdf' $ gets stXRefStreamCache
content <-
case Map.lookup off cache of
Just content -> return content
Nothing -> do
Stream _ is <- takeStreamContent return s
content <- liftIO $ Streams.toList is
lift $ Pdf' $ modify $ \st -> st {stXRefStreamCache = Map.insert off content $ stXRefStreamCache st}
return content
is <- liftIO $ Streams.fromList content
return $ Stream dict is
lookupEntryRec :: MonadIO m => Ref -> XRef -> Pdf m XRefEntry
lookupEntryRec ref = annotateError ("Can't find xref entry for ref: " ++ show ref) . loop
where
loop xref = do
res <- lookupXRefEntry ref xref
case res of
Just e -> return e
Nothing -> do
ris <- getRIS
prev <- prevXRef ris xref
case prev of
Just xref' -> loop xref'
Nothing -> left $ UnexpectedError "There are no more xrefs"
lookupXRefEntry :: MonadIO m => Ref -> XRef -> Pdf m (Maybe XRefEntry)
lookupXRefEntry ref (XRefTable off) = do
ris <- getRIS
seek ris off
_ <- inputStream ris >>= isTable
fmap XRefTableEntry `liftM` lookupTableEntry ris ref
lookupXRefEntry ref (XRefStream _ s) = do
decoded <- getXRefStream s
fmap XRefStreamEntry `liftM` lookupStreamEntry decoded ref
takeStreamContent :: MonadIO m => (IS -> IO IS) -> Stream Int64 -> Pdf m (Stream IS)
takeStreamContent decryptor s@(Stream dict _) = annotateError ("reading stream content: " ++ show s) $ do
len <- do
obj <- lookupDict "Length" dict
case obj of
ONumber _ -> fromObject obj >>= intValue
ORef ref -> lookupObject ref >>= fromObject >>= intValue
_ -> left $ UnexpectedError $ "Unexpected length object in stream: " ++ show obj
ris <- getRIS
filters <- lift $ Pdf' $ gets stFilters
decodedStreamContent ris filters decryptor len s
getLastXRef :: MonadIO m => Pdf m XRef
getLastXRef = do
cached <- lift $ Pdf' $ gets stLastXRef
case cached of
Just xref -> return xref
Nothing -> do
xref <- getRIS >>= lastXRef
lift $ Pdf' $ modify $ \st -> st {stLastXRef = Just xref}
return xref
getFromCache :: Monad m => Ref -> Pdf m (Maybe (Object Int64))
getFromCache ref = do
cache <- lift $ Pdf' $ gets stObjectCache
return $ Map.lookup ref cache
addObjectToCache :: Monad m => Ref -> Object Int64 -> Pdf m ()
addObjectToCache ref o = do
add <- lift $ Pdf' $ gets stAddToObjectCache
when add $
lift $ Pdf' $ modify $ \st ->
st {stObjectCache = Map.insert ref o $ stObjectCache st}
withoutObjectCache :: Monad m => Pdf m () -> Pdf m ()
withoutObjectCache action = do
old <- lift $ Pdf' $ do
val <- gets stAddToObjectCache
modify $ \st -> st {stAddToObjectCache = False}
return val
action
lift $ Pdf' $ modify $ \st -> st {stAddToObjectCache = old}
flushObjectCache :: Monad m => Pdf m ()
flushObjectCache = lift $ Pdf' $ modify $ \st -> st {stObjectCache = Map.empty}
runPdf :: MonadIO m => RIS -> [StreamFilter] -> Pdf m a -> m (Either PdfError a)
runPdf ris filters action = runPdf' ris filters $ runEitherT action
runPdfWithHandle :: MonadIO m => Handle -> [StreamFilter] -> Pdf m a -> m (Either PdfError a)
runPdfWithHandle handle filters action = do
ris <- liftIO $ fromHandle handle
runPdf ris filters action
runPdf' :: MonadIO m => RIS -> [StreamFilter] -> Pdf' m a -> m a
runPdf' ris filters (Pdf' action) = evalStateT action $ PdfState {
stRIS = ris,
stFilters = filters,
stLastXRef = Nothing,
stObjectCache = Map.empty,
stXRefStreamCache = Map.empty,
stDecryptor = Nothing,
stAddToObjectCache = True
}
document :: MonadIO m => Pdf m Document
document = do
ris <- getRIS
xref <- lastXRef ris
tr <- trailer ris xref
return $ Document xref tr
isEncrypted :: MonadIO m => Pdf m Bool
isEncrypted = annotateError "isEncrypted" $ do
ris <- getRIS
tr <- lastXRef ris >>= trailer ris
case lookupDict' "Encrypt" tr of
Nothing -> return False
Just _ -> return True
setUserPassword :: MonadIO m => ByteString -> Pdf m Bool
setUserPassword pass = annotateError "setUserPassword" $ do
ris <- getRIS
tr <- lastXRef ris >>= trailer ris
enc <- case lookupDict' "Encrypt" tr of
Nothing -> left $ UnexpectedError "The document is not encrypted"
Just enc -> deref enc >>= fromObject
decryptor <- mkStandardDecryptor tr enc $ BS.take 32 $ pass `mappend` defaultUserPassword
lift $ Pdf' $ modify $ \s -> s {stDecryptor = decryptor}
case decryptor of
Nothing -> return False
_ -> return True
decrypt :: MonadIO m => Ref -> Object a -> Pdf m (Object a)
decrypt ref o = do
decryptor <- getDecryptor
case decryptor of
Nothing -> return o
Just decr -> liftIO $ decryptObject (decr ref) o