{-# LANGUAGE GeneralizedNewtypeDeriving, MultiParamTypeClasses, ViewPatterns #-} {-# LANGUAGE FlexibleContexts #-} {-# OPTIONS -Wall #-} module Text.PDF.Info (-- * Reading PDF info pdfInfo ,PDFInfo(..) ,PDFSize(..) ,PDFInfoError(..) -- * Internals ,ParsePDFInfo ,runParse ,parse ,parseSize ,parseDate ,readRight) where import Prelude hiding (catch) import Control.Monad.Error import System.Process import Control.Applicative import Control.Arrow import Data.Char import Data.Time import System.Locale import Control.Exception -- | A type representing the output from the pdfinfo command. data PDFInfo = PDFInfo { pdfInfoAuthor :: Maybe String -- ^ Author: E.g. Chris Done , pdfInfoCreator :: Maybe String -- ^ Creator: E.g. Microsoft® Office Word 2007 , pdfInfoProducer :: Maybe String -- ^ Producer: E.g. Microsoft® Office Word 2007 , pdfInfoCreationDate :: Maybe UTCTime -- ^ Creation Date , pdfInfoModDate :: Maybe UTCTime -- ^ Modification Date , pdfInfoTagged :: Maybe Bool -- ^ Tagged? , pdfInfoPages :: Maybe Integer -- ^ Pages: E.g. 238 , pdfInfoEncrypted :: Maybe Bool -- ^ Encrypted? , pdfInfoPageSize :: Maybe PDFSize -- ^ Page: E.g. 595.32 x 841.92 pts (A4) , pdfInfoFileSize :: Maybe Integer -- ^ File: E.g. 4061737 bytes , pdfInfoOptimized :: Maybe Bool -- ^ Optimized? , pdfInfoPDFVersion :: Maybe Double -- ^ PDF: E.g. 1.5 } deriving Show -- | Possible things that can go wrong while reading the info. data PDFInfoError = ParseError String -- ^ Couldn't parse a property value. | ProcessError IOException -- ^ Error to do with the pdfinfo process. | NoMessage -- ^ No message given. | SomeError String -- ^ Some nonspecific error. deriving Show -- | Size of the PDF in pts. data PDFSize = PDFSize { pdfSizeW :: Float, pdfSizeH :: Float } deriving (Eq,Show) instance Error PDFInfoError where noMsg = NoMessage; strMsg = SomeError newtype ParsePDFInfo a = ParsePDFInfo { runParse :: Either PDFInfoError a } deriving (Monad,Functor,MonadError PDFInfoError) instance Applicative ParsePDFInfo where (<*>) = ap; pure = return -- | Run pdfinfo on the given file. Handles IO exceptions to do with -- running the process. pdfInfo :: MonadIO m => FilePath -> m (Either PDFInfoError PDFInfo) pdfInfo path = liftIO $ loadInfo `catch` ioErrorHandler where loadInfo = parse <$> readProcess "pdfinfo" [path] "" ioErrorHandler = return . Left . ProcessError -- | Parse PDFInfo's output. parse :: String -> Either PDFInfoError PDFInfo parse out = runParse $ PDFInfo <$> string "Author" <*> string "Creator" <*> string "Producer" <*> date "CreationDate" <*> date "ModDate" <*> bool "Tagged" <*> integer "Pages" <*> bool "Encrypted" <*> size "Page size" <*> integer "File size" <*> bool "Optimized" <*> floating "PDF version" where string = get id date = get (>>= parseDate) size = get (>>= parseSize) bool = get $ fmap $ \yes -> yes == "yes" floating = readIt integer = readIt readIt :: Read a => String -> ParsePDFInfo (Maybe a) readIt = get (>>= readRight) properties = map split . lines $ out get f name = case lookup name properties of Just ok -> catchError (Just <$> (f $ return $ trim ok)) (\_ -> return Nothing) Nothing -> return Nothing split = second (drop 2) . span (/=':') trim = bi reverse (dropWhile isSpace) . dropWhile isSpace -- | Parse a page size. This is loosely defined. parseSize :: String -> ParsePDFInfo PDFSize parseSize s = case words s of ((readRight -> Right x):"x":(readRight -> Right y):_) -> return $ PDFSize x y _ -> throwError $ ParseError $ "Unable to read size: " ++ show s -- | Parse a date according to pdfinfo's format. parseDate :: String -> ParsePDFInfo UTCTime parseDate s = case parseTime defaultTimeLocale "%a %b %e %H:%M:%S %Y" s of Just ok -> return ok Nothing -> throwError $ ParseError $ "Unable to parse date: " ++ show s -- | Read a value, maybe, allow misc trailing data. readRight :: (MonadError PDFInfoError m,Read a) => String -> m a readRight s = case reads s of [(v,_)] -> return v _ -> throwError $ ParseError $ "Couldn't read value: " ++ show s -- | Untwist a value, apply a function, twist it back. bi :: (c1 -> c) -> (c -> c1) -> c1 -> c bi g f = g . f . g