module Text.PDF.Info
(
pdfInfo
,PDFInfo(..)
,PDFSize(..)
,PDFEncryptionInfo(..)
,PDFInfoError(..)
,ParsePDFInfo
,runParse
,parse
,parseSize
,parseDate
,parseEncrypted
,readRight)
where
import Control.Applicative
import Control.Arrow
import Control.Exception as E
import Control.Monad.Error
import Data.Char (isSpace)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time (UTCTime,parseTime)
import Data.Time.Locale.Compat (defaultTimeLocale)
import Prelude
import System.Exit
import System.Process.Text
data PDFInfo = PDFInfo {
pdfInfoTitle :: !(Maybe Text)
, pdfInfoSubject :: !(Maybe Text)
, pdfInfoAuthor :: !(Maybe Text)
, pdfInfoCreator :: !(Maybe Text)
, pdfInfoProducer :: !(Maybe Text)
, pdfInfoCreationDate :: !(Maybe UTCTime)
, pdfInfoModDate :: !(Maybe UTCTime)
, pdfInfoTagged :: !(Maybe Bool)
, pdfInfoPages :: !(Maybe Integer)
, pdfInfoEncrypted :: !(Maybe PDFEncryptionInfo)
, pdfInfoPageSize :: !(Maybe PDFSize)
, pdfInfoFileSize :: !(Maybe Integer)
, pdfInfoOptimized :: !(Maybe Bool)
, pdfInfoPDFVersion :: !(Maybe Double)
} deriving Show
data PDFInfoError
= ParseError !String
| ProcessFailure !Text
| ProcessError !IOException
| NoMessage
| SomeError String
deriving Show
data PDFSize = PDFSize { pdfSizeW :: !Float, pdfSizeH :: !Float }
deriving (Eq,Show)
data PDFEncryptionInfo
= PDFNoEncryption
| PDFEncryption {
pdfCanPrint :: !(Maybe Bool)
, pdfCanCopy :: !(Maybe Bool)
, pdfCanChange :: !(Maybe Bool)
, pdfCanAddNotes :: !(Maybe Bool)
, pdfEncryptionAlgorithm :: !(Maybe Text)
}
deriving (Eq,Show)
instance Error PDFInfoError where noMsg = NoMessage; strMsg = SomeError
newtype ParsePDFInfo a = ParsePDFInfo { runParse :: Either PDFInfoError a }
deriving (Monad,Functor,MonadError PDFInfoError)
instance Applicative ParsePDFInfo where (<*>) = ap; pure = return
pdfInfo :: MonadIO m => FilePath -> m (Either PDFInfoError PDFInfo)
pdfInfo path = liftIO $ loadInfo `E.catch` ioErrorHandler where
loadInfo = do (code,out,err) <- readProcessWithExitCode "pdfinfo" ["-enc","UTF-8",path] ""
case code of
ExitSuccess -> return (parse out)
ExitFailure{} -> return (Left (ProcessFailure err))
ioErrorHandler = return . Left . ProcessError
parse :: Text -> Either PDFInfoError PDFInfo
parse out = runParse $
PDFInfo <$> string props "Title"
<*> string props "Subject"
<*> string props "Author"
<*> string props "Creator"
<*> string props "Producer"
<*> date "CreationDate"
<*> date "ModDate"
<*> bool props "Tagged"
<*> integer "Pages"
<*> encrypted "Encrypted"
<*> size "Page size"
<*> integer "File size"
<*> bool props "Optimized"
<*> floating "PDF version"
where date = get parseDate
size = get parseSize
encrypted = get parseEncrypted
floating = readIt
integer = readIt
readIt :: Read a => Text -> ParsePDFInfo (Maybe a)
readIt = get readRight
props = map split . T.lines $ out
get = withProps props
type Props = [(Text,Text)]
withProps :: Props -> (Text -> ParsePDFInfo a) -> Text -> ParsePDFInfo (Maybe a)
withProps properties f name =
case lookup name properties of
Just ok -> catchError (Just <$> (f $ T.strip ok))
(\_ -> return Nothing)
Nothing -> return Nothing
split :: Text -> (Text, Text)
split = second (T.drop 1) . T.span (/=':')
string :: Props -> Text -> ParsePDFInfo (Maybe Text)
string props = withProps props return
bool :: Props -> Text -> ParsePDFInfo (Maybe Bool)
bool props = withProps props $ \yes -> return $ yes == "yes"
parseSize :: Text -> ParsePDFInfo PDFSize
parseSize s =
case T.words s of
((readRight -> Right x):"x":(readRight -> Right y):_) ->
return $ PDFSize x y
_ -> throwError $ ParseError $ "Unable to read size: " ++ show s
parseDate :: Text -> ParsePDFInfo UTCTime
parseDate s =
case parseTime defaultTimeLocale "%a %b %e %H:%M:%S %Y" (T.unpack s) of
Just ok -> return ok
Nothing -> throwError $ ParseError $ "Unable to parse date: " ++ show s
parseEncrypted :: Text -> ParsePDFInfo PDFEncryptionInfo
parseEncrypted s =
case T.break isSpace s of
("yes",rest) ->
PDFEncryption <$> bool props "print"
<*> bool props "copy"
<*> bool props "change"
<*> bool props "addNotes"
<*> string props "algorithm"
where
props = map split $ T.words $ T.filter (flip notElem ['(',')']) rest
("no",_) -> return PDFNoEncryption
_ -> throwError $ ParseError $ "Unable to parse encryption: " ++ show s
readRight :: (MonadError PDFInfoError m,Read a) => Text -> m a
readRight s =
case reads (T.unpack s) of
[(v,_)] -> return v
_ -> throwError $ ParseError $ "Couldn't read value: " ++ show s