module Text.PDF.Info
(
pdfInfo
,PDFInfo(..)
,PDFSize(..)
,PDFInfoError(..)
,ParsePDFInfo
,runParse
,parse
,parseSize
,parseDate
,readRight)
where
import Prelude
import Control.Monad.Error
import System.Process
import Control.Applicative
import Control.Arrow
import Data.Char
import Data.Time
import System.Locale
import Control.Exception as E
data PDFInfo = PDFInfo {
pdfInfoTitle :: Maybe String
, pdfInfoSubject :: Maybe String
, pdfInfoAuthor :: Maybe String
, pdfInfoCreator :: Maybe String
, pdfInfoProducer :: Maybe String
, pdfInfoCreationDate :: Maybe UTCTime
, pdfInfoModDate :: Maybe UTCTime
, pdfInfoTagged :: Maybe Bool
, pdfInfoPages :: Maybe Integer
, pdfInfoEncrypted :: Maybe Bool
, pdfInfoPageSize :: Maybe PDFSize
, pdfInfoFileSize :: Maybe Integer
, pdfInfoOptimized :: Maybe Bool
, pdfInfoPDFVersion :: Maybe Double
} deriving Show
data PDFInfoError =
ParseError String
| ProcessError IOException
| NoMessage
| SomeError String
deriving Show
data PDFSize = PDFSize { pdfSizeW :: Float, pdfSizeH :: Float }
deriving (Eq,Show)
instance Error PDFInfoError where noMsg = NoMessage; strMsg = SomeError
newtype ParsePDFInfo a = ParsePDFInfo { runParse :: Either PDFInfoError a }
deriving (Monad,Functor,MonadError PDFInfoError)
instance Applicative ParsePDFInfo where (<*>) = ap; pure = return
pdfInfo :: MonadIO m => FilePath -> m (Either PDFInfoError PDFInfo)
pdfInfo path = liftIO $ loadInfo `E.catch` ioErrorHandler where
loadInfo = parse <$> readProcess "pdfinfo" [path] ""
ioErrorHandler = return . Left . ProcessError
parse :: String -> Either PDFInfoError PDFInfo
parse out = runParse $
PDFInfo <$> string "Title"
<*> string "Subject"
<*> string "Author"
<*> string "Creator"
<*> string "Producer"
<*> date "CreationDate"
<*> date "ModDate"
<*> bool "Tagged"
<*> integer "Pages"
<*> bool "Encrypted"
<*> size "Page size"
<*> integer "File size"
<*> bool "Optimized"
<*> floating "PDF version"
where string = get id
date = get (>>= parseDate)
size = get (>>= parseSize)
bool = get $ fmap $ \yes -> yes == "yes"
floating = readIt
integer = readIt
readIt :: Read a => String -> ParsePDFInfo (Maybe a)
readIt = get (>>= readRight)
properties = map split . lines $ out
get f name =
case lookup name properties of
Just ok -> catchError (Just <$> (f $ return $ trim ok))
(\_ -> return Nothing)
Nothing -> return Nothing
split = second (drop 2) . span (/=':')
trim = bi reverse (dropWhile isSpace) . dropWhile isSpace
parseSize :: String -> ParsePDFInfo PDFSize
parseSize s =
case words s of
((readRight -> Right x):"x":(readRight -> Right y):_) ->
return $ PDFSize x y
_ -> throwError $ ParseError $ "Unable to read size: " ++ show s
parseDate :: String -> ParsePDFInfo UTCTime
parseDate s =
case parseTime defaultTimeLocale "%a %b %e %H:%M:%S %Y" s of
Just ok -> return ok
Nothing -> throwError $ ParseError $ "Unable to parse date: " ++ show s
readRight :: (MonadError PDFInfoError m,Read a) => String -> m a
readRight s =
case reads s of
[(v,_)] -> return v
_ -> throwError $ ParseError $ "Couldn't read value: " ++ show s
bi :: (c1 -> c) -> (c -> c1) -> c1 -> c
bi g f = g . f . g