module Text.PDF.Info
(
pdfInfo
,PDFInfo(..)
,PDFSize(..)
,PDFInfoError(..)
,ParsePDFInfo
,runParse
,parse
,parseSize
,parseDate
,readRight)
where
import Prelude hiding (catch)
import Control.Monad.Error
import System.Process
import Control.Applicative
import Control.Arrow
import Data.Char
import Data.Time
import System.Locale
import Control.Exception
data PDFInfo = PDFInfo {
pdfInfoAuthor :: Maybe String
, pdfInfoCreator :: Maybe String
, pdfInfoProducer :: Maybe String
, pdfInfoCreationDate :: Maybe UTCTime
, pdfInfoModDate :: Maybe UTCTime
, pdfInfoTagged :: Maybe Bool
, pdfInfoPages :: Maybe Integer
, pdfInfoEncrypted :: Maybe Bool
, pdfInfoPageSize :: Maybe PDFSize
, pdfInfoFileSize :: Maybe Integer
, pdfInfoOptimized :: Maybe Bool
, pdfInfoPDFVersion :: Maybe Double
} deriving Show
data PDFInfoError =
ParseError String
| ProcessError IOException
| NoMessage
| SomeError String
deriving Show
data PDFSize = PDFSize { pdfSizeW :: Float, pdfSizeH :: Float }
deriving (Eq,Show)
instance Error PDFInfoError where noMsg = NoMessage; strMsg = SomeError
newtype ParsePDFInfo a = ParsePDFInfo { runParse :: Either PDFInfoError a }
deriving (Monad,Functor,MonadError PDFInfoError)
instance Applicative ParsePDFInfo where (<*>) = ap; pure = return
pdfInfo :: MonadIO m => FilePath -> m (Either PDFInfoError PDFInfo)
pdfInfo path = liftIO $ loadInfo `catch` ioErrorHandler where
loadInfo = parse <$> readProcess "pdfinfo" [path] ""
ioErrorHandler = return . Left . ProcessError
parse :: String -> Either PDFInfoError PDFInfo
parse out = runParse $
PDFInfo <$> string "Author"
<*> string "Creator"
<*> string "Producer"
<*> date "CreationDate"
<*> date "ModDate"
<*> bool "Tagged"
<*> integer "Pages"
<*> bool "Encrypted"
<*> size "Page size"
<*> integer "File size"
<*> bool "Optimized"
<*> floating "PDF version"
where string = get id
date = get (>>= parseDate)
size = get (>>= parseSize)
bool = get $ fmap $ \yes -> yes == "yes"
floating = readIt
integer = readIt
readIt :: Read a => String -> ParsePDFInfo (Maybe a)
readIt = get (>>= readRight)
properties = map split . lines $ out
get f name =
case lookup name properties of
Just ok -> catchError (Just <$> (f $ return $ trim ok))
(\_ -> return Nothing)
Nothing -> return Nothing
split = second (drop 2) . span (/=':')
trim = bi reverse (dropWhile isSpace) . dropWhile isSpace
parseSize :: String -> ParsePDFInfo PDFSize
parseSize s =
case words s of
((readRight -> Right x):"x":(readRight -> Right y):_) ->
return $ PDFSize x y
_ -> throwError $ ParseError $ "Unable to read size: " ++ show s
parseDate :: String -> ParsePDFInfo UTCTime
parseDate s =
case parseTime defaultTimeLocale "%a %b %e %H:%M:%S %Y" s of
Just ok -> return ok
Nothing -> throwError $ ParseError $ "Unable to parse date: " ++ show s
readRight :: (MonadError PDFInfoError m,Read a) => String -> m a
readRight s =
case reads s of
[(v,_)] -> return v
_ -> throwError $ ParseError $ "Couldn't read value: " ++ show s
bi :: (c1 -> c) -> (c -> c1) -> c1 -> c
bi g f = g . f . g