{-# LANGUAGE OverloadedStrings #-}

{-|
Module      : PDF.PDFIO
Description : IO utilities for hpdft
Copyright   : (c) Keiichiro Shikano, 2016
License     : MIT
Maintainer  : k16.shikano@gmail.com

Functions for use within IO. 
-}

module PDF.PDFIO ( getObjectByRef
                 , getPDFBSFromFile
                 , getPDFObjFromFile
                 , getRootRef
                 , getRootObj
                 , getStream
                 , getTrailer
                 , getInfo
                 ) where

import PDF.Definition
import PDF.DocumentStructure
  (rawStream, rawStreamByRef, findObjs, findObjsByRef,
   findDictByRef, findObjFromDict, rootRef,
   findTrailer, expandObjStm)
import PDF.Object (parsePDFObj)

import Debug.Trace

import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Lazy.Char8 as BSL

-- | Get PDF objects as a whole bytestring. Use `getPDFObjFromFile` instead if there's no reason to see a raw bytestring. 

getPDFBSFromFile :: FilePath -> IO [PDFBS]
getPDFBSFromFile :: FilePath -> IO [PDFBS]
getPDFBSFromFile FilePath
f = do
  ByteString
c <- FilePath -> IO ByteString
BS.readFile FilePath
f
  let bs :: [PDFBS]
bs = ByteString -> [PDFBS]
findObjs ByteString
c
  [PDFBS] -> IO [PDFBS]
forall (m :: * -> *) a. Monad m => a -> m a
return [PDFBS]
bs

-- | Get PDF objects each parsed as 'PDFObj' without being sorted. 

getPDFObjFromFile :: FilePath -> IO [PDFObj]
getPDFObjFromFile :: FilePath -> IO [PDFObj]
getPDFObjFromFile FilePath
f = do
  ByteString
c <- FilePath -> IO ByteString
BS.readFile FilePath
f
  let obj :: [PDFObj]
obj = [PDFObj] -> [PDFObj]
expandObjStm ([PDFObj] -> [PDFObj]) -> [PDFObj] -> [PDFObj]
forall a b. (a -> b) -> a -> b
$ (PDFBS -> PDFObj) -> [PDFBS] -> [PDFObj]
forall a b. (a -> b) -> [a] -> [b]
map PDFBS -> PDFObj
parsePDFObj ([PDFBS] -> [PDFObj]) -> [PDFBS] -> [PDFObj]
forall a b. (a -> b) -> a -> b
$ ByteString -> [PDFBS]
findObjs ByteString
c
  [PDFObj] -> IO [PDFObj]
forall (m :: * -> *) a. Monad m => a -> m a
return [PDFObj]
obj

-- | Get a PDF object from a whole 'PDFObj' by specifying `ref :: Int`

getObjectByRef :: Int -> [PDFObj] -> IO [Obj]
getObjectByRef :: Int -> [PDFObj] -> IO [Obj]
getObjectByRef Int
ref [PDFObj]
pdfobjs = do
  case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
ref [PDFObj]
pdfobjs of
    Just [Obj]
os -> [Obj] -> IO [Obj]
forall (m :: * -> *) a. Monad m => a -> m a
return [Obj]
os
    Maybe [Obj]
Nothing -> FilePath -> IO [Obj]
forall a. HasCallStack => FilePath -> a
error (FilePath -> IO [Obj]) -> FilePath -> IO [Obj]
forall a b. (a -> b) -> a -> b
$ FilePath
"No Object with Ref " FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ Int -> FilePath
forall a. Show a => a -> FilePath
show Int
ref

-- | Get a PDF stream from a whole 'PDFObj' by specifying `ref :: Int`

getStream :: Bool -> [Obj] -> IO BSL.ByteString
getStream :: Bool -> [Obj] -> IO ByteString
getStream Bool
hex [Obj]
obj = ByteString -> IO ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ Bool -> ByteString -> ByteString
showBSL Bool
hex (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ [Obj] -> ByteString
rawStream [Obj]
obj

showBSL :: Bool -> ByteString -> ByteString
showBSL Bool
hex ByteString
s =
  let strm' :: ByteString
strm' = (Builder -> ByteString
B.toLazyByteString (Builder -> ByteString)
-> (ByteString -> Builder) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Builder
B.lazyByteStringHex) ByteString
s
  in if Bool
hex
     then if ByteString -> Int64
BSL.length ByteString
strm' Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
> Int64
256 then [ByteString] -> ByteString
BSL.concat [Int64 -> ByteString -> ByteString
BSL.take Int64
256 ByteString
strm', ByteString
"...(omit)"] else ByteString
strm'
     else ByteString
s

-- | The reference number of /Root in `filename`.

getRootRef :: FilePath -> IO Int
getRootRef :: FilePath -> IO Int
getRootRef FilePath
filename = do
  ByteString
c <- FilePath -> IO ByteString
BS.readFile FilePath
filename
  let n :: Maybe Int
n = ByteString -> Maybe Int
rootRef ByteString
c
  case Maybe Int
n of
    Just Int
i -> Int -> IO Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
i
    Maybe Int
Nothing -> FilePath -> IO Int
forall a. HasCallStack => FilePath -> a
error FilePath
"Could not find rood object"
    
-- | The /Root object in `filename`.

getRootObj :: FilePath -> IO [Obj]
getRootObj :: FilePath -> IO [Obj]
getRootObj FilePath
filename = do
  Int
rootref <- FilePath -> IO Int
getRootRef FilePath
filename
  [PDFObj]
objs <- FilePath -> IO [PDFObj]
getPDFObjFromFile FilePath
filename
  case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
rootref [PDFObj]
objs of
    Just [Obj]
os -> [Obj] -> IO [Obj]
forall (m :: * -> *) a. Monad m => a -> m a
return [Obj]
os
    Maybe [Obj]
Nothing -> FilePath -> IO [Obj]
forall a. HasCallStack => FilePath -> a
error FilePath
"Could not get root object"

-- | The trailer of `filename`.

getTrailer :: FilePath -> IO Dict
getTrailer :: FilePath -> IO Dict
getTrailer FilePath
filename = do
  ByteString
c <- FilePath -> IO ByteString
BS.readFile FilePath
filename
  Dict -> IO Dict
forall (m :: * -> *) a. Monad m => a -> m a
return (Dict -> IO Dict) -> Dict -> IO Dict
forall a b. (a -> b) -> a -> b
$ ByteString -> Dict
findTrailer ByteString
c

-- | /Info of `filename`.

getInfo :: FilePath -> IO Dict
getInfo :: FilePath -> IO Dict
getInfo FilePath
filename = do
  Dict
d <- FilePath -> IO Dict
getTrailer FilePath
filename
  [PDFObj]
objs <- FilePath -> IO [PDFObj]
getPDFObjFromFile FilePath
filename
  let inforef :: Int
inforef = case Dict -> FilePath -> Maybe Obj
findObjFromDict Dict
d FilePath
"/Info" of
                  Just (ObjRef Int
ref) -> Int
ref
                  Just Obj
_ -> FilePath -> Int
forall a. HasCallStack => FilePath -> a
error FilePath
"There seems to be no Info"
                  Maybe Obj
Nothing -> FilePath -> Int
forall a. HasCallStack => FilePath -> a
error FilePath
"There seems to be no Info"
  case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
inforef [PDFObj]
objs of
    Just Dict
os -> Dict -> IO Dict
forall (m :: * -> *) a. Monad m => a -> m a
return Dict
os
    Maybe Dict
Nothing -> FilePath -> IO Dict
forall a. HasCallStack => FilePath -> a
error FilePath
"Could not get info object"