{-# LANGUAGE OverloadedStrings #-}

{-|
Module      : PDF.DocumentStructure
Description : Function to walk around Document Structure of a PDF file
Copyright   : (c) Keiichiro Shikano, 2020
License     : MIT
Maintainer  : k16.shikano@gmail.com
-}

module PDF.DocumentStructure
       ( parseTrailer
       , expandObjStm
       , rootRef
       , contentsStream
       , rawStreamByRef
       , findKids
       , findPages
       , findDict
       , findDictByRef
       , findDictOfType
       , findObjFromDict
       , findObjFromDictWithRef
       , findObjsByRef
       , findObjs
       , findTrailer
       , rawStream
       ) where

import Data.Char (chr)
import Data.List (find)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.ByteString.Builder as B
import qualified Data.Text as T
import Data.Maybe (fromMaybe)
import Numeric (readDec)

import Data.Attoparsec.ByteString.Char8 hiding (take)
import Data.Attoparsec.Combinator
import Control.Applicative
import Codec.Compression.Zlib (decompress)

import Debug.Trace

import PDF.Definition
import PDF.Object
import PDF.ContentStream (parseStream, parseColorSpace)
import PDF.Cmap (parseCMap)
import qualified PDF.OpenType as OpenType
import qualified PDF.CFF as CFF
import qualified PDF.Type1 as Type1

spaces :: Parser ()
spaces = Parser ()
skipSpace
oneOf :: String -> Parser Char
oneOf = (Char -> Bool) -> Parser Char
satisfy ((Char -> Bool) -> Parser Char)
-> (String -> Char -> Bool) -> String -> Parser Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Char -> Bool
inClass
noneOf :: String -> Parser Char
noneOf = (Char -> Bool) -> Parser Char
satisfy ((Char -> Bool) -> Parser Char)
-> (String -> Char -> Bool) -> String -> Parser Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Char -> Bool
notInClass

-- find objects

findObjs :: BS.ByteString -> [PDFBS]
findObjs :: ByteString -> [PDFBS]
findObjs ByteString
contents = case Parser [PDFBS] -> ByteString -> Either String [PDFBS]
forall a. Parser a -> ByteString -> Either String a
parseOnly (Parser ByteString PDFBS -> Parser [PDFBS]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser ByteString PDFBS
pdfObj) ByteString
contents of
  Left  String
err -> []
  Right [PDFBS]
rlt -> [PDFBS]
rlt

findXref :: BS.ByteString -> String
findXref :: ByteString -> String
findXref ByteString
contents = case Parser String -> ByteString -> Either String String
forall a. Parser a -> ByteString -> Either String a
parseOnly (Parser String
xref) ByteString
contents of
  Left  String
err -> []
  Right String
rlt -> String
rlt

findObjsByRef :: Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef :: Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
x [PDFObj]
pdfobjs = case (PDFObj -> Bool) -> [PDFObj] -> Maybe PDFObj
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Maybe Int -> PDFObj -> Bool
forall a b. Eq a => Maybe a -> (a, b) -> Bool
isRefObj (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
x)) [PDFObj]
pdfobjs of
  Just (Int
_,[Obj]
objs) -> [Obj] -> Maybe [Obj]
forall a. a -> Maybe a
Just [Obj]
objs
  Maybe PDFObj
Nothing -> Maybe [Obj]
forall a. Maybe a
Nothing
  where
    isRefObj :: Maybe a -> (a, b) -> Bool
isRefObj (Just a
x) (a
y, b
objs) = if a
xa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
y then Bool
True else Bool
False
    isRefObj Maybe a
_ (a, b)
_ = Bool
False

findObjFromDictWithRef :: Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef :: Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
ref String
name [PDFObj]
objs = case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
ref [PDFObj]
objs of 
  Just Dict
d -> Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
name
  Maybe Dict
Nothing -> Maybe Obj
forall a. Maybe a
Nothing
  
findObjFromDict :: Dict -> String -> Maybe Obj
findObjFromDict :: Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
name = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
isName Dict
d of
  Just (Obj
_, Obj
o) -> Obj -> Maybe Obj
forall a. a -> Maybe a
Just Obj
o
  Maybe (Obj, Obj)
otherwise -> Maybe Obj
forall a. Maybe a
Nothing
  where isName :: (Obj, b) -> Bool
isName (PdfName String
n, b
_) = if String
name String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
n then Bool
True else Bool
False
        isName (Obj, b)
_              = Bool
False

findDictByRef :: Int -> [PDFObj] -> Maybe Dict
findDictByRef :: Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
ref [PDFObj]
objs = case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
ref [PDFObj]
objs of
  Just [Obj]
os -> [Obj] -> Maybe Dict
findDict [Obj]
os
  Maybe [Obj]
Nothing -> Maybe Dict
forall a. Maybe a
Nothing

findDictOfType :: String -> [Obj] -> Maybe Dict
findDictOfType :: String -> [Obj] -> Maybe Dict
findDictOfType String
typename [Obj]
objs = case [Obj] -> Maybe Dict
findDict [Obj]
objs of
  Just Dict
d  -> if Dict -> Bool
forall (t :: * -> *). Foldable t => t (Obj, Obj) -> Bool
isType Dict
d then Dict -> Maybe Dict
forall a. a -> Maybe a
Just Dict
d else Maybe Dict
forall a. Maybe a
Nothing 
  Maybe Dict
Nothing -> Maybe Dict
forall a. Maybe a
Nothing
  where 
    isType :: t (Obj, Obj) -> Bool
isType t (Obj, Obj)
dict = (String -> Obj
PdfName String
"/Type",String -> Obj
PdfName String
typename) (Obj, Obj) -> t (Obj, Obj) -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t (Obj, Obj)
dict
 
findDict :: [Obj] -> Maybe Dict
findDict :: [Obj] -> Maybe Dict
findDict [Obj]
objs = case (Obj -> Bool) -> [Obj] -> Maybe Obj
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Obj -> Bool
isDict [Obj]
objs of
  Just (PdfDict Dict
d) -> Dict -> Maybe Dict
forall a. a -> Maybe a
Just Dict
d
  Maybe Obj
otherwise -> Maybe Dict
forall a. Maybe a
Nothing
  where 
    isDict :: Obj -> Bool
    isDict :: Obj -> Bool
isDict (PdfDict Dict
d) = Bool
True
    isDict Obj
_           = Bool
False

findPages :: Dict -> Maybe Int
findPages :: Dict -> Maybe Int
findPages Dict
dict = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
isPagesRef Dict
dict of
  Just (Obj
_, ObjRef Int
x) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
x
  Maybe (Obj, Obj)
Nothing            -> Maybe Int
forall a. Maybe a
Nothing
  where
    isPagesRef :: (Obj, Obj) -> Bool
isPagesRef (PdfName String
"/Pages", ObjRef Int
x) = Bool
True
    isPagesRef (Obj
_,Obj
_)                        = Bool
False
    
findKids :: Dict -> Maybe [Int]
findKids :: Dict -> Maybe [Int]
findKids Dict
dict = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
isKidsRefs Dict
dict of
  Just (Obj
_, PdfArray [Obj]
arr) -> [Int] -> Maybe [Int]
forall a. a -> Maybe a
Just ([Obj] -> [Int]
parseRefsArray [Obj]
arr)
  Maybe (Obj, Obj)
Nothing                -> Maybe [Int]
forall a. Maybe a
Nothing
  where 
    isKidsRefs :: (Obj, Obj) -> Bool
isKidsRefs (PdfName String
"/Kids", PdfArray [Obj]
x) = Bool
True
    isKidsRefs (Obj
_,Obj
_)                         = Bool
False

contentsStream :: Dict -> PSR -> [PDFObj] -> PDFStream
contentsStream :: Dict -> PSR -> [PDFObj] -> PDFStream
contentsStream Dict
dict PSR
st [PDFObj]
objs = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
contents Dict
dict of
  Just (PdfName String
"/Contents", PdfArray [Obj]
arr) -> [Obj] -> PDFStream
getContentArray [Obj]
arr
  Just (PdfName String
"/Contents", ObjRef Int
r) ->
    case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
r [PDFObj]
objs of
      Just [PdfArray [Obj]
arr] -> [Obj] -> PDFStream
getContentArray [Obj]
arr
      Just [Obj]
_ -> Int -> PDFStream
getContent Int
r
      Maybe [Obj]
Nothing -> String -> PDFStream
forall a. HasCallStack => String -> a
error String
"No content to be shown"
  Maybe (Obj, Obj)
Nothing -> String -> PDFStream
forall a. HasCallStack => String -> a
error String
"No content to be shown"
  where
    contents :: (Obj, b) -> Bool
contents (PdfName String
"/Contents", b
_) = Bool
True
    contents (Obj, b)
_ = Bool
False

    getContentArray :: [Obj] -> PDFStream
getContentArray [Obj]
arr = Dict -> PSR -> [PDFObj] -> PDFStream -> PDFStream
parseContentStream Dict
dict PSR
st [PDFObj]
objs (PDFStream -> PDFStream) -> PDFStream -> PDFStream
forall a b. (a -> b) -> a -> b
$
                          [PDFStream] -> PDFStream
BSL.concat ([PDFStream] -> PDFStream) -> [PDFStream] -> PDFStream
forall a b. (a -> b) -> a -> b
$ (Int -> PDFStream) -> [Int] -> [PDFStream]
forall a b. (a -> b) -> [a] -> [b]
map ([PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs) ([Obj] -> [Int]
parseRefsArray [Obj]
arr)
    getContent :: Int -> PDFStream
getContent Int
r = Dict -> PSR -> [PDFObj] -> PDFStream -> PDFStream
parseContentStream Dict
dict PSR
st [PDFObj]
objs (PDFStream -> PDFStream) -> PDFStream -> PDFStream
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
r

parseContentStream :: Dict -> PSR -> [PDFObj] -> BSL.ByteString -> PDFStream
parseContentStream :: Dict -> PSR -> [PDFObj] -> PDFStream -> PDFStream
parseContentStream Dict
dict PSR
st [PDFObj]
objs PDFStream
s = 
  PSR -> PDFStream -> PDFStream
parseStream (PSR
st {fontmaps :: [(String, Encoding)]
fontmaps=[(String, Encoding)]
fontdict, cmaps :: [(String, CMap)]
cmaps=[(String, CMap)]
cmap}) PDFStream
s
  where fontdict :: [(String, Encoding)]
fontdict = Dict -> [PDFObj] -> [(String, Encoding)]
findFontEncoding Dict
dict [PDFObj]
objs
        cmap :: [(String, CMap)]
cmap = Dict -> [PDFObj] -> [(String, CMap)]
findCMap Dict
dict [PDFObj]
objs

rawStreamByRef :: [PDFObj] -> Int -> BSL.ByteString
rawStreamByRef :: [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
pdfobjs Int
x = case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
x [PDFObj]
pdfobjs of
  Just [Obj]
objs -> [Obj] -> PDFStream
rawStream [Obj]
objs
  Maybe [Obj]
Nothing  -> String -> PDFStream
forall a. HasCallStack => String -> a
error String
"No object with stream to be shown"

rawStream :: [Obj] -> BSL.ByteString
rawStream :: [Obj] -> PDFStream
rawStream [Obj]
objs = case (Obj -> Bool) -> [Obj] -> Maybe Obj
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Obj -> Bool
isStream [Obj]
objs of
  Just (PdfStream PDFStream
strm) -> Dict -> PDFStream -> PDFStream
rawStream' (Dict -> Maybe Dict -> Dict
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe Dict -> Dict) -> Maybe Dict -> Dict
forall a b. (a -> b) -> a -> b
$ [Obj] -> Maybe Dict
findDict [Obj]
objs) PDFStream
strm
  Maybe Obj
Nothing               -> String -> PDFStream
BSL.pack (String -> PDFStream) -> String -> PDFStream
forall a b. (a -> b) -> a -> b
$ [Obj] -> String
forall a. Show a => a -> String
show [Obj]
objs
  where
    isStream :: Obj -> Bool
isStream (PdfStream PDFStream
s) = Bool
True
    isStream Obj
_             = Bool
False

    rawStream' :: Dict -> BSL.ByteString -> BSL.ByteString
    rawStream' :: Dict -> PDFStream -> PDFStream
rawStream' Dict
d PDFStream
s = Dict -> PDFStream -> PDFStream
forall (t :: * -> *).
Foldable t =>
t (Obj, Obj) -> PDFStream -> PDFStream
streamFilter Dict
d PDFStream
s

    streamFilter :: t (Obj, Obj) -> PDFStream -> PDFStream
streamFilter t (Obj, Obj)
d = case ((Obj, Obj) -> Bool) -> t (Obj, Obj) -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
withFilter t (Obj, Obj)
d of
      Just (PdfName String
"/Filter", PdfName String
"/FlateDecode")
        -> PDFStream -> PDFStream
decompress
      Just (PdfName String
"/Filter", PdfName String
f)
        -> String -> PDFStream -> PDFStream
forall a. HasCallStack => String -> a
error (String -> PDFStream -> PDFStream)
-> String -> PDFStream -> PDFStream
forall a b. (a -> b) -> a -> b
$ String
"Unknown Stream Compression: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
f -- need fix
      Just (Obj, Obj)
_ -> String -> PDFStream -> PDFStream
forall a. HasCallStack => String -> a
error (String -> PDFStream -> PDFStream)
-> String -> PDFStream -> PDFStream
forall a b. (a -> b) -> a -> b
$ String
"No Stream Compression Filter."
      Maybe (Obj, Obj)
Nothing -> PDFStream -> PDFStream
forall a. a -> a
id

    withFilter :: (Obj, b) -> Bool
withFilter (PdfName String
"/Filter", b
_) = Bool
True
    withFilter (Obj, b)
_                      = Bool
False

contentsColorSpace :: Dict -> PSR -> [PDFObj] -> [T.Text]
contentsColorSpace :: Dict -> PSR -> [PDFObj] -> [Text]
contentsColorSpace Dict
dict PSR
st [PDFObj]
objs = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
contents Dict
dict of
  Just (PdfName String
"/Contents", PdfArray [Obj]
arr) -> [[Text]] -> [Text]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Text]] -> [Text]) -> [[Text]] -> [Text]
forall a b. (a -> b) -> a -> b
$ (Int -> [Text]) -> [Int] -> [[Text]]
forall a b. (a -> b) -> [a] -> [b]
map (PSR -> PDFStream -> [Text]
parseColorSpace (PSR
st {xcolorspaces :: [String]
xcolorspaces=[String]
xobjcs}) (PDFStream -> [Text]) -> (Int -> PDFStream) -> Int -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs) ([Obj] -> [Int]
parseRefsArray [Obj]
arr)
  Just (PdfName String
"/Contents", ObjRef Int
x)     -> PSR -> PDFStream -> [Text]
parseColorSpace (PSR
st {xcolorspaces :: [String]
xcolorspaces=[String]
xobjcs}) (PDFStream -> [Text]) -> PDFStream -> [Text]
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
x
  Maybe (Obj, Obj)
Nothing                                  -> String -> [Text]
forall a. HasCallStack => String -> a
error String
"No content to be shown"
  where
    contents :: (Obj, b) -> Bool
contents (PdfName String
"/Contents", b
_) = Bool
True
    contents (Obj, b)
_                        = Bool
False
    xobjcs :: [String]
xobjcs = Dict -> [PDFObj] -> [String]
findXObjectColorSpace Dict
dict [PDFObj]
objs


-- find XObject

findXObjectColorSpace :: Dict -> [PDFObj] -> [String]
findXObjectColorSpace Dict
d [PDFObj]
os = Dict -> [PDFObj] -> [String]
xobjColorSpaceMap (Dict -> [PDFObj] -> Dict
findXObject Dict
d [PDFObj]
os) [PDFObj]
os

xobjColorSpaceMap :: Dict -> [PDFObj] -> [String]
xobjColorSpaceMap Dict
dict [PDFObj]
objs = ((Obj, Obj) -> String) -> Dict -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (Obj, Obj) -> String
pairwise Dict
dict
  where
    pairwise :: (Obj, Obj) -> String
pairwise (PdfName String
n, ObjRef Int
r) = Int -> [PDFObj] -> String
xobjColorSpace Int
r [PDFObj]
objs
    pairwise (Obj, Obj)
x = String
""

findXObject :: Dict -> [PDFObj] -> Dict
findXObject Dict
dict [PDFObj]
objs = case Dict -> [PDFObj] -> Maybe Dict
findResourcesDict Dict
dict [PDFObj]
objs of
  Just Dict
d -> case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/XObject" of
    Just (PdfDict Dict
d) -> Dict
d
    Maybe Obj
otherwise -> []
  Maybe Dict
Nothing -> []

xobjColorSpace :: Int -> [PDFObj] -> String
xobjColorSpace :: Int -> [PDFObj] -> String
xobjColorSpace Int
x [PDFObj]
objs = case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
"/ColorSpace" [PDFObj]
objs of
  Just (PdfName String
cs) -> String
cs
  Maybe Obj
otherwise -> String
""


-- find root ref from Trailer or Cross-Reference Dictionary

parseTrailer :: BS.ByteString -> Maybe Dict
parseTrailer :: ByteString -> Maybe Dict
parseTrailer ByteString
bs = case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.breakEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') ByteString
bs of
  (ByteString
source, ByteString
eofLine)
    | ByteString
"%%EOF" ByteString -> ByteString -> Bool
`BS.isPrefixOf` ByteString
eofLine
      -> Dict -> Maybe Dict
forall a. a -> Maybe a
Just (ByteString -> Dict
parseCRDict (ByteString -> Dict) -> ByteString -> Dict
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.drop (ByteString -> Int
forall p. (Eq p, Num p) => ByteString -> p
getOffset ByteString
source) ByteString
bs)
    | ByteString
source ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"" -> Maybe Dict
forall a. Maybe a
Nothing
    | Bool
otherwise -> ByteString -> Maybe Dict
parseTrailer (ByteString -> ByteString
BS.init ByteString
bs)

getOffset :: ByteString -> p
getOffset ByteString
bs = case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.breakEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') (ByteString -> ByteString
BS.init ByteString
bs) of
  (ByteString
_, ByteString
nstr) -> case ReadS p
forall a. (Eq a, Num a) => ReadS a
readDec ReadS p -> ReadS p
forall a b. (a -> b) -> a -> b
$ ByteString -> String
BS.unpack ByteString
nstr of
                 [(p
n,String
_)] -> p
n
                 [(p, String)]
_ -> String -> p
forall a. HasCallStack => String -> a
error String
"Could not find Offset"

parseCRDict :: BS.ByteString -> Dict
parseCRDict :: ByteString -> Dict
parseCRDict ByteString
rlt = case Parser Obj -> ByteString -> Either String Obj
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Obj
crdict ByteString
rlt of
  Left  String
err  -> String -> Dict
forall a. HasCallStack => String -> a
error (String -> Dict) -> String -> Dict
forall a b. (a -> b) -> a -> b
$ ByteString -> String
forall a. Show a => a -> String
show (Int -> ByteString -> ByteString
BS.take Int
100 ByteString
rlt)
  Right (PdfDict Dict
dict) -> Dict
dict
  Right Obj
_ -> String -> Dict
forall a. HasCallStack => String -> a
error String
"Could not find Cross-Reference dictionary"
  where
    crdict :: Parser Obj
    crdict :: Parser Obj
crdict = do 
      Parser ()
spaces
      (Parser () -> Parser ()
forall i a. Parser i a -> Parser i a
try Parser ()
skipCRtable Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
skipCRstream)
      Obj
d <- Parser Obj
pdfdictionary Parser Obj -> Parser () -> Parser Obj
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
spaces
      Obj -> Parser Obj
forall (m :: * -> *) a. Monad m => a -> m a
return Obj
d
    skipCRtable :: Parser ()
skipCRtable = ((Parser Char -> Parser ByteString ByteString -> Parser String
forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill Parser Char
anyChar (Parser ByteString ByteString -> Parser ByteString ByteString
forall i a. Parser i a -> Parser i a
try (Parser ByteString ByteString -> Parser ByteString ByteString)
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString ByteString
string ByteString
"trailer")) Parser String -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
spaces)
    skipCRstream :: Parser ()
skipCRstream = (Parser Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
digit Parser String -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
spaces Parser () -> Parser Char -> Parser Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Char
digit Parser Char
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString -> Parser ByteString ByteString
string ByteString
" obj" Parser ByteString ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
spaces)

rootRef :: BS.ByteString -> Maybe Int
rootRef :: ByteString -> Maybe Int
rootRef ByteString
bs = case ByteString -> Maybe Dict
parseTrailer ByteString
bs of
  Just Dict
dict -> ((Obj, Obj) -> Bool) -> Dict -> Maybe Int
findRefs (Obj, Obj) -> Bool
isRootRef Dict
dict
  Maybe Dict
Nothing   -> ByteString -> Maybe Int
rootRefFromCRStream ByteString
bs

rootRefFromCRStream :: BS.ByteString -> Maybe Int
rootRefFromCRStream :: ByteString -> Maybe Int
rootRefFromCRStream ByteString
bs =
  let offset :: Int
offset = (String -> Int
forall a. Read a => String -> a
read (String -> Int) -> (ByteString -> String) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
BS.unpack (ByteString -> String)
-> (ByteString -> ByteString) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> ByteString
forall a. [a] -> a
head ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [ByteString] -> [ByteString]
forall a. Int -> [a] -> [a]
drop Int
1 ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
BS.lines (ByteString -> Int) -> ByteString -> Int
forall a b. (a -> b) -> a -> b
$ (String -> ByteString -> ByteString
forall a. String -> a -> a
trace (ByteString -> String
forall a. Show a => a -> String
show ByteString
bs) ByteString
bs)) :: Int
      crstrm :: ByteString
crstrm = PDFBS -> ByteString
forall a b. (a, b) -> b
snd (PDFBS -> ByteString)
-> (ByteString -> PDFBS) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PDFBS] -> PDFBS
forall a. [a] -> a
head ([PDFBS] -> PDFBS)
-> (ByteString -> [PDFBS]) -> ByteString -> PDFBS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [PDFBS]
findObjs (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.drop Int
offset ByteString
bs
      crdict :: Dict
crdict = ByteString -> Dict
parseCRDict ByteString
crstrm
  in ((Obj, Obj) -> Bool) -> Dict -> Maybe Int
findRefs (Obj, Obj) -> Bool
isRootRef (Dict -> Maybe Int) -> Dict -> Maybe Int
forall a b. (a -> b) -> a -> b
$ Dict
crdict

isRootRef :: (Obj, Obj) -> Bool
isRootRef (PdfName String
"/Root", ObjRef Int
x) = Bool
True
isRootRef (Obj
_,Obj
_) = Bool
False

findRefs :: ((Obj,Obj) -> Bool) -> Dict -> Maybe Int
findRefs :: ((Obj, Obj) -> Bool) -> Dict -> Maybe Int
findRefs (Obj, Obj) -> Bool
pred Dict
dict = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
pred Dict
dict of
  Just (Obj
_, ObjRef Int
x) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
x
  Maybe (Obj, Obj)
Nothing            -> Maybe Int
forall a. Maybe a
Nothing


-- find Info

findTrailer :: ByteString -> Dict
findTrailer ByteString
bs = do
  case ByteString -> Maybe Dict
parseTrailer ByteString
bs of
    Just Dict
d -> Dict
d
    Maybe Dict
Nothing -> []

infoRef :: ByteString -> Maybe Int
infoRef ByteString
bs = case ByteString -> Maybe Dict
parseTrailer ByteString
bs of
  Just Dict
dict -> ((Obj, Obj) -> Bool) -> Dict -> Maybe Int
findRefs (Obj, Obj) -> Bool
isInfoRef Dict
dict
  Maybe Dict
Nothing -> String -> Maybe Int
forall a. HasCallStack => String -> a
error String
"No ref for info"

isInfoRef :: (Obj, Obj) -> Bool
isInfoRef (PdfName String
"/Info", ObjRef Int
x) = Bool
True
isInfoRef (Obj
_,Obj
_) = Bool
False


-- expand PDF 1.5 Object Stream 

expandObjStm :: [PDFObj] -> [PDFObj]
expandObjStm :: [PDFObj] -> [PDFObj]
expandObjStm [PDFObj]
os = [[PDFObj]] -> [PDFObj]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[PDFObj]] -> [PDFObj]) -> [[PDFObj]] -> [PDFObj]
forall a b. (a -> b) -> a -> b
$ (PDFObj -> [PDFObj]) -> [PDFObj] -> [[PDFObj]]
forall a b. (a -> b) -> [a] -> [b]
map PDFObj -> [PDFObj]
objStm [PDFObj]
os

objStm :: PDFObj -> [PDFObj]
objStm :: PDFObj -> [PDFObj]
objStm (Int
n, [Obj]
obj) = case String -> [Obj] -> Maybe Dict
findDictOfType String
"/ObjStm" [Obj]
obj of
  Maybe Dict
Nothing -> [(Int
n,[Obj]
obj)]
  Just Dict
_  -> Int -> ByteString -> [PDFObj]
forall p. p -> ByteString -> [PDFObj]
pdfObjStm Int
n (ByteString -> [PDFObj]) -> ByteString -> [PDFObj]
forall a b. (a -> b) -> a -> b
$ PDFStream -> ByteString
BSL.toStrict (PDFStream -> ByteString) -> PDFStream -> ByteString
forall a b. (a -> b) -> a -> b
$ [Obj] -> PDFStream
rawStream [Obj]
obj
  
refOffset :: Parser ([(Int, Int)], String)
refOffset :: Parser ([(Int, Int)], String)
refOffset = Parser ()
spaces Parser ()
-> Parser ([(Int, Int)], String) -> Parser ([(Int, Int)], String)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ((,) 
                       ([(Int, Int)] -> String -> ([(Int, Int)], String))
-> Parser ByteString [(Int, Int)]
-> Parser ByteString (String -> ([(Int, Int)], String))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString (Int, Int) -> Parser ByteString [(Int, Int)]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 ((\String
r String
o -> (String -> Int
forall a. Read a => String -> a
read String
r :: Int, String -> Int
forall a. Read a => String -> a
read String
o :: Int))
                                  (String -> String -> (Int, Int))
-> Parser String -> Parser ByteString (String -> (Int, Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
digit Parser String -> Parser () -> Parser String
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
spaces) 
                                  Parser ByteString (String -> (Int, Int))
-> Parser String -> Parser ByteString (Int, Int)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
digit Parser String -> Parser () -> Parser String
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
spaces))
                       Parser ByteString (String -> ([(Int, Int)], String))
-> Parser String -> Parser ([(Int, Int)], String)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
anyChar)

pdfObjStm :: p -> ByteString -> [PDFObj]
pdfObjStm p
n ByteString
s = 
  let ([(Int, Int)]
location, String
objstr) = case Parser ([(Int, Int)], String)
-> ByteString -> Either String ([(Int, Int)], String)
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser ([(Int, Int)], String)
refOffset ByteString
s of
        Right ([(Int, Int)], String)
val -> ([(Int, Int)], String)
val
        Left String
err  -> String -> ([(Int, Int)], String)
forall a. HasCallStack => String -> a
error (String -> ([(Int, Int)], String))
-> String -> ([(Int, Int)], String)
forall a b. (a -> b) -> a -> b
$ String
"Failed to parse Object Stream: "
  in ((Int, Int) -> PDFObj) -> [(Int, Int)] -> [PDFObj]
forall a b. (a -> b) -> [a] -> [b]
map (\(Int
r,Int
o) -> (Int
r, ByteString -> [Obj]
parseDict (ByteString -> [Obj]) -> ByteString -> [Obj]
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BS.pack (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> String -> String
forall a. Int -> [a] -> [a]
drop Int
o String
objstr)) [(Int, Int)]
location
    where parseDict :: ByteString -> [Obj]
parseDict ByteString
s' = case Parser Obj -> ByteString -> Either String Obj
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Obj
pdfdictionary ByteString
s' of
            Right Obj
obj -> [Obj
obj]
            Left  String
_   -> case Parser Obj -> ByteString -> Either String Obj
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Obj
pdfarray ByteString
s' of
              Right Obj
obj -> [Obj
obj]
              Left String
_ -> case Parser Obj -> ByteString -> Either String Obj
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Obj
pdfletters ByteString
s' of
                Right Obj
obj -> [Obj
obj]
                Left String
err -> String -> [Obj]
forall a. HasCallStack => String -> a
error (String -> [Obj]) -> String -> [Obj]
forall a b. (a -> b) -> a -> b
$ (String -> String
forall a. Show a => a -> String
show String
err) String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
":\n   Failed to parse obj around; \n"
                              String -> String -> String
forall a. [a] -> [a] -> [a]
++ (ByteString -> String
forall a. Show a => a -> String
show (ByteString -> String) -> ByteString -> String
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.take Int
100 ByteString
s')


-- make fontmap from page's /Resources (see 3.7.2 of PDF Ref.)

findFontEncoding :: Dict -> [PDFObj] -> [(String, Encoding)]
findFontEncoding Dict
d [PDFObj]
os = Dict -> [PDFObj] -> [(String, Encoding)]
findEncoding (Dict -> [PDFObj] -> Dict
fontObjs Dict
d [PDFObj]
os) [PDFObj]
os

findEncoding :: Dict -> [PDFObj] -> [(String, Encoding)]
findEncoding :: Dict -> [PDFObj] -> [(String, Encoding)]
findEncoding Dict
dict [PDFObj]
objs = ((Obj, Obj) -> (String, Encoding)) -> Dict -> [(String, Encoding)]
forall a b. (a -> b) -> [a] -> [b]
map (Obj, Obj) -> (String, Encoding)
pairwise Dict
dict
  where
    pairwise :: (Obj, Obj) -> (String, Encoding)
pairwise (PdfName String
n, ObjRef Int
r) = (String
n, Int -> [PDFObj] -> Encoding
encoding Int
r [PDFObj]
objs)
    pairwise (Obj, Obj)
x = (String
"", Encoding
NullMap)

fontObjs :: Dict -> [PDFObj] -> Dict
fontObjs :: Dict -> [PDFObj] -> Dict
fontObjs Dict
dict [PDFObj]
objs = case Dict -> [PDFObj] -> Maybe Dict
findResourcesDict Dict
dict [PDFObj]
objs of
  Just Dict
d -> case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Font" of
    Just (PdfDict Dict
d') -> Dict
d'
    Just (ObjRef Int
x) -> case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
x [PDFObj]
objs of
                         Just Dict
d' -> Dict
d'
                         Maybe Dict
otherwise -> String -> Dict
forall a. HasCallStack => String -> a
error String
"cannot find /Font dictionary"
    Maybe Obj
otherwise -> String -> Dict -> Dict
forall a. String -> a -> a
trace (Dict -> String
forall a. Show a => a -> String
show Dict
d) (Dict -> Dict) -> Dict -> Dict
forall a b. (a -> b) -> a -> b
$ []
  Maybe Dict
Nothing -> []

findResourcesDict :: Dict -> [PDFObj] -> Maybe Dict
findResourcesDict :: Dict -> [PDFObj] -> Maybe Dict
findResourcesDict Dict
dict [PDFObj]
objs = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
resources Dict
dict of
  Just (Obj
_, ObjRef Int
x)  -> Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
x [PDFObj]
objs
  Just (Obj
_, PdfDict Dict
d) -> Dict -> Maybe Dict
forall a. a -> Maybe a
Just Dict
d
  Maybe (Obj, Obj)
otherwise -> String -> Maybe Dict
forall a. HasCallStack => String -> a
error (Dict -> String
forall a. Show a => a -> String
show Dict
dict)
  where
    resources :: (Obj, b) -> Bool
resources (PdfName String
"/Resources", b
_) = Bool
True
    resources (Obj, b)
_                         = Bool
False


encoding :: Int -> [PDFObj] -> Encoding
encoding :: Int -> [PDFObj] -> Encoding
encoding Int
x [PDFObj]
objs = case Maybe Obj
subtype of
  Just (PdfName String
"/Type0") -> case Maybe Obj
encoding of
    Just (PdfName String
"/Identity-H") -> [Encoding] -> Encoding
forall a. [a] -> a
head ([Encoding] -> Encoding) -> [Encoding] -> Encoding
forall a b. (a -> b) -> a -> b
$ [Obj] -> [Encoding]
cidSysInfo [Obj]
descendantFonts
    -- TODO" when /Encoding is stream of CMap
    Just (PdfName String
s) -> String -> Encoding
forall a. HasCallStack => String -> a
error (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ String
"Unknown Encoding " String -> String -> String
forall a. [a] -> [a] -> [a]
++ (String -> String
forall a. Show a => a -> String
show String
s) String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" for a Type0 font. Check " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
x
    Maybe Obj
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ String
"Something wrong with a Type0 font. Check " String -> String -> String
forall a. [a] -> [a] -> [a]
++ (Int -> String
forall a. Show a => a -> String
show Int
x)
  Just (PdfName String
"/Type1") -> case Maybe Obj
encoding of
    Just (ObjRef Int
r) -> case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
r String
"/Differences" [PDFObj]
objs of
                     Just (PdfArray [Obj]
arr) -> [Obj] -> Encoding
charDiff [Obj]
arr
                     Maybe Obj
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error String
"No /Differences"
    Just (PdfDict Dict
d) -> case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Differences" of
                     Just (PdfArray [Obj]
arr) -> [Obj] -> Encoding
charDiff [Obj]
arr
                     Maybe Obj
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error String
"No /Differences"
    Just (PdfName String
"/MacRomanEncoding") -> Encoding
NullMap
    Just (PdfName String
"/MacExpertEncoding") -> Encoding
NullMap
    Just (PdfName String
"/WinAnsiEncoding") -> Encoding
NullMap
    -- TODO: FontFile (Type 1), FontFile2 (TrueType), FontFile3 (Other than Type1C)
    Maybe Obj
_ -> case Dict -> String -> Maybe Obj
findObjFromDict (Int -> Dict
fontDescriptor' Int
x) String
"/FontFile3" of
           Just (ObjRef Int
fontfile) ->
             ByteString -> Encoding
CFF.encoding (ByteString -> Encoding) -> ByteString -> Encoding
forall a b. (a -> b) -> a -> b
$ PDFStream -> ByteString
BSL.toStrict (PDFStream -> ByteString) -> PDFStream -> ByteString
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
fontfile
           Maybe Obj
_ -> case Dict -> String -> Maybe Obj
findObjFromDict (Int -> Dict
fontDescriptor' Int
x) String
"/FontFile" of
             Just (ObjRef Int
fontfile) ->
               ByteString -> Encoding
Type1.encoding (ByteString -> Encoding) -> ByteString -> Encoding
forall a b. (a -> b) -> a -> b
$ PDFStream -> ByteString
BSL.toStrict (PDFStream -> ByteString) -> PDFStream -> ByteString
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
fontfile
             Maybe Obj
_ -> Encoding
NullMap
  -- TODO
  Just (PdfName String
"/Type2") -> Encoding
NullMap
  Just (PdfName String
"/Type3") -> Encoding
NullMap
  Maybe Obj
_ -> Encoding
NullMap

  where
    subtype :: Maybe Obj
subtype = String -> Maybe Obj
get String
"/Subtype"
    encoding :: Maybe Obj
encoding = String -> Maybe Obj
get String
"/Encoding"
    toUnicode :: Maybe Obj
toUnicode = String -> Maybe Obj
get String
"/ToUnicode" 

    get :: String -> Maybe Obj
get String
s = Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
s [PDFObj]
objs

    -- Should be an array (or ref to an array) containing refs
    descendantFonts :: [Obj]
    descendantFonts :: [Obj]
descendantFonts = case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
"/DescendantFonts" [PDFObj]
objs of
      Just (PdfArray [Obj]
dfrs) -> [Obj]
dfrs
      Just (ObjRef Int
r) -> case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
r [PDFObj]
objs of
        Just [(PdfArray [Obj]
dfrs)] -> [Obj]
dfrs
        Maybe [Obj]
_ -> String -> [Obj]
forall a. HasCallStack => String -> a
error (String -> [Obj]) -> String -> [Obj]
forall a b. (a -> b) -> a -> b
$ String
"Can not find /DescendantFonts entries in " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
r
      Maybe Obj
_ -> String -> [Obj]
forall a. HasCallStack => String -> a
error (String -> [Obj]) -> String -> [Obj]
forall a b. (a -> b) -> a -> b
$ String
"Can not find /DescendantFonts itself in " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
x 

    cidSysInfo :: [Obj] -> [Encoding]
    cidSysInfo :: [Obj] -> [Encoding]
cidSysInfo [] = []
    cidSysInfo ((ObjRef Int
r):[Obj]
rs) = (Int -> Encoding
cidSysInfo' Int
r)Encoding -> [Encoding] -> [Encoding]
forall a. a -> [a] -> [a]
:([Obj] -> [Encoding]
cidSysInfo [Obj]
rs)
    cidSysInfo' :: Int -> Encoding
cidSysInfo' Int
dfr = case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
dfr String
"/CIDSystemInfo" [PDFObj]
objs of
      Just (PdfDict Dict
dict) -> Dict -> Encoding
getCIDSystemInfo Dict
dict
      Just (ObjRef Int
r) -> case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
r [PDFObj]
objs of
                           Just Dict
dict -> Dict -> Encoding
getCIDSystemInfo Dict
dict
                           Maybe Dict
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ String
"Can not find /CIDSystemInfo entries in" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
r
      Maybe Obj
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ String
"Can not find /CidSystemInfo itself " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
dfr

    fontDescriptor :: [Obj] -> [Dict]
    fontDescriptor :: [Obj] -> [Dict]
fontDescriptor [] = []
    fontDescriptor ((ObjRef Int
r):[Obj]
rs) = (Int -> Dict
fontDescriptor' Int
r)Dict -> [Dict] -> [Dict]
forall a. a -> [a] -> [a]
:([Obj] -> [Dict]
fontDescriptor [Obj]
rs)
    fontDescriptor' :: Int -> Dict
    fontDescriptor' :: Int -> Dict
fontDescriptor' Int
fdr = case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
fdr String
"/FontDescriptor" [PDFObj]
objs of
      Just (ObjRef Int
r) -> case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
r [PDFObj]
objs of
                           Just Dict
dict -> Dict
dict
                           Maybe Dict
_ -> String -> Dict
forall a. HasCallStack => String -> a
error (String -> Dict) -> String -> Dict
forall a b. (a -> b) -> a -> b
$ String
"No /FontDescriptor entries in " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
r
      Maybe Obj
_ -> String -> Dict
forall a. HasCallStack => String -> a
error (String -> Dict) -> String -> Dict
forall a b. (a -> b) -> a -> b
$ String
"Can not find /FontDescriptor itself in " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
fdr

    getCIDSystemInfo :: Dict -> Encoding
getCIDSystemInfo Dict
d =
      let registry :: String
registry = case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Registry" of
                       Just (PdfText String
r) -> String
r
                       Maybe Obj
otherwise -> String -> String
forall a. HasCallStack => String -> a
error String
"Can not find /Registry"
          ordering :: String
ordering = case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Ordering" of
                       Just (PdfText String
o) -> String
o
                       Maybe Obj
othserwise -> String -> String
forall a. HasCallStack => String -> a
error String
"Can not find /Ordering"
          supplement :: Double
supplement = case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Supplement" of
                         Just (PdfNumber Double
s) -> Double
s
                         Maybe Obj
otherwise -> String -> Double
forall a. HasCallStack => String -> a
error String
"Can not find /Supprement"
          cmap :: String
cmap = String
registry String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"-" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
ordering -- ex. "Adobe-Japan1"
      in if String
cmap String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"Adobe-Japan1"
         then String -> Encoding
CIDmap String
cmap
         else String -> Encoding
WithCharSet String
""


charDiff :: [Obj] -> Encoding
charDiff :: [Obj] -> Encoding
charDiff [Obj]
objs = [(Char, String)] -> Encoding
Encoding ([(Char, String)] -> Encoding) -> [(Char, String)] -> Encoding
forall a b. (a -> b) -> a -> b
$ [Obj] -> Int -> [(Char, String)]
charmap [Obj]
objs Int
0
  where charmap :: [Obj] -> Int -> [(Char, String)]
charmap (PdfNumber Double
x : PdfName String
n : [Obj]
xs) Int
i = 
          if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
truncate Double
x then 
            (Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
truncate Double
x, String
n) (Char, String) -> [(Char, String)] -> [(Char, String)]
forall a. a -> [a] -> [a]
: ([Obj] -> Int -> [(Char, String)]
charmap [Obj]
xs (Int -> [(Char, String)]) -> Int -> [(Char, String)]
forall a b. (a -> b) -> a -> b
$ Double -> Int
forall a a. (RealFrac a, Integral a) => a -> a
incr Double
x)
          else 
            (Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Int
i, String
n) (Char, String) -> [(Char, String)] -> [(Char, String)]
forall a. a -> [a] -> [a]
: ([Obj] -> Int -> [(Char, String)]
charmap [Obj]
xs (Int -> [(Char, String)]) -> Int -> [(Char, String)]
forall a b. (a -> b) -> a -> b
$ Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        charmap (PdfName String
n : [Obj]
xs) Int
i = (Int -> Char
chr Int
i, String
n) (Char, String) -> [(Char, String)] -> [(Char, String)]
forall a. a -> [a] -> [a]
: ([Obj] -> Int -> [(Char, String)]
charmap [Obj]
xs (Int -> [(Char, String)]) -> Int -> [(Char, String)]
forall a b. (a -> b) -> a -> b
$ Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        charmap [] Int
i               = []
        incr :: a -> a
incr a
x = (a -> a
forall a b. (RealFrac a, Integral b) => a -> b
truncate a
x) a -> a -> a
forall a. Num a => a -> a -> a
+ a
1


findCMap :: Dict -> [PDFObj] -> [(String, CMap)]
findCMap :: Dict -> [PDFObj] -> [(String, CMap)]
findCMap Dict
d [PDFObj]
objs = ((Obj, Obj) -> (String, CMap)) -> Dict -> [(String, CMap)]
forall a b. (a -> b) -> [a] -> [b]
map (Obj, Obj) -> (String, CMap)
pairwise (Dict -> [PDFObj] -> Dict
fontObjs Dict
d [PDFObj]
objs)
  where
    pairwise :: (Obj, Obj) -> (String, CMap)
pairwise (PdfName String
n, ObjRef Int
r) = (String
n, Int -> [PDFObj] -> CMap
toUnicode Int
r [PDFObj]
objs)
    pairwise (Obj, Obj)
x = (String
"", [])

toUnicode :: Int -> [PDFObj] -> CMap
toUnicode :: Int -> [PDFObj] -> CMap
toUnicode Int
x [PDFObj]
objs =
  case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
"/ToUnicode" [PDFObj]
objs of
    Just (ObjRef Int
ref) ->
      PDFStream -> CMap
parseCMap (PDFStream -> CMap) -> PDFStream -> CMap
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
ref
    Maybe Obj
otherwise -> Int -> [PDFObj] -> CMap
noToUnicode Int
x [PDFObj]
objs

noToUnicode :: Int -> [PDFObj] -> CMap
noToUnicode Int
x [PDFObj]
objs = 
  case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
"/DescendantFonts" [PDFObj]
objs of
    Just (ObjRef Int
ref) ->
      case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
ref [PDFObj]
objs of
        Just [(PdfArray ((ObjRef Int
subref):[Obj]
_))] ->
          case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
subref String
"/FontDescriptor" [PDFObj]
objs of
            Just (ObjRef Int
desc) ->
              case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
desc String
"/FontFile2" [PDFObj]
objs of
                Just (ObjRef Int
fontfile) ->
                  ByteString -> CMap
OpenType.cmap (ByteString -> CMap) -> ByteString -> CMap
forall a b. (a -> b) -> a -> b
$ PDFStream -> ByteString
BSL.toStrict (PDFStream -> ByteString) -> PDFStream -> ByteString
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
fontfile
                Maybe Obj
otherwise -> []
            Maybe Obj
otherwise -> []
        Maybe [Obj]
otherwise -> []
    Maybe Obj
otherwise -> []