{-# LANGUAGE OverloadedStrings #-}

-- | Page tree node

module Pdf.Document.PageNode
(
  PageNode,
  PageTree(..),
  pageNodeNKids,
  pageNodeParent,
  pageNodeKids,
  loadPageNode,
  pageNodePageByNum,
)
where

import Pdf.Core
import Pdf.Core.Exception
import Pdf.Core.Util
import Pdf.Core.Object.Util

import Pdf.Document.Pdf
import Pdf.Document.Internal.Types
import Pdf.Document.Internal.Util

import qualified Data.Vector as Vector
import qualified Data.HashMap.Strict as HashMap
import Control.Monad
import Control.Exception hiding (throw)

-- | Total number of child leaf nodes, including deep children
pageNodeNKids :: PageNode -> IO Int
pageNodeNKids :: PageNode -> IO Int
pageNodeNKids (PageNode Pdf
_ Ref
_ Dict
dict) = Either String Int -> IO Int
forall a. Either String a -> IO a
sure (Either String Int -> IO Int) -> Either String Int -> IO Int
forall a b. (a -> b) -> a -> b
$
  (Name -> Dict -> Maybe Object
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup Name
"Count" Dict
dict Maybe Object -> (Object -> Maybe Int) -> Maybe Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Object -> Maybe Int
intValue)
  Maybe Int -> String -> Either String Int
forall a. Maybe a -> String -> Either String a
`notice` String
"Count should be an integer"

-- | Parent page node
pageNodeParent :: PageNode -> IO (Maybe PageNode)
pageNodeParent :: PageNode -> IO (Maybe PageNode)
pageNodeParent (PageNode Pdf
pdf Ref
_ Dict
dict) =
  case Name -> Dict -> Maybe Object
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup Name
"Parent" Dict
dict of
    Maybe Object
Nothing -> Maybe PageNode -> IO (Maybe PageNode)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe PageNode
forall a. Maybe a
Nothing
    Just o :: Object
o@(Ref Ref
ref) -> do
      Object
obj <- Pdf -> Object -> IO Object
deref Pdf
pdf Object
o
      Dict
node <- Either String Dict -> IO Dict
forall a. Either String a -> IO a
sure (Either String Dict -> IO Dict) -> Either String Dict -> IO Dict
forall a b. (a -> b) -> a -> b
$ Object -> Maybe Dict
dictValue Object
obj Maybe Dict -> String -> Either String Dict
forall a. Maybe a -> String -> Either String a
`notice` String
"Parent should be a dictionary"
      Name -> Dict -> IO ()
ensureType Name
"Pages" Dict
node
      Maybe PageNode -> IO (Maybe PageNode)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe PageNode -> IO (Maybe PageNode))
-> Maybe PageNode -> IO (Maybe PageNode)
forall a b. (a -> b) -> a -> b
$ PageNode -> Maybe PageNode
forall a. a -> Maybe a
Just (Pdf -> Ref -> Dict -> PageNode
PageNode Pdf
pdf Ref
ref Dict
node)
    Maybe Object
_ -> Corrupted -> IO (Maybe PageNode)
forall e a. Exception e => e -> IO a
throwIO (String -> [String] -> Corrupted
Corrupted String
"Parent should be an indirect ref" [])

-- | Referencies to all kids
pageNodeKids :: PageNode -> IO [Ref]
pageNodeKids :: PageNode -> IO [Ref]
pageNodeKids (PageNode Pdf
pdf Ref
_ Dict
dict) = do
  Object
obj <- Either String Object -> IO Object
forall a. Either String a -> IO a
sure (Name -> Dict -> Maybe Object
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup Name
"Kids" Dict
dict
                Maybe Object -> String -> Either String Object
forall a. Maybe a -> String -> Either String a
`notice` String
"Page node should have Kids")
        IO Object -> (Object -> IO Object) -> IO Object
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pdf -> Object -> IO Object
deref Pdf
pdf
  Array
kids <- Either String Array -> IO Array
forall a. Either String a -> IO a
sure (Either String Array -> IO Array)
-> Either String Array -> IO Array
forall a b. (a -> b) -> a -> b
$ Object -> Maybe Array
arrayValue Object
obj
    Maybe Array -> String -> Either String Array
forall a. Maybe a -> String -> Either String a
`notice` String
"Kids should be an array"
  [Object] -> (Object -> IO Ref) -> IO [Ref]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM (Array -> [Object]
forall a. Vector a -> [a]
Vector.toList Array
kids) ((Object -> IO Ref) -> IO [Ref]) -> (Object -> IO Ref) -> IO [Ref]
forall a b. (a -> b) -> a -> b
$ \Object
k -> Either String Ref -> IO Ref
forall a. Either String a -> IO a
sure (Either String Ref -> IO Ref) -> Either String Ref -> IO Ref
forall a b. (a -> b) -> a -> b
$
    Object -> Maybe Ref
refValue Object
k Maybe Ref -> String -> Either String Ref
forall a. Maybe a -> String -> Either String a
`notice` String
"each kid should be a reference"

-- | Load page tree node by reference
loadPageNode :: Pdf -> Ref -> IO PageTree
loadPageNode :: Pdf -> Ref -> IO PageTree
loadPageNode Pdf
pdf Ref
ref = do
  Object
obj <- Pdf -> Ref -> IO Object
lookupObject Pdf
pdf Ref
ref IO Object -> (Object -> IO Object) -> IO Object
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pdf -> Object -> IO Object
deref Pdf
pdf
  Dict
node <- Either String Dict -> IO Dict
forall a. Either String a -> IO a
sure (Either String Dict -> IO Dict) -> Either String Dict -> IO Dict
forall a b. (a -> b) -> a -> b
$ Object -> Maybe Dict
dictValue Object
obj Maybe Dict -> String -> Either String Dict
forall a. Maybe a -> String -> Either String a
`notice` String
"page should be a dictionary"
  Name
nodeType <- Either String Name -> IO Name
forall a. Either String a -> IO a
sure (Either String Name -> IO Name) -> Either String Name -> IO Name
forall a b. (a -> b) -> a -> b
$ Dict -> Either String Name
dictionaryType Dict
node
  case Name
nodeType of
    Name
"Pages" -> PageTree -> IO PageTree
forall (m :: * -> *) a. Monad m => a -> m a
return (PageTree -> IO PageTree) -> PageTree -> IO PageTree
forall a b. (a -> b) -> a -> b
$ PageNode -> PageTree
PageTreeNode (Pdf -> Ref -> Dict -> PageNode
PageNode Pdf
pdf Ref
ref Dict
node)
    Name
"Page" -> PageTree -> IO PageTree
forall (m :: * -> *) a. Monad m => a -> m a
return (PageTree -> IO PageTree) -> PageTree -> IO PageTree
forall a b. (a -> b) -> a -> b
$ Page -> PageTree
PageTreeLeaf (Pdf -> Ref -> Dict -> Page
Page Pdf
pdf Ref
ref Dict
node)
    Name
_ -> Corrupted -> IO PageTree
forall e a. Exception e => e -> IO a
throwIO (Corrupted -> IO PageTree) -> Corrupted -> IO PageTree
forall a b. (a -> b) -> a -> b
$ String -> [String] -> Corrupted
Corrupted (String
"Unexpected page tree node type: "
                              String -> String -> String
forall a. [a] -> [a] -> [a]
++ Name -> String
forall a. Show a => a -> String
show Name
nodeType) []

-- | Find page by it's number
--
-- Note: it is not efficient for PDF files with a lot of pages,
-- because it performs traversal through the page tree each time.
-- Use 'pageNodeNKids', 'pageNodeKids' and 'loadPageNode' for
-- efficient traversal.
pageNodePageByNum :: PageNode -> Int -> IO Page
pageNodePageByNum :: PageNode -> Int -> IO Page
pageNodePageByNum node :: PageNode
node@(PageNode Pdf
pdf Ref
nodeRef Dict
_) Int
num =
  String -> IO Page -> IO Page
forall a. String -> IO a -> IO a
message (String
"page #" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
num String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" for node: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Ref -> String
forall a. Show a => a -> String
show Ref
nodeRef) (IO Page -> IO Page) -> IO Page -> IO Page
forall a b. (a -> b) -> a -> b
$ do
  PageNode -> IO [Ref]
pageNodeKids PageNode
node IO [Ref] -> ([Ref] -> IO Page) -> IO Page
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> [Ref] -> IO Page
loop Int
num
  where
  loop :: Int -> [Ref] -> IO Page
loop Int
_ [] = Corrupted -> IO Page
forall e a. Exception e => e -> IO a
throwIO (Corrupted -> IO Page) -> Corrupted -> IO Page
forall a b. (a -> b) -> a -> b
$ String -> [String] -> Corrupted
Corrupted String
"Page not found" []
  loop Int
i (Ref
x:[Ref]
xs) = do
    PageTree
kid <- Pdf -> Ref -> IO PageTree
loadPageNode Pdf
pdf Ref
x
    case PageTree
kid  of
      PageTreeNode PageNode
n -> do
        Int
nkids <- PageNode -> IO Int
pageNodeNKids PageNode
n
        if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
nkids
          then PageNode -> Int -> IO Page
pageNodePageByNum PageNode
n Int
i
          else Int -> [Ref] -> IO Page
loop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nkids) [Ref]
xs
      PageTreeLeaf Page
page ->
        if Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
          then Page -> IO Page
forall (m :: * -> *) a. Monad m => a -> m a
return Page
page
          else Int -> [Ref] -> IO Page
loop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) [Ref]
xs