-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | A collection of tools for processing PDF files.
--   
--   Mid level tools for processing PDF files.
--   
--   Level of abstraction: document, catalog, page
@package pdf-toolbox-document
@version 0.1.2


-- | Various types
module Pdf.Document.Types


-- | Utilities for internal use
module Pdf.Document.Internal.Util

-- | Check that the dictionary has the specified "Type" field
ensureType :: Name -> Dict -> IO ()

-- | Get dictionary type, name at key "Type"
dictionaryType :: Dict -> Either String Name
decodeTextString :: ByteString -> Either String Text
decodeTextStringThrow :: ByteString -> IO Text


-- | Internal type declarations
module Pdf.Document.Internal.Types
data Pdf
Pdf :: File -> IORef ObjectCache -> Pdf

-- | PDF document
--   
--   It is a trailer under the hood
data Document
Document :: Pdf -> Dict -> Document

-- | Document catalog
data Catalog
Catalog :: Pdf -> Ref -> Dict -> Catalog

-- | Information dictionary
data Info
Info :: Pdf -> Ref -> Dict -> Info

-- | Page tree node, contains pages or other nodes
data PageNode
PageNode :: Pdf -> Ref -> Dict -> PageNode

-- | Pdf document page
data Page
Page :: Pdf -> Ref -> Dict -> Page

-- | Page tree
data PageTree
PageTreeNode :: PageNode -> PageTree
PageTreeLeaf :: Page -> PageTree

-- | Font dictionary
data FontDict
FontDict :: Pdf -> Dict -> FontDict

module Pdf.Document.Pdf
data Pdf
withPdfFile :: FilePath -> (Pdf -> IO a) -> IO a

-- | Make Pdf with interface to pdf file
fromFile :: File -> IO Pdf

-- | Make Pdf with seekable handle
fromHandle :: Handle -> IO Pdf

-- | Make Pdf from a ByteString
fromBytes :: ByteString -> IO Pdf

-- | Get PDF document
document :: Pdf -> IO Document

-- | Find object by its reference
lookupObject :: Pdf -> Ref -> IO Object

-- | Get stream content, decoded and decrypted
--   
--   Note: length of the content may differ from the raw one
streamContent :: Pdf -> Ref -> Stream -> IO (InputStream ByteString)

-- | Get stream content without decoding it
rawStreamContent :: Pdf -> Ref -> Stream -> IO (InputStream ByteString)
deref :: Pdf -> Object -> IO Object

-- | Whether the PDF document is encrypted
isEncrypted :: Pdf -> IO Bool

-- | Set the password to be used for decryption
--   
--   Returns False when the password is wrong
setUserPassword :: Pdf -> ByteString -> IO Bool

-- | The default user password
defaultUserPassword :: ByteString

-- | File is encrypted
data EncryptedError
EncryptedError :: Text -> EncryptedError

-- | Cache object for future lookups
enableCache :: Pdf -> IO ()

-- | Don't cache object for future lookups
disableCache :: Pdf -> IO ()
instance GHC.Show.Show Pdf.Document.Pdf.EncryptedError
instance GHC.Exception.Type.Exception Pdf.Document.Pdf.EncryptedError


-- | Page tree node
module Pdf.Document.PageNode

-- | Page tree node, contains pages or other nodes
data PageNode

-- | Page tree
data PageTree
PageTreeNode :: PageNode -> PageTree
PageTreeLeaf :: Page -> PageTree

-- | Total number of child leaf nodes, including deep children
pageNodeNKids :: PageNode -> IO Int

-- | Parent page node
pageNodeParent :: PageNode -> IO (Maybe PageNode)

-- | References to all kids
pageNodeKids :: PageNode -> IO [Ref]

-- | Load page tree node by reference
loadPageNode :: Pdf -> Ref -> IO PageTree

-- | Find page by its number
--   
--   Note: it is not efficient for PDF files with a lot of pages, because
--   it performs traversal through the page tree each time. Use
--   <a>pageNodeNKids</a>, <a>pageNodeKids</a> and <a>loadPageNode</a> for
--   efficient traversal.
pageNodePageByNum :: PageNode -> Int -> IO Page


-- | Document info dictionary
module Pdf.Document.Info

-- | Information dictionary
data Info

-- | Document title
infoTitle :: Info -> IO (Maybe Text)

-- | The name of the person who created the document
infoAuthor :: Info -> IO (Maybe Text)

-- | The subject of the document
infoSubject :: Info -> IO (Maybe Text)

-- | Keywords associated with the document
infoKeywords :: Info -> IO (Maybe Text)

-- | The name of the application that created the original document
infoCreator :: Info -> IO (Maybe Text)

-- | The name of the application that converted the document to PDF format
infoProducer :: Info -> IO (Maybe Text)


-- | Font dictionary
module Pdf.Document.FontDict

-- | Font dictionary
data FontDict

-- | Font subtypes
data FontSubtype
FontType0 :: FontSubtype
FontType1 :: FontSubtype
FontMMType1 :: FontSubtype
FontType3 :: FontSubtype
FontTrueType :: FontSubtype

-- | Get font subtype
fontDictSubtype :: FontDict -> IO FontSubtype

-- | Load font info for the font
fontDictLoadInfo :: FontDict -> IO FontInfo
instance GHC.Classes.Eq Pdf.Document.FontDict.FontSubtype
instance GHC.Show.Show Pdf.Document.FontDict.FontSubtype


-- | PDF document page
module Pdf.Document.Page

-- | Pdf document page
data Page

-- | Page's parent node
pageParentNode :: Page -> IO PageNode

-- | List of references to page's content streams
pageContents :: Page -> IO [Ref]

-- | Media box, inheritable
pageMediaBox :: Page -> IO (Rectangle Double)

-- | Font dictionaries for the page
pageFontDicts :: Page -> IO [(Name, FontDict)]

-- | Extract text from the page
--   
--   It tries to add spaces between chars if they are not present as actual
--   characters in the content stream.
pageExtractText :: Page -> IO Text
pageExtractGlyphs :: Page -> IO [Span]

-- | Convert glyphs to text, trying to add spaces and newlines
--   
--   It takes a list of spans. Each span is a list of glyphs that are
--   output in one shot. So we don't need to add spaces inside a span, only
--   between them.
glyphsToText :: [Span] -> Text
instance GHC.Show.Show Pdf.Document.Page.XObject


-- | PDF document
module Pdf.Document.Document

-- | PDF document
--   
--   It is a trailer under the hood
data Document

-- | Get the document catalog
documentCatalog :: Document -> IO Catalog

-- | Information dictionary for the document
documentInfo :: Document -> IO (Maybe Info)

-- | Document encryption dictionary
documentEncryption :: Document -> IO (Maybe Dict)


-- | Document catalog
module Pdf.Document.Catalog

-- | Document catalog
data Catalog

-- | Get root node of page tree
catalogPageNode :: Catalog -> IO PageNode


-- | Mid level utils for processing PDF file
--   
--   Basic example of how to get the number of pages in a document
--   
--   <pre>
--   import Pdf.Document
--   
--   withPdfFile "input.pdf" $ \pdf -&gt; do
--     doc &lt;- <a>document</a> pdf
--     catalog &lt;- <a>documentCatalog</a> doc
--     rootNode &lt;- <a>catalogPageNode</a> catalog
--     count &lt;- <a>pageNodeNKids</a> rootNode
--     print count
--     page &lt;- <a>pageNodePageByNum</a> rootNode 1
--     text &lt;- <a>pageExtractText</a> page
--     print text
--   </pre>
module Pdf.Document