Copyright | (c) 2020 G. Eyaeb |
---|---|
License | BSD-3-Clause |
Maintainer | geyaeb@protonmail.com |
Stability | experimental |
Portability | POSIX |
Safe Haskell | None |
Language | Haskell2010 |
Internal functions.
Synopsis
- newtype Document = Document (ForeignPtr Poppler_Document)
- data Layout
- data Page = Page {}
- data Properties = Properties {}
- openByteStringIO :: ByteString -> IO (Maybe Document)
- openFile :: FilePath -> IO (Maybe Document)
- pageIO :: Int -> Document -> IO (Maybe Page)
- pagesIO :: Document -> IO [Page]
- pagesTotalIO :: Document -> IO Int
- pdftotextIO :: Layout -> Document -> IO Text
- propertiesIO :: Document -> IO Properties
- pageTextIO :: Layout -> Page -> IO Text
Types
Layout of text extracted from PDF.
Physical | Text emulates the layout of the PDF, including horizontal spaces,
and preserves hyphenation; corresponds to calling `pdftotext -layout` |
Raw | Discards horizontal spaces, preserves hyphenation;
corresponds to calling `pdftotext -raw` |
None | Discards horizontal spaces, removes hyphenation;
corresponds to calling `pdftotext` without a layout option |
Page | |
|
data Properties Source #
Document properties.
If the flag xml-conduit is set, metadata is of type Maybe Text.XML.Document.
Since: 0.0.2.0
Instances
Show Properties Source # | |
Defined in Pdftotext.Internal showsPrec :: Int -> Properties -> ShowS # show :: Properties -> String # showList :: [Properties] -> ShowS # |
Loading PDFs
openByteStringIO :: ByteString -> IO (Maybe Document) Source #
Open a PDF represented as a bytestring. If the document cannot be parsed as a valid PDF,
Nothing is returned.
openFile :: FilePath -> IO (Maybe Document) Source #
Open a PDF from a file. If the file does not exist or cannot be parsed as a valid PDF,
Nothing is returned.
Document functions
pageIO :: Int -> Document -> IO (Maybe Page) Source #
Return page number no from the PDF document, if the page exists.
pdftotextIO :: Layout -> Document -> IO Text Source #
Extract text from the PDF document with the given Layout.
propertiesIO :: Document -> IO Properties Source #
Extract properties from the document.
Since: 0.0.2.0