pdf-toolbox-core: A collection of tools for processing PDF files.

Safe HaskellNone




This module contains parsers for PDF objects.


Parse any object

parseObject :: Parser (Object ()) Source

Parses any Object except Stream, because a Stream requires the offset of its data in the file.

>>> parseOnly parseObject "/Name"
Right (OName (Name "Name"))

Parse object of specific type

parseDict :: Parser Dict Source

>>> parseOnly parseDict "<</Key1(some string)/Key2 123>>"
Right (Dict [(Name "Key1",OStr (Str "some string")),(Name "Key2",ONumber (NumInt 123))])

parseArray :: Parser Array Source

>>> parseOnly parseArray "[1 (string) /Name []]"
Right (Array [ONumber (NumInt 1),OStr (Str "string"),OName (Name "Name"),OArray (Array [])])

parseName :: Parser Name Source

>>> parseOnly parseName "/Name"
Right (Name "Name")

parseStr :: Parser Str Source

>>> parseOnly parseStr "(hello)"
Right (Str "hello")

parseHexStr :: Parser Str Source

>>> parseOnly parseHexStr "<68656C6C6F>"
Right (Str "hello")

parseRef :: Parser Ref Source

>>> parseOnly parseRef "0 2 R"
Right (Ref 0 2)

parseNumber :: Parser Number Source

>>> parseOnly parseNumber "123"
Right (NumInt 123)
>>> parseOnly parseNumber "12.3"
Right (NumReal 12.3)
>>> parseOnly parseNumber ".01"
Right (NumReal 1.0e-2)

parseBoolean :: Parser Boolean Source

>>> parseOnly parseBoolean "true"
Right (Boolean True)
>>> parseOnly parseBoolean "false"
Right (Boolean False)


parseTillStreamData :: Parser () Source

Consumes input up to the start of the stream's data.

Use parseDict followed by parseTillStreamData to determine whether the object is a dictionary or a stream. If parseTillStreamData fails, the object is a dictionary. Otherwise it is a stream, and the current position in the input will point to the start of the stream's data.

>>> parse (parseDict >>= \dict -> parseTillStreamData >> return dict) "<</Key 123>>\nstream\n1234\nendstream"
Done "1234\nendstream" Dict [(Name "Key",ONumber (NumInt 123))]

parseIndirectObject :: Parser (Ref, Object ()) Source

Parse an indirect object. The input position should point to the offset given for the object in the XRef.

>>> parseOnly parseIndirectObject "1 2 obj\n12"
Right (Ref 1 2,ONumber (NumInt 12))

isRegularChar :: Char -> Bool Source

Whether the character can appear in a Name.