{-# LANGUAGE OverloadedStrings #-}

-- | Parsers for XRef

module Pdf.Toolbox.Core.Parsers.XRef
(
  startXRef,
  tableXRef,
  parseSubsectionHeader,
  parseTrailerAfterTable,
  parseTableEntry
)
where

import Data.Int
import Data.Attoparsec (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as P
import Control.Applicative (many)

import Pdf.Toolbox.Core.Object.Types
import Pdf.Toolbox.Core.Parsers.Object
import Pdf.Toolbox.Core.Parsers.Util

-- for doctest
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Attoparsec.ByteString.Char8

-- | Offset of the very last xref table
--
-- Before calling it, make sure your are currently somewhere near
-- the end of pdf file. Otherwice it can eat all the memory.
-- E.g. examine only the last 1KB
--
-- >>> parseOnly startXRef "anything...startxref\n222\n%%EOF...blah\nstartxref\n123\n%%EOF"
-- Right 123
startXRef :: Parser Int64
startXRef = do
  res <- many $ do
    _ <- P.manyTill P.anyChar $ P.string "startxref"
    P.skipSpace
    offset <- P.decimal
    P.skipSpace
    _ <- P.string "%%EOF"
    return offset
  case res of
    [] -> fail "Trailer not found"
    xs -> return $ last xs

-- | When current input position points to xref stream
-- (or doesn't point to xref at all), the parser will fail.
-- When it points to xref table, the parser will succeed
-- and input position will point to the first xref subsection
--
-- >>> parseOnly tableXRef "xref\n"
-- Right ()
-- >>> parseOnly tableXRef "not xref"
-- Left "Failed reading: takeWith"
tableXRef :: Parser ()
tableXRef = do
  _ <- P.string "xref"
  endOfLine

-- | Parse subsection header, return (the first object index, number of object)
--
-- Input position will point to the first object
parseSubsectionHeader :: Parser (Int, Int)
parseSubsectionHeader = do
  start <- P.decimal
  P.skipSpace
  count <- P.decimal
  endOfLine
  return (start, count)

-- | Parse trailer located after XRef table
--
-- Input position should point to the \"trailer\" keyword
parseTrailerAfterTable :: Parser Dict
parseTrailerAfterTable = do
  _ <- P.string "trailer"
  endOfLine
  P.skipSpace
  parseDict

-- | Parse XRef table entry. Returns offset, generation and whether the object is free.
parseTableEntry :: Parser (Int64, Int, Bool)
parseTableEntry = do
  offset <- P.decimal
  P.skipSpace
  generation <- P.decimal
  P.skipSpace
  c <- P.anyChar
  case c of
    'n' -> return (offset, generation, False)
    'f' -> return (offset, generation, True)
    _ -> fail $ "error parsing XRef table entry: unknown char: " ++ [c]