{- |
Copyright: (c) 2021 Scott Sedgwick
SPDX-License-Identifier: MIT
Maintainer: Scott Sedgwick <scott.sedgwick@gmail.com>
Stability: experimental
Portability: unknown

Simple function to extract PDF form field values from a PDF file.
-}

module Data.Pdf.FieldReader
  ( -- * File data parser
    --
    -- $fileDataParser
    readPdfFields
  ) where

import Prelude hiding (drop, init, lines)
import Data.ByteString (ByteString)
import Data.ByteString.Char8 (unpack)
import Data.List (foldl')
import Data.Map (Map, fromList)
import Data.Text (Text, drop, init, isPrefixOf, lines, pack, replace, strip)

-- $fileDataParser
-- 
-- Extract a Map of name-value pairs from the data read from a PDF file.
-- For example:
--
-- >import qualified Data.ByteString as B
-- >import Data.Pdf.FieldReader (readPdfFields)
-- >
-- >main :: IO()
-- >main = do
-- >  xs <- Data.ByteString.readFile "filename"
-- >  let ys = readPdfFields xs
-- >  print ys

-- | Read fields from file data
readPdfFields :: ByteString -> Map Text Text
readPdfFields :: ByteString -> Map Text Text
readPdfFields = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
fromList ([(Text, Text)] -> Map Text Text)
-> (ByteString -> [(Text, Text)]) -> ByteString -> Map Text Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Maybe Text, [(Text, Text)]) -> [(Text, Text)]
forall a b. (a, b) -> b
snd ((Maybe Text, [(Text, Text)]) -> [(Text, Text)])
-> (ByteString -> (Maybe Text, [(Text, Text)]))
-> ByteString
-> [(Text, Text)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Maybe Text, [(Text, Text)])
 -> Text -> (Maybe Text, [(Text, Text)]))
-> (Maybe Text, [(Text, Text)])
-> [Text]
-> (Maybe Text, [(Text, Text)])
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Maybe Text, [(Text, Text)])
-> Text -> (Maybe Text, [(Text, Text)])
f (Maybe Text
forall a. Maybe a
Nothing, []) ([Text] -> (Maybe Text, [(Text, Text)]))
-> (ByteString -> [Text])
-> ByteString
-> (Maybe Text, [(Text, Text)])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
lines (Text -> [Text]) -> (ByteString -> Text) -> ByteString -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
pack (String -> Text) -> (ByteString -> String) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
unpack
  where
    f :: (Maybe Text, [(Text, Text)])
-> Text -> (Maybe Text, [(Text, Text)])
f (Maybe Text
Nothing,  [(Text, Text)]
b) Text
x | Text -> Bool
isFldName Text
x  = (Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Text
fmtFldName Text
x), [(Text, Text)]
b) 
                      | Bool
otherwise    = (Maybe Text
forall a. Maybe a
Nothing, [(Text, Text)]
b)
    f ((Just Text
n), [(Text, Text)]
b) Text
x | Text -> Bool
isFldName Text
x  = (Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Text
fmtFldName Text
x), [(Text, Text)]
b)
                      | Text -> Bool
isFldValue Text
x = (Maybe Text
forall a. Maybe a
Nothing, (Text
n, (Text -> Text
fmtFldValue Text
x)) (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
forall a. a -> [a] -> [a]
: [(Text, Text)]
b)
                      | Bool
otherwise    = ((Text -> Maybe Text
forall a. a -> Maybe a
Just Text
n), [(Text, Text)]
b)
    isFldName :: Text -> Bool
isFldName     = Text -> Text -> Bool
isPrefixOf Text
"/T("
    fmtFldName :: Text -> Text
fmtFldName    = Text -> Text
stripBrackets
    isFldValue :: Text -> Bool
isFldValue    = Text -> Text -> Bool
isPrefixOf Text
"/V("
    fmtFldValue :: Text -> Text
fmtFldValue   = Text -> Text
unescape (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
stripBrackets
    stripBrackets :: Text -> Text
stripBrackets = Text -> Text
init (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
drop Int
3 (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
strip
    unescape :: Text -> Text
unescape Text
xs   = ((Text, Text) -> Text -> Text) -> Text -> [(Text, Text)] -> Text
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(Text
a, Text
b) Text
c -> Text -> Text -> Text -> Text
replace Text
a Text
b Text
c) Text
xs [(Text, Text)]
escPairs
    escPairs :: [(Text, Text)]
escPairs      = [(Text
"\\n", Text
"\n"), (Text
"\\(", Text
"("), (Text
"\\)", Text
")")]