module Data.Pdf.FieldReader
(
readPdfFields
) where
import Data.ByteString (ByteString)
import Data.Char (chr, digitToInt)
import Data.Functor ((<&>))
import Data.List (foldl')
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Text (Text)
import Data.Text.Encoding (decodeLatin1)
import Data.Void (Void)
import Text.Megaparsec (Parsec, (<|>), anySingle, chunk, many, parseMaybe, takeWhileP)
readPdfFields :: ByteString -> M.Map Text Text
readPdfFields :: ByteString -> Map Text Text
readPdfFields = Map Text Text
-> ([FieldPart] -> Map Text Text)
-> Maybe [FieldPart]
-> Map Text Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Map Text Text
forall k a. Map k a
M.empty [FieldPart] -> Map Text Text
collate (Maybe [FieldPart] -> Map Text Text)
-> (ByteString -> Maybe [FieldPart]) -> ByteString -> Map Text Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parsec Void Text [FieldPart] -> Text -> Maybe [FieldPart]
forall e s a. (Ord e, Stream s) => Parsec e s a -> s -> Maybe a
parseMaybe Parsec Void Text [FieldPart]
pdfFieldParser (Text -> Maybe [FieldPart])
-> (ByteString -> Text) -> ByteString -> Maybe [FieldPart]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
decodeLatin1
type Parser = Parsec Void Text
data FieldPart
= FieldName Text
| FieldValue Text
| FieldNothing
deriving stock (Int -> FieldPart -> ShowS
[FieldPart] -> ShowS
FieldPart -> String
(Int -> FieldPart -> ShowS)
-> (FieldPart -> String)
-> ([FieldPart] -> ShowS)
-> Show FieldPart
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FieldPart] -> ShowS
$cshowList :: [FieldPart] -> ShowS
show :: FieldPart -> String
$cshow :: FieldPart -> String
showsPrec :: Int -> FieldPart -> ShowS
$cshowsPrec :: Int -> FieldPart -> ShowS
Show, FieldPart -> FieldPart -> Bool
(FieldPart -> FieldPart -> Bool)
-> (FieldPart -> FieldPart -> Bool) -> Eq FieldPart
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: FieldPart -> FieldPart -> Bool
$c/= :: FieldPart -> FieldPart -> Bool
== :: FieldPart -> FieldPart -> Bool
$c== :: FieldPart -> FieldPart -> Bool
Eq)
collate :: [FieldPart] -> M.Map Text Text
collate :: [FieldPart] -> Map Text Text
collate = (Maybe Text, Map Text Text) -> Map Text Text
forall a b. (a, b) -> b
snd ((Maybe Text, Map Text Text) -> Map Text Text)
-> ([FieldPart] -> (Maybe Text, Map Text Text))
-> [FieldPart]
-> Map Text Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Maybe Text, Map Text Text)
-> FieldPart -> (Maybe Text, Map Text Text))
-> (Maybe Text, Map Text Text)
-> [FieldPart]
-> (Maybe Text, Map Text Text)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Maybe Text, Map Text Text)
-> FieldPart -> (Maybe Text, Map Text Text)
f (Maybe Text
forall a. Maybe a
Nothing, Map Text Text
forall k a. Map k a
M.empty)
where
f :: (Maybe Text, Map Text Text)
-> FieldPart -> (Maybe Text, Map Text Text)
f (Maybe Text
Nothing, Map Text Text
m) (FieldName Text
n) = (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
n, Map Text Text
m)
f (Maybe Text
Nothing, Map Text Text
m) FieldPart
_ = (Maybe Text
forall a. Maybe a
Nothing, Map Text Text
m)
f (Just Text
n, Map Text Text
m) (FieldValue Text
v) = (Maybe Text
forall a. Maybe a
Nothing, Text -> Text -> Map Text Text -> Map Text Text
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert Text
n Text
v Map Text Text
m)
f (Just Text
n, Map Text Text
m) FieldPart
_ = (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
n, Map Text Text
m)
pdfFieldParser :: Parser [FieldPart]
pdfFieldParser :: Parsec Void Text [FieldPart]
pdfFieldParser = ParsecT Void Text Identity FieldPart
-> Parsec Void Text [FieldPart]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (ParsecT Void Text Identity FieldPart
pFieldName ParsecT Void Text Identity FieldPart
-> ParsecT Void Text Identity FieldPart
-> ParsecT Void Text Identity FieldPart
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void Text Identity FieldPart
pFieldValue ParsecT Void Text Identity FieldPart
-> ParsecT Void Text Identity FieldPart
-> ParsecT Void Text Identity FieldPart
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void Text Identity FieldPart
pFieldHexValue ParsecT Void Text Identity FieldPart
-> ParsecT Void Text Identity FieldPart
-> ParsecT Void Text Identity FieldPart
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void Text Identity FieldPart
anythingElse)
anythingElse :: Parser FieldPart
anythingElse :: ParsecT Void Text Identity FieldPart
anythingElse = ParsecT Void Text Identity Char
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle ParsecT Void Text Identity Char
-> ParsecT Void Text Identity FieldPart
-> ParsecT Void Text Identity FieldPart
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> FieldPart -> ParsecT Void Text Identity FieldPart
forall (f :: * -> *) a. Applicative f => a -> f a
pure FieldPart
FieldNothing
pFieldName :: Parser FieldPart
pFieldName :: ParsecT Void Text Identity FieldPart
pFieldName = Text -> Text -> Parser Text
getBetween Text
"T(" Text
")" Parser Text
-> (Text -> FieldPart) -> ParsecT Void Text Identity FieldPart
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> Text -> FieldPart
FieldName
pFieldValue :: Parser FieldPart
pFieldValue :: ParsecT Void Text Identity FieldPart
pFieldValue = Text -> Text -> Parser Text
getBetween Text
"V(" Text
")" Parser Text
-> (Text -> FieldPart) -> ParsecT Void Text Identity FieldPart
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> Text -> FieldPart
FieldValue
pFieldHexValue :: Parser FieldPart
pFieldHexValue :: ParsecT Void Text Identity FieldPart
pFieldHexValue = Text -> Text -> Parser Text
getBetween Text
"V<" Text
">" Parser Text
-> (Text -> FieldPart) -> ParsecT Void Text Identity FieldPart
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> Text -> FieldPart
FieldValue (Text -> FieldPart) -> (Text -> Text) -> Text -> FieldPart
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
decodeHexField
getBetween :: Text -> Text -> Parser Text
getBetween :: Text -> Text -> Parser Text
getBetween Text
x Text
y = Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk Text
Tokens Text
x Parser Text -> Parser Text -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Maybe String
-> (Token Text -> Bool) -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP Maybe String
forall a. Maybe a
Nothing (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Text -> Char
T.head Text
y) Parser Text -> (Text -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Text
z -> Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk Text
Tokens Text
y Parser Text -> Parser Text -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
z
decodeHexField :: Text -> Text
decodeHexField :: Text -> Text
decodeHexField Text
xs = if [Text] -> Text
forall a. [a] -> a
head [Text]
ys Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"FEFF" then Text
"" else String -> Text
T.pack ((Text -> Char) -> [Text] -> String
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Char
chr (Int -> Char) -> (Text -> Int) -> Text -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Int
decodeHex) ([Text] -> [Text]
forall a. [a] -> [a]
tail [Text]
ys))
where
ys :: [Text]
ys = Int -> Text -> [Text]
splitBy Int
4 (Text -> [Text]) -> Text -> [Text]
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
T.concat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ Text -> [Text]
T.lines Text
xs
splitBy :: Int -> Text -> [Text]
splitBy :: Int -> Text -> [Text]
splitBy Int
_ Text
"" = []
splitBy Int
x Text
s = Int -> Text -> Text
T.take Int
x Text
s Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: Int -> Text -> [Text]
splitBy Int
x (Int -> Text -> Text
T.drop Int
x Text
s)
decodeHex :: Text -> Int
decodeHex :: Text -> Int
decodeHex = (Int -> Char -> Int) -> Int -> String -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Int
b Char
a -> Int
b Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
16 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
digitToInt Char
a) Int
0 (String -> Int) -> (Text -> String) -> Text -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack