{-# LANGUAGE TupleSections     #-}
{-# LANGUAGE ViewPatterns      #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.EPUB
   Copyright   : Copyright (C) 2014-2020 Matthew Pickering
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Conversion of EPUB to 'Pandoc' document.
-}

module Text.Pandoc.Readers.EPUB
  (readEPUB)
  where

import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                          toArchiveOrFail)
import Control.DeepSeq (NFData, deepseq)
import Control.Monad (guard, liftM, liftM2, mplus)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Map as M (Map, elems, fromList, lookup)
import Data.Maybe (mapMaybe)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Network.URI (unEscapeString)
import System.FilePath (dropFileName, dropFileName, normalise, splitFileName,
                        takeFileName, (</>))
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia)
import Text.Pandoc.Definition hiding (Attr)
import Text.Pandoc.Error
import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
import Text.Pandoc.MIME (MimeType)
import Text.Pandoc.Options (ReaderOptions (..))
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Shared (addMetaField, collapseFilePath, tshow)
import Text.Pandoc.URI (escapeURI)
import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
import Text.Pandoc.Walk (query, walk)
import Text.Pandoc.XML.Light

type Items = M.Map Text (FilePath, MimeType)

readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
readEPUB :: forall (m :: * -> *).
PandocMonad m =>
ReaderOptions -> ByteString -> m Pandoc
readEPUB ReaderOptions
opts ByteString
bytes = case ByteString -> Either String Archive
toArchiveOrFail ByteString
bytes of
  Right Archive
archive -> forall (m :: * -> *).
PandocMonad m =>
ReaderOptions -> Archive -> m Pandoc
archiveToEPUB ReaderOptions
opts Archive
archive
  Left  String
e       -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ Text -> PandocError
PandocParseError forall a b. (a -> b) -> a -> b
$
                     Text
"Couldn't extract ePub file: " forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack String
e

-- runEPUB :: Except PandocError a -> Either PandocError a
-- runEPUB = runExcept

-- Note that internal reference are aggressively normalised so that all ids
-- are of the form "filename#id"
--
archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
archiveToEPUB :: forall (m :: * -> *).
PandocMonad m =>
ReaderOptions -> Archive -> m Pandoc
archiveToEPUB ReaderOptions
os Archive
archive = do
  -- root is path to folder with manifest file in
  (String
root, Element
content) <- forall (m :: * -> *).
PandocMonad m =>
Archive -> m (String, Element)
getManifest Archive
archive
  (Maybe Text
coverId, Meta
meta) <- forall (m :: * -> *).
PandocMonad m =>
Element -> m (Maybe Text, Meta)
parseMeta Element
content
  (Maybe String
cover, Items
items)  <- forall (m :: * -> *).
PandocMonad m =>
Element -> Maybe Text -> m (Maybe String, Items)
parseManifest Element
content Maybe Text
coverId
  -- No need to collapse here as the image path is from the manifest file
  let coverDoc :: Pandoc
coverDoc = forall b a. b -> (a -> b) -> Maybe a -> b
maybe forall a. Monoid a => a
mempty String -> Pandoc
imageToPandoc Maybe String
cover
  [(String, Text)]
spine <- forall (m :: * -> *).
PandocMonad m =>
Items -> Element -> m [(String, Text)]
parseSpine Items
items Element
content
  let escapedSpine :: [Text]
escapedSpine = forall a b. (a -> b) -> [a] -> [b]
map (Text -> Text
escapeURI forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
takeFileName forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) [(String, Text)]
spine
  Pandoc Meta
_ [Block]
bs <-
      forall (m :: * -> *) a b.
(Monad m, NFData a) =>
(a -> b -> m a) -> a -> [b] -> m a
foldM' (\Pandoc
a (String, Text)
b -> ((Pandoc
a forall a. Semigroup a => a -> a -> a
<>) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. Walkable a b => (a -> a) -> b -> b
walk ([Text] -> Inline -> Inline
prependHash [Text]
escapedSpine))
        forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` forall (m :: * -> *).
PandocMonad m =>
String -> (String, Text) -> m Pandoc
parseSpineElem String
root (String, Text)
b) forall a. Monoid a => a
mempty [(String, Text)]
spine
  let ast :: Pandoc
ast = Pandoc
coverDoc forall a. Semigroup a => a -> a -> a
<> Meta -> [Block] -> Pandoc
Pandoc Meta
meta [Block]
bs
  forall (m :: * -> *).
PandocMonad m =>
[(String, Text)] -> String -> Archive -> Pandoc -> m ()
fetchImages (forall k a. Map k a -> [a]
M.elems Items
items) String
root Archive
archive Pandoc
ast
  forall (m :: * -> *) a. Monad m => a -> m a
return Pandoc
ast
  where
    os' :: ReaderOptions
os' = ReaderOptions
os {readerExtensions :: Extensions
readerExtensions = Extension -> Extensions -> Extensions
enableExtension Extension
Ext_raw_html (ReaderOptions -> Extensions
readerExtensions ReaderOptions
os)}
    parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
    parseSpineElem :: forall (m :: * -> *).
PandocMonad m =>
String -> (String, Text) -> m Pandoc
parseSpineElem (String -> String
normalise -> String
r) (String -> String
normalise -> String
path, Text
mime) = do
      Pandoc
doc <- forall (m :: * -> *).
PandocMonad m =>
Text -> String -> String -> m Pandoc
mimeToReader Text
mime String
r String
path
      let docSpan :: Pandoc
docSpan = Blocks -> Pandoc
B.doc forall a b. (a -> b) -> a -> b
$ Inlines -> Blocks
B.para forall a b. (a -> b) -> a -> b
$ Attr -> Inlines -> Inlines
B.spanWith (String -> Text
T.pack forall a b. (a -> b) -> a -> b
$ String -> String
takeFileName String
path, [], []) forall a. Monoid a => a
mempty
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Pandoc
docSpan forall a. Semigroup a => a -> a -> a
<> Pandoc
doc
    mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
    mimeToReader :: forall (m :: * -> *).
PandocMonad m =>
Text -> String -> String -> m Pandoc
mimeToReader Text
"application/xhtml+xml" (String -> String
unEscapeString -> String
root)
                                         (String -> String
unEscapeString -> String
path) = do
      Entry
fname <- forall (m :: * -> *). PandocMonad m => String -> Archive -> m Entry
findEntryByPathE (String
root String -> String -> String
</> String
path) Archive
archive
      Pandoc
html <- forall (m :: * -> *) a.
(PandocMonad m, ToSources a) =>
ReaderOptions -> a -> m Pandoc
readHtml ReaderOptions
os' forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
TL.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
TL.decodeUtf8 forall a b. (a -> b) -> a -> b
$ Entry -> ByteString
fromEntry Entry
fname
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ String -> Pandoc -> Pandoc
fixInternalReferences String
path Pandoc
html
    mimeToReader Text
s String
_ (String -> String
unEscapeString -> String
path)
      | Text
s forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
imageMimes = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ String -> Pandoc
imageToPandoc String
path
      | Bool
otherwise = forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Monoid a => a
mempty

-- paths should be absolute when this function is called
-- renameImages should do this
fetchImages :: PandocMonad m
            => [(FilePath, MimeType)]
            -> FilePath -- ^ Root
            -> Archive
            -> Pandoc
            -> m ()
fetchImages :: forall (m :: * -> *).
PandocMonad m =>
[(String, Text)] -> String -> Archive -> Pandoc -> m ()
fetchImages [(String, Text)]
mimes String
root Archive
arc (forall a b c. (Walkable a b, Monoid c) => (a -> c) -> b -> c
query Inline -> [String]
iq -> [String]
links) =
    forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (forall a b c d. (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 forall (m :: * -> *).
PandocMonad m =>
String -> Maybe Text -> ByteString -> m ()
insertMedia) (forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe String -> Maybe (String, Maybe Text, ByteString)
getEntry [String]
links)
  where
    getEntry :: String -> Maybe (String, Maybe Text, ByteString)
getEntry String
link =
        let abslink :: String
abslink = String -> String
normalise (String -> String
unEscapeString (String
root String -> String -> String
</> String
link)) in
        (String
link , forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
link [(String, Text)]
mimes, ) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry -> ByteString
fromEntry
          forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Archive -> Maybe Entry
findEntryByPath String
abslink Archive
arc

iq :: Inline -> [FilePath]
iq :: Inline -> [String]
iq (Image Attr
_ [Inline]
_ (Text
url, Text
_)) = [Text -> String
T.unpack Text
url]
iq Inline
_                    = []

-- Remove relative paths
renameImages :: FilePath -> Inline -> Inline
renameImages :: String -> Inline -> Inline
renameImages String
root img :: Inline
img@(Image Attr
attr [Inline]
a (Text
url, Text
b))
  | Text
"data:" Text -> Text -> Bool
`T.isPrefixOf` Text
url = Inline
img
  | Bool
otherwise                  = Attr -> [Inline] -> (Text, Text) -> Inline
Image Attr
attr [Inline]
a ( String -> Text
T.pack forall a b. (a -> b) -> a -> b
$ String -> String
collapseFilePath (String
root String -> String -> String
</> Text -> String
T.unpack Text
url)
                                              , Text
b)
renameImages String
_ Inline
x = Inline
x

imageToPandoc :: FilePath -> Pandoc
imageToPandoc :: String -> Pandoc
imageToPandoc String
s = Blocks -> Pandoc
B.doc forall b c a. (b -> c) -> (a -> b) -> a -> c
. Inlines -> Blocks
B.para forall a b. (a -> b) -> a -> b
$ Text -> Text -> Inlines -> Inlines
B.image (String -> Text
T.pack String
s) Text
"" forall a. Monoid a => a
mempty

imageMimes :: [MimeType]
imageMimes :: [Text]
imageMimes = [Text
"image/gif", Text
"image/jpeg", Text
"image/png"]

type CoverId = Text

type CoverImage = FilePath

parseManifest :: (PandocMonad m)
              => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
parseManifest :: forall (m :: * -> *).
PandocMonad m =>
Element -> Maybe Text -> m (Maybe String, Items)
parseManifest Element
content Maybe Text
coverId = do
  Element
manifest <- forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE (Text -> QName
dfName Text
"manifest") Element
content
  let items :: [Element]
items = QName -> Element -> [Element]
findChildren (Text -> QName
dfName Text
"item") Element
manifest
  [(Text, (String, Text))]
r <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall {m :: * -> *}.
PandocMonad m =>
Element -> m (Text, (String, Text))
parseItem [Element]
items
  let cover :: Maybe Text
cover = QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"href") forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (Element -> Bool) -> Element -> Maybe Element
filterChild Element -> Bool
findCover Element
manifest
  forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> String
T.unpack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Maybe Text
cover forall (m :: * -> *) a. MonadPlus m => m a -> m a -> m a
`mplus` Maybe Text
coverId), forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [(Text, (String, Text))]
r)
  where
    findCover :: Element -> Bool
findCover Element
e = forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (Text -> Text -> Bool
T.isInfixOf Text
"cover-image")
                  (QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"properties") Element
e)
               Bool -> Bool -> Bool
|| forall a. a -> Maybe a
Just Bool
True forall a. Eq a => a -> a -> Bool
== forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 forall a. Eq a => a -> a -> Bool
(==) Maybe Text
coverId (QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"id") Element
e)
    parseItem :: Element -> m (Text, (String, Text))
parseItem Element
e = do
      Text
uid <- forall (m :: * -> *). PandocMonad m => QName -> Element -> m Text
findAttrE (Text -> QName
emptyName Text
"id") Element
e
      Text
href <- forall (m :: * -> *). PandocMonad m => QName -> Element -> m Text
findAttrE (Text -> QName
emptyName Text
"href") Element
e
      Text
mime <- forall (m :: * -> *). PandocMonad m => QName -> Element -> m Text
findAttrE (Text -> QName
emptyName Text
"media-type") Element
e
      forall (m :: * -> *) a. Monad m => a -> m a
return (Text
uid, (Text -> String
T.unpack Text
href, Text
mime))

parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine :: forall (m :: * -> *).
PandocMonad m =>
Items -> Element -> m [(String, Text)]
parseSpine Items
is Element
e = do
  Element
spine <- forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE (Text -> QName
dfName Text
"spine") Element
e
  let itemRefs :: [Element]
itemRefs = QName -> Element -> [Element]
findChildren (Text -> QName
dfName Text
"itemref") Element
spine
  forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE Text
"parseSpine" forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Items
is) forall a b. (a -> b) -> a -> b
$ forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Element -> Maybe Text
parseItemRef [Element]
itemRefs
  where
    parseItemRef :: Element -> Maybe Text
parseItemRef Element
ref = do
      let linear :: Bool
linear = forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
True (forall a. Eq a => a -> a -> Bool
== Text
"yes") (QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"linear") Element
ref)
      forall (f :: * -> *). Alternative f => Bool -> f ()
guard Bool
linear
      QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"idref") Element
ref

parseMeta :: PandocMonad m => Element -> m (Maybe CoverId, Meta)
parseMeta :: forall (m :: * -> *).
PandocMonad m =>
Element -> m (Maybe Text, Meta)
parseMeta Element
content = do
  Element
meta <- forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE (Text -> QName
dfName Text
"metadata") Element
content
  let dcspace :: QName -> Bool
dcspace (QName Text
_ (Just Text
"http://purl.org/dc/elements/1.1/") (Just Text
"dc")) = Bool
True
      dcspace QName
_ = Bool
False
  let dcs :: [Element]
dcs = (QName -> Bool) -> Element -> [Element]
filterChildrenName QName -> Bool
dcspace Element
meta
  let r :: Meta
r = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Element -> Meta -> Meta
parseMetaItem Meta
nullMeta [Element]
dcs
  let coverId :: Maybe Text
coverId = QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"content") forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (Element -> Bool) -> Element -> Maybe Element
filterChild Element -> Bool
findCover Element
meta
  forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Text
coverId, Meta
r)
  where
    findCover :: Element -> Bool
findCover Element
e = QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"name") Element
e forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just Text
"cover"

-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem e :: Element
e@(QName -> Text
stripNamespace forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> QName
elName -> Text
field) Meta
meta =
  forall a. ToMetaValue a => Text -> a -> Meta -> Meta
addMetaField (Text -> Text
renameMeta Text
field) (Text -> Inlines
B.str forall a b. (a -> b) -> a -> b
$ Element -> Text
strContent Element
e) Meta
meta

renameMeta :: Text -> Text
renameMeta :: Text -> Text
renameMeta Text
"creator" = Text
"author"
renameMeta Text
s         = Text
s

getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest :: forall (m :: * -> *).
PandocMonad m =>
Archive -> m (String, Element)
getManifest Archive
archive = do
  Entry
metaEntry <- forall (m :: * -> *). PandocMonad m => String -> Archive -> m Entry
findEntryByPathE (String
"META-INF" String -> String -> String
</> String
"container.xml") Archive
archive
  Element
docElem <- forall (m :: * -> *). PandocMonad m => Entry -> m Element
parseXMLDocE Entry
metaEntry
  let namespaces :: [(Text, Text)]
namespaces = forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Attr -> Maybe (Text, Text)
attrToNSPair (Element -> [Attr]
elAttribs Element
docElem)
  Text
ns <- forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE Text
"xmlns not in namespaces" (forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"xmlns" [(Text, Text)]
namespaces)
  [(Text, Text)]
as <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a b. (a -> b) -> [a] -> [b]
map Attr -> (Text, Text)
attrToPair forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> [Attr]
elAttribs)
    (forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE (Text -> Maybe Text -> Maybe Text -> QName
QName Text
"rootfile" (forall a. a -> Maybe a
Just Text
ns) forall a. Maybe a
Nothing) Element
docElem)
  String
manifestFile <- Text -> String
T.unpack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE Text
"Root not found" (forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"full-path" [(Text, Text)]
as)
  let rootdir :: String
rootdir = String -> String
dropFileName String
manifestFile
  --mime <- lookup "media-type" as
  Entry
manifest <- forall (m :: * -> *). PandocMonad m => String -> Archive -> m Entry
findEntryByPathE String
manifestFile Archive
archive
  (String
rootdir,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *). PandocMonad m => Entry -> m Element
parseXMLDocE Entry
manifest

-- Fixup

fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences :: String -> Pandoc -> Pandoc
fixInternalReferences String
pathToFile =
   forall a b. Walkable a b => (a -> a) -> b -> b
walk (String -> Inline -> Inline
renameImages String
root)
  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. Walkable a b => (a -> a) -> b -> b
walk (String -> Block -> Block
fixBlockIRs String
filename)
  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. Walkable a b => (a -> a) -> b -> b
walk (String -> Inline -> Inline
fixInlineIRs String
filename)
  where
    (String
root, Text -> String
T.unpack forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
escapeURI forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack -> String
filename) =
      String -> (String, String)
splitFileName String
pathToFile

fixInlineIRs :: String -> Inline -> Inline
fixInlineIRs :: String -> Inline -> Inline
fixInlineIRs String
s (Span Attr
as [Inline]
v) =
  Attr -> [Inline] -> Inline
Span (String -> Attr -> Attr
fixAttrs String
s Attr
as) [Inline]
v
fixInlineIRs String
s (Code Attr
as Text
code) =
  Attr -> Text -> Inline
Code (String -> Attr -> Attr
fixAttrs String
s Attr
as) Text
code
fixInlineIRs String
s (Link Attr
as [Inline]
is (Text -> Maybe (Char, Text)
T.uncons -> Just (Char
'#', Text
url), Text
tit)) =
  Attr -> [Inline] -> (Text, Text) -> Inline
Link (String -> Attr -> Attr
fixAttrs String
s Attr
as) [Inline]
is (String -> Text -> Text
addHash String
s Text
url, Text
tit)
fixInlineIRs String
s (Link Attr
as [Inline]
is (Text, Text)
t) =
  Attr -> [Inline] -> (Text, Text) -> Inline
Link (String -> Attr -> Attr
fixAttrs String
s Attr
as) [Inline]
is (Text, Text)
t
fixInlineIRs String
_ Inline
v = Inline
v

prependHash :: [Text] -> Inline -> Inline
prependHash :: [Text] -> Inline -> Inline
prependHash [Text]
ps l :: Inline
l@(Link Attr
attr [Inline]
is (Text
url, Text
tit))
  | forall (t :: * -> *). Foldable t => t Bool -> Bool
or [Text
s Text -> Text -> Bool
`T.isPrefixOf` Text
url | Text
s <- [Text]
ps] =
    Attr -> [Inline] -> (Text, Text) -> Inline
Link Attr
attr [Inline]
is (Text
"#" forall a. Semigroup a => a -> a -> a
<> Text
url, Text
tit)
  | Bool
otherwise = Inline
l
prependHash [Text]
_ Inline
i = Inline
i

fixBlockIRs :: String -> Block -> Block
fixBlockIRs :: String -> Block -> Block
fixBlockIRs String
s (Div Attr
as [Block]
b) =
  Attr -> [Block] -> Block
Div (String -> Attr -> Attr
fixAttrs String
s Attr
as) [Block]
b
fixBlockIRs String
s (Header Int
i Attr
as [Inline]
b) =
  Int -> Attr -> [Inline] -> Block
Header Int
i (String -> Attr -> Attr
fixAttrs String
s Attr
as) [Inline]
b
fixBlockIRs String
s (CodeBlock Attr
as Text
code) =
  Attr -> Text -> Block
CodeBlock (String -> Attr -> Attr
fixAttrs String
s Attr
as) Text
code
fixBlockIRs String
_ Block
b = Block
b

fixAttrs :: FilePath -> B.Attr -> B.Attr
fixAttrs :: String -> Attr -> Attr
fixAttrs String
s (Text
ident, [Text]
cs, [(Text, Text)]
kvs) =
  (String -> Text -> Text
addHash String
s Text
ident, forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text]
cs, [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs [(Text, Text)]
kvs)

addHash :: FilePath -> Text -> Text
addHash :: String -> Text -> Text
addHash String
_ Text
""    = Text
""
addHash String
s Text
ident = String -> Text
T.pack (String -> String
takeFileName String
s) forall a. Semigroup a => a -> a -> a
<> Text
"#" forall a. Semigroup a => a -> a -> a
<> Text
ident

removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs [(Text, Text)]
kvs = forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Text, a) -> Bool
isEPUBAttr) [(Text, Text)]
kvs

isEPUBAttr :: (Text, a) -> Bool
isEPUBAttr :: forall a. (Text, a) -> Bool
isEPUBAttr (Text
k, a
_) = Text
"epub:" Text -> Text -> Bool
`T.isPrefixOf` Text
k

-- Library

-- Strict version of foldM
foldM' :: (Monad m, NFData a) => (a -> b -> m a) -> a -> [b] -> m a
foldM' :: forall (m :: * -> *) a b.
(Monad m, NFData a) =>
(a -> b -> m a) -> a -> [b] -> m a
foldM' a -> b -> m a
_ a
z [] = forall (m :: * -> *) a. Monad m => a -> m a
return a
z
foldM' a -> b -> m a
f a
z (b
x:[b]
xs) = do
  a
z' <- a -> b -> m a
f a
z b
x
  a
z' forall a b. NFData a => a -> b -> b
`deepseq` forall (m :: * -> *) a b.
(Monad m, NFData a) =>
(a -> b -> m a) -> a -> [b] -> m a
foldM' a -> b -> m a
f a
z' [b]
xs

uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 :: forall a b c d. (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 a -> b -> c -> d
f (a
a, b
b, c
c) = a -> b -> c -> d
f a
a b
b c
c

-- Utility

stripNamespace :: QName -> Text
stripNamespace :: QName -> Text
stripNamespace (QName Text
v Maybe Text
_ Maybe Text
_) = Text
v

attrToNSPair :: Attr -> Maybe (Text, Text)
attrToNSPair :: Attr -> Maybe (Text, Text)
attrToNSPair (Attr (QName Text
"xmlns" Maybe Text
_ Maybe Text
_) Text
val) = forall a. a -> Maybe a
Just (Text
"xmlns", Text
val)
attrToNSPair Attr
_                              = forall a. Maybe a
Nothing

attrToPair :: Attr -> (Text, Text)
attrToPair :: Attr -> (Text, Text)
attrToPair (Attr (QName Text
name Maybe Text
_ Maybe Text
_) Text
val) = (Text
name, Text
val)

defaultNameSpace :: Maybe Text
defaultNameSpace :: Maybe Text
defaultNameSpace = forall a. a -> Maybe a
Just Text
"http://www.idpf.org/2007/opf"

dfName :: Text -> QName
dfName :: Text -> QName
dfName Text
s = Text -> Maybe Text -> Maybe Text -> QName
QName Text
s Maybe Text
defaultNameSpace forall a. Maybe a
Nothing

emptyName :: Text -> QName
emptyName :: Text -> QName
emptyName Text
s = Text -> Maybe Text -> Maybe Text -> QName
QName Text
s forall a. Maybe a
Nothing forall a. Maybe a
Nothing

-- Convert Maybe interface to Either

findAttrE :: PandocMonad m => QName -> Element -> m Text
findAttrE :: forall (m :: * -> *). PandocMonad m => QName -> Element -> m Text
findAttrE QName
q Element
e = forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE Text
"findAttr" forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr QName
q Element
e

findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
findEntryByPathE :: forall (m :: * -> *). PandocMonad m => String -> Archive -> m Entry
findEntryByPathE (String -> String
normalise forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
unEscapeString -> String
path) Archive
a =
  forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE (Text
"No entry on path: " forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack String
path) forall a b. (a -> b) -> a -> b
$ String -> Archive -> Maybe Entry
findEntryByPath String
path Archive
a

parseXMLDocE :: PandocMonad m => Entry -> m Element
parseXMLDocE :: forall (m :: * -> *). PandocMonad m => Entry -> m Element
parseXMLDocE Entry
entry =
  forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> PandocError
PandocXMLError Text
fp) forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Either Text Element
parseXMLElement Text
doc
 where
  doc :: Text
doc = ByteString -> Text
UTF8.toTextLazy forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry -> ByteString
fromEntry forall a b. (a -> b) -> a -> b
$ Entry
entry
  fp :: Text
fp  = String -> Text
T.pack forall a b. (a -> b) -> a -> b
$ Entry -> String
eRelativePath Entry
entry

findElementE :: PandocMonad m => QName -> Element -> m Element
findElementE :: forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE QName
e Element
x =
  forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE (Text
"Unable to find element: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> Text
tshow QName
e) forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Element
findElement QName
e Element
x

mkE :: PandocMonad m => Text -> Maybe a -> m a
mkE :: forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE Text
s = forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> PandocError
PandocParseError forall a b. (a -> b) -> a -> b
$ Text
s) forall (m :: * -> *) a. Monad m => a -> m a
return