{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes       #-}
{-|
Module      : $header$
Copyright   : (c) Laurent P René de Cotret, 2020
License     : GNU GPL, version 2 or above
Maintainer  : laurent.decotret@outlook.com
Stability   : internal
Portability : portable

Embedding HTML content
-}

module Text.Pandoc.Filter.Plot.Embed (
      extractPlot
    , toFigure
) where

import           Data.Default                      (def)
import           Data.List                         (nub)
import           Data.Maybe                        (fromMaybe)
import           Data.Text                         (Text, pack)
import qualified Data.Text.IO                      as T

import           System.FilePath                   (replaceExtension)

import           Text.HTML.TagSoup

import           Text.Pandoc.Builder               (fromList, imageWith, link,
                                                    para, toList, Inlines)
import           Text.Pandoc.Class                 (runPure)
import           Text.Pandoc.Definition            (Pandoc(..), Block (..), Format, Attr)
import           Text.Pandoc.Error                 (handleError)
import           Text.Pandoc.Writers.HTML          (writeHtml5String)

import           Text.Pandoc.Filter.Plot.Parse     (captionReader)
import           Text.Pandoc.Filter.Plot.Monad
import           Text.Pandoc.Filter.Plot.Scripting (figurePath)

import           Text.Shakespeare.Text             (st)


-- | Convert a @FigureSpec@ to a Pandoc figure component.

-- Note that the script to generate figure files must still

-- be run in another function.

toFigure :: Format       -- ^ text format of the caption

         -> FigureSpec 
         -> PlotM Block
toFigure :: Format -> FigureSpec -> PlotM Block
toFigure Format
fmt FigureSpec
spec = do
    FilePath
target <- FigureSpec -> PlotM FilePath
figurePath FigureSpec
spec
    let srcLink :: Inlines
srcLink = Text -> Text -> Inlines -> Inlines
link (FilePath -> Text
pack (FilePath -> Text) -> FilePath -> Text
forall a b. (a -> b) -> a -> b
$ FilePath -> FilePath -> FilePath
replaceExtension FilePath
target FilePath
".txt") Text
forall a. Monoid a => a
mempty Inlines
"Source code"
        attrs' :: Attr
attrs'       = FigureSpec -> Attr
blockAttrs FigureSpec
spec
        withSource' :: Bool
withSource'  = FigureSpec -> Bool
withSource FigureSpec
spec
        captionText :: Inlines
captionText  = [Inline] -> Inlines
forall a. [a] -> Many a
fromList ([Inline] -> Inlines) -> [Inline] -> Inlines
forall a b. (a -> b) -> a -> b
$ [Inline] -> Maybe [Inline] -> [Inline]
forall a. a -> Maybe a -> a
fromMaybe [Inline]
forall a. Monoid a => a
mempty (Format -> Text -> Maybe [Inline]
captionReader Format
fmt (Text -> Maybe [Inline]) -> Text -> Maybe [Inline]
forall a b. (a -> b) -> a -> b
$ FigureSpec -> Text
caption FigureSpec
spec)
        captionLinks :: Inlines
captionLinks = [Inlines] -> Inlines
forall a. Monoid a => [a] -> a
mconcat [Inlines
" (", Inlines
srcLink, Inlines
")"]
        caption' :: Inlines
caption'     = if Bool
withSource' then Inlines
captionText Inlines -> Inlines -> Inlines
forall a. Semigroup a => a -> a -> a
<> Inlines
captionLinks else Inlines
captionText
    Attr -> FilePath -> Inlines -> PlotM Block
builder Attr
attrs' FilePath
target Inlines
caption'
    where
        builder :: Attr -> FilePath -> Inlines -> PlotM Block
builder      = if FigureSpec -> SaveFormat
saveFormat FigureSpec
spec SaveFormat -> SaveFormat -> Bool
forall a. Eq a => a -> a -> Bool
== SaveFormat
HTML
                        then Attr -> FilePath -> Inlines -> PlotM Block
interactiveBlock
                        else Attr -> FilePath -> Inlines -> PlotM Block
figure


figure :: Attr
       -> FilePath
       -> Inlines
       -> PlotM Block
-- To render images as figures with captions, the target title

-- must be "fig:"

-- Janky? yes

figure :: Attr -> FilePath -> Inlines -> PlotM Block
figure Attr
as FilePath
fp Inlines
caption' = Block -> PlotM Block
forall (m :: * -> *) a. Monad m => a -> m a
return (Block -> PlotM Block)
-> (Inlines -> Block) -> Inlines -> PlotM Block
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Block] -> Block
forall a. [a] -> a
head ([Block] -> Block) -> (Inlines -> [Block]) -> Inlines -> Block
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Many Block -> [Block]
forall a. Many a -> [a]
toList (Many Block -> [Block])
-> (Inlines -> Many Block) -> Inlines -> [Block]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Inlines -> Many Block
para (Inlines -> PlotM Block) -> Inlines -> PlotM Block
forall a b. (a -> b) -> a -> b
$ 
                        Attr -> Text -> Text -> Inlines -> Inlines
imageWith Attr
as (FilePath -> Text
pack FilePath
fp) Text
"fig:" Inlines
caption'


interactiveBlock :: Attr
                 -> FilePath
                 -> Inlines
                 -> PlotM Block
interactiveBlock :: Attr -> FilePath -> Inlines -> PlotM Block
interactiveBlock Attr
_ FilePath
fp Inlines
caption' = do
    -- TODO: should we instead include the scripts in the "include-after"

    --       template variable?

    --       See https://github.com/jgm/pandoc/issues/6582

    Text
htmlpage <- IO Text -> StateT PlotState (ReaderT RuntimeEnv IO) Text
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Text -> StateT PlotState (ReaderT RuntimeEnv IO) Text)
-> IO Text -> StateT PlotState (ReaderT RuntimeEnv IO) Text
forall a b. (a -> b) -> a -> b
$ FilePath -> IO Text
T.readFile FilePath
fp
    Text
renderedCaption <- Inlines -> StateT PlotState (ReaderT RuntimeEnv IO) Text
writeHtml Inlines
caption'
    Block -> PlotM Block
forall (m :: * -> *) a. Monad m => a -> m a
return (Block -> PlotM Block) -> Block -> PlotM Block
forall a b. (a -> b) -> a -> b
$ Format -> Text -> Block
RawBlock Format
"html5" [st|
<figure>
    <div>
    #{extractPlot htmlpage}
    </div>
    <figcaption>#{renderedCaption}</figcaption>
</figure>
    |]


-- | Convert Pandoc inlines to html

writeHtml :: Inlines -> PlotM Text
writeHtml :: Inlines -> StateT PlotState (ReaderT RuntimeEnv IO) Text
writeHtml Inlines
is = IO Text -> StateT PlotState (ReaderT RuntimeEnv IO) Text
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Text -> StateT PlotState (ReaderT RuntimeEnv IO) Text)
-> IO Text -> StateT PlotState (ReaderT RuntimeEnv IO) Text
forall a b. (a -> b) -> a -> b
$ Either PandocError Text -> IO Text
forall a. Either PandocError a -> IO a
handleError (Either PandocError Text -> IO Text)
-> Either PandocError Text -> IO Text
forall a b. (a -> b) -> a -> b
$ PandocPure Text -> Either PandocError Text
forall a. PandocPure a -> Either PandocError a
runPure (PandocPure Text -> Either PandocError Text)
-> PandocPure Text -> Either PandocError Text
forall a b. (a -> b) -> a -> b
$ WriterOptions -> Pandoc -> PandocPure Text
forall (m :: * -> *).
PandocMonad m =>
WriterOptions -> Pandoc -> m Text
writeHtml5String WriterOptions
forall a. Default a => a
def Pandoc
document
    where
        document :: Pandoc
document = Meta -> [Block] -> Pandoc
Pandoc Meta
forall a. Monoid a => a
mempty [[Inline] -> Block
Para ([Inline] -> Block) -> (Inlines -> [Inline]) -> Inlines -> Block
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Inlines -> [Inline]
forall a. Many a -> [a]
toList (Inlines -> Block) -> Inlines -> Block
forall a b. (a -> b) -> a -> b
$ Inlines
is]


-- | Extract the plot-relevant content from inside of a full HTML document.

-- Scripts contained in the <head> tag are extracted, as well as the entirety of the

-- <body> tag.

extractPlot :: Text -> Text
extractPlot :: Text -> Text
extractPlot Text
t = let tags :: [Tag Text]
tags = [Tag Text] -> [Tag Text]
forall str. StringLike str => [Tag str] -> [Tag str]
canonicalizeTags ([Tag Text] -> [Tag Text]) -> [Tag Text] -> [Tag Text]
forall a b. (a -> b) -> a -> b
$ ParseOptions Text -> Text -> [Tag Text]
forall str. StringLike str => ParseOptions str -> str -> [Tag str]
parseTagsOptions ParseOptions Text
forall str. StringLike str => ParseOptions str
parseOptionsFast Text
t  
                    extracted :: [[Tag Text]]
extracted = [Tag Text] -> [[Tag Text]]
headScripts [Tag Text]
tags [[Tag Text]] -> [[Tag Text]] -> [[Tag Text]]
forall a. Semigroup a => a -> a -> a
<> [Text -> [Tag Text] -> [Tag Text]
inside Text
"body" ([Tag Text] -> [Tag Text]) -> [Tag Text] -> [Tag Text]
forall a b. (a -> b) -> a -> b
$ [Tag Text]
tags]
                in [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [Tag Text] -> Text
forall str. StringLike str => [Tag str] -> str
renderTags ([Tag Text] -> Text) -> [[Tag Text]] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ([Tag Text] -> [Tag Text]
deferScripts ([Tag Text] -> [Tag Text]) -> [[Tag Text]] -> [[Tag Text]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[Tag Text]]
extracted)
    where
        headScripts :: [Tag Text] -> [[Tag Text]]
headScripts = (Tag Text -> Bool) -> [Tag Text] -> [[Tag Text]]
forall a. (a -> Bool) -> [a] -> [[a]]
partitions (Tag Text -> FilePath -> Bool
forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
~== (FilePath
"<script>"::String)) ([Tag Text] -> [[Tag Text]])
-> ([Tag Text] -> [Tag Text]) -> [Tag Text] -> [[Tag Text]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Tag Text] -> [Tag Text]
inside Text
"head"


-- | Get content inside a tag, e.g. /inside "body"/ returns all tags

-- between /<body>/ and /</body>/

inside :: Text -> [Tag Text] -> [Tag Text]
inside :: Text -> [Tag Text] -> [Tag Text]
inside Text
t = [Tag Text] -> [Tag Text]
forall a. [a] -> [a]
init ([Tag Text] -> [Tag Text])
-> ([Tag Text] -> [Tag Text]) -> [Tag Text] -> [Tag Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag Text] -> [Tag Text]
forall a. [a] -> [a]
tail ([Tag Text] -> [Tag Text])
-> ([Tag Text] -> [Tag Text]) -> [Tag Text] -> [Tag Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag Text] -> [Tag Text]
tgs
    where
        tgs :: [Tag Text] -> [Tag Text]
tgs = (Tag Text -> Bool) -> [Tag Text] -> [Tag Text]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Tag Text -> Tag Text -> Bool
forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
~/= Text -> Tag Text
forall str. str -> Tag str
TagClose Text
t) ([Tag Text] -> [Tag Text])
-> ([Tag Text] -> [Tag Text]) -> [Tag Text] -> [Tag Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Tag Text -> Bool) -> [Tag Text] -> [Tag Text]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Tag Text -> Tag Text -> Bool
forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
~/= Text -> [Attribute Text] -> Tag Text
forall str. str -> [Attribute str] -> Tag str
TagOpen Text
t [])


data ScriptTag = InlineScript [Attribute Text]
               | ExternalScript [Attribute Text]


fromTag :: Tag Text -> Maybe ScriptTag
fromTag :: Tag Text -> Maybe ScriptTag
fromTag (TagOpen Text
"script" [Attribute Text]
attrs) = 
    ScriptTag -> Maybe ScriptTag
forall a. a -> Maybe a
Just (ScriptTag -> Maybe ScriptTag) -> ScriptTag -> Maybe ScriptTag
forall a b. (a -> b) -> a -> b
$ if Text
"src" Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (([Text], [Text]) -> [Text]
forall a b. (a, b) -> a
fst (([Text], [Text]) -> [Text])
-> ([Attribute Text] -> ([Text], [Text]))
-> [Attribute Text]
-> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Attribute Text] -> ([Text], [Text])
forall a b. [(a, b)] -> ([a], [b])
unzip ([Attribute Text] -> [Text]) -> [Attribute Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ [Attribute Text]
attrs) 
        then [Attribute Text] -> ScriptTag
ExternalScript [Attribute Text]
attrs
        else [Attribute Text] -> ScriptTag
InlineScript [Attribute Text]
attrs
fromTag Tag Text
_ = Maybe ScriptTag
forall a. Maybe a
Nothing


toTag :: ScriptTag -> Tag Text
toTag :: ScriptTag -> Tag Text
toTag (InlineScript [Attribute Text]
t)   = Text -> [Attribute Text] -> Tag Text
forall str. str -> [Attribute str] -> Tag str
TagOpen Text
"script" [Attribute Text]
t
toTag (ExternalScript [Attribute Text]
t) = Text -> [Attribute Text] -> Tag Text
forall str. str -> [Attribute str] -> Tag str
TagOpen Text
"script" [Attribute Text]
t


deferScript :: ScriptTag -> ScriptTag
deferScript :: ScriptTag -> ScriptTag
deferScript (InlineScript   [Attribute Text]
attrs) = [Attribute Text] -> ScriptTag
InlineScript   ([Attribute Text] -> ScriptTag) -> [Attribute Text] -> ScriptTag
forall a b. (a -> b) -> a -> b
$ [Attribute Text] -> [Attribute Text]
forall a. Eq a => [a] -> [a]
nub ([Attribute Text] -> [Attribute Text])
-> [Attribute Text] -> [Attribute Text]
forall a b. (a -> b) -> a -> b
$ [Attribute Text]
attrs [Attribute Text] -> [Attribute Text] -> [Attribute Text]
forall a. Semigroup a => a -> a -> a
<> [(Text
"type", Text
"module")]
deferScript (ExternalScript [Attribute Text]
attrs) = [Attribute Text] -> ScriptTag
ExternalScript ([Attribute Text] -> ScriptTag) -> [Attribute Text] -> ScriptTag
forall a b. (a -> b) -> a -> b
$ [Attribute Text] -> [Attribute Text]
forall a. Eq a => [a] -> [a]
nub ([Attribute Text] -> [Attribute Text])
-> [Attribute Text] -> [Attribute Text]
forall a b. (a -> b) -> a -> b
$ [Attribute Text]
attrs [Attribute Text] -> [Attribute Text] -> [Attribute Text]
forall a. Semigroup a => a -> a -> a
<> [(Text
"defer", Text
forall a. Monoid a => a
mempty)]


-- | Replace /<script src=...>/ tags with /<script src=... defer>/,

-- and inline scripts as /<script type="module">/.

-- This makes scripts execute only after HTML parsing has finished.

deferScripts :: [Tag Text] -> [Tag Text]
deferScripts :: [Tag Text] -> [Tag Text]
deferScripts = (Tag Text -> Tag Text) -> [Tag Text] -> [Tag Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\Tag Text
t -> Tag Text -> (ScriptTag -> Tag Text) -> Maybe ScriptTag -> Tag Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Tag Text
t (ScriptTag -> Tag Text
toTag (ScriptTag -> Tag Text)
-> (ScriptTag -> ScriptTag) -> ScriptTag -> Tag Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ScriptTag -> ScriptTag
deferScript) (Tag Text -> Maybe ScriptTag
fromTag Tag Text
t))