{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}

-- |

-- Module      : $header$

-- Copyright   : (c) Laurent P René de Cotret, 2019 - 2021

-- License     : GNU GPL, version 2 or above

-- Maintainer  : laurent.decotret@outlook.com

-- Stability   : internal

-- Portability : portable

--

-- Utilities to clean pandoc-plot output directories.

module Text.Pandoc.Filter.Plot.Clean
  ( cleanOutputDirs,
    outputDirs,
    readDoc,
  )
where

-- TODO: forConcurrently

import Control.Monad.Reader (forM)
import qualified Data.ByteString.Lazy as B
import Data.Char (toLower)
import Data.Default (def)
import Data.List (nub)
import Data.Maybe (catMaybes, fromMaybe)
import Data.Text (Text, pack)
import qualified Data.Text.IO as Text
import System.Directory (removePathForcibly)
import System.FilePath (takeExtension)
import Text.Pandoc.Class (runIO)
import Text.Pandoc.Definition
import Text.Pandoc.Error (handleError)
import Text.Pandoc.Filter.Plot.Monad
import Text.Pandoc.Filter.Plot.Parse
import qualified Text.Pandoc.Options as P
import qualified Text.Pandoc.Readers as P
import Text.Pandoc.Walk (Walkable, query)

-- | Clean all output related to pandoc-plot. This includes output directories specified

-- in the configuration and in the document/block, as well as log files.

-- Note that *all* files in pandoc-plot output directories will be removed.

--

-- The cleaned directories are returned.

cleanOutputDirs ::
  Walkable Block b =>
  Configuration ->
  b ->
  IO [FilePath]
cleanOutputDirs :: Configuration -> b -> IO [FilePath]
cleanOutputDirs Configuration
conf b
doc = do
  [FilePath]
dirs <- Configuration -> PlotM [FilePath] -> IO [FilePath]
forall a. Configuration -> PlotM a -> IO a
runPlotM Configuration
conf (PlotM [FilePath] -> IO [FilePath])
-> (b -> PlotM [FilePath]) -> b -> IO [FilePath]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. b -> PlotM [FilePath]
forall b. Walkable Block b => b -> PlotM [FilePath]
cleanOutputDirsM (b -> IO [FilePath]) -> b -> IO [FilePath]
forall a b. (a -> b) -> a -> b
$ b
doc
  -- Deletion of the log file must be done outside of PlotM

  -- to ensure the log file has been closed.

  case Configuration -> LogSink
logSink Configuration
conf of
    LogFile FilePath
path -> FilePath -> IO ()
removePathForcibly FilePath
path
    LogSink
_ -> () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
  [FilePath] -> IO [FilePath]
forall (m :: * -> *) a. Monad m => a -> m a
return [FilePath]
dirs

-- | Analyze a document to determine where would the pandoc-plot output directories be.

outputDirs ::
  Walkable Block b =>
  b ->
  PlotM [FilePath]
outputDirs :: b -> PlotM [FilePath]
outputDirs =
  ([Maybe FilePath] -> [FilePath])
-> StateT PlotState (ReaderT RuntimeEnv IO) [Maybe FilePath]
-> PlotM [FilePath]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([FilePath] -> [FilePath]
forall a. Eq a => [a] -> [a]
nub ([FilePath] -> [FilePath])
-> ([Maybe FilePath] -> [FilePath])
-> [Maybe FilePath]
-> [FilePath]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe FilePath] -> [FilePath]
forall a. [Maybe a] -> [a]
catMaybes)
    (StateT PlotState (ReaderT RuntimeEnv IO) [Maybe FilePath]
 -> PlotM [FilePath])
-> (b -> StateT PlotState (ReaderT RuntimeEnv IO) [Maybe FilePath])
-> b
-> PlotM [FilePath]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath)]
-> StateT PlotState (ReaderT RuntimeEnv IO) [Maybe FilePath]
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence
    ([StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath)]
 -> StateT PlotState (ReaderT RuntimeEnv IO) [Maybe FilePath])
-> (b
    -> [StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath)])
-> b
-> StateT PlotState (ReaderT RuntimeEnv IO) [Maybe FilePath]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Block
 -> [StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath)])
-> b -> [StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath)]
forall a b c. (Walkable a b, Monoid c) => (a -> c) -> b -> c
query (\Block
b -> [Block -> PlotM (Maybe FigureSpec)
parseFigureSpec Block
b PlotM (Maybe FigureSpec)
-> (Maybe FigureSpec
    -> StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath))
-> StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Maybe FilePath
-> StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe FilePath
 -> StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath))
-> (Maybe FigureSpec -> Maybe FilePath)
-> Maybe FigureSpec
-> StateT PlotState (ReaderT RuntimeEnv IO) (Maybe FilePath)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FigureSpec -> FilePath) -> Maybe FigureSpec -> Maybe FilePath
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap FigureSpec -> FilePath
directory])

-- PlotM version of @cleanOutputDirs@

cleanOutputDirsM ::
  Walkable Block b =>
  b ->
  PlotM [FilePath]
cleanOutputDirsM :: b -> PlotM [FilePath]
cleanOutputDirsM b
doc = do
  [FilePath]
directories <- b -> PlotM [FilePath]
forall b. Walkable Block b => b -> PlotM [FilePath]
outputDirs b
doc
  [FilePath]
-> (FilePath -> StateT PlotState (ReaderT RuntimeEnv IO) FilePath)
-> PlotM [FilePath]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [FilePath]
directories ((FilePath -> StateT PlotState (ReaderT RuntimeEnv IO) FilePath)
 -> PlotM [FilePath])
-> (FilePath -> StateT PlotState (ReaderT RuntimeEnv IO) FilePath)
-> PlotM [FilePath]
forall a b. (a -> b) -> a -> b
$ \FilePath
fp -> do
    Text -> PlotM ()
info (Text -> PlotM ()) -> Text -> PlotM ()
forall a b. (a -> b) -> a -> b
$ Text
"Removing directory " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> FilePath -> Text
pack FilePath
fp
    -- It is important to use `removePathForcibly` here, because it does

    -- not throw exceptions if the directory doesn't exist. This means

    -- we do not have to check in advance if directories are nested in our

    -- list of directories.

    IO () -> PlotM ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PlotM ()) -> IO () -> PlotM ()
forall a b. (a -> b) -> a -> b
$ FilePath -> IO ()
removePathForcibly FilePath
fp
    FilePath -> StateT PlotState (ReaderT RuntimeEnv IO) FilePath
forall (m :: * -> *) a. Monad m => a -> m a
return FilePath
fp

-- | Read a document, guessing what extensions and reader options are appropriate. If

-- the file cannot be read for any reason, an error is thrown.

readDoc :: FilePath -> IO Pandoc
readDoc :: FilePath -> IO Pandoc
readDoc FilePath
fp =
  Either PandocError Pandoc -> IO Pandoc
forall a. Either PandocError a -> IO a
handleError
    (Either PandocError Pandoc -> IO Pandoc)
-> IO (Either PandocError Pandoc) -> IO Pandoc
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ( PandocIO Pandoc -> IO (Either PandocError Pandoc)
forall a. PandocIO a -> IO (Either PandocError a)
runIO (PandocIO Pandoc -> IO (Either PandocError Pandoc))
-> PandocIO Pandoc -> IO (Either PandocError Pandoc)
forall a b. (a -> b) -> a -> b
$ do
            let fmt :: Text
fmt = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
forall a. Monoid a => a
mempty (FilePath -> Maybe Text
formatFromFilePath FilePath
fp)
            (Reader PandocIO
reader, Extensions
exts) <- Text -> PandocIO (Reader PandocIO, Extensions)
forall (m :: * -> *).
PandocMonad m =>
Text -> m (Reader m, Extensions)
P.getReader Text
fmt
            let readerOpts :: ReaderOptions
readerOpts = ReaderOptions
forall a. Default a => a
def {readerExtensions :: Extensions
P.readerExtensions = Extensions
exts}
            case Reader PandocIO
reader of
              P.TextReader ReaderOptions -> Text -> PandocIO Pandoc
fct -> do
                Text
t <- IO Text -> PandocIO Text
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Text -> PandocIO Text) -> IO Text -> PandocIO Text
forall a b. (a -> b) -> a -> b
$ FilePath -> IO Text
Text.readFile FilePath
fp
                ReaderOptions -> Text -> PandocIO Pandoc
fct ReaderOptions
readerOpts Text
t
              P.ByteStringReader ReaderOptions -> ByteString -> PandocIO Pandoc
bst -> do
                ByteString
b <- IO ByteString -> PandocIO ByteString
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO ByteString -> PandocIO ByteString)
-> IO ByteString -> PandocIO ByteString
forall a b. (a -> b) -> a -> b
$ FilePath -> IO ByteString
B.readFile FilePath
fp
                ReaderOptions -> ByteString -> PandocIO Pandoc
bst ReaderOptions
readerOpts ByteString
b
        )

-- Determine format based on file extension

-- Note : this is exactly the heuristic used by pandoc here:

-- https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/App/FormatHeuristics.hs

--

-- However, this is not exported, so I must re-define it here.

formatFromFilePath :: FilePath -> Maybe Text
formatFromFilePath :: FilePath -> Maybe Text
formatFromFilePath FilePath
x =
  case FilePath -> FilePath
takeExtension ((Char -> Char) -> FilePath -> FilePath
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower FilePath
x) of
    FilePath
".adoc" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"asciidoc"
    FilePath
".asciidoc" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"asciidoc"
    FilePath
".context" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"context"
    FilePath
".ctx" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"context"
    FilePath
".db" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"docbook"
    FilePath
".doc" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"doc" -- so we get an "unknown reader" error

    FilePath
".docx" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"docx"
    FilePath
".dokuwiki" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"dokuwiki"
    FilePath
".epub" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"epub"
    FilePath
".fb2" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"fb2"
    FilePath
".htm" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"html"
    FilePath
".html" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"html"
    FilePath
".icml" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"icml"
    FilePath
".json" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"json"
    FilePath
".latex" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"latex"
    FilePath
".lhs" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"markdown+lhs"
    FilePath
".ltx" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"latex"
    FilePath
".markdown" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"markdown"
    FilePath
".md" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"markdown"
    FilePath
".ms" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"ms"
    FilePath
".muse" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"muse"
    FilePath
".native" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"native"
    FilePath
".odt" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"odt"
    FilePath
".opml" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"opml"
    FilePath
".org" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"org"
    FilePath
".pdf" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"pdf" -- so we get an "unknown reader" error

    FilePath
".pptx" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"pptx"
    FilePath
".roff" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"ms"
    FilePath
".rst" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"rst"
    FilePath
".rtf" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"rtf"
    FilePath
".s5" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"s5"
    FilePath
".t2t" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"t2t"
    FilePath
".tei" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"tei"
    FilePath
".tei.xml" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"tei"
    FilePath
".tex" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"latex"
    FilePath
".texi" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"texinfo"
    FilePath
".texinfo" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"texinfo"
    FilePath
".text" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"markdown"
    FilePath
".textile" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"textile"
    FilePath
".txt" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"markdown"
    FilePath
".wiki" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"mediawiki"
    FilePath
".xhtml" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"html"
    FilePath
".ipynb" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"ipynb"
    FilePath
".csv" -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"csv"
    [Char
'.', Char
y] | Char
y Char -> FilePath -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'1' .. Char
'9'] -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"man"
    FilePath
_ -> Maybe Text
forall a. Maybe a
Nothing