{-|
Module      : Text.FromHTML
Description : Simple library for transformation of HTML to other formats
Copyright   : (c) Marek Suchánek, 2018
License     : MIT
Maintainer  : marek.suchanek@fit.cvut.cz
Stability   : experimental
Portability : POSIX

Simplified API for transformation of HTML to other formats with Pandoc
and wkhtmltopdf in Haskell code. It requires @wkhtmltopdf@ installed
locally (see <https://wkhtmltopdf.org wkhtmltopdf.org>).
-}
module Text.FromHTML
   ( fromHTML
   , ExportType(..)
   ) where

-- import Debug.Trace

import           Control.Exception (IOException, try)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as E

import qualified Text.Pandoc as Pandoc
import qualified Text.Pandoc.Templates as PandocTemplates
import qualified Text.Pandoc.Writers as PandocWriters
import qualified Text.Pandoc.Error as PandocError
import qualified Text.Pandoc.PDF as PandocPDF

import           GHC.IO.Handle
import           System.Process
import           System.IO.Unsafe

-- | Output formats that 'fromHTML' can be asked to produce.
--
-- The constructor order matters for the derived 'Enum' and 'Bounded'
-- instances: 'HTML' is 'minBound' and 'PDF' is 'maxBound'.
data ExportType
  = HTML
  | LaTeX
  | RTF
  | RST
  | Markdown
  | AsciiDoc
  | Docx
  | ODT
  | DokuWiki
  | MediaWiki
  | EPUB2
  | EPUB3
  | PDF
  deriving (Show, Read, Enum, Bounded, Eq)

-- | A pure Pandoc writer whose output is a strict 'B.ByteString'.
--
-- It takes the writer options and the parsed document and runs in
-- 'Pandoc.PandocPure'.
type Writer
  =  Pandoc.WriterOptions
  -> Pandoc.Pandoc
  -> Pandoc.PandocPure B.ByteString


-- | Collapse an 'Either' into a 'Maybe': a 'Right' payload is kept as
-- 'Just', any 'Left' value is discarded and becomes 'Nothing'.
eitherToMaybe :: Show a => Either a b -> Maybe b
eitherToMaybe = either (const Nothing) Just

-- Variant for debugging
-- eitherToMaybe :: Show a => Either a b -> Maybe b
-- eitherToMaybe (Right x) = Just x
-- eitherToMaybe (Left x) = traceShow x Nothing

-- | Reader options used when parsing the incoming HTML: the Pandoc
-- defaults with 'Pandoc.readerStandalone' switched on.
readerOptions :: Pandoc.ReaderOptions
readerOptions = Pandoc.def { Pandoc.readerStandalone = True }

-- | Baseline writer options (the Pandoc defaults). 'runWriter' overrides
-- the template field per export type.
writerOptions :: Pandoc.WriterOptions
writerOptions = Pandoc.def

-- | Convert an HTML document, given as a 'String', into the requested
-- format.
--
-- * 'HTML' returns the input itself, UTF-8 encoded.
-- * 'PDF' is delegated to 'writerHTML2PDF'.
-- * Every other format is parsed with 'html2pd' and rendered through
--   'runWriter'; 'Nothing' is returned if either step fails.
fromHTML :: ExportType -> String -> Maybe B.ByteString
fromHTML extp html =
  case extp of
    HTML -> Just (E.encodeUtf8 (T.pack html))  -- nothing to convert
    PDF  -> writerHTML2PDF html
    _    -> html2pd html >>= render
  where
    render = eitherToMaybe . Pandoc.runPure . runWriter extp


-- | Render a parsed document in the given export type: fetch the
-- template via 'getTemplate', plug it into 'writerOptions' and run the
-- writer selected by 'writer'.
runWriter :: ExportType -> Pandoc.Pandoc -> Pandoc.PandocPure B.ByteString
runWriter extp pd =
  getTemplate extp >>= \tpl ->
    writer extp (writerOptions { Pandoc.writerTemplate = tpl }) pd


-- | Fetch the default Pandoc template for the given export type.
--
-- Each format rendered by a Pandoc writer is mapped to the name passed to
-- 'PandocTemplates.getDefaultTemplate'. 'PDF' has no template here and
-- yields 'Nothing'.
getTemplate :: ExportType -> Pandoc.PandocPure (Maybe String)
getTemplate = traverse PandocTemplates.getDefaultTemplate . templateName
  where
    -- Name of the default template to request for each export type.
    templateName :: ExportType -> Maybe String
    templateName HTML      = Just "html5"
    templateName LaTeX     = Just "latex"
    templateName RTF       = Just "rtf"
    templateName RST       = Just "rst"
    templateName Markdown  = Just "markdown"
    -- Previously the empty string, which names no template.
    templateName AsciiDoc  = Just "asciidoc"
    templateName Docx      = Just "docx"
    templateName ODT       = Just "odt"
    templateName DokuWiki  = Just "dokuwiki"
    templateName MediaWiki = Just "mediawiki"
    templateName EPUB2     = Just "epub2"
    templateName EPUB3     = Just "epub3"
    templateName PDF       = Nothing


-- | Parse HTML into a Pandoc document using 'readerOptions'.
-- Returns 'Nothing' when the pure Pandoc reader reports an error.
html2pd :: String -> Maybe Pandoc.Pandoc
html2pd =
  eitherToMaybe
    . Pandoc.runPure
    . Pandoc.readHtml readerOptions
    . T.pack

-- Disabled earlier variant: ugly PDF writer from HTML built on Pandoc's makePDF
-- writerHTML2PDF opts pd = fixError . unsafePerformIO . Pandoc.runIO $ PandocPDF.makePDF "wkhtmltopdf" ["--quiet"] PandocWriters.writeHtml5String opts pd
--   where
--     fixError (Left pderr) = Left pderr
--     fixError (Right (Left bserr)) = Left . PandocError.PandocSomeError . T.unpack . E.decodeUtf8 . BL.toStrict $ bserr
--     fixError (Right (Right x)) = Right (BL.toStrict x)

-- | Pure-looking wrapper around the 'html2pdf' IO action.
--
-- The conversion is run through 'unsafePerformIO', so it happens when the
-- result is first demanded. 'Nothing' is returned when the conversion
-- raises an 'IOException' (for example, the external tool cannot be
-- started) or when it produces no output at all, since an empty byte
-- string is never a valid PDF.
writerHTML2PDF :: String -> Maybe B.ByteString
writerHTML2PDF html = unsafePerformIO $ do
    result <- try (html2pdf html) :: IO (Either IOException B.ByteString)
    return $ case result of
      Right pdf | not (B.null pdf) -> Just pdf
      _                            -> Nothing
-- Keep the unsafePerformIO call from being inlined and thereby duplicated.
{-# NOINLINE writerHTML2PDF #-}

-- | Convert HTML to PDF by piping it through the external @wkhtmltopdf@
-- process: the HTML goes to its stdin and the PDF is read from its stdout.
--
-- The HTML is written as UTF-8 bytes, matching the @--encoding utf-8@
-- option passed to the tool, so the bytes sent do not depend on the
-- locale encoding of the handle. The child process is waited for after
-- its output has been read, so it is not left behind unreaped.
--
-- The exit code of the process is not inspected; the caller receives
-- whatever bytes were written to stdout.
html2pdf :: String -> IO B.ByteString
html2pdf html =
    withCreateProcess cprocess $ \mStdin mStdout _ ph ->
      case (mStdin, mStdout) of
        (Just hIn, Just hOut) -> do
          B.hPut hIn (E.encodeUtf8 (T.pack html))
          hClose hIn                    -- signals end of input to the tool
          pdf <- B.hGetContents hOut    -- strict read until EOF
          _ <- waitForProcess ph        -- reap the child process
          return pdf
        -- Both pipes are requested below, so this is not expected to occur.
        _ -> ioError (userError "html2pdf: wkhtmltopdf pipes were not created")
    where
        procWith p = p { std_out = CreatePipe
                       , std_in  = CreatePipe
                       }
        -- "-" "-" makes the tool read from stdin and write to stdout.
        opts = ["--quiet", "--encoding", "utf-8", "-", "-"]
        cprocess = procWith $ proc "wkhtmltopdf" opts

-- | Build the 'Writer' for an export type from the Pandoc writer chosen
-- by 'pandocWriter'. Text output is UTF-8 encoded and lazy byte strings
-- are made strict, so both kinds end up as a strict 'B.ByteString'.
writer :: ExportType -> Writer
writer extp opts pd =
  case pandocWriter extp of
    Pandoc.TextWriter renderText        -> E.encodeUtf8 <$> renderText opts pd
    Pandoc.ByteStringWriter renderBytes -> BL.toStrict <$> renderBytes opts pd


-- | Map an export type to the Pandoc writer that renders it in
-- 'Pandoc.PandocPure'.
--
-- 'PDF' falls back to the HTML5 writer because it cannot be produced as
-- 'Pandoc.PandocPure'.
pandocWriter :: ExportType -> Pandoc.Writer Pandoc.PandocPure
pandocWriter extp = case extp of
    -- Writers producing text
    HTML      -> Pandoc.TextWriter PandocWriters.writeHtml5String
    LaTeX     -> Pandoc.TextWriter PandocWriters.writeLaTeX
    RTF       -> Pandoc.TextWriter PandocWriters.writeRTF
    RST       -> Pandoc.TextWriter PandocWriters.writeRST
    Markdown  -> Pandoc.TextWriter PandocWriters.writeMarkdown
    AsciiDoc  -> Pandoc.TextWriter PandocWriters.writeAsciiDoc
    DokuWiki  -> Pandoc.TextWriter PandocWriters.writeDokuWiki
    MediaWiki -> Pandoc.TextWriter PandocWriters.writeMediaWiki
    -- Writers producing binary output
    Docx      -> Pandoc.ByteStringWriter PandocWriters.writeDocx
    ODT       -> Pandoc.ByteStringWriter PandocWriters.writeODT
    EPUB2     -> Pandoc.ByteStringWriter PandocWriters.writeEPUB2
    EPUB3     -> Pandoc.ByteStringWriter PandocWriters.writeEPUB3
    -- Cannot be done as PandocPure; same writer as HTML
    PDF       -> Pandoc.TextWriter PandocWriters.writeHtml5String