-- | Conversion of HTML documents into other document formats by delegating
-- to the external @pandoc@ and @wkhtmltopdf@ executables.
--
-- The single entry point is 'fromHTML'; the supported targets are listed
-- by 'ExportType'.
module Text.FromHTML
( fromHTML
, ExportType(..)
) where
import qualified Data.Char as C
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import qualified Data.ByteString as B
import Control.Exception
import GHC.IO.Encoding
import System.Exit
import System.Process.ByteString
-- | Target formats an HTML document can be exported to.
--
-- Constructor order matters: it determines the derived 'Enum' and 'Bounded'
-- instances. The constructor names matter as well, because the derived
-- 'Show' output is lower-cased elsewhere in this module to obtain the
-- format name passed to @pandoc -t@.
data ExportType
  = HTML       -- ^ Input is returned as-is (only re-encoded to bytes).
  | LaTeX
  | RTF
  | RST
  | Markdown
  | AsciiDoc
  | Docx
  | ODT
  | DokuWiki
  | MediaWiki
  | EPUB2
  | EPUB3
  | PDF        -- ^ Rendered through @wkhtmltopdf@ rather than @pandoc@.
  deriving (Show, Read, Enum, Bounded, Eq)
-- | Raw bytes handed to a 'Command' (fed to the external process by
-- 'performUnsafe').
type Input = B.ByteString
-- | Raw bytes produced by a 'Command': either the converted document or an
-- error message.
type Output = B.ByteString
-- | An action that transforms input bytes, yielding 'Left' with an error
-- message on failure or 'Right' with the result on success.
type Command = Input -> IO (Either Output Output)
-- | Encode a 'String' as a strict UTF-8 'B.ByteString' (going through
-- strict 'T.Text').
str2BS :: String -> B.ByteString
str2BS str = E.encodeUtf8 (T.pack str)
-- | Convert an HTML document (given as a 'String') to the requested format.
--
-- * 'HTML' returns the input unchanged, UTF-8 encoded.
-- * 'PDF' is rendered by 'wkhtmltopdf'.
-- * Every other 'ExportType' is handed to 'pandoc'.
--
-- The result is 'Right' with the converted document, or 'Left' with an
-- error message produced by the underlying command.
fromHTML :: ExportType -> String -> IO (Either Output Output)
fromHTML extp html =
  case extp of
    HTML -> pure (Right source)
    PDF  -> wkhtmltopdf source
    _    -> pandoc extp source
  where
    -- UTF-8 bytes of the input, shared by all branches.
    source = str2BS html
-- | 'Command' that renders HTML to PDF by running the @wkhtmltopdf@
-- executable through 'perform'.
--
-- The trailing @"-"@ @"-"@ pair stands for the input and output locations;
-- NOTE(review): per wkhtmltopdf's usage these mean stdin and stdout, which
-- matches how 'perform' feeds input and collects output.
wkhtmltopdf :: Command
wkhtmltopdf = perform "wkhtmltopdf" options
  where
    options :: [String]
    options = concat
      [ ["--quiet", "--disable-smart-shrinking"]
        -- Footer: centred page number.
      , ["--footer-center", "[page]"]
      , ["--footer-font-name", "\"Noto Serif\""]
      , ["--footer-spacing", "10"]
      , ["--footer-font-size", "10"]
        -- Same 25mm value for the -B, -L, -R and -T options.
      , concat [[side, "25mm"] | side <- ["-B", "-L", "-R", "-T"]]
      , ["--encoding", "utf-8"]
      , ["-", "-"]
      ]
-- | 'Command' that converts HTML to the given format by running the
-- @pandoc@ executable through 'perform'.
--
-- The target format name comes from 'exportType2PD'; the arguments are
-- @-s -f html -t FORMAT -o -@.
pandoc :: ExportType -> Command
pandoc expt =
  perform "pandoc" ("-s" : "-f" : "html" : "-t" : exportType2PD expt : ["-o", "-"])
-- | Run an external command via 'performUnsafe', turning synchronous
-- failures into a 'Left' result instead of letting them escape.
--
-- Any synchronous exception (for example, the executable not being found)
-- is reported as @Left "Exception: <shown exception>"@.
--
-- Asynchronous exceptions (timeouts, 'killThread', user interrupts) are
-- deliberately re-thrown: converting them into a 'Left' value would stop
-- cancellation from working and would make @System.Timeout.timeout@
-- report a normal result instead of timing out.
perform :: String -> [String] -> Command
perform cmd args input = performUnsafe cmd args input `catch` handler
  where
    handler :: SomeException -> IO (Either Output Output)
    handler e =
      case fromException e of
        -- Never swallow asynchronous exceptions; pass them on untouched.
        Just (SomeAsyncException _) -> throwIO e
        -- Build the message as a String first so the whole text goes
        -- through the same UTF-8 encoding step.
        Nothing -> return . Left . str2BS $ "Exception: " ++ show e
-- | Run an external command, feeding it the input bytes and collecting its
-- output. Exceptions raised while running the process are not caught here.
--
-- Returns 'Right' with the captured standard output when the process exits
-- successfully; otherwise 'Left' with the shown exit code, a @": "@
-- separator and the captured standard error.
--
-- Side effect: sets the locale encoding to UTF-8 (via 'setLocaleEncoding')
-- before every run.
performUnsafe :: String -> [String] -> Command
performUnsafe cmd args input = do
  setLocaleEncoding utf8
  (code, out, err) <- readProcessWithExitCode cmd args input
  pure $
    if code == ExitSuccess
      then Right out
      else Left (str2BS (show code ++ ": ") <> err)
-- | Format name for an 'ExportType': the constructor name as rendered by
-- 'show', converted to lower case (e.g. @LaTeX@ becomes @"latex"@).
exportType2PD :: ExportType -> String
exportType2PD expt = [C.toLower ch | ch <- show expt]