{-|
Module      : Text.FromHTML
Description : Simple library for transformation of HTML to other formats
Copyright   : (c) Marek Suchánek, 2018
License     : MIT
Maintainer  : marek.suchanek@fit.cvut.cz
Stability   : experimental
Portability : POSIX

Simplified API for transformation of HTML to other formats with Pandoc
and wkhtmltopdf in Haskell code. It requires @wkhtmltopdf@ and @pandoc@
to be installed locally.
-}
module Text.FromHTML
  ( fromHTML
  , ExportType(..)
  ) where

-- import Debug.Trace
import Control.Concurrent (forkIO)
import Control.Concurrent.MVar (MVar, newEmptyMVar, putMVar, takeMVar)
import Control.Exception (IOException, handle, try)
import qualified Data.ByteString as B
import qualified Data.Char as C
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import GHC.IO.Handle
import System.IO.Unsafe
import System.Process

-- | Allowed export types
data ExportType
  = HTML
  | LaTeX
  | RTF
  | RST
  | Markdown
  | AsciiDoc
  | Docx
  | ODT
  | DokuWiki
  | MediaWiki
  | EPUB2
  | EPUB3
  | PDF
  deriving (Show, Read, Enum, Bounded, Eq)

-- | Helper function to translate Either to Maybe.
--
-- The 'Show' constraint is not needed by this variant; it is kept so the
-- debugging variant below can be swapped in without touching the signature.
eitherToMaybe :: Show a => Either a b -> Maybe b
eitherToMaybe (Right x) = Just x
eitherToMaybe _         = Nothing

-- Variant for debugging
-- eitherToMaybe :: Show a => Either a b -> Maybe b
-- eitherToMaybe (Right x) = Just x
-- eitherToMaybe (Left x) = traceShow x Nothing

-- | Encode a 'String' as a UTF-8 'B.ByteString'.
str2BS :: String -> B.ByteString
str2BS = E.encodeUtf8 . T.pack

-- | Transform given HTML as String to selected format.
--
-- Returns 'Nothing' when the external converter could not be run or
-- reported anything on its standard error stream.
fromHTML :: ExportType -> String -> Maybe B.ByteString
fromHTML HTML html = Just . str2BS $ html  -- HTML is already provided!
fromHTML PDF  html = makePDF (str2BS html)
fromHTML extp html = makePD extp (str2BS html)

-- | Raw bytes fed to the converter's standard input.
type Input = B.ByteString

-- | Raw bytes collected from the converter's standard output.
type Output = B.ByteString

-- | A conversion step: feed the input to an external tool, collect its output.
type Command = Input -> IO (Maybe Output)

-- | Shape of the value returned by 'createProcess' (currently unused).
type Process = IO (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle)

-- NOTE on unsafePerformIO: the external converters are treated as pure
-- functions of their input (same bytes in, same bytes out, no observable
-- side effects besides the short-lived child process). NOINLINE keeps GHC
-- from duplicating the call, and therefore the child process, by inlining.

-- | Pure wrapper around 'wkhtmltopdf'.
makePDF :: Input -> Maybe Output
makePDF html = unsafePerformIO $ wkhtmltopdf html
{-# NOINLINE makePDF #-}

-- | Pure wrapper around 'pandoc'.
makePD :: ExportType -> Input -> Maybe Output
makePD expt html = unsafePerformIO $ pandoc expt html
{-# NOINLINE makePD #-}

-- | Simple conversion of HTML to PDF using process wkhtmltopdf.
--
-- The two trailing @-@ arguments are passed as the input and output
-- positions; the document is written to the child's stdin and the result is
-- read from its stdout.
wkhtmltopdf :: Command
wkhtmltopdf = perform cprocess
  where
    opts     = ["--quiet", "--encoding", "utf-8", "-", "-"]
    cprocess = procWith $ proc "wkhtmltopdf" opts

-- | Simple conversion of HTML to the given (non-PDF) format using process
-- pandoc. The target format name is derived with 'exportType2PD'.
pandoc :: ExportType -> Command
pandoc expt = perform cprocess
  where
    format   = exportType2PD expt
    opts     = ["-s", "-f", "html", "-t", format, "-o", "-"]
    cprocess = procWith $ proc "pandoc" opts

-- | Run the process, write the input to its stdin and collect its stdout.
--
-- The result is 'Just' the captured stdout when the process wrote nothing
-- to stderr, and 'Nothing' otherwise. The exit code is intentionally not
-- part of the criterion (it never was). Any 'IOException' while spawning
-- or talking to the process (executable missing, broken pipe, ...) also
-- yields 'Nothing' instead of escaping from pure code.
perform :: CreateProcess -> Command
perform cprocess input =
  handle ioFailure $ withCreateProcess cprocess run
  where
    ioFailure :: IOException -> IO (Maybe Output)
    ioFailure _ = return Nothing

    run :: Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle
        -> IO (Maybe Output)
    run (Just hIn) (Just hOut) (Just hErr) ph = do
      -- Both output pipes are drained concurrently, and before stdin is
      -- written, so the child can never block on a full pipe while we are
      -- blocked on a different one.
      outVar <- drain hOut
      errVar <- drain hErr
      B.hPutStr hIn input
      hClose hIn
      out    <- takeMVar outVar
      errors <- takeMVar errVar
      -- Reap the child only after both readers finished, so this call
      -- cannot stall them.
      _ <- waitForProcess ph
      return $ case (out, errors) of
        (Right result, Right diagnostics)
          | B.null diagnostics -> Just result
        _ -> Nothing
    -- Pipes were not created; nothing can be exchanged with the process.
    run _ _ _ _ = return Nothing

    -- Read a handle to EOF on a separate thread; a read failure is handed
    -- back as 'Left' so the waiting thread is never left blocked.
    drain :: Handle -> IO (MVar (Either IOException B.ByteString))
    drain h = do
      var <- newEmptyMVar
      _ <- forkIO $ try (B.hGetContents h) >>= putMVar var
      return var

-- | Redirect stdin, stdout and stderr of the process to fresh pipes.
procWith :: CreateProcess -> CreateProcess
procWith p = p
  { std_out = CreatePipe
  , std_in  = CreatePipe
  , std_err = CreatePipe
  }

-- | Name of the export type as passed to pandoc's @-t@ option: the
-- lower-cased constructor name.
exportType2PD :: ExportType -> String
exportType2PD = map C.toLower . show