-- | Convert HTML documents into other formats by running external tools.
--
-- The only entry point is 'fromHTML'. Depending on the requested
-- 'ExportType' it returns the input unchanged, runs @wkhtmltopdf@, or runs
-- @pandoc@; both executables are looked up by name when a conversion is
-- actually performed.
module Text.FromHTML
( fromHTML
, ExportType(..)
) where
import Control.Concurrent (forkIO)
import Control.Concurrent.MVar (newEmptyMVar, putMVar, takeMVar)
import Control.Exception (SomeException, throwIO, try)
import qualified Data.Char as C
import GHC.IO.Exception (IOErrorType (ResourceVanished), IOException (ioe_type))
import GHC.IO.Handle
import System.IO.Unsafe

import qualified Data.ByteString as B
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import System.Process
-- | Output formats understood by 'fromHTML'.
--
-- 'HTML' returns the input unchanged and 'PDF' is rendered by @wkhtmltopdf@.
-- Every other constructor is handed to @pandoc@, whose @-t@ argument is the
-- lower-cased constructor name (see 'exportType2PD'); renaming one of those
-- constructors therefore changes the format name passed to @pandoc@.
--
-- The declaration order is observable through the derived 'Enum' and
-- 'Bounded' instances ('HTML' is 'minBound', 'PDF' is 'maxBound').
data ExportType
  = HTML
  | LaTeX
  | RTF
  | RST
  | Markdown
  | AsciiDoc
  | Docx
  | ODT
  | DokuWiki
  | MediaWiki
  | EPUB2
  | EPUB3
  | PDF
  deriving (Eq, Show, Read, Enum, Bounded)
-- | Keep the 'Right' value of an 'Either' and discard any 'Left'.
--
-- The 'Left' payload is never inspected, so no constraint is placed on its
-- type.
eitherToMaybe :: Either a b -> Maybe b
eitherToMaybe = either (const Nothing) Just
-- | Encode a 'String' as UTF-8 bytes, going through strict 'T.Text'.
str2BS :: String -> B.ByteString
str2BS str = E.encodeUtf8 (T.pack str)
-- | Convert an HTML document to the requested 'ExportType'.
--
-- The input is encoded as UTF-8 first. 'HTML' returns those bytes as-is,
-- 'PDF' is delegated to 'makePDF', and every other format to 'makePD'.
--
-- 'Nothing' means the external converter reported a problem. Although the
-- type is pure, the non-'HTML' cases start an external process (through
-- 'unsafePerformIO' in 'makePDF' / 'makePD') when the result is forced.
fromHTML :: ExportType -> String -> Maybe B.ByteString
fromHTML expt html =
  case expt of
    HTML -> Just payload
    PDF  -> makePDF payload
    _    -> makePD expt payload
  where
    payload = str2BS html
-- | Raw bytes written to a converter's standard input.
type Input = B.ByteString
-- | Raw bytes read back from a converter's standard output.
type Output = B.ByteString
-- | Shape of a conversion action: bytes in, optional bytes out, in 'IO'.
-- Not referenced by name in the code visible here.
type Command = Input -> IO (Maybe Output)
-- | Same shape as the result of 'createProcess': optional stdin, stdout and
-- stderr handles plus the process handle. Not referenced by name in the code
-- visible here.
type Process = IO (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle)
-- | Render HTML bytes to PDF by running 'wkhtmltopdf' behind a pure type.
--
-- This uses 'unsafePerformIO': the external process is started when the
-- result is first forced. The result depends on the installed tool, so it is
-- only "pure" under the assumption that the converter is deterministic for a
-- given input.
makePDF :: Input -> Maybe Output
makePDF html = unsafePerformIO $ wkhtmltopdf html
-- Inlining a function that calls 'unsafePerformIO' may duplicate the I/O,
-- which here would mean spawning the converter more than once.
{-# NOINLINE makePDF #-}
-- | Convert HTML bytes to the given format by running 'pandoc' behind a pure
-- type.
--
-- This uses 'unsafePerformIO': the external process is started when the
-- result is first forced. The result depends on the installed tool, so it is
-- only "pure" under the assumption that the converter is deterministic for a
-- given input.
makePD :: ExportType -> Input -> Maybe Output
makePD expt html = unsafePerformIO $ pandoc expt html
-- Inlining a function that calls 'unsafePerformIO' may duplicate the I/O,
-- which here would mean spawning the converter more than once.
{-# NOINLINE makePD #-}
-- | Run the @wkhtmltopdf@ executable on the given HTML bytes.
--
-- The HTML is supplied on the child's stdin and the result is taken from its
-- stdout (see 'perform'); the two trailing @-@ arguments fill the tool's
-- input and output positions. @--quiet@ matters because 'perform' treats any
-- stderr output as failure.
wkhtmltopdf :: Input -> IO (Maybe Output)
wkhtmltopdf = perform (procWith (proc "wkhtmltopdf" args))
  where
    args = ["--quiet", "--encoding", "utf-8", "-", "-"]
-- | Run the @pandoc@ executable to convert HTML bytes into @expt@.
--
-- The arguments request a standalone document (@-s@), read HTML (@-f html@),
-- write the format named by 'exportType2PD' (@-t@) and send the result to
-- stdout (@-o -@). The HTML itself is supplied on stdin by 'perform'.
pandoc :: ExportType -> Input -> IO (Maybe Output)
pandoc expt = perform (procWith (proc "pandoc" args))
  where
    writer = exportType2PD expt
    args = ["-s", "-f", "html", "-t", writer, "-o", "-"]
-- | Run an external converter: write @input@ to its stdin and collect what it
-- prints on stdout.
--
-- The result is @Just output@ only when the process wrote nothing at all to
-- stderr; any stderr output is treated as failure and gives 'Nothing'. The
-- exit code is not inspected. 'Nothing' is also returned when @cprocess@ was
-- not set up with pipes for all three standard streams (see 'procWith').
--
-- Exceptions raised while reading the pipes, or while writing the input for
-- any reason other than the child closing its stdin early, are propagated.
-- The child and its handles are cleaned up on every exit path.
perform :: CreateProcess -> Input -> IO (Maybe Output)
perform cprocess input =
  withCreateProcess cprocess $ \mIn mOut mErr ph ->
    case (mIn, mOut, mErr) of
      (Just hIn, Just hOut, Just hErr) -> do
        -- Drain stdout and stderr concurrently, and start before writing the
        -- input: reading them one after the other can deadlock as soon as the
        -- child fills the pipe nobody is reading yet.
        outVar <- drain hOut
        errVar <- drain hErr
        feed hIn
        output <- collect outVar
        errors <- collect errVar
        -- Reap the child so it does not linger as a zombie process.
        _ <- waitForProcess ph
        return $ if B.null errors then Just output else Nothing
      _ -> return Nothing
  where
    -- Read a handle to EOF on a separate thread; the outcome (or the
    -- exception that interrupted it) is delivered through the MVar.
    drain h = do
      var <- newEmptyMVar
      _ <- forkIO $ readAll h >>= putMVar var
      return var

    readAll :: Handle -> IO (Either SomeException B.ByteString)
    readAll = try . B.hGetContents

    -- Wait for a reader thread and re-raise whatever it failed with.
    collect var = takeMVar var >>= either throwIO return

    -- A child that exits before consuming its input makes the write fail
    -- with a broken pipe. That is a converter failure, which is judged from
    -- stderr below, so it must not escape as an exception.
    feed h = do
      written <- try (B.hPutStr h input >> hClose h)
      case written of
        Left e
          | ioe_type e == ResourceVanished -> return ()
          | otherwise -> throwIO e
        Right () -> return ()
-- | Request fresh pipes for stdin, stdout and stderr of the given process
-- description, leaving every other setting untouched.
--
-- 'perform' relies on all three pipes being present.
procWith :: CreateProcess -> CreateProcess
procWith p =
  p
    { std_in = CreatePipe
    , std_out = CreatePipe
    , std_err = CreatePipe
    }
-- | Name of an 'ExportType' as passed to pandoc's @-t@ option: the
-- constructor name as rendered by 'show', lower-cased character by character.
exportType2PD :: ExportType -> String
exportType2PD expt = map C.toLower (show expt)