{- |
Module for tidying malformed HTML by running the @tidy@ command-line tool from libtidy (see README).
-}
module HScraper.Tidy (
tidy
) where
import Control.Exception (finally)
import Data.Text as T
import Data.Text.IO as TIO
import System.Directory(getCurrentDirectory)
import System.Directory (removeFile)
import System.Process
-- | Takes malformed HTML and returns corrected HTML if it can be
-- corrected. Output is empty if it cannot be corrected.
--
-- The input is written to @hscraper_temp.html@ in the current working
-- directory, the external @tidy@ executable is run on that file, and
-- everything it prints on standard output is returned. The temporary file
-- is deleted afterwards, even if running @tidy@ throws.
--
-- tidy is invoked with @-q@ and @-f <log>@, where the log file is
-- @hscraper_webpages.logs@ in the current working directory (previously a
-- hard-coded path inside one developer's home directory).
--
-- The exit code of @tidy@ is deliberately ignored: callers only look at
-- the (possibly empty) output.
--
-- NOTE: the temporary file name is fixed, so concurrent calls made from
-- the same working directory would overwrite each other's input.
--
-- Throws an 'IOError' if the temporary file cannot be written or removed,
-- or if the @tidy@ executable cannot be started.
tidy :: T.Text -> IO T.Text
tidy t = do
  pwd <- System.Directory.getCurrentDirectory
  let tempFile = pwd ++ "/hscraper_temp.html"
      logFile  = pwd ++ "/hscraper_webpages.logs"
      tidyProc = (proc "tidy" ["-q", "-f", logFile, tempFile])
                   { std_out = CreatePipe }
  TIO.writeFile tempFile t
  runTidy tidyProc `finally` removeFile tempFile
  where
    -- Spawn the process, collect its standard output, then reap it.
    runTidy cp = do
      (_, mOut, _, ph) <- createProcess cp
      -- With 'CreatePipe' a handle is always returned; the 'Nothing' case
      -- is handled only to keep the match total.
      out <- maybe (return T.empty) TIO.hGetContents mOut
      -- The output has been read in full above, so waiting here cannot
      -- block on a full pipe; it only reaps the child process.
      _ <- waitForProcess ph
      return out