-- | Program to replace HTML tags by whitespace
--
-- This program was originally contributed by Petr Prokhorenkov.
--
-- Tested in this benchmark:
--
-- * Reading the file
--
-- * Replacing HTML tags (everything from @<@ through the matching @>@)
--   with whitespace
--
-- * Writing back to a handle
--
{-# OPTIONS_GHC -fspec-constr-count=5 #-}
module Benchmarks.Programs.StripTags
    ( benchmark
    ) where

import Test.Tasty.Bench (Benchmark, bgroup, bench, whnfIO)
import System.IO (Handle)
import qualified Data.ByteString as B
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T

-- | Benchmark group @StripTags@.
--
-- Both benchmarks read the file at path @i@, blank out its tags with 'text'
-- and write the result to the handle @o@:
--
-- * @Text@ reads and writes through "Data.Text.IO".
--
-- * @TextByteString@ reads and writes raw bytes through "Data.ByteString",
--   converting with 'T.decodeUtf8' and 'T.encodeUtf8' around 'text'.
benchmark
    :: FilePath   -- ^ input file
    -> Handle     -- ^ handle the stripped text is written to
    -> Benchmark
benchmark i o = bgroup "StripTags"
    [ bench "Text" $ whnfIO $
        T.readFile i >>= T.hPutStr o . text
    , bench "TextByteString" $ whnfIO $
        B.readFile i >>= B.hPutStr o . T.encodeUtf8 . text . T.decodeUtf8
    ]

-- | Replace every character that belongs to a tag, including the angle
-- brackets themselves, with a space.
--
-- The result has as many characters as the input because each input
-- character maps to exactly one output character.
text :: T.Text -> T.Text
text = snd . T.mapAccumL step 0

-- | One step of the tag-stripping scan.
--
-- The accumulator is the current nesting depth of angle brackets: @<@
-- opens a tag (depth + 1) and @>@ closes it (depth - 1).  A character is
-- blanked when the depth is positive either before or after it is
-- processed, so both the opening @<@ (depth 0 -> 1) and the closing @>@
-- (depth 1 -> 0) are replaced along with the tag contents.
--
-- Note that the depth is not clamped: an unmatched @>@ outside a tag drives
-- it negative, and the next @<@ then only brings it back to zero.
step :: Int -> Char -> (Int, Char)
step d c
    | d > 0 || d' > 0 = (d', ' ')
    | otherwise       = (d', c)
  where
    d' = d + depth c

    -- Change in nesting depth caused by a single character.
    depth '<' = 1
    depth '>' = -1
    depth _   = 0