module TagSoup.Benchmark where
import Text.HTML.TagSoup
import Control.DeepSeq
import Control.Monad
import Data.List
import Data.Maybe
import Data.Char
import System.CPUTime
import System.IO
import System.IO.Unsafe(unsafeInterleaveIO)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as LBS
import Data.Time.Clock.POSIX(getPOSIXTime)
conf = 0.95
timefile :: FilePath -> IO ()
timefile file = do
-- use LBS to be most representative of real life
lbs <- LBS.readFile file
let str = LBS.unpack lbs
bs = BS.concat $ LBS.toChunks lbs
() <- LBS.length lbs `seq` length str `seq` BS.length bs `seq` return ()
benchWith (const str, const bs, const lbs) $ benchStatic (toInteger $ LBS.length lbs)
sample :: String
sample = " is " ++
" and some just random & test ><"
nsample = genericLength sample :: Integer
time :: IO ()
time = benchWith (str,bs,lbs) benchVariable
where
str = \i -> concat $ genericReplicate i sample
bs = let s = BS.pack sample in \i -> BS.concat (genericReplicate i s)
lbs = let s = LBS.pack sample in \i -> LBS.concat (genericReplicate i s)
benchWith :: (Integer -> String, Integer -> BS.ByteString, Integer -> LBS.ByteString)
-> ((Integer -> ()) -> IO [String]) -> IO ()
benchWith (str,bs,lbs) bench = do
putStrLn "Timing parseTags in characters/second"
let header = map (:[]) ["(" ++ show (round $ conf * 100) ++ "% confidence)","String","BS","LBS"]
rows <- mapM row $ replicateM 3 [False,True]
mapM_ (putStrLn . strict . grid) $ delay2 $ header : rows
where
row [a,b,c] = do
let header = intercalate "," [g a "pos", g b "warn", g c "merge"]
g b x = (if b then ' ' else '!') : x
f x = bench $ \i -> rnf $ parseTagsOptions parseOptions{optTagPosition=a,optTagWarning=b,optTagTextMerge=c} $ x i
c1 <- f str
c2 <- f bs
c3 <- f lbs
return [[header],c1,c2,c3]
strict = reverse . reverse
---------------------------------------------------------------------
-- BENCHMARK ON THE SAMPLE INPUT
disp xs = showUnit (floor xbar) ++ " (~" ++ rng ++ "%)"
where xbar = mean xs
rng = if length xs <= 1 then "?" else show (ceiling $ (range conf xs) * 100 / xbar)
cons x = fmap (x:)
aimTime = 0.3 :: Double -- seconds to aim for
minTime = 0.2 :: Double -- below this a test is considered invalid
-- given a number of times to repeat sample, return a list of what
-- to display
benchVariable :: (Integer -> ()) -> IO [String]
benchVariable op = cons "?" $ f 10 []
where
f i seen | length seen > 9 = cons (" " ++ disp seen) $ return []
| otherwise = unsafeInterleaveIO $ do
now <- timer $ op i
let cps = if now == 0 then 0 else fromInteger (i * nsample) / now
if now < minTime || (null seen && now < aimTime) then do
let factor = min 7 $ max 2 $ floor $ aimTime / now
cons ("? " ++ disp [cps]) $ f (i * factor) []
else
cons (show (9 - length seen) ++ " " ++ disp (cps:seen)) $ f i (cps:seen)
benchStatic :: Integer -> (Integer -> ()) -> IO [String]
benchStatic nsample op = cons "?" $ f []
where
f seen | length seen > 9 = cons (" " ++ disp seen) $ return []
| otherwise = unsafeInterleaveIO $ do
now <- timer $ op $ genericLength seen
let cps = if now == 0 then 0 else fromInteger nsample / now
cons (show (9 - length seen) ++ " " ++ disp (cps:seen)) $ f (cps:seen)
---------------------------------------------------------------------
-- UTILITY FUNCTIONS
-- | Given a number, show it using a unit and decimal place
showUnit :: Integer -> String
showUnit x = num ++ unit
where
units = " KMGTPEZY"
(use,skip) = splitAt 3 $ show x
unit = [units !! ((length skip + 2) `div` 3)]
dot = ((length skip - 1) `mod` 3) + 1
num = a ++ ['.' | b /= ""] ++ b
where (a,b) = splitAt dot use
-- copied from the criterion package
getTime :: IO Double
getTime = (fromRational . toRational) `fmap` getPOSIXTime
timer :: () -> IO Double
timer x = do
start <- getTime
() <- return x
end <- getTime
return $ end - start
-- display a grid
grid :: [[String]] -> String
grid xs = unlines $ map (concat . zipWith f cols) xs
where cols = map (maximum . map length) $ transpose xs
f n x = x ++ replicate (n+1 - length x) ' '
-- display a series of grids over time
-- when a grid gets to [] keep its value at that
-- when all grids get to [] return []
delay2 :: [[[String]]] -> [[[String]]]
delay2 xs = map (map head) xs : (if all (null . tail) (concat xs) then [] else delay2 $ map (map tl) xs)
where tl (x:xs) = if null xs then x:xs else xs
---------------------------------------------------------------------
-- INSTANCES
instance NFData a => NFData (Tag a) where
rnf (TagOpen x y) = rnf x `seq` rnf y
rnf (TagClose x) = rnf x
rnf (TagText x) = rnf x
rnf (TagComment x) = rnf x
rnf (TagWarning x) = rnf x
rnf (TagPosition x y) = () -- both are already ! bound
instance NFData LBS.ByteString where
rnf x = LBS.length x `seq` ()
instance NFData BS.ByteString where
rnf x = BS.length x `seq` ()
---------------------------------------------------------------------
-- STATISTICS
-- Provided by Emily Mitchell
confNs = let (*) = (,) in
[0.95 * 1.96
,0.90 * 1.644]
size :: [Double] -> Double
size = genericLength
mean :: [Double] -> Double
mean xs = sum xs / size xs
stddev :: [Double] -> Double
stddev xs = sqrt $ sum [sqr (x - xbar) | x <- xs] / size xs
where xbar = mean xs
sqr x = x * x
-- given a sample, and a required confidence
-- of the mean (i.e. 2.5% = 0.025)
range ::Double -> [Double] -> Double
range conf xs = conf2 * stddev xs / sqrt (size xs)
where conf2 = fromMaybe (error $ "Unknown confidence interval: " ++ show conf) $ lookup conf confNs