{-# LANGUAGE CPP #-} module Unicode.Char.Bench ( -- Range benchmark benchRange -- Char benchmark , Bench(..) , CharRange(..) , bgroupWithValidCharRange , bgroupWithValidCharRange' , bgroupWithCharRange , bgroupWithCharRange' , bgroupWithChars ) where import Control.DeepSeq (NFData (..), deepseq) import Control.Exception (evaluate, assert) import Data.Char (ord) import qualified Data.Char as Char import Foreign (Storable (..)) import qualified GHC.Exts as Exts import GHC.IO (IO (..)) import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, env, nf) import Test.Tasty.Options ( IsOption (defaultValue, optionHelp, optionName, parseValue), ) #if MIN_VERSION_base(4,10,0) && !MIN_VERSION_base(4,15,0) import qualified GHC.Magic as Exts (noinline) #endif import qualified Unicode.Char.General as G import Data.Ix (Ix (..)) -------------------------------------------------------------------------------- -- Range benchmark -------------------------------------------------------------------------------- {-# INLINE benchRange #-} benchRange :: forall a b. (Bounded a, Ix a, NFData b) => String -> (a -> b) -> Benchmark benchRange t f = bench t (nf (fold_ f) (minBound, maxBound)) {-# INLINE fold_ #-} fold_ :: forall a b. (Ix a, NFData b) => (a -> b) -> (a, a) -> () fold_ f = foldr (deepseq . f) () . range -------------------------------------------------------------------------------- -- Char range -------------------------------------------------------------------------------- -- | Characters range data CharRange = CharRange !Char !Char -- | Characters range configurable from CLI instance IsOption CharRange where defaultValue = CharRange minBound maxBound parseValue = \case "ascii" -> Just (CharRange minBound '\x7f') "bmp" -> Just (CharRange minBound '\xffff') "planes0To3" -> Just (CharRange minBound '\x3FFFF') -- [TODO] handle errors s -> let (l, u) = break (== '-') s in Just (CharRange (Char.chr (read l)) (Char.chr (read (drop 1 u)))) optionName = pure "chars" optionHelp = pure "Range of chars to test" -------------------------------------------------------------------------------- -- Characters benchmark -------------------------------------------------------------------------------- -- | A unit benchmark data Bench a = Bench { _title :: !String -- ^ Name , _func :: !(Char -> a) -- ^ Function to benchmark } -- | Helper to compare benchmarks of function from this package to ones in base. {-# INLINE bgroupWithValidCharRange #-} bgroupWithValidCharRange :: String -> CharRange -> (Char -> Bool) -> (Chars -> [Benchmark]) -> Benchmark bgroupWithValidCharRange groupTitle charRange isValid mkBenches = -- Avoid side-effects with garbage collection (see tasty-bench doc for env). -- We use pinned ByteArray# instead of lists to avoid that GC kicks in. env (initialize isValid charRange >>= evaluate) (bgroup groupTitle . mkBenches) -- | Helper to compare benchmarks of function from this package to ones in base. -- Filter out Surrogates, Private Use Areas and unsassigned code points. {-# INLINE bgroupWithCharRange #-} bgroupWithCharRange :: String -> CharRange -> (Chars -> [Benchmark]) -> Benchmark bgroupWithCharRange title charRange = bgroupWithValidCharRange title charRange isValid where isValid c = G.generalCategory c < G.Surrogate -- | Variant of 'bgroupWithValidCharRange' {-# INLINE bgroupWithValidCharRange' #-} bgroupWithValidCharRange' :: (NFData a) => String -> CharRange -> (Char -> Bool) -> [Bench a] -> Benchmark bgroupWithValidCharRange' groupTitle charRange isValid bs = bgroupWithValidCharRange groupTitle charRange isValid $ \chars -> [ benchCharsRange groupTitle title chars f | Bench title f <- bs ] {-# INLINE benchCharsRange #-} benchCharsRange :: NFData a => String -> String -> Chars -> (Char -> a) -> Benchmark benchCharsRange groupTitle title chars = case title of "base" -> benchCharsNF title chars _ -> bcompare' "base" . benchCharsNF title chars where {-# INLINE bcompare' #-} -- [NOTE] Works if groupTitle uniquely identifies the benchmark group. bcompare' ref = bcompare (mconcat ["$NF == \"", ref, "\" && $(NF-1) == \"", groupTitle, "\""]) -- | Variant of 'bgroupWithCharRange' {-# INLINE bgroupWithCharRange' #-} bgroupWithCharRange' :: (NFData a) => String -> CharRange -> [Bench a] -> Benchmark bgroupWithCharRange' groupTitle charRange = bgroupWithValidCharRange' groupTitle charRange isValid where isValid c = G.generalCategory c < G.Surrogate -- | Helper to compare benchmarks of function from this package to ones in base. {-# INLINE bgroupWithChars #-} bgroupWithChars :: (NFData a) => String -> Chars -> [Bench a] -> Benchmark bgroupWithChars groupTitle chars bs = bgroup groupTitle [ benchCharsRange groupTitle title chars f | Bench title f <- bs ] -- | Helper to bench a char function on a filtered char range {-# INLINE benchCharsNF #-} benchCharsNF :: (NFData a) => String -> Chars -> (Char -> a) -> Benchmark benchCharsNF title chars f = bench title (nf (foldrChars f) chars) -------------------------------------------------------------------------------- -- Chars byte array -------------------------------------------------------------------------------- -- | Pinned array of characters data Chars = Chars !Exts.ByteArray# !Int instance NFData Chars where rnf (Chars !_ !_) = () -- | Fold over a chars byte array foldrChars :: NFData a => (Char -> a) -> Chars -> () foldrChars f = go where -- Loop over the pinned char array. The loop itself does not allocate. go (Chars cs len) = foldr (\(Exts.I# k) -> let c = Exts.indexWideCharArray# cs (k Exts.-# 1#) #if MIN_VERSION_base(4,10,0) -- `inline` is necessary to avoid excessive inlining, resulting -- in benchmarking empty loop iterations, i.e. not the function. -- We could use `inline` with more care at call site, but then we -- would have to test the functions one by one and everytime we -- modify them. Using it here is a hammer but more secure and -- maintainable. -- Note that we may improve this by controling the inlining for each -- phase. in deepseq (Exts.noinline f (Exts.C# c))) #else -- HACK: No `inline` for GHC < 8.2. Should we drop support? in deepseq (f (Exts.C# c))) #endif () [1..len] -- | Create a byte array of the chars to bench initialize :: (Char -> Bool) -> CharRange -> IO Chars initialize isValid charRange = IO $ \s1 -> case Exts.newPinnedByteArray# initialLength s1 of { (# s2, ma #) -> -- Write the filtered char range case writeChars isValid ma 0# s2 start end of { (# s3, filteredCount #) -> -- Duplicate to get enough chars to bench case tile ma 0# finalLength filteredLength s3 of { s4 -> case Exts.unsafeFreezeByteArray# ma s4 of { (# s5, a #) -> (# s5, Chars a (Exts.I# (replications Exts.*# filteredCount)) #) }} where -- Ensure to have enough chars replications = case Exts.quotInt# targetCharsCount filteredCount of 0# -> 1# r# -> r# filteredLength = filteredCount Exts.*# wcharSize finalLength = filteredLength Exts.*# replications }} where targetCharsCount = 0x10FFFF# !(CharRange start end) = assert (ord end - ord start + 1 < Exts.I# targetCharsCount) charRange !initialLength = targetCharsCount Exts.*# wcharSize !(Exts.I# wcharSize) = sizeOf 'x' -- | Write a range of chars that match the given predicate writeChars :: (Char -> Bool) -> Exts.MutableByteArray# d -> Exts.Int# -> Exts.State# d -> Char -> Char -> (# Exts.State# d, Exts.Int# #) writeChars isValid ma = go where go i s c1@(Exts.C# c1#) !c2 = if c1 < c2 then go i' s' (succ c1) c2 else (# s', i' #) where !(# s', i' #) = if isValid c1 then (# Exts.writeWideCharArray# ma i c1# s, i Exts.+# 1# #) else (# s, i #) -- | Duplicate a portion of an array -- -- Adapted from Data.Text.Array.tile tile :: -- | Mutable array Exts.MutableByteArray# s -> -- | Start of the portion to duplicate Exts.Int# -> -- | Total length of the duplicate Exts.Int# -> -- | Length of the portion to duplicate Exts.Int# -> Exts.State# s -> Exts.State# s tile dest destOff totalLen = go where go l s | Exts.isTrue# ((2# Exts.*# l) Exts.># totalLen) = Exts.copyMutableByteArray# dest destOff dest (destOff Exts.+# l) (totalLen Exts.-# l) s | otherwise = case Exts.copyMutableByteArray# dest destOff dest (destOff Exts.+# l) l s of s' -> go (2# Exts.*# l) s'