-- | -- Module : Streamly.Benchmark.FileSystem.Handle -- Copyright : (c) 2019 Composewell Technologies -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com -- Stability : experimental -- Portability : GHC {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} #ifdef __HADDOCK_VERSION__ #undef INSPECTION #endif #ifdef INSPECTION {-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} #endif module Handle.Read (allBenchmarks) where import Data.Word (Word8) import GHC.Magic (inline) #if __GLASGOW_HASKELL__ >= 802 import GHC.Magic (noinline) #else #define noinline #endif import System.IO (Handle) import qualified Streamly.FileSystem.Handle as FH import qualified Streamly.Internal.Data.Array.Foreign as A import qualified Streamly.Internal.Data.Array.Foreign.Type as AT import qualified Streamly.Internal.Data.Fold as FL import qualified Streamly.Internal.Data.Stream.IsStream as IP import qualified Streamly.Internal.FileSystem.Handle as IFH import qualified Streamly.Internal.Unicode.Stream as IUS import qualified Streamly.Prelude as S import qualified Streamly.Unicode.Stream as SS import Gauge hiding (env) import Prelude hiding (last, length) import Streamly.Benchmark.Common.Handle #ifdef INSPECTION import Streamly.Internal.Data.Stream.StreamD.Type (Step(..), FoldMany) import qualified Streamly.Internal.Data.Array.Foreign.Mut.Type as MA import qualified Streamly.Internal.Data.Stream.StreamD as D import qualified Streamly.Internal.Data.Unfold as IUF import Test.Inspection #endif -- TBD reading with unfold ------------------------------------------------------------------------------- -- unfold read ------------------------------------------------------------------------------- -- | Get the last byte from a file bytestream. readLast :: Handle -> IO (Maybe Word8) readLast = S.last . S.unfold FH.read #ifdef INSPECTION inspect $ hasNoTypeClasses 'readLast inspect $ 'readLast `hasNoType` ''Step -- S.unfold inspect $ 'readLast `hasNoType` ''IUF.ConcatState -- FH.read/UF.many inspect $ 'readLast `hasNoType` ''MA.ReadUState -- FH.read/A.read #endif -- assert that flattenArrays constructors are not present -- | Count the number of bytes in a file. readCountBytes :: Handle -> IO Int readCountBytes = S.length . S.unfold FH.read #ifdef INSPECTION inspect $ hasNoTypeClasses 'readCountBytes inspect $ 'readCountBytes `hasNoType` ''Step -- S.unfold inspect $ 'readCountBytes `hasNoType` ''IUF.ConcatState -- FH.read/UF.many inspect $ 'readCountBytes `hasNoType` ''MA.ReadUState -- FH.read/A.read #endif -- | Count the number of lines in a file. readCountLines :: Handle -> IO Int readCountLines = S.length . IUS.lines FL.drain . SS.decodeLatin1 . S.unfold FH.read #ifdef INSPECTION inspect $ hasNoTypeClasses 'readCountLines inspect $ 'readCountLines `hasNoType` ''Step inspect $ 'readCountLines `hasNoType` ''IUF.ConcatState -- FH.read/UF.many inspect $ 'readCountLines `hasNoType` ''MA.ReadUState -- FH.read/A.read #endif -- | Count the number of words in a file. readCountWords :: Handle -> IO Int readCountWords = S.length . IUS.words FL.drain . SS.decodeLatin1 . S.unfold FH.read #ifdef INSPECTION inspect $ hasNoTypeClasses 'readCountWords -- inspect $ 'readCountWords `hasNoType` ''Step #endif -- | Sum the bytes in a file. readSumBytes :: Handle -> IO Word8 readSumBytes = S.sum . S.unfold FH.read #ifdef INSPECTION inspect $ hasNoTypeClasses 'readSumBytes inspect $ 'readSumBytes `hasNoType` ''Step inspect $ 'readSumBytes `hasNoType` ''IUF.ConcatState -- FH.read/UF.many inspect $ 'readSumBytes `hasNoType` ''MA.ReadUState -- FH.read/A.read #endif -- XXX When we mark this with INLINE and we have two benchmarks using S.drain -- in one benchmark group then somehow GHC ends up delaying the inlining of -- readDrain. Since S.drain has an INLINE[2] for proper rule firing, that does -- not work well because of delyaed inlining and the code does not fuse. We -- need some way of propagating the inline phase information up so that we can -- expedite inlining of the callers too automatically. The minimal example for -- the problem can be created by using just two benchmarks in a bench group -- both using "readDrain". Either GHC should be fixed or we can use -- fusion-plugin to propagate INLINE phase information such that this problem -- does not occur. readDrain :: Handle -> IO () readDrain inh = S.drain $ S.unfold FH.read inh -- XXX investigate why we need an INLINE in this case (GHC) {-# INLINE readDecodeLatin1 #-} readDecodeLatin1 :: Handle -> IO () readDecodeLatin1 inh = S.drain $ SS.decodeLatin1 $ S.unfold FH.read inh readDecodeUtf8 :: Handle -> IO () readDecodeUtf8 inh = S.drain $ SS.decodeUtf8 $ S.unfold FH.read inh #ifdef INSPECTION inspect $ hasNoTypeClasses 'readDecodeUtf8 -- inspect $ 'readDecodeUtf8Lax `hasNoType` ''Step #endif o_1_space_reduce_read :: BenchEnv -> [Benchmark] o_1_space_reduce_read env = [ bgroup "reduce/read" [ -- read raw bytes without any decoding mkBench "S.drain" env $ \inh _ -> readDrain inh , mkBench "S.last" env $ \inh _ -> readLast inh , mkBench "S.sum" env $ \inh _ -> readSumBytes inh -- read with Latin1 decoding , mkBench "SS.decodeLatin1" env $ \inh _ -> readDecodeLatin1 inh , mkBench "S.length" env $ \inh _ -> readCountBytes inh , mkBench "US.lines . SS.decodeLatin1" env $ \inh _ -> readCountLines inh , mkBench "US.words . SS.decodeLatin1" env $ \inh _ -> readCountWords inh -- read with utf8 decoding , mkBenchSmall "SS.decodeUtf8" env $ \inh _ -> readDecodeUtf8 inh ] ] ------------------------------------------------------------------------------- -- stream toBytes ------------------------------------------------------------------------------- -- | Count the number of lines in a file. toChunksConcatUnfoldCountLines :: Handle -> IO Int toChunksConcatUnfoldCountLines inh = S.length $ IUS.lines FL.drain $ SS.decodeLatin1 -- XXX replace with toBytes $ S.unfoldMany A.read (IFH.toChunks inh) #ifdef INSPECTION inspect $ hasNoTypeClasses 'toChunksConcatUnfoldCountLines inspect $ 'toChunksConcatUnfoldCountLines `hasNoType` ''Step inspect $ 'toChunksConcatUnfoldCountLines `hasNoType` ''D.ConcatMapUState #endif o_1_space_reduce_toBytes :: BenchEnv -> [Benchmark] o_1_space_reduce_toBytes env = [ bgroup "reduce/toBytes" [ mkBench "US.lines . SS.decodeLatin1" env $ \inh _ -> toChunksConcatUnfoldCountLines inh ] ] ------------------------------------------------------------------------------- -- reduce after grouping in chunks ------------------------------------------------------------------------------- chunksOfSum :: Int -> Handle -> IO Int chunksOfSum n inh = S.length $ S.chunksOf n FL.sum (S.unfold FH.read inh) foldManyPostChunksOfSum :: Int -> Handle -> IO Int foldManyPostChunksOfSum n inh = S.length $ IP.foldManyPost (FL.take n FL.sum) (S.unfold FH.read inh) foldManyChunksOfSum :: Int -> Handle -> IO Int foldManyChunksOfSum n inh = S.length $ IP.foldMany (FL.take n FL.sum) (S.unfold FH.read inh) -- XXX investigate why we need an INLINE in this case (GHC) -- Even though allocations remain the same in both cases inlining improves time -- by 4x. -- | Slice in chunks of size n and get the count of chunks. {-# INLINE chunksOf #-} chunksOf :: Int -> Handle -> IO Int chunksOf n inh = -- writeNUnsafe gives 2.5x boost here over writeN. S.length $ S.chunksOf n (AT.writeNUnsafe n) (S.unfold FH.read inh) #ifdef INSPECTION inspect $ hasNoTypeClasses 'chunksOf inspect $ 'chunksOf `hasNoType` ''Step inspect $ 'chunksOf `hasNoType` ''FoldMany inspect $ 'chunksOf `hasNoType` ''AT.ArrayUnsafe -- AT.writeNUnsafe inspect $ 'chunksOf `hasNoType` ''IUF.ConcatState -- FH.read/UF.many inspect $ 'chunksOf `hasNoType` ''MA.ReadUState -- FH.read/A.read #endif {-# INLINE arraysOf #-} arraysOf :: Int -> Handle -> IO Int arraysOf n inh = S.length $ IP.arraysOf n (S.unfold FH.read inh) o_1_space_reduce_read_grouped :: BenchEnv -> [Benchmark] o_1_space_reduce_read_grouped env = [ bgroup "reduce/read/chunks" [ mkBench ("S.chunksOf " ++ show (bigSize env) ++ " FL.sum") env $ \inh _ -> chunksOfSum (bigSize env) inh , mkBench "S.chunksOf 1 FL.sum" env $ \inh _ -> chunksOfSum 1 inh -- XXX investigate why we need inline/noinline in these cases (GHC) -- Chunk using parsers , mkBench ("S.foldManyPost (FL.take " ++ show (bigSize env) ++ " FL.sum)") env $ \inh _ -> noinline foldManyPostChunksOfSum (bigSize env) inh , mkBench "S.foldManyPost (FL.take 1 FL.sum)" env $ \inh _ -> inline foldManyPostChunksOfSum 1 inh , mkBench ("S.foldMany (FL.take " ++ show (bigSize env) ++ " FL.sum)") env $ \inh _ -> noinline foldManyChunksOfSum (bigSize env) inh , mkBench "S.foldMany (FL.take 1 FL.sum)" env $ \inh _ -> inline foldManyChunksOfSum 1 inh -- folding chunks to arrays , mkBenchSmall "S.chunksOf 1" env $ \inh _ -> chunksOf 1 inh , mkBench "S.chunksOf 10" env $ \inh _ -> chunksOf 10 inh , mkBench "S.chunksOf 1000" env $ \inh _ -> chunksOf 1000 inh -- arraysOf may use a different impl than chunksOf , mkBenchSmall "S.arraysOf 1" env $ \inh _ -> arraysOf 1 inh , mkBench "S.arraysOf 10" env $ \inh _ -> arraysOf 10 inh , mkBench "S.arraysOf 1000" env $ \inh _ -> arraysOf 1000 inh ] ] allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = Prelude.concat [ o_1_space_reduce_read env , o_1_space_reduce_toBytes env , o_1_space_reduce_read_grouped env ]