-- | Benchmarks various pure functions from the Text library
--
-- Tested in this benchmark:
--
-- * Most pure functions defined the string types
--
{-# LANGUAGE BangPatterns, CPP, GADTs, MagicHash #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Benchmarks.Pure
    ( benchmark
    ) where

import Control.DeepSeq (NFData (..))
import Control.Exception (evaluate)
import Criterion (Benchmark, bgroup, bench, nf)
import Data.Char (toLower, toUpper)
import Data.Monoid (mappend, mempty)
import GHC.Base (Char (..), Int (..), chr#, ord#, (+#))
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Lazy.Encoding as TL

benchmark :: String -> FilePath -> IO Benchmark
benchmark kind fp = do
    -- Evaluate stuff before actually running the benchmark, we don't want to
    -- count it here.

    -- ByteString A
    bsa     <- BS.readFile fp

    -- Text A/B, LazyText A/B
    ta      <- evaluate $ T.decodeUtf8 bsa
    tb      <- evaluate $ T.toUpper ta
    tla     <- evaluate $ TL.fromChunks (T.chunksOf 16376 ta)
    tlb     <- evaluate $ TL.fromChunks (T.chunksOf 16376 tb)

    -- ByteString B, LazyByteString A/B
    bsb     <- evaluate $ T.encodeUtf8 tb
    bla     <- evaluate $ BL.fromChunks (chunksOf 16376 bsa)
    blb     <- evaluate $ BL.fromChunks (chunksOf 16376 bsb)

    -- String A/B
    sa      <- evaluate $ UTF8.toString bsa
    sb      <- evaluate $ T.unpack tb

    -- Lengths
    bsa_len <- evaluate $ BS.length bsa
    ta_len  <- evaluate $ T.length ta
    bla_len <- evaluate $ BL.length bla
    tla_len <- evaluate $ TL.length tla
    sa_len  <- evaluate $ L.length sa

    -- Lines
    bsl     <- evaluate $ BS.lines bsa
    bll     <- evaluate $ BL.lines bla
    tl      <- evaluate $ T.lines ta
    tll     <- evaluate $ TL.lines tla
    sl      <- evaluate $ L.lines sa

    return $ bgroup "Pure"
        [ bgroup "append"
            [ benchT   $ nf (T.append tb) ta
            , benchTL  $ nf (TL.append tlb) tla
            , benchBS  $ nf (BS.append bsb) bsa
            , benchBSL $ nf (BL.append blb) bla
            , benchS   $ nf ((++) sb) sa
            ]
        , bgroup "concat"
            [ benchT   $ nf T.concat tl
            , benchTL  $ nf TL.concat tll
            , benchBS  $ nf BS.concat bsl
            , benchBSL $ nf BL.concat bll
            , benchS   $ nf L.concat sl
            ]
        , bgroup "cons"
            [ benchT   $ nf (T.cons c) ta
            , benchTL  $ nf (TL.cons c) tla
            , benchBS  $ nf (BS.cons c) bsa
            , benchBSL $ nf (BL.cons c) bla
            , benchS   $ nf (c:) sa
            ]
        , bgroup "concatMap"
            [ benchT   $ nf (T.concatMap (T.replicate 3 . T.singleton)) ta
            , benchTL  $ nf (TL.concatMap (TL.replicate 3 . TL.singleton)) tla
            , benchBS  $ nf (BS.concatMap (BS.replicate 3)) bsa
            , benchBSL $ nf (BL.concatMap (BL.replicate 3)) bla
            , benchS   $ nf (L.concatMap (L.replicate 3 . (:[]))) sa
            ]
        , bgroup "decode"
            [ benchT   $ nf T.decodeUtf8 bsa
            , benchTL  $ nf TL.decodeUtf8 bla
            , benchBS  $ nf BS.unpack bsa
            , benchBSL $ nf BL.unpack bla
            , benchS   $ nf UTF8.toString bsa
            ]
        , bgroup "decode'"
            [ benchT   $ nf T.decodeUtf8' bsa
            , benchTL  $ nf TL.decodeUtf8' bla
            ]
        , bgroup "drop"
            [ benchT   $ nf (T.drop (ta_len `div` 3)) ta
            , benchTL  $ nf (TL.drop (tla_len `div` 3)) tla
            , benchBS  $ nf (BS.drop (bsa_len `div` 3)) bsa
            , benchBSL $ nf (BL.drop (bla_len `div` 3)) bla
            , benchS   $ nf (L.drop (sa_len `div` 3)) sa
            ]
        , bgroup "encode"
            [ benchT   $ nf T.encodeUtf8 ta
            , benchTL  $ nf TL.encodeUtf8 tla
            , benchBS  $ nf BS.pack sa
            , benchBSL $ nf BL.pack sa
            , benchS   $ nf UTF8.fromString sa
            ]
        , bgroup "filter"
            [ benchT   $ nf (T.filter p0) ta
            , benchTL  $ nf (TL.filter p0) tla
            , benchBS  $ nf (BS.filter p0) bsa
            , benchBSL $ nf (BL.filter p0) bla
            , benchS   $ nf (L.filter p0) sa
            ]
        , bgroup "filter.filter"
            [ benchT   $ nf (T.filter p1 . T.filter p0) ta
            , benchTL  $ nf (TL.filter p1 . TL.filter p0) tla
            , benchBS  $ nf (BS.filter p1 . BS.filter p0) bsa
            , benchBSL $ nf (BL.filter p1 . BL.filter p0) bla
            , benchS   $ nf (L.filter p1 . L.filter p0) sa
            ]
        , bgroup "foldl'"
            [ benchT   $ nf (T.foldl' len 0) ta
            , benchTL  $ nf (TL.foldl' len 0) tla
            , benchBS  $ nf (BS.foldl' len 0) bsa
            , benchBSL $ nf (BL.foldl' len 0) bla
            , benchS   $ nf (L.foldl' len 0) sa
            ]
        , bgroup "foldr"
            [ benchT   $ nf (L.length . T.foldr (:) []) ta
            , benchTL  $ nf (L.length . TL.foldr (:) []) tla
            , benchBS  $ nf (L.length . BS.foldr (:) []) bsa
            , benchBSL $ nf (L.length . BL.foldr (:) []) bla
            , benchS   $ nf (L.length . L.foldr (:) []) sa
            ]
        , bgroup "head"
            [ benchT   $ nf T.head ta
            , benchTL  $ nf TL.head tla
            , benchBS  $ nf BS.head bsa
            , benchBSL $ nf BL.head bla
            , benchS   $ nf L.head sa
            ]
        , bgroup "init"
            [ benchT   $ nf T.init ta
            , benchTL  $ nf TL.init tla
            , benchBS  $ nf BS.init bsa
            , benchBSL $ nf BL.init bla
            , benchS   $ nf L.init sa
            ]
        , bgroup "intercalate"
            [ benchT   $ nf (T.intercalate tsw) tl
            , benchTL  $ nf (TL.intercalate tlw) tll
            , benchBS  $ nf (BS.intercalate bsw) bsl
            , benchBSL $ nf (BL.intercalate blw) bll
            , benchS   $ nf (L.intercalate lw) sl
            ]
        , bgroup "intersperse"
            [ benchT   $ nf (T.intersperse c) ta
            , benchTL  $ nf (TL.intersperse c) tla
            , benchBS  $ nf (BS.intersperse c) bsa
            , benchBSL $ nf (BL.intersperse c) bla
            , benchS   $ nf (L.intersperse c) sa
            ]
        , bgroup "isInfixOf"
            [ benchT   $ nf (T.isInfixOf tsw) ta
            , benchTL  $ nf (TL.isInfixOf tlw) tla
            , benchBS  $ nf (BS.isInfixOf bsw) bsa
              -- no isInfixOf for lazy bytestrings
            , benchS   $ nf (L.isInfixOf lw) sa
            ]
        , bgroup "last"
            [ benchT   $ nf T.last ta
            , benchTL  $ nf TL.last tla
            , benchBS  $ nf BS.last bsa
            , benchBSL $ nf BL.last bla
            , benchS   $ nf L.last sa
            ]
        , bgroup "map"
            [ benchT   $ nf (T.map f) ta
            , benchTL  $ nf (TL.map f) tla
            , benchBS  $ nf (BS.map f) bsa
            , benchBSL $ nf (BL.map f) bla
            , benchS   $ nf (L.map f) sa
            ]
        , bgroup "mapAccumL"
            [ benchT   $ nf (T.mapAccumL g 0) ta
            , benchTL  $ nf (TL.mapAccumL g 0) tla
            , benchBS  $ nf (BS.mapAccumL g 0) bsa
            , benchBSL $ nf (BL.mapAccumL g 0) bla
            , benchS   $ nf (L.mapAccumL g 0) sa
            ]
        , bgroup "mapAccumR"
            [ benchT   $ nf (T.mapAccumR g 0) ta
            , benchTL  $ nf (TL.mapAccumR g 0) tla
            , benchBS  $ nf (BS.mapAccumR g 0) bsa
            , benchBSL $ nf (BL.mapAccumR g 0) bla
            , benchS   $ nf (L.mapAccumR g 0) sa
            ]
        , bgroup "map.map"
            [ benchT   $ nf (T.map f . T.map f) ta
            , benchTL  $ nf (TL.map f . TL.map f) tla
            , benchBS  $ nf (BS.map f . BS.map f) bsa
            , benchBSL $ nf (BL.map f . BL.map f) bla
            , benchS   $ nf (L.map f . L.map f) sa
            ]
        , bgroup "replicate char"
            [ benchT   $ nf (T.replicate bsa_len) (T.singleton c)
            , benchTL  $ nf (TL.replicate (fromIntegral bsa_len)) (TL.singleton c)
            , benchBS  $ nf (BS.replicate bsa_len) c
            , benchBSL $ nf (BL.replicate (fromIntegral bsa_len)) c
            , benchS   $ nf (L.replicate bsa_len) c
            ]
        , bgroup "replicate string"
            [ benchT   $ nf (T.replicate (bsa_len `div` T.length tsw)) tsw
            , benchTL  $ nf (TL.replicate (fromIntegral bsa_len `div` TL.length tlw)) tlw
            , benchS   $ nf (replicat (bsa_len `div` T.length tsw)) lw
            ]
        , bgroup "reverse"
            [ benchT   $ nf T.reverse ta
            , benchTL  $ nf TL.reverse tla
            , benchBS  $ nf BS.reverse bsa
            , benchBSL $ nf BL.reverse bla
            , benchS   $ nf L.reverse sa
            ]
        , bgroup "take"
            [ benchT   $ nf (T.take (ta_len `div` 3)) ta
            , benchTL  $ nf (TL.take (tla_len `div` 3)) tla
            , benchBS  $ nf (BS.take (bsa_len `div` 3)) bsa
            , benchBSL $ nf (BL.take (bla_len `div` 3)) bla
            , benchS   $ nf (L.take (sa_len `div` 3)) sa
            ]
        , bgroup "tail"
            [ benchT   $ nf T.tail ta
            , benchTL  $ nf TL.tail tla
            , benchBS  $ nf BS.tail bsa
            , benchBSL $ nf BL.tail bla
            , benchS   $ nf L.tail sa
            ]
        , bgroup "toLower"
            [ benchT   $ nf T.toLower ta
            , benchTL  $ nf TL.toLower tla
            , benchBS  $ nf (BS.map toLower) bsa
            , benchBSL $ nf (BL.map toLower) bla
            , benchS   $ nf (L.map toLower) sa
            ]
        , bgroup "toUpper"
            [ benchT   $ nf T.toUpper ta
            , benchTL  $ nf TL.toUpper tla
            , benchBS  $ nf (BS.map toUpper) bsa
            , benchBSL $ nf (BL.map toUpper) bla
            , benchS   $ nf (L.map toUpper) sa
            ]
        , bgroup "words"
            [ benchT   $ nf T.words ta
            , benchTL  $ nf TL.words tla
            , benchBS  $ nf BS.words bsa
            , benchBSL $ nf BL.words bla
            , benchS   $ nf L.words sa
            ]
        , bgroup "zipWith"
            [ benchT   $ nf (T.zipWith min tb) ta
            , benchTL  $ nf (TL.zipWith min tlb) tla
            , benchBS  $ nf (BS.zipWith min bsb) bsa
            , benchBSL $ nf (BL.zipWith min blb) bla
            , benchS   $ nf (L.zipWith min sb) sa
            ]
        , bgroup "length"
            [ bgroup "cons"
                [ benchT   $ nf (T.length . T.cons c) ta
                , benchTL  $ nf (TL.length . TL.cons c) tla
                , benchBS  $ nf (BS.length . BS.cons c) bsa
                , benchBSL $ nf (BL.length . BL.cons c) bla
                , benchS   $ nf (L.length . (:) c) sa
                ]
            , bgroup "decode"
                [ benchT   $ nf (T.length . T.decodeUtf8) bsa
                , benchTL  $ nf (TL.length . TL.decodeUtf8) bla
                , benchBS  $ nf (L.length . BS.unpack) bsa
                , benchBSL $ nf (L.length . BL.unpack) bla
                , bench "StringUTF8" $ nf (L.length . UTF8.toString) bsa
                ]
            , bgroup "drop"
                [ benchT   $ nf (T.length . T.drop (ta_len `div` 3)) ta
                , benchTL  $ nf (TL.length . TL.drop (tla_len `div` 3)) tla
                , benchBS  $ nf (BS.length . BS.drop (bsa_len `div` 3)) bsa
                , benchBSL $ nf (BL.length . BL.drop (bla_len `div` 3)) bla
                , benchS   $ nf (L.length . L.drop (sa_len `div` 3)) sa
                ]
            , bgroup "filter"
                [ benchT   $ nf (T.length . T.filter p0) ta
                , benchTL  $ nf (TL.length . TL.filter p0) tla
                , benchBS  $ nf (BS.length . BS.filter p0) bsa
                , benchBSL $ nf (BL.length . BL.filter p0) bla
                , benchS   $ nf (L.length . L.filter p0) sa
                ]
            , bgroup "filter.filter"
                [ benchT   $ nf (T.length . T.filter p1 . T.filter p0) ta
                , benchTL  $ nf (TL.length . TL.filter p1 . TL.filter p0) tla
                , benchBS  $ nf (BS.length . BS.filter p1 . BS.filter p0) bsa
                , benchBSL $ nf (BL.length . BL.filter p1 . BL.filter p0) bla
                , benchS   $ nf (L.length . L.filter p1 . L.filter p0) sa
                ]
            , bgroup "init"
                [ benchT   $ nf (T.length . T.init) ta
                , benchTL  $ nf (TL.length . TL.init) tla
                , benchBS  $ nf (BS.length . BS.init) bsa
                , benchBSL $ nf (BL.length . BL.init) bla
                , benchS   $ nf (L.length . L.init) sa
                ]
            , bgroup "intercalate"
                [ benchT   $ nf (T.length . T.intercalate tsw) tl
                , benchTL  $ nf (TL.length . TL.intercalate tlw) tll
                , benchBS  $ nf (BS.length . BS.intercalate bsw) bsl
                , benchBSL $ nf (BL.length . BL.intercalate blw) bll
                , benchS   $ nf (L.length . L.intercalate lw) sl
                ]
            , bgroup "intersperse"
                [ benchT   $ nf (T.length . T.intersperse c) ta
                , benchTL  $ nf (TL.length . TL.intersperse c) tla
                , benchBS  $ nf (BS.length . BS.intersperse c) bsa
                , benchBSL $ nf (BL.length . BL.intersperse c) bla
                , benchS   $ nf (L.length . L.intersperse c) sa
                ]
            , bgroup "map"
                [ benchT   $ nf (T.length . T.map f) ta
                , benchTL  $ nf (TL.length . TL.map f) tla
                , benchBS  $ nf (BS.length . BS.map f) bsa
                , benchBSL $ nf (BL.length . BL.map f) bla
                , benchS   $ nf (L.length . L.map f) sa
                ]
            , bgroup "map.map"
                [ benchT   $ nf (T.length . T.map f . T.map f) ta
                , benchTL  $ nf (TL.length . TL.map f . TL.map f) tla
                , benchBS  $ nf (BS.length . BS.map f . BS.map f) bsa
                , benchS   $ nf (L.length . L.map f . L.map f) sa
                ]
            , bgroup "replicate char"
                [ benchT   $ nf (T.length . T.replicate bsa_len) (T.singleton c)
                , benchTL  $ nf (TL.length . TL.replicate (fromIntegral bsa_len)) (TL.singleton c)
                , benchBS  $ nf (BS.length . BS.replicate bsa_len) c
                , benchBSL $ nf (BL.length . BL.replicate (fromIntegral bsa_len)) c
                , benchS   $ nf (L.length . L.replicate bsa_len) c
                ]
            , bgroup "replicate string"
                [ benchT   $ nf (T.length . T.replicate (bsa_len `div` T.length tsw)) tsw
                , benchTL  $ nf (TL.length . TL.replicate (fromIntegral bsa_len `div` TL.length tlw)) tlw
                , benchS   $ nf (L.length . replicat (bsa_len `div` T.length tsw)) lw
                ]
            , bgroup "take"
                [ benchT   $ nf (T.length . T.take (ta_len `div` 3)) ta
                , benchTL  $ nf (TL.length . TL.take (tla_len `div` 3)) tla
                , benchBS  $ nf (BS.length . BS.take (bsa_len `div` 3)) bsa
                , benchBSL $ nf (BL.length . BL.take (bla_len `div` 3)) bla
                , benchS   $ nf (L.length . L.take (sa_len `div` 3)) sa
                ]
            , bgroup "tail"
                [ benchT   $ nf (T.length . T.tail) ta
                , benchTL  $ nf (TL.length . TL.tail) tla
                , benchBS  $ nf (BS.length . BS.tail) bsa
                , benchBSL $ nf (BL.length . BL.tail) bla
                , benchS   $ nf (L.length . L.tail) sa
                ]
            , bgroup "toLower"
                [ benchT   $ nf (T.length . T.toLower) ta
                , benchTL  $ nf (TL.length . TL.toLower) tla
                , benchBS  $ nf (BS.length . BS.map toLower) bsa
                , benchBSL $ nf (BL.length . BL.map toLower) bla
                , benchS   $ nf (L.length . L.map toLower) sa
                ]
            , bgroup "toUpper"
                [ benchT   $ nf (T.length . T.toUpper) ta
                , benchTL  $ nf (TL.length . TL.toUpper) tla
                , benchBS  $ nf (BS.length . BS.map toUpper) bsa
                , benchBSL $ nf (BL.length . BL.map toUpper) bla
                , benchS   $ nf (L.length . L.map toUpper) sa
                ]
            , bgroup "words"
                [ benchT   $ nf (L.length . T.words) ta
                , benchTL  $ nf (L.length . TL.words) tla
                , benchBS  $ nf (L.length . BS.words) bsa
                , benchBSL $ nf (L.length . BL.words) bla
                , benchS   $ nf (L.length . L.words) sa
                ]
            , bgroup "zipWith"
                [ benchT   $ nf (T.length . T.zipWith min tb) ta
                , benchTL  $ nf (TL.length . TL.zipWith min tlb) tla
                , benchBS  $ nf (L.length . BS.zipWith min bsb) bsa
                , benchBSL $ nf (L.length . BL.zipWith min blb) bla
                , benchS   $ nf (L.length . L.zipWith min sb) sa
                ]
              ]
        , bgroup "Builder"
            [ bench "mappend char" $ nf (TL.length . TB.toLazyText . mappendNChar 'a') 10000
            , bench "mappend 8 char" $ nf (TL.length . TB.toLazyText . mappend8Char) 'a'
            , bench "mappend text" $ nf (TL.length . TB.toLazyText . mappendNText short) 10000
            ]
        ]
  where
    benchS   = bench ("String+" ++ kind)
    benchT   = bench ("Text+" ++ kind)
    benchTL  = bench ("LazyText+" ++ kind)
    benchBS  = bench ("ByteString+" ++ kind)
    benchBSL = bench ("LazyByteString+" ++ kind)

    c  = 'й'
    p0 = (== c)
    p1 = (/= 'д')
    lw  = "право"
    bsw  = UTF8.fromString lw
    blw  = BL.fromChunks [bsw]
    tsw  = T.pack lw
    tlw  = TL.fromChunks [tsw]
    f (C# c#) = C# (chr# (ord# c# +# 1#))
    g (I# i#) (C# c#) = (I# (i# +# 1#), C# (chr# (ord# c# +# i#)))
    len l _ = l + (1::Int)
    replicat n = concat . L.replicate n
    short = T.pack "short"

#if !MIN_VERSION_bytestring(0,10,0)
instance NFData BS.ByteString

instance NFData BL.ByteString where
    rnf BL.Empty        = ()
    rnf (BL.Chunk _ ts) = rnf ts
#endif

data B where
    B :: NFData a => a -> B

instance NFData B where
    rnf (B b) = rnf b

-- | Split a bytestring in chunks
--
chunksOf :: Int -> BS.ByteString -> [BS.ByteString]
chunksOf k = go
  where
    go t = case BS.splitAt k t of
             (a,b) | BS.null a -> []
                   | otherwise -> a : go b

-- | Append a character n times
--
mappendNChar :: Char -> Int -> TB.Builder
mappendNChar c n = go 0
  where
    go i
      | i < n     = TB.singleton c `mappend` go (i+1)
      | otherwise = mempty

-- | Gives more opportunity for inlining and elimination of unnecesary
-- bounds checks.
--
mappend8Char :: Char -> TB.Builder
mappend8Char c = TB.singleton c `mappend` TB.singleton c `mappend`
                 TB.singleton c `mappend` TB.singleton c `mappend`
                 TB.singleton c `mappend` TB.singleton c `mappend`
                 TB.singleton c `mappend` TB.singleton c

-- | Append a text N times
--
mappendNText :: T.Text -> Int -> TB.Builder
mappendNText t n = go 0
  where
    go i
      | i < n     = TB.fromText t `mappend` go (i+1)
      | otherwise = mempty