root/compiler/coreSyn/CoreUnfold.lhs

Revision 4fa3f16ddb9fa8e5d59bde5354918a39e0430a74, 48.5 KB (checked in by Simon Peyton Jones <simonpj@…>, 17 minutes ago)

Be less aggressive about the result discount

This patch fixes Trac #6099 by reducing the result discount in CoreUnfold?.conSize.
See Note [Constructor size and result discount] in CoreUnfold?.

The existing version is definitely too aggressive. Simon M found it an
"unambiguous win" but it is definitely what led to the bloat. In a function
with a lot of case branches, all returning a constructor, the discount could
grow arbitrarily large.

I also had to increase the -funfolding-creation-threshold from 450 to 750,
otherwise some functions that should inline simply never get an unfolding.
(The massive result discount was allow the unfolding to appear before.)

The nofib results are these, picking a handful of outliers to show.

Program Size Allocs Runtime Elapsed TotalMem?


fulsom -0.5% -1.6% -2.8% -2.6% +31.1%

maillist -0.2% -0.0% 0.09 0.09 -3.7%

mandel -0.4% +6.6% 0.12 0.12 +0.0%

nucleic2 -0.2% +18.5% 0.11 0.11 +0.0%

parstof -0.4% +4.0% 0.00 0.00 +0.0%


Min -0.9% -1.6% -19.7% -19.7% -3.7%
Max +0.3% +18.5% +2.7% +2.7% +31.1%

Geometric Mean -0.3% +0.4% -3.0% -3.0% +0.2%

Turns out that nucleic2 has a function

Main.$wabsolute_pos =

\ (ww_s4oj
Types.Tfo) (ww1_s4oo :: Types.FloatT)
(ww2_s4op
Types.FloatT) (ww3_s4oq :: Types.FloatT) -> case ww_s4oj of _ { Types.Tfo a_a1sS b_a1sT c_a1sU d_a1sV e_a1sW f_a1sX g_a1sY h_a1sZ i_a1t0 tx_a1t1 ty_a1t2 tz_a1t3 -> (# case ww1_s4oo of _ { GHC.Types.F# x_a2sO -> case a_a1sS of _ { GHC.Types.F# y_a2sS -> case ww2_s4op of _ { GHC.Types.F# x1_X2y9 -> case d_a1sV of _ { GHC.Types.F# y1_X2yh -> case ww3_s4oq of _ { GHC.Types.F# x2_X2yj -> case g_a1sY of _ { GHC.Types.F# y2_X2yr -> case tx_a1t1 of _ { GHC.Types.F# y3_X2yn -> GHC.Types.F# (GHC.Prim.plusFloat# (GHC.Prim.plusFloat# (GHC.Prim.plusFloat# (GHC.Prim.timesFloat# x_a2sO y_a2sS) (GHC.Prim.timesFloat# x1_X2y9 y1_X2yh)) (GHC.Prim.timesFloat# x2_X2yj y2_X2yr)) y3_X2yn) } } }}}}},

<similar>,
<similar> )

This is pretty big, but inlining it does get rid of that F# allocation.
But we'll also get rid of it with deep CPR: Trac #2289. For now we just
accept the change.

  • Property mode set to 100644
Line 
1%
2% (c) The University of Glasgow 2006
3% (c) The AQUA Project, Glasgow University, 1994-1998
4%
5
6Core-syntax unfoldings
7
8Unfoldings (which can travel across module boundaries) are in Core
9syntax (namely @CoreExpr@s).
10
11The type @Unfolding@ sits ``above'' simply-Core-expressions
12unfoldings, capturing ``higher-level'' things we know about a binding,
13usually things that the simplifier found out (e.g., ``it's a
14literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
15find, unsurprisingly, a Core expression.
16
17\begin{code}
18{-# OPTIONS -fno-warn-tabs #-}
19-- The above warning supression flag is a temporary kludge.
20-- While working on this module you are encouraged to remove it and
21-- detab the module (please do the detabbing in a separate patch). See
22--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
23-- for details
24
25module CoreUnfold (
26        Unfolding, UnfoldingGuidance,   -- Abstract types
27
28        noUnfolding, mkImplicitUnfolding, 
29        mkUnfolding, mkCoreUnfolding,
30        mkTopUnfolding, mkSimpleUnfolding,
31        mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule,
32        mkCompulsoryUnfolding, mkDFunUnfolding,
33
34        interestingArg, ArgSummary(..),
35
36        couldBeSmallEnoughToInline, inlineBoringOk,
37        certainlyWillInline, smallEnoughToInline,
38
39        callSiteInline, CallCtxt(..),
40
41        -- Reexport from CoreSubst (it only live there so it can be used
42        -- by the Very Simple Optimiser)
43        exprIsConApp_maybe, exprIsLiteral_maybe
44    ) where
45
46#include "HsVersions.h"
47
48import StaticFlags
49import DynFlags
50import CoreSyn
51import PprCore          ()      -- Instances
52import TcType           ( tcSplitDFunTy )
53import OccurAnal        ( occurAnalyseExpr )
54import CoreSubst hiding( substTy )
55import CoreArity       ( manifestArity, exprBotStrictness_maybe )
56import CoreUtils
57import Id
58import DataCon
59import Literal
60import PrimOp
61import IdInfo
62import BasicTypes       ( Arity )
63import Type
64import PrelNames
65import Bag
66import Util
67import FastTypes
68import FastString
69import Outputable
70import ForeignCall
71
72import Data.Maybe
73\end{code}
74
75
76%************************************************************************
77%*                                                                      *
78\subsection{Making unfoldings}
79%*                                                                      *
80%************************************************************************
81
82\begin{code}
83mkTopUnfolding :: Bool -> CoreExpr -> Unfolding
84mkTopUnfolding = mkUnfolding InlineRhs True {- Top level -}
85
86mkImplicitUnfolding :: CoreExpr -> Unfolding
87-- For implicit Ids, do a tiny bit of optimising first
88mkImplicitUnfolding expr = mkTopUnfolding False (simpleOptExpr expr) 
89
90-- Note [Top-level flag on inline rules]
91-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92-- Slight hack: note that mk_inline_rules conservatively sets the
93-- top-level flag to True.  It gets set more accurately by the simplifier
94-- Simplify.simplUnfolding.
95
96mkSimpleUnfolding :: CoreExpr -> Unfolding
97mkSimpleUnfolding = mkUnfolding InlineRhs False False
98
99mkDFunUnfolding :: Type -> [CoreExpr] -> Unfolding
100mkDFunUnfolding dfun_ty ops
101  = DFunUnfolding dfun_nargs data_con ops
102  where
103    (tvs, n_theta, cls, _) = tcSplitDFunTy dfun_ty
104    dfun_nargs = length tvs + n_theta
105    data_con   = classDataCon cls
106
107mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
108mkWwInlineRule id expr arity
109  = mkCoreUnfolding (InlineWrapper id) True
110                   (simpleOptExpr expr) arity
111                   (UnfWhen unSaturatedOk boringCxtNotOk)
112
113mkCompulsoryUnfolding :: CoreExpr -> Unfolding
114mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
115  = mkCoreUnfolding InlineCompulsory True
116                    (simpleOptExpr expr) 0    -- Arity of unfolding doesn't matter
117                    (UnfWhen unSaturatedOk boringCxtOk)
118
119mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding
120mkInlineUnfolding mb_arity expr
121  = mkCoreUnfolding InlineStable
122                    True         -- Note [Top-level flag on inline rules]
123                    expr' arity
124                    (UnfWhen unsat_ok boring_ok)
125  where
126    expr' = simpleOptExpr expr
127    (unsat_ok, arity) = case mb_arity of
128                          Nothing -> (unSaturatedOk, manifestArity expr')
129                          Just ar -> (needSaturated, ar)
130             
131    boring_ok = inlineBoringOk expr'
132
133mkInlinableUnfolding :: CoreExpr -> Unfolding
134mkInlinableUnfolding expr
135  = mkUnfolding InlineStable True is_bot expr'
136  where
137    expr' = simpleOptExpr expr
138    is_bot = isJust (exprBotStrictness_maybe expr')
139\end{code}
140
141Internal functions
142
143\begin{code}
144mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
145                -> Arity -> UnfoldingGuidance -> Unfolding
146-- Occurrence-analyses the expression before capturing it
147mkCoreUnfolding src top_lvl expr arity guidance
148  = CoreUnfolding { uf_tmpl         = occurAnalyseExpr expr,
149                    uf_src          = src,
150                    uf_arity        = arity,
151                    uf_is_top       = top_lvl,
152                    uf_is_value     = exprIsHNF        expr,
153                    uf_is_conlike   = exprIsConLike    expr,
154                    uf_is_work_free = exprIsWorkFree   expr,
155                    uf_expandable   = exprIsExpandable expr,
156                    uf_guidance     = guidance }
157
158mkUnfolding :: UnfoldingSource -> Bool -> Bool -> CoreExpr -> Unfolding
159-- Calculates unfolding guidance
160-- Occurrence-analyses the expression before capturing it
161mkUnfolding src top_lvl is_bottoming expr
162  | top_lvl && is_bottoming
163  , not (exprIsTrivial expr)
164  = NoUnfolding    -- See Note [Do not inline top-level bottoming functions]
165  | otherwise
166  = CoreUnfolding { uf_tmpl         = occ_anald_expr,
167                    uf_src          = src,
168                    uf_arity        = arity,
169                    uf_is_top       = top_lvl,
170                    uf_is_value     = exprIsHNF        expr,
171                    uf_is_conlike   = exprIsConLike    expr,
172                    uf_expandable   = exprIsExpandable expr,
173                    uf_is_work_free = exprIsWorkFree   expr,
174                    uf_guidance     = guidance }
175  where
176    occ_anald_expr    = occurAnalyseExpr expr
177    (arity, guidance) = calcUnfoldingGuidance occ_anald_expr
178        -- Sometimes during simplification, there's a large let-bound thing     
179        -- which has been substituted, and so is now dead; so 'expr' contains
180        -- two copies of the thing while the occurrence-analysed expression doesn't
181        -- Nevertheless, we *don't* occ-analyse before computing the size because the
182        -- size computation bales out after a while, whereas occurrence analysis does not.
183        --
184        -- This can occasionally mean that the guidance is very pessimistic;
185        -- it gets fixed up next round.  And it should be rare, because large
186        -- let-bound things that are dead are usually caught by preInlineUnconditionally
187\end{code}
188
189%************************************************************************
190%*                                                                      *
191\subsection{The UnfoldingGuidance type}
192%*                                                                      *
193%************************************************************************
194
195\begin{code}
196inlineBoringOk :: CoreExpr -> Bool
197-- See Note [INLINE for small functions]
198-- True => the result of inlining the expression is
199--         no bigger than the expression itself
200--     eg      (\x y -> f y x)
201-- This is a quick and dirty version. It doesn't attempt
202-- to deal with  (\x y z -> x (y z))
203-- The really important one is (x `cast` c)
204inlineBoringOk e
205  = go 0 e
206  where
207    go :: Int -> CoreExpr -> Bool
208    go credit (Lam x e) | isId x           = go (credit+1) e
209                        | otherwise        = go credit e
210    go credit (App f (Type {}))            = go credit f
211    go credit (App f a) | credit > 0 
212                        , exprIsTrivial a  = go (credit-1) f
213    go credit (Tick _ e)                 = go credit e -- dubious
214    go credit (Cast e _)                   = go credit e
215    go _      (Var {})                     = boringCxtOk
216    go _      _                            = boringCxtNotOk
217
218calcUnfoldingGuidance
219        :: CoreExpr     -- Expression to look at
220        -> (Arity, UnfoldingGuidance)
221calcUnfoldingGuidance expr
222  = case collectBinders expr of { (bndrs, body) ->
223    let
224        bOMB_OUT_SIZE = opt_UF_CreationThreshold
225               -- Bomb out if size gets bigger than this
226        val_bndrs   = filter isId bndrs
227        n_val_bndrs = length val_bndrs
228
229        guidance
230          = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
231              TooBig -> UnfNever
232              SizeIs size cased_bndrs scrut_discount
233                | uncondInline expr n_val_bndrs (iBox size)
234                -> UnfWhen unSaturatedOk boringCxtOk   -- Note [INLINE for small functions]
235                | otherwise
236                -> UnfIfGoodArgs { ug_args  = map (discount cased_bndrs) val_bndrs
237                                 , ug_size  = iBox size
238                                 , ug_res   = iBox scrut_discount }
239
240        discount cbs bndr
241           = foldlBag (\acc (b',n) -> if bndr==b' then acc+n else acc) 
242                      0 cbs
243    in
244    (n_val_bndrs, guidance) }
245\end{code}
246
247Note [Computing the size of an expression]
248~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
249The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
250heuristics right has taken a long time.  Here's the basic strategy:
251
252    * Variables, literals: 0
253      (Exception for string literals, see litSize.)
254
255    * Function applications (f e1 .. en): 1 + #value args
256
257    * Constructor applications: 1, regardless of #args
258
259    * Let(rec): 1 + size of components
260
261    * Note, cast: 0
262
263Examples
264
265  Size  Term
266  --------------
267    0     42#
268    0     x
269    0     True
270    2     f x
271    1     Just x
272    4     f (g x)
273
274Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
275a function call to account for.  Notice also that constructor applications
276are very cheap, because exposing them to a caller is so valuable.
277
278[25/5/11] All sizes are now multiplied by 10, except for primops
279(which have sizes like 1 or 4.  This makes primops look fantastically
280cheap, and seems to be almost unversally beneficial.  Done partly as a
281result of #4978.
282
283Note [Do not inline top-level bottoming functions]
284~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
285The FloatOut pass has gone to some trouble to float out calls to 'error'
286and similar friends.  See Note [Bottoming floats] in SetLevels.
287Do not re-inline them!  But we *do* still inline if they are very small
288(the uncondInline stuff).
289
290Note [INLINE for small functions]
291~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
292Consider        {-# INLINE f #-}
293                f x = Just x
294                g y = f y
295Then f's RHS is no larger than its LHS, so we should inline it into
296even the most boring context.  In general, f the function is
297sufficiently small that its body is as small as the call itself, the
298inline unconditionally, regardless of how boring the context is.
299
300Things to note:
301
302(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
303    than the thing it's replacing.  Notice that
304      (f x) --> (g 3)             -- YES, unconditionally
305      (f x) --> x : []            -- YES, *even though* there are two
306                                  --      arguments to the cons
307      x     --> g 3               -- NO
308      x     --> Just v            -- NO
309
310    It's very important not to unconditionally replace a variable by
311    a non-atomic term.
312
313(2) We do this even if the thing isn't saturated, else we end up with the
314    silly situation that
315       f x y = x
316       ...map (f 3)...
317    doesn't inline.  Even in a boring context, inlining without being
318    saturated will give a lambda instead of a PAP, and will be more
319    efficient at runtime.
320
321(3) However, when the function's arity > 0, we do insist that it
322    has at least one value argument at the call site.  (This check is
323    made in the UnfWhen case of callSiteInline.) Otherwise we find this:
324         f = /\a \x:a. x
325         d = /\b. MkD (f b)
326    If we inline f here we get
327         d = /\b. MkD (\x:b. x)
328    and then prepareRhs floats out the argument, abstracting the type
329    variables, so we end up with the original again!
330
331(4) We must be much more cautious about arity-zero things. Consider
332       let x = y +# z in ...
333    In *size* terms primops look very small, because the generate a
334    single instruction, but we do not want to unconditionally replace
335    every occurrence of x with (y +# z).  So we only do the
336    unconditional-inline thing for *trivial* expressions.
337 
338    NB: you might think that PostInlineUnconditionally would do this
339    but it doesn't fire for top-level things; see SimplUtils
340    Note [Top level and postInlineUnconditionally]
341
342\begin{code}
343uncondInline :: CoreExpr -> Arity -> Int -> Bool
344-- Inline unconditionally if there no size increase
345-- Size of call is arity (+1 for the function)
346-- See Note [INLINE for small functions]
347uncondInline rhs arity size
348  | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
349  | otherwise = exprIsTrivial rhs        -- See Note [INLINE for small functions] (4)
350\end{code}
351
352
353\begin{code}
354sizeExpr :: FastInt         -- Bomb out if it gets bigger than this
355         -> [Id]            -- Arguments; we're interested in which of these
356                            -- get case'd
357         -> CoreExpr
358         -> ExprSize
359
360-- Note [Computing the size of an expression]
361
362sizeExpr bOMB_OUT_SIZE top_args expr
363  = size_up expr
364  where
365    size_up (Cast e _) = size_up e
366    size_up (Tick _ e) = size_up e
367    size_up (Type _)   = sizeZero           -- Types cost nothing
368    size_up (Coercion _) = sizeZero
369    size_up (Lit lit)  = sizeN (litSize lit)
370    size_up (Var f)    = size_up_call f []  -- Make sure we get constructor
371                                            -- discounts even on nullary constructors
372
373    size_up (App fun (Type _)) = size_up fun
374    size_up (App fun (Coercion _)) = size_up fun
375    size_up (App fun arg)      = size_up arg  `addSizeNSD`
376                                 size_up_app fun [arg]
377
378    size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 10)
379                      | otherwise = size_up e
380
381    size_up (Let (NonRec binder rhs) body)
382      = size_up rhs             `addSizeNSD`
383        size_up body            `addSizeN`
384        (if isUnLiftedType (idType binder) then 0 else 10)
385                -- For the allocation
386                -- If the binder has an unlifted type there is no allocation
387
388    size_up (Let (Rec pairs) body)
389      = foldr (addSizeNSD . size_up . snd) 
390              (size_up body `addSizeN` (10 * length pairs))     -- (length pairs) for the allocation
391              pairs
392
393    size_up (Case (Var v) _ _ alts) 
394        | v `elem` top_args             -- We are scrutinising an argument variable
395        = alts_size (foldr addAltSize sizeZero alt_sizes)
396                    (foldr maxSize    sizeZero alt_sizes)
397                -- Good to inline if an arg is scrutinised, because
398                -- that may eliminate allocation in the caller
399                -- And it eliminates the case itself
400        where
401          alt_sizes = map size_up_alt alts
402
403                -- alts_size tries to compute a good discount for
404                -- the case when we are scrutinising an argument variable
405          alts_size (SizeIs tot tot_disc tot_scrut)  -- Size of all alternatives
406                    (SizeIs max _        _)          -- Size of biggest alternative
407                = SizeIs tot (unitBag (v, iBox (_ILIT(20) +# tot -# max)) `unionBags` tot_disc) tot_scrut
408                        -- If the variable is known, we produce a discount that
409                        -- will take us back to 'max', the size of the largest alternative
410                        -- The 1+ is a little discount for reduced allocation in the caller
411                        --
412                        -- Notice though, that we return tot_disc, the total discount from
413                        -- all branches.  I think that's right.
414
415          alts_size tot_size _ = tot_size
416
417    size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
418                                foldr (addAltSize . size_up_alt) case_size alts
419      where
420          case_size
421           | is_inline_scrut e, not (lengthExceeds alts 1)  = sizeN (-10)
422           | otherwise = sizeZero
423                -- Normally we don't charge for the case itself, but
424                -- we charge one per alternative (see size_up_alt,
425                -- below) to account for the cost of the info table
426                -- and comparisons.
427                --
428                -- However, in certain cases (see is_inline_scrut
429                -- below), no code is generated for the case unless
430                -- there are multiple alts.  In these cases we
431                -- subtract one, making the first alt free.
432                -- e.g. case x# +# y# of _ -> ...   should cost 1
433                --      case touch# x# of _ -> ...  should cost 0
434                -- (see #4978)
435                --
436                -- I would like to not have the "not (lengthExceeds alts 1)"
437                -- condition above, but without that some programs got worse
438                -- (spectral/hartel/event and spectral/para).  I don't fully
439                -- understand why. (SDM 24/5/11)
440
441                -- unboxed variables, inline primops and unsafe foreign calls
442                -- are all "inline" things:
443          is_inline_scrut (Var v) = isUnLiftedType (idType v)
444          is_inline_scrut scrut
445              | (Var f, _) <- collectArgs scrut
446                = case idDetails f of
447                    FCallId fc  -> not (isSafeForeignCall fc)
448                    PrimOpId op -> not (primOpOutOfLine op)
449                    _other      -> False
450              | otherwise
451                = False
452
453    ------------
454    -- size_up_app is used when there's ONE OR MORE value args
455    size_up_app (App fun arg) args
456        | isTyCoArg arg            = size_up_app fun args
457        | otherwise                = size_up arg  `addSizeNSD`
458                                     size_up_app fun (arg:args)
459    size_up_app (Var fun)     args = size_up_call fun args
460    size_up_app other         args = size_up other `addSizeN` length args
461
462    ------------
463    size_up_call :: Id -> [CoreExpr] -> ExprSize
464    size_up_call fun val_args
465       = case idDetails fun of
466           FCallId _        -> sizeN (10 * (1 + length val_args))
467           DataConWorkId dc -> conSize    dc (length val_args)
468           PrimOpId op      -> primOpSize op (length val_args)
469           ClassOpId _      -> classOpSize top_args val_args
470           _                -> funSize top_args fun (length val_args)
471
472    ------------
473    size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
474        -- Don't charge for args, so that wrappers look cheap
475        -- (See comments about wrappers with Case)
476        --
477        -- IMPORATANT: *do* charge 1 for the alternative, else we
478        -- find that giant case nests are treated as practically free
479        -- A good example is Foreign.C.Error.errrnoToIOError
480
481    ------------
482        -- These addSize things have to be here because
483        -- I don't want to give them bOMB_OUT_SIZE as an argument
484    addSizeN TooBig          _  = TooBig
485    addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
486   
487        -- addAltSize is used to add the sizes of case alternatives
488    addAltSize TooBig            _      = TooBig
489    addAltSize _                 TooBig = TooBig
490    addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2) 
491        = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) 
492                                 (xs `unionBags` ys) 
493                                 (d1 +# d2)   -- Note [addAltSize result discounts]
494
495        -- This variant ignores the result discount from its LEFT argument
496        -- It's used when the second argument isn't part of the result
497    addSizeNSD TooBig            _      = TooBig
498    addSizeNSD _                 TooBig = TooBig
499    addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2) 
500        = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) 
501                                 (xs `unionBags` ys) 
502                                 d2  -- Ignore d1
503\end{code}
504
505
506\begin{code}
507-- | Finds a nominal size of a string literal.
508litSize :: Literal -> Int
509-- Used by CoreUnfold.sizeExpr
510litSize (LitInteger {}) = 100   -- Note [Size of literal integers]
511litSize (MachStr str)   = 10 + 10 * ((lengthFS str + 3) `div` 4)
512        -- If size could be 0 then @f "x"@ might be too small
513        -- [Sept03: make literal strings a bit bigger to avoid fruitless
514        --  duplication of little strings]
515litSize _other = 0    -- Must match size of nullary constructors
516                      -- Key point: if  x |-> 4, then x must inline unconditionally
517                      --            (eg via case binding)
518
519classOpSize :: [Id] -> [CoreExpr] -> ExprSize
520-- See Note [Conlike is interesting]
521classOpSize _ [] 
522  = sizeZero
523classOpSize top_args (arg1 : other_args)
524  = SizeIs (iUnbox size) arg_discount (_ILIT(0))
525  where
526    size = 20 + (10 * length other_args)
527    -- If the class op is scrutinising a lambda bound dictionary then
528    -- give it a discount, to encourage the inlining of this function
529    -- The actual discount is rather arbitrarily chosen
530    arg_discount = case arg1 of
531                     Var dict | dict `elem` top_args
532                              -> unitBag (dict, opt_UF_DictDiscount)
533                     _other   -> emptyBag
534                     
535funSize :: [Id] -> Id -> Int -> ExprSize
536-- Size for functions that are not constructors or primops
537-- Note [Function applications]
538funSize top_args fun n_val_args
539  | fun `hasKey` buildIdKey   = buildSize
540  | fun `hasKey` augmentIdKey = augmentSize
541  | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
542  where
543    some_val_args = n_val_args > 0
544
545    size | some_val_args = 10 * (1 + n_val_args)
546         | otherwise     = 0
547        -- The 1+ is for the function itself
548        -- Add 1 for each non-trivial arg;
549        -- the allocation cost, as in let(rec)
550 
551        --                  DISCOUNTS
552        --  See Note [Function application discounts]
553    arg_discount | some_val_args && one_call fun top_args
554                 = unitBag (fun, opt_UF_FunAppDiscount)
555                 | otherwise = emptyBag
556        -- If the function is an argument and is applied
557        -- to some values, give it an arg-discount
558
559    res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
560                 | otherwise                = 0
561        -- If the function is partially applied, show a result discount
562
563    one_call _   []                     = False
564    one_call fun (arg:args) | fun==arg  = case idOccInfo arg of
565                                           OneOcc _ one_branch _ -> one_branch
566                                           _                     -> False
567                            | otherwise = one_call fun args
568
569conSize :: DataCon -> Int -> ExprSize
570conSize dc n_val_args
571  | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(10))    -- Like variables
572
573-- See Note [Unboxed tuple size and result discount]
574  | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox (10 * (1 + n_val_args)))
575
576-- See Note [Constructor size and result discount]
577  | otherwise = SizeIs (_ILIT(10)) emptyBag (iUnbox (10 * (1 + n_val_args)))
578\end{code}
579
580Note [Constructor size and result discount]
581~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
582Treat a constructors application as size 10, regardless of how many
583arguments it has; we are keen to expose them (and we charge separately
584for their args).  We can't treat them as size zero, else we find that
585(Just x) has size 0, which is the same as a lone variable; and hence
586'v' will always be replaced by (Just x), where v is bound to Just x.
587
588The "result discount" is applied if the result of the call is
589scrutinised (say by a case).  For a constructor application that will
590mean the constructor application will disappear, so we don't need to
591charge it to the function.  So the discount should at least match the
592cost of the constructor application, namely 10.  But to give a bit
593of extra incentive we give a discount of 10*(1 + n_val_args).
594
595Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)),
596and said it was an "unambiguous win", but its terribly dangerous
597because a fuction with many many case branches, each finishing with
598a constructor, can have an arbitrarily large discount.  This led to
599terrible code bloat: see Trac #6099.
600
601Note [Unboxed tuple size and result discount]
602~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
603However, unboxed tuples count as size zero. I found occasions where we had
604        f x y z = case op# x y z of { s -> (# s, () #) }
605and f wasn't getting inlined.
606
607I tried giving unboxed tuples a *result discount* of zero (see the
608commented-out line).  Why?  When returned as a result they do not
609allocate, so maybe we don't want to charge so much for them If you
610have a non-zero discount here, we find that workers often get inlined
611back into wrappers, because it look like
612    f x = case $wf x of (# a,b #) -> (a,b)
613and we are keener because of the case.  However while this change
614shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
615more. All other changes were very small. So it's not a big deal but I
616didn't adopt the idea.
617
618Note [Function application discount]
619~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
620We want a discount if the function is applied. A good example is
621monadic combinators with continuation arguments, where inlining is
622quite important.
623
624But we don't want a big discount when a function is called many times
625(see the detailed comments with Trac #6048) because if the function is
626big it won't be inlined at its many call sites and no benefit results.
627Indeed, we can get exponentially big inlinings this way; that is what
628Trac #6048 is about.
629
630So, we only give a function-application discount when the function appears
631textually once, albeit possibly inside a lambda.
632
633Note [Literal integer size]
634~~~~~~~~~~~~~~~~~~~~~~~~~~~
635Literal integers *can* be big (mkInteger [...coefficients...]), but
636need not be (S# n).  We just use an aribitrary big-ish constant here
637so that, in particular, we don't inline top-level defns like
638   n = S# 5
639There's no point in doing so -- any optimsations will see the S#
640through n's unfolding.  Nor will a big size inhibit unfoldings functions
641that mention a literal Integer, because the float-out pass will float
642all those constants to top level.
643
644\begin{code}
645primOpSize :: PrimOp -> Int -> ExprSize
646primOpSize op n_val_args
647 = if primOpOutOfLine op
648      then sizeN (op_size + n_val_args)
649      else sizeN op_size
650 where
651   op_size = primOpCodeSize op
652
653
654buildSize :: ExprSize
655buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
656        -- We really want to inline applications of build
657        -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
658        -- Indeed, we should add a result_discount becuause build is
659        -- very like a constructor.  We don't bother to check that the
660        -- build is saturated (it usually is).  The "-2" discounts for the \c n,
661        -- The "4" is rather arbitrary.
662
663augmentSize :: ExprSize
664augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
665        -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
666        -- e plus ys. The -2 accounts for the \cn
667
668-- When we return a lambda, give a discount if it's used (applied)
669lamScrutDiscount :: ExprSize -> ExprSize
670lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
671lamScrutDiscount TooBig          = TooBig
672\end{code}
673
674Note [addAltSize result discounts]
675~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
676When adding the size of alternatives, we *add* the result discounts
677too, rather than take the *maximum*.  For a multi-branch case, this
678gives a discount for each branch that returns a constructor, making us
679keener to inline.  I did try using 'max' instead, but it makes nofib
680'rewrite' and 'puzzle' allocate significantly more, and didn't make
681binary sizes shrink significantly either.
682
683Note [Discounts and thresholds]
684~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
685Constants for discounts and thesholds are defined in main/StaticFlags,
686all of form opt_UF_xxxx.   They are:
687
688opt_UF_CreationThreshold (45)
689     At a definition site, if the unfolding is bigger than this, we
690     may discard it altogether
691
692opt_UF_UseThreshold (6)
693     At a call site, if the unfolding, less discounts, is smaller than
694     this, then it's small enough inline
695
696opt_UF_KeennessFactor (1.5)
697     Factor by which the discounts are multiplied before
698     subtracting from size
699
700opt_UF_DictDiscount (1)
701     The discount for each occurrence of a dictionary argument
702     as an argument of a class method.  Should be pretty small
703     else big functions may get inlined
704
705opt_UF_FunAppDiscount (6)
706     Discount for a function argument that is applied.  Quite
707     large, because if we inline we avoid the higher-order call.
708
709opt_UF_DearOp (4)
710     The size of a foreign call or not-dupable PrimOp
711
712
713Note [Function applications]
714~~~~~~~~~~~~~~~~~~~~~~~~~~~~
715In a function application (f a b)
716
717  - If 'f' is an argument to the function being analysed,
718    and there's at least one value arg, record a FunAppDiscount for f
719
720  - If the application if a PAP (arity > 2 in this example)
721    record a *result* discount (because inlining
722    with "extra" args in the call may mean that we now
723    get a saturated application)
724
725Code for manipulating sizes
726
727\begin{code}
728data ExprSize = TooBig
729              | SizeIs FastInt          -- Size found
730                       (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
731                       FastInt          -- Size to subtract if result is scrutinised
732                                        -- by a case expression
733
734instance Outputable ExprSize where
735  ppr TooBig         = ptext (sLit "TooBig")
736  ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c))
737
738-- subtract the discount before deciding whether to bale out. eg. we
739-- want to inline a large constructor application into a selector:
740--      tup = (a_1, ..., a_99)
741--      x = case tup of ...
742--
743mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
744mkSizeIs max n xs d | (n -# d) ># max = TooBig
745                    | otherwise       = SizeIs n xs d
746 
747maxSize :: ExprSize -> ExprSize -> ExprSize
748maxSize TooBig         _                                  = TooBig
749maxSize _              TooBig                             = TooBig
750maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
751                                              | otherwise = s2
752
753sizeZero :: ExprSize
754sizeN :: Int -> ExprSize
755
756sizeZero = SizeIs (_ILIT(0))  emptyBag (_ILIT(0))
757sizeN n  = SizeIs (iUnbox n) emptyBag (_ILIT(0))
758\end{code}
759
760
761%************************************************************************
762%*                                                                      *
763\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
764%*                                                                      *
765%************************************************************************
766
767We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
768we ``couldn't possibly use'' on the other side.  Can be overridden w/
769flaggery.  Just the same as smallEnoughToInline, except that it has no
770actual arguments.
771
772\begin{code}
773couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
774couldBeSmallEnoughToInline threshold rhs
775  = case sizeExpr (iUnbox threshold) [] body of
776       TooBig -> False
777       _      -> True
778  where
779    (_, body) = collectBinders rhs
780
781----------------
782smallEnoughToInline :: Unfolding -> Bool
783smallEnoughToInline (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
784  = size <= opt_UF_UseThreshold
785smallEnoughToInline _
786  = False
787
788----------------
789certainlyWillInline :: Unfolding -> Bool
790  -- Sees if the unfolding is pretty certain to inline 
791certainlyWillInline (CoreUnfolding { uf_arity = n_vals, uf_guidance = guidance })
792  = case guidance of
793      UnfNever      -> False
794      UnfWhen {}    -> True
795      UnfIfGoodArgs { ug_size = size} 
796                    -> n_vals > 0     -- See Note [certainlyWillInline: be caseful of thunks]
797                    && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold
798
799certainlyWillInline _
800  = False
801\end{code}
802
803Note [certainlyWillInline: be caseful of thunks]
804~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
805Don't claim that thunks will certainly inline, because that risks work
806duplication.  Even if the work duplication is not great (eg is_cheap
807holds), it can make a big difference in an inner loop In Trac #5623 we
808found that the WorkWrap phase thought that
809       y = case x of F# v -> F# (v +# v)
810was certainlyWillInline, so the addition got duplicated. 
811
812
813%************************************************************************
814%*                                                                      *
815\subsection{callSiteInline}
816%*                                                                      *
817%************************************************************************
818
819This is the key function.  It decides whether to inline a variable at a call site
820
821callSiteInline is used at call sites, so it is a bit more generous.
822It's a very important function that embodies lots of heuristics.
823A non-WHNF can be inlined if it doesn't occur inside a lambda,
824and occurs exactly once or
825    occurs once in each branch of a case and is small
826
827If the thing is in WHNF, there's no danger of duplicating work,
828so we can inline if it occurs once, or is small
829
830NOTE: we don't want to inline top-level functions that always diverge.
831It just makes the code bigger.  Tt turns out that the convenient way to prevent
832them inlining is to give them a NOINLINE pragma, which we do in
833StrictAnal.addStrictnessInfoToTopId
834
835\begin{code}
836callSiteInline :: DynFlags
837               -> Id                    -- The Id
838               -> Bool                  -- True <=> unfolding is active
839               -> Bool                  -- True if there are are no arguments at all (incl type args)
840               -> [ArgSummary]          -- One for each value arg; True if it is interesting
841               -> CallCtxt              -- True <=> continuation is interesting
842               -> Maybe CoreExpr        -- Unfolding, if any
843
844instance Outputable ArgSummary where
845  ppr TrivArg    = ptext (sLit "TrivArg")
846  ppr NonTrivArg = ptext (sLit "NonTrivArg")
847  ppr ValueArg   = ptext (sLit "ValueArg")
848
849data CallCtxt = BoringCtxt
850
851              | ArgCtxt         -- We are somewhere in the argument of a function
852                        Bool    -- True  <=> we're somewhere in the RHS of function with rules
853                                -- False <=> we *are* the argument of a function with non-zero
854                                --           arg discount
855                                --        OR
856                                --           we *are* the RHS of a let  Note [RHS of lets]
857                                -- In both cases, be a little keener to inline
858
859              | ValAppCtxt      -- We're applied to at least one value arg
860                                -- This arises when we have ((f x |> co) y)
861                                -- Then the (f x) has argument 'x' but in a ValAppCtxt
862
863              | CaseCtxt        -- We're the scrutinee of a case
864                                -- that decomposes its scrutinee
865
866instance Outputable CallCtxt where
867  ppr BoringCtxt      = ptext (sLit "BoringCtxt")
868  ppr (ArgCtxt rules) = ptext (sLit "ArgCtxt") <+> ppr rules
869  ppr CaseCtxt        = ptext (sLit "CaseCtxt")
870  ppr ValAppCtxt      = ptext (sLit "ValAppCtxt")
871
872callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
873  = case idUnfolding id of 
874      -- idUnfolding checks for loop-breakers, returning NoUnfolding
875      -- Things with an INLINE pragma may have an unfolding *and*
876      -- be a loop breaker  (maybe the knot is not yet untied)
877        CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top
878                      , uf_is_work_free = is_wf, uf_arity = uf_arity
879                      , uf_guidance = guidance, uf_expandable = is_exp }
880          | active_unfolding -> tryUnfolding dflags id lone_variable
881                                    arg_infos cont_info unf_template is_top
882                                    is_wf is_exp uf_arity guidance
883          | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
884          -> pprTrace "Inactive unfolding:" (ppr id) Nothing
885          | otherwise -> Nothing
886        NoUnfolding      -> Nothing 
887        OtherCon {}      -> Nothing 
888        DFunUnfolding {} -> Nothing     -- Never unfold a DFun
889
890tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
891             -> CoreExpr -> Bool -> Bool -> Bool -> Arity -> UnfoldingGuidance
892             -> Maybe CoreExpr 
893tryUnfolding dflags id lone_variable
894             arg_infos cont_info unf_template is_top
895             is_wf is_exp uf_arity guidance
896                        -- uf_arity will typically be equal to (idArity id),
897                        -- but may be less for InlineRules
898 | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
899 = pprTrace ("Considering inlining: " ++ showSDocDump (ppr id))
900                 (vcat [text "arg infos" <+> ppr arg_infos,
901                        text "uf arity" <+> ppr uf_arity,
902                        text "interesting continuation" <+> ppr cont_info,
903                        text "some_benefit" <+> ppr some_benefit,
904                        text "is exp:" <+> ppr is_exp,
905                        text "is work-free:" <+> ppr is_wf,
906                        text "guidance" <+> ppr guidance,
907                        extra_doc,
908                        text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
909                 result
910  | otherwise  = result
911
912  where
913    n_val_args = length arg_infos
914    saturated  = n_val_args >= uf_arity
915
916    result | yes_or_no = Just unf_template
917           | otherwise = Nothing
918
919    interesting_args = any nonTriv arg_infos
920        -- NB: (any nonTriv arg_infos) looks at the
921        -- over-saturated args too which is "wrong";
922        -- but if over-saturated we inline anyway.
923
924           -- some_benefit is used when the RHS is small enough
925           -- and the call has enough (or too many) value
926           -- arguments (ie n_val_args >= arity). But there must
927           -- be *something* interesting about some argument, or the
928           -- result context, to make it worth inlining
929    some_benefit
930       | not saturated = interesting_args       -- Under-saturated
931                                        -- Note [Unsaturated applications]
932       | n_val_args > uf_arity = True   -- Over-saturated
933       | otherwise = interesting_args   -- Saturated
934                  || interesting_saturated_call
935
936    interesting_saturated_call
937      = case cont_info of
938          BoringCtxt -> not is_top && uf_arity > 0        -- Note [Nested functions]
939          CaseCtxt   -> not (lone_variable && is_wf)      -- Note [Lone variables]
940          ArgCtxt {} -> uf_arity > 0                      -- Note [Inlining in ArgCtxt]
941          ValAppCtxt -> True                              -- Note [Cast then apply]
942
943    (yes_or_no, extra_doc)
944      = case guidance of
945          UnfNever -> (False, empty)
946
947          UnfWhen unsat_ok boring_ok
948             -> (enough_args && (boring_ok || some_benefit), empty )
949             where      -- See Note [INLINE for small functions (3)]
950               enough_args = saturated || (unsat_ok && n_val_args > 0)
951
952          UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
953             -> ( is_wf && some_benefit && small_enough
954                , (text "discounted size =" <+> int discounted_size) )
955             where
956               discounted_size = size - discount
957               small_enough = discounted_size <= opt_UF_UseThreshold
958               discount = computeDiscount uf_arity arg_discounts
959                                          res_discount arg_infos cont_info
960\end{code}
961
962Note [RHS of lets]
963~~~~~~~~~~~~~~~~~~
964Be a tiny bit keener to inline in the RHS of a let, because that might
965lead to good thing later
966     f y = (y,y,y)
967     g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
968We'd inline 'f' if the call was in a case context, and it kind-of-is,
969only we can't see it.  So we treat the RHS of a let as not-totally-boring.
970   
971Note [Unsaturated applications]
972~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
973When a call is not saturated, we *still* inline if one of the
974arguments has interesting structure.  That's sometimes very important.
975A good example is the Ord instance for Bool in Base:
976
977 Rec {
978    $fOrdBool =GHC.Classes.D:Ord
979                 @ Bool
980                 ...
981                 $cmin_ajX
982
983    $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
984    $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
985  }
986
987But the defn of GHC.Classes.$dmmin is:
988
989  $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
990    {- Arity: 3, HasNoCafRefs, Strictness: SLL,
991       Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
992                   case @ a GHC.Classes.<= @ a $dOrd x y of wild {
993                     GHC.Types.False -> y GHC.Types.True -> x }) -}
994
995We *really* want to inline $dmmin, even though it has arity 3, in
996order to unravel the recursion.
997
998
999Note [Things to watch]
1000~~~~~~~~~~~~~~~~~~~~~~
1001*   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
1002    Assume x is exported, so not inlined unconditionally.
1003    Then we want x to inline unconditionally; no reason for it
1004    not to, and doing so avoids an indirection.
1005
1006*   { x = I# 3; ....f x.... }
1007    Make sure that x does not inline unconditionally! 
1008    Lest we get extra allocation.
1009
1010Note [Inlining an InlineRule]
1011~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1012An InlineRules is used for
1013  (a) programmer INLINE pragmas
1014  (b) inlinings from worker/wrapper
1015
1016For (a) the RHS may be large, and our contract is that we *only* inline
1017when the function is applied to all the arguments on the LHS of the
1018source-code defn.  (The uf_arity in the rule.)
1019
1020However for worker/wrapper it may be worth inlining even if the
1021arity is not satisfied (as we do in the CoreUnfolding case) so we don't
1022require saturation.
1023
1024
1025Note [Nested functions]
1026~~~~~~~~~~~~~~~~~~~~~~~
1027If a function has a nested defn we also record some-benefit, on the
1028grounds that we are often able to eliminate the binding, and hence the
1029allocation, for the function altogether; this is good for join points.
1030But this only makes sense for *functions*; inlining a constructor
1031doesn't help allocation unless the result is scrutinised.  UNLESS the
1032constructor occurs just once, albeit possibly in multiple case
1033branches.  Then inlining it doesn't increase allocation, but it does
1034increase the chance that the constructor won't be allocated at all in
1035the branches that don't use it.
1036
1037Note [Cast then apply]
1038~~~~~~~~~~~~~~~~~~~~~~
1039Consider
1040   myIndex = __inline_me ( (/\a. <blah>) |> co )
1041   co :: (forall a. a -> a) ~ (forall a. T a)
1042     ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
1043
1044We need to inline myIndex to unravel this; but the actual call (myIndex a) has
1045no value arguments.  The ValAppCtxt gives it enough incentive to inline.
1046
1047Note [Inlining in ArgCtxt]
1048~~~~~~~~~~~~~~~~~~~~~~~~~~
1049The condition (arity > 0) here is very important, because otherwise
1050we end up inlining top-level stuff into useless places; eg
1051   x = I# 3#
1052   f = \y.  g x
1053This can make a very big difference: it adds 16% to nofib 'integer' allocs,
1054and 20% to 'power'.
1055
1056At one stage I replaced this condition by 'True' (leading to the above
1057slow-down).  The motivation was test eyeball/inline1.hs; but that seems
1058to work ok now.
1059
1060NOTE: arguably, we should inline in ArgCtxt only if the result of the
1061call is at least CONLIKE.  At least for the cases where we use ArgCtxt
1062for the RHS of a 'let', we only profit from the inlining if we get a
1063CONLIKE thing (modulo lets).
1064
1065Note [Lone variables]   See also Note [Interaction of exprIsWorkFree and lone variables]
1066~~~~~~~~~~~~~~~~~~~~~   which appears below
1067The "lone-variable" case is important.  I spent ages messing about
1068with unsatisfactory varaints, but this is nice.  The idea is that if a
1069variable appears all alone
1070
1071        as an arg of lazy fn, or rhs    BoringCtxt
1072        as scrutinee of a case          CaseCtxt
1073        as arg of a fn                  ArgCtxt
1074AND
1075        it is bound to a cheap expression
1076
1077then we should not inline it (unless there is some other reason,
1078e.g. is is the sole occurrence).  That is what is happening at
1079the use of 'lone_variable' in 'interesting_saturated_call'.
1080
1081Why?  At least in the case-scrutinee situation, turning
1082        let x = (a,b) in case x of y -> ...
1083into
1084        let x = (a,b) in case (a,b) of y -> ...
1085and thence to
1086        let x = (a,b) in let y = (a,b) in ...
1087is bad if the binding for x will remain.
1088
1089Another example: I discovered that strings
1090were getting inlined straight back into applications of 'error'
1091because the latter is strict.
1092        s = "foo"
1093        f = \x -> ...(error s)...
1094
1095Fundamentally such contexts should not encourage inlining because the
1096context can ``see'' the unfolding of the variable (e.g. case or a
1097RULE) so there's no gain.  If the thing is bound to a value.
1098
1099However, watch out:
1100
1101 * Consider this:
1102        foo = _inline_ (\n. [n])
1103        bar = _inline_ (foo 20)
1104        baz = \n. case bar of { (m:_) -> m + n }
1105   Here we really want to inline 'bar' so that we can inline 'foo'
1106   and the whole thing unravels as it should obviously do.  This is
1107   important: in the NDP project, 'bar' generates a closure data
1108   structure rather than a list.
1109
1110   So the non-inlining of lone_variables should only apply if the
1111   unfolding is regarded as cheap; because that is when exprIsConApp_maybe
1112   looks through the unfolding.  Hence the "&& is_wf" in the
1113   InlineRule branch.
1114
1115 * Even a type application or coercion isn't a lone variable.
1116   Consider
1117        case $fMonadST @ RealWorld of { :DMonad a b c -> c }
1118   We had better inline that sucker!  The case won't see through it.
1119
1120   For now, I'm treating treating a variable applied to types
1121   in a *lazy* context "lone". The motivating example was
1122        f = /\a. \x. BIG
1123        g = /\a. \y.  h (f a)
1124   There's no advantage in inlining f here, and perhaps
1125   a significant disadvantage.  Hence some_val_args in the Stop case
1126
1127Note [Interaction of exprIsWorkFree and lone variables]
1128~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1129The lone-variable test says "don't inline if a case expression
1130scrutines a lone variable whose unfolding is cheap".  It's very
1131important that, under these circumstances, exprIsConApp_maybe
1132can spot a constructor application. So, for example, we don't
1133consider
1134        let x = e in (x,x)
1135to be cheap, and that's good because exprIsConApp_maybe doesn't
1136think that expression is a constructor application.
1137
1138In the 'not (lone_variable && is_wf)' test, I used to test is_value
1139rather than is_wf, which was utterly wrong, because the above
1140expression responds True to exprIsHNF, which is what sets is_value.
1141
1142This kind of thing can occur if you have
1143
1144        {-# INLINE foo #-}
1145        foo = let x = e in (x,x)
1146
1147which Roman did.
1148
1149\begin{code}
1150computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
1151computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
1152        -- We multiple the raw discounts (args_discount and result_discount)
1153        -- ty opt_UnfoldingKeenessFactor because the former have to do with
1154        --  *size* whereas the discounts imply that there's some extra
1155        --  *efficiency* to be gained (e.g. beta reductions, case reductions)
1156        -- by inlining.
1157
1158  = 10          -- Discount of 1 because the result replaces the call
1159                -- so we count 1 for the function itself
1160
1161    + 10 * length (take n_vals_wanted arg_infos)
1162               -- Discount of (un-scaled) 1 for each arg supplied,
1163               -- because the result replaces the call
1164
1165    + round (opt_UF_KeenessFactor * 
1166             fromIntegral (arg_discount + res_discount'))
1167  where
1168    arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
1169
1170    mk_arg_discount _        TrivArg    = 0 
1171    mk_arg_discount _        NonTrivArg = 10
1172    mk_arg_discount discount ValueArg   = discount
1173
1174    res_discount' = case cont_info of
1175                        BoringCtxt  -> 0
1176                        CaseCtxt    -> res_discount
1177                        _other      -> 40 `min` res_discount
1178                -- res_discount can be very large when a function returns
1179                -- constructors; but we only want to invoke that large discount
1180                -- when there's a case continuation.
1181                -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
1182                -- But we want to aovid inlining large functions that return
1183                -- constructors into contexts that are simply "interesting"
1184\end{code}
1185
1186%************************************************************************
1187%*                                                                      *
1188        Interesting arguments
1189%*                                                                      *
1190%************************************************************************
1191
1192Note [Interesting arguments]
1193~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1194An argument is interesting if it deserves a discount for unfoldings
1195with a discount in that argument position.  The idea is to avoid
1196unfolding a function that is applied only to variables that have no
1197unfolding (i.e. they are probably lambda bound): f x y z There is
1198little point in inlining f here.
1199
1200Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
1201we must look through lets, eg (let x = e in C a b), because the let will
1202float, exposing the value, if we inline.  That makes it different to
1203exprIsHNF.
1204
1205Before 2009 we said it was interesting if the argument had *any* structure
1206at all; i.e. (hasSomeUnfolding v).  But does too much inlining; see Trac #3016.
1207
1208But we don't regard (f x y) as interesting, unless f is unsaturated.
1209If it's saturated and f hasn't inlined, then it's probably not going
1210to now!
1211
1212Note [Conlike is interesting]
1213~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1214Consider
1215        f d = ...((*) d x y)...
1216        ... f (df d')...
1217where df is con-like. Then we'd really like to inline 'f' so that the
1218rule for (*) (df d) can fire.  To do this
1219  a) we give a discount for being an argument of a class-op (eg (*) d)
1220  b) we say that a con-like argument (eg (df d)) is interesting
1221
1222\begin{code}
1223data ArgSummary = TrivArg       -- Nothing interesting
1224                | NonTrivArg    -- Arg has structure
1225                | ValueArg      -- Arg is a con-app or PAP
1226                                -- ..or con-like. Note [Conlike is interesting]
1227
1228interestingArg :: CoreExpr -> ArgSummary
1229-- See Note [Interesting arguments]
1230interestingArg e = go e 0
1231  where
1232    -- n is # value args to which the expression is applied
1233    go (Lit {}) _          = ValueArg
1234    go (Var v)  n
1235       | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather that
1236                                        --    data constructors here
1237       | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
1238       | n > 0             = NonTrivArg -- Saturated or unknown call
1239       | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
1240                                        -- See Note [Conlike is interesting]
1241       | otherwise         = TrivArg    -- n==0, no useful unfolding
1242       where
1243         conlike_unfolding = isConLikeUnfolding (idUnfolding v)
1244
1245    go (Type _)          _ = TrivArg
1246    go (Coercion _)      _ = TrivArg
1247    go (App fn (Type _)) n = go fn n
1248    go (App fn (Coercion _)) n = go fn n
1249    go (App fn _)        n = go fn (n+1)
1250    go (Tick _ a)      n = go a n
1251    go (Cast e _)        n = go e n
1252    go (Lam v e)         n
1253       | isTyVar v         = go e n
1254       | n>0               = go e (n-1)
1255       | otherwise         = ValueArg
1256    go (Let _ e)         n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg }
1257    go (Case {})         _ = NonTrivArg
1258
1259nonTriv ::  ArgSummary -> Bool
1260nonTriv TrivArg = False
1261nonTriv _       = True
1262\end{code}
Note: See TracBrowser for help on using the browser.