| 1 | % |
|---|
| 2 | % (c) The University of Glasgow 2006 |
|---|
| 3 | % (c) The AQUA Project, Glasgow University, 1994-1998 |
|---|
| 4 | % |
|---|
| 5 | |
|---|
| 6 | Core-syntax unfoldings |
|---|
| 7 | |
|---|
| 8 | Unfoldings (which can travel across module boundaries) are in Core |
|---|
| 9 | syntax (namely @CoreExpr@s). |
|---|
| 10 | |
|---|
| 11 | The type @Unfolding@ sits ``above'' simply-Core-expressions |
|---|
| 12 | unfoldings, capturing ``higher-level'' things we know about a binding, |
|---|
| 13 | usually things that the simplifier found out (e.g., ``it's a |
|---|
| 14 | literal''). In the corner of a @CoreUnfolding@ unfolding, you will |
|---|
| 15 | find, unsurprisingly, a Core expression. |
|---|
| 16 | |
|---|
| 17 | \begin{code} |
|---|
| 18 | {-# OPTIONS -fno-warn-tabs #-} |
|---|
| 19 | -- The above warning supression flag is a temporary kludge. |
|---|
| 20 | -- While working on this module you are encouraged to remove it and |
|---|
| 21 | -- detab the module (please do the detabbing in a separate patch). See |
|---|
| 22 | -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces |
|---|
| 23 | -- for details |
|---|
| 24 | |
|---|
| 25 | module CoreUnfold ( |
|---|
| 26 | Unfolding, UnfoldingGuidance, -- Abstract types |
|---|
| 27 | |
|---|
| 28 | noUnfolding, mkImplicitUnfolding, |
|---|
| 29 | mkUnfolding, mkCoreUnfolding, |
|---|
| 30 | mkTopUnfolding, mkSimpleUnfolding, |
|---|
| 31 | mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule, |
|---|
| 32 | mkCompulsoryUnfolding, mkDFunUnfolding, |
|---|
| 33 | |
|---|
| 34 | interestingArg, ArgSummary(..), |
|---|
| 35 | |
|---|
| 36 | couldBeSmallEnoughToInline, inlineBoringOk, |
|---|
| 37 | certainlyWillInline, smallEnoughToInline, |
|---|
| 38 | |
|---|
| 39 | callSiteInline, CallCtxt(..), |
|---|
| 40 | |
|---|
| 41 | -- Reexport from CoreSubst (it only live there so it can be used |
|---|
| 42 | -- by the Very Simple Optimiser) |
|---|
| 43 | exprIsConApp_maybe, exprIsLiteral_maybe |
|---|
| 44 | ) where |
|---|
| 45 | |
|---|
| 46 | #include "HsVersions.h" |
|---|
| 47 | |
|---|
| 48 | import StaticFlags |
|---|
| 49 | import DynFlags |
|---|
| 50 | import CoreSyn |
|---|
| 51 | import PprCore () -- Instances |
|---|
| 52 | import TcType ( tcSplitDFunTy ) |
|---|
| 53 | import OccurAnal ( occurAnalyseExpr ) |
|---|
| 54 | import CoreSubst hiding( substTy ) |
|---|
| 55 | import CoreArity ( manifestArity, exprBotStrictness_maybe ) |
|---|
| 56 | import CoreUtils |
|---|
| 57 | import Id |
|---|
| 58 | import DataCon |
|---|
| 59 | import Literal |
|---|
| 60 | import PrimOp |
|---|
| 61 | import IdInfo |
|---|
| 62 | import BasicTypes ( Arity ) |
|---|
| 63 | import Type |
|---|
| 64 | import PrelNames |
|---|
| 65 | import Bag |
|---|
| 66 | import Util |
|---|
| 67 | import FastTypes |
|---|
| 68 | import FastString |
|---|
| 69 | import Outputable |
|---|
| 70 | import ForeignCall |
|---|
| 71 | |
|---|
| 72 | import Data.Maybe |
|---|
| 73 | \end{code} |
|---|
| 74 | |
|---|
| 75 | |
|---|
| 76 | %************************************************************************ |
|---|
| 77 | %* * |
|---|
| 78 | \subsection{Making unfoldings} |
|---|
| 79 | %* * |
|---|
| 80 | %************************************************************************ |
|---|
| 81 | |
|---|
| 82 | \begin{code} |
|---|
| 83 | mkTopUnfolding :: Bool -> CoreExpr -> Unfolding |
|---|
| 84 | mkTopUnfolding = mkUnfolding InlineRhs True {- Top level -} |
|---|
| 85 | |
|---|
| 86 | mkImplicitUnfolding :: CoreExpr -> Unfolding |
|---|
| 87 | -- For implicit Ids, do a tiny bit of optimising first |
|---|
| 88 | mkImplicitUnfolding expr = mkTopUnfolding False (simpleOptExpr expr) |
|---|
| 89 | |
|---|
| 90 | -- Note [Top-level flag on inline rules] |
|---|
| 91 | -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 92 | -- Slight hack: note that mk_inline_rules conservatively sets the |
|---|
| 93 | -- top-level flag to True. It gets set more accurately by the simplifier |
|---|
| 94 | -- Simplify.simplUnfolding. |
|---|
| 95 | |
|---|
| 96 | mkSimpleUnfolding :: CoreExpr -> Unfolding |
|---|
| 97 | mkSimpleUnfolding = mkUnfolding InlineRhs False False |
|---|
| 98 | |
|---|
| 99 | mkDFunUnfolding :: Type -> [CoreExpr] -> Unfolding |
|---|
| 100 | mkDFunUnfolding dfun_ty ops |
|---|
| 101 | = DFunUnfolding dfun_nargs data_con ops |
|---|
| 102 | where |
|---|
| 103 | (tvs, n_theta, cls, _) = tcSplitDFunTy dfun_ty |
|---|
| 104 | dfun_nargs = length tvs + n_theta |
|---|
| 105 | data_con = classDataCon cls |
|---|
| 106 | |
|---|
| 107 | mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding |
|---|
| 108 | mkWwInlineRule id expr arity |
|---|
| 109 | = mkCoreUnfolding (InlineWrapper id) True |
|---|
| 110 | (simpleOptExpr expr) arity |
|---|
| 111 | (UnfWhen unSaturatedOk boringCxtNotOk) |
|---|
| 112 | |
|---|
| 113 | mkCompulsoryUnfolding :: CoreExpr -> Unfolding |
|---|
| 114 | mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded |
|---|
| 115 | = mkCoreUnfolding InlineCompulsory True |
|---|
| 116 | (simpleOptExpr expr) 0 -- Arity of unfolding doesn't matter |
|---|
| 117 | (UnfWhen unSaturatedOk boringCxtOk) |
|---|
| 118 | |
|---|
| 119 | mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding |
|---|
| 120 | mkInlineUnfolding mb_arity expr |
|---|
| 121 | = mkCoreUnfolding InlineStable |
|---|
| 122 | True -- Note [Top-level flag on inline rules] |
|---|
| 123 | expr' arity |
|---|
| 124 | (UnfWhen unsat_ok boring_ok) |
|---|
| 125 | where |
|---|
| 126 | expr' = simpleOptExpr expr |
|---|
| 127 | (unsat_ok, arity) = case mb_arity of |
|---|
| 128 | Nothing -> (unSaturatedOk, manifestArity expr') |
|---|
| 129 | Just ar -> (needSaturated, ar) |
|---|
| 130 | |
|---|
| 131 | boring_ok = inlineBoringOk expr' |
|---|
| 132 | |
|---|
| 133 | mkInlinableUnfolding :: CoreExpr -> Unfolding |
|---|
| 134 | mkInlinableUnfolding expr |
|---|
| 135 | = mkUnfolding InlineStable True is_bot expr' |
|---|
| 136 | where |
|---|
| 137 | expr' = simpleOptExpr expr |
|---|
| 138 | is_bot = isJust (exprBotStrictness_maybe expr') |
|---|
| 139 | \end{code} |
|---|
| 140 | |
|---|
| 141 | Internal functions |
|---|
| 142 | |
|---|
| 143 | \begin{code} |
|---|
| 144 | mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr |
|---|
| 145 | -> Arity -> UnfoldingGuidance -> Unfolding |
|---|
| 146 | -- Occurrence-analyses the expression before capturing it |
|---|
| 147 | mkCoreUnfolding src top_lvl expr arity guidance |
|---|
| 148 | = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, |
|---|
| 149 | uf_src = src, |
|---|
| 150 | uf_arity = arity, |
|---|
| 151 | uf_is_top = top_lvl, |
|---|
| 152 | uf_is_value = exprIsHNF expr, |
|---|
| 153 | uf_is_conlike = exprIsConLike expr, |
|---|
| 154 | uf_is_work_free = exprIsWorkFree expr, |
|---|
| 155 | uf_expandable = exprIsExpandable expr, |
|---|
| 156 | uf_guidance = guidance } |
|---|
| 157 | |
|---|
| 158 | mkUnfolding :: UnfoldingSource -> Bool -> Bool -> CoreExpr -> Unfolding |
|---|
| 159 | -- Calculates unfolding guidance |
|---|
| 160 | -- Occurrence-analyses the expression before capturing it |
|---|
| 161 | mkUnfolding src top_lvl is_bottoming expr |
|---|
| 162 | | top_lvl && is_bottoming |
|---|
| 163 | , not (exprIsTrivial expr) |
|---|
| 164 | = NoUnfolding -- See Note [Do not inline top-level bottoming functions] |
|---|
| 165 | | otherwise |
|---|
| 166 | = CoreUnfolding { uf_tmpl = occ_anald_expr, |
|---|
| 167 | uf_src = src, |
|---|
| 168 | uf_arity = arity, |
|---|
| 169 | uf_is_top = top_lvl, |
|---|
| 170 | uf_is_value = exprIsHNF expr, |
|---|
| 171 | uf_is_conlike = exprIsConLike expr, |
|---|
| 172 | uf_expandable = exprIsExpandable expr, |
|---|
| 173 | uf_is_work_free = exprIsWorkFree expr, |
|---|
| 174 | uf_guidance = guidance } |
|---|
| 175 | where |
|---|
| 176 | occ_anald_expr = occurAnalyseExpr expr |
|---|
| 177 | (arity, guidance) = calcUnfoldingGuidance occ_anald_expr |
|---|
| 178 | -- Sometimes during simplification, there's a large let-bound thing |
|---|
| 179 | -- which has been substituted, and so is now dead; so 'expr' contains |
|---|
| 180 | -- two copies of the thing while the occurrence-analysed expression doesn't |
|---|
| 181 | -- Nevertheless, we *don't* occ-analyse before computing the size because the |
|---|
| 182 | -- size computation bales out after a while, whereas occurrence analysis does not. |
|---|
| 183 | -- |
|---|
| 184 | -- This can occasionally mean that the guidance is very pessimistic; |
|---|
| 185 | -- it gets fixed up next round. And it should be rare, because large |
|---|
| 186 | -- let-bound things that are dead are usually caught by preInlineUnconditionally |
|---|
| 187 | \end{code} |
|---|
| 188 | |
|---|
| 189 | %************************************************************************ |
|---|
| 190 | %* * |
|---|
| 191 | \subsection{The UnfoldingGuidance type} |
|---|
| 192 | %* * |
|---|
| 193 | %************************************************************************ |
|---|
| 194 | |
|---|
| 195 | \begin{code} |
|---|
| 196 | inlineBoringOk :: CoreExpr -> Bool |
|---|
| 197 | -- See Note [INLINE for small functions] |
|---|
| 198 | -- True => the result of inlining the expression is |
|---|
| 199 | -- no bigger than the expression itself |
|---|
| 200 | -- eg (\x y -> f y x) |
|---|
| 201 | -- This is a quick and dirty version. It doesn't attempt |
|---|
| 202 | -- to deal with (\x y z -> x (y z)) |
|---|
| 203 | -- The really important one is (x `cast` c) |
|---|
| 204 | inlineBoringOk e |
|---|
| 205 | = go 0 e |
|---|
| 206 | where |
|---|
| 207 | go :: Int -> CoreExpr -> Bool |
|---|
| 208 | go credit (Lam x e) | isId x = go (credit+1) e |
|---|
| 209 | | otherwise = go credit e |
|---|
| 210 | go credit (App f (Type {})) = go credit f |
|---|
| 211 | go credit (App f a) | credit > 0 |
|---|
| 212 | , exprIsTrivial a = go (credit-1) f |
|---|
| 213 | go credit (Tick _ e) = go credit e -- dubious |
|---|
| 214 | go credit (Cast e _) = go credit e |
|---|
| 215 | go _ (Var {}) = boringCxtOk |
|---|
| 216 | go _ _ = boringCxtNotOk |
|---|
| 217 | |
|---|
| 218 | calcUnfoldingGuidance |
|---|
| 219 | :: CoreExpr -- Expression to look at |
|---|
| 220 | -> (Arity, UnfoldingGuidance) |
|---|
| 221 | calcUnfoldingGuidance expr |
|---|
| 222 | = case collectBinders expr of { (bndrs, body) -> |
|---|
| 223 | let |
|---|
| 224 | bOMB_OUT_SIZE = opt_UF_CreationThreshold |
|---|
| 225 | -- Bomb out if size gets bigger than this |
|---|
| 226 | val_bndrs = filter isId bndrs |
|---|
| 227 | n_val_bndrs = length val_bndrs |
|---|
| 228 | |
|---|
| 229 | guidance |
|---|
| 230 | = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of |
|---|
| 231 | TooBig -> UnfNever |
|---|
| 232 | SizeIs size cased_bndrs scrut_discount |
|---|
| 233 | | uncondInline expr n_val_bndrs (iBox size) |
|---|
| 234 | -> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions] |
|---|
| 235 | | otherwise |
|---|
| 236 | -> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs |
|---|
| 237 | , ug_size = iBox size |
|---|
| 238 | , ug_res = iBox scrut_discount } |
|---|
| 239 | |
|---|
| 240 | discount cbs bndr |
|---|
| 241 | = foldlBag (\acc (b',n) -> if bndr==b' then acc+n else acc) |
|---|
| 242 | 0 cbs |
|---|
| 243 | in |
|---|
| 244 | (n_val_bndrs, guidance) } |
|---|
| 245 | \end{code} |
|---|
| 246 | |
|---|
| 247 | Note [Computing the size of an expression] |
|---|
| 248 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 249 | The basic idea of sizeExpr is obvious enough: count nodes. But getting the |
|---|
| 250 | heuristics right has taken a long time. Here's the basic strategy: |
|---|
| 251 | |
|---|
| 252 | * Variables, literals: 0 |
|---|
| 253 | (Exception for string literals, see litSize.) |
|---|
| 254 | |
|---|
| 255 | * Function applications (f e1 .. en): 1 + #value args |
|---|
| 256 | |
|---|
| 257 | * Constructor applications: 1, regardless of #args |
|---|
| 258 | |
|---|
| 259 | * Let(rec): 1 + size of components |
|---|
| 260 | |
|---|
| 261 | * Note, cast: 0 |
|---|
| 262 | |
|---|
| 263 | Examples |
|---|
| 264 | |
|---|
| 265 | Size Term |
|---|
| 266 | -------------- |
|---|
| 267 | 0 42# |
|---|
| 268 | 0 x |
|---|
| 269 | 0 True |
|---|
| 270 | 2 f x |
|---|
| 271 | 1 Just x |
|---|
| 272 | 4 f (g x) |
|---|
| 273 | |
|---|
| 274 | Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's |
|---|
| 275 | a function call to account for. Notice also that constructor applications |
|---|
| 276 | are very cheap, because exposing them to a caller is so valuable. |
|---|
| 277 | |
|---|
| 278 | [25/5/11] All sizes are now multiplied by 10, except for primops |
|---|
| 279 | (which have sizes like 1 or 4. This makes primops look fantastically |
|---|
| 280 | cheap, and seems to be almost unversally beneficial. Done partly as a |
|---|
| 281 | result of #4978. |
|---|
| 282 | |
|---|
| 283 | Note [Do not inline top-level bottoming functions] |
|---|
| 284 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 285 | The FloatOut pass has gone to some trouble to float out calls to 'error' |
|---|
| 286 | and similar friends. See Note [Bottoming floats] in SetLevels. |
|---|
| 287 | Do not re-inline them! But we *do* still inline if they are very small |
|---|
| 288 | (the uncondInline stuff). |
|---|
| 289 | |
|---|
| 290 | Note [INLINE for small functions] |
|---|
| 291 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 292 | Consider {-# INLINE f #-} |
|---|
| 293 | f x = Just x |
|---|
| 294 | g y = f y |
|---|
| 295 | Then f's RHS is no larger than its LHS, so we should inline it into |
|---|
| 296 | even the most boring context. In general, f the function is |
|---|
| 297 | sufficiently small that its body is as small as the call itself, the |
|---|
| 298 | inline unconditionally, regardless of how boring the context is. |
|---|
| 299 | |
|---|
| 300 | Things to note: |
|---|
| 301 | |
|---|
| 302 | (1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr) |
|---|
| 303 | than the thing it's replacing. Notice that |
|---|
| 304 | (f x) --> (g 3) -- YES, unconditionally |
|---|
| 305 | (f x) --> x : [] -- YES, *even though* there are two |
|---|
| 306 | -- arguments to the cons |
|---|
| 307 | x --> g 3 -- NO |
|---|
| 308 | x --> Just v -- NO |
|---|
| 309 | |
|---|
| 310 | It's very important not to unconditionally replace a variable by |
|---|
| 311 | a non-atomic term. |
|---|
| 312 | |
|---|
| 313 | (2) We do this even if the thing isn't saturated, else we end up with the |
|---|
| 314 | silly situation that |
|---|
| 315 | f x y = x |
|---|
| 316 | ...map (f 3)... |
|---|
| 317 | doesn't inline. Even in a boring context, inlining without being |
|---|
| 318 | saturated will give a lambda instead of a PAP, and will be more |
|---|
| 319 | efficient at runtime. |
|---|
| 320 | |
|---|
| 321 | (3) However, when the function's arity > 0, we do insist that it |
|---|
| 322 | has at least one value argument at the call site. (This check is |
|---|
| 323 | made in the UnfWhen case of callSiteInline.) Otherwise we find this: |
|---|
| 324 | f = /\a \x:a. x |
|---|
| 325 | d = /\b. MkD (f b) |
|---|
| 326 | If we inline f here we get |
|---|
| 327 | d = /\b. MkD (\x:b. x) |
|---|
| 328 | and then prepareRhs floats out the argument, abstracting the type |
|---|
| 329 | variables, so we end up with the original again! |
|---|
| 330 | |
|---|
| 331 | (4) We must be much more cautious about arity-zero things. Consider |
|---|
| 332 | let x = y +# z in ... |
|---|
| 333 | In *size* terms primops look very small, because the generate a |
|---|
| 334 | single instruction, but we do not want to unconditionally replace |
|---|
| 335 | every occurrence of x with (y +# z). So we only do the |
|---|
| 336 | unconditional-inline thing for *trivial* expressions. |
|---|
| 337 | |
|---|
| 338 | NB: you might think that PostInlineUnconditionally would do this |
|---|
| 339 | but it doesn't fire for top-level things; see SimplUtils |
|---|
| 340 | Note [Top level and postInlineUnconditionally] |
|---|
| 341 | |
|---|
| 342 | \begin{code} |
|---|
| 343 | uncondInline :: CoreExpr -> Arity -> Int -> Bool |
|---|
| 344 | -- Inline unconditionally if there no size increase |
|---|
| 345 | -- Size of call is arity (+1 for the function) |
|---|
| 346 | -- See Note [INLINE for small functions] |
|---|
| 347 | uncondInline rhs arity size |
|---|
| 348 | | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1) |
|---|
| 349 | | otherwise = exprIsTrivial rhs -- See Note [INLINE for small functions] (4) |
|---|
| 350 | \end{code} |
|---|
| 351 | |
|---|
| 352 | |
|---|
| 353 | \begin{code} |
|---|
| 354 | sizeExpr :: FastInt -- Bomb out if it gets bigger than this |
|---|
| 355 | -> [Id] -- Arguments; we're interested in which of these |
|---|
| 356 | -- get case'd |
|---|
| 357 | -> CoreExpr |
|---|
| 358 | -> ExprSize |
|---|
| 359 | |
|---|
| 360 | -- Note [Computing the size of an expression] |
|---|
| 361 | |
|---|
| 362 | sizeExpr bOMB_OUT_SIZE top_args expr |
|---|
| 363 | = size_up expr |
|---|
| 364 | where |
|---|
| 365 | size_up (Cast e _) = size_up e |
|---|
| 366 | size_up (Tick _ e) = size_up e |
|---|
| 367 | size_up (Type _) = sizeZero -- Types cost nothing |
|---|
| 368 | size_up (Coercion _) = sizeZero |
|---|
| 369 | size_up (Lit lit) = sizeN (litSize lit) |
|---|
| 370 | size_up (Var f) = size_up_call f [] -- Make sure we get constructor |
|---|
| 371 | -- discounts even on nullary constructors |
|---|
| 372 | |
|---|
| 373 | size_up (App fun (Type _)) = size_up fun |
|---|
| 374 | size_up (App fun (Coercion _)) = size_up fun |
|---|
| 375 | size_up (App fun arg) = size_up arg `addSizeNSD` |
|---|
| 376 | size_up_app fun [arg] |
|---|
| 377 | |
|---|
| 378 | size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 10) |
|---|
| 379 | | otherwise = size_up e |
|---|
| 380 | |
|---|
| 381 | size_up (Let (NonRec binder rhs) body) |
|---|
| 382 | = size_up rhs `addSizeNSD` |
|---|
| 383 | size_up body `addSizeN` |
|---|
| 384 | (if isUnLiftedType (idType binder) then 0 else 10) |
|---|
| 385 | -- For the allocation |
|---|
| 386 | -- If the binder has an unlifted type there is no allocation |
|---|
| 387 | |
|---|
| 388 | size_up (Let (Rec pairs) body) |
|---|
| 389 | = foldr (addSizeNSD . size_up . snd) |
|---|
| 390 | (size_up body `addSizeN` (10 * length pairs)) -- (length pairs) for the allocation |
|---|
| 391 | pairs |
|---|
| 392 | |
|---|
| 393 | size_up (Case (Var v) _ _ alts) |
|---|
| 394 | | v `elem` top_args -- We are scrutinising an argument variable |
|---|
| 395 | = alts_size (foldr addAltSize sizeZero alt_sizes) |
|---|
| 396 | (foldr maxSize sizeZero alt_sizes) |
|---|
| 397 | -- Good to inline if an arg is scrutinised, because |
|---|
| 398 | -- that may eliminate allocation in the caller |
|---|
| 399 | -- And it eliminates the case itself |
|---|
| 400 | where |
|---|
| 401 | alt_sizes = map size_up_alt alts |
|---|
| 402 | |
|---|
| 403 | -- alts_size tries to compute a good discount for |
|---|
| 404 | -- the case when we are scrutinising an argument variable |
|---|
| 405 | alts_size (SizeIs tot tot_disc tot_scrut) -- Size of all alternatives |
|---|
| 406 | (SizeIs max _ _) -- Size of biggest alternative |
|---|
| 407 | = SizeIs tot (unitBag (v, iBox (_ILIT(20) +# tot -# max)) `unionBags` tot_disc) tot_scrut |
|---|
| 408 | -- If the variable is known, we produce a discount that |
|---|
| 409 | -- will take us back to 'max', the size of the largest alternative |
|---|
| 410 | -- The 1+ is a little discount for reduced allocation in the caller |
|---|
| 411 | -- |
|---|
| 412 | -- Notice though, that we return tot_disc, the total discount from |
|---|
| 413 | -- all branches. I think that's right. |
|---|
| 414 | |
|---|
| 415 | alts_size tot_size _ = tot_size |
|---|
| 416 | |
|---|
| 417 | size_up (Case e _ _ alts) = size_up e `addSizeNSD` |
|---|
| 418 | foldr (addAltSize . size_up_alt) case_size alts |
|---|
| 419 | where |
|---|
| 420 | case_size |
|---|
| 421 | | is_inline_scrut e, not (lengthExceeds alts 1) = sizeN (-10) |
|---|
| 422 | | otherwise = sizeZero |
|---|
| 423 | -- Normally we don't charge for the case itself, but |
|---|
| 424 | -- we charge one per alternative (see size_up_alt, |
|---|
| 425 | -- below) to account for the cost of the info table |
|---|
| 426 | -- and comparisons. |
|---|
| 427 | -- |
|---|
| 428 | -- However, in certain cases (see is_inline_scrut |
|---|
| 429 | -- below), no code is generated for the case unless |
|---|
| 430 | -- there are multiple alts. In these cases we |
|---|
| 431 | -- subtract one, making the first alt free. |
|---|
| 432 | -- e.g. case x# +# y# of _ -> ... should cost 1 |
|---|
| 433 | -- case touch# x# of _ -> ... should cost 0 |
|---|
| 434 | -- (see #4978) |
|---|
| 435 | -- |
|---|
| 436 | -- I would like to not have the "not (lengthExceeds alts 1)" |
|---|
| 437 | -- condition above, but without that some programs got worse |
|---|
| 438 | -- (spectral/hartel/event and spectral/para). I don't fully |
|---|
| 439 | -- understand why. (SDM 24/5/11) |
|---|
| 440 | |
|---|
| 441 | -- unboxed variables, inline primops and unsafe foreign calls |
|---|
| 442 | -- are all "inline" things: |
|---|
| 443 | is_inline_scrut (Var v) = isUnLiftedType (idType v) |
|---|
| 444 | is_inline_scrut scrut |
|---|
| 445 | | (Var f, _) <- collectArgs scrut |
|---|
| 446 | = case idDetails f of |
|---|
| 447 | FCallId fc -> not (isSafeForeignCall fc) |
|---|
| 448 | PrimOpId op -> not (primOpOutOfLine op) |
|---|
| 449 | _other -> False |
|---|
| 450 | | otherwise |
|---|
| 451 | = False |
|---|
| 452 | |
|---|
| 453 | ------------ |
|---|
| 454 | -- size_up_app is used when there's ONE OR MORE value args |
|---|
| 455 | size_up_app (App fun arg) args |
|---|
| 456 | | isTyCoArg arg = size_up_app fun args |
|---|
| 457 | | otherwise = size_up arg `addSizeNSD` |
|---|
| 458 | size_up_app fun (arg:args) |
|---|
| 459 | size_up_app (Var fun) args = size_up_call fun args |
|---|
| 460 | size_up_app other args = size_up other `addSizeN` length args |
|---|
| 461 | |
|---|
| 462 | ------------ |
|---|
| 463 | size_up_call :: Id -> [CoreExpr] -> ExprSize |
|---|
| 464 | size_up_call fun val_args |
|---|
| 465 | = case idDetails fun of |
|---|
| 466 | FCallId _ -> sizeN (10 * (1 + length val_args)) |
|---|
| 467 | DataConWorkId dc -> conSize dc (length val_args) |
|---|
| 468 | PrimOpId op -> primOpSize op (length val_args) |
|---|
| 469 | ClassOpId _ -> classOpSize top_args val_args |
|---|
| 470 | _ -> funSize top_args fun (length val_args) |
|---|
| 471 | |
|---|
| 472 | ------------ |
|---|
| 473 | size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10 |
|---|
| 474 | -- Don't charge for args, so that wrappers look cheap |
|---|
| 475 | -- (See comments about wrappers with Case) |
|---|
| 476 | -- |
|---|
| 477 | -- IMPORATANT: *do* charge 1 for the alternative, else we |
|---|
| 478 | -- find that giant case nests are treated as practically free |
|---|
| 479 | -- A good example is Foreign.C.Error.errrnoToIOError |
|---|
| 480 | |
|---|
| 481 | ------------ |
|---|
| 482 | -- These addSize things have to be here because |
|---|
| 483 | -- I don't want to give them bOMB_OUT_SIZE as an argument |
|---|
| 484 | addSizeN TooBig _ = TooBig |
|---|
| 485 | addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d |
|---|
| 486 | |
|---|
| 487 | -- addAltSize is used to add the sizes of case alternatives |
|---|
| 488 | addAltSize TooBig _ = TooBig |
|---|
| 489 | addAltSize _ TooBig = TooBig |
|---|
| 490 | addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2) |
|---|
| 491 | = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) |
|---|
| 492 | (xs `unionBags` ys) |
|---|
| 493 | (d1 +# d2) -- Note [addAltSize result discounts] |
|---|
| 494 | |
|---|
| 495 | -- This variant ignores the result discount from its LEFT argument |
|---|
| 496 | -- It's used when the second argument isn't part of the result |
|---|
| 497 | addSizeNSD TooBig _ = TooBig |
|---|
| 498 | addSizeNSD _ TooBig = TooBig |
|---|
| 499 | addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2) |
|---|
| 500 | = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) |
|---|
| 501 | (xs `unionBags` ys) |
|---|
| 502 | d2 -- Ignore d1 |
|---|
| 503 | \end{code} |
|---|
| 504 | |
|---|
| 505 | |
|---|
| 506 | \begin{code} |
|---|
| 507 | -- | Finds a nominal size of a string literal. |
|---|
| 508 | litSize :: Literal -> Int |
|---|
| 509 | -- Used by CoreUnfold.sizeExpr |
|---|
| 510 | litSize (LitInteger {}) = 100 -- Note [Size of literal integers] |
|---|
| 511 | litSize (MachStr str) = 10 + 10 * ((lengthFS str + 3) `div` 4) |
|---|
| 512 | -- If size could be 0 then @f "x"@ might be too small |
|---|
| 513 | -- [Sept03: make literal strings a bit bigger to avoid fruitless |
|---|
| 514 | -- duplication of little strings] |
|---|
| 515 | litSize _other = 0 -- Must match size of nullary constructors |
|---|
| 516 | -- Key point: if x |-> 4, then x must inline unconditionally |
|---|
| 517 | -- (eg via case binding) |
|---|
| 518 | |
|---|
| 519 | classOpSize :: [Id] -> [CoreExpr] -> ExprSize |
|---|
| 520 | -- See Note [Conlike is interesting] |
|---|
| 521 | classOpSize _ [] |
|---|
| 522 | = sizeZero |
|---|
| 523 | classOpSize top_args (arg1 : other_args) |
|---|
| 524 | = SizeIs (iUnbox size) arg_discount (_ILIT(0)) |
|---|
| 525 | where |
|---|
| 526 | size = 20 + (10 * length other_args) |
|---|
| 527 | -- If the class op is scrutinising a lambda bound dictionary then |
|---|
| 528 | -- give it a discount, to encourage the inlining of this function |
|---|
| 529 | -- The actual discount is rather arbitrarily chosen |
|---|
| 530 | arg_discount = case arg1 of |
|---|
| 531 | Var dict | dict `elem` top_args |
|---|
| 532 | -> unitBag (dict, opt_UF_DictDiscount) |
|---|
| 533 | _other -> emptyBag |
|---|
| 534 | |
|---|
| 535 | funSize :: [Id] -> Id -> Int -> ExprSize |
|---|
| 536 | -- Size for functions that are not constructors or primops |
|---|
| 537 | -- Note [Function applications] |
|---|
| 538 | funSize top_args fun n_val_args |
|---|
| 539 | | fun `hasKey` buildIdKey = buildSize |
|---|
| 540 | | fun `hasKey` augmentIdKey = augmentSize |
|---|
| 541 | | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount) |
|---|
| 542 | where |
|---|
| 543 | some_val_args = n_val_args > 0 |
|---|
| 544 | |
|---|
| 545 | size | some_val_args = 10 * (1 + n_val_args) |
|---|
| 546 | | otherwise = 0 |
|---|
| 547 | -- The 1+ is for the function itself |
|---|
| 548 | -- Add 1 for each non-trivial arg; |
|---|
| 549 | -- the allocation cost, as in let(rec) |
|---|
| 550 | |
|---|
| 551 | -- DISCOUNTS |
|---|
| 552 | -- See Note [Function application discounts] |
|---|
| 553 | arg_discount | some_val_args && one_call fun top_args |
|---|
| 554 | = unitBag (fun, opt_UF_FunAppDiscount) |
|---|
| 555 | | otherwise = emptyBag |
|---|
| 556 | -- If the function is an argument and is applied |
|---|
| 557 | -- to some values, give it an arg-discount |
|---|
| 558 | |
|---|
| 559 | res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount |
|---|
| 560 | | otherwise = 0 |
|---|
| 561 | -- If the function is partially applied, show a result discount |
|---|
| 562 | |
|---|
| 563 | one_call _ [] = False |
|---|
| 564 | one_call fun (arg:args) | fun==arg = case idOccInfo arg of |
|---|
| 565 | OneOcc _ one_branch _ -> one_branch |
|---|
| 566 | _ -> False |
|---|
| 567 | | otherwise = one_call fun args |
|---|
| 568 | |
|---|
| 569 | conSize :: DataCon -> Int -> ExprSize |
|---|
| 570 | conSize dc n_val_args |
|---|
| 571 | | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(10)) -- Like variables |
|---|
| 572 | |
|---|
| 573 | -- See Note [Unboxed tuple size and result discount] |
|---|
| 574 | | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox (10 * (1 + n_val_args))) |
|---|
| 575 | |
|---|
| 576 | -- See Note [Constructor size and result discount] |
|---|
| 577 | | otherwise = SizeIs (_ILIT(10)) emptyBag (iUnbox (10 * (1 + n_val_args))) |
|---|
| 578 | \end{code} |
|---|
| 579 | |
|---|
| 580 | Note [Constructor size and result discount] |
|---|
| 581 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 582 | Treat a constructors application as size 10, regardless of how many |
|---|
| 583 | arguments it has; we are keen to expose them (and we charge separately |
|---|
| 584 | for their args). We can't treat them as size zero, else we find that |
|---|
| 585 | (Just x) has size 0, which is the same as a lone variable; and hence |
|---|
| 586 | 'v' will always be replaced by (Just x), where v is bound to Just x. |
|---|
| 587 | |
|---|
| 588 | The "result discount" is applied if the result of the call is |
|---|
| 589 | scrutinised (say by a case). For a constructor application that will |
|---|
| 590 | mean the constructor application will disappear, so we don't need to |
|---|
| 591 | charge it to the function. So the discount should at least match the |
|---|
| 592 | cost of the constructor application, namely 10. But to give a bit |
|---|
| 593 | of extra incentive we give a discount of 10*(1 + n_val_args). |
|---|
| 594 | |
|---|
| 595 | Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)), |
|---|
| 596 | and said it was an "unambiguous win", but its terribly dangerous |
|---|
| 597 | because a fuction with many many case branches, each finishing with |
|---|
| 598 | a constructor, can have an arbitrarily large discount. This led to |
|---|
| 599 | terrible code bloat: see Trac #6099. |
|---|
| 600 | |
|---|
| 601 | Note [Unboxed tuple size and result discount] |
|---|
| 602 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 603 | However, unboxed tuples count as size zero. I found occasions where we had |
|---|
| 604 | f x y z = case op# x y z of { s -> (# s, () #) } |
|---|
| 605 | and f wasn't getting inlined. |
|---|
| 606 | |
|---|
| 607 | I tried giving unboxed tuples a *result discount* of zero (see the |
|---|
| 608 | commented-out line). Why? When returned as a result they do not |
|---|
| 609 | allocate, so maybe we don't want to charge so much for them If you |
|---|
| 610 | have a non-zero discount here, we find that workers often get inlined |
|---|
| 611 | back into wrappers, because it look like |
|---|
| 612 | f x = case $wf x of (# a,b #) -> (a,b) |
|---|
| 613 | and we are keener because of the case. However while this change |
|---|
| 614 | shrank binary sizes by 0.5% it also made spectral/boyer allocate 5% |
|---|
| 615 | more. All other changes were very small. So it's not a big deal but I |
|---|
| 616 | didn't adopt the idea. |
|---|
| 617 | |
|---|
| 618 | Note [Function application discount] |
|---|
| 619 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 620 | We want a discount if the function is applied. A good example is |
|---|
| 621 | monadic combinators with continuation arguments, where inlining is |
|---|
| 622 | quite important. |
|---|
| 623 | |
|---|
| 624 | But we don't want a big discount when a function is called many times |
|---|
| 625 | (see the detailed comments with Trac #6048) because if the function is |
|---|
| 626 | big it won't be inlined at its many call sites and no benefit results. |
|---|
| 627 | Indeed, we can get exponentially big inlinings this way; that is what |
|---|
| 628 | Trac #6048 is about. |
|---|
| 629 | |
|---|
| 630 | So, we only give a function-application discount when the function appears |
|---|
| 631 | textually once, albeit possibly inside a lambda. |
|---|
| 632 | |
|---|
| 633 | Note [Literal integer size] |
|---|
| 634 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 635 | Literal integers *can* be big (mkInteger [...coefficients...]), but |
|---|
| 636 | need not be (S# n). We just use an aribitrary big-ish constant here |
|---|
| 637 | so that, in particular, we don't inline top-level defns like |
|---|
| 638 | n = S# 5 |
|---|
| 639 | There's no point in doing so -- any optimsations will see the S# |
|---|
| 640 | through n's unfolding. Nor will a big size inhibit unfoldings functions |
|---|
| 641 | that mention a literal Integer, because the float-out pass will float |
|---|
| 642 | all those constants to top level. |
|---|
| 643 | |
|---|
| 644 | \begin{code} |
|---|
| 645 | primOpSize :: PrimOp -> Int -> ExprSize |
|---|
| 646 | primOpSize op n_val_args |
|---|
| 647 | = if primOpOutOfLine op |
|---|
| 648 | then sizeN (op_size + n_val_args) |
|---|
| 649 | else sizeN op_size |
|---|
| 650 | where |
|---|
| 651 | op_size = primOpCodeSize op |
|---|
| 652 | |
|---|
| 653 | |
|---|
| 654 | buildSize :: ExprSize |
|---|
| 655 | buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40)) |
|---|
| 656 | -- We really want to inline applications of build |
|---|
| 657 | -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later) |
|---|
| 658 | -- Indeed, we should add a result_discount becuause build is |
|---|
| 659 | -- very like a constructor. We don't bother to check that the |
|---|
| 660 | -- build is saturated (it usually is). The "-2" discounts for the \c n, |
|---|
| 661 | -- The "4" is rather arbitrary. |
|---|
| 662 | |
|---|
| 663 | augmentSize :: ExprSize |
|---|
| 664 | augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40)) |
|---|
| 665 | -- Ditto (augment t (\cn -> e) ys) should cost only the cost of |
|---|
| 666 | -- e plus ys. The -2 accounts for the \cn |
|---|
| 667 | |
|---|
| 668 | -- When we return a lambda, give a discount if it's used (applied) |
|---|
| 669 | lamScrutDiscount :: ExprSize -> ExprSize |
|---|
| 670 | lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount) |
|---|
| 671 | lamScrutDiscount TooBig = TooBig |
|---|
| 672 | \end{code} |
|---|
| 673 | |
|---|
| 674 | Note [addAltSize result discounts] |
|---|
| 675 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 676 | When adding the size of alternatives, we *add* the result discounts |
|---|
| 677 | too, rather than take the *maximum*. For a multi-branch case, this |
|---|
| 678 | gives a discount for each branch that returns a constructor, making us |
|---|
| 679 | keener to inline. I did try using 'max' instead, but it makes nofib |
|---|
| 680 | 'rewrite' and 'puzzle' allocate significantly more, and didn't make |
|---|
| 681 | binary sizes shrink significantly either. |
|---|
| 682 | |
|---|
| 683 | Note [Discounts and thresholds] |
|---|
| 684 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 685 | Constants for discounts and thesholds are defined in main/StaticFlags, |
|---|
| 686 | all of form opt_UF_xxxx. They are: |
|---|
| 687 | |
|---|
| 688 | opt_UF_CreationThreshold (45) |
|---|
| 689 | At a definition site, if the unfolding is bigger than this, we |
|---|
| 690 | may discard it altogether |
|---|
| 691 | |
|---|
| 692 | opt_UF_UseThreshold (6) |
|---|
| 693 | At a call site, if the unfolding, less discounts, is smaller than |
|---|
| 694 | this, then it's small enough inline |
|---|
| 695 | |
|---|
| 696 | opt_UF_KeennessFactor (1.5) |
|---|
| 697 | Factor by which the discounts are multiplied before |
|---|
| 698 | subtracting from size |
|---|
| 699 | |
|---|
| 700 | opt_UF_DictDiscount (1) |
|---|
| 701 | The discount for each occurrence of a dictionary argument |
|---|
| 702 | as an argument of a class method. Should be pretty small |
|---|
| 703 | else big functions may get inlined |
|---|
| 704 | |
|---|
| 705 | opt_UF_FunAppDiscount (6) |
|---|
| 706 | Discount for a function argument that is applied. Quite |
|---|
| 707 | large, because if we inline we avoid the higher-order call. |
|---|
| 708 | |
|---|
| 709 | opt_UF_DearOp (4) |
|---|
| 710 | The size of a foreign call or not-dupable PrimOp |
|---|
| 711 | |
|---|
| 712 | |
|---|
| 713 | Note [Function applications] |
|---|
| 714 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 715 | In a function application (f a b) |
|---|
| 716 | |
|---|
| 717 | - If 'f' is an argument to the function being analysed, |
|---|
| 718 | and there's at least one value arg, record a FunAppDiscount for f |
|---|
| 719 | |
|---|
| 720 | - If the application if a PAP (arity > 2 in this example) |
|---|
| 721 | record a *result* discount (because inlining |
|---|
| 722 | with "extra" args in the call may mean that we now |
|---|
| 723 | get a saturated application) |
|---|
| 724 | |
|---|
| 725 | Code for manipulating sizes |
|---|
| 726 | |
|---|
| 727 | \begin{code} |
|---|
| 728 | data ExprSize = TooBig |
|---|
| 729 | | SizeIs FastInt -- Size found |
|---|
| 730 | (Bag (Id,Int)) -- Arguments cased herein, and discount for each such |
|---|
| 731 | FastInt -- Size to subtract if result is scrutinised |
|---|
| 732 | -- by a case expression |
|---|
| 733 | |
|---|
| 734 | instance Outputable ExprSize where |
|---|
| 735 | ppr TooBig = ptext (sLit "TooBig") |
|---|
| 736 | ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c)) |
|---|
| 737 | |
|---|
| 738 | -- subtract the discount before deciding whether to bale out. eg. we |
|---|
| 739 | -- want to inline a large constructor application into a selector: |
|---|
| 740 | -- tup = (a_1, ..., a_99) |
|---|
| 741 | -- x = case tup of ... |
|---|
| 742 | -- |
|---|
| 743 | mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize |
|---|
| 744 | mkSizeIs max n xs d | (n -# d) ># max = TooBig |
|---|
| 745 | | otherwise = SizeIs n xs d |
|---|
| 746 | |
|---|
| 747 | maxSize :: ExprSize -> ExprSize -> ExprSize |
|---|
| 748 | maxSize TooBig _ = TooBig |
|---|
| 749 | maxSize _ TooBig = TooBig |
|---|
| 750 | maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1 |
|---|
| 751 | | otherwise = s2 |
|---|
| 752 | |
|---|
| 753 | sizeZero :: ExprSize |
|---|
| 754 | sizeN :: Int -> ExprSize |
|---|
| 755 | |
|---|
| 756 | sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0)) |
|---|
| 757 | sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0)) |
|---|
| 758 | \end{code} |
|---|
| 759 | |
|---|
| 760 | |
|---|
| 761 | %************************************************************************ |
|---|
| 762 | %* * |
|---|
| 763 | \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding} |
|---|
| 764 | %* * |
|---|
| 765 | %************************************************************************ |
|---|
| 766 | |
|---|
| 767 | We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that |
|---|
| 768 | we ``couldn't possibly use'' on the other side. Can be overridden w/ |
|---|
| 769 | flaggery. Just the same as smallEnoughToInline, except that it has no |
|---|
| 770 | actual arguments. |
|---|
| 771 | |
|---|
| 772 | \begin{code} |
|---|
| 773 | couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool |
|---|
| 774 | couldBeSmallEnoughToInline threshold rhs |
|---|
| 775 | = case sizeExpr (iUnbox threshold) [] body of |
|---|
| 776 | TooBig -> False |
|---|
| 777 | _ -> True |
|---|
| 778 | where |
|---|
| 779 | (_, body) = collectBinders rhs |
|---|
| 780 | |
|---|
| 781 | ---------------- |
|---|
| 782 | smallEnoughToInline :: Unfolding -> Bool |
|---|
| 783 | smallEnoughToInline (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}}) |
|---|
| 784 | = size <= opt_UF_UseThreshold |
|---|
| 785 | smallEnoughToInline _ |
|---|
| 786 | = False |
|---|
| 787 | |
|---|
| 788 | ---------------- |
|---|
| 789 | certainlyWillInline :: Unfolding -> Bool |
|---|
| 790 | -- Sees if the unfolding is pretty certain to inline |
|---|
| 791 | certainlyWillInline (CoreUnfolding { uf_arity = n_vals, uf_guidance = guidance }) |
|---|
| 792 | = case guidance of |
|---|
| 793 | UnfNever -> False |
|---|
| 794 | UnfWhen {} -> True |
|---|
| 795 | UnfIfGoodArgs { ug_size = size} |
|---|
| 796 | -> n_vals > 0 -- See Note [certainlyWillInline: be caseful of thunks] |
|---|
| 797 | && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold |
|---|
| 798 | |
|---|
| 799 | certainlyWillInline _ |
|---|
| 800 | = False |
|---|
| 801 | \end{code} |
|---|
| 802 | |
|---|
| 803 | Note [certainlyWillInline: be caseful of thunks] |
|---|
| 804 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 805 | Don't claim that thunks will certainly inline, because that risks work |
|---|
| 806 | duplication. Even if the work duplication is not great (eg is_cheap |
|---|
| 807 | holds), it can make a big difference in an inner loop In Trac #5623 we |
|---|
| 808 | found that the WorkWrap phase thought that |
|---|
| 809 | y = case x of F# v -> F# (v +# v) |
|---|
| 810 | was certainlyWillInline, so the addition got duplicated. |
|---|
| 811 | |
|---|
| 812 | |
|---|
| 813 | %************************************************************************ |
|---|
| 814 | %* * |
|---|
| 815 | \subsection{callSiteInline} |
|---|
| 816 | %* * |
|---|
| 817 | %************************************************************************ |
|---|
| 818 | |
|---|
| 819 | This is the key function. It decides whether to inline a variable at a call site |
|---|
| 820 | |
|---|
| 821 | callSiteInline is used at call sites, so it is a bit more generous. |
|---|
| 822 | It's a very important function that embodies lots of heuristics. |
|---|
| 823 | A non-WHNF can be inlined if it doesn't occur inside a lambda, |
|---|
| 824 | and occurs exactly once or |
|---|
| 825 | occurs once in each branch of a case and is small |
|---|
| 826 | |
|---|
| 827 | If the thing is in WHNF, there's no danger of duplicating work, |
|---|
| 828 | so we can inline if it occurs once, or is small |
|---|
| 829 | |
|---|
| 830 | NOTE: we don't want to inline top-level functions that always diverge. |
|---|
| 831 | It just makes the code bigger. Tt turns out that the convenient way to prevent |
|---|
| 832 | them inlining is to give them a NOINLINE pragma, which we do in |
|---|
| 833 | StrictAnal.addStrictnessInfoToTopId |
|---|
| 834 | |
|---|
| 835 | \begin{code} |
|---|
| 836 | callSiteInline :: DynFlags |
|---|
| 837 | -> Id -- The Id |
|---|
| 838 | -> Bool -- True <=> unfolding is active |
|---|
| 839 | -> Bool -- True if there are are no arguments at all (incl type args) |
|---|
| 840 | -> [ArgSummary] -- One for each value arg; True if it is interesting |
|---|
| 841 | -> CallCtxt -- True <=> continuation is interesting |
|---|
| 842 | -> Maybe CoreExpr -- Unfolding, if any |
|---|
| 843 | |
|---|
| 844 | instance Outputable ArgSummary where |
|---|
| 845 | ppr TrivArg = ptext (sLit "TrivArg") |
|---|
| 846 | ppr NonTrivArg = ptext (sLit "NonTrivArg") |
|---|
| 847 | ppr ValueArg = ptext (sLit "ValueArg") |
|---|
| 848 | |
|---|
| 849 | data CallCtxt = BoringCtxt |
|---|
| 850 | |
|---|
| 851 | | ArgCtxt -- We are somewhere in the argument of a function |
|---|
| 852 | Bool -- True <=> we're somewhere in the RHS of function with rules |
|---|
| 853 | -- False <=> we *are* the argument of a function with non-zero |
|---|
| 854 | -- arg discount |
|---|
| 855 | -- OR |
|---|
| 856 | -- we *are* the RHS of a let Note [RHS of lets] |
|---|
| 857 | -- In both cases, be a little keener to inline |
|---|
| 858 | |
|---|
| 859 | | ValAppCtxt -- We're applied to at least one value arg |
|---|
| 860 | -- This arises when we have ((f x |> co) y) |
|---|
| 861 | -- Then the (f x) has argument 'x' but in a ValAppCtxt |
|---|
| 862 | |
|---|
| 863 | | CaseCtxt -- We're the scrutinee of a case |
|---|
| 864 | -- that decomposes its scrutinee |
|---|
| 865 | |
|---|
| 866 | instance Outputable CallCtxt where |
|---|
| 867 | ppr BoringCtxt = ptext (sLit "BoringCtxt") |
|---|
| 868 | ppr (ArgCtxt rules) = ptext (sLit "ArgCtxt") <+> ppr rules |
|---|
| 869 | ppr CaseCtxt = ptext (sLit "CaseCtxt") |
|---|
| 870 | ppr ValAppCtxt = ptext (sLit "ValAppCtxt") |
|---|
| 871 | |
|---|
| 872 | callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info |
|---|
| 873 | = case idUnfolding id of |
|---|
| 874 | -- idUnfolding checks for loop-breakers, returning NoUnfolding |
|---|
| 875 | -- Things with an INLINE pragma may have an unfolding *and* |
|---|
| 876 | -- be a loop breaker (maybe the knot is not yet untied) |
|---|
| 877 | CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top |
|---|
| 878 | , uf_is_work_free = is_wf, uf_arity = uf_arity |
|---|
| 879 | , uf_guidance = guidance, uf_expandable = is_exp } |
|---|
| 880 | | active_unfolding -> tryUnfolding dflags id lone_variable |
|---|
| 881 | arg_infos cont_info unf_template is_top |
|---|
| 882 | is_wf is_exp uf_arity guidance |
|---|
| 883 | | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags |
|---|
| 884 | -> pprTrace "Inactive unfolding:" (ppr id) Nothing |
|---|
| 885 | | otherwise -> Nothing |
|---|
| 886 | NoUnfolding -> Nothing |
|---|
| 887 | OtherCon {} -> Nothing |
|---|
| 888 | DFunUnfolding {} -> Nothing -- Never unfold a DFun |
|---|
| 889 | |
|---|
| 890 | tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt |
|---|
| 891 | -> CoreExpr -> Bool -> Bool -> Bool -> Arity -> UnfoldingGuidance |
|---|
| 892 | -> Maybe CoreExpr |
|---|
| 893 | tryUnfolding dflags id lone_variable |
|---|
| 894 | arg_infos cont_info unf_template is_top |
|---|
| 895 | is_wf is_exp uf_arity guidance |
|---|
| 896 | -- uf_arity will typically be equal to (idArity id), |
|---|
| 897 | -- but may be less for InlineRules |
|---|
| 898 | | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags |
|---|
| 899 | = pprTrace ("Considering inlining: " ++ showSDocDump (ppr id)) |
|---|
| 900 | (vcat [text "arg infos" <+> ppr arg_infos, |
|---|
| 901 | text "uf arity" <+> ppr uf_arity, |
|---|
| 902 | text "interesting continuation" <+> ppr cont_info, |
|---|
| 903 | text "some_benefit" <+> ppr some_benefit, |
|---|
| 904 | text "is exp:" <+> ppr is_exp, |
|---|
| 905 | text "is work-free:" <+> ppr is_wf, |
|---|
| 906 | text "guidance" <+> ppr guidance, |
|---|
| 907 | extra_doc, |
|---|
| 908 | text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) |
|---|
| 909 | result |
|---|
| 910 | | otherwise = result |
|---|
| 911 | |
|---|
| 912 | where |
|---|
| 913 | n_val_args = length arg_infos |
|---|
| 914 | saturated = n_val_args >= uf_arity |
|---|
| 915 | |
|---|
| 916 | result | yes_or_no = Just unf_template |
|---|
| 917 | | otherwise = Nothing |
|---|
| 918 | |
|---|
| 919 | interesting_args = any nonTriv arg_infos |
|---|
| 920 | -- NB: (any nonTriv arg_infos) looks at the |
|---|
| 921 | -- over-saturated args too which is "wrong"; |
|---|
| 922 | -- but if over-saturated we inline anyway. |
|---|
| 923 | |
|---|
| 924 | -- some_benefit is used when the RHS is small enough |
|---|
| 925 | -- and the call has enough (or too many) value |
|---|
| 926 | -- arguments (ie n_val_args >= arity). But there must |
|---|
| 927 | -- be *something* interesting about some argument, or the |
|---|
| 928 | -- result context, to make it worth inlining |
|---|
| 929 | some_benefit |
|---|
| 930 | | not saturated = interesting_args -- Under-saturated |
|---|
| 931 | -- Note [Unsaturated applications] |
|---|
| 932 | | n_val_args > uf_arity = True -- Over-saturated |
|---|
| 933 | | otherwise = interesting_args -- Saturated |
|---|
| 934 | || interesting_saturated_call |
|---|
| 935 | |
|---|
| 936 | interesting_saturated_call |
|---|
| 937 | = case cont_info of |
|---|
| 938 | BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions] |
|---|
| 939 | CaseCtxt -> not (lone_variable && is_wf) -- Note [Lone variables] |
|---|
| 940 | ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt] |
|---|
| 941 | ValAppCtxt -> True -- Note [Cast then apply] |
|---|
| 942 | |
|---|
| 943 | (yes_or_no, extra_doc) |
|---|
| 944 | = case guidance of |
|---|
| 945 | UnfNever -> (False, empty) |
|---|
| 946 | |
|---|
| 947 | UnfWhen unsat_ok boring_ok |
|---|
| 948 | -> (enough_args && (boring_ok || some_benefit), empty ) |
|---|
| 949 | where -- See Note [INLINE for small functions (3)] |
|---|
| 950 | enough_args = saturated || (unsat_ok && n_val_args > 0) |
|---|
| 951 | |
|---|
| 952 | UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } |
|---|
| 953 | -> ( is_wf && some_benefit && small_enough |
|---|
| 954 | , (text "discounted size =" <+> int discounted_size) ) |
|---|
| 955 | where |
|---|
| 956 | discounted_size = size - discount |
|---|
| 957 | small_enough = discounted_size <= opt_UF_UseThreshold |
|---|
| 958 | discount = computeDiscount uf_arity arg_discounts |
|---|
| 959 | res_discount arg_infos cont_info |
|---|
| 960 | \end{code} |
|---|
| 961 | |
|---|
| 962 | Note [RHS of lets] |
|---|
| 963 | ~~~~~~~~~~~~~~~~~~ |
|---|
| 964 | Be a tiny bit keener to inline in the RHS of a let, because that might |
|---|
| 965 | lead to good thing later |
|---|
| 966 | f y = (y,y,y) |
|---|
| 967 | g y = let x = f y in ...(case x of (a,b,c) -> ...) ... |
|---|
| 968 | We'd inline 'f' if the call was in a case context, and it kind-of-is, |
|---|
| 969 | only we can't see it. So we treat the RHS of a let as not-totally-boring. |
|---|
| 970 | |
|---|
| 971 | Note [Unsaturated applications] |
|---|
| 972 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 973 | When a call is not saturated, we *still* inline if one of the |
|---|
| 974 | arguments has interesting structure. That's sometimes very important. |
|---|
| 975 | A good example is the Ord instance for Bool in Base: |
|---|
| 976 | |
|---|
| 977 | Rec { |
|---|
| 978 | $fOrdBool =GHC.Classes.D:Ord |
|---|
| 979 | @ Bool |
|---|
| 980 | ... |
|---|
| 981 | $cmin_ajX |
|---|
| 982 | |
|---|
| 983 | $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool |
|---|
| 984 | $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool |
|---|
| 985 | } |
|---|
| 986 | |
|---|
| 987 | But the defn of GHC.Classes.$dmmin is: |
|---|
| 988 | |
|---|
| 989 | $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a |
|---|
| 990 | {- Arity: 3, HasNoCafRefs, Strictness: SLL, |
|---|
| 991 | Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a -> |
|---|
| 992 | case @ a GHC.Classes.<= @ a $dOrd x y of wild { |
|---|
| 993 | GHC.Types.False -> y GHC.Types.True -> x }) -} |
|---|
| 994 | |
|---|
| 995 | We *really* want to inline $dmmin, even though it has arity 3, in |
|---|
| 996 | order to unravel the recursion. |
|---|
| 997 | |
|---|
| 998 | |
|---|
| 999 | Note [Things to watch] |
|---|
| 1000 | ~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 1001 | * { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... } |
|---|
| 1002 | Assume x is exported, so not inlined unconditionally. |
|---|
| 1003 | Then we want x to inline unconditionally; no reason for it |
|---|
| 1004 | not to, and doing so avoids an indirection. |
|---|
| 1005 | |
|---|
| 1006 | * { x = I# 3; ....f x.... } |
|---|
| 1007 | Make sure that x does not inline unconditionally! |
|---|
| 1008 | Lest we get extra allocation. |
|---|
| 1009 | |
|---|
| 1010 | Note [Inlining an InlineRule] |
|---|
| 1011 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 1012 | An InlineRules is used for |
|---|
| 1013 | (a) programmer INLINE pragmas |
|---|
| 1014 | (b) inlinings from worker/wrapper |
|---|
| 1015 | |
|---|
| 1016 | For (a) the RHS may be large, and our contract is that we *only* inline |
|---|
| 1017 | when the function is applied to all the arguments on the LHS of the |
|---|
| 1018 | source-code defn. (The uf_arity in the rule.) |
|---|
| 1019 | |
|---|
| 1020 | However for worker/wrapper it may be worth inlining even if the |
|---|
| 1021 | arity is not satisfied (as we do in the CoreUnfolding case) so we don't |
|---|
| 1022 | require saturation. |
|---|
| 1023 | |
|---|
| 1024 | |
|---|
| 1025 | Note [Nested functions] |
|---|
| 1026 | ~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 1027 | If a function has a nested defn we also record some-benefit, on the |
|---|
| 1028 | grounds that we are often able to eliminate the binding, and hence the |
|---|
| 1029 | allocation, for the function altogether; this is good for join points. |
|---|
| 1030 | But this only makes sense for *functions*; inlining a constructor |
|---|
| 1031 | doesn't help allocation unless the result is scrutinised. UNLESS the |
|---|
| 1032 | constructor occurs just once, albeit possibly in multiple case |
|---|
| 1033 | branches. Then inlining it doesn't increase allocation, but it does |
|---|
| 1034 | increase the chance that the constructor won't be allocated at all in |
|---|
| 1035 | the branches that don't use it. |
|---|
| 1036 | |
|---|
| 1037 | Note [Cast then apply] |
|---|
| 1038 | ~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 1039 | Consider |
|---|
| 1040 | myIndex = __inline_me ( (/\a. <blah>) |> co ) |
|---|
| 1041 | co :: (forall a. a -> a) ~ (forall a. T a) |
|---|
| 1042 | ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... |
|---|
| 1043 | |
|---|
| 1044 | We need to inline myIndex to unravel this; but the actual call (myIndex a) has |
|---|
| 1045 | no value arguments. The ValAppCtxt gives it enough incentive to inline. |
|---|
| 1046 | |
|---|
| 1047 | Note [Inlining in ArgCtxt] |
|---|
| 1048 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 1049 | The condition (arity > 0) here is very important, because otherwise |
|---|
| 1050 | we end up inlining top-level stuff into useless places; eg |
|---|
| 1051 | x = I# 3# |
|---|
| 1052 | f = \y. g x |
|---|
| 1053 | This can make a very big difference: it adds 16% to nofib 'integer' allocs, |
|---|
| 1054 | and 20% to 'power'. |
|---|
| 1055 | |
|---|
| 1056 | At one stage I replaced this condition by 'True' (leading to the above |
|---|
| 1057 | slow-down). The motivation was test eyeball/inline1.hs; but that seems |
|---|
| 1058 | to work ok now. |
|---|
| 1059 | |
|---|
| 1060 | NOTE: arguably, we should inline in ArgCtxt only if the result of the |
|---|
| 1061 | call is at least CONLIKE. At least for the cases where we use ArgCtxt |
|---|
| 1062 | for the RHS of a 'let', we only profit from the inlining if we get a |
|---|
| 1063 | CONLIKE thing (modulo lets). |
|---|
| 1064 | |
|---|
| 1065 | Note [Lone variables] See also Note [Interaction of exprIsWorkFree and lone variables] |
|---|
| 1066 | ~~~~~~~~~~~~~~~~~~~~~ which appears below |
|---|
| 1067 | The "lone-variable" case is important. I spent ages messing about |
|---|
| 1068 | with unsatisfactory varaints, but this is nice. The idea is that if a |
|---|
| 1069 | variable appears all alone |
|---|
| 1070 | |
|---|
| 1071 | as an arg of lazy fn, or rhs BoringCtxt |
|---|
| 1072 | as scrutinee of a case CaseCtxt |
|---|
| 1073 | as arg of a fn ArgCtxt |
|---|
| 1074 | AND |
|---|
| 1075 | it is bound to a cheap expression |
|---|
| 1076 | |
|---|
| 1077 | then we should not inline it (unless there is some other reason, |
|---|
| 1078 | e.g. is is the sole occurrence). That is what is happening at |
|---|
| 1079 | the use of 'lone_variable' in 'interesting_saturated_call'. |
|---|
| 1080 | |
|---|
| 1081 | Why? At least in the case-scrutinee situation, turning |
|---|
| 1082 | let x = (a,b) in case x of y -> ... |
|---|
| 1083 | into |
|---|
| 1084 | let x = (a,b) in case (a,b) of y -> ... |
|---|
| 1085 | and thence to |
|---|
| 1086 | let x = (a,b) in let y = (a,b) in ... |
|---|
| 1087 | is bad if the binding for x will remain. |
|---|
| 1088 | |
|---|
| 1089 | Another example: I discovered that strings |
|---|
| 1090 | were getting inlined straight back into applications of 'error' |
|---|
| 1091 | because the latter is strict. |
|---|
| 1092 | s = "foo" |
|---|
| 1093 | f = \x -> ...(error s)... |
|---|
| 1094 | |
|---|
| 1095 | Fundamentally such contexts should not encourage inlining because the |
|---|
| 1096 | context can ``see'' the unfolding of the variable (e.g. case or a |
|---|
| 1097 | RULE) so there's no gain. If the thing is bound to a value. |
|---|
| 1098 | |
|---|
| 1099 | However, watch out: |
|---|
| 1100 | |
|---|
| 1101 | * Consider this: |
|---|
| 1102 | foo = _inline_ (\n. [n]) |
|---|
| 1103 | bar = _inline_ (foo 20) |
|---|
| 1104 | baz = \n. case bar of { (m:_) -> m + n } |
|---|
| 1105 | Here we really want to inline 'bar' so that we can inline 'foo' |
|---|
| 1106 | and the whole thing unravels as it should obviously do. This is |
|---|
| 1107 | important: in the NDP project, 'bar' generates a closure data |
|---|
| 1108 | structure rather than a list. |
|---|
| 1109 | |
|---|
| 1110 | So the non-inlining of lone_variables should only apply if the |
|---|
| 1111 | unfolding is regarded as cheap; because that is when exprIsConApp_maybe |
|---|
| 1112 | looks through the unfolding. Hence the "&& is_wf" in the |
|---|
| 1113 | InlineRule branch. |
|---|
| 1114 | |
|---|
| 1115 | * Even a type application or coercion isn't a lone variable. |
|---|
| 1116 | Consider |
|---|
| 1117 | case $fMonadST @ RealWorld of { :DMonad a b c -> c } |
|---|
| 1118 | We had better inline that sucker! The case won't see through it. |
|---|
| 1119 | |
|---|
| 1120 | For now, I'm treating treating a variable applied to types |
|---|
| 1121 | in a *lazy* context "lone". The motivating example was |
|---|
| 1122 | f = /\a. \x. BIG |
|---|
| 1123 | g = /\a. \y. h (f a) |
|---|
| 1124 | There's no advantage in inlining f here, and perhaps |
|---|
| 1125 | a significant disadvantage. Hence some_val_args in the Stop case |
|---|
| 1126 | |
|---|
| 1127 | Note [Interaction of exprIsWorkFree and lone variables] |
|---|
| 1128 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 1129 | The lone-variable test says "don't inline if a case expression |
|---|
| 1130 | scrutines a lone variable whose unfolding is cheap". It's very |
|---|
| 1131 | important that, under these circumstances, exprIsConApp_maybe |
|---|
| 1132 | can spot a constructor application. So, for example, we don't |
|---|
| 1133 | consider |
|---|
| 1134 | let x = e in (x,x) |
|---|
| 1135 | to be cheap, and that's good because exprIsConApp_maybe doesn't |
|---|
| 1136 | think that expression is a constructor application. |
|---|
| 1137 | |
|---|
| 1138 | In the 'not (lone_variable && is_wf)' test, I used to test is_value |
|---|
| 1139 | rather than is_wf, which was utterly wrong, because the above |
|---|
| 1140 | expression responds True to exprIsHNF, which is what sets is_value. |
|---|
| 1141 | |
|---|
| 1142 | This kind of thing can occur if you have |
|---|
| 1143 | |
|---|
| 1144 | {-# INLINE foo #-} |
|---|
| 1145 | foo = let x = e in (x,x) |
|---|
| 1146 | |
|---|
| 1147 | which Roman did. |
|---|
| 1148 | |
|---|
| 1149 | \begin{code} |
|---|
| 1150 | computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int |
|---|
| 1151 | computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info |
|---|
| 1152 | -- We multiple the raw discounts (args_discount and result_discount) |
|---|
| 1153 | -- ty opt_UnfoldingKeenessFactor because the former have to do with |
|---|
| 1154 | -- *size* whereas the discounts imply that there's some extra |
|---|
| 1155 | -- *efficiency* to be gained (e.g. beta reductions, case reductions) |
|---|
| 1156 | -- by inlining. |
|---|
| 1157 | |
|---|
| 1158 | = 10 -- Discount of 1 because the result replaces the call |
|---|
| 1159 | -- so we count 1 for the function itself |
|---|
| 1160 | |
|---|
| 1161 | + 10 * length (take n_vals_wanted arg_infos) |
|---|
| 1162 | -- Discount of (un-scaled) 1 for each arg supplied, |
|---|
| 1163 | -- because the result replaces the call |
|---|
| 1164 | |
|---|
| 1165 | + round (opt_UF_KeenessFactor * |
|---|
| 1166 | fromIntegral (arg_discount + res_discount')) |
|---|
| 1167 | where |
|---|
| 1168 | arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos) |
|---|
| 1169 | |
|---|
| 1170 | mk_arg_discount _ TrivArg = 0 |
|---|
| 1171 | mk_arg_discount _ NonTrivArg = 10 |
|---|
| 1172 | mk_arg_discount discount ValueArg = discount |
|---|
| 1173 | |
|---|
| 1174 | res_discount' = case cont_info of |
|---|
| 1175 | BoringCtxt -> 0 |
|---|
| 1176 | CaseCtxt -> res_discount |
|---|
| 1177 | _other -> 40 `min` res_discount |
|---|
| 1178 | -- res_discount can be very large when a function returns |
|---|
| 1179 | -- constructors; but we only want to invoke that large discount |
|---|
| 1180 | -- when there's a case continuation. |
|---|
| 1181 | -- Otherwise we, rather arbitrarily, threshold it. Yuk. |
|---|
| 1182 | -- But we want to aovid inlining large functions that return |
|---|
| 1183 | -- constructors into contexts that are simply "interesting" |
|---|
| 1184 | \end{code} |
|---|
| 1185 | |
|---|
| 1186 | %************************************************************************ |
|---|
| 1187 | %* * |
|---|
| 1188 | Interesting arguments |
|---|
| 1189 | %* * |
|---|
| 1190 | %************************************************************************ |
|---|
| 1191 | |
|---|
| 1192 | Note [Interesting arguments] |
|---|
| 1193 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 1194 | An argument is interesting if it deserves a discount for unfoldings |
|---|
| 1195 | with a discount in that argument position. The idea is to avoid |
|---|
| 1196 | unfolding a function that is applied only to variables that have no |
|---|
| 1197 | unfolding (i.e. they are probably lambda bound): f x y z There is |
|---|
| 1198 | little point in inlining f here. |
|---|
| 1199 | |
|---|
| 1200 | Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But |
|---|
| 1201 | we must look through lets, eg (let x = e in C a b), because the let will |
|---|
| 1202 | float, exposing the value, if we inline. That makes it different to |
|---|
| 1203 | exprIsHNF. |
|---|
| 1204 | |
|---|
| 1205 | Before 2009 we said it was interesting if the argument had *any* structure |
|---|
| 1206 | at all; i.e. (hasSomeUnfolding v). But does too much inlining; see Trac #3016. |
|---|
| 1207 | |
|---|
| 1208 | But we don't regard (f x y) as interesting, unless f is unsaturated. |
|---|
| 1209 | If it's saturated and f hasn't inlined, then it's probably not going |
|---|
| 1210 | to now! |
|---|
| 1211 | |
|---|
| 1212 | Note [Conlike is interesting] |
|---|
| 1213 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 1214 | Consider |
|---|
| 1215 | f d = ...((*) d x y)... |
|---|
| 1216 | ... f (df d')... |
|---|
| 1217 | where df is con-like. Then we'd really like to inline 'f' so that the |
|---|
| 1218 | rule for (*) (df d) can fire. To do this |
|---|
| 1219 | a) we give a discount for being an argument of a class-op (eg (*) d) |
|---|
| 1220 | b) we say that a con-like argument (eg (df d)) is interesting |
|---|
| 1221 | |
|---|
| 1222 | \begin{code} |
|---|
| 1223 | data ArgSummary = TrivArg -- Nothing interesting |
|---|
| 1224 | | NonTrivArg -- Arg has structure |
|---|
| 1225 | | ValueArg -- Arg is a con-app or PAP |
|---|
| 1226 | -- ..or con-like. Note [Conlike is interesting] |
|---|
| 1227 | |
|---|
| 1228 | interestingArg :: CoreExpr -> ArgSummary |
|---|
| 1229 | -- See Note [Interesting arguments] |
|---|
| 1230 | interestingArg e = go e 0 |
|---|
| 1231 | where |
|---|
| 1232 | -- n is # value args to which the expression is applied |
|---|
| 1233 | go (Lit {}) _ = ValueArg |
|---|
| 1234 | go (Var v) n |
|---|
| 1235 | | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that |
|---|
| 1236 | -- data constructors here |
|---|
| 1237 | | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding |
|---|
| 1238 | | n > 0 = NonTrivArg -- Saturated or unknown call |
|---|
| 1239 | | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding |
|---|
| 1240 | -- See Note [Conlike is interesting] |
|---|
| 1241 | | otherwise = TrivArg -- n==0, no useful unfolding |
|---|
| 1242 | where |
|---|
| 1243 | conlike_unfolding = isConLikeUnfolding (idUnfolding v) |
|---|
| 1244 | |
|---|
| 1245 | go (Type _) _ = TrivArg |
|---|
| 1246 | go (Coercion _) _ = TrivArg |
|---|
| 1247 | go (App fn (Type _)) n = go fn n |
|---|
| 1248 | go (App fn (Coercion _)) n = go fn n |
|---|
| 1249 | go (App fn _) n = go fn (n+1) |
|---|
| 1250 | go (Tick _ a) n = go a n |
|---|
| 1251 | go (Cast e _) n = go e n |
|---|
| 1252 | go (Lam v e) n |
|---|
| 1253 | | isTyVar v = go e n |
|---|
| 1254 | | n>0 = go e (n-1) |
|---|
| 1255 | | otherwise = ValueArg |
|---|
| 1256 | go (Let _ e) n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg } |
|---|
| 1257 | go (Case {}) _ = NonTrivArg |
|---|
| 1258 | |
|---|
| 1259 | nonTriv :: ArgSummary -> Bool |
|---|
| 1260 | nonTriv TrivArg = False |
|---|
| 1261 | nonTriv _ = True |
|---|
| 1262 | \end{code} |
|---|