root/compiler/cmm/CmmOpt.hs

Revision 2304a36272531fd20f163b6f378e417dc351aa25, 28.0 KB (checked in by Ian Lynagh <igloo@…>, 3 months ago)

Fix the unregisterised build; fixes #5901

  • Property mode set to 100644
Line 
1-----------------------------------------------------------------------------
2--
3-- Cmm optimisation
4--
5-- (c) The University of Glasgow 2006
6--
7-----------------------------------------------------------------------------
8
9module CmmOpt (
10        cmmEliminateDeadBlocks,
11        cmmMiniInline,
12        cmmMachOpFold,
13        cmmMachOpFoldM,
14        cmmLoopifyForC,
15 ) where
16
17#include "HsVersions.h"
18
19import OldCmm
20import OldPprCmm
21import CmmNode (wrapRecExp)
22import CmmUtils
23import StaticFlags
24
25import UniqFM
26import Unique
27import Util
28import FastTypes
29import Outputable
30import Platform
31import BlockId
32
33import Data.Bits
34import Data.Maybe
35import Data.List
36
37-- -----------------------------------------------------------------------------
38-- Eliminates dead blocks
39
40{-
41We repeatedly expand the set of reachable blocks until we hit a
42fixpoint, and then prune any blocks that were not in this set.  This is
43actually a required optimization, as dead blocks can cause problems
44for invariants in the linear register allocator (and possibly other
45places.)
46-}
47
48-- Deep fold over statements could probably be abstracted out, but it
49-- might not be worth the effort since OldCmm is moribund
50cmmEliminateDeadBlocks :: [CmmBasicBlock] -> [CmmBasicBlock]
51cmmEliminateDeadBlocks [] = []
52cmmEliminateDeadBlocks blocks@(BasicBlock base_id _:_) =
53    let -- Calculate what's reachable from what block
54        reachableMap = foldl' f emptyUFM blocks -- lazy in values
55            where f m (BasicBlock block_id stmts) = addToUFM m block_id (reachableFrom stmts)
56        reachableFrom stmts = foldl stmt [] stmts
57            where
58                stmt m CmmNop = m
59                stmt m (CmmComment _) = m
60                stmt m (CmmAssign _ e) = expr m e
61                stmt m (CmmStore e1 e2) = expr (expr m e1) e2
62                stmt m (CmmCall c _ as _) = f (actuals m as) c
63                    where f m (CmmCallee e _) = expr m e
64                          f m (CmmPrim _ Nothing) = m
65                          f m (CmmPrim _ (Just stmts)) = foldl' stmt m stmts
66                stmt m (CmmBranch b) = b:m
67                stmt m (CmmCondBranch e b) = b:(expr m e)
68                stmt m (CmmSwitch e bs) = catMaybes bs ++ expr m e
69                stmt m (CmmJump e _) = expr m e
70                stmt m (CmmReturn) = m
71                actuals m as = foldl' (\m h -> expr m (hintlessCmm h)) m as
72                -- We have to do a deep fold into CmmExpr because
73                -- there may be a BlockId in the CmmBlock literal.
74                expr m (CmmLit l) = lit m l
75                expr m (CmmLoad e _) = expr m e
76                expr m (CmmReg _) = m
77                expr m (CmmMachOp _ es) = foldl' expr m es
78                expr m (CmmStackSlot _ _) = m
79                expr m (CmmRegOff _ _) = m
80                lit m (CmmBlock b) = b:m
81                lit m _ = m
82        -- go todo done
83        reachable = go [base_id] (setEmpty :: BlockSet)
84          where go []     m = m
85                go (x:xs) m
86                    | setMember x m = go xs m
87                    | otherwise     = go (add ++ xs) (setInsert x m)
88                        where add = fromMaybe (panic "cmmEliminateDeadBlocks: unknown block")
89                                              (lookupUFM reachableMap x)
90    in filter (\(BasicBlock block_id _) -> setMember block_id reachable) blocks
91
92-- -----------------------------------------------------------------------------
93-- The mini-inliner
94
95{-
96This pass inlines assignments to temporaries.  Temporaries that are
97only used once are unconditionally inlined.  Temporaries that are used
98two or more times are only inlined if they are assigned a literal.  It
99works as follows:
100
101  - count uses of each temporary
102  - for each temporary:
103        - attempt to push it forward to the statement that uses it
104        - only push forward past assignments to other temporaries
105          (assumes that temporaries are single-assignment)
106        - if we reach the statement that uses it, inline the rhs
107          and delete the original assignment.
108
109[N.B. In the Quick C-- compiler, this optimization is achieved by a
110 combination of two dataflow passes: forward substitution (peephole
111 optimization) and dead-assignment elimination.  ---NR]
112
113Possible generalisations: here is an example from factorial
114
115Fac_zdwfac_entry:
116    cmG:
117        _smi = R2;
118        if (_smi != 0) goto cmK;
119        R1 = R3;
120        jump I64[Sp];
121    cmK:
122        _smn = _smi * R3;
123        R2 = _smi + (-1);
124        R3 = _smn;
125        jump Fac_zdwfac_info;
126
127We want to inline _smi and _smn.  To inline _smn:
128
129   - we must be able to push forward past assignments to global regs.
130     We can do this if the rhs of the assignment we are pushing
131     forward doesn't refer to the global reg being assigned to; easy
132     to test.
133
134To inline _smi:
135
136   - It is a trivial replacement, reg for reg, but it occurs more than
137     once.
138   - We can inline trivial assignments even if the temporary occurs
139     more than once, as long as we don't eliminate the original assignment
140     (this doesn't help much on its own).
141   - We need to be able to propagate the assignment forward through jumps;
142     if we did this, we would find that it can be inlined safely in all
143     its occurrences.
144-}
145
146countUses :: UserOfLocalRegs a => a -> UniqFM Int
147countUses a = foldRegsUsed (\m r -> addToUFM m r (count m r + 1)) emptyUFM a
148  where count m r = lookupWithDefaultUFM m (0::Int) r
149
150cmmMiniInline :: Platform -> [CmmBasicBlock] -> [CmmBasicBlock]
151cmmMiniInline platform blocks = map do_inline blocks
152  where do_inline (BasicBlock id stmts)
153          = BasicBlock id (cmmMiniInlineStmts platform (countUses blocks) stmts)
154
155cmmMiniInlineStmts :: Platform -> UniqFM Int -> [CmmStmt] -> [CmmStmt]
156cmmMiniInlineStmts _        _    [] = []
157cmmMiniInlineStmts platform uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
158        -- not used: just discard this assignment
159  | Nothing <- lookupUFM uses u
160  = cmmMiniInlineStmts platform uses stmts
161
162        -- used (literal): try to inline at all the use sites
163  | Just n <- lookupUFM uses u, isLit expr
164  =
165     ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt platform stmt)) $
166     case lookForInlineLit u expr stmts of
167         (m, stmts')
168             | n == m -> cmmMiniInlineStmts platform (delFromUFM uses u) stmts'
169             | otherwise ->
170                 stmt : cmmMiniInlineStmts platform (adjustUFM (\x -> x - m) uses u) stmts'
171
172        -- used (foldable to literal): try to inline at all the use sites
173  | Just n <- lookupUFM uses u,
174    e@(CmmLit _) <- wrapRecExp foldExp expr
175  =
176     ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt platform stmt)) $
177     case lookForInlineLit u e stmts of
178         (m, stmts')
179             | n == m -> cmmMiniInlineStmts platform (delFromUFM uses u) stmts'
180             | otherwise ->
181                 stmt : cmmMiniInlineStmts platform (adjustUFM (\x -> x - m) uses u) stmts'
182
183        -- used once (non-literal): try to inline at the use site
184  | Just 1 <- lookupUFM uses u,
185    Just stmts' <- lookForInline u expr stmts
186  = 
187     ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt platform stmt)) $
188     cmmMiniInlineStmts platform uses stmts'
189 where
190  foldExp (CmmMachOp op args) = cmmMachOpFold platform op args
191  foldExp e = e
192
193  ncgDebugTrace str x = if ncgDebugIsOn then trace str x else x
194
195cmmMiniInlineStmts platform uses (stmt:stmts)
196  = stmt : cmmMiniInlineStmts platform uses stmts
197
198-- | Takes a register, a 'CmmLit' expression assigned to that
199-- register, and a list of statements.  Inlines the expression at all
200-- use sites of the register.  Returns the number of substituations
201-- made and the, possibly modified, list of statements.
202lookForInlineLit :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt])
203lookForInlineLit _ _ [] = (0, [])
204lookForInlineLit u expr stmts@(stmt : rest)
205  | Just n <- lookupUFM (countUses stmt) u
206  = case lookForInlineLit u expr rest of
207      (m, stmts) -> let z = n + m
208                    in z `seq` (z, inlineStmt u expr stmt : stmts)
209
210  | ok_to_skip
211  = case lookForInlineLit u expr rest of
212      (n, stmts) -> (n, stmt : stmts)
213
214  | otherwise
215  = (0, stmts)
216  where
217    -- We skip over assignments to registers, unless the register
218    -- being assigned to is the one we're inlining.
219    ok_to_skip = case stmt of
220        CmmAssign (CmmLocal (LocalReg u' _)) _ | u' == u -> False
221        _other -> True
222
223lookForInline :: Unique -> CmmExpr -> [CmmStmt] -> Maybe [CmmStmt]
224lookForInline u expr stmts = lookForInline' u expr regset stmts
225    where regset = foldRegsUsed extendRegSet emptyRegSet expr
226
227lookForInline' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> Maybe [CmmStmt]
228lookForInline' _ _    _      [] = panic "lookForInline' []"
229lookForInline' u expr regset (stmt : rest)
230  | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline
231  = Just (inlineStmt u expr stmt : rest)
232
233  | ok_to_skip
234  = case lookForInline' u expr regset rest of
235           Nothing    -> Nothing
236           Just stmts -> Just (stmt:stmts)
237
238  | otherwise
239  = Nothing
240
241  where
242        -- we don't inline into CmmCall if the expression refers to global
243        -- registers.  This is a HACK to avoid global registers clashing with
244        -- C argument-passing registers, really the back-end ought to be able
245        -- to handle it properly, but currently neither PprC nor the NCG can
246        -- do it.  See also CgForeignCall:load_args_into_temps.
247    ok_to_inline = case stmt of
248                     CmmCall{} -> hasNoGlobalRegs expr
249                     _ -> True
250
251   -- Expressions aren't side-effecting.  Temporaries may or may not
252   -- be single-assignment depending on the source (the old code
253   -- generator creates single-assignment code, but hand-written Cmm
254   -- and Cmm from the new code generator is not single-assignment.)
255   -- So we do an extra check to make sure that the register being
256   -- changed is not one we were relying on.  I don't know how much of a
257   -- performance hit this is (we have to create a regset for every
258   -- instruction.) -- EZY
259    ok_to_skip = case stmt of
260                 CmmNop -> True
261                 CmmComment{} -> True
262                 CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True
263                 CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr)
264                 _other -> False
265
266
267inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
268inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
269inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
270inlineStmt u a (CmmCall target regs es ret)
271   = CmmCall (infn target) regs es' ret
272   where infn (CmmCallee fn cconv) = CmmCallee (inlineExpr u a fn) cconv
273         infn (CmmPrim p mStmts) = CmmPrim p (fmap (map (inlineStmt u a)) mStmts)
274         es' = [ (CmmHinted (inlineExpr u a e) hint) | (CmmHinted e hint) <- es ]
275inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
276inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
277inlineStmt u a (CmmJump e live) = CmmJump (inlineExpr u a e) live
278inlineStmt _ _ other_stmt = other_stmt
279
280inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
281inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
282  | u == u' = a
283  | otherwise = e
284inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
285  | u == u' = CmmMachOp (MO_Add width) [a, CmmLit (CmmInt (fromIntegral off) width)]
286  | otherwise = e
287  where
288    width = typeWidth rep
289inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
290inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
291inlineExpr _ _ other_expr = other_expr
292
293-- -----------------------------------------------------------------------------
294-- MachOp constant folder
295
296-- Now, try to constant-fold the MachOps.  The arguments have already
297-- been optimized and folded.
298
299cmmMachOpFold
300    :: Platform
301    -> MachOp       -- The operation from an CmmMachOp
302    -> [CmmExpr]    -- The optimized arguments
303    -> CmmExpr
304
305cmmMachOpFold platform op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM platform op args)
306
307-- Returns Nothing if no changes, useful for Hoopl, also reduces
308-- allocation!
309cmmMachOpFoldM
310    :: Platform
311    -> MachOp
312    -> [CmmExpr]
313    -> Maybe CmmExpr
314
315cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
316  = Just $ case op of
317      MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
318      MO_Not _   -> CmmLit (CmmInt (complement x) rep)
319
320        -- these are interesting: we must first narrow to the
321        -- "from" type, in order to truncate to the correct size.
322        -- The final narrow/widen to the destination type
323        -- is implicit in the CmmLit.
324      MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
325      MO_SS_Conv  from to -> CmmLit (CmmInt (narrowS from x) to)
326      MO_UU_Conv  from to -> CmmLit (CmmInt (narrowU from x) to)
327
328      _ -> panic "cmmMachOpFoldM: unknown unary op"
329
330
331-- Eliminate conversion NOPs
332cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
333cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
334
335-- Eliminate nested conversions where possible
336cmmMachOpFoldM platform conv_outer [CmmMachOp conv_inner [x]]
337  | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
338    Just (_,   rep3,signed2) <- isIntConversion conv_outer
339  = case () of
340        -- widen then narrow to the same size is a nop
341      _ | rep1 < rep2 && rep1 == rep3 -> Just x
342        -- Widen then narrow to different size: collapse to single conversion
343        -- but remember to use the signedness from the widening, just in case
344        -- the final conversion is a widen.
345        | rep1 < rep2 && rep2 > rep3 ->
346            Just $ cmmMachOpFold platform (intconv signed1 rep1 rep3) [x]
347        -- Nested widenings: collapse if the signedness is the same
348        | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
349            Just $ cmmMachOpFold platform (intconv signed1 rep1 rep3) [x]
350        -- Nested narrowings: collapse
351        | rep1 > rep2 && rep2 > rep3 ->
352            Just $ cmmMachOpFold platform (MO_UU_Conv rep1 rep3) [x]
353        | otherwise ->
354            Nothing
355  where
356        isIntConversion (MO_UU_Conv rep1 rep2) 
357          = Just (rep1,rep2,False)
358        isIntConversion (MO_SS_Conv rep1 rep2)
359          = Just (rep1,rep2,True)
360        isIntConversion _ = Nothing
361
362        intconv True  = MO_SS_Conv
363        intconv False = MO_UU_Conv
364
365-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
366-- but what if the architecture only supports word-sized loads, should
367-- we do the transformation anyway?
368
369cmmMachOpFoldM _ mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
370  = case mop of
371        -- for comparisons: don't forget to narrow the arguments before
372        -- comparing, since they might be out of range.
373        MO_Eq _   -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordWidth)
374        MO_Ne _   -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordWidth)
375
376        MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u >  y_u then 1 else 0) wordWidth)
377        MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordWidth)
378        MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u <  y_u then 1 else 0) wordWidth)
379        MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordWidth)
380
381        MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s >  y_s then 1 else 0) wordWidth)
382        MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordWidth)
383        MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s <  y_s then 1 else 0) wordWidth)
384        MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordWidth)
385
386        MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
387        MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
388        MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
389        MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
390        MO_U_Rem  r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem`  y_u) r)
391        MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
392        MO_S_Rem  r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
393
394        MO_And   r -> Just $ CmmLit (CmmInt (x .&. y) r)
395        MO_Or    r -> Just $ CmmLit (CmmInt (x .|. y) r)
396        MO_Xor   r -> Just $ CmmLit (CmmInt (x `xor` y) r)
397
398        MO_Shl   r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
399        MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
400        MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
401
402        _          -> Nothing
403
404   where
405        x_u = narrowU xrep x
406        y_u = narrowU xrep y
407        x_s = narrowS xrep x
408        y_s = narrowS xrep y
409
410
411-- When possible, shift the constants to the right-hand side, so that we
412-- can match for strength reductions.  Note that the code generator will
413-- also assume that constants have been shifted to the right when
414-- possible.
415
416cmmMachOpFoldM platform op [x@(CmmLit _), y]
417   | not (isLit y) && isCommutableMachOp op
418   = Just (cmmMachOpFold platform op [y, x])
419
420-- Turn (a+b)+c into a+(b+c) where possible.  Because literals are
421-- moved to the right, it is more likely that we will find
422-- opportunities for constant folding when the expression is
423-- right-associated.
424--
425-- ToDo: this appears to introduce a quadratic behaviour due to the
426-- nested cmmMachOpFold.  Can we fix this?
427--
428-- Why do we check isLit arg1?  If arg1 is a lit, it means that arg2
429-- is also a lit (otherwise arg1 would be on the right).  If we
430-- put arg1 on the left of the rearranged expression, we'll get into a
431-- loop:  (x1+x2)+x3 => x1+(x2+x3)  => (x2+x3)+x1 => x2+(x3+x1) ...
432--
433-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
434-- PicBaseReg from the corresponding label (or label difference).
435--
436cmmMachOpFoldM platform mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
437   | mop2 `associates_with` mop1
438     && not (isLit arg1) && not (isPicReg arg1)
439   = Just (cmmMachOpFold platform mop2 [arg1, cmmMachOpFold platform mop1 [arg2,arg3]])
440   where
441     MO_Add{} `associates_with` MO_Sub{} = True
442     mop1 `associates_with` mop2 =
443        mop1 == mop2 && isAssociativeMachOp mop1
444
445-- special case: (a - b) + c  ==>  a + (c - b)
446cmmMachOpFoldM platform mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
447   | not (isLit arg1) && not (isPicReg arg1)
448   = Just (cmmMachOpFold platform mop1 [arg1, cmmMachOpFold platform mop2 [arg3,arg2]])
449
450-- Make a RegOff if we can
451cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
452  = Just $ CmmRegOff reg (fromIntegral (narrowS rep n))
453cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
454  = Just $ CmmRegOff reg (off + fromIntegral (narrowS rep n))
455cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
456  = Just $ CmmRegOff reg (- fromIntegral (narrowS rep n))
457cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
458  = Just $ CmmRegOff reg (off - fromIntegral (narrowS rep n))
459
460-- Fold label(+/-)offset into a CmmLit where possible
461
462cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
463  = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
464cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
465  = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
466cmmMachOpFoldM _ (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
467  = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
468
469
470-- Comparison of literal with widened operand: perform the comparison
471-- at the smaller width, as long as the literal is within range.
472
473-- We can't do the reverse trick, when the operand is narrowed:
474-- narrowing throws away bits from the operand, there's no way to do
475-- the same comparison at the larger size.
476
477cmmMachOpFoldM platform cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
478  |     -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
479    platformArch platform `elem` [ArchX86, ArchX86_64],
480        -- if the operand is widened:
481    Just (rep, signed, narrow_fn) <- maybe_conversion conv,
482        -- and this is a comparison operation:
483    Just narrow_cmp <- maybe_comparison cmp rep signed,
484        -- and the literal fits in the smaller size:
485    i == narrow_fn rep i
486        -- then we can do the comparison at the smaller size
487  = Just (cmmMachOpFold platform narrow_cmp [x, CmmLit (CmmInt i rep)])
488 where
489    maybe_conversion (MO_UU_Conv from to)
490        | to > from
491        = Just (from, False, narrowU)
492    maybe_conversion (MO_SS_Conv from to)
493        | to > from
494        = Just (from, True, narrowS)
495
496        -- don't attempt to apply this optimisation when the source
497        -- is a float; see #1916
498    maybe_conversion _ = Nothing
499
500        -- careful (#2080): if the original comparison was signed, but
501        -- we were doing an unsigned widen, then we must do an
502        -- unsigned comparison at the smaller size.
503    maybe_comparison (MO_U_Gt _) rep _     = Just (MO_U_Gt rep)
504    maybe_comparison (MO_U_Ge _) rep _     = Just (MO_U_Ge rep)
505    maybe_comparison (MO_U_Lt _) rep _     = Just (MO_U_Lt rep)
506    maybe_comparison (MO_U_Le _) rep _     = Just (MO_U_Le rep)
507    maybe_comparison (MO_Eq   _) rep _     = Just (MO_Eq   rep)
508    maybe_comparison (MO_S_Gt _) rep True  = Just (MO_S_Gt rep)
509    maybe_comparison (MO_S_Ge _) rep True  = Just (MO_S_Ge rep)
510    maybe_comparison (MO_S_Lt _) rep True  = Just (MO_S_Lt rep)
511    maybe_comparison (MO_S_Le _) rep True  = Just (MO_S_Le rep)
512    maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
513    maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
514    maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
515    maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
516    maybe_comparison _ _ _ = Nothing
517
518-- We can often do something with constants of 0 and 1 ...
519
520cmmMachOpFoldM _ mop [x, y@(CmmLit (CmmInt 0 _))]
521  = case mop of
522        MO_Add   _ -> Just x
523        MO_Sub   _ -> Just x
524        MO_Mul   _ -> Just y
525        MO_And   _ -> Just y
526        MO_Or    _ -> Just x
527        MO_Xor   _ -> Just x
528        MO_Shl   _ -> Just x
529        MO_S_Shr _ -> Just x
530        MO_U_Shr _ -> Just x
531        MO_Ne    _ | isComparisonExpr x -> Just x
532        MO_Eq    _ | Just x' <- maybeInvertCmmExpr x -> Just x'
533        MO_U_Gt  _ | isComparisonExpr x -> Just x
534        MO_S_Gt  _ | isComparisonExpr x -> Just x
535        MO_U_Lt  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
536        MO_S_Lt  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
537        MO_U_Ge  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
538        MO_S_Ge  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
539        MO_U_Le  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
540        MO_S_Le  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
541        _ -> Nothing
542
543cmmMachOpFoldM _ mop [x, (CmmLit (CmmInt 1 rep))]
544  = case mop of
545        MO_Mul    _ -> Just x
546        MO_S_Quot _ -> Just x
547        MO_U_Quot _ -> Just x
548        MO_S_Rem  _ -> Just $ CmmLit (CmmInt 0 rep)
549        MO_U_Rem  _ -> Just $ CmmLit (CmmInt 0 rep)
550        MO_Ne    _ | Just x' <- maybeInvertCmmExpr x -> Just x'
551        MO_Eq    _ | isComparisonExpr x -> Just x
552        MO_U_Lt  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
553        MO_S_Lt  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
554        MO_U_Gt  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
555        MO_S_Gt  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
556        MO_U_Le  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
557        MO_S_Le  _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
558        MO_U_Ge  _ | isComparisonExpr x -> Just x
559        MO_S_Ge  _ | isComparisonExpr x -> Just x
560        _ -> Nothing
561
562-- Now look for multiplication/division by powers of 2 (integers).
563
564cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt n _))]
565  = case mop of
566        MO_Mul rep
567           | Just p <- exactLog2 n ->
568                 Just (cmmMachOpFold platform (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
569        MO_U_Quot rep
570           | Just p <- exactLog2 n ->
571                 Just (cmmMachOpFold platform (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
572        MO_S_Quot rep
573           | Just p <- exactLog2 n, 
574             CmmReg _ <- x ->   -- We duplicate x below, hence require
575                                -- it is a reg.  FIXME: remove this restriction.
576                -- shift right is not the same as quot, because it rounds
577                -- to minus infinity, whereasq quot rounds toward zero.
578                -- To fix this up, we add one less than the divisor to the
579                -- dividend if it is a negative number.
580                --
581                -- to avoid a test/jump, we use the following sequence:
582                --      x1 = x >> word_size-1  (all 1s if -ve, all 0s if +ve)
583                --      x2 = y & (divisor-1)
584                --      result = (x+x2) >>= log2(divisor)
585                -- this could be done a bit more simply using conditional moves,
586                -- but we're processor independent here.
587                --
588                -- we optimise the divide by 2 case slightly, generating
589                --      x1 = x >> word_size-1  (unsigned)
590                --      return = (x + x1) >>= log2(divisor)
591                let
592                    bits = fromIntegral (widthInBits rep) - 1
593                    shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
594                    x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
595                    x2 = if p == 1 then x1 else
596                         CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
597                    x3 = CmmMachOp (MO_Add rep) [x, x2]
598                in
599                Just (cmmMachOpFold platform (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
600        _ -> Nothing
601
602-- Anything else is just too hard.
603
604cmmMachOpFoldM _ _ _ = Nothing
605
606-- -----------------------------------------------------------------------------
607-- exactLog2
608
609-- This algorithm for determining the $\log_2$ of exact powers of 2 comes
610-- from GCC.  It requires bit manipulation primitives, and we use GHC
611-- extensions.  Tough.
612--
613-- Used to be in MachInstrs --SDM.
614-- ToDo: remove use of unboxery --SDM.
615
616-- Unboxery removed in favor of FastInt; but is the function supposed to fail
617-- on inputs >= 2147483648, or was that just an implementation artifact?
618-- And is this speed-critical, or can we just use Integer operations
619-- (including Data.Bits)?
620--  --Isaac Dupree
621
622exactLog2 :: Integer -> Maybe Integer
623exactLog2 x_
624  = if (x_ <= 0 || x_ >= 2147483648) then
625       Nothing
626    else
627       case iUnbox (fromInteger x_) of { x ->
628       if (x `bitAndFastInt` negateFastInt x) /=# x then
629          Nothing
630       else
631          Just (toInteger (iBox (pow2 x)))
632       }
633  where
634    pow2 x | x ==# _ILIT(1) = _ILIT(0)
635           | otherwise = _ILIT(1) +# pow2 (x `shiftR_FastInt` _ILIT(1))
636
637
638-- -----------------------------------------------------------------------------
639-- Loopify for C
640
641{-
642 This is a simple pass that replaces tail-recursive functions like this:
643
644   fac() {
645     ...
646     jump fac();
647   }
648
649 with this:
650
651  fac() {
652   L:
653     ...
654     goto L;
655  }
656
657  the latter generates better C code, because the C compiler treats it
658  like a loop, and brings full loop optimisation to bear.
659
660  In my measurements this makes little or no difference to anything
661  except factorial, but what the hell.
662-}
663
664cmmLoopifyForC :: RawCmmDecl -> RawCmmDecl
665cmmLoopifyForC p@(CmmProc Nothing _ _) = p  -- only if there's an info table, ignore case alts
666cmmLoopifyForC (CmmProc (Just info@(Statics info_lbl _)) entry_lbl
667                 (ListGraph blocks@(BasicBlock top_id _ : _))) =
668--  pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
669  CmmProc (Just info) entry_lbl (ListGraph blocks')
670  where blocks' = [ BasicBlock id (map do_stmt stmts)
671                  | BasicBlock id stmts <- blocks ]
672
673        do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
674                = CmmBranch top_id
675        do_stmt stmt = stmt
676
677        jump_lbl | tablesNextToCode = info_lbl
678                 | otherwise        = entry_lbl
679
680cmmLoopifyForC top = top
681
682-- -----------------------------------------------------------------------------
683-- Utils
684
685isLit :: CmmExpr -> Bool
686isLit (CmmLit _) = True
687isLit _          = False
688
689isComparisonExpr :: CmmExpr -> Bool
690isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
691isComparisonExpr _                  = False
692
693isPicReg :: CmmExpr -> Bool
694isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
695isPicReg _ = False
Note: See TracBrowser for help on using the browser.