{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        extractUnwindPoints,
        invertCondBranches,
        InstrBlock
)
where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"
import GhcPrelude
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import X86.Ppr()
import CodeGen.Platform
import CPrim
import Debug            ( DebugBlock(..), UnwindPoint(..), UnwindTable
                        , UnwindExpr(UwReg), toUnwindExpr )
import Instruction
import PIC
import NCGMonad   ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat
                  , getDeltaNat, getBlockIdNat, getPicBaseNat, getNewRegPairNat
                  , getPicBaseMaybeNat, getDebugBlock, getFileId
                  , addImmediateSuccessorNat, updateCfgNat)
import CFG
import Format
import Reg
import Platform
import BasicTypes
import BlockId
import Module           ( primUnitId )
import PprCmm           ()
import CmmUtils
import CmmSwitch
import Cmm
import Hoopl.Block
import Hoopl.Collections
import Hoopl.Graph
import Hoopl.Label
import CLabel
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import FastString
import DynFlags
import Util
import UniqSupply       ( getUniqueM )
import Control.Monad
import Data.Bits
import Data.Foldable (fold)
import Data.Int
import Data.Maybe
import Data.Word
import qualified Data.Map as M
is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)
sse2Enabled :: NatM Bool
sse2Enabled = do
  dflags <- getDynFlags
  case platformArch (targetPlatform dflags) of
  
  
  
  
  
  
  
  
    ArchX86_64 -> return True
    ArchX86    -> return True
    _          -> panic "trying to generate x86/x86_64 on the wrong platform"
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  dflags <- getDynFlags
  return (isSse4_2Enabled dflags)
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]
cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags
  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops
cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (mkAlignment 1, dat)]  
basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])
basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs id stmts
  tail_instrs <- stmtToInstrs id tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  instrs' <- fold <$> traverse addSpUnwindings instrs
  
  
  
  
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'
        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    dflags <- getDynFlags
    if debugLevel dflags >= 1
        then do lbl <- mkAsmTempLabel <$> getUniqueM
                let unwind = M.singleton MachSp (Just $ UwReg MachSp $ negate d)
                return $ toOL [ instr, UNWIND lbl unwind ]
        else return (unitOL instr)
addSpUnwindings instr = return $ unitOL instr
stmtsToInstrs :: BlockId -> [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs bid stmts
   = do instrss <- mapM (stmtToInstrs bid) stmts
        return (concatOL instrss)
stmtToInstrs :: BlockId -> CmmNode e x -> NatM InstrBlock
stmtToInstrs bid stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL
    CmmUnwind regs -> do
      let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
          to_unwind_entry (reg, expr) = M.singleton reg (fmap toUnwindExpr expr)
      case foldMap to_unwind_entry regs of
        tbl | M.null tbl -> return nilOL
            | otherwise  -> do
                lbl <- mkAsmTempLabel <$> getUniqueM
                return $ unitOL $ UNWIND lbl tbl
    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode format reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode format reg src
        where ty = cmmRegType dflags reg
              format = cmmTypeFormat ty
    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode format addr src
        where ty = cmmExprType dflags src
              format = cmmTypeFormat ty
    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args bid
    CmmBranch id          -> return $ genBranch id
    
    
    CmmCondBranch arg true false _ -> genCondBranch bid true false arg
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                dflags <- getDynFlags
                                genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"
jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags
type InstrBlock
        = OrdList Instr
data CondCode
        = CondCode Bool Cond InstrBlock
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn
getRegisterReg :: Platform  -> CmmReg -> Reg
getRegisterReg _   (CmmLocal (LocalReg u pk))
  = 
   let fmt = cmmTypeFormat pk in
        RegVirtual (mkVirtualReg u fmt)
getRegisterReg platform  (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        
        
        
data Amode
        = Amode AddrMode InstrBlock
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = blockLbl blockid
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo
        
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )
assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"
iselExpr64        :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )
iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                          CLTD II32 `snocOL`
                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
                          r_dst_lo
            )
iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e
getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register
getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            
            
            
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
            do
               let
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 format  = fmt
               
               let platform = targetPlatform dflags
               return (Fixed format
                             (getRegisterReg platform  reg)
                             nilOL)
getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
  = addAlignmentCheck align <$> getRegister' dflags is32Bit e
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  float_const_sse2  where
  float_const_sse2
    | f == 0.0 = do
      let
          format = floatFormat w
          code dst = unitOL  (XOR format (OpReg dst) (OpReg dst))
        
        
      return (Any format code)
   | otherwise = do
      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
      loadFloatAmode w addr code
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)
getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)
getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)
getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr 
  return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                     CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
getRegister' dflags is32Bit (CmmMachOp mop [x]) = do 
    case mop of
      MO_F_Neg w  -> sse2NegCode w x
      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)
      
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_XX_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_XX_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x
      MO_XX_Conv W32 W16 -> toI16Reg W32 x
      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_XX_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x
      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x
      
      
      
      MO_XX_Conv W8  W32
          | is32Bit   -> integerExtend W8 W32 MOVZxL x
          | otherwise -> integerExtend W8 W32 MOV x
      MO_XX_Conv W8  W16
          | is32Bit   -> integerExtend W8 W16 MOVZxL x
          | otherwise -> integerExtend W8 W16 MOV x
      MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x
      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
      
      
      
      
      
      
      MO_XX_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOV x
      MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
      MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
      MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x
        
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)
        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                
                
                
                
                
                
        toI16Reg = toI8Reg 
        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_format)
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do 
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      
      
      MO_F_Lt _ -> condFltReg is32Bit GTT  y x
      MO_F_Le _ -> condFltReg is32Bit GE   y x
      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y
      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y
      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y
      MO_F_Add w   -> trivialFCode_sse2 w ADD  x y
      MO_F_Sub w   -> trivialFCode_sse2 w SUB  x y
      MO_F_Quot w  -> trivialFCode_sse2 w FDIV x y
      MO_F_Mul w   -> trivialFCode_sse2 w MUL x y
      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y
      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y
      MO_S_MulMayOflo rep -> imulMayOflo rep x y
      MO_Mul W8  -> imulW8 x y
      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR
        
      MO_Shl rep   -> shift_code rep SHL x y 
      MO_U_Shr rep -> shift_code rep SHR x y 
      MO_S_Shr rep -> shift_code rep SAR x y 
      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm
      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    
    triv_op width instr = trivialCode width op (Just op) x y
                        where op   = instr (intFormat width)
    
    
    
    imulW8 :: CmmExpr -> CmmExpr -> NatM Register
    imulW8 arg_a arg_b = do
        (a_reg, a_code) <- getNonClobberedReg arg_a
        b_code <- getAnyReg arg_b
        let code = a_code `appOL` b_code eax `appOL`
                   toOL [ IMUL2 format (OpReg a_reg) ]
            format = intFormat W8
        return (Fixed format eax code)
    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"
             format = intFormat rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 format (OpReg a_reg),   
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
                                
                           SUB format (OpReg edx) (OpReg eax)
                                
                           
                        ]
         return (Fixed format eax code)
    
    shift_code :: Width
               -> (Format -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register
    
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               format = intFormat width
               code dst
                  = x_code dst `snocOL`
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)
    
    shift_code width instr x y = do
        x_code <- getAnyReg x
        let format = intFormat width
        tmp <- getNewRegNat format
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)
    
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
    
    
    
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
    
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            format = intFormat width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA format
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        
        return (Any format code)
    
    
    div_code W8 signed quotient x y = do
        let widen | signed    = MO_SS_Conv W8 W16
                  | otherwise = MO_UU_Conv W8 W16
        div_code
            W16
            signed
            quotient
            (CmmMachOp widen [x])
            (CmmMachOp widen [y])
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y 
           x_code <- getAnyReg x
           let
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)
             instr | signed    = IDIV
                   | otherwise = DIV
             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr format y_op]
             result | quotient  = eax
                    | otherwise = edx
           return (Fixed format result code)
getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    loadFloatAmode  (typeWidth pk) addr mem_code
getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any format code)
  where
    width = typeWidth pk
    format = intFormat width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV format
        
        
        
        
        
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV format) mem
    return (Any format code)
  where format = intFormat $ typeWidth pk
getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        format = intFormat width
        
        format1 = if is32Bit then format
                           else case format of
                                II64 -> II32
                                _ -> format
        code dst
           = unitOL (XOR format1 (OpReg dst) (OpReg dst))
    in
        return (Any format code)
  
  
  
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        
        
        
        
        
getRegister' dflags _ (CmmLit lit)
  = do let format = cmmTypeFormat (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
       return (Any format code)
getRegister' _ _ other
    | isVecExpr other  = needLlvm
    | otherwise        = pprPanic "getRegister(x86)" (ppr other)
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
   -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    
                    
                    
      else getSomeReg expr 
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)
reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst = MOV format (OpReg src) (OpReg dst)
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e
getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n
getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                  CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                  b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])
getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
                                   CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)
getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0
getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  && is32BitInteger offset
  = x86_complex_amode x y shift offset
getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0
getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
getSimpleAmode dflags is32Bit addr
    | is32Bit = do
        addr_code <- getAnyReg addr
        addr_r <- getNewRegNat (intFormat (wordWidth dflags))
        let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
        return $! Amode amode (addr_code addr_r)
    | otherwise = getAmode addr
x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        
        
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
               code)
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  if  isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
      return (OpAddr addr, code)
     else do
  is32Bit <- is32BitPlatform
  dflags <- getDynFlags
  if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
    then return (OpImm (litToImm lit), nilOL)
    else getNonClobberedOperand_generic (CmmLit lit)
getNonClobberedOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  
  
  if   (if is32Bit then not (isWord64 pk) else True)
      
      
      
    then do
      dflags <- getDynFlags
      let platform = targetPlatform dflags
      Amode src mem_code <- getAmode mem
      (src',save_code) <-
        if (amodeCouldBeClobbered platform src)
                then do
                   tmp <- getNewRegNat (archWordFormat is32Bit)
                   return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                           unitOL (LEA (archWordFormat is32Bit)
                                       (OpAddr src)
                                       (OpReg tmp)))
                else
                   return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else do
      
      getNonClobberedOperand_generic (CmmLoad mem pk)
getNonClobberedOperand e = getNonClobberedOperand_generic e
getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)
amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)
regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
regClobbered _ _ = False
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
      return (OpAddr addr, code)
    else do
  is32Bit <- is32BitPlatform
  dflags <- getDynFlags
  if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
    then return (OpImm (litToImm lit), nilOL)
    else getOperand_generic (CmmLit lit)
getOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else
       getOperand_generic (CmmLoad mem pk)
getOperand e = getOperand_generic e
getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)
isOperand :: Bool -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _) = True
isOperand is32Bit (CmmLit lit)  = is32BitLit is32Bit lit
                          || isSuitableFloatingPointLit lit
isOperand _ _            = False
addAlignmentCheck :: Int -> Register -> Register
addAlignmentCheck align reg =
    case reg of
      Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt reg)
      Any fmt f          -> Any fmt (\reg -> f reg `appOL` check fmt reg)
  where
    check :: Format -> Reg -> InstrBlock
    check fmt reg =
        ASSERT(not $ isFloatFormat fmt)
        toOL [ TEST fmt (OpImm $ ImmInt $ align-1) (OpReg reg)
             , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
             ]
memConstant :: Alignment -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  let rosection = Section ReadOnlyData lbl
  dflags <- getDynFlags
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA rosection (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)
loadFloatAmode :: Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode w addr addr_code = do
  let format = floatFormat w
      code dst = addr_code `snocOL`
                    MOV format (OpAddr addr) (OpReg dst)
  return (Any format code)
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else do
       (reg, code) <- getNonClobberedReg e
       return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)
is32BitLit :: Bool -> CmmLit -> Bool
is32BitLit is32Bit (CmmInt i W64)
 | not is32Bit
    = 
      
      is32BitInteger i
is32BitLit _ _ = True
getCondCode :: CmmExpr -> NatM CondCode
getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      
      
      MO_F_Lt W32 -> condFltCode GTT  y x
      MO_F_Le W32 -> condFltCode GE   y x
      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode GTT y x
      MO_F_Le W64 -> condFltCode GE  y x
      _ -> condIntCode (machOpToCond mop) x y
getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)
machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
  MO_Eq _   -> EQQ
  MO_Ne _   -> NE
  MO_S_Gt _ -> GTT
  MO_S_Ge _ -> GE
  MO_S_Lt _ -> LTT
  MO_S_Le _ -> LE
  MO_U_Gt _ -> GU
  MO_U_Ge _ -> GEU
  MO_U_Lt _ -> LU
  MO_U_Le _ -> LEU
  _other -> pprPanic "machOpToCond" (pprMachOp mo)
condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do is32Bit <- is32BitPlatform
                          condIntCode' is32Bit cond x y
condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
 | is32BitLit is32Bit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
                  CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr)
    
    return (CondCode False cond code)
condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
    | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
      
      return (CondCode False cond code)
condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
                  TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg)
    
    return (CondCode False cond code)
condIntCode' is32Bit cond x y
 | isOperand is32Bit y = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
 | isOperand is32Bit x
 , Just revcond <- maybeFlipCond cond = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)
condIntCode' _ cond x y = do
  dflags <- getDynFlags
  (y_reg, y_code) <- getNonClobberedReg y
  (x_op, x_code) <- getRegOrMem x
  let
        code = y_code `appOL`
               x_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op
  return (CondCode False cond code)
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condFltCode cond x y
  =  condFltCode_sse2
  where
  
  
  
  condFltCode_sse2 = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
                  CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg)
        
        
    return (CondCode True (condToUnsigned cond) code)
assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                                 CmmLit (CmmInt i _)])
   | addr == addr2, pk /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        
assignMem_IntCode pk addr src = do
    is32Bit <- is32BitPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI is32Bit src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV pk op_src (OpAddr addr)
        
        
        
        
    
    return code
  where
    get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand)   
    get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)
assignReg_IntCode pk reg (CmmLoad src _) = do
  load_code <- intLoadCode (MOV pk) src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform reg))
assignReg_IntCode _ reg src = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  code <- getAnyReg src
  return (code (getRegisterReg platform reg))
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  let
        code = src_code `appOL`
               addr_code `snocOL`
               MOV pk (OpReg src_reg) (OpAddr addr)
  return code
assignReg_FltCode _ reg src = do
  src_code <- getAnyReg src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (src_code (getRegisterReg platform  reg))
genJump :: CmmExpr -> [Reg] -> NatM InstrBlock
genJump (CmmLoad mem _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)
genJump (CmmLit lit) regs = do
  return (unitOL (JMP (OpImm (litToImm lit)) regs))
genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)
genBranch :: BlockId -> InstrBlock
genBranch = toOL . mkJumpInstr
genCondBranch
    :: BlockId      
    -> BlockId      
    -> BlockId      
    -> CmmExpr      
    -> NatM InstrBlock 
genCondBranch bid id false expr = do
  is32Bit <- is32BitPlatform
  genCondBranch' is32Bit bid id false expr
genCondBranch' :: Bool -> BlockId -> BlockId -> BlockId -> CmmExpr
               -> NatM InstrBlock
genCondBranch' is32Bit _bid true false (CmmMachOp mop [e1,e2])
  | is32Bit, Just W64 <- maybeIntComparison mop = do
  ChildCode64 code1 r1_lo <- iselExpr64 e1
  ChildCode64 code2 r2_lo <- iselExpr64 e2
  let r1_hi = getHiVRegFromLo r1_lo
      r2_hi = getHiVRegFromLo r2_lo
      cond = machOpToCond mop
      Just cond' = maybeFlipCond cond
  
  let code = code1 `appOL` code2 `appOL` toOL [
        CMP II32 (OpReg r2_hi) (OpReg r1_hi),
        JXX cond true,
        JXX cond' false,
        CMP II32 (OpReg r2_lo) (OpReg r1_lo),
        JXX cond true] `appOL` genBranch false
  return code
genCondBranch' _ bid id false bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  use_sse2 <- sse2Enabled
  if not is_float || not use_sse2
    then
        return (cond_code `snocOL` JXX cond id `appOL` genBranch false)
    else do
        
        let jmpFalse = genBranch false
            code
                = case cond of
                  NE  -> or_unordered
                  GU  -> plain_test
                  GEU -> plain_test
                  
                  
                  LTT ->
                    ASSERT2(False, ppr "Should have been turned into >")
                    and_ordered
                  LE  ->
                    ASSERT2(False, ppr "Should have been turned into >=")
                    and_ordered
                  _   -> and_ordered
            plain_test = unitOL (
                  JXX cond id
                ) `appOL` jmpFalse
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ] `appOL` jmpFalse
            and_ordered = toOL [
                  JXX PARITY false,
                  JXX cond id,
                  JXX ALWAYS false
                ]
        updateCfgNat (\cfg -> adjustEdgeWeight cfg (+3) bid false)
        return (cond_code `appOL` code)
genCCall
    :: DynFlags
    -> Bool                     
    -> ForeignTarget            
    -> [CmmFormal]        
    -> [CmmActual]        
    -> BlockId      
    -> NatM InstrBlock
genCCall dflags _ (PrimTarget (MO_Memcpy align)) _
         [dst, src, CmmLit (CmmInt n _)] _
    | fromInteger insns <= maxInlineMemcpyInsns dflags = do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat format
        code_src <- getAnyReg src
        src_r <- getNewRegNat format
        tmp_r <- getNewRegNat format
        return $ code_dst dst_r `appOL` code_src src_r `appOL`
            go dst_r src_r tmp_r (fromInteger n)
  where
    
    
    insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
    maxAlignment = wordAlignment dflags 
    effectiveAlignment = min (alignmentOf align) maxAlignment
    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
    
    sizeBytes :: Integer
    sizeBytes = fromIntegral (formatInBytes format)
    go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
    go dst src tmp i
        | i >= sizeBytes =
            unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - sizeBytes)
        
        | i >= 4 =  
            unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 4)
        | i >= 2 =
            unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 2)
        | i >= 1 =
            unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 1)
        | otherwise = nilOL
      where
        src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
                   (ImmInteger (n - i))
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
                   (ImmInteger (n - i))
genCCall dflags _ (PrimTarget (MO_Memset align)) _
         [dst,
          CmmLit (CmmInt c _),
          CmmLit (CmmInt n _)]
         _
    | fromInteger insns <= maxInlineMemsetInsns dflags = do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat format
        if format == II64 && n >= 8 then do
          code_imm8byte <- getAnyReg (CmmLit (CmmInt c8 W64))
          imm8byte_r <- getNewRegNat II64
          return $ code_dst dst_r `appOL`
                   code_imm8byte imm8byte_r `appOL`
                   go8 dst_r imm8byte_r (fromInteger n)
        else
          return $ code_dst dst_r `appOL`
                   go4 dst_r (fromInteger n)
  where
    maxAlignment = wordAlignment dflags 
    effectiveAlignment = min (alignmentOf align) maxAlignment
    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
    c2 = c `shiftL` 8 .|. c
    c4 = c2 `shiftL` 16 .|. c2
    c8 = c4 `shiftL` 32 .|. c4
    
    
    insns = (n + sizeBytes - 1) `div` sizeBytes
    
    sizeBytes :: Integer
    sizeBytes = fromIntegral (formatInBytes format)
    
    
    gen4 :: AddrMode -> Integer -> (InstrBlock, Integer)
    gen4 addr size
        | size >= 4 =
            (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
        | size >= 2 =
            (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
        | size >= 1 =
            (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
        | otherwise = (nilOL, 0)
    
    gen8 :: AddrMode -> Reg -> InstrBlock
    gen8 addr reg8byte =
      unitOL (MOV format (OpReg reg8byte) (OpAddr addr))
    
    go4 :: Reg -> Integer -> InstrBlock
    go4 dst left =
      if left <= 0 then nilOL
      else curMov `appOL` go4 dst (left - curWidth)
      where
        possibleWidth = minimum [left, sizeBytes]
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
        (curMov, curWidth) = gen4 dst_addr possibleWidth
    
    
    
    go8 :: Reg -> Reg -> Integer -> InstrBlock
    go8 dst reg8byte left =
      if possibleWidth >= 8 then
        let curMov = gen8 dst_addr reg8byte
        in  curMov `appOL` go8 dst reg8byte (left - 8)
      else go4 dst left
      where
        possibleWidth = minimum [left, sizeBytes]
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ _ = return nilOL
        
        
genCCall _ _ (PrimTarget MO_Touch) _ _ _ = return nilOL
genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _  [src] _ =
        case n of
            0 -> genPrefetch src $ PREFETCH NTA  format
            1 -> genPrefetch src $ PREFETCH Lvl2 format
            2 -> genPrefetch src $ PREFETCH Lvl1 format
            3 -> genPrefetch src $ PREFETCH Lvl0 format
            l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
            
            
   where
        format = archWordFormat is32bit
        
        genPrefetch inRegSrc prefetchCTor =
            do
                code_src <- getAnyReg inRegSrc
                src_r <- getNewRegNat format
                return $ code_src src_r `appOL`
                  (unitOL (prefetchCTor  (OpAddr
                              ((AddrBaseIndex (EABaseReg src_r )   EAIndexNone (ImmInt 0))))  ))
                  
genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] _ = do
    let platform = targetPlatform dflags
    let dst_r = getRegisterReg platform (CmmLocal dst)
    case width of
        W64 | is32Bit -> do
               ChildCode64 vcode rlo <- iselExpr64 src
               let dst_rhi = getHiVRegFromLo dst_r
                   rhi     = getHiVRegFromLo rlo
               return $ vcode `appOL`
                        toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
                               MOV II32 (OpReg rhi) (OpReg dst_r),
                               BSWAP II32 dst_rhi,
                               BSWAP II32 dst_r ]
        W16 -> do code_src <- getAnyReg src
                  return $ code_src dst_r `appOL`
                           unitOL (BSWAP II32 dst_r) `appOL`
                           unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
        _   -> do code_src <- getAnyReg src
                  return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)
  where
    format = intFormat width
genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
         args@[src] bid = do
    sse4_2 <- sse4_2Enabled
    let platform = targetPlatform dflags
    if sse4_2
        then do code_src <- getAnyReg src
                src_r <- getNewRegNat format
                let dst_r = getRegisterReg platform  (CmmLocal dst)
                return $ code_src src_r `appOL`
                    (if width == W8 then
                         
                         unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
                         unitOL (POPCNT II16 (OpReg src_r) dst_r)
                     else
                         unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
                    (if width == W8 || width == W16 then
                         
                         
                         unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
                     else nilOL)
        else do
            targetExpr <- cmmMakeDynamicReference dflags
                          CallReference lbl
            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                                           [NoHint] [NoHint]
                                                           CmmMayReturn)
            genCCall dflags is32Bit target dest_regs args bid
  where
    format = intFormat width
    lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
         args@[src, mask] bid = do
    let platform = targetPlatform dflags
    if isBmi2Enabled dflags
        then do code_src  <- getAnyReg src
                code_mask <- getAnyReg mask
                src_r     <- getNewRegNat format
                mask_r    <- getNewRegNat format
                let dst_r = getRegisterReg platform  (CmmLocal dst)
                return $ code_src src_r `appOL` code_mask mask_r `appOL`
                    (if width == W8 then
                         
                         unitOL (MOVZxL II8  (OpReg src_r ) (OpReg src_r )) `appOL`
                         unitOL (MOVZxL II8  (OpReg mask_r) (OpReg mask_r)) `appOL`
                         unitOL (PDEP   II16 (OpReg mask_r) (OpReg src_r ) dst_r)
                     else
                         unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
                    (if width == W8 || width == W16 then
                         
                         
                         unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
                     else nilOL)
        else do
            targetExpr <- cmmMakeDynamicReference dflags
                          CallReference lbl
            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                                           [NoHint] [NoHint]
                                                           CmmMayReturn)
            genCCall dflags is32Bit target dest_regs args bid
  where
    format = intFormat width
    lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
         args@[src, mask] bid = do
    let platform = targetPlatform dflags
    if isBmi2Enabled dflags
        then do code_src  <- getAnyReg src
                code_mask <- getAnyReg mask
                src_r     <- getNewRegNat format
                mask_r    <- getNewRegNat format
                let dst_r = getRegisterReg platform  (CmmLocal dst)
                return $ code_src src_r `appOL` code_mask mask_r `appOL`
                    (if width == W8 then
                         
                         unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
                         unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
                         unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r)
                     else
                         unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
                    (if width == W8 || width == W16 then
                         
                         
                         unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
                     else nilOL)
        else do
            targetExpr <- cmmMakeDynamicReference dflags
                          CallReference lbl
            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                                           [NoHint] [NoHint]
                                                           CmmMayReturn)
            genCCall dflags is32Bit target dest_regs args bid
  where
    format = intFormat width
    lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] bid
  | is32Bit && width == W64 = do
    
    targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
    let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                           [NoHint] [NoHint]
                                           CmmMayReturn)
    genCCall dflags is32Bit target dest_regs args bid
  | otherwise = do
    code_src <- getAnyReg src
    let dst_r = getRegisterReg platform (CmmLocal dst)
    if isBmi2Enabled dflags
        then do
            src_r <- getNewRegNat (intFormat width)
            return $ appOL (code_src src_r) $ case width of
                W8 -> toOL
                    [ MOVZxL II8  (OpReg src_r)       (OpReg src_r) 
                    , LZCNT  II32 (OpReg src_r)       dst_r         
                    , SUB    II32 (OpImm (ImmInt 24)) (OpReg dst_r) 
                    ]
                W16 -> toOL
                    [ LZCNT  II16 (OpReg src_r) dst_r
                    , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) 
                    ]
                _ -> unitOL (LZCNT (intFormat width) (OpReg src_r) dst_r)
        else do
            let format = if width == W8 then II16 else intFormat width
            src_r <- getNewRegNat format
            tmp_r <- getNewRegNat format
            return $ code_src src_r `appOL` toOL
                     ([ MOVZxL  II8    (OpReg src_r) (OpReg src_r) | width == W8 ] ++
                      [ BSR     format (OpReg src_r) tmp_r
                      , MOV     II32   (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
                      , CMOV NE format (OpReg tmp_r) dst_r
                      , XOR     format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
                      ]) 
                         
                         
  where
    bw = widthInBits width
    platform = targetPlatform dflags
    lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width))
genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
  | is32Bit, width == W64 = do
      ChildCode64 vcode rlo <- iselExpr64 src
      let rhi     = getHiVRegFromLo rlo
          dst_r   = getRegisterReg platform  (CmmLocal dst)
      lbl1 <- getBlockIdNat
      lbl2 <- getBlockIdNat
      let format = if width == W8 then II16 else intFormat width
      tmp_r <- getNewRegNat format
      
      
      
      
      updateCfgNat (addWeightEdge bid lbl1 110 .
                    addWeightEdge lbl1 lbl2 110 .
                    addImmediateSuccessor bid lbl2)
      
      
      
      
      
      
      
      return $ vcode `appOL` toOL
               ([ MOV      II32 (OpReg rhi)         (OpReg tmp_r)
                , OR       II32 (OpReg rlo)         (OpReg tmp_r)
                , MOV      II32 (OpImm (ImmInt 64)) (OpReg dst_r)
                , JXX EQQ    lbl2
                , JXX ALWAYS lbl1
                , NEWBLOCK   lbl1
                , BSF     II32 (OpReg rhi)         dst_r
                , ADD     II32 (OpImm (ImmInt 32)) (OpReg dst_r)
                , BSF     II32 (OpReg rlo)         tmp_r
                , CMOV NE II32 (OpReg tmp_r)       dst_r
                , JXX ALWAYS lbl2
                , NEWBLOCK   lbl2
                ])
  | otherwise = do
    code_src <- getAnyReg src
    let dst_r = getRegisterReg platform (CmmLocal dst)
    if isBmi2Enabled dflags
    then do
        src_r <- getNewRegNat (intFormat width)
        return $ appOL (code_src src_r) $ case width of
            W8 -> toOL
                [ OR    II32 (OpImm (ImmInteger 0xFFFFFF00)) (OpReg src_r)
                , TZCNT II32 (OpReg src_r)        dst_r
                ]
            W16 -> toOL
                [ TZCNT  II16 (OpReg src_r) dst_r
                , MOVZxL II16 (OpReg dst_r) (OpReg dst_r)
                ]
            _ -> unitOL $ TZCNT (intFormat width) (OpReg src_r) dst_r
    else do
        
        
        let format = if width == W8 then II16 else intFormat width
        src_r <- getNewRegNat format
        tmp_r <- getNewRegNat format
        return $ code_src src_r `appOL` toOL
                 ([ MOVZxL  II8    (OpReg src_r) (OpReg src_r) | width == W8 ] ++
                  [ BSF     format (OpReg src_r) tmp_r
                  , MOV     II32   (OpImm (ImmInt bw)) (OpReg dst_r)
                  , CMOV NE format (OpReg tmp_r) dst_r
                  ]) 
                     
                     
  where
    bw = widthInBits width
    platform = targetPlatform dflags
genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args bid = do
    targetExpr <- cmmMakeDynamicReference dflags
                  CallReference lbl
    let target = ForeignTarget targetExpr (ForeignConvention CCallConv
                                           [NoHint] [NoHint]
                                           CmmMayReturn)
    genCCall dflags is32Bit target dest_regs args bid
  where
    lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width))
genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop))
                                           [dst] [addr, n] bid = do
    Amode amode addr_code <-
        if amop `elem` [AMO_Add, AMO_Sub]
        then getAmode addr
        else getSimpleAmode dflags is32Bit addr  
    arg <- getNewRegNat format
    arg_code <- getAnyReg n
    let platform = targetPlatform dflags
        dst_r    = getRegisterReg platform  (CmmLocal dst)
    code <- op_code dst_r arg amode
    return $ addr_code `appOL` arg_code arg `appOL` code
  where
    
    op_code :: Reg       
            -> Reg       
            -> AddrMode  
            -> NatM (OrdList Instr)
    op_code dst_r arg amode = case amop of
        
        
        
        AMO_Add  -> return $ toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
                                  , MOV format (OpReg arg) (OpReg dst_r)
                                  ]
        AMO_Sub  -> return $ toOL [ NEGI format (OpReg arg)
                                  , LOCK (XADD format (OpReg arg) (OpAddr amode))
                                  , MOV format (OpReg arg) (OpReg dst_r)
                                  ]
        AMO_And  -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
        AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
                                                    , NOT format dst
                                                    ])
        AMO_Or   -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
        AMO_Xor  -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
      where
        
        
        cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
                     -> NatM (OrdList Instr)
        cmpxchg_code instrs = do
            lbl <- getBlockIdNat
            tmp <- getNewRegNat format
            
            addImmediateSuccessorNat bid lbl
            updateCfgNat (addWeightEdge lbl lbl 0)
            return $ toOL
                [ MOV format (OpAddr amode) (OpReg eax)
                , JXX ALWAYS lbl
                , NEWBLOCK lbl
                  
                , MOV format (OpReg eax) (OpReg dst_r)
                , MOV format (OpReg eax) (OpReg tmp)
                ]
                `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
                [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
                , JXX NE lbl
                ]
    format = intFormat width
genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] _ = do
  load_code <- intLoadCode (MOV (intFormat width)) addr
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform  (CmmLocal dst)))
genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do
    code <- assignMem_IntCode (intFormat width) addr val
    return $ code `snocOL` MFENCE
genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ = do
    
    
    
    Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
    newval <- getNewRegNat format
    newval_code <- getAnyReg new
    oldval <- getNewRegNat format
    oldval_code <- getAnyReg old
    let platform = targetPlatform dflags
        dst_r    = getRegisterReg platform  (CmmLocal dst)
        code     = toOL
                   [ MOV format (OpReg oldval) (OpReg eax)
                   , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
                   , MOV format (OpReg eax) (OpReg dst_r)
                   ]
    return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
        `appOL` code
  where
    format = intFormat width
genCCall _ is32Bit target dest_regs args bid = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  case (target, dest_regs) of
    
    (PrimTarget op, []) ->
        outOfLineCmmOp bid op Nothing args
    
    (PrimTarget op, [r])  -> case op of
          MO_F32_Fabs -> case args of
            [x] -> sse2FabsCode W32 x
            _ -> panic "genCCall: Wrong number of arguments for fabs"
          MO_F64_Fabs -> case args of
            [x] -> sse2FabsCode W64 x
            _ -> panic "genCCall: Wrong number of arguments for fabs"
          MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args
          MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args
          _other_op -> outOfLineCmmOp bid op (Just r) args
       where
        actuallyInlineSSE2Op = actuallyInlineFloatOp'
        actuallyInlineFloatOp'  instr format [x]
              = do res <- trivialUFCode format (instr format) x
                   any <- anyReg res
                   return (any (getRegisterReg platform  (CmmLocal r)))
        actuallyInlineFloatOp' _ _ args
              = panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! ("
                      ++ show (length args) ++ ")"
        sse2FabsCode :: Width -> CmmExpr -> NatM InstrBlock
        sse2FabsCode w x = do
          let fmt = floatFormat w
          x_code <- getAnyReg x
          let
            const | FF32 <- fmt = CmmInt 0x7fffffff W32
                  | otherwise   = CmmInt 0x7fffffffffffffff W64
          Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
          tmp <- getNewRegNat fmt
          let
            code dst = x_code dst `appOL` amode_code `appOL` toOL [
                MOV fmt (OpAddr amode) (OpReg tmp),
                AND fmt (OpReg tmp) (OpReg dst)
                ]
          return $ code (getRegisterReg platform (CmmLocal r))
    (PrimTarget (MO_S_QuotRem  width), _) -> divOp1 platform True  width dest_regs args
    (PrimTarget (MO_U_QuotRem  width), _) -> divOp1 platform False width dest_regs args
    (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
    (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
        case args of
        [arg_x, arg_y] ->
            do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
               let format = intFormat width
               lCode <- anyReg =<< trivialCode width (ADD_CC format)
                                     (Just (ADD_CC format)) arg_x arg_y
               let reg_l = getRegisterReg platform (CmmLocal res_l)
                   reg_h = getRegisterReg platform (CmmLocal res_h)
                   code = hCode reg_h `appOL`
                          lCode reg_l `snocOL`
                          ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
               return code
        _ -> panic "genCCall: Wrong number of arguments/results for add2"
    (PrimTarget (MO_AddWordC width), [res_r, res_c]) ->
        addSubIntC platform ADD_CC (const Nothing) CARRY width res_r res_c args
    (PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
        addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
    (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
        addSubIntC platform ADD_CC (Just . ADD_CC) OFLO width res_r res_c args
    (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
        addSubIntC platform SUB_CC (const Nothing) OFLO width res_r res_c args
    (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
        case args of
        [arg_x, arg_y] ->
            do (y_reg, y_code) <- getRegOrMem arg_y
               x_code <- getAnyReg arg_x
               let format = intFormat width
                   reg_h = getRegisterReg platform (CmmLocal res_h)
                   reg_l = getRegisterReg platform (CmmLocal res_l)
                   code = y_code `appOL`
                          x_code rax `appOL`
                          toOL [MUL2 format y_reg,
                                MOV format (OpReg rdx) (OpReg reg_h),
                                MOV format (OpReg rax) (OpReg reg_l)]
               return code
        _ -> panic "genCCall: Wrong number of arguments/results for mul2"
    _ -> if is32Bit
         then genCCall32' dflags target dest_regs args
         else genCCall64' dflags target dest_regs args
  where divOp1 platform signed width results [arg_x, arg_y]
            = divOp platform signed width results Nothing arg_x arg_y
        divOp1 _ _ _ _ _
            = panic "genCCall: Wrong number of arguments for divOp1"
        divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
            = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
        divOp2 _ _ _ _ _
            = panic "genCCall: Wrong number of arguments for divOp2"
        
        divOp platform signed W8 [res_q, res_r] m_arg_x_high arg_x_low arg_y =
            let widen | signed = MO_SS_Conv W8 W16
                      | otherwise = MO_UU_Conv W8 W16
                arg_x_low_16 = CmmMachOp widen [arg_x_low]
                arg_y_16 = CmmMachOp widen [arg_y]
                m_arg_x_high_16 = (\p -> CmmMachOp widen [p]) <$> m_arg_x_high
            in divOp
                  platform signed W16 [res_q, res_r]
                  m_arg_x_high_16 arg_x_low_16 arg_y_16
        divOp platform signed width [res_q, res_r]
              m_arg_x_high arg_x_low arg_y
            = do let format = intFormat width
                     reg_q = getRegisterReg platform (CmmLocal res_q)
                     reg_r = getRegisterReg platform (CmmLocal res_r)
                     widen | signed    = CLTD format
                           | otherwise = XOR format (OpReg rdx) (OpReg rdx)
                     instr | signed    = IDIV
                           | otherwise = DIV
                 (y_reg, y_code) <- getRegOrMem arg_y
                 x_low_code <- getAnyReg arg_x_low
                 x_high_code <- case m_arg_x_high of
                                Just arg_x_high ->
                                    getAnyReg arg_x_high
                                Nothing ->
                                    return $ const $ unitOL widen
                 return $ y_code `appOL`
                          x_low_code rax `appOL`
                          x_high_code rdx `appOL`
                          toOL [instr format y_reg,
                                MOV format (OpReg rax) (OpReg reg_q),
                                MOV format (OpReg rdx) (OpReg reg_r)]
        divOp _ _ _ _ _ _ _
            = panic "genCCall: Wrong number of results for divOp"
        addSubIntC platform instr mrevinstr cond width
                   res_r res_c [arg_x, arg_y]
            = do let format = intFormat width
                 rCode <- anyReg =<< trivialCode width (instr format)
                                       (mrevinstr format) arg_x arg_y
                 reg_tmp <- getNewRegNat II8
                 let reg_c = getRegisterReg platform  (CmmLocal res_c)
                     reg_r = getRegisterReg platform  (CmmLocal res_r)
                     code = rCode reg_r `snocOL`
                            SETCC cond (OpReg reg_tmp) `snocOL`
                            MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
                 return code
        addSubIntC _ _ _ _ _ _ _ _
            = panic "genCCall: Wrong number of arguments/results for addSubIntC"
genCCall32' :: DynFlags
            -> ForeignTarget            
            -> [CmmFormal]        
            -> [CmmActual]        
            -> NatM InstrBlock
genCCall32' dflags target dest_regs args = do
        let
            prom_args = map (maybePromoteCArg dflags W32) args
            
            
            
            sizes               = map (arg_size_bytes . cmmExprType dflags) (reverse args)
            raw_arg_size        = sum sizes + wORD_SIZE dflags
            arg_pad_size        = (roundTo 16 $ raw_arg_size) - raw_arg_size
            tot_arg_size        = raw_arg_size + arg_pad_size - wORD_SIZE dflags
        delta0 <- getDeltaNat
        setDeltaNat (delta0 - arg_pad_size)
        push_codes <- mapM push_arg (reverse prom_args)
        delta <- getDeltaNat
        MASSERT(delta == delta0 - tot_arg_size)
        
        (callinsns,cconv) <-
          case target of
            ForeignTarget (CmmLit (CmmLabel lbl)) conv
               -> 
                  return (unitOL (CALL (Left fn_imm) []), conv)
               where fn_imm = ImmCLbl lbl
            ForeignTarget expr conv
               -> do { (dyn_r, dyn_c) <- getSomeReg expr
                     ; ASSERT( isWord32 (cmmExprType dflags expr) )
                       return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
            PrimTarget _
                -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
                            ++ "probably because too many return values."
        let push_code
                | arg_pad_size /= 0
                = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
                        DELTA (delta0 - arg_pad_size)]
                  `appOL` concatOL push_codes
                | otherwise
                = concatOL push_codes
              
              
              
              
              
            pop_size
               | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
               | otherwise = tot_arg_size
            call = callinsns `appOL`
                   toOL (
                      (if pop_size==0 then [] else
                       [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
                      ++
                      [DELTA delta0]
                   )
        setDeltaNat delta0
        dflags <- getDynFlags
        let platform = targetPlatform dflags
        let
            
            assign_code []     = nilOL
            assign_code [dest]
              | isFloatType ty =
                  
                  let tmp_amode = AddrBaseIndex (EABaseReg esp)
                                                       EAIndexNone
                                                       (ImmInt 0)
                      fmt = floatFormat w
                         in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
                                   DELTA (delta0 - b),
                                   X87Store fmt  tmp_amode,
                                   
                                   
                                   
                                   
                                   MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
                                   ADD II32 (OpImm (ImmInt b)) (OpReg esp),
                                   DELTA delta0]
              | isWord64 ty    = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
                                        MOV II32 (OpReg edx) (OpReg r_dest_hi)]
              | otherwise      = unitOL (MOV (intFormat w)
                                             (OpReg eax)
                                             (OpReg r_dest))
              where
                    ty = localRegType dest
                    w  = typeWidth ty
                    b  = widthInBytes w
                    r_dest_hi = getHiVRegFromLo r_dest
                    r_dest    = getRegisterReg platform  (CmmLocal dest)
            assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
        return (push_code `appOL`
                call `appOL`
                assign_code dest_regs)
      where
        
        arg_size_bytes :: CmmType -> Int
        arg_size_bytes ty = max (widthInBytes (typeWidth ty)) (widthInBytes (wordWidth dflags))
        roundTo a x | x `mod` a == 0 = x
                    | otherwise = x + a - (x `mod` a)
        push_arg :: CmmActual 
                        -> NatM InstrBlock  
        push_arg  arg 
          | isWord64 arg_ty = do
            ChildCode64 code r_lo <- iselExpr64 arg
            delta <- getDeltaNat
            setDeltaNat (delta - 8)
            let r_hi = getHiVRegFromLo r_lo
            return (       code `appOL`
                           toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
                                 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
                                 DELTA (delta-8)]
                )
          | isFloatType arg_ty = do
            (reg, code) <- getSomeReg arg
            delta <- getDeltaNat
            setDeltaNat (delta-size)
            return (code `appOL`
                            toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
                                  DELTA (delta-size),
                                  let addr = AddrBaseIndex (EABaseReg esp)
                                                            EAIndexNone
                                                            (ImmInt 0)
                                      format = floatFormat (typeWidth arg_ty)
                                  in
                                  
                                   MOV format (OpReg reg) (OpAddr addr)
                                 ]
                           )
          | otherwise = do
            
            
            
            ASSERT((typeWidth arg_ty) <= W32) return ()
            (operand, code) <- getOperand arg
            delta <- getDeltaNat
            setDeltaNat (delta-size)
            return (code `snocOL`
                    PUSH II32 operand `snocOL`
                    DELTA (delta-size))
          where
             arg_ty = cmmExprType dflags arg
             size = arg_size_bytes arg_ty 
genCCall64' :: DynFlags
            -> ForeignTarget      
            -> [CmmFormal]        
            -> [CmmActual]        
            -> NatM InstrBlock
genCCall64' dflags target dest_regs args = do
    
    let prom_args = map (maybePromoteCArg dflags W32) args
    (stack_args, int_regs_used, fp_regs_used, load_args_code, assign_args_code)
         <-
        if platformOS platform == OSMinGW32
        then load_args_win prom_args [] [] (allArgRegs platform) nilOL
        else do
           (stack_args, aregs, fregs, load_args_code, assign_args_code)
               <- load_args prom_args (allIntArgRegs platform)
                                      (allFPArgRegs platform)
                                      nilOL nilOL
           let used_regs rs as = reverse (drop (length rs) (reverse as))
               fregs_used      = used_regs fregs (allFPArgRegs platform)
               aregs_used      = used_regs aregs (allIntArgRegs platform)
           return (stack_args, aregs_used, fregs_used, load_args_code
                                                      , assign_args_code)
    let
        arg_regs_used = int_regs_used ++ fp_regs_used
        arg_regs = [eax] ++ arg_regs_used
                
        sse_regs = length fp_regs_used
        arg_stack_slots = if platformOS platform == OSMinGW32
                          then length stack_args + length (allArgRegs platform)
                          else length stack_args
        tot_arg_size = arg_size * arg_stack_slots
    
    
    
    (real_size, adjust_rsp) <-
        if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
            then return (tot_arg_size, nilOL)
            else do 
                delta <- getDeltaNat
                setDeltaNat (delta - wORD_SIZE dflags)
                return (tot_arg_size + wORD_SIZE dflags, toOL [
                                SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
                                DELTA (delta - wORD_SIZE dflags) ])
    
    push_code <- push_args (reverse stack_args) nilOL
    
    
    lss_code <- if platformOS platform == OSMinGW32
                then leaveStackSpace (length (allArgRegs platform))
                else return nilOL
    delta <- getDeltaNat
    
    (callinsns,_cconv) <-
      case target of
        ForeignTarget (CmmLit (CmmLabel lbl)) conv
           -> 
              return (unitOL (CALL (Left fn_imm) arg_regs), conv)
           where fn_imm = ImmCLbl lbl
        ForeignTarget expr conv
           -> do (dyn_r, dyn_c) <- getSomeReg expr
                 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
        PrimTarget _
            -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
                        ++ "probably because too many return values."
    let
        
        
        
        
        
        
        
        
        assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
    let call = callinsns `appOL`
               toOL (
                    
                    
                    
                  (if real_size==0 then [] else
                   [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
                  ++
                  [DELTA (delta + real_size)]
               )
    setDeltaNat (delta + real_size)
    let
        
        assign_code []     = nilOL
        assign_code [dest] =
          case typeWidth rep of
                W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
                                                     (OpReg xmm0)
                                                     (OpReg r_dest))
                W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
                                                     (OpReg xmm0)
                                                     (OpReg r_dest))
                _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
          where
                rep = localRegType dest
                r_dest = getRegisterReg platform  (CmmLocal dest)
        assign_code _many = panic "genCCall.assign_code many"
    return (adjust_rsp          `appOL`
            push_code           `appOL`
            load_args_code      `appOL`
            assign_args_code    `appOL`
            lss_code            `appOL`
            assign_eax sse_regs `appOL`
            call                `appOL`
            assign_code dest_regs)
  where platform = targetPlatform dflags
        arg_size = 8 
        load_args :: [CmmExpr]
                  -> [Reg]         
                  -> [Reg]         
                  -> InstrBlock    
                  -> InstrBlock    
                  -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
        
        load_args args [] [] code acode     =
            return (args, [], [], code, acode)
        
        load_args [] aregs fregs code acode =
            return ([], aregs, fregs, code, acode)
        load_args (arg : rest) aregs fregs code acode
            | isFloatType arg_rep = case fregs of
                 []     -> push_this_arg
                 (r:rs) -> do
                    (code',acode') <- reg_this_arg r
                    load_args rest aregs rs code' acode'
            | otherwise           = case aregs of
                 []     -> push_this_arg
                 (r:rs) -> do
                    (code',acode') <- reg_this_arg r
                    load_args rest rs fregs code' acode'
            where
              
              push_this_arg = do
                 (args',ars,frs,code',acode')
                     <- load_args rest aregs fregs code acode
                 return (arg:args', ars, frs, code', acode')
              
              reg_this_arg r
                
                | isOperand False arg = do
                    arg_code <- getAnyReg arg
                    return (code, (acode `appOL` arg_code r))
                
                
                | all (isOperand False) rest = do
                    arg_code   <- getAnyReg arg
                    return (code `appOL` arg_code r,acode)
                
                
                
                
                
                | otherwise     = do
                    arg_code <- getAnyReg arg
                    tmp      <- getNewRegNat arg_fmt
                    let
                      code'  = code `appOL` arg_code tmp
                      acode' = acode `snocOL` reg2reg arg_fmt tmp r
                    return (code',acode')
              arg_rep = cmmExprType dflags arg
              arg_fmt = cmmTypeFormat arg_rep
        load_args_win :: [CmmExpr]
                      -> [Reg]        
                      -> [Reg]        
                      -> [(Reg, Reg)] 
                      -> InstrBlock
                      -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
        load_args_win args usedInt usedFP [] code
            = return (args, usedInt, usedFP, code, nilOL)
            
        load_args_win [] usedInt usedFP _ code
            = return ([], usedInt, usedFP, code, nilOL)
            
        load_args_win (arg : rest) usedInt usedFP
                      ((ireg, freg) : regs) code
            | isFloatType arg_rep = do
                 arg_code <- getAnyReg arg
                 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
                               (code `appOL`
                                arg_code freg `snocOL`
                                
                                
                                
                                MOV II64 (OpReg freg) (OpReg ireg))
            | otherwise = do
                 arg_code <- getAnyReg arg
                 load_args_win rest (ireg : usedInt) usedFP regs
                               (code `appOL` arg_code ireg)
            where
              arg_rep = cmmExprType dflags arg
        push_args [] code = return code
        push_args (arg:rest) code
           | isFloatType arg_rep = do
             (arg_reg, arg_code) <- getSomeReg arg
             delta <- getDeltaNat
             setDeltaNat (delta-arg_size)
             let code' = code `appOL` arg_code `appOL` toOL [
                            SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp),
                            DELTA (delta-arg_size),
                            MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
             push_args rest code'
           | otherwise = do
             
             
             
             ASSERT(width <= W64) return ()
             (arg_op, arg_code) <- getOperand arg
             delta <- getDeltaNat
             setDeltaNat (delta-arg_size)
             let code' = code `appOL` arg_code `appOL` toOL [
                                    PUSH II64 arg_op,
                                    DELTA (delta-arg_size)]
             push_args rest code'
            where
              arg_rep = cmmExprType dflags arg
              width = typeWidth arg_rep
        leaveStackSpace n = do
             delta <- getDeltaNat
             setDeltaNat (delta - n * arg_size)
             return $ toOL [
                         SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
                         DELTA (delta - n * arg_size)]
maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
maybePromoteCArg dflags wto arg
 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
 | otherwise   = arg
 where
   wfrom = cmmExprWidth dflags arg
outOfLineCmmOp :: BlockId -> CallishMachOp -> Maybe CmmFormal -> [CmmActual]
               -> NatM InstrBlock
outOfLineCmmOp bid mop res args
  = do
      dflags <- getDynFlags
      targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
      let target = ForeignTarget targetExpr
                           (ForeignConvention CCallConv [] [] CmmMayReturn)
      stmtToInstrs bid (CmmUnsafeForeignCall target (catMaybes [res]) args)
  where
        
        
        
        lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
        fn = case mop of
              MO_F32_Sqrt  -> fsLit "sqrtf"
              MO_F32_Fabs  -> fsLit "fabsf"
              MO_F32_Sin   -> fsLit "sinf"
              MO_F32_Cos   -> fsLit "cosf"
              MO_F32_Tan   -> fsLit "tanf"
              MO_F32_Exp   -> fsLit "expf"
              MO_F32_Log   -> fsLit "logf"
              MO_F32_Asin  -> fsLit "asinf"
              MO_F32_Acos  -> fsLit "acosf"
              MO_F32_Atan  -> fsLit "atanf"
              MO_F32_Sinh  -> fsLit "sinhf"
              MO_F32_Cosh  -> fsLit "coshf"
              MO_F32_Tanh  -> fsLit "tanhf"
              MO_F32_Pwr   -> fsLit "powf"
              MO_F32_Asinh -> fsLit "asinhf"
              MO_F32_Acosh -> fsLit "acoshf"
              MO_F32_Atanh -> fsLit "atanhf"
              MO_F64_Sqrt  -> fsLit "sqrt"
              MO_F64_Fabs  -> fsLit "fabs"
              MO_F64_Sin   -> fsLit "sin"
              MO_F64_Cos   -> fsLit "cos"
              MO_F64_Tan   -> fsLit "tan"
              MO_F64_Exp   -> fsLit "exp"
              MO_F64_Log   -> fsLit "log"
              MO_F64_Asin  -> fsLit "asin"
              MO_F64_Acos  -> fsLit "acos"
              MO_F64_Atan  -> fsLit "atan"
              MO_F64_Sinh  -> fsLit "sinh"
              MO_F64_Cosh  -> fsLit "cosh"
              MO_F64_Tanh  -> fsLit "tanh"
              MO_F64_Pwr   -> fsLit "pow"
              MO_F64_Asinh  -> fsLit "asinh"
              MO_F64_Acosh  -> fsLit "acosh"
              MO_F64_Atanh  -> fsLit "atanh"
              MO_Memcpy _  -> fsLit "memcpy"
              MO_Memset _  -> fsLit "memset"
              MO_Memmove _ -> fsLit "memmove"
              MO_Memcmp _  -> fsLit "memcmp"
              MO_PopCnt _  -> fsLit "popcnt"
              MO_BSwap _   -> fsLit "bswap"
              
              MO_BRev w    -> fsLit $ bRevLabel w
              MO_Clz w     -> fsLit $ clzLabel w
              MO_Ctz _     -> unsupported
              MO_Pdep w    -> fsLit $ pdepLabel w
              MO_Pext w    -> fsLit $ pextLabel w
              MO_AtomicRMW _ _ -> fsLit "atomicrmw"
              MO_AtomicRead _  -> fsLit "atomicread"
              MO_AtomicWrite _ -> fsLit "atomicwrite"
              MO_Cmpxchg _     -> fsLit "cmpxchg"
              MO_UF_Conv _ -> unsupported
              MO_S_QuotRem {}  -> unsupported
              MO_U_QuotRem {}  -> unsupported
              MO_U_QuotRem2 {} -> unsupported
              MO_Add2 {}       -> unsupported
              MO_AddIntC {}    -> unsupported
              MO_SubIntC {}    -> unsupported
              MO_AddWordC {}   -> unsupported
              MO_SubWordC {}   -> unsupported
              MO_U_Mul2 {}     -> unsupported
              MO_WriteBarrier  -> unsupported
              MO_Touch         -> unsupported
              (MO_Prefetch_Data _ ) -> unsupported
        unsupported = panic ("outOfLineCmmOp: " ++ show mop
                          ++ " not supported here")
genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
genSwitch dflags expr targets
  | positionIndependent dflags
  = do
        (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset)
           
        lbl <- getNewLabelNat
        dflags <- getDynFlags
        let is32bit = target32Bit (targetPlatform dflags)
            os = platformOS (targetPlatform dflags)
            
            
            
            rosection = case os of
              
              
              
              
              
              OSDarwin | not is32bit -> Section Text lbl
              _ -> Section ReadOnlyData lbl
        dynRef <- cmmMakeDynamicReference dflags DataReference lbl
        (tableReg,t_code) <- getSomeReg $ dynRef
        let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
                                       (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
        offsetReg <- getNewRegNat (intFormat (wordWidth dflags))
        return $ if is32bit || os == OSDarwin
                 then e_code `appOL` t_code `appOL` toOL [
                                ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
                                JMP_TBL (OpReg tableReg) ids rosection lbl
                       ]
                 else 
                      
                      
                      
                      
                      
                      e_code `appOL` t_code `appOL` toOL [
                               MOVSxL II32 op (OpReg offsetReg),
                               ADD (intFormat (wordWidth dflags))
                                   (OpReg offsetReg)
                                   (OpReg tableReg),
                               JMP_TBL (OpReg tableReg) ids rosection lbl
                       ]
  | otherwise
  = do
        (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
        lbl <- getNewLabelNat
        let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
            code = e_code `appOL` toOL [
                    JMP_TBL op ids (Section ReadOnlyData lbl) lbl
                 ]
        return code
  where
    (offset, blockIds) = switchTargetsToTable targets
    ids = map (fmap DestBlockId) blockIds
generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
    = let getBlockId (DestBlockId id) = id
          getBlockId _ = panic "Non-Label target in Jump Table"
          blockIds = map (fmap getBlockId) ids
      in Just (createJumpTable dflags blockIds section lbl)
generateJumpTableForInstr _ _ = Nothing
createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
                -> GenCmmDecl (Alignment, CmmStatics) h g
createJumpTable dflags ids section lbl
    = let jumpTable
            | positionIndependent dflags =
                  let ww = wordWidth dflags
                      jumpTableEntryRel Nothing
                          = CmmStaticLit (CmmInt 0 ww)
                      jumpTableEntryRel (Just blockid)
                          = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww)
                          where blockLabel = blockLbl blockid
                  in map jumpTableEntryRel ids
            | otherwise = map (jumpTableEntry dflags) ids
      in CmmData section (mkAlignment 1, Statics lbl jumpTable)
extractUnwindPoints :: [Instr] -> [UnwindPoint]
extractUnwindPoints instrs =
    [ UnwindPoint lbl unwinds | UNWIND lbl unwinds <- instrs]
condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg cond x y = do
  CondCode _ cond cond_code <- condIntCode cond x y
  tmp <- getNewRegNat II8
  let
        code dst = cond_code `appOL` toOL [
                    SETCC cond (OpReg tmp),
                    MOVZxL II8 (OpReg tmp) (OpReg dst)
                  ]
  return (Any II32 code)
condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg is32Bit cond x y = condFltReg_sse2
 where
  condFltReg_sse2 = do
    CondCode _ cond cond_code <- condFltCode cond x y
    tmp1 <- getNewRegNat (archWordFormat is32Bit)
    tmp2 <- getNewRegNat (archWordFormat is32Bit)
    let 
        code dst =
           cond_code `appOL`
             (case cond of
                NE  -> or_unordered dst
                GU  -> plain_test   dst
                GEU -> plain_test   dst
                
                LTT -> ASSERT2(False, ppr "Should have been turned into >")
                       and_ordered  dst
                LE  -> ASSERT2(False, ppr "Should have been turned into >=")
                       and_ordered  dst
                _   -> and_ordered  dst)
        plain_test dst = toOL [
                    SETCC cond (OpReg tmp1),
                    MOVZxL II8 (OpReg tmp1) (OpReg dst)
                 ]
        or_unordered dst = toOL [
                    SETCC cond (OpReg tmp1),
                    SETCC PARITY (OpReg tmp2),
                    OR II8 (OpReg tmp1) (OpReg tmp2),
                    MOVZxL II8 (OpReg tmp2) (OpReg dst)
                  ]
        and_ordered dst = toOL [
                    SETCC cond (OpReg tmp1),
                    SETCC NOTPARITY (OpReg tmp2),
                    AND II8 (OpReg tmp1) (OpReg tmp2),
                    MOVZxL II8 (OpReg tmp2) (OpReg dst)
                  ]
    return (Any II32 code)
trivialCode :: Width -> (Operand -> Operand -> Instr)
            -> Maybe (Operand -> Operand -> Instr)
            -> CmmExpr -> CmmExpr -> NatM Register
trivialCode width instr m a b
    = do is32Bit <- is32BitPlatform
         trivialCode' is32Bit width instr m a b
trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
             -> Maybe (Operand -> Operand -> Instr)
             -> CmmExpr -> CmmExpr -> NatM Register
trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
  | is32BitLit is32Bit lit_a = do
  b_code <- getAnyReg b
  let
       code dst
         = b_code dst `snocOL`
           revinstr (OpImm (litToImm lit_a)) (OpReg dst)
  return (Any (intFormat width) code)
trivialCode' _ width instr _ a b
  = genTrivialCode (intFormat width) instr a b
genTrivialCode :: Format -> (Operand -> Operand -> Instr)
               -> CmmExpr -> CmmExpr -> NatM Register
genTrivialCode rep instr a b = do
  (b_op, b_code) <- getNonClobberedOperand b
  a_code <- getAnyReg a
  tmp <- getNewRegNat rep
  let
     
     
     
     
     
     
     code dst
        | dst `regClashesWithOp` b_op =
                b_code `appOL`
                unitOL (MOV rep b_op (OpReg tmp)) `appOL`
                a_code dst `snocOL`
                instr (OpReg tmp) (OpReg dst)
        | otherwise =
                b_code `appOL`
                a_code dst `snocOL`
                instr b_op (OpReg dst)
  return (Any rep code)
regClashesWithOp :: Reg -> Operand -> Bool
reg `regClashesWithOp` OpReg reg2   = reg == reg2
reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
_   `regClashesWithOp` _            = False
trivialUCode :: Format -> (Operand -> Instr)
             -> CmmExpr -> NatM Register
trivialUCode rep instr x = do
  x_code <- getAnyReg x
  let
     code dst =
        x_code dst `snocOL`
        instr (OpReg dst)
  return (Any rep code)
trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialFCode_sse2 pk instr x y
    = genTrivialCode format (instr format) x y
    where format = floatFormat pk
trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
trivialUFCode format instr x = do
  (x_reg, x_code) <- getSomeReg x
  let
     code dst =
        x_code `snocOL`
        instr x_reg dst
  return (Any format code)
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP from to x =  coerce_sse2
 where
   coerce_sse2 = do
     (x_op, x_code) <- getOperand x  
     let
           opc  = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
                             n -> panic $ "coerceInt2FP.sse: unhandled width ("
                                         ++ show n ++ ")"
           code dst = x_code `snocOL` opc (intFormat from) x_op dst
     return (Any (floatFormat to) code)
        
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int from to x =  coerceFP2Int_sse2
 where
   coerceFP2Int_sse2 = do
     (x_op, x_code) <- getOperand x  
     let
           opc  = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
                               n -> panic $ "coerceFP2Init.sse: unhandled width ("
                                           ++ show n ++ ")"
           code dst = x_code `snocOL` opc (intFormat to) x_op dst
     return (Any (intFormat to) code)
         
coerceFP2FP :: Width -> CmmExpr -> NatM Register
coerceFP2FP to x = do
  (x_reg, x_code) <- getSomeReg x
  let
        opc  = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
                                     n -> panic $ "coerceFP2FP: unhandled width ("
                                                 ++ show n ++ ")"
        code dst = x_code `snocOL` opc x_reg dst
  return (Any ( floatFormat to) code)
sse2NegCode :: Width -> CmmExpr -> NatM Register
sse2NegCode w x = do
  let fmt = floatFormat w
  x_code <- getAnyReg x
  
  let
    const = case fmt of
      FF32 -> CmmInt 0x80000000 W32
      FF64 -> CmmInt 0x8000000000000000 W64
      x@II8  -> wrongFmt x
      x@II16 -> wrongFmt x
      x@II32 -> wrongFmt x
      x@II64 -> wrongFmt x
      where
        wrongFmt x = panic $ "sse2NegCode: " ++ show x
  Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
  tmp <- getNewRegNat fmt
  let
    code dst = x_code dst `appOL` amode_code `appOL` toOL [
        MOV fmt (OpAddr amode) (OpReg tmp),
        XOR fmt (OpReg tmp) (OpReg dst)
        ]
  
  return (Any fmt code)
isVecExpr :: CmmExpr -> Bool
isVecExpr (CmmMachOp (MO_V_Insert {}) _)   = True
isVecExpr (CmmMachOp (MO_V_Extract {}) _)  = True
isVecExpr (CmmMachOp (MO_V_Add {}) _)      = True
isVecExpr (CmmMachOp (MO_V_Sub {}) _)      = True
isVecExpr (CmmMachOp (MO_V_Mul {}) _)      = True
isVecExpr (CmmMachOp (MO_VS_Quot {}) _)    = True
isVecExpr (CmmMachOp (MO_VS_Rem {}) _)     = True
isVecExpr (CmmMachOp (MO_VS_Neg {}) _)     = True
isVecExpr (CmmMachOp (MO_VF_Insert {}) _)  = True
isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Add {}) _)     = True
isVecExpr (CmmMachOp (MO_VF_Sub {}) _)     = True
isVecExpr (CmmMachOp (MO_VF_Mul {}) _)     = True
isVecExpr (CmmMachOp (MO_VF_Quot {}) _)    = True
isVecExpr (CmmMachOp (MO_VF_Neg {}) _)     = True
isVecExpr (CmmMachOp _ [e])                = isVecExpr e
isVecExpr _                                = False
needLlvm :: NatM a
needLlvm =
    sorry $ unlines ["The native code generator does not support vector"
                    ,"instructions. Please use -fllvm."]
invertCondBranches :: CFG -> LabelMap a -> [NatBasicBlock Instr]
                   -> [NatBasicBlock Instr]
invertCondBranches cfg keep bs =
    
    invert bs
  where
    invert :: [NatBasicBlock Instr] -> [NatBasicBlock Instr]
    invert ((BasicBlock lbl1 ins@(_:_:_xs)):b2@(BasicBlock lbl2 _):bs)
      | 
        (jmp1,jmp2) <- last2 ins
      , JXX cond1 target1 <- jmp1
      , target1 == lbl2
      
      , JXX ALWAYS target2 <- jmp2
      
      
      
      
      , Just edgeInfo1 <- getEdgeInfo lbl1 target1 cfg
      , Just edgeInfo2 <- getEdgeInfo lbl1 target2 cfg
      
      , transitionSource edgeInfo1 == transitionSource edgeInfo2
      , (CmmSource cmmCondBranch) <- transitionSource edgeInfo1
      
      , CmmCondBranch (CmmMachOp op _args) _ _ _ <- cmmCondBranch
      , Just _ <- maybeIntComparison op
      , Just invCond <- maybeInvertCond cond1
      
      = let jumps =
              case () of
                
                _ | not (mapMember target1 keep)
                    -> [JXX invCond target2]
                
                
                  | edgeWeight edgeInfo2 > edgeWeight edgeInfo1
                    -> [JXX invCond target2, JXX ALWAYS target1]
                
                  | otherwise
                    -> [jmp1, jmp2]
        in 
           (BasicBlock lbl1
            (dropTail 2 ins ++ jumps))
            : invert (b2:bs)
    invert (b:bs) = b : invert bs
    invert [] = []