{-# OPTIONS_GHC -fno-warn-orphans #-}

module GHC.CmmToAsm.LA64.Instr where

import GHC.Prelude

import GHC.CmmToAsm.LA64.Cond
import GHC.CmmToAsm.LA64.Regs

import GHC.CmmToAsm.Instr (RegUsage(..))
import GHC.CmmToAsm.Format
import GHC.CmmToAsm.Types
import GHC.CmmToAsm.Utils
import GHC.CmmToAsm.Config
import GHC.Platform.Reg

import GHC.Platform.Regs
import GHC.Platform.Reg.Class.Separate
import GHC.Cmm.BlockId
import GHC.Cmm.Dataflow.Label
import GHC.Cmm
import GHC.Cmm.CLabel
import GHC.Utils.Outputable
import GHC.Platform
import GHC.Types.Unique.DSM

import GHC.Utils.Panic
import Data.Maybe
import GHC.Stack
import GHC.Data.FastString (LexicalFastString)

-- | Size, in bytes, of the header at the start of each stack frame.
-- The header is two spill-slot-sized words: each stack frame contains
-- ra and fp (prologue).
stackFrameHeaderSize :: Int
stackFrameHeaderSize = spillSlotSize * 2

-- | Size of a single spill slot, in bytes.
-- All registers are 8 byte wide.
spillSlotSize :: Int
spillSlotSize = 8

-- | The number of bytes that the stack pointer should be aligned to.
-- 'allocMoreStack' rounds the extra stack space it reserves up to a
-- multiple of this value.
stackAlign :: Int
stackAlign = 16

-- | The number of spill slots available without allocating more.
--
-- The frame header is subtracted from the pre-allocated spill area, the
-- remainder is divided into slots, and one slot is held back.
maxSpillSlots :: NCGConfig -> Int
maxSpillSlots config = usableBytes `div` spillSlotSize - 1
  where
    -- Pre-allocated spill area minus the frame header.
    usableBytes = ncgSpillPreallocSize config - stackFrameHeaderSize

-- | Convert a spill slot number to a *byte* offset.
-- Slots are laid out back to back, directly after the stack frame header.
spillSlotToOffset :: Int -> Int
spillSlotToOffset slot = slot * spillSlotSize + stackFrameHeaderSize

-- | Debug rendering of a 'RegUsage': the read set followed by the write set.
instance Outputable RegUsage where
    ppr (RU readRegs writeRegs) =
      text "RegUsage(reads:" <+> ppr readRegs <> comma
        <+> text "writes:" <+> ppr writeRegs <> char ')'

-- | Get the registers that are being used by this instruction.
--
-- This only states which registers the instruction reads and which it
-- writes; no special treatment is needed for jumps and the like. The
-- consequences of control flow transfers, as far as register allocation
-- goes, are taken care of by the register allocator.
--
-- RegUsage = RU [<read regs>] [<write regs>]
--
-- Panics (naming the constructor) for any instruction not listed below.
regUsageOfInstr :: Platform -> Instr -> RegUsage
regUsageOfInstr platform instr = case instr of
  -- Pseudo Instructions
  ANN _ inner              -> regUsageOfInstr platform inner
  COMMENT{}                -> noRegs
  MULTILINE_COMMENT{}      -> noRegs
  PUSH_STACK_FRAME         -> noRegs
  POP_STACK_FRAME          -> noRegs
  DELTA{}                  -> noRegs
  LOCATION{}               -> noRegs

  -- 1. Arithmetic Instructions ------------------------------------------------
  ADD dst a b              -> defUse dst [a, b]
  SUB dst a b              -> defUse dst [a, b]
  ALSL dst a b c           -> defUse dst [a, b, c]
  ALSLU dst a b c          -> defUse dst [a, b, c]
  LU12I dst a              -> defUse dst [a]
  LU32I dst a              -> defUse dst [a]
  LU52I dst a b            -> defUse dst [a, b]
  SSLT dst a b             -> defUse dst [a, b]
  SSLTU dst a b            -> defUse dst [a, b]
  PCADDI dst a             -> defUse dst [a]
  PCADDU12I dst a          -> defUse dst [a]
  PCADDU18I dst a          -> defUse dst [a]
  PCALAU12I dst a          -> defUse dst [a]
  AND dst a b              -> defUse dst [a, b]
  OR dst a b               -> defUse dst [a, b]
  XOR dst a b              -> defUse dst [a, b]
  NOR dst a b              -> defUse dst [a, b]
  ANDN dst a b             -> defUse dst [a, b]
  ORN dst a b              -> defUse dst [a, b]
  MUL dst a b              -> defUse dst [a, b]
  MULW dst a b             -> defUse dst [a, b]
  MULWU dst a b            -> defUse dst [a, b]
  MULH dst a b             -> defUse dst [a, b]
  MULHU dst a b            -> defUse dst [a, b]
  DIV dst a b              -> defUse dst [a, b]
  DIVU dst a b             -> defUse dst [a, b]
  MOD dst a b              -> defUse dst [a, b]
  MODU dst a b             -> defUse dst [a, b]
  -- 2. Bit-shift Instructions -------------------------------------------------
  SLL dst a b              -> defUse dst [a, b]
  SRL dst a b              -> defUse dst [a, b]
  SRA dst a b              -> defUse dst [a, b]
  ROTR dst a b             -> defUse dst [a, b]
  -- 3. Bit Manipulation Instructions ------------------------------------------
  EXT dst a                -> defUse dst [a]
  CLO dst a                -> defUse dst [a]
  CLZ dst a                -> defUse dst [a]
  CTO dst a                -> defUse dst [a]
  CTZ dst a                -> defUse dst [a]
  BYTEPICK dst a b c       -> defUse dst [a, b, c]
  REVB2H dst a             -> defUse dst [a]
  REVB4H dst a             -> defUse dst [a]
  REVB2W dst a             -> defUse dst [a]
  REVBD dst a              -> defUse dst [a]
  REVH2W dst a             -> defUse dst [a]
  REVHD dst a              -> defUse dst [a]
  BITREV4B dst a           -> defUse dst [a]
  BITREV8B dst a           -> defUse dst [a]
  BITREVW dst a            -> defUse dst [a]
  BITREVD dst a            -> defUse dst [a]
  BSTRINS _ dst a b c      -> defUse dst [a, b, c]
  BSTRPICK _ dst a b c     -> defUse dst [a, b, c]
  MASKEQZ dst a b          -> defUse dst [a, b]
  MASKNEZ dst a b          -> defUse dst [a, b]
  --
  -- Pseudo instructions
  NOP                      -> noRegs
  MOV dst src              -> defUse dst [src]
  NEG dst src              -> defUse dst [src]
  CSET _cond dst a b       -> defUse dst [a, b]
  -- 4. Branch Instructions ----------------------------------------------------
  J t                      -> jump t []
  J_TBL _ _ t              -> usage ([t], [])
  B t                      -> jump t []
  -- Calls read their target and argument registers and are recorded as
  -- writing every register in 'callerSavedRegisters'.
  BL t ps                  -> usage (regTarget t ++ ps, callerSavedRegisters)
  CALL t ps                -> usage (regTarget t ++ ps, callerSavedRegisters)
  CALL36 t                 -> jump t []
  -- The operand of TAIL36 is recorded as written, not read.
  TAIL36 r t               -> usage (regTarget t, regOp r)
  -- Here two kinds of BCOND and BCOND1 are implemented, mainly because we want
  -- to distinguish between two kinds of conditional jumps with different jump
  -- ranges, corresponding to 2 and 1 instruction implementations respectively.
  --
  -- BCOND1 is selected by default.
  BCOND1 _ j d t           -> jump t [j, d]
  BCOND _ j d t            -> jump t [j, d]
  BEQZ j t                 -> jump t [j]
  BNEZ j t                 -> jump t [j]
  -- 5. Common Memory Access Instructions --------------------------------------
  -- Stores only read: both the address operand and the stored value.
  LD _ dst src             -> defUse dst [src]
  LDU _ dst src            -> defUse dst [src]
  ST _ dst src             -> useOnly [src, dst]
  LDX _ dst src            -> defUse dst [src]
  LDXU _ dst src           -> defUse dst [src]
  STX _ dst src            -> useOnly [src, dst]
  LDPTR _ dst src          -> defUse dst [src]
  STPTR _ dst src          -> useOnly [src, dst]
  PRELD _hint src          -> useOnly [src]
  -- 6. Bound Check Memory Access Instructions ---------------------------------
  -- LDCOND dst src1 src2     -> usage (regOp src1 ++ regOp src2, regOp dst)
  -- STCOND dst src1 src2     -> usage (regOp src1 ++ regOp src2, regOp dst)
  -- 7. Atomic Memory Access Instructions --------------------------------------
  -- 8. Barrier Instructions ---------------------------------------------------
  DBAR _hint               -> noRegs
  IBAR _hint               -> noRegs
  -- 11. Floating Point Instructions -------------------------------------------
  FMAX dst a b             -> defUse dst [a, b]
  FMIN dst a b             -> defUse dst [a, b]
  FMAXA dst a b            -> defUse dst [a, b]
  FMINA dst a b            -> defUse dst [a, b]
  FNEG dst a               -> defUse dst [a]

  FCVT dst src             -> defUse dst [src]
  -- SCVTF dst src            -> usage (regOp src, regOp src ++ regOp dst)
  SCVTF dst src            -> defUse dst [src]
  -- For FCVTZS the middle operand is recorded as written alongside dst.
  FCVTZS dst src1 src2     -> usage (regOp src2, regOp src1 ++ regOp dst)
  FABS dst src             -> defUse dst [src]
  FSQRT dst src            -> defUse dst [src]
  FMA _ dst a b c          -> defUse dst [a, b, c]

  _ -> panic $ "regUsageOfInstr: " ++ instrCon instr

  where
        -- Build a 'RegUsage' from raw (read, written) register lists.
        -- Filtering the usage is necessary, otherwise the register
        -- allocator will try to allocate pre-defined fixed stg
        -- registers as well, as they show up.
        usage :: ([Reg], [Reg]) -> RegUsage
        usage (srcRegs, dstRegs) = RU (prepare srcRegs) (prepare dstRegs)
          where
            prepare = map withFormat . filter allocatable

        -- An instruction that touches no registers at all.
        noRegs :: RegUsage
        noRegs = usage ([], [])

        -- Writes @dst@; reads the registers of every operand in @srcs@,
        -- in the order given.
        defUse :: Operand -> [Operand] -> RegUsage
        defUse dst srcs = usage (concatMap regOp srcs, regOp dst)

        -- Reads the registers of every operand in @ops@; writes nothing.
        useOnly :: [Operand] -> RegUsage
        useOnly ops = usage (concatMap regOp ops, [])

        -- Reads the target's register (if any) followed by the registers of
        -- the given operands; writes nothing.
        jump :: Target -> [Operand] -> RegUsage
        jump t ops = usage (regTarget t ++ concatMap regOp ops, [])

        -- Attach a format to a register, chosen from its register class.
        withFormat reg = RegWithFormat reg regFormat
          where
            regFormat = case regClass of
              RcInteger -> II64
              RcFloat   -> FF64
              RcVector  -> sorry "The LoongArch64 NCG does not (yet) support vectors; please use -fllvm."
            regClass = case reg of
              RegVirtual vr -> classOfVirtualReg (platformArch platform) vr
              RegReal rr -> classOfRealReg rr

        -- Registers mentioned by an addressing mode.
        regAddr :: AddrMode -> [Reg]
        regAddr addr = case addr of
          AddrRegReg r1 r2 -> [r1, r2]
          AddrRegImm r1 _  -> [r1]
          AddrReg r1       -> [r1]

        -- Registers mentioned by an operand.
        regOp :: Operand -> [Reg]
        regOp op = case op of
          OpReg _ r1 -> [r1]
          OpAddr a   -> regAddr a
          OpImm _    -> []

        -- Register mentioned by a jump target, if it is a register target.
        regTarget :: Target -> [Reg]
        regTarget tgt = case tgt of
          TBlock _ -> []
          TLabel _ -> []
          TReg r1  -> [r1]

        -- Is this register interesting for the register allocator?
        -- Virtual registers always are; real ones only if 'freeReg' says so.
        allocatable :: Reg -> Bool
        allocatable (RegVirtual _)              = True
        allocatable (RegReal (RealRegSingle i)) = freeReg platform i

-- | Caller-saved registers (according to calling convention)
--
-- The table maps register numbers (as passed to 'regSingle') to their names;
-- the upper half lists the general purpose registers, the lower half the
-- floating point ones.
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- |  0 |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- |zero| ra | tp | sp | a0 | a1 | a2 | a3 | a4 | a5 | a6 | a7 | t0 | t1 | t2 | t3 | t4 | t5 | t6 | t7 | t8 | Rv | fp | s0 | s1 | s2 | s3 | s4 | s5 | s6 | s7 | s8 |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
--f| a0 | a1 | a2 | a3 | a4 | a5 | a6 | a7 | t0 | t1 | t2 | t3 | t4 | t5 | t6 | t7 | t8 | t9 | t10| t11| t12| t13| t14| t15| s0 | s1 | s2 | s3 | s4 | s5 | s6 | s7 |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
callerSavedRegisters :: [Reg]
callerSavedRegisters =
    -- TODO: Not sure.
    map regSingle $
         [1]          -- ra
      ++ [4 .. 11]    -- a0 - a7
      ++ [12 .. 20]   -- t0 - t8
      ++ [32 .. 39]   -- fa0 - fa7
      ++ [40 .. 55]   -- ft0 - ft15

-- | Apply a given mapping to all the register references in this instruction.
--
-- Every register found in an operand, addressing mode or register jump
-- target is passed through @env@. Meta instructions, barriers and @NOP@ are
-- returned unchanged, and the argument register lists of @BL@ / @CALL@ are
-- left as they are. Panics (naming the constructor) for any instruction not
-- listed below.
patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
patchRegsOfInstr instr env = case instr of
    -- 0. Meta Instructions
    ANN d i             -> ANN d (patchRegsOfInstr i env)
    COMMENT{}           -> instr
    MULTILINE_COMMENT{} -> instr
    PUSH_STACK_FRAME    -> instr
    POP_STACK_FRAME     -> instr
    DELTA{}             -> instr
    LOCATION{}          -> instr
    -- 1. Arithmetic Instructions ------------------------------------------------
    ADD a b c           -> op3 ADD a b c
    SUB a b c           -> op3 SUB a b c
    ALSL a b c e        -> op4 ALSL a b c e
    ALSLU a b c e       -> op4 ALSLU a b c e
    LU12I a b           -> op2 LU12I a b
    LU32I a b           -> op2 LU32I a b
    LU52I a b c         -> op3 LU52I a b c
    SSLT a b c          -> op3 SSLT a b c
    SSLTU a b c         -> op3 SSLTU a b c
    PCADDI a b          -> op2 PCADDI a b
    PCADDU12I a b       -> op2 PCADDU12I a b
    PCADDU18I a b       -> op2 PCADDU18I a b
    PCALAU12I a b       -> op2 PCALAU12I a b
    AND a b c           -> op3 AND a b c
    OR a b c            -> op3 OR a b c
    XOR a b c           -> op3 XOR a b c
    NOR a b c           -> op3 NOR a b c
    ANDN a b c          -> op3 ANDN a b c
    ORN a b c           -> op3 ORN a b c
    MUL a b c           -> op3 MUL a b c
    MULW a b c          -> op3 MULW a b c
    MULWU a b c         -> op3 MULWU a b c
    MULH a b c          -> op3 MULH a b c
    MULHU a b c         -> op3 MULHU a b c
    DIV a b c           -> op3 DIV a b c
    MOD a b c           -> op3 MOD a b c
    DIVU a b c          -> op3 DIVU a b c
    MODU a b c          -> op3 MODU a b c
    -- 2. Bit-shift Instructions -------------------------------------------------
    SLL a b c           -> op3 SLL a b c
    SRL a b c           -> op3 SRL a b c
    SRA a b c           -> op3 SRA a b c
    ROTR a b c          -> op3 ROTR a b c
    -- 3. Bit Manipulation Instructions ------------------------------------------
    EXT a b             -> op2 EXT a b
    CLO a b             -> op2 CLO a b
    CLZ a b             -> op2 CLZ a b
    CTO a b             -> op2 CTO a b
    CTZ a b             -> op2 CTZ a b
    BYTEPICK a b c e    -> op4 BYTEPICK a b c e
    REVB2H a b          -> op2 REVB2H a b
    REVB4H a b          -> op2 REVB4H a b
    REVB2W a b          -> op2 REVB2W a b
    REVBD a b           -> op2 REVBD a b
    REVH2W a b          -> op2 REVH2W a b
    REVHD a b           -> op2 REVHD a b
    BITREV4B a b        -> op2 BITREV4B a b
    BITREV8B a b        -> op2 BITREV8B a b
    BITREVW a b         -> op2 BITREVW a b
    BITREVD a b         -> op2 BITREVD a b
    BSTRINS f a b c e   -> op4 (BSTRINS f) a b c e
    BSTRPICK f a b c e  -> op4 (BSTRPICK f) a b c e
    MASKEQZ a b c       -> op3 MASKEQZ a b c
    MASKNEZ a b c       -> op3 MASKNEZ a b c
    --
    -- Pseudo instructions
    NOP                 -> instr
    MOV a b             -> op2 MOV a b
    NEG a b             -> op2 NEG a b
    CSET cond a b c     -> op3 (CSET cond) a b c
    -- 4. Branch Instructions ----------------------------------------------------
    -- TODO:
    J t                 -> J (patchTarget t)
    J_TBL ids mbLbl r   -> J_TBL ids mbLbl (env r)
    B t                 -> B (patchTarget t)
    BL t ps             -> BL (patchTarget t) ps
    CALL t ps           -> CALL (patchTarget t) ps
    CALL36 t            -> CALL36 (patchTarget t)
    TAIL36 r t          -> TAIL36 (patchOp r) (patchTarget t)
    BCOND1 cond j k t   -> BCOND1 cond (patchOp j) (patchOp k) (patchTarget t)
    BCOND cond j k t    -> BCOND cond (patchOp j) (patchOp k) (patchTarget t)
    BEQZ j t            -> BEQZ (patchOp j) (patchTarget t)
    BNEZ j t            -> BNEZ (patchOp j) (patchTarget t)
    -- 5. Common Memory Access Instructions --------------------------------------
    -- TODO:
    LD f a b            -> op2 (LD f) a b
    LDU f a b           -> op2 (LDU f) a b
    ST f a b            -> op2 (ST f) a b
    LDX f a b           -> op2 (LDX f) a b
    LDXU f a b          -> op2 (LDXU f) a b
    STX f a b           -> op2 (STX f) a b
    LDPTR f a b         -> op2 (LDPTR f) a b
    STPTR f a b         -> op2 (STPTR f) a b
    PRELD a b           -> op2 PRELD a b
    -- 6. Bound Check Memory Access Instructions ---------------------------------
    -- LDCOND o1 o2 o3       -> LDCOND  (patchOp o1)  (patchOp o2)  (patchOp o3)
    -- STCOND o1 o2 o3       -> STCOND  (patchOp o1)  (patchOp o2)  (patchOp o3)
    -- 7. Atomic Memory Access Instructions --------------------------------------
    -- 8. Barrier Instructions ---------------------------------------------------
    -- TODO: need fix
    DBAR{}              -> instr
    IBAR{}              -> instr
    -- 11. Floating Point Instructions -------------------------------------------
    FCVT a b            -> op2 FCVT a b
    SCVTF a b           -> op2 SCVTF a b
    FCVTZS a b c        -> op3 FCVTZS a b c
    FMIN a b c          -> op3 FMIN a b c
    FMAX a b c          -> op3 FMAX a b c
    FMINA a b c         -> op3 FMINA a b c
    FMAXA a b c         -> op3 FMAXA a b c
    FNEG a b            -> op2 FNEG a b
    FABS a b            -> op2 FABS a b
    FSQRT a b           -> op2 FSQRT a b
    FMA s a b c e       -> op4 (FMA s) a b c e

    _                   -> panic $ "patchRegsOfInstr: " ++ instrCon instr
    where
        -- Rebuild an instruction with two, three or four patched operands.
        op2 :: (Operand -> Operand -> Instr) -> Operand -> Operand -> Instr
        op2 con a b = con (patchOp a) (patchOp b)

        op3 :: (Operand -> Operand -> Operand -> Instr)
            -> Operand -> Operand -> Operand -> Instr
        op3 con a b c = con (patchOp a) (patchOp b) (patchOp c)

        op4 :: (Operand -> Operand -> Operand -> Operand -> Instr)
            -> Operand -> Operand -> Operand -> Operand -> Instr
        op4 con a b c e = con (patchOp a) (patchOp b) (patchOp c) (patchOp e)

        -- TODO:
        -- Rename the registers inside an operand; anything else (immediates)
        -- is passed through untouched.
        patchOp :: Operand -> Operand
        patchOp op = case op of
          OpReg w r -> OpReg w (env r)
          OpAddr a  -> OpAddr (patchAddr a)
          _         -> op

        -- Only register targets mention a register.
        patchTarget :: Target -> Target
        patchTarget tgt = case tgt of
          TReg r -> TReg (env r)
          _      -> tgt

        -- Rename every register of an addressing mode.
        patchAddr :: AddrMode -> AddrMode
        patchAddr addr = case addr of
          AddrRegReg r1 r2  -> AddrRegReg (env r1) (env r2)
          AddrRegImm r1 imm -> AddrRegImm (env r1) imm
          AddrReg r         -> AddrReg (env r)

--------------------------------------------------------------------------------

-- | Checks whether this instruction is a jump/branch instruction.
-- One that can change the flow of control in a way that the
-- register allocator needs to worry about.
--
-- Annotations are looked through; every jump, branch and call constructor
-- counts as jumpish.
isJumpishInstr :: Instr -> Bool
isJumpishInstr (ANN _ inner) = isJumpishInstr inner
isJumpishInstr J{}           = True
isJumpishInstr J_TBL{}       = True
isJumpishInstr B{}           = True
isJumpishInstr BL{}          = True
isJumpishInstr CALL{}        = True
isJumpishInstr CALL36{}      = True
isJumpishInstr TAIL36{}      = True
isJumpishInstr BCOND1{}      = True
isJumpishInstr BCOND{}       = True
isJumpishInstr BEQZ{}        = True
isJumpishInstr BNEZ{}        = True
isJumpishInstr _             = False

-- | Get the `BlockId`s of the jump destinations (if any)
--
-- Only block targets contribute a destination; label and register targets
-- yield nothing. A jump table contributes all of its present entries.
jumpDestsOfInstr :: Instr -> [BlockId]
jumpDestsOfInstr instr = case instr of
  ANN _ inner        -> jumpDestsOfInstr inner
  J t                -> blockOf t
  J_TBL ids _mbLbl _ -> catMaybes ids
  B t                -> blockOf t
  BL t _             -> blockOf t
  CALL t _           -> blockOf t
  CALL36 t           -> blockOf t
  TAIL36 _ t         -> blockOf t
  BCOND1 _ _ _ t     -> blockOf t
  BCOND _ _ _ t      -> blockOf t
  BEQZ _ t           -> blockOf t
  BNEZ _ t           -> blockOf t
  _                  -> []
  where
    -- The block a target refers to, as a zero- or one-element list.
    blockOf :: Target -> [BlockId]
    blockOf (TBlock bid) = [bid]
    blockOf _            = []

-- | Change the destination of this (potential) jump instruction.
-- Used in the linear allocator when adding fixup blocks for join
-- points.
--
-- Only jumps to block targets (and jump tables) can be patched; any other
-- instruction, including a jump to a label or register, hits the panic.
patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
patchJumpInstr instr patchF =
  case instr of
    ANN d inner                 -> ANN d (patchJumpInstr inner patchF)
    J (TBlock bid)              -> J (moved bid)
    J_TBL ids mbLbl r           -> J_TBL (map (fmap patchF) ids) mbLbl r
    B (TBlock bid)              -> B (moved bid)
    BL (TBlock bid) ps          -> BL (moved bid) ps
    CALL (TBlock bid) ps        -> CALL (moved bid) ps
    CALL36 (TBlock bid)         -> CALL36 (moved bid)
    TAIL36 r (TBlock bid)       -> TAIL36 r (moved bid)
    BCOND1 c o1 o2 (TBlock bid) -> BCOND1 c o1 o2 (moved bid)
    BCOND c o1 o2 (TBlock bid)  -> BCOND c o1 o2 (moved bid)
    BEQZ j (TBlock bid)         -> BEQZ j (moved bid)
    BNEZ j (TBlock bid)         -> BNEZ j (moved bid)
    _ -> panic $ "patchJumpInstr: " ++ instrCon instr
  where
    -- The block target after applying the caller's renaming.
    moved :: BlockId -> Target
    moved = TBlock . patchF

-- -----------------------------------------------------------------------------
-- | Make a spill instruction, spill a register into spill slot.
--
-- The slot is addressed relative to @sp@. When the offset fits in 12 bits a
-- single store with an immediate offset is emitted; otherwise the address is
-- first computed into the temporary register and the store goes through it.
mkSpillInstr
   :: HasCallStack
   => NCGConfig
   -> RegWithFormat -- register to spill
   -> Int       -- current stack delta
   -> Int       -- spill slot to use
   -> [Instr]

mkSpillInstr _config (RegWithFormat reg _fmt) delta slot
  | fitsInNbits 12 spOffset =
      [ storeAt (AddrRegImm spMachReg (ImmInt spOffset)) ]
  | otherwise =
      [ ANN (text "Spill: TMP <- " <> int spOffset) $ MOV tmp (OpImm (ImmInt spOffset))
      , ANN (text "Spill: TMP <- SP + TMP ") $ ADD tmp tmp sp
      , storeAt (AddrReg tmpReg)
      ]
  where
    -- Offset of the slot from the current stack pointer.
    spOffset = spillSlotToOffset slot - delta
    -- Real registers numbered below 32 are stored as integers, everything
    -- else as floats.
    slotFmt = case reg of
      RegReal (RealRegSingle n) | n < 32 -> II64
      _ -> FF64
    -- The annotated store of @reg@ to the given address.
    storeAt addr =
      ANN (text "Spill@" <> int spOffset) $ ST slotFmt (OpReg W64 reg) (OpAddr addr)

-- | Make a reload instruction, reload from spill slot to a register.
--
-- The slot is addressed relative to @sp@. When the offset fits in 12 bits a
-- single load with an immediate offset is emitted; otherwise the address is
-- first computed into the temporary register and the load goes through it.
mkLoadInstr
   :: NCGConfig
   -> RegWithFormat
   -> Int       -- current stack delta
   -> Int       -- spill slot to use
   -> [Instr]

mkLoadInstr _config (RegWithFormat reg _fmt) delta slot
  | fitsInNbits 12 spOffset =
      [ loadFrom (AddrRegImm spMachReg (ImmInt spOffset)) ]
  | otherwise =
      [ ANN (text "Reload: TMP <- " <> int spOffset) $ MOV tmp (OpImm (ImmInt spOffset))
      , ANN (text "Reload: TMP <- SP + TMP ") $ ADD tmp tmp sp
      , loadFrom (AddrReg tmpReg)
      ]
  where
    -- Offset of the slot from the current stack pointer.
    spOffset = spillSlotToOffset slot - delta
    -- Real registers numbered below 32 are loaded as integers, everything
    -- else as floats.
    slotFmt = case reg of
      RegReal (RealRegSingle n) | n < 32 -> II64
      _ -> FF64
    -- The annotated load of @reg@ from the given address.
    loadFrom addr =
      ANN (text "Reload@" <> int spOffset) $ LD slotFmt (OpReg W64 reg) (OpAddr addr)

-- | See if this instruction is telling us the current C stack delta
-- Annotations are looked through; only a @DELTA@ yields a value.
takeDeltaInstr :: Instr -> Maybe Int
takeDeltaInstr instr = case instr of
  ANN _ inner -> takeDeltaInstr inner
  DELTA off   -> Just off
  _           -> Nothing

-- | Not real instructions.  Just meta data
-- Annotations are looked through to the instruction they wrap.
isMetaInstr :: Instr -> Bool
isMetaInstr (ANN _ inner)       = isMetaInstr inner
isMetaInstr COMMENT{}           = True
isMetaInstr MULTILINE_COMMENT{} = True
isMetaInstr LOCATION{}          = True
isMetaInstr NEWBLOCK{}          = True
isMetaInstr DELTA{}             = True
isMetaInstr LDATA{}             = True
isMetaInstr PUSH_STACK_FRAME    = True
isMetaInstr POP_STACK_FRAME     = True
isMetaInstr _                   = False

-- | Does this instruction transfer control to the given block?
--
-- True for a jump or branch whose block target is @bid@, and for a jump
-- table whose present entries all equal @bid@ (absent entries are ignored).
-- Annotated instructions and every other instruction yield False.
canFallthroughTo :: Instr -> BlockId -> Bool
canFallthroughTo insn bid =
  case insn of
    J (TBlock target)            -> hits target
    J_TBL targets _ _            -> all (maybe True hits) targets
    B (TBlock target)            -> hits target
    TAIL36 _ (TBlock target)     -> hits target
    BCOND1 _ _ _ (TBlock target) -> hits target
    BCOND _ _ _ (TBlock target)  -> hits target
    _                            -> False
  where
    hits :: BlockId -> Bool
    hits target = bid == target

-- | Copy the value in a register to another one.
-- Must work for all register classes.
--
-- The result is a 64-bit wide @MOV@ wrapped in an annotation naming both
-- registers.
mkRegRegMoveInstr :: Reg -> Reg -> Instr
mkRegRegMoveInstr src dst =
  ANN (text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst) $
    MOV (OpReg W64 dst) (OpReg W64 src)

-- | Take the source and destination from this (potential) reg -> reg move instruction
-- We have to be a bit careful here: A `MOV` can also mean an implicit
-- conversion. This case is filtered out.
--
-- A move only qualifies when both operands have the same width and are
-- either both float registers or both not. The result is @(source, destination)@.
takeRegRegMoveInstr :: Instr -> Maybe (Reg, Reg)
takeRegRegMoveInstr instr = case instr of
  MOV (OpReg dstWidth dst) (OpReg srcWidth src)
    | dstWidth == srcWidth
    , isFloatReg dst == isFloatReg src -> Just (src, dst)
  _ -> Nothing

-- | Make an unconditional jump instruction.
-- The jump is a single @TAIL36@ to the block, given the temporary register
-- as its operand.
mkJumpInstr :: BlockId -> [Instr]
mkJumpInstr bid = [TAIL36 (OpReg W64 tmpReg) (TBlock bid)]

-- | Decrement @sp@ to allocate stack space.
--
-- Nothing is emitted for zero bytes. A positive amount that fits in 12 bits
-- is subtracted as an immediate; larger amounts are first moved into the
-- temporary register. A negative amount is a panic.
mkStackAllocInstr :: Platform -> Int -> [Instr]
mkStackAllocInstr _platform n = case compare n 0 of
  EQ -> []
  GT
    | fitsInNbits 12 n ->
        [ ANN (text "Alloc stack") $ SUB sp sp (OpImm (ImmInt n)) ]
    | otherwise ->
        [ ANN (text "Alloc more stack") (MOV tmp (OpImm (ImmInt n)))
        , SUB sp sp tmp
        ]
  LT -> pprPanic "mkStackAllocInstr" (int n)

-- | Increment SP to deallocate stack space.
--
-- Nothing is emitted for zero bytes. A positive amount that fits in 12 bits
-- is added as an immediate; larger amounts are first moved into the
-- temporary register. A negative amount is a panic.
mkStackDeallocInstr :: Platform -> Int -> [Instr]
mkStackDeallocInstr _platform n = case compare n 0 of
  EQ -> []
  GT
    | fitsInNbits 12 n ->
        [ ANN (text "Dealloc stack") $ ADD sp sp (OpImm (ImmInt n)) ]
    | otherwise ->
        [ ANN (text "Dealloc more stack") (MOV tmp (OpImm (ImmInt n)))
        , ADD sp sp tmp
        ]
  LT -> pprPanic "mkStackDeallocInstr" (int n)

-- | Rewrite a procedure so that it reserves room for @slots@ extra spill
-- slots.
--
-- Each entry block is turned into a small block that allocates the space and
-- branches to a fresh block holding the original code. Inside the code, the
-- space is released in front of every @J@, and all other jumps to an entry
-- block are redirected to its fresh counterpart. Data declarations are
-- returned unchanged.
--
-- Returns the rewritten declaration together with the list of
-- (entry block, fresh block) pairs.
allocMoreStack
  :: Platform
  -> Int
  -> NatCmmDecl statics GHC.CmmToAsm.LA64.Instr.Instr
  -> UniqDSM (NatCmmDecl statics GHC.CmmToAsm.LA64.Instr.Instr, [(BlockId,BlockId)])

allocMoreStack _ _ top@CmmData{} = return (top, [])
allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
  -- One fresh block id per entry block of the procedure.
  retargetList <- mapM (\entry -> (entry,) <$> newBlockId) (entryBlocks proc)

  let
    -- Bytes for the extra slots, rounded up to a multiple of stackAlign.
    delta = alignedUnits * stackAlign
      where
        alignedUnits = (rawBytes + stackAlign - 1) `quot` stackAlign
        rawBytes     = slots * spillSlotSize -- sp delta

    alloc   = mkStackAllocInstr   platform delta
    dealloc = mkStackDeallocInstr platform delta

    -- Entry block id -> id of the block that now holds its code.
    freshIds :: LabelMap BlockId
    freshIds = mapFromList retargetList

    retarget bid = fromMaybe bid (mapLookup bid freshIds)

    -- Patch a single instruction, consing the result onto the rest of the
    -- block. Note that an annotation is dropped and its inner instruction is
    -- processed instead.
    fixupInsn insn rest = case insn of
      J {}        -> dealloc ++ (insn : rest)
      ANN _ inner -> fixupInsn inner rest
      _ | not (null (jumpDestsOfInstr insn)) ->
            patchJumpInstr insn retarget : rest
        | otherwise -> insn : rest

    -- Entry blocks are split in two; all other blocks are only patched.
    rewriteBlock (BasicBlock bid insns) =
      case mapLookup bid freshIds of
        Just freshId ->
          [ BasicBlock bid (alloc ++ [ B (TBlock freshId) ])
          , BasicBlock freshId body
          ]
        Nothing ->
          [ BasicBlock bid body ]
      where
        body = foldr fixupInsn [] insns

  return (CmmProc info lbl live (ListGraph (concatMap rewriteBlock code)), retargetList)

-- -----------------------------------------------------------------------------
-- Machine's assembly language

-- We have a few common "instructions" (nearly all the pseudo-ops) but
-- mostly all of 'Instr' is machine-specific.

data Instr
    -- comment pseudo-op
    = COMMENT SDoc
    | MULTILINE_COMMENT SDoc

    -- Annotated instruction. Should print <instr> # <doc>
    | ANN SDoc Instr

    -- location pseudo-op (file, line, col, name)
    | LOCATION Int Int Int LexicalFastString

    -- start a new basic block.  Useful during codegen, removed later.
    -- Preceding instruction should be a jump, as per the invariants
    -- for a BasicBlock (see Cmm).
    | NEWBLOCK BlockId

    -- specify current stack offset for benefit of subsequent passes.
    -- This carries a BlockId so it can be used in unwinding information.
    | DELTA   Int

    -- | Static data spat out during code generation.
    | LDATA Section RawCmmStatics

    | PUSH_STACK_FRAME
    | POP_STACK_FRAME
    -- Basic Integer Instructions ------------------------------------------------
    -- 1. Arithmetic Instructions ------------------------------------------------
    | ADD Operand Operand Operand
    | SUB Operand Operand Operand
    | ALSL Operand Operand Operand Operand
    | ALSLU Operand Operand Operand Operand
    | LU12I Operand Operand
    | LU32I Operand Operand
    | LU52I Operand Operand Operand
    | SSLT Operand Operand Operand
    | SSLTU Operand Operand Operand
    | PCADDI Operand Operand
    | PCADDU12I Operand Operand
    | PCADDU18I Operand Operand
    | PCALAU12I Operand Operand
    | AND Operand Operand Operand
    | OR Operand Operand Operand
    | XOR Operand Operand Operand
    | NOR Operand Operand Operand
    | ANDN Operand Operand Operand
    | ORN Operand Operand Operand
    | MUL Operand Operand Operand
    | MULW Operand Operand Operand
    | MULWU Operand Operand Operand
    | MULH Operand Operand Operand
    | MULHU Operand Operand Operand
    | DIV Operand Operand Operand
    | DIVU Operand Operand Operand
    | MOD Operand Operand Operand
    | MODU Operand Operand Operand
    -- 2. Bit-shift Instructions -------------------------------------------------
    | SLL Operand Operand Operand
    | SRL Operand Operand Operand
    | SRA Operand Operand Operand
    | ROTR Operand Operand Operand
    -- 3. Bit-manipulation Instructions ------------------------------------------
    | EXT Operand Operand
    | CLO Operand Operand
    | CTO Operand Operand
    | CLZ Operand Operand
    | CTZ Operand Operand
    | BYTEPICK Operand Operand Operand Operand
    | REVB2H Operand Operand
    | REVB4H Operand Operand
    | REVB2W Operand Operand
    | REVBD Operand Operand
    | REVH2W Operand Operand
    | REVHD Operand Operand
    | BITREV4B Operand Operand
    | BITREV8B Operand Operand
    | BITREVW Operand Operand
    | BITREVD Operand Operand
    | BSTRINS Format Operand Operand Operand Operand
    | BSTRPICK Format Operand Operand Operand Operand
    | MASKEQZ Operand Operand Operand
    | MASKNEZ Operand Operand Operand
    -- Pseudo instructions
    | NOP
    | MOV Operand Operand
    | NEG Operand Operand
    | CSET Cond Operand Operand Operand
    -- 4. Branch Instructions ----------------------------------------------------
    | J Target
    | J_TBL [Maybe BlockId] (Maybe CLabel) Reg
    | B Target
    | BL Target [Reg]
    | CALL Target [Reg]
    | CALL36 Target
    | TAIL36 Operand Target
    | BCOND1 Cond Operand Operand Target
    | BCOND Cond Operand Operand Target
    | BEQZ Operand Target
    | BNEZ Operand Target
    -- 5. Common Memory Access Instructions --------------------------------------
    | LD Format Operand Operand
    | LDU Format Operand Operand
    | ST Format Operand Operand
    | LDX Format Operand Operand
    | LDXU Format Operand Operand
    | STX Format Operand Operand
    | LDPTR Format Operand Operand
    | STPTR Format Operand Operand
    | PRELD Operand Operand
    -- 6. Bound Check Memory Access Instructions ---------------------------------
    -- 7. Atomic Memory Access Instructions --------------------------------------
    -- 8. Barrier Instructions ---------------------------------------------------
    | DBAR BarrierType
    | IBAR BarrierType
    -- Basic Floating Point Instructions -----------------------------------------
    | FCVT    Operand Operand
    | SCVTF   Operand Operand
    | FCVTZS  Operand Operand Operand
    | FMAX Operand Operand Operand
    | FMIN Operand Operand Operand
    | FMAXA Operand Operand Operand
    | FMINA Operand Operand Operand
    | FNEG Operand Operand
    | FABS Operand Operand
    | FSQRT Operand Operand
    -- Floating-point fused multiply-add instructions
    --  fmadd : d =   r1 * r2 + r3
    --  fnmsub: d =   r1 * r2 - r3
    --  fmsub : d = - r1 * r2 + r3
    --  fnmadd: d = - r1 * r2 - r3
    | FMA FMASign Operand Operand Operand Operand

-- | Argument type carried by the 'DBAR' and 'IBAR' instruction constructors.
--
-- TODO: Not complete. Only a single constructor is provided so far.
data BarrierType = Hint0

-- | Render the constructor name of an instruction as a plain 'String'.
--
-- Operands are ignored; only the head constructor matters.  Two cases do not
-- simply echo the constructor name:
--
--   * 'MULTILINE_COMMENT' is reported as @"COMMENT"@, same as 'COMMENT'.
--   * 'FMA' is reported by its sign variant (@"FMADD"@, @"FMSUB"@,
--     @"FNMADD"@, @"FNMSUB"@).
instrCon :: Instr -> String
-- Meta / pseudo instructions
instrCon COMMENT{}           = "COMMENT"
instrCon MULTILINE_COMMENT{} = "COMMENT"
instrCon ANN{}               = "ANN"
instrCon LOCATION{}          = "LOCATION"
instrCon NEWBLOCK{}          = "NEWBLOCK"
instrCon DELTA{}             = "DELTA"
instrCon LDATA{}             = "LDATA"
instrCon PUSH_STACK_FRAME{}  = "PUSH_STACK_FRAME"
instrCon POP_STACK_FRAME{}   = "POP_STACK_FRAME"
-- Arithmetic
instrCon ADD{}               = "ADD"
instrCon SUB{}               = "SUB"
instrCon ALSL{}              = "ALSL"
instrCon ALSLU{}             = "ALSLU"
instrCon LU12I{}             = "LU12I"
instrCon LU32I{}             = "LU32I"
instrCon LU52I{}             = "LU52I"
instrCon SSLT{}              = "SSLT"
instrCon SSLTU{}             = "SSLTU"
instrCon PCADDI{}            = "PCADDI"
instrCon PCADDU12I{}         = "PCADDU12I"
instrCon PCADDU18I{}         = "PCADDU18I"
instrCon PCALAU12I{}         = "PCALAU12I"
instrCon AND{}               = "AND"
instrCon OR{}                = "OR"
instrCon XOR{}               = "XOR"
instrCon NOR{}               = "NOR"
instrCon ANDN{}              = "ANDN"
instrCon ORN{}               = "ORN"
instrCon MUL{}               = "MUL"
instrCon MULW{}              = "MULW"
instrCon MULWU{}             = "MULWU"
instrCon MULH{}              = "MULH"
instrCon MULHU{}             = "MULHU"
instrCon DIV{}               = "DIV"
instrCon DIVU{}              = "DIVU"
instrCon MOD{}               = "MOD"
instrCon MODU{}              = "MODU"
-- Bit shifts
instrCon SLL{}               = "SLL"
instrCon SRL{}               = "SRL"
instrCon SRA{}               = "SRA"
instrCon ROTR{}              = "ROTR"
-- Bit manipulation
instrCon EXT{}               = "EXT"
instrCon CLO{}               = "CLO"
instrCon CTO{}               = "CTO"
instrCon CLZ{}               = "CLZ"
instrCon CTZ{}               = "CTZ"
instrCon BYTEPICK{}          = "BYTEPICK"
instrCon REVB2H{}            = "REVB2H"
instrCon REVB4H{}            = "REVB4H"
instrCon REVB2W{}            = "REVB2W"
instrCon REVBD{}             = "REVBD"
instrCon REVH2W{}            = "REVH2W"
instrCon REVHD{}             = "REVHD"
instrCon BITREV4B{}          = "BITREV4B"
instrCon BITREV8B{}          = "BITREV8B"
instrCon BITREVW{}           = "BITREVW"
instrCon BITREVD{}           = "BITREVD"
instrCon BSTRINS{}           = "BSTRINS"
instrCon BSTRPICK{}          = "BSTRPICK"
instrCon MASKEQZ{}           = "MASKEQZ"
instrCon MASKNEZ{}           = "MASKNEZ"
-- Pseudo instructions
instrCon NOP{}               = "NOP"
instrCon MOV{}               = "MOV"
instrCon NEG{}               = "NEG"
instrCon CSET{}              = "CSET"
-- Branches
instrCon J{}                 = "J"
instrCon J_TBL{}             = "J_TBL"
instrCon B{}                 = "B"
instrCon BL{}                = "BL"
instrCon CALL{}              = "CALL"
instrCon CALL36{}            = "CALL36"
instrCon TAIL36{}            = "TAIL36"
instrCon BCOND1{}            = "BCOND1"
instrCon BCOND{}             = "BCOND"
instrCon BEQZ{}              = "BEQZ"
instrCon BNEZ{}              = "BNEZ"
-- Memory access
instrCon LD{}                = "LD"
instrCon LDU{}               = "LDU"
instrCon ST{}                = "ST"
instrCon LDX{}               = "LDX"
instrCon LDXU{}              = "LDXU"
instrCon STX{}               = "STX"
instrCon LDPTR{}             = "LDPTR"
instrCon STPTR{}             = "STPTR"
instrCon PRELD{}             = "PRELD"
-- Barriers
instrCon DBAR{}              = "DBAR"
instrCon IBAR{}              = "IBAR"
-- Floating point
instrCon FCVT{}              = "FCVT"
instrCon SCVTF{}             = "SCVTF"
instrCon FCVTZS{}            = "FCVTZS"
instrCon FMAX{}              = "FMAX"
instrCon FMIN{}              = "FMIN"
instrCon FMAXA{}             = "FMAXA"
instrCon FMINA{}             = "FMINA"
instrCon FNEG{}              = "FNEG"
instrCon FABS{}              = "FABS"
instrCon FSQRT{}             = "FSQRT"
-- Fused multiply-add: the name depends on the sign variant.
instrCon (FMA FMAdd  _ _ _ _) = "FMADD"
instrCon (FMA FMSub  _ _ _ _) = "FMSUB"
instrCon (FMA FNMAdd _ _ _ _) = "FNMADD"
instrCon (FMA FNMSub _ _ _ _) = "FNMSUB"

-- | Destination of a jump, branch or call instruction (see the 'J', 'B',
-- 'BL', 'CALL', ... constructors of 'Instr').
data Target
    = TBlock BlockId -- ^ destination given as a 'BlockId'
    | TLabel CLabel  -- ^ destination given as a 'CLabel'
    | TReg   Reg     -- ^ destination given as a register

-- | An instruction operand: a register tagged with a 'Width', an immediate
-- value, or an addressing mode.
data Operand
  = OpReg Width Reg -- register
  | OpImm Imm       -- immediate
  | OpAddr AddrMode -- address
  deriving (Eq, Show)

-- | Wrap a register as a 64-bit wide ('W64') register operand.
opReg :: Reg -> Operand
opReg reg = OpReg W64 reg

-- | Build a 64-bit register operand from a raw register number, going
-- through 'regSingle' and 'opReg'.
opRegNo :: RegNo -> Operand
opRegNo regNo = opReg (regSingle regNo)

-- | 64-bit operands for the registers that have a dedicated name in this
-- backend.
--
-- LoongArch64 has no ip register in its ABI.  Here the ip register is used
-- for spilling/reloading registers to/from slots, so t8(r20) is made
-- non-free for ip.
zero, ra, tp, sp, fp, tmp :: Operand
zero = OpReg W64 zeroReg
ra   = OpReg W64 raReg
tp   = OpReg W64 tpMachReg
sp   = OpReg W64 spMachReg
fp   = OpReg W64 fpMachReg
tmp  = OpReg W64 tmpReg

-- | 64-bit operands for the real registers numbered 0..31 (the registers
-- that 'isFloatReg' does /not/ classify as floating-point).
--
-- Invariant: @xN@ always denotes register number @N@.
x0,  x1,  x2,  x3,  x4,  x5,  x6,  x7  :: Operand
x8,  x9,  x10, x11, x12, x13, x14, x15 :: Operand
x16, x17, x18, x19, x20, x21, x22, x23 :: Operand
x24, x25, x26, x27, x28, x29, x30, x31 :: Operand
x0  = opRegNo  0
x1  = opRegNo  1
x2  = opRegNo  2
x3  = opRegNo  3
x4  = opRegNo  4
x5  = opRegNo  5
x6  = opRegNo  6
x7  = opRegNo  7
x8  = opRegNo  8
x9  = opRegNo  9
x10 = opRegNo 10
x11 = opRegNo 11
x12 = opRegNo 12
x13 = opRegNo 13
x14 = opRegNo 14
x15 = opRegNo 15
x16 = opRegNo 16
x17 = opRegNo 17
x18 = opRegNo 18
x19 = opRegNo 19
x20 = opRegNo 20
x21 = opRegNo 21
x22 = opRegNo 22
x23 = opRegNo 23
x24 = opRegNo 24
x25 = opRegNo 25
x26 = opRegNo 26
x27 = opRegNo 27
-- Was 'opRegNo 18', which silently aliased x28 to x18.
x28 = opRegNo 28
x29 = opRegNo 29
x30 = opRegNo 30
x31 = opRegNo 31

-- | 64-bit operands for the real registers numbered 32..63, i.e. the
-- register numbers that 'isFloatReg' classifies as floating-point.
--
-- @dN@ denotes register number @32 + N@.
d0,  d1,  d2,  d3,  d4,  d5,  d6,  d7  :: Operand
d8,  d9,  d10, d11, d12, d13, d14, d15 :: Operand
d16, d17, d18, d19, d20, d21, d22, d23 :: Operand
d24, d25, d26, d27, d28, d29, d30, d31 :: Operand
d0  = opReg (regSingle 32)
d1  = opReg (regSingle 33)
d2  = opReg (regSingle 34)
d3  = opReg (regSingle 35)
d4  = opReg (regSingle 36)
d5  = opReg (regSingle 37)
d6  = opReg (regSingle 38)
d7  = opReg (regSingle 39)
d8  = opReg (regSingle 40)
d9  = opReg (regSingle 41)
d10 = opReg (regSingle 42)
d11 = opReg (regSingle 43)
d12 = opReg (regSingle 44)
d13 = opReg (regSingle 45)
d14 = opReg (regSingle 46)
d15 = opReg (regSingle 47)
d16 = opReg (regSingle 48)
d17 = opReg (regSingle 49)
d18 = opReg (regSingle 50)
d19 = opReg (regSingle 51)
d20 = opReg (regSingle 52)
d21 = opReg (regSingle 53)
d22 = opReg (regSingle 54)
d23 = opReg (regSingle 55)
d24 = opReg (regSingle 56)
d25 = opReg (regSingle 57)
d26 = opReg (regSingle 58)
d27 = opReg (regSingle 59)
d28 = opReg (regSingle 60)
d29 = opReg (regSingle 61)
d30 = opReg (regSingle 62)
d31 = opReg (regSingle 63)

-- | Does @i@ fit into an @n@-bit signed (two's complement) field?
--
-- That is, check @-2^(n-1) <= i <= 2^(n-1) - 1@.
fitsInNbits :: Int -> Int -> Bool
fitsInNbits n i = lowest <= i && i <= highest
  where
    -- 2^(n-1): the number of non-negative values representable in n bits.
    half    = 1 `shiftL` (n - 1)
    lowest  = negate half
    highest = half - 1

-- | 'True' exactly when the given integer is non-negative.
isUnsignOp :: Int -> Bool
isUnsignOp = (>= 0)

-- | Can the 'Integer' @i@ be encoded as an @n@-bit signed (two's complement)
-- value, i.e. does @-2^(n-1) <= i < 2^(n-1)@ hold?
isNbitEncodeable :: Int -> Integer -> Bool
isNbitEncodeable n i = i >= negate bound && i < bound
  where
    -- Exclusive upper bound 2^(n-1); its negation is the inclusive lower bound.
    bound = 1 `shiftL` (n - 1)

-- | Like 'isNbitEncodeable', but the bit count is taken from a 'Width'
-- (via 'widthInBits').
isEncodeableInWidth :: Width -> Integer -> Bool
isEncodeableInWidth width = isNbitEncodeable (widthInBits width)

-- | An operand counts as an integer operand whenever 'isFloatOp' rejects it.
-- Note that this includes immediates and addresses.
isIntOp :: Operand -> Bool
isIntOp op = not (isFloatOp op)

-- | 'True' only for register operands whose register satisfies 'isFloatReg';
-- immediates and addresses are never floating-point operands.
isFloatOp :: Operand -> Bool
isFloatOp op = case op of
  OpReg _ reg -> isFloatReg reg
  _           -> False

-- | Classify a register as floating-point.
--
-- A real register is floating-point when its number is above 31; a virtual
-- register is floating-point when it is a 'VirtualRegD'.  Everything else is
-- not.
isFloatReg :: Reg -> Bool
isFloatReg reg = case reg of
  RegReal (RealRegSingle regNo) -> regNo > 31
  RegVirtual (VirtualRegD _)    -> True
  _                             -> False

-- | Number of bits for a 'Width'.
--
-- 'W8', 'W16', 'W32' and 'W64' map to their own size; every other width is
-- reported as 64.
widthToInt :: Width -> Int
widthToInt width = case width of
  W8  -> 8
  W16 -> 16
  W32 -> 32
  _   -> 64 -- W64 and all remaining widths

-- | Width tag of a register operand.
--
-- Returns the stored width when it is one of 'W8', 'W16', 'W32' or 'W64'.
-- Any other width, and any non-register operand, yields 'W64'.
widthFromOpReg :: Operand -> Width
widthFromOpReg (OpReg width _)
  | width `elem` [W8, W16, W32, W64] = width
widthFromOpReg _ = W64

-- | Map a format to the 64-bit format of the same kind.
--
-- The integer formats 'II8', 'II16', 'II32' and 'II64' become 'II64';
-- 'FF32' and 'FF64' become 'FF64'.  Any other format is a panic.
ldFormat :: Format -> Format
ldFormat f = case f of
  II8  -> II64
  II16 -> II64
  II32 -> II64
  II64 -> II64
  FF32 -> FF64
  FF64 -> FF64
  _    -> pprPanic "unsupported ldFormat: " (text $ show f)