-------------------------------------------------------------------------- -- | -- Module : Text.Disassembler.X86Disassembler -- Copyright : (c) Martin Grabmueller and Dirk Kleeblatt -- License : BSD3 -- -- Maintainer : martin@grabmueller.de,klee@cs.tu-berlin.de -- Stability : provisional -- Portability : portable -- -- Disassembler for x86 machine code. -- -- This is a disassembler for object code for the x86 architecture. -- It provides functions for disassembling byte arrays, byte lists and -- memory blocks containing raw binary code. -- -- Features: -- -- - Disassembles memory blocks, lists or arrays of bytes into lists of -- instructions. -- -- - Abstract instructions provide as much information as possible about -- opcodes, addressing modes or operand sizes, allowing for detailed -- output. -- -- - Provides functions for displaying instructions in Intel or AT&T -- style (like the GNU tools) -- -- Differences to GNU tools, like gdb or objdump: -- -- - Displacements are shown in decimal, with sign if negative. -- -- Missing: -- -- - LOCK and repeat prefixes are recognized, but not contained in the -- opcodes of instructions. -- -- - Support for 16-bit addressing modes. Could be added when needed. -- -- - Complete disassembly of all 64-bit instructions. I have tried to -- disassemble them properly but have been limited to the information -- in the docs, because I have no 64-bit machine to test on. This will -- probably change when I get GNU as to produce 64-bit object files. -- -- - Not all MMX and SSE/SSE2/SSE3 instructions are decoded yet. This is -- just a matter of missing time. -- -- - segment override prefixes are decoded, but not appended to memory -- references -- -- On the implementation: -- -- This disassembler uses the Parsec parser combinators, working on byte -- lists. This proved to be very convenient, as the combinators keep -- track of the current position, etc. 
-------------------------------------------------------------------------- module Text.Disassembler.X86Disassembler( -- * Types Opcode, Operand(..), InstrOperandSize(..), Instruction(..), ShowStyle(..), Config(..), -- * Functions disassembleBlock, disassembleList, disassembleArray, disassembleFile, disassembleBlockWithConfig, disassembleListWithConfig, disassembleArrayWithConfig, disassembleFileWithConfig, showIntel, showAtt, defaultConfig ) where import Text.ParserCombinators.Parsec import Control.Monad.State import System.IO import Data.List import Data.Char import Data.Array.IArray import Numeric import Foreign -- | All opcodes are represented by this enumeration type. data Opcode = InvalidOpcode | AAA | AAD | AAM | AAS | ADC | ADD | ADDPD | ADDPS | ADDSD | ADDSS | ADDSUBPD | ADDUBPS | AND | ANDNPD | ANDNPS | ANDPD | ANDPS | ARPL | BOUND | BSF | BSR | BT | BTC | BTR | BTS | CALL | CALLF | CBW | CDQ | CDQE | CLC | CLD | CLFLUSH | CLI | CLTS | CMC | CMOVA | CMOVB | CMOVBE | CMOVE | CMOVG | CMOVGE | CMOVL | CMOVLE | CMOVNB | CMOVNE | CMOVNO | CMOVNP | CMOVNS | CMOVO | CMOVP | CMOVS | CMP | CMPS | CMPXCHG | CMPXCHG16B | CMPXCHG8B | COMISD | COMISS | CPUID | CWD | CWDE | DAA | DAS | DEC | DIV | DIVPD | DIVPS | DIVSD | DIVSS | EMMS | ENTER | FABS | FADD | FADDP | FBLD | FBSTP | FCHS | FCLEX | FCMOVB | FCMOVBE | FCMOVE | FCMOVNB | FCMOVNBE | FCMOVNE | FCMOVNU | FCMOVU | FCOM | FCOMI | FCOMIP | FCOMP | FCOMPP | FDIV | FDIVP | FDIVR | FDIVRP | FFREE | FIADD | FICOM | FICOMP | FIDIV | FIDIVR | FILD | FIMUL | FINIT | FIST | FISTP | FISTPP | FISTTP | FISUB | FISUBR | FLD | FLD1 | FLDCW | FLDENV | FLDL2E | FLDL2T | FLDLG2 | FLDLN2 | FLDPI | FLDZ | FMUL | FMULP | FNOP | FRSTOR | FSAVE | FST | FSTCW | FSTENV | FSTP | FSTSW | FSUB | FSUBP | FSUBR | FSUBRP | FTST | FUCOM | FUCOMI | FUCOMIP | FUCOMP | FUCOMPP | FXAM | FXCH | FXRSTOR | FXSAVE | HADDPD | HADDPS | HLT | HSUBPD | HSUBPS | IDIV | IMUL | BSWAP | IN | INC | INS | INT | INT3 | INTO | INVD | INVLPG | IRET | JA | JB | JBE 
| JCXZ | JE | JG | JGE | JL | JLE | JMP | JMPF | JMPN | JNB | JNE | JNO | JNP | JNS | JO | JP | JS | LAHF | LAR | LDDQU | LDMXCSR | LDS | LEA | LEAVE | LES | LFENCE | LFS | LGDT | LGS | LIDT | LLDT | LMSW | LODS | LOOP | LOOPE | LOOPNE | LSL | LSS | LTR | MASKMOVQ | MAXPD | MAXPS | MAXSD | MAXSS | MFENCE | MINPD | MINPS | MINSD | MINSS | MONITOR | MOV | MOVAPD | MOVAPS | MOVDDUP | MOVHPD | MOVHPS | MOVLHPS | MOVLPD | MOVLPS | MOVLSDUP | MOVMSKPD | MOVMSKPS | MOVNTDQ | MOVNTPD | MOVNTPS | MOVNTQ | MOVQ | MOVS | MOVSD | MOVSLDUP | MOVSS | MOVSXB | MOVSXD | MOVSXW | MOVUPD | MOVUPS | MOVZXB | MOVZXW | MUL | MULPD | MULPS | MULSD | MULSS | MWAIT | NEG | NOP | NOT | OR | ORPD | ORPS | OUT | OUTS | PADDB | PADDD | PADDQ | PADDSB | PADDSW | PADDUSB | PADDUSW | PADDW | PAND | PANDN | PAUSE | PAVGB | PAVGW | PMADDWD | PMAXSW | PMAXUB | PMINSW | PMINUB | PMOVMSKB | PMULHUW | PMULHW | PMULLW | PMULUDQ | POP | POPA | POPAD | POPF | POPFD | POPFQ | POR | PREFETCHNTA | PREFETCHT0 | PREFETCHT1 | PREFETCHT2 | PSADBW | PSLLD | PSLLDQ | PSLLQ | PSLLW | PSRAD | PSRAW | PSRLD | PSRLDQ | PSRLQ | PSRLW | PSUBB | PSUBD | PSUBQ | PSUBSB | PSUBSQ | PSUBUSB | PSUBUSW | PSUBW | PUSH | PUSHA | PUSHAD | PUSHF | PUSHFD | PUSHFQ | PXOR | RCL | RCPPS | RCPSS | RCR | RDMSR | RDPMC | RDTSC | RET | RETF | ROL | ROR | RSM | RSQRTPS | RSQRTSS | SAHF | SAR | SBB | SCAS | SETA | SETB | SETBE | SETE | SETG | SETGE | SETL | SETLE | SETNB | SETNE | SETNO | SETNP | SETNS | SETO | SETP | SETS | SFENCE | SGDT | SHL | SHLD | SHR | SHRD | SIDT | SLDT | SMSW | SQRTPD | SQRTPS | SQRTSD | SQRTSS | STC | STD | STI | STMXCSR | STOS | STR | SUB | SUBPD | SUBPS | SUBSD | SUBSS | SWAPGS | SYSCALL | SYSENTER | SYSEXIT | TEST | UCOMISD | UCOMISS | UD2 | UNPCKHPD | UNPCKHPS | UNPCKLPD | UNPCKLPS | VERR | VERW | VMCALL | VMCLEAR | VMLAUNCH | VMPTRLD | VMPTRST | VMREAD | VMRESUME | VMWRITE | VMXOFF | VMXON | WAIT | WBINVD | WRMSR | XADD | XCHG | XLAT | XOR | XORPD | XORPS deriving (Show, Eq) -- Display an opcode in lower 
-- case.
showOp :: Opcode -> String
showOp opc = map toLower (show opc)

-- | All operands are in one of the following locations:
--
-- - Constants in the instruction stream
--
-- - Memory locations
--
-- - Registers
--
-- Memory locations are referred to by one of several addressing modes:
--
-- - Absolute (address in instruction stream)
--
-- - Register-indirect (address in register)
--
-- - Register-indirect with displacement
--
-- - Base-Index with scale
--
-- - Base-Index with scale and displacement
--
-- Displacements can be encoded as 8 or 32-bit immediates in the
-- instruction stream, but are encoded as Int in instructions for
-- simplicity.
--
data Operand
    = OpImm Word32
      -- ^ Immediate value
    | OpAddr Word32 InstrOperandSize
      -- ^ Absolute address
    | OpReg String Int
      -- ^ Register
    | OpFPReg Int
      -- ^ Floating-point register
    | OpInd String InstrOperandSize
      -- ^ Register-indirect
    | OpIndDisp String Int InstrOperandSize
      -- ^ Register-indirect with displacement
    | OpBaseIndex String String Int InstrOperandSize
      -- ^ Base plus scaled index
    | OpIndexDisp String Int Int InstrOperandSize
      -- ^ Scaled index with displacement
    | OpBaseIndexDisp String String Int Int InstrOperandSize
      -- ^ Base plus scaled index with displacement
    deriving (Eq)

-- Render a single operand in AT&T syntax: registers carry a @%@ sigil,
-- displacements are printed in decimal in front of the parenthesised
-- base/index part, and the operand-size tag of memory operands is not
-- printed here.
showAttOps :: Operand -> String
showAttOps operand = case operand of
    OpImm imm                             -> showImm imm
    OpAddr addr _                         -> showAddr addr
    OpReg name _                          -> "%" ++ name
    OpFPReg 0                             -> "%st"
    OpFPReg n                             -> "%st(" ++ show n ++ ")"
    OpInd base _                          -> "(%" ++ base ++ ")"
    OpIndDisp base disp _                 -> show disp ++ "(%" ++ base ++ ")"
    OpBaseIndex base idx scale _          -> baseIndex base idx scale
    OpIndexDisp idx scale disp _          ->
        show disp ++ "(%" ++ idx ++ "," ++ show scale ++ ")"
    OpBaseIndexDisp base idx scale disp _ ->
        show disp ++ baseIndex base idx scale
  where
    -- The "(%base,%index,scale)" part shared by both base+index forms.
    baseIndex base idx scale =
        "(%" ++ base ++ ",%" ++ idx ++ "," ++ show scale ++ ")"

-- Show an operand in Intel style.
-- The first argument (the instruction's operand size) is accepted for the
-- benefit of callers but is not consulted: memory operands carry their own
-- size tag, which is what selects the "... ptr" annotation.
showIntelOps :: a -> Operand -> String
showIntelOps _ operand = case operand of
    OpImm imm                              -> showIntelImm imm
    OpAddr addr sz                         -> memRef sz (showIntelAddr addr)
    OpReg name _                           -> name
    OpFPReg 0                              -> "st"
    OpFPReg n                              -> "st(" ++ show n ++ ")"
    OpInd base sz                          -> memRef sz base
    OpIndDisp base disp sz                 -> memRef sz (base ++ signed disp)
    OpBaseIndex base idx scale sz          ->
        memRef sz (base ++ "+" ++ scaled idx scale)
    OpIndexDisp idx scale disp sz          ->
        memRef sz (scaled idx scale ++ signed disp)
    OpBaseIndexDisp base idx scale disp sz ->
        memRef sz (base ++ "+" ++ scaled idx scale ++ signed disp)
  where
    -- Size annotation followed by the bracketed address expression.
    memRef sz inner = opInd sz ++ "[" ++ inner ++ "]"
    -- "index*scale"
    scaled idx scale = idx ++ "*" ++ show scale
    -- Decimal displacement with an explicit sign; negative numbers already
    -- bring their own "-".
    signed disp
        | disp < 0  = show disp
        | otherwise = "+" ++ show disp

-- Intel-style size annotation placed in front of a memory operand.
opInd :: InstrOperandSize -> String
opInd sz = case sz of
    OPNONE -> ""
    OP8    -> "byte ptr "
    OP16   -> "word ptr "
    OP32   -> "dword ptr "
    OPF32  -> "dword ptr "
    OP64   -> "qword ptr "
    OPF64  -> "qword ptr "
    OPF80  -> "tbyte ptr "
    OP128  -> "dqword ptr "

-- | Encodes the default and currently active operand or address size.  Can
-- be changed with the operand- or address-size prefixes 0x66 and 0x67.
data OperandSize
    = BIT16
    | BIT32

-- | Some opcodes can operate on data of several widths.  This information
-- is encoded in instructions using the following enumeration type.
data InstrOperandSize
    = OPNONE  -- ^ No operand size specified
    | OP8     -- ^ 8-bit integer operand
    | OP16    -- ^ 16-bit integer operand
    | OP32    -- ^ 32-bit integer operand
    | OP64    -- ^ 64-bit integer operand
    | OP128   -- ^ 128-bit integer operand
    | OPF32   -- ^ 32-bit floating point operand
    | OPF64   -- ^ 64-bit floating point operand
    | OPF80   -- ^ 80-bit floating point operand
    deriving (Show, Eq)

-- | The disassembly routines return lists of the following datatype.  It
-- encodes both invalid byte sequences (with a useful error message, if
-- possible), or a valid instruction.
-- Both variants contain the list of opcode bytes from which the
-- instruction was decoded and the address of the instruction.
data Instruction
    = BadInstruction Word8 String Int [Word8]
      -- ^ Invalid instruction
    | PseudoInstruction Int String
      -- ^ Pseudo instruction, e.g. label
    | Instruction
        { opcode   :: Opcode            -- ^ Opcode of the instruction
        , opsize   :: InstrOperandSize  -- ^ Operand size, if any
        , operands :: [Operand]         -- ^ Instruction operands
        , address  :: Int               -- ^ Start address of instruction
        , bytes    :: [Word8]           -- ^ Instruction bytes
        }
      -- ^ Valid instruction
    deriving (Eq)

-- Instructions are shown in Intel style by default.
instance Show Instruction where
    show = showIntel

-- Result of decoding one opcode, before the address and the raw bytes are
-- attached: either a rejected byte with an explanation, or an opcode with
-- its operand size and operands.
data Instr
    = Bad Word8 String
    | Instr Opcode InstrOperandSize [Operand]

-- Show an integer as an 8-digit hexadecimal number with leading zeroes.
-- The value is first truncated to 32 bits.
hex32 :: Int -> String
hex32 i = replicate (8 - length digits) '0' ++ digits
  where
    digits = showHex (fromIntegral i :: Word32) ""

-- Show a byte as a 2-digit hexadecimal number with leading zeroes.
hex8 :: Word8 -> String
hex8 b = replicate (2 - length digits) '0' ++ digits
  where
    digits = showHex b ""

-- | Instructions can be displayed either in Intel or AT&T style (like in
-- GNU tools).
--
-- Intel style:
--
-- - Destination operand comes first, source second.
--
-- - No register or immediate prefixes.
--
-- - Memory operands are annotated with operand size.
--
-- - Hexadecimal numbers are suffixed with @H@ and prefixed with @0@ if
--   necessary.
--
-- AT&T style:
--
-- - Source operand comes first, destination second.
--
-- - Register names are prefixed with @%@.
--
-- - Immediates are prefixed with @$@.
--
-- - Hexadecimal numbers are prefixed with @0x@.
--
-- - Opcodes are suffixed with operand size, when ambiguous otherwise.
data ShowStyle
    = IntelStyle  -- ^ Show in Intel style
    | AttStyle    -- ^ Show in AT&T style

-- | Show an instruction in Intel style.
showIntel :: Instruction -> [Char]
showIntel instr = case instr of
    BadInstruction b desc pos raw ->
        showPosBytes pos raw ++ "(" ++ desc ++ ", byte=" ++ show b ++ ")"
    PseudoInstruction pos text ->
        hex32 pos ++ " " ++ text
    Instruction op _ [] pos raw ->
        showPosBytes pos raw ++ showOp op
    Instruction op size ops pos raw ->
        showPosBytes pos raw ++ enlarge (showOp op) 6 ++ " "
            ++ intercalate "," (map (showIntelOps size) ops)

-- | Show an instruction in AT&T style.
showAtt :: Instruction -> [Char]
showAtt instr = case instr of
    BadInstruction b desc pos raw ->
        showPosBytes pos raw ++ "(" ++ desc ++ ", byte=" ++ show b ++ ")"
    PseudoInstruction pos text ->
        hex32 pos ++ " " ++ text
    Instruction op size [] pos raw ->
        showPosBytes pos raw ++ showOp op ++ showInstrSuffix [] size
    Instruction op size ops pos raw ->
        -- AT&T lists the source first, hence the reversed operand order.
        showPosBytes pos raw
            ++ enlarge (showOp op ++ showInstrSuffix ops size) 6 ++ " "
            ++ intercalate "," (map showAttOps (reverse ops))

-- Common line prefix: the address, then the raw instruction bytes in hex,
-- padded to a fixed-width column.
showPosBytes :: Int -> [Word8] -> String
showPosBytes pos raw = hex32 pos ++ " " ++ enlarge (unwords (map hex8 raw)) 30

-- Pad a string with trailing blanks up to the given width; strings that
-- are already that long are returned unchanged.
enlarge :: String -> Int -> String
enlarge s width = s ++ replicate (width - length s) ' '

-- AT&T mnemonic suffix for an operand size.
opSizeSuffix :: InstrOperandSize -> String
opSizeSuffix sz = case sz of
    OPNONE -> ""
    OP8    -> "b"
    OP16   -> "w"
    OP32   -> "l"
    OP64   -> "q"
    OP128  -> "dq"
    OPF32  -> "s"
    OPF64  -> "l"
    OPF80  -> "t"

-- Choose the AT&T mnemonic suffix for an operand list.  Operands are
-- scanned left to right: the first memory operand that carries a size
-- decides the suffix; a register operand suppresses the instruction-level
-- size for the remainder of the scan; if nothing decides, the (possibly
-- suppressed) instruction-level size is used.
showInstrSuffix :: [Operand] -> InstrOperandSize -> String
showInstrSuffix [] sz = opSizeSuffix sz
showInstrSuffix (o : rest) sz = case o of
    OpImm _                   -> showInstrSuffix rest sz
    --OpReg _ _ (as the only operand) -> ""
    OpReg _ _                 -> showInstrSuffix rest OPNONE
    OpFPReg _                 -> showInstrSuffix rest sz
    OpAddr _ m                -> fromMem m
    OpInd _ m                 -> fromMem m
    OpIndDisp _ _ m           -> fromMem m
    OpBaseIndex _ _ _ m       -> fromMem m
    OpIndexDisp _ _ _ m       -> fromMem m
    OpBaseIndexDisp _ _ _ _ m -> fromMem m
  where
    -- A memory operand without a size says nothing; keep scanning.
    fromMem OPNONE = showInstrSuffix rest sz
    fromMem m      = opSizeSuffix m

-- showInstrOperandSize ops OPNONE | noRegop ops = ""
-- showInstrOperandSize ops OP8 | noRegop ops = "b"
-- showInstrOperandSize ops OP16 | noRegop ops = "w"
-- showInstrOperandSize ops OP32 | noRegop ops = "l"
-- showInstrOperandSize ops OPF32 | noRegop ops = "s"
-- showInstrOperandSize ops OP64 | noRegop ops = "q"
-- showInstrOperandSize ops OPF64 | noRegop ops = "l"
-- showInstrOperandSize ops OPF80 | noRegop ops = "e"
-- showInstrOperandSize ops OP128 | noRegop ops = ""
-- showInstrOperandSize _ _ = ""

-- noRegop ops = null (filter isRegop ops)
-- isRegop (OpReg _ _) = True
-- isRegop _ = False

-- Show an immediate value in hexadecimal (AT&T: @$0x...@).
showImm :: Word32 -> String
showImm i = "$0x" ++ showHex i ""

-- Show an immediate value in hexadecimal, Intel style: suffixed with @H@
-- and prefixed with @0@ when the first hex digit is a letter.
showIntelImm :: Word32 -> String
showIntelImm i = zeroPrefix ++ digits
  where
    digits = showHex i "H"
    zeroPrefix = case digits of
        (c : _) | isDigit c -> ""
        _                   -> "0"

-- Show an address in hexadecimal (AT&T: @0x...@).  The value is truncated
-- to 32 bits first.
showAddr :: Integral a => a -> String
showAddr i = "0x" ++ showHex (fromIntegral i :: Word32) ""

-- Show an address in hexadecimal, Intel style.  The value is truncated to
-- 32 bits and then formatted exactly like an immediate.
showIntelAddr :: Integral a => a -> String
showIntelAddr i = showIntelImm (fromIntegral i)

-- | Disassemble a block of memory.  Starting at the location
-- pointed to by the given pointer, the given number of bytes are
-- disassembled.
disassembleBlock :: Ptr Word8 -> Int -> IO (Either ParseError [Instruction])
disassembleBlock ptr len = disassembleBlockWithConfig config ptr len
  where
    -- Report instruction addresses relative to the real memory location.
    config = defaultConfig{confStartAddr = fromIntegral (minusPtr ptr nullPtr)}

-- | Like 'disassembleBlock', but with an explicit configuration.  The
-- start address is taken from the configuration, not from the pointer.
disassembleBlockWithConfig :: Config -> Ptr Word8 -> Int -> IO (Either ParseError [Instruction])
disassembleBlockWithConfig config ptr len = do
    -- peekArray yields the empty list for non-positive lengths.
    block <- peekArray len ptr
    parseInstructions (configToState config) block

-- | Disassemble the contents of the given array.
disassembleArray :: (Monad m, IArray a Word8, Ix i) => a i Word8 -> m (Either ParseError [Instruction])
disassembleArray arr = disassembleArrayWithConfig defaultConfig arr

-- | Like 'disassembleArray', but with an explicit configuration.
disassembleArrayWithConfig :: (Monad m, IArray a Word8, Ix i) => Config -> a i Word8 -> m (Either ParseError [Instruction])
disassembleArrayWithConfig config arr =
    parseInstructions (configToState config) (elems arr)

-- | Disassemble the contents of the given list.
disassembleList :: (Monad m) => [Word8] -> m (Either ParseError [Instruction])
disassembleList ls = disassembleListWithConfig defaultConfig ls

-- | Like 'disassembleList', but with an explicit configuration.
disassembleListWithConfig :: (Monad m) => Config -> [Word8] -> m (Either ParseError [Instruction])
disassembleListWithConfig config ls =
    parseInstructions (configToState config) ls

-- | Disassemble the contents of the given file, treating every byte of
-- the file as machine code.
disassembleFile :: FilePath -> IO (Either ParseError [Instruction])
disassembleFile filename = disassembleFileWithConfig defaultConfig filename

-- | Like 'disassembleFile', but with an explicit configuration.
--
-- The file is opened in binary mode.  Text mode would run the raw machine
-- code through the locale's character decoder and newline translation,
-- which rejects or alters bytes; in binary mode every byte arrives as a
-- 'Char' in the range 0..255, so the conversion to 'Word8' is exact.
disassembleFileWithConfig :: Config -> FilePath -> IO (Either ParseError [Instruction])
disassembleFileWithConfig config filename = do
    h <- openBinaryFile filename ReadMode
    contents <- hGetContents h
    -- Read the whole file now so the handle can be closed deterministically
    -- instead of staying half-open until the parse result is consumed.
    length contents `seq` hClose h
    parseInstructions (configToState config) (map (fromIntegral . ord) contents)

-- Render a list of instructions in the requested style.
instrToString :: [Instruction] -> ShowStyle -> [String]
instrToString insts style = map render insts
  where
    render = case style of
        IntelStyle -> showIntel
        AttStyle   -> showAtt

-- | Test function for disassembling the contents of a binary file and
-- displaying it in the provided style ("IntelStyle" or "AttStyle").
testFile :: FilePath -> ShowStyle -> IO ()
testFile fname style = do
    -- Binary mode: the file holds raw machine code, so it must not pass
    -- through locale decoding or newline translation.  Each Char is then a
    -- byte value in 0..255.
    h <- openBinaryFile fname ReadMode
    contents <- hGetContents h
    -- Force the full read so the handle can be closed right away.
    length contents `seq` hClose h
    result <- parseInstructions defaultState (map (fromIntegral . ord) contents)
    case result of
        Left err    -> putStrLn (show err)
        Right insts -> mapM_ (putStrLn . render) insts
  where
    render = case style of
        IntelStyle -> showIntel
        AttStyle   -> showAtt

-- This is the state maintained by the disassembler.
data PState = PState
    { defaultBitMode :: OperandSize  -- size that operand/address mode are reset to per instruction
    , operandBitMode :: OperandSize  -- operand size in effect for the current instruction
    , addressBitMode :: OperandSize  -- address size in effect for the current instruction
    , in64BitMode    :: Bool         -- whether 64-bit decoding rules apply
    , prefixes       :: [Word8]      -- prefix bytes seen for the current instruction
    , startAddr      :: Word32       -- address reported for the first input byte
    }

-- | Caller-supplied settings; 'configToState' copies each field into the
-- parser state field of the corresponding name.
data Config = Config
    { confDefaultBitMode :: OperandSize
    , confOperandBitMode :: OperandSize
    , confAddressBitMode :: OperandSize
    , confIn64BitMode    :: Bool
    , confStartAddr      :: Word32
    }

-- | Default configuration: 32-bit operand and address size, not in 64-bit
-- mode, start address 0.
defaultConfig :: Config
defaultConfig = Config
    { confDefaultBitMode = BIT32
    , confOperandBitMode = BIT32
    , confAddressBitMode = BIT32
    , confIn64BitMode    = False
    , confStartAddr      = 0
    }

-- Build the initial parser state from a configuration.  Fields that have
-- no counterpart in 'Config' (the prefix list) come from 'defaultState'.
configToState :: Config -> PState
configToState config = defaultState
    { defaultBitMode = confDefaultBitMode config
    , operandBitMode = confOperandBitMode config
    , addressBitMode = confAddressBitMode config
    , in64BitMode    = confIn64BitMode config
    , startAddr      = confStartAddr config
    }

-- Default state to be used if no other is given to the disassembly
-- routines.
defaultState :: PState
defaultState = PState
    { defaultBitMode = BIT32
    , operandBitMode = BIT32
    , addressBitMode = BIT32
    , in64BitMode    = False
    , prefixes       = []
    , startAddr      = 0
    }

-- Parsers over a list of bytes that thread the disassembler state.
type Word8Parser a = GenParser Word8 PState a

-- Run the instruction-sequence parser over a byte list, starting from the
-- given state.  The result is wrapped in the caller's monad so the pure
-- and the IO front ends can share it.
parseInstructions :: (Monad m) => PState -> [Word8] -> m (Either ParseError [Instruction])
parseInstructions st l = return (runParser instructionSequence st "memory block" l)

-- Parse a possibly empty sequence of instructions.
instructionSequence :: Word8Parser [Instruction]
instructionSequence = many instruction

-- Parse a single instruction.  The result is either a valid instruction
-- or an indicator that there starts no valid instruction at the current
-- position.
instruction :: Word8Parser Instruction
instruction = do
    startCol <- getColumn
    input <- getInput
    st <- getState
    -- Size overrides and prefixes only last for one instruction.
    setState st
        { operandBitMode = defaultBitMode st
        , addressBitMode = defaultBitMode st
        , prefixes = []
        }
    many parsePrefix
    b <- anyWord8
    -- Dispatch on the first opcode byte; bytes without a table entry are
    -- reported as invalid opcodes.
    decoded <- maybe (parseInvalidOpcode b) ($ b) (lookup b oneByteOpCodeMap)
    endCol <- getColumn
    let -- The column is advanced by one per byte and starts at 1, so
        -- (column - 1) is the byte offset into the input.
        addr = fromIntegral (startAddr st) + (startCol - 1)
        raw  = take (endCol - startCol) input
    return $ case decoded of
        Instr oc size ops -> Instruction oc size ops addr raw
        Bad byte desc     -> BadInstruction byte desc addr raw
  where
    getColumn = do
        pos <- getPosition
        return (sourceColumn pos)

-- Switch between 16-bit and 32-bit size.
toggleBitMode :: OperandSize -> OperandSize
toggleBitMode mode = case mode of
    BIT16 -> BIT32
    BIT32 -> BIT16

-- Bit masks for the four flag bits of a REX prefix.
rex_B, rex_X, rex_R, rex_W :: Word8
rex_B = 0x1
rex_X = 0x2
rex_R = 0x4
rex_W = 0x8

-- Return True if the given REX prefix bit appears in the list of current
-- instruction prefixes, False otherwise.  Only the most recently recorded
-- REX byte (0x40..0x4f) is inspected.
hasREX :: Word8 -> PState -> Bool
hasREX rex st =
    case filter isRexByte (prefixes st) of
        (r : _) -> r .&. rex == rex
        []      -> False
  where
    isRexByte b = b >= 0x40 && b <= 0x4f

-- Return True if the given prefix appears in the list of current
-- instruction prefixes, False otherwise.
hasPrefix :: Word8 -> PState -> Bool
hasPrefix b st = elem b (prefixes st)

-- Record a prefix byte at the front of the current instruction's prefix
-- list.
addPrefix b = do
    st <- getState
    setState st{prefixes = b : prefixes st}

-- Parse a single prefix byte and remember it in the parser state.  If in
-- 64-bit mode, accept REX prefixes.
--
-- The address-size override (0x67) is recorded under its own byte value.
-- It used to be recorded as 0x66, which made an address-size override
-- look like an operand-size prefix to 'hasPrefix'.
--
-- The branch hints "not taken" (0x2e) and "taken" (0x3e) share their
-- encodings with the CS and DS segment overrides, so they need no
-- alternatives of their own.
parsePrefix :: Word8Parser ()
parsePrefix =
        choice (map recordByte simplePrefixes)
    <|> sizeOverride 0x66 toggleOperandSize   -- operand-size override
    <|> sizeOverride 0x67 toggleAddressSize   -- address-size override
    <|> rexPrefix
  where
    simplePrefixes =
        [ 0xf0   -- LOCK
        , 0xf2   -- REPNE/REPNZ
        , 0xf3   -- REP or REPE/REPZ
        , 0x2e   -- CS segment override / branch not taken
        , 0x36   -- SS segment override
        , 0x3e   -- DS segment override / branch taken
        , 0x26   -- ES segment override
        , 0x64   -- FS segment override
        , 0x65   -- GS segment override
        ]

    -- Accept exactly this byte and remember it as a prefix.
    recordByte b = word8 b >>= addPrefix

    -- Accept a size-override byte, flip the corresponding size in the
    -- parser state, and remember the byte itself.
    sizeOverride b toggle = do
        _ <- word8 b
        st <- getState
        setState (toggle st)
        addPrefix b

    toggleOperandSize st = st{operandBitMode = toggleBitMode (operandBitMode st)}
    toggleAddressSize st = st{addressBitMode = toggleBitMode (addressBitMode st)}

    -- REX prefixes (0x40..0x4f) are only prefixes in 64-bit mode.
    rexPrefix = do
        st <- getState
        if in64BitMode st
            then choice (map recordByte [0x40 .. 0x4f])
            else pzero

-- Accept the single unsigned byte B.  Each accepted byte advances the
-- source column by one.
word8 :: Word8 -> Word8Parser Word8
word8 b = tokenPrim show advance accept
  where
    advance pos _ _ = incSourceColumn pos 1
    accept by
        | by == b   = Just by
        | otherwise = Nothing

-- Accept and return a single unsigned byte.  Each accepted byte advances
-- the source column by one.
anyWord8 :: Word8Parser Word8
anyWord8 = tokenPrim show advance Just
  where
    advance pos _ _ = incSourceColumn pos 1

-- Accept any 8-bit signed byte.
anyInt8 :: Word8Parser Int8
anyInt8 = do
    b <- anyWord8
    return (fromIntegral b)

-- Accept any 16-bit unsigned word.
-- The first byte read is the low-order byte.
anyWord16 :: Word8Parser Word16
anyWord16 = liftM2 combine anyWord8 anyWord8
  where
    combine lo hi = fromIntegral lo .|. (fromIntegral hi `shiftL` 8)

-- Accept any 16-bit signed integer.
anyInt16 :: Word8Parser Int16
anyInt16 = liftM fromIntegral anyWord16

-- Accept a 32-bit unsigned word.  The first 16-bit half read is the
-- low-order half.
anyWord32 :: Word8Parser Word32
anyWord32 = liftM2 combine anyWord16 anyWord16
  where
    combine lo hi = fromIntegral lo .|. (fromIntegral hi `shiftL` 16)

-- Accept a 32-bit signed integer.
anyInt32 :: Word8Parser Int32
anyInt32 = liftM fromIntegral anyWord32

-- Accept a 64-bit unsigned word.  The first 32-bit half read is the
-- low-order half.
anyWord64 :: Word8Parser Word64
anyWord64 = liftM2 combine anyWord32 anyWord32
  where
    combine lo hi = fromIntegral lo .|. (fromIntegral hi `shiftL` 32)

-- Accept a 64-bit signed integer.
anyInt64 :: Word8Parser Int64
anyInt64 = liftM fromIntegral anyWord64

-- Accept a 16-bit word for 16-bit operand-size, a 32-bit word for
-- 32-bit operand-size, or a 64-bit word in 64-bit mode.
anyWordV :: Word8Parser Word64
anyWordV = do
    st <- getState
    case (in64BitMode st, operandBitMode st) of
        (True, _)      -> anyWord64
        (False, BIT16) -> liftM fromIntegral anyWord16
        (False, BIT32) -> liftM fromIntegral anyWord32

-- Accept a 16-bit word for 16-bit operand-size or a 32-bit word for
-- 32-bit operand-size or 64-bit mode.
anyWordZ :: Word8Parser Word32
anyWordZ = do
    st <- getState
    case operandBitMode st of
        BIT16 -> liftM fromIntegral anyWord16
        BIT32 -> anyWord32

-- Accept a 16-bit integer for 16-bit operand-size or a 32-bit word for
-- 32-bit operand-size or 64-bit mode.
anyIntZ :: Word8Parser Int32 anyIntZ = do st <- getState case operandBitMode st of BIT16 -> do w <- anyInt16 let w' :: Int32 w' = fromIntegral w return w' BIT32 -> anyInt32 -- Accept a 32-bit far address for 16-bit operand-size or a 48-bit far -- address for 32-bit operand-size. anyWordP :: Word8Parser Word64 anyWordP = do st <- getState case operandBitMode st of BIT16 -> do w <- anyWord32 let w' :: Word64 w' = fromIntegral w return w' _ -> do w1 <- anyWord32 w2 <- anyWord16 let w1', w2' :: Word64 w1' = fromIntegral w1 w2' = fromIntegral w2 return (w1' .|. (w2' `shiftL` 32)) oneByteOpCodeMap = [(0x00, parseALU ADD), (0x01, parseALU ADD), (0x02, parseALU ADD), (0x03, parseALU ADD), (0x04, parseALU ADD), (0x05, parseALU ADD), (0x06, invalidIn64BitMode (parsePUSHSeg "es")), (0x07, invalidIn64BitMode (parsePOPSeg "es")), (0x08, parseALU OR), (0x09, parseALU OR), (0x0a, parseALU OR), (0x0b, parseALU OR), (0x0c, parseALU OR), (0x0d, parseALU OR), (0x0e, invalidIn64BitMode (parsePUSHSeg "cs")), (0x0f, twoByteEscape), (0x10, parseALU ADC), (0x11, parseALU ADC), (0x12, parseALU ADC), (0x13, parseALU ADC), (0x14, parseALU ADC), (0x15, parseALU ADC), (0x16, invalidIn64BitMode (parsePUSHSeg "ss")), (0x17, invalidIn64BitMode (parsePOPSeg "ss")), (0x18, parseALU SBB), (0x19, parseALU SBB), (0x1a, parseALU SBB), (0x1b, parseALU SBB), (0x1c, parseALU SBB), (0x1d, parseALU SBB), (0x1e, invalidIn64BitMode (parsePUSHSeg "ds")), (0x1f, invalidIn64BitMode (parsePOPSeg "ds")), (0x20, parseALU AND), (0x21, parseALU AND), (0x22, parseALU AND), (0x23, parseALU AND), (0x24, parseALU AND), (0x25, parseALU AND), (0x26, parseInvalidPrefix), -- ES segment override prefix (0x27, invalidIn64BitMode (parseGeneric DAA OPNONE)), (0x28, parseALU SUB), (0x29, parseALU SUB), (0x2a, parseALU SUB), (0x2b, parseALU SUB), (0x2c, parseALU SUB), (0x2d, parseALU SUB), (0x2e, parseInvalidPrefix), -- CS segment override prefix (0x2f, invalidIn64BitMode (parseGeneric DAS OPNONE)), (0x30, parseALU XOR), (0x31, 
parseALU XOR), (0x32, parseALU XOR), (0x33, parseALU XOR), (0x34, parseALU XOR), (0x35, parseALU XOR), (0x36, parseInvalidPrefix), -- SS segment override prefix (0x37, invalidIn64BitMode (parseGeneric AAA OPNONE)), (0x38, parseALU CMP), (0x39, parseALU CMP), (0x3a, parseALU CMP), (0x3b, parseALU CMP), (0x3c, parseALU CMP), (0x3d, parseALU CMP), (0x3e, parseInvalidPrefix), -- DS segment override prefix (0x3f, invalidIn64BitMode (parseGeneric AAS OPNONE)), (0x40, invalidIn64BitMode parseINC), -- REX Prefix in 64-bit mode (0x41, invalidIn64BitMode parseINC), -- ... (0x42, invalidIn64BitMode parseINC), (0x43, invalidIn64BitMode parseINC), (0x44, invalidIn64BitMode parseINC), (0x45, invalidIn64BitMode parseINC), (0x46, invalidIn64BitMode parseINC), (0x47, invalidIn64BitMode parseINC), (0x48, invalidIn64BitMode parseDEC), (0x49, invalidIn64BitMode parseDEC), (0x4a, invalidIn64BitMode parseDEC), (0x4b, invalidIn64BitMode parseDEC), (0x4c, invalidIn64BitMode parseDEC), (0x4d, invalidIn64BitMode parseDEC), (0x4e, invalidIn64BitMode parseDEC), (0x4f, invalidIn64BitMode parseDEC), (0x50, parsePUSH), (0x51, parsePUSH), (0x52, parsePUSH), (0x53, parsePUSH), (0x54, parsePUSH), (0x55, parsePUSH), (0x56, parsePUSH), (0x57, parsePUSH), (0x58, parsePOP), (0x59, parsePOP), (0x5a, parsePOP), (0x5b, parsePOP), (0x5c, parsePOP), (0x5d, parsePOP), (0x5e, parsePOP), (0x5f, parsePOP), (0x60, invalidIn64BitMode parsePUSHA), (0x61, invalidIn64BitMode parsePOPA), (0x62, invalidIn64BitMode parseBOUND), (0x63, choose64BitMode parseARPL parseMOVSXD), -- MOVSXD in 64-bit mode (0x64, parseInvalidPrefix), -- FS segment override prefix (0x65, parseInvalidPrefix), -- GS segment override prefix (0x66, parseInvalidPrefix), -- operand-size prefix (0x67, parseInvalidPrefix), -- address-size prefix (0x68, parsePUSHImm), (0x69, parseIMUL), (0x6a, parsePUSHImm), (0x6b, parseIMUL), (0x6c, parseINS), (0x6d, parseINS), (0x6e, parseOUTS), (0x6f, parseOUTS), (0x70, parseJccShort), (0x71, parseJccShort), (0x72, 
parseJccShort), (0x73, parseJccShort), (0x74, parseJccShort), (0x75, parseJccShort), (0x76, parseJccShort), (0x77, parseJccShort), (0x78, parseJccShort), (0x79, parseJccShort), (0x7a, parseJccShort), (0x7b, parseJccShort), (0x7c, parseJccShort), (0x7d, parseJccShort), (0x7e, parseJccShort), (0x7f, parseJccShort), (0x80, parseGrp1), (0x81, parseGrp1), (0x82, invalidIn64BitMode parseGrp1), (0x83, parseGrp1), (0x84, parseTEST), (0x85, parseTEST), (0x86, parseXCHG), (0x87, parseXCHG), (0x88, parseMOV), (0x89, parseMOV), (0x8a, parseMOV), (0x8b, parseMOV), (0x8c, parseMOV), (0x8d, parseLEA), (0x8e, parseMOV), (0x8f, parseGrp1A), (0x90, parse0x90), -- NOP, PAUSE(F3), XCHG r8,rAX (0x91, parseXCHGReg), (0x92, parseXCHGReg), (0x93, parseXCHGReg), (0x94, parseXCHGReg), (0x95, parseXCHGReg), (0x96, parseXCHGReg), (0x97, parseXCHGReg), (0x98, parseCBW_CWDE_CDQE), (0x99, parseCWD_CDQ_CQO), (0x9a, invalidIn64BitMode parseCALLF), (0x9b, parseGeneric WAIT OPNONE), (0x9c, parsePUSHF), (0x9d, parsePOPF), (0x9e, parseGeneric SAHF OPNONE), (0x9f, parseGeneric LAHF OPNONE), (0xa0, parseMOVImm), (0xa1, parseMOVImm), (0xa2, parseMOVImm), (0xa3, parseMOVImm), (0xa4, parseMOVS), (0xa5, parseMOVS), (0xa6, parseCMPS), (0xa7, parseCMPS), (0xa8, parseTESTImm), (0xa9, parseTESTImm), (0xaa, parseSTOS), (0xab, parseSTOS), (0xac, parseLODS), (0xad, parseLODS), (0xae, parseSCAS), (0xaf, parseSCAS), (0xb0, parseMOVImmByteToByteReg), (0xb1, parseMOVImmByteToByteReg), (0xb2, parseMOVImmByteToByteReg), (0xb3, parseMOVImmByteToByteReg), (0xb4, parseMOVImmByteToByteReg), (0xb5, parseMOVImmByteToByteReg), (0xb6, parseMOVImmByteToByteReg), (0xb7, parseMOVImmByteToByteReg), (0xb8, parseMOVImmToReg), (0xb9, parseMOVImmToReg), (0xba, parseMOVImmToReg), (0xbb, parseMOVImmToReg), (0xbc, parseMOVImmToReg), (0xbd, parseMOVImmToReg), (0xbe, parseMOVImmToReg), (0xbf, parseMOVImmToReg), (0xc0, parseGrp2), (0xc1, parseGrp2), (0xc2, parseRETN), (0xc3, parseRETN), (0xc4, invalidIn64BitMode (parseLoadSegmentRegister 
LES)),
     (0xc5, invalidIn64BitMode (parseLoadSegmentRegister LDS)),
     (0xc6, parseGrp11),
     (0xc7, parseGrp11),
     (0xc8, parseENTER),
     (0xc9, parseGeneric LEAVE OPNONE),
     (0xca, parseGenericIw RETF),
     (0xcb, parseGeneric RETF OPNONE),
     (0xcc, parseGeneric INT3 OPNONE),
     (0xcd, parseGenericIb INT),
     (0xce, parseGeneric INTO OPNONE),
     (0xcf, parseGeneric IRET OPNONE),
     (0xd0, parseGrp2),
     (0xd1, parseGrp2),
     (0xd2, parseGrp2),
     (0xd3, parseGrp2),
     (0xd4, parseGenericIb AAM),
     (0xd5, parseGenericIb AAD),
     (0xd6, parseReserved), -- reserved
     (0xd7, parseGeneric XLAT OPNONE),
     (0xd8, parseESC),
     (0xd9, parseESC),
     (0xda, parseESC),
     (0xdb, parseESC),
     (0xdc, parseESC),
     (0xdd, parseESC),
     (0xde, parseESC),
     (0xdf, parseESC),
     (0xe0, parseGenericJb LOOPNE),
     (0xe1, parseGenericJb LOOPE),
     (0xe2, parseGenericJb LOOP),
     (0xe3, parseGenericJb JCXZ), -- depends on bit mode
     (0xe4, parseINImm),
     (0xe5, parseINImm),
     (0xe6, parseOUTImm),
     (0xe7, parseOUTImm),
     (0xe8, parseGenericJz CALL),
     (0xe9, parseGenericJz JMP),
     (0xea, parseJMPF),
     (0xeb, parseGenericJb JMP),
     (0xec, parseIN),
     (0xed, parseIN),
     (0xee, parseOUT),
     (0xef, parseOUT),
     (0xf0, parseInvalidPrefix), -- LOCK prefix
     (0xf1, parseReserved), -- reserved
     (0xf2, parseInvalidPrefix), -- REPNE prefix
     (0xf3, parseInvalidPrefix), -- REP/REPQ prefix
     (0xf4, parseGeneric HLT OPNONE),
     (0xf5, parseGeneric CMC OPNONE),
     (0xf6, parseGrp3),
     (0xf7, parseGrp3),
     (0xf8, parseGeneric CLC OPNONE),
     (0xf9, parseGeneric STC OPNONE),
     (0xfa, parseGeneric CLI OPNONE),
     (0xfb, parseGeneric STI OPNONE),
     (0xfc, parseGeneric CLD OPNONE),
     (0xfd, parseGeneric STD OPNONE),
     (0xfe, parseGrp4),
     (0xff, parseGrp5)
    ]

-- The following handlers never consume input; each one yields a 'Bad'
-- pseudo-instruction that carries the offending byte and a reason.

-- Byte 'b' is a prefix byte in a position where an opcode was looked up.
parseInvalidPrefix b = do
    return $ Bad b "invalid prefix"

-- Byte 'b' does not encode an instruction.
parseInvalidOpcode b = do
    return $ Bad b "invalid opcode"

-- Byte 'b' is a reserved slot of the opcode map.
parseReserved b = do
    return $ Bad b "reserved opcode"

-- Byte 'b' encodes an architecturally undefined opcode; 'name' is shown
-- in the message.
parseUndefined name b = do
    return $ Bad b ("undefined opcode: " ++ show name)

-- Byte 'b' is known but the disassembler has no decoder for it.
parseUnimplemented b = do
    return $ Bad b "not implemented yet"

-- Run the opcode parser 'p' on byte 'b' unless the state says we are in
-- 64-bit mode, in which case a 'Bad' instruction is produced instead.
invalidIn64BitMode p b = do
    st <- getState
    if in64BitMode st
       then return $ Bad b "invalid in 64-bit mode"
       else p b

-- Run the opcode parser 'p' on byte 'b' only when the state says we are
-- in 64-bit mode; otherwise produce a 'Bad' instruction.
onlyIn64BitMode p b = do
    st <- getState
    if in64BitMode st
       then p b
       else return $ Bad b "only in 64-bit mode"

-- Dispatch to 'p64' in 64-bit mode and to 'p32' otherwise.
choose64BitMode p32 p64 b = do
    st <- getState
    if in64BitMode st
       then p64 b
       else p32 b

-- Dispatch on the current operand size: 'p16' for BIT16, 'p32' for BIT32.
chooseOperandSize p16 p32 b = do
    st <- getState
    case operandBitMode st of
      BIT16 -> p16 b
      BIT32 -> p32 b

-- Dispatch on the current address size: 'p16' for BIT16, 'p32' for BIT32.
chooseAddressSize p16 p32 b = do
    st <- getState
    case addressBitMode st of
      BIT16 -> p16 b
      BIT32 -> p32 b

-- Read one byte and split it into the (mod, reg/opcode, r/m) fields.
parseModRM = do
    b <- anyWord8
    parseModRM' b

-- Split an already-read ModRM byte into (bits 7-6, bits 5-3, bits 2-0).
parseModRM' b = do
    return (b `shiftR` 6, (b `shiftR` 3) .&. 7, (b .&. 7))

-- Read one byte and split it into the (scale, index, base) fields.
parseSIB = do
    b <- anyWord8
    parseSIB' b

-- Split an already-read SIB byte into (bits 7-6, bits 5-3, bits 2-0).
parseSIB' b = do
    return (b `shiftR` 6, (b `shiftR` 3) .&. 7, (b .&. 7))

-- Translate the 2-bit SIB scale field into the index multiplier.
-- Only defined for the four values a 2-bit field can hold.
scaleToFactor 0 = 1
scaleToFactor 1 = 2
scaleToFactor 2 = 4
scaleToFactor 3 = 8

-- Read a ModRM byte from the input and decode the addressing form it
-- introduces; see parseAddress32' for the meaning of the result.
parseAddress32 :: InstrOperandSize ->
   Word8Parser (Operand, Operand, Word8, Word8, Word8)
parseAddress32 s = do
    b <- anyWord8
    parseAddress32' s b

-- Decode the addressing form selected by the already-read ModRM byte
-- 'modrm', consuming a SIB byte and/or displacement where the encoding
-- calls for one.  'opsize' is attached to any memory operand produced.
--
-- The result is (r/m operand, register operand, mod, reg/opcode, r/m);
-- the three raw fields are returned so that callers can reinterpret the
-- reg field, e.g. as an opcode extension or as a byte register.
parseAddress32' :: InstrOperandSize ->
    Word8 ->
    Word8Parser (Operand, Operand, Word8, Word8, Word8)
parseAddress32' opsize modrm = do
    (mod, reg_opc, rm) <- parseModRM' modrm
    st <- getState
    -- Register names for register operands.
    let opregnames = if in64BitMode st && hasREX rex_W st
                       then regnames64
                       else case operandBitMode st of
                              BIT16 -> regnames16
                              BIT32 -> regnames32
    -- Register names for base/index registers of memory operands.
    let addregnames = if in64BitMode st && hasREX rex_R st
                        then regnames64
                        else case addressBitMode st of
                               BIT16 -> regnames16
                               BIT32 -> regnames32
    -- The operand named by the reg/opcode field is the same in all forms.
    let regOp = OpReg (opregnames !! fromIntegral reg_opc)
                      (fromIntegral reg_opc)
        -- Pair a decoded r/m operand with the parts every form returns.
        result rmOp = return (rmOp, regOp, mod, reg_opc, rm)
        -- Name of the address register with the given field value.
        addrReg n = addregnames !! fromIntegral n
    case mod of
      -- mod = 0: no displacement, except for the two escape encodings.
      0 -> case rm of
        -- rm = 4: a SIB byte follows.
        4 -> do
          (s, i, b) <- parseSIB
          case (i, b) of
            -- No index, no base: plain 32-bit displacement.
            (4, 5) -> do
              disp <- anyWord32
              result (OpAddr (fromIntegral disp) opsize)
            -- No base: scaled index plus 32-bit displacement.
            (_, 5) -> do
              disp <- anyWord32
              result (OpIndexDisp (addrReg i) (scaleToFactor s)
                                  (fromIntegral disp) opsize)
            -- No index: base register only.
            (4, _) -> result (OpInd (addrReg b) opsize)
            (_ ,_) -> result (OpBaseIndex (addrReg b) (addrReg i)
                                          (scaleToFactor (fromIntegral s))
                                          opsize)
        -- rm = 5: absolute 32-bit address.
        5 -> do
          disp <- anyWord32
          result (OpAddr disp opsize)
        _ -> result (OpInd (addrReg rm) opsize)
      -- mod = 1: signed 8-bit displacement.
      1 -> case rm of
        4 -> do
          (s, i, b) <- parseSIB
          disp <- anyInt8
          case i of
            4 -> result (OpIndDisp (addrReg b) (fromIntegral disp) opsize)
            _ -> result (OpBaseIndexDisp (addrReg b) (addrReg i)
                                         (scaleToFactor (fromIntegral s))
                                         (fromIntegral disp) opsize)
        _ -> do
          disp <- anyInt8
          result (OpIndDisp (addrReg rm) (fromIntegral disp) opsize)
      -- mod = 2: signed 32-bit displacement.
      2 -> case rm of
        4 -> do
          (s, i, b) <- parseSIB
          disp <- anyInt32
          case i of
            4 -> result (OpIndDisp (addrReg b) (fromIntegral disp) opsize)
            _ -> result (OpBaseIndexDisp (addrReg b) (addrReg i)
                                         (scaleToFactor (fromIntegral s))
                                         (fromIntegral disp) opsize)
        _ -> do
          disp <- anyInt32
          result (OpIndDisp (addrReg rm) (fromIntegral disp) opsize)
      -- mod = 3: the r/m field names a register, not memory.
      3 -> result (OpReg (opregnames !! fromIntegral rm) (fromIntegral rm))

-- Decode one of the regular two-operand ALU encodings.  'op' is the
-- operation and the low three bits of the opcode byte 'b' select the
-- operand form: 0 Eb,Gb / 1 Ev,Gv / 2 Gb,Eb / 3 Gv,Ev / 4 AL,Ib / 5 eAX,Iz.
parseALU :: Opcode -> Word8 -> Word8Parser Instr
parseALU op b = do
    opsize <- instrOperandSize
    case b .&. 0x07 of
      -- The byte forms decode their r/m operand as OP8 so that a memory
      -- operand carries the same size as the instruction.
      0 -> do (op1, op2, mod, reg, rm) <- parseAddress32 OP8
              return $ Instr op OP8
                 [op1, (OpReg (regnames8 !! fromIntegral reg)) (fromIntegral reg)]
      1 -> do (op1, op2, mod, reg, rm) <- parseAddress32 opsize
              return $ Instr op opsize [op1, op2]
      2 -> do (op1, op2, mod, reg, rm) <- parseAddress32 OP8
              return $ Instr op OP8
                 [(OpReg (regnames8 !! fromIntegral reg)) (fromIntegral reg), op1]
      3 -> do (op1, op2, mod, reg, rm) <- parseAddress32 opsize
              return $ Instr op opsize [op2, op1]
      4 -> do b <- anyWord8
              return $ Instr op OP8 [(OpReg "al" 0), (OpImm (fromIntegral b))]
      5 -> do b <- anyWordZ
              rn <- registerName 0
              return $ Instr op opsize [(OpReg rn 0), (OpImm b)]
      _ -> return $ Bad b "no ALU opcode (internal error)"

-- PUSH of the segment register named 'r'; the opcode byte is ignored.
parsePUSHSeg :: String -> Word8 -> Word8Parser Instr
parsePUSHSeg r _ = do
    return $ Instr PUSH OP16 [(OpReg r 0)] -- FIXME: register number

-- POP into the segment register named 'r'; the opcode byte is ignored.
parsePOPSeg :: String -> Word8 -> Word8Parser Instr
parsePOPSeg r _ = do
    return $ Instr POP OP16 [(OpReg r 0)] -- FIXME: register number

-- Gv, Ew form: register destination, 16-bit register or memory source.
-- A register source is renamed to its 16-bit name.  Both the register
-- and the memory form report a 16-bit instruction size.
parseGenericGvEw name b = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP16
    case op1 of
      OpReg _ num -> return $ Instr name OP16 [op2, OpReg (regnames16 !! num) num]
      _ -> return $ Instr name OP16 [op2, op1]

-- Gv, Eb form: register destination, 8-bit register or memory source.
-- A register source is renamed to its 8-bit name.
parseGenericGvEb name b = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP8
    case op1 of
      OpReg _ num -> return $ Instr name OP8 [op2, OpReg (regnames8 !! num) num]
      _ -> return $ Instr name OP8 [op2, op1]

-- Gv, Ev form: register destination, register or memory source.
parseGenericGvEv name b = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    return $ Instr name opsize [op2, op1]

-- Ev, Gv form: register or memory destination, register source.
parseGenericEvGv name b = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    return $ Instr name opsize [op1, op2]

parseGenericEbGb name b = do (op1, op2, mod, reg, rm) <- parseAddress32 OP8 return $ Instr name OP8 [op1, (OpReg (regnames8 !!
fromIntegral reg) (fromIntegral reg))]

-- Ev form: a single register-or-memory operand of the current operand
-- size; the reg field of the ModRM byte is ignored.
parseGenericEv name b = do
    size <- instrOperandSize
    (operand, _, _, _, _) <- parseAddress32 size
    return (Instr name size [operand])

-- Dispatch table for opcodes that follow the 0x0f escape byte: maps the
-- second opcode byte to the parser that decodes the rest.
-- NOTE(review): 0x05 and 0x07 both map to SYSCALL; 0x0f 0x07 is normally
-- SYSRET - confirm whether that is intended.
twoByteOpCodeMap =
    [(0x00, parseGrp6),
     (0x01, parseGrp7),
     (0x02, parseGenericGvEw LAR),
     (0x03, parseGenericGvEw LSL),
     (0x04, parseReserved),
     (0x05, onlyIn64BitMode (parseGeneric SYSCALL OPNONE)),
     (0x06, parseGeneric CLTS OPNONE),
     (0x07, onlyIn64BitMode (parseGeneric SYSCALL OPNONE)),
     (0x08, parseGeneric INVD OPNONE),
     (0x09, parseGeneric WBINVD OPNONE),
     (0x0a, parseReserved),
     (0x0b, parseUndefined UD2),
     (0x0c, parseReserved),
     (0x0d, parseGenericEv NOP),
     (0x0e, parseReserved),
     (0x0f, parseReserved),

     (0x10, parseMOVUPS),
     (0x11, parseMOVUPS),
     (0x12, parseMOVLPS),
     (0x13, parseMOVLPS),
     (0x14, parseUNPCKLPS),
     (0x15, parseUNPCKHPS),
     (0x16, parseMOVHPS),
     (0x17, parseMOVHPS),
     (0x18, parseGrp16),
     (0x19, parseReserved),
     (0x1a, parseReserved),
     (0x1b, parseReserved),
     (0x1c, parseReserved),
     (0x1d, parseReserved),
     (0x1e, parseReserved),
     (0x1f, parseGenericEv NOP),

     (0x20, parseMOVCtrlDebug),
     (0x21, parseMOVCtrlDebug),
     (0x22, parseMOVCtrlDebug),
     (0x23, parseMOVCtrlDebug),
     (0x24, parseReserved),
     (0x25, parseReserved),
     (0x26, parseReserved),
     (0x27, parseReserved),
     (0x28, parseMOVAPS),
     (0x29, parseMOVAPS),
     (0x2a, parseCVTI2PS),
     (0x2b, parseMOVNTPS),
     (0x2c, parseCVTTPS2PI),
     (0x2d, parseCVTPS2PI),
     (0x2e, parseUCOMISS),
     (0x2f, parseCOMISS),

     (0x30, parseGeneric WRMSR OPNONE),
     (0x31, parseGeneric RDTSC OPNONE),
     (0x32, parseGeneric RDMSR OPNONE),
     (0x33, parseGeneric RDPMC OPNONE),
     (0x34, parseGeneric SYSENTER OPNONE),
     (0x35, parseGeneric SYSEXIT OPNONE),
     (0x36, parseReserved),
     (0x37, parseReserved),
     (0x38, parseReserved),
     (0x39, parseReserved),
     (0x3a, parseReserved),
     (0x3b, parseReserved),
     (0x3c, parseReserved),
     (0x3d, parseReserved),
     (0x3e, parseReserved),
     (0x3f, parseReserved),

     (0x40, parseCMOVcc),
     (0x41, parseCMOVcc),
     (0x42, parseCMOVcc),
     (0x43, parseCMOVcc),
     (0x44, parseCMOVcc),
     (0x45, parseCMOVcc),
     (0x46, parseCMOVcc),
     (0x47, parseCMOVcc),
     (0x48, parseCMOVcc),
     (0x49, parseCMOVcc),
     (0x4a, parseCMOVcc),
     (0x4b, parseCMOVcc),
     (0x4c, parseCMOVcc),
     (0x4d, parseCMOVcc),
     (0x4e, parseCMOVcc),
     (0x4f, parseCMOVcc),

     (0x50, parseMOVSKPS),
     (0x51, parseSQRTPS),
     (0x52, parseRSQRTPS),
     (0x53, parseRCPPS),
     (0x54, parseANDPS),
     (0x55, parseANDNPS),
     (0x56, parseORPS),
     (0x57, parseXORPS),
     (0x58, parseADDPS),
     (0x59, parseMULPS),
     (0x5a, parseCVTPS2PD),
     (0x5b, parseCVTDQ2PS),
     (0x5c, parseSUBPS),
     (0x5d, parseMINPS),
     (0x5e, parseDIVPS),
     (0x5f, parseMAXPS),

     (0x60, parsePUNPCKLBW),
     (0x61, parsePUNPCKLWD),
     (0x62, parsePUNPCKLDQ),
     (0x63, parsePACKSSWB),
     (0x64, parsePCMPGTB),
     (0x65, parsePCMPGTW),
     (0x66, parsePCMPGTD),
     (0x67, parsePACKUSWB),
     (0x68, parsePUNPCKHBW),
     (0x69, parsePUNPCKHWD),
     (0x6a, parsePUNPCKHDQ),
     (0x6b, parsePACKSSDW),
     (0x6c, parsePUNPCKLQDQ),
     (0x6d, parsePUNPCKHQDQ),
     (0x6e, parseMOVD_Q),
     (0x6f, parseMOVQ),

     (0x70, parsePSHUFW),
     (0x71, parseGrp12),
     (0x72, parseGrp13),
     (0x73, parseGrp14),
     (0x74, parsePCMPEQB),
     (0x75, parsePCMPEQW),
     (0x76, parsePCMPEQD),
     (0x77, parseGeneric EMMS OPNONE),
     (0x78, parseVMREAD),
     (0x79, parseVMWRITE),
     (0x7a, parseReserved),
     (0x7b, parseReserved),
     (0x7c, parseHADDPS),
     (0x7d, parseHSUBPS),
     (0x7e, parseMOVD_Q),
     (0x7f, parseMOVQ),

     (0x80, parseJccLong),
     (0x81, parseJccLong),
     (0x82, parseJccLong),
     (0x83, parseJccLong),
     (0x84, parseJccLong),
     (0x85, parseJccLong),
     (0x86, parseJccLong),
     (0x87, parseJccLong),
     (0x88, parseJccLong),
     (0x89, parseJccLong),
     (0x8a, parseJccLong),
     (0x8b, parseJccLong),
     (0x8c, parseJccLong),
     (0x8d, parseJccLong),
     (0x8e, parseJccLong),
     (0x8f, parseJccLong),

     (0x90, parseSETcc),
     (0x91, parseSETcc),
     (0x92, parseSETcc),
     (0x93, parseSETcc),
     (0x94, parseSETcc),
     (0x95, parseSETcc),
     (0x96, parseSETcc),
     (0x97, parseSETcc),
     (0x98, parseSETcc),
     (0x99, parseSETcc),
     (0x9a, parseSETcc),
     (0x9b, parseSETcc),
     (0x9c, parseSETcc),
     (0x9d, parseSETcc),
     (0x9e, parseSETcc),
     (0x9f, parseSETcc),

     (0xa0, parsePUSHSeg "fs"),
     (0xa1, parsePOPSeg "fs"),
     (0xa2, parseGeneric CPUID OPNONE),
     (0xa3, parseGenericEvGv BT),
     (0xa4, parseSHLD),
     (0xa5, parseSHLD),
     (0xa6, parseReserved),
     (0xa7, parseReserved),
     (0xa8, parsePUSHSeg "gs"),
     (0xa9, parsePOPSeg "gs"),
     (0xaa, parseGeneric RSM OPNONE),
     (0xab, parseGenericEvGv BTS),
     (0xac, parseSHRD),
     (0xad, parseSHRD),
     (0xae, parseGrp15),
     (0xaf, parseGenericGvEv IMUL),

     (0xb0, parseGenericEbGb CMPXCHG),
     (0xb1, parseGenericEvGv CMPXCHG),
     (0xb2, parseLoadSegmentRegister LSS),
     (0xb3, parseGenericEvGv BTR),
     (0xb4, parseLoadSegmentRegister LFS),
     (0xb5, parseLoadSegmentRegister LGS),
     (0xb6, parseGenericGvEb MOVZXB),
     (0xb7, parseGenericGvEw MOVZXW),
     (0xb8, parseReserved),
     (0xb9, parseGrp10),
     (0xba, parseGrp8),
     (0xbb, parseGenericEvGv BTC),
     (0xbc, parseGenericGvEv BSF),
     (0xbd, parseGenericGvEv BSR),
     (0xbe, parseGenericGvEb MOVSXB),
     (0xbf, parseGenericGvEw MOVSXW),

     (0xc0, parseGenericEbGb XADD),
     (0xc1, parseGenericEvGv XADD),
     (0xc2, parseCMPPS),
     (0xc3, parseMOVNTI),
     (0xc4, parsePINSRW),
     (0xc5, parsePEXTRW),
     (0xc6, parseSHUFPS),
     (0xc7, parseGrp9),
     (0xc8, parseBSWAP),
     (0xc9, parseBSWAP),
     (0xca, parseBSWAP),
     (0xcb, parseBSWAP),
     (0xcc, parseBSWAP),
     (0xcd, parseBSWAP),
     (0xce, parseBSWAP),
     (0xcf, parseBSWAP),

     (0xd0, parseADDSUBPS),
     (0xd1, parsePSRLW),
     (0xd2, parsePSRLD),
     (0xd3, parsePSRLQ),
     (0xd4, parsePADDQ),
     (0xd5, parsePMULLW),
     (0xd6, parseMOVQ),
     (0xd7, parsePMOVMSKB),
     (0xd8, parsePSUBUSB),
     (0xd9, parsePSUBUSW),
     (0xda, parsePMINUB),
     (0xdb, parsePAND),
     (0xdc, parsePADDUSB),
     (0xdd, parsePADDUSW),
     (0xde, parsePMAXUB),
     (0xdf, parsePANDN),

     (0xe0, parsePAVGB),
     (0xe1, parsePSRAW),
     (0xe2, parsePSRAD),
     (0xe3, parsePAVGW),
     (0xe4, parsePMULHUW),
     (0xe5, parsePMULHW),
     (0xe6, parseCVTPD2DQ),
     (0xe7, parseMOVNTQ),
     (0xe8, parsePSUBSB),
     (0xe9, parsePSUBSQ),
     (0xea, parsePMINSW),
     (0xeb, parsePOR),
     (0xec, parsePADDSB),
     (0xed, parsePADDSW),
     (0xee, parsePMAXSW),
     (0xef, parsePXOR),

     (0xf0, parseLDDQU),
     (0xf1, parsePSLLW),
     (0xf2, parsePSLLD),
     (0xf3, parsePSLLQ),
     (0xf4, parsePMULUDQ),
     (0xf5, parsePMADDWD),
     (0xf6, parsePSADBW),
     (0xf7, parseMASKMOVQ),
     (0xf8, parsePSUBB),
     (0xf9, parsePSUBW),
     (0xfa, parsePSUBD),
     (0xfb, parsePSUBQ),
     (0xfc, parsePADDB),
     (0xfd, parsePADDW),
     (0xfe, parsePADDD),
     (0xff, parseReserved)
    ]

-- Handle the two-byte escape: read the second opcode byte and hand it to
-- the parser registered in 'twoByteOpCodeMap'.  The escape byte itself
-- ('b1') is not used.  A byte without a table entry yields a 'Bad'
-- instruction.
twoByteEscape :: Word8 -> Word8Parser Instr
twoByteEscape b1 =
    anyWord8 >>= \opc ->
      maybe (return (Bad opc "invalid two-byte opcode"))
            ($ opc)
            (lookup opc twoByteOpCodeMap)

-- Instruction without operands; the opcode byte is ignored.
parseGeneric name opsize _ = return (Instr name opsize [])

-- Instruction with a single 8-bit immediate operand.
parseGenericIb name b = do
    imm8 <- anyWord8
    return (Instr name OP8 [OpImm (fromIntegral imm8)])

-- Instruction with a single 16-bit immediate operand.
parseGenericIw name _ = do
    imm16 <- anyWord16
    _ <- getPosition
    return (Instr name OP16 [OpImm (fromIntegral imm16)])

-- Instruction with a signed 8-bit relative target.  The operand is the
-- displacement plus the parser column (minus one) after the displacement
-- has been read, plus the configured start address.
parseGenericJb name _ = do
    rel <- anyInt8
    pos <- getPosition
    st <- getState
    let offsetAfter = fromIntegral rel + sourceColumn pos - 1
        target = fromIntegral offsetAfter + startAddr st
    return (Instr name OPNONE [OpAddr target OPNONE])

-- Same as 'parseGenericJb', with the displacement read by 'anyIntZ'.
parseGenericJz name _ = do
    rel <- anyIntZ
    pos <- getPosition
    st <- getState
    let offsetAfter = fromIntegral rel + sourceColumn pos - 1
        target = fromIntegral offsetAfter + startAddr st
    return (Instr name OPNONE [OpAddr target OPNONE])

-- INC with the register taken from the low nibble of the opcode byte.
parseINC b = do
    size <- instrOperandSize
    regName <- registerName regIx
    return (Instr INC size [OpReg regName regIx])
  where
    regIx = fromIntegral (b .&. 0x0f)

-- DEC with the register taken from the low nibble of the opcode byte,
-- less the 8 by which the DEC opcodes are offset.
parseDEC b = do
    size <- instrOperandSize
    regName <- registerName regIx
    return (Instr DEC size [OpReg regName regIx])
  where
    regIx = fromIntegral ((b .&. 0x0f) - 8)

-- PUSH with the register taken from the low nibble of the opcode byte.
-- When the REX bit tested here is set, the operand is named r<n+8>.
parsePUSH b = do
    st <- getState
    plainName <- registerName (fromIntegral reg)
    size <- instrOperandSize
    let regName
          | hasREX rex_R st = "r" ++ show (reg + 8)
          | otherwise       = plainName
    return (Instr PUSH size [OpReg regName (fromIntegral reg)])
  where
    reg = b .&. 0x0f

parsePOP b = let reg = (b .&.
0x0f) - 8 in do st <- getState rn <- registerName (fromIntegral reg) opsize <- instrOperandSize if hasREX rex_R st then return $ Instr POP opsize [OpReg ("r" ++ show (reg + 8)) (fromIntegral reg)] else return $ Instr POP opsize [OpReg rn (fromIntegral reg)]

-- PUSHA with a 16-bit operand size, PUSHAD with a 32-bit one.
parsePUSHA =
    chooseOperandSize
      (const (return (Instr PUSHA OPNONE [])))
      (const (return (Instr PUSHAD OPNONE [])))

-- POPA with a 16-bit operand size, POPAD with a 32-bit one.
parsePOPA =
    chooseOperandSize
      (const (return (Instr POPA OPNONE [])))
      (const (return (Instr POPAD OPNONE [])))

-- BOUND: register operand first, then the r/m operand.
parseBOUND b = do
    (rmOp, regOp, _, _, _) <- parseAddress32 OPNONE
    return (Instr BOUND OPNONE [regOp, rmOp])

-- ARPL: r/m operand first, then the 16-bit register named by the reg
-- field.
parseARPL b = do
    (rmOp, _, _, regField, _) <- parseAddress32 OP16
    let regIx = fromIntegral regField
    return (Instr ARPL OPNONE [rmOp, OpReg (regnames16 !! regIx) regIx])

-- MOVSXD: register operand first, then the r/m operand.
parseMOVSXD b = do
    (rmOp, regOp, _, _, _) <- parseAddress32 OPNONE
    return (Instr MOVSXD OPNONE [regOp, rmOp])

-- PUSH of an immediate: 0x68 reads the value with 'anyWordZ', 0x6a reads
-- a single byte.
parsePUSHImm 0x68 = do
    imm <- anyWordZ
    size <- instrOperandSize
    return (Instr PUSH size [OpImm imm])
parsePUSHImm 0x6a = do
    imm8 <- anyWord8
    size <- instrOperandSize
    return (Instr PUSH size [OpImm (fromIntegral imm8)])

-- Three-operand IMUL: register, r/m, immediate.  0x69 reads the
-- immediate with 'anyWordZ', 0x6b reads a single byte.
parseIMUL 0x69 = do
    size <- instrOperandSize
    (rmOp, regOp, _, _, _) <- parseAddress32 size
    imm <- anyWordZ
    return (Instr IMUL size [regOp, rmOp, OpImm imm])
parseIMUL 0x6b = do
    size <- instrOperandSize
    (rmOp, regOp, _, _, _) <- parseAddress32 size
    imm8 <- anyWord8
    return (Instr IMUL size [regOp, rmOp, OpImm (fromIntegral imm8)])

-- INS: 0x6c is the byte form, 0x6d follows the operand size.
parseINS 0x6c = return (Instr INS OP8 [])
parseINS b@0x6d = chooseOperandSize (sized OP16) (sized OP32) b
  where
    sized sz _ = return (Instr INS sz [])

-- OUTS: 0x6e is the byte form, 0x6f follows the operand size.
parseOUTS 0x6e = return (Instr OUTS OP8 [])
parseOUTS b@0x6f = chooseOperandSize (sized OP16) (sized OP32) b
  where
    sized sz _ = return (Instr OUTS sz [])

-- Short conditional jump.  The condition comes from the low nibble of
-- the opcode byte; the target is the signed 8-bit displacement plus the
-- parser column (minus one) after reading it, plus the start address.
parseJccShort b = do
    rel <- anyInt8
    pos <- getPosition
    st <- getState
    let offsetAfter = fromIntegral rel + sourceColumn pos - 1
        target = fromIntegral offsetAfter + startAddr st
    return (Instr (jccname (b .&. 0xf)) OPNONE [OpAddr target OPNONE])

-- TEST r/m, reg: 0x84 is the byte form, 0x85 follows the operand size.
parseTEST 0x84 = do
    (rmOp, _, _, regField, _) <- parseAddress32 OP8
    let regIx = fromIntegral regField
    return (Instr TEST OP8 [rmOp, OpReg (regnames8 !! regIx) regIx])
parseTEST 0x85 = do
    size <- instrOperandSize
    (rmOp, regOp, _, _, _) <- parseAddress32 size
    return (Instr TEST size [rmOp, regOp])

-- XCHG r/m, reg: 0x86 is the byte form, 0x87 follows the operand size.
parseXCHG 0x86 = do
    (rmOp, _, _, regField, _) <- parseAddress32 OP8
    let regIx = fromIntegral regField
    return (Instr XCHG OP8 [rmOp, OpReg (regnames8 !! regIx) regIx])
parseXCHG 0x87 = do
    size <- instrOperandSize
    (rmOp, regOp, _, _, _) <- parseAddress32 size
    return (Instr XCHG size [rmOp, regOp])

-- MOV between a register and a register-or-memory operand.
--   0x88 / 0x89: r/m <- reg   (byte / operand size)
--   0x8a / 0x8b: reg <- r/m   (byte / operand size)
--   0x8c:        r/m <- segment register
--   0x8e:        segment register <- r/m
parseMOV 0x88 = do
    (rmOp, _, _, regField, _) <- parseAddress32 OP8
    let regIx = fromIntegral regField
    return (Instr MOV OP8 [rmOp, OpReg (regnames8 !! regIx) regIx])
parseMOV 0x89 = do
    size <- instrOperandSize
    (rmOp, regOp, _, _, _) <- parseAddress32 size
    return (Instr MOV size [rmOp, regOp])
parseMOV 0x8a = do
    (rmOp, _, _, regField, _) <- parseAddress32 OP8
    let regIx = fromIntegral regField
    return (Instr MOV OP8 [OpReg (regnames8 !! regIx) regIx, rmOp])
parseMOV 0x8b = do
    size <- instrOperandSize
    (rmOp, regOp, _, _, _) <- parseAddress32 size
    return (Instr MOV size [regOp, rmOp])
parseMOV 0x8c = do
    (rmOp, _, _, regField, _) <- parseAddress32 OP16
    let segName = segregnames !! (fromIntegral regField)
    return (Instr MOV OP16 [rmOp, OpReg segName (fromIntegral regField)])
parseMOV 0x8e = do
    (rmOp, _, _, regField, _) <- parseAddress32 OP16
    let segName = segregnames !! (fromIntegral regField)
    return (Instr MOV OP16 [OpReg segName (fromIntegral regField), rmOp])

-- LEA: register operand first, then the r/m operand.
parseLEA b = do
    (rmOp, regOp, _, _, _) <- parseAddress32 OPNONE
    return (Instr LEA OPNONE [regOp, rmOp])

-- Opcode 0x90: PAUSE when a 0xf3 prefix was seen; otherwise the XCHG
-- register form in 64-bit mode and NOP everywhere else.
parse0x90 b = do
    st <- getState
    case () of
      _ | hasPrefix 0xf3 st -> return (Instr PAUSE OPNONE [])
        | in64BitMode st    -> parseXCHGReg b
        | otherwise         -> return (Instr NOP OPNONE [])

-- XCHG of "rax" with the register in the low nibble of the opcode byte.
-- FIXME: Register name handling not quite right
parseXCHGReg :: Word8 -> Word8Parser Instr
parseXCHGReg b = do
    st <- getState
    other <- if hasREX rex_R st
               then return ("r" ++ show (reg + 8))
               else registerName (fromIntegral reg)
    return (Instr XCHG OP64 [OpReg "rax" 0, OpReg other (fromIntegral reg)])
  where
    reg = b .&. 0x0f

-- CBW / CWDE / CDQE share one opcode.  In 64-bit mode REX.W selects CDQE
-- over CWDE; otherwise the operand size selects CBW (16) or CWDE (32).
parseCBW_CWDE_CDQE b = do
    st <- getState
    case (in64BitMode st, hasREX rex_W st) of
      (True, True)  -> plain CDQE
      (True, False) -> plain CWDE
      (False, _)    -> chooseOperandSize (const (plain CBW))
                                         (const (plain CWDE)) b
  where
    plain opc = return (Instr opc OPNONE [])

-- CWD / CDQ share one opcode.  Outside 64-bit mode the operand size
-- selects CWD (16) or CDQ (32).
-- NOTE(review): with REX.W this returns CDQE, although the function name
-- suggests CQO was meant - confirm.
parseCWD_CDQ_CQO b = do
    st <- getState
    case (in64BitMode st, hasREX rex_W st) of
      (True, True)  -> plain CDQE
      (True, False) -> plain CDQ
      (False, _)    -> chooseOperandSize (const (plain CWD))
                                         (const (plain CDQ)) b
  where
    plain opc = return (Instr opc OPNONE [])

-- Direct far call: a 32-bit value followed by a 16-bit value, returned
-- as two immediates in that order.
parseCALLF b = do
    offset <- anyWord32
    selector <- anyWord16
    return (Instr CALLF OPNONE
              [OpImm (fromIntegral offset), OpImm (fromIntegral selector)])

-- FIXME: Check default/operand sizes.
-- PUSHF with a 16-bit operand size; with a 32-bit operand size PUSHFQ in
-- 64-bit mode and PUSHFD otherwise.
parsePUSHF b = do
    st <- getState
    if in64BitMode st
       then chooseOperandSize
              (\ _ -> return $ Instr PUSHF OPNONE [])
              (\ _ -> return $ Instr PUSHFQ OPNONE []) b
       else chooseOperandSize
              (\ _ -> return $ Instr PUSHF OPNONE [])
              (\ _ -> return $ Instr PUSHFD OPNONE []) b

-- POPF with a 16-bit operand size; with a 32-bit operand size POPFQ in
-- 64-bit mode and POPFD otherwise.
parsePOPF b = do
    st <- getState
    if in64BitMode st
       then chooseOperandSize
              (\ _ -> return $ Instr POPF OPNONE [])
              (\ _ -> return $ Instr POPFQ OPNONE []) b
       else chooseOperandSize
              (\ _ -> return $ Instr POPF OPNONE [])
              (\ _ -> return $ Instr POPFD OPNONE []) b

-- Direct far jump.  The operand is a far pointer: a 32-bit offset
-- followed by a 16-bit selector.  Both parts are consumed and returned
-- as two immediates, in the same order as parseCALLF does; leaving the
-- selector unread would make the next instruction start two bytes early.
parseJMPF b = do
    w <- anyWord32
    s <- anyWord16
    return $ Instr JMPF OPNONE [OpImm (fromIntegral w),
                                OpImm (fromIntegral s)]

-- MOV between the accumulator and an offset embedded in the instruction.
-- The offset is 16 or 32 bits wide depending on the address size.
--   0xa0: al <- offset     0xa1: (e)ax <- offset
--   0xa2: offset <- al     0xa3: offset <- (e)ax
-- NOTE(review): the offset is reported as an immediate (OpImm); these
-- opcodes normally address memory, so OpAddr may be what is wanted -
-- confirm against the printing code.
parseMOVImm b@0xa0 = do
    chooseAddressSize
      (\ _ -> do w <- anyWord16
                 return $ Instr MOV OP8 [OpReg "al" 0, OpImm (fromIntegral w)])
      (\ _ -> do w <- anyWord32
                 return $ Instr MOV OP8 [OpReg "al" 0, OpImm w])
      b
parseMOVImm b@0xa1 = do
    opsize <- instrOperandSize
    reg <- registerName 0
    chooseAddressSize
      (\ _ -> do w <- anyWord16
                 return $ Instr MOV opsize [OpReg reg 0, OpImm (fromIntegral w)])
      (\ _ -> do w <- anyWord32
                 return $ Instr MOV opsize [OpReg reg 0, OpImm w])
      b
parseMOVImm b@0xa2 = do
    chooseAddressSize
      (\ _ -> do w <- anyWord16
                 return $ Instr MOV OP8 [OpImm (fromIntegral w), OpReg "al" 0])
      (\ _ -> do w <- anyWord32
                 return $ Instr MOV OP8 [OpImm w, OpReg "al" 0])
      b
parseMOVImm b@0xa3 = do
    opsize <- instrOperandSize
    reg <- registerName 0
    chooseAddressSize
      (\ _ -> do w <- anyWord16
                 return $ Instr MOV opsize [OpImm (fromIntegral w), OpReg reg 0])
      (\ _ -> do w <- anyWord32
                 return $ Instr MOV opsize [OpImm w, OpReg reg 0])
      b

-- MOVS: 0xa4 is the byte form, 0xa5 follows the operand size.
parseMOVS 0xa4 = return $ Instr MOVS OP8 []
parseMOVS b@0xa5 = do
    opsize <- instrOperandSize
    return $ Instr MOVS opsize []

-- CMPS: 0xa6 is the byte form, 0xa7 follows the operand size.
parseCMPS 0xa6 = return $ Instr CMPS OP8 []
parseCMPS 0xa7 = do
    opsize <- instrOperandSize
    return $ Instr CMPS opsize []

-- TEST accumulator, immediate: 0xa8 is the byte form, 0xa9 reads the
-- immediate with 'anyWordZ'.
parseTESTImm 0xa8 = do
    imm <- anyWord8
    return $ Instr TEST OP8 [OpReg "al" 0, OpImm (fromIntegral imm)]
parseTESTImm 0xa9 = do
    imm <- anyWordZ
    rn <- registerName 0
    opsize <- instrOperandSize
    return $ Instr TEST opsize [OpReg rn 0, OpImm imm]

-- STOS: 0xaa is the byte form, 0xab follows the operand size.
-- 'instrOperandSize' already accounts for 64-bit mode, REX.W and the
-- operand-size mode, so no further case distinction is needed.
parseSTOS 0xaa = return $ Instr STOS OP8 []
parseSTOS b@0xab = do
    opsize <- instrOperandSize
    return $ Instr STOS opsize []

-- LODS: 0xac is the byte form, 0xad follows the operand size.
parseLODS 0xac = return $ Instr LODS OP8 []
parseLODS b@0xad = do
    opsize <- instrOperandSize
    return $ Instr LODS opsize []

-- SCAS: 0xae is the byte form, 0xaf follows the operand size.
parseSCAS 0xae = return $ Instr SCAS OP8 []
parseSCAS b@0xaf = do
    opsize <- instrOperandSize
    return $ Instr SCAS opsize []

-- MOV of an 8-bit immediate into the byte register in the low nibble of
-- the opcode byte.  When the REX bit tested here is set, the register is
-- named r<n+8>l, numbered the same way as in parsePUSH and registerName.
parseMOVImmByteToByteReg :: Word8 -> Word8Parser Instr
parseMOVImmByteToByteReg b = do
    let reg = b .&. 0x0f
    st <- getState
    imm <- anyWord8
    if hasREX rex_R st
       then return $ Instr MOV OP8 [OpReg ("r" ++ show (reg + 8) ++ "l")
                                          (fromIntegral reg),
                                    OpImm (fromIntegral imm)]
       else return $ Instr MOV OP8 [OpReg (regnames8 !! (fromIntegral reg))
                                          (fromIntegral reg),
                                    OpImm (fromIntegral imm)]

parseMOVImmToReg :: Word8 -> Word8Parser Instr
parseMOVImmToReg b = do let reg = (b .&.
0x0f - 8) imm <- anyWordV opsize <- instrOperandSize rn <- registerName (fromIntegral reg) return $ Instr MOV opsize [OpReg rn (fromIntegral reg), OpImm (fromIntegral imm)]

-- Near return: 0xc2 carries a 16-bit immediate, 0xc3 has no operand.
parseRETN 0xc2 = do
    w <- anyWord16
    return $ Instr RET OPNONE [OpImm (fromIntegral w)]
parseRETN 0xc3 = return $ Instr RET OPNONE []

-- Load a far pointer ('opcode' is one of the segment-load opcodes):
-- register operand first, then the r/m operand.
parseLoadSegmentRegister opcode b = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OPNONE
    return $ Instr opcode OPNONE [op2, op1]

-- ENTER: a 16-bit immediate followed by an 8-bit immediate.
parseENTER b = do
    w <- anyWord16
    b <- anyWord8
    return $ Instr ENTER OPNONE [OpImm (fromIntegral w),
                                 OpImm (fromIntegral b)]

-- Row of a register-form ESC ModRM byte: each group of eight consecutive
-- values from 0xc0 upwards selects one operation.  Only meaningful for
-- bytes >= 0xc0.
escRegRow :: Word8 -> Int
escRegRow modrm = fromIntegral ((modrm - 0xc0) `shiftR` 3)

-- Stack register index ST(i) of a register-form ESC ModRM byte: the low
-- three bits.
escStackIndex :: Num a => Word8 -> a
escStackIndex modrm = fromIntegral (modrm .&. 0x07)

-- Floating-point operations. These can probably be shortened by doing
-- some arithmetic/logical tricks on the opcodes, but since the
-- instruction set is still quite irregular (even though much better than
-- the integer ops), I haven't bothered yet.
--
-- Every clause reads the ModRM byte itself.  Bytes up to 0xbf denote a
-- memory operand and the reg field selects the operation; bytes from
-- 0xc0 denote a register form or a fixed instruction.

parseESC 0xd8 = do
    modrm <- anyWord8
    if modrm <= 0xbf
       then do
         (op1, _, _, reg, _) <- parseAddress32' OPF32 modrm
         return $ Instr (ops !! fromIntegral reg) OPF32 [op1]
       else
         return $ Instr (ops !! escRegRow modrm) OPNONE
                    [OpFPReg 0, OpFPReg (escStackIndex modrm)]
  where ops = [FADD, FMUL, FCOM, FCOMP,
               FSUB, FSUBR, FDIV, FDIVR]

parseESC b@0xd9 = do
    modrm <- anyWord8
    if modrm <= 0xbf
       then do
         (op1', _, _, reg, _) <- parseAddress32' OPNONE modrm
         -- An absolute address gets the size that goes with the operation.
         let op1 = case op1' of
                     OpAddr a _ -> OpAddr a (opsizes !! fromIntegral reg)
                     op -> op
         return $ Instr (lowOps !! fromIntegral reg)
                    (opsizes !! fromIntegral reg) [op1]
       else if (modrm < 0xd0)
               then return $ Instr (ops !! escRegRow modrm) OPNONE
                               [OpFPReg 0, OpFPReg (escStackIndex modrm)]
               else case modrm of
                      0xd0 -> return $ Instr FNOP OPNONE []
                      0xe0 -> return $ Instr FCHS OPNONE []
                      0xe1 -> return $ Instr FABS OPNONE []
                      0xe4 -> return $ Instr FTST OPNONE []
                      0xe5 -> return $ Instr FXAM OPNONE []
                      0xe8 -> return $ Instr FLD1 OPNONE []
                      0xe9 -> return $ Instr FLDL2T OPNONE []
                      0xea -> return $ Instr FLDL2E OPNONE []
                      0xeb -> return $ Instr FLDPI OPNONE []
                      0xec -> return $ Instr FLDLG2 OPNONE []
                      0xed -> return $ Instr FLDLN2 OPNONE []
                      0xee -> return $ Instr FLDZ OPNONE []
                      _ -> parseInvalidOpcode b
  where lowOps = [FLD, InvalidOpcode, FST, FSTP,
                  FLDENV, FLDCW, FSTENV, FSTCW]
        opsizes = [OPF32, OPNONE, OPF32, OPF32,
                   OPNONE, OPNONE, OPNONE, OPNONE]
        ops = [FLD, FXCH]

parseESC 0xda = do
    modrm <- anyWord8
    if modrm <= 0xbf
       then do
         (op1, _, _, reg, _) <- parseAddress32' OPNONE modrm
         return $ Instr (ops !! fromIntegral reg) OPNONE [op1]
       else if (modrm < 0xe0)
               -- The stack index is the low three bits in every row, so
               -- that 0xc8..0xcf and 0xd8..0xdf yield ST(0)..ST(7) too.
               then return $ Instr (ops' !! escRegRow modrm) OPNONE
                               [OpFPReg 0, OpFPReg (escStackIndex modrm)]
               else case modrm of
                      -- FUCOMPP is encoded as DA E9.
                      0xe9 -> return $ Instr FUCOMPP OPNONE []
                      _ -> parseInvalidOpcode 0xda
  where ops = [FIADD, FIMUL, FICOM, FICOMP,
               FISUB, FISUBR, FIDIV, FIDIVR]
        ops' = [FCMOVB, FCMOVE, FCMOVBE, FCMOVU]

parseESC 0xdb = do
    modrm <- anyWord8
    if modrm <= 0xbf
       then do
         (op1', _, _, reg, _) <- parseAddress32' OPNONE modrm
         let op1 = case op1' of
                     OpAddr a _ -> OpAddr a (opsizes !! fromIntegral reg)
                     op -> op
         return $ Instr (ops !! fromIntegral reg)
                    (opsizes !! fromIntegral reg) [op1]
       else case modrm of
              0xe2 -> return $ Instr FCLEX OPNONE []
              0xe3 -> return $ Instr FINIT OPNONE []
              _ -> return $ Instr (ops' !! escRegRow modrm) OPNONE
                              [OpFPReg 0, OpFPReg (escStackIndex modrm)]
  -- /1 is FISTTP (store with truncation), not a second FISTP.
  where ops = [FILD, FISTTP, FIST, FISTP,
               InvalidOpcode, FLD, InvalidOpcode, FSTP]
        opsizes = [OP32, OP32, OP32, OP32,
                   OPNONE, OPF80, OPNONE, OPF80]
        ops' = [FCMOVNB, FCMOVNE, FCMOVNBE, FCMOVNU,
                InvalidOpcode, FUCOMI, FCOMI, InvalidOpcode]

parseESC 0xdc = do
    modrm <- anyWord8
    if modrm <= 0xbf
       then do
         (op1, _, _, reg, _) <- parseAddress32' OPNONE modrm
         return $ Instr (ops !! fromIntegral reg) OPNONE [op1]
       else if modrm >= 0xd0 && modrm < 0xe0
               then parseInvalidOpcode 0xdc
               else return $ Instr (regOps !! escRegRow modrm) OPNONE
                               [OpFPReg (escStackIndex modrm), OpFPReg 0]
  where ops = [FADD, FMUL, FCOM, FCOMP,
               FSUB, FSUBR, FDIV, FDIVR]
        -- With ST(i) as destination the reversed and plain forms swap
        -- places: E0+i is FSUBR, E8+i FSUB, F0+i FDIVR, F8+i FDIV.
        regOps = [FADD, FMUL, InvalidOpcode, InvalidOpcode,
                  FSUBR, FSUB, FDIVR, FDIV]

parseESC 0xdd = do
    modrm <- anyWord8
    if modrm <= 0xbf
       then do
         (op1', _, _, reg, _) <- parseAddress32' OPNONE modrm
         let op1 = case op1' of
                     OpAddr a _ -> OpAddr a (opsizes !! fromIntegral reg)
                     op -> op
         return $ Instr (ops !! fromIntegral reg)
                    (opsizes !! fromIntegral reg) [op1]
       -- 0xc8..0xcf and everything from 0xf0 up have no entry in ops'.
       -- 0xd0 (FST ST(0)) is valid, and 0xff must be rejected here
       -- because ops' has only six rows.
       else if (modrm >= 0xc8 && modrm < 0xd0) || modrm >= 0xf0
               then parseInvalidOpcode 0xdd
               else return $ Instr (ops' !! escRegRow modrm) OPNONE
                               [OpFPReg (escStackIndex modrm), OpFPReg 0]
  where ops = [FLD, FISTTP, FST, FSTP,
               FRSTOR, InvalidOpcode, FSAVE, FSTSW]
        opsizes = [OPF64, OP64, OPF64, OPF64,
                   OPNONE, OPNONE, OPNONE, OP16]
        ops' = [FFREE, InvalidOpcode, FST, FSTP,
                FUCOM, FUCOMP]

parseESC 0xde = do
    modrm <- anyWord8
    if modrm <= 0xbf
       then do
         (op1, _, _, reg, _) <- parseAddress32' OPNONE modrm
         return $ Instr (ops !! fromIntegral reg) OPNONE [op1]
       -- Only 0xd0..0xdf is special; 0xe0 already belongs to the FSUBRP
       -- row of ops'.
       else if modrm >= 0xd0 && modrm < 0xe0
               then case modrm of
                      0xd9 -> return $ Instr FCOMPP OPNONE []
                      _ -> parseInvalidOpcode 0xde
               else return $ Instr (ops' !! escRegRow modrm) OPNONE
                               [OpFPReg (escStackIndex modrm), OpFPReg 0]
  where ops = [FIADD, FIMUL, FICOM, FICOMP,
               FISUB, FISUBR, FIDIV, FIDIVR]
        ops' = [FADDP, FMULP, InvalidOpcode, InvalidOpcode,
                FSUBRP, FSUBP, FDIVRP, FDIVP]

parseESC 0xdf = do
    modrm <- anyWord8
    if modrm <= 0xbf
       then do
         (op1, _, _, reg, _) <- parseAddress32' OPNONE modrm
         return $ Instr (ops !! fromIntegral reg) OPNONE [op1]
       else case modrm of
              0xe0 -> return $ Instr FSTSW OPNONE [OpReg "ax" 0]
              _ -> if (modrm >= 0xe8 && modrm <= 0xef)
                      || (modrm >= 0xf0 && modrm <= 0xf7)
                      then return $ Instr (ops' !! escRegRow modrm) OPNONE
                                      [OpFPReg (escStackIndex modrm), OpFPReg 0]
                      else parseInvalidOpcode 0xdf
  -- /1 is FISTTP; there is no FISTPP instruction.
  where ops = [FILD, FISTTP, FIST, FISTP,
               FBLD, FILD, FBSTP, FISTP]
        ops' = [InvalidOpcode, InvalidOpcode, InvalidOpcode, InvalidOpcode,
                InvalidOpcode, FUCOMIP, FCOMIP, InvalidOpcode]

-- IN accumulator, imm8: 0xe4 is the byte form, 0xe5 follows the operand
-- size.
parseINImm 0xe4 = do
    b <- anyWord8
    return $ Instr IN OP8 [OpReg "al" 0, OpImm (fromIntegral b)]
parseINImm 0xe5 = do
    b <- anyWord8
    rn <- registerName 0
    opsize <- instrOperandSize
    return $ Instr IN opsize [OpReg rn 0, OpImm (fromIntegral b)]

-- OUT imm8, accumulator: 0xe6 is the byte form, 0xe7 follows the operand
-- size.
parseOUTImm 0xe6 = do
    b <- anyWord8
    return $ Instr OUT OP8 [OpImm (fromIntegral b), OpReg "al" 0]
parseOUTImm 0xe7 = do
    b <- anyWord8
    rn <- registerName 0
    opsize <- instrOperandSize
    return $ Instr OUT opsize [OpImm (fromIntegral b), OpReg rn 0]

-- IN accumulator, dx: 0xec is the byte form, 0xed follows the operand
-- size.
parseIN 0xec = do
    return $ Instr IN OP8 [OpReg "al" 0, OpReg "dx" 2]
parseIN 0xed = do
    rn <- registerName 0
    opsize <- instrOperandSize
    return $ Instr IN opsize [OpReg rn 0, OpReg "dx" 2]

-- OUT dx, accumulator: 0xee is the byte form, 0xef follows the operand
-- size.
parseOUT 0xee = do
    return $ Instr OUT OP8 [OpReg "dx" 2, OpReg "al" 0]
parseOUT 0xef = do
    rn <- registerName 0
    opsize <- instrOperandSize
    return $ Instr OUT opsize [OpReg "dx" 2, OpReg rn 0]

-- Return the name of the register encoded with R. Take 64-bit mode and
-- possible REX and operand-size prefixes into account.
registerName r = do st <- getState if in64BitMode st && hasREX rex_R st then return $ "r" ++ show (r + 8) else case operandBitMode st of BIT16 -> return $ regnames16 !! r BIT32 -> return $ regnames32 !!
r

-- Operand size of the instruction being decoded: OP64 in 64-bit mode
-- with REX.W, otherwise OP16 or OP32 according to the operand-size mode.
instrOperandSize = do
    st <- getState
    if in64BitMode st && hasREX rex_W st
       then return $ OP64
       else case operandBitMode st of
              BIT16 -> return OP16
              BIT32 -> return OP32

-- Register names, indexed by the 3-bit register number.
regnames8 = ["al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"]
regnames16 = ["ax", "cx", "dx", "bx", "sp", "bp", "si", "di"]
regnames32 = ["eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"]
regnames64 = ["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"]
-- The last two segment register slots have no name.
segregnames = ["es", "cs", "ss", "ds", "fs", "gs", "", ""]
mmxregs = ["mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"]
xmmregs = ["xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"]

-- Conditional jump opcode for a 4-bit condition code.
jccname 0 = JO
jccname 1 = JNO
jccname 2 = JB
jccname 3 = JNB
jccname 4 = JE
jccname 5 = JNE
jccname 6 = JBE
jccname 7 = JA
jccname 8 = JS
jccname 9 = JNS
jccname 10 = JP
jccname 11 = JNP
jccname 12 = JL
jccname 13 = JGE
jccname 14 = JLE
jccname 15 = JG

-- Conditional set opcode for a 4-bit condition code.
setccname 0 = SETO
setccname 1 = SETNO
setccname 2 = SETB
setccname 3 = SETNB
setccname 4 = SETE
setccname 5 = SETNE
setccname 6 = SETBE
setccname 7 = SETA
setccname 8 = SETS
setccname 9 = SETNS
setccname 10 = SETP
setccname 11 = SETNP
setccname 12 = SETL
setccname 13 = SETGE
setccname 14 = SETLE
setccname 15 = SETG

-- Conditional move opcode for a 4-bit condition code.
cmovccname 0 = CMOVO
cmovccname 1 = CMOVNO
cmovccname 2 = CMOVB
cmovccname 3 = CMOVNB
cmovccname 4 = CMOVE
cmovccname 5 = CMOVNE
cmovccname 6 = CMOVBE
cmovccname 7 = CMOVA
cmovccname 8 = CMOVS
cmovccname 9 = CMOVNS
cmovccname 10 = CMOVP
cmovccname 11 = CMOVNP
cmovccname 12 = CMOVL
cmovccname 13 = CMOVGE
cmovccname 14 = CMOVLE
cmovccname 15 = CMOVG

-- Group 1: ALU operation on an r/m operand and an immediate.  The reg
-- field of the ModRM byte selects the operation from 'aluOps'.
--   0x80, 0x82: byte operand, 8-bit immediate
--   0x81:       immediate read with 'anyWordZ'
--   0x83:       8-bit immediate
parseGrp1 0x80 = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP8
    immb <- anyWord8
    return $ Instr (aluOps !! fromIntegral reg) OP8
               [op1, OpImm (fromIntegral immb)]
parseGrp1 0x81 = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    immb <- anyWordZ
    return $ Instr (aluOps !! fromIntegral reg) opsize
               [op1, OpImm (fromIntegral immb)]
parseGrp1 0x82 = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP8
    immb <- anyWord8
    return $ Instr (aluOps !! fromIntegral reg) OP8
               [op1, OpImm (fromIntegral immb)]
parseGrp1 0x83 = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    immb <- anyWord8
    return $ Instr (aluOps !! fromIntegral reg) opsize
               [op1, OpImm (fromIntegral immb)]

-- Group 1 operations, indexed by the reg field.
aluOps = [ADD, OR, ADC, SBB, AND, SUB, XOR, CMP]

-- Group 1A: POP into an r/m operand; only reg field 0 is valid.
parseGrp1A b = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    case reg of
      0 -> return $ Instr POP opsize [op1]
      _ -> parseInvalidOpcode b

-- Group 2: shifts and rotates.  The reg field selects the operation from
-- 'shiftOps'; the opcode selects operand width and the count:
--   0xc0 / 0xc1: count is an 8-bit immediate
--   0xd0 / 0xd1: count is 1
--   0xd2 / 0xd3: count is in cl
parseGrp2 0xc0 = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP8
    immb <- anyWord8
    return $ Instr (shiftOps !! fromIntegral reg) OP8
               [op1, OpImm (fromIntegral immb)]
parseGrp2 0xc1 = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    imm <- anyWord8
    return $ Instr (shiftOps !! fromIntegral reg) opsize
               [op1, OpImm (fromIntegral imm)]
parseGrp2 0xd0 = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP8
    return $ Instr (shiftOps !! fromIntegral reg) OP8 [op1, OpImm 1]
parseGrp2 0xd1 = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    return $ Instr (shiftOps !! fromIntegral reg) opsize [op1, OpImm 1]
parseGrp2 0xd2 = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP8
    return $ Instr (shiftOps !! fromIntegral reg) OP8 [op1, OpReg "cl" 1]
parseGrp2 0xd3 = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    return $ Instr (shiftOps !! fromIntegral reg) opsize [op1, OpReg "cl" 1]

-- Group 2 operations, indexed by the reg field.
shiftOps = [ROL, ROR, RCL, RCR, SHL, SHR, InvalidOpcode, SAR]

-- Group 3: unary operations and TEST with an immediate, selected by the
-- reg field.  0xf6 is the byte form, 0xf7 follows the operand size.
parseGrp3 0xf6 = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP8
    case reg of
      0 -> do imm <- anyWord8
              return $ Instr TEST OP8 [op1, OpImm (fromIntegral imm)]
      1 -> parseInvalidOpcode 0xf6
      2 -> return $ Instr NOT OP8 [op1]
      3 -> return $ Instr NEG OP8 [op1]
      4 -> return $ Instr MUL OP8 [OpReg "al" 0, op1]
      5 -> return $ Instr IMUL OP8 [OpReg "al" 0, op1]
      6 -> return $ Instr DIV OP8 [OpReg "al" 0, op1]
      7 -> return $ Instr IDIV OP8 [OpReg "al" 0, op1]
parseGrp3 0xf7 = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    rn <- registerName 0
    case reg of
      0 -> do imm <- anyWordZ
              return $ Instr TEST opsize [op1, OpImm (fromIntegral imm)]
      -- Report the opcode byte of this clause, not the byte form's.
      1 -> parseInvalidOpcode 0xf7
      2 -> return $ Instr NOT opsize [op1]
      3 -> return $ Instr NEG opsize [op1]
      4 -> return $ Instr MUL opsize [OpReg rn 0, op1]
      5 -> return $ Instr IMUL opsize [OpReg rn 0, op1]
      6 -> return $ Instr DIV opsize [OpReg rn 0, op1]
      7 -> return $ Instr IDIV opsize [OpReg rn 0, op1]

-- Group 4: INC/DEC of a byte r/m operand, selected by the reg field.
parseGrp4 b = do
    (op1, op2, mod, reg, rm) <- parseAddress32 OP8
    case reg of
      0 -> return $ Instr INC OP8 [op1]
      1 -> return $ Instr DEC OP8 [op1]
      _ -> parseInvalidOpcode b

-- Group 5: INC, DEC, indirect CALL/JMP (near and far) and PUSH of an
-- r/m operand, selected by the reg field.  The far forms take their
-- target from the memory operand; no immediate follows the addressing
-- bytes, so nothing more is consumed.
parseGrp5 b = do
    opsize <- instrOperandSize
    (op1, op2, mod, reg, rm) <- parseAddress32 opsize
    case reg of
      0 -> return $ Instr INC opsize [op1]
      1 -> return $ Instr DEC opsize [op1]
      2 -> return $ Instr CALL OPNONE [op1]
      3 -> return $ Instr CALLF OPNONE [op1]
      4 -> return $ Instr JMPN OPNONE [op1]
      5 -> return $ Instr JMPF OPNONE [op1]
      6 -> return $ Instr PUSH opsize [op1]
      _ -> parseInvalidOpcode b

parseGrp6 b = do (op1, op2, mod, reg, rm) <- parseAddress32 OPNONE case reg of 0 -> return $ Instr SLDT OPNONE [op1] 1 -> return $ Instr STR OPNONE [op1] 2 -> return $ Instr LLDT OPNONE [op1] 3 -> return $ Instr LTR OPNONE [op1] 4 -> return $ Instr VERR OPNONE
[op1] 5 -> return $ Instr VERW OPNONE [op1] _ -> parseInvalidOpcode b parseGrp7 b = do (op1, op2, mod, reg, rm) <- parseAddress32 OPNONE case mod of 3 -> case reg of 0 -> case rm of 1 -> return $ Instr VMCALL OPNONE [] 2 -> return $ Instr VMLAUNCH OPNONE [] 3 -> return $ Instr VMRESUME OPNONE [] 4 -> return $ Instr VMXOFF OPNONE [] _ -> parseInvalidOpcode b 1 -> case rm of 0 -> return $ Instr MONITOR OPNONE [] 1 -> return $ Instr MWAIT OPNONE [] _ -> parseInvalidOpcode b 4 -> return $ Instr SMSW OPNONE [op1] 6 -> return $ Instr LMSW OPNONE [op1] 7 -> case rm of 0 -> onlyIn64BitMode (\b -> return $ Instr SWAPGS OPNONE []) b _ -> parseInvalidOpcode b _ -> parseInvalidOpcode b _ -> case reg of 0 -> return $ Instr SGDT OPNONE [op1] 1 -> return $ Instr SIDT OPNONE [op1] 2 -> return $ Instr LGDT OPNONE [op1] 3 -> return $ Instr LIDT OPNONE [op1] 4 -> return $ Instr SMSW OPNONE [op1] 5 -> parseInvalidOpcode b 6 -> return $ Instr LMSW OPNONE [op1] 7 -> return $ Instr INVLPG OPNONE [op1] parseGrp8 b = do opsize <- instrOperandSize (op1, op2, mod, reg, rm) <- parseAddress32 opsize imm <- anyWord8 case reg of 4 -> return $ Instr BT opsize [op1, OpImm (fromIntegral imm)] 5 -> return $ Instr BTS opsize [op1, OpImm (fromIntegral imm)] 6 -> return $ Instr BTR opsize [op1, OpImm (fromIntegral imm)] 7 -> return $ Instr BTC opsize [op1, OpImm (fromIntegral imm)] _ -> parseInvalidOpcode b parseGrp9 b = do (op1, op2, mod, reg, rm) <- parseAddress32 OPNONE st <- getState case mod of 3 -> parseInvalidOpcode b _ -> case reg of 1 -> if hasREX rex_W st then return $ Instr CMPXCHG16B OPNONE [op1] else return $ Instr CMPXCHG8B OPNONE [op1] 6 -> if hasPrefix 0x66 st then return $ Instr VMCLEAR OPNONE [op1] else if hasPrefix 0xf3 st then return $ Instr VMXON OPNONE [op1] else return $ Instr VMPTRLD OPNONE [op1] 7 -> return $ Instr VMPTRST OPNONE [op1] _ -> parseInvalidOpcode b parseGrp10 = parseInvalidOpcode parseGrp11 0xc6 = do (op1, op2, mod, reg, rm) <- parseAddress32 OP8 imm <- anyWord8 
return $ Instr MOV OP8 [op1, OpImm (fromIntegral imm)] parseGrp11 0xc7 = do opsize <- instrOperandSize (op1, op2, mod, reg, rm) <- parseAddress32 opsize imm <- anyWordZ return $ Instr MOV opsize [op1, OpImm (fromIntegral imm)] mmxInstr op1 mod reg rm name = do st <- getState imm <- anyWord8 if hasPrefix 0x66 st then return $ Instr name OP128 [OpReg (xmmregs !! fromIntegral rm) (fromIntegral rm), OpImm (fromIntegral imm)] else return $ Instr name OP64 [OpReg (mmxregs !! fromIntegral rm) (fromIntegral rm), OpImm (fromIntegral imm)] parseGrp12 b = do st <- getState let opsize = if hasPrefix 0x66 st then OP128 else OP64 (op1, op2, mod, reg, rm) <- parseAddress32 opsize case mod of 3 -> case reg of 2 -> mmxInstr op1 mod reg rm PSRLW 4 -> mmxInstr op1 mod reg rm PSRAW 6 -> mmxInstr op1 mod reg rm PSLLW _ -> parseInvalidOpcode b _ -> parseInvalidOpcode b parseGrp13 b = do st <- getState let opsize = if hasPrefix 0x66 st then OP128 else OP64 (op1, op2, mod, reg, rm) <- parseAddress32 opsize case mod of 3 -> case reg of 2 -> mmxInstr op1 mod reg rm PSRLD 4 -> mmxInstr op1 mod reg rm PSRAD 6 -> mmxInstr op1 mod reg rm PSLLD _ -> parseInvalidOpcode b _ -> parseInvalidOpcode b parseGrp14 b = do st <- getState let opsize = if hasPrefix 0x66 st then OP128 else OP64 (op1, op2, mod, reg, rm) <- parseAddress32 opsize st <- getState case mod of 3 -> case reg of 2 -> mmxInstr op1 mod reg rm PSRLQ 3 -> if hasPrefix 0x66 st then mmxInstr op1 mod reg rm PSRLDQ else parseInvalidOpcode b 6 -> mmxInstr op1 mod reg rm PSLLQ 7 -> if hasPrefix 0x66 st then mmxInstr op1 mod reg rm PSLLDQ else parseInvalidOpcode b _ -> parseInvalidOpcode b _ -> parseInvalidOpcode b parseGrp15 b = do (op1, op2, mod, reg, rm) <- parseAddress32 OPNONE case mod of 3 -> case reg of 5 -> return $ Instr LFENCE OPNONE [] 6 -> return $ Instr MFENCE OPNONE [] 7 -> return $ Instr SFENCE OPNONE [] _ -> parseInvalidOpcode b _ -> case reg of 0 -> return $ Instr FXSAVE OPNONE [op1] 1 -> return $ Instr FXRSTOR OPNONE [op1] 2 
-> return $ Instr LDMXCSR OPNONE [op1] 3 -> return $ Instr STMXCSR OPNONE [op1] 7 -> return $ Instr CLFLUSH OPNONE [op1] _ -> parseInvalidOpcode b parseGrp16 b = do (op1, op2, mod, reg, rm) <- parseAddress32 OPNONE case mod of 3 -> parseInvalidOpcode b _ -> case reg of 0 -> return $ Instr PREFETCHNTA OPNONE [op1] 1 -> return $ Instr PREFETCHT0 OPNONE [op1] 2 -> return $ Instr PREFETCHT1 OPNONE [op1] 3 -> return $ Instr PREFETCHT2 OPNONE [op1] _ -> parseInvalidOpcode b parseXmmVW p p0xf3 p0x66 p0xf2 b = do (op1, op2, mod, reg, rm) <- parseAddress32 OP128 st <- getState let v = OpReg (xmmregs !! (fromIntegral reg)) (fromIntegral reg) let w = case op1 of OpReg _ num -> OpReg (xmmregs !! num) num op -> op if hasPrefix 0xf3 st then return $ Instr p0xf3 OP128 [v, w] else if hasPrefix 0x66 st then return $ Instr p0x66 OP128 [v, w] else if hasPrefix 0xf2 st then return $ Instr p0xf2 OP128 [v, w] else return $ Instr p OP128 [v, w] parseXmmWV p p0xf3 p0x66 p0xf2 b = do (op1, op2, mod, reg, rm) <- parseAddress32 OP128 st <- getState let w = OpReg (xmmregs !! (fromIntegral reg)) (fromIntegral reg) let v = case op1 of OpReg _ num -> OpReg (xmmregs !! num) num op -> op if hasPrefix 0xf3 st then return $ Instr p0xf3 OP128 [v, w] else if hasPrefix 0x66 st then return $ Instr p0x66 OP128 [v, w] else if hasPrefix 0xf2 st then return $ Instr p0xf2 OP128 [v, w] else return $ Instr p OP128 [v, w] parseXmmGU p p0xf3 p0x66 p0xf2 b = do (mod, reg, rm) <- parseModRM st <- getState let g = OpReg (regnames32 !! (fromIntegral reg)) (fromIntegral reg) let u = OpReg (xmmregs !! 
(fromIntegral rm)) (fromIntegral rm) if hasPrefix 0xf3 st then return $ Instr p0xf3 OP32 [g, u] else if hasPrefix 0x66 st then return $ Instr p0x66 OP32 [g, u] else if hasPrefix 0xf2 st then return $ Instr p0xf2 OP32 [g, u] else return $ Instr p OP32 [g, u] parseMOVUPS b@0x10 = parseXmmVW MOVUPS MOVSS MOVUPD MOVSD b parseMOVUPS b@0x11 = parseXmmWV MOVUPS MOVSS MOVUPD MOVSD b parseMOVLPS b@0x12 = parseXmmWV MOVLPS MOVSLDUP MOVLPD MOVDDUP b parseMOVLPS b@0x13 = parseXmmVW MOVLPS InvalidOpcode MOVLPD InvalidOpcode b parseUNPCKLPS b@0x14 = parseXmmVW UNPCKLPS InvalidOpcode UNPCKLPD InvalidOpcode b parseUNPCKHPS b@0x15 = parseXmmVW UNPCKHPS InvalidOpcode UNPCKHPD InvalidOpcode b parseMOVHPS b@0x16 = parseXmmVW MOVHPS MOVLSDUP MOVHPD MOVLHPS b parseMOVHPS b@0x17 = parseXmmVW MOVHPS InvalidOpcode MOVHPD InvalidOpcode b parseMOVCtrlDebug 0x20 = do (mod, reg, rm) <- parseModRM return $ Instr MOV OPNONE [OpReg (regnames32 !! fromIntegral rm) (fromIntegral rm), OpReg ("cr" ++ show reg) (fromIntegral reg)] parseMOVCtrlDebug 0x21 = do (mod, reg, rm) <- parseModRM return $ Instr MOV OPNONE [OpReg (regnames32 !! fromIntegral rm) (fromIntegral rm), OpReg ("db" ++ show reg) (fromIntegral reg)] parseMOVCtrlDebug 0x22 = do (mod, reg, rm) <- parseModRM return $ Instr MOV OPNONE [OpReg ("cr" ++ show reg) (fromIntegral reg), OpReg (regnames32 !! fromIntegral rm) (fromIntegral rm)] parseMOVCtrlDebug 0x23 = do (mod, reg, rm) <- parseModRM return $ Instr MOV OPNONE [OpReg ("db" ++ show reg) (fromIntegral reg), OpReg (regnames32 !! 
fromIntegral rm) (fromIntegral rm)] parseMOVAPS b@0x28 = parseXmmVW MOVAPS InvalidOpcode MOVAPD InvalidOpcode b parseMOVAPS b@0x29 = parseXmmWV MOVAPS InvalidOpcode MOVAPD InvalidOpcode b parseCVTI2PS = parseUnimplemented parseMOVNTPS = parseXmmWV MOVNTPS InvalidOpcode MOVNTPD InvalidOpcode parseCVTPS2PI = parseUnimplemented parseCVTTPS2PI = parseUnimplemented parseUCOMISS = parseXmmVW UCOMISS InvalidOpcode UCOMISD InvalidOpcode parseCOMISS = parseXmmVW COMISS InvalidOpcode COMISD InvalidOpcode parseCMOVcc b= do opsize <- instrOperandSize (op1, op2, mod, reg, rm) <- parseAddress32 opsize return $ Instr (cmovccname (b .&. 0xf)) OPNONE [op2, op1] parseMOVSKPS = parseXmmGU MOVMSKPS InvalidOpcode MOVMSKPD InvalidOpcode parseSQRTPS = parseXmmVW SQRTPS SQRTSS SQRTPD SQRTSD parseRSQRTPS = parseXmmVW RSQRTPS RSQRTSS InvalidOpcode InvalidOpcode parseRCPPS = parseXmmVW RCPPS RCPSS InvalidOpcode InvalidOpcode parseCVTPS2PD = parseUnimplemented parseANDNPS = parseXmmVW ANDNPS InvalidOpcode ANDNPD InvalidOpcode parseANDPS = parseXmmVW ANDPS InvalidOpcode ANDPD InvalidOpcode parseORPS = parseXmmVW ORPS InvalidOpcode ORPD InvalidOpcode parseXORPS = parseXmmVW XORPS InvalidOpcode XORPD InvalidOpcode parseADDPS = parseXmmVW ADDPS ADDSS ADDPD ADDSD parseMULPS = parseXmmVW MULPS MULSS MULPD MULSD parseCVTDQ2PS = parseUnimplemented parsePUNPCKLWD = parseUnimplemented parsePACKSSWB = parseUnimplemented parsePUNPCKHWD = parseUnimplemented parseSUBPS = parseXmmVW SUBPS SUBSS SUBPD SUBSD parseMINPS = parseXmmVW MINPS MINSS MINPD MINSD parseDIVPS = parseXmmVW DIVPS DIVSS DIVPD DIVSD parseMAXPS = parseXmmVW MAXPS MAXSS MAXPD MAXSD parsePUNPCKLBW = parseUnimplemented parsePUNPCKLDQ = parseUnimplemented parsePACKUSWB = parseUnimplemented parsePCMPGTB = parseUnimplemented parsePCMPGTW = parseUnimplemented parsePCMPGTD = parseUnimplemented parsePUNPCKHBW = parseUnimplemented parsePUNPCKHDQ = parseUnimplemented parsePACKSSDW = parseUnimplemented parsePUNPCKLQDQ = parseUnimplemented 
parsePUNPCKHQDQ = parseUnimplemented

parsePSHUFW = parseUnimplemented

parsePCMPEQB = parseUnimplemented

parsePCMPEQW = parseUnimplemented

parsePCMPEQD = parseUnimplemented

-- VMREAD: both operands come from the ModRM byte.  The operand size is OP64
-- in 64-bit mode and OP32 otherwise.
parseVMREAD b = do
  st <- getState
  let opsize = if in64BitMode st then OP64 else OP32
  (op1, op2, _, _, _) <- parseAddress32 opsize
  return $ Instr VMREAD opsize [op1, op2]

-- VMWRITE: same operand handling as VMREAD.
parseVMWRITE b = do
  st <- getState
  let opsize = if in64BitMode st then OP64 else OP32
  (op1, op2, _, _, _) <- parseAddress32 opsize
  return $ Instr VMWRITE opsize [op1, op2]

-- Horizontal add/subtract: the 0x66 prefix selects the packed-double form
-- and the 0xf2 prefix the packed-single form; without a prefix the opcode is
-- invalid.
parseHADDPS = parseXmmVW InvalidOpcode InvalidOpcode HADDPD HADDPS

parseHSUBPS = parseXmmVW InvalidOpcode InvalidOpcode HSUBPD HSUBPS

parseMOVD_Q = parseUnimplemented

-- Conditional jump with a long displacement read by anyIntZ.  The low four
-- bits of the opcode byte select the condition.  The target address is the
-- displacement plus the current source column minus one (presumably the
-- offset of the next instruction in the input) plus the start address from
-- the parser state.
parseJccLong b = do
  disp <- anyIntZ
  let disp' :: Int
      disp' = fromIntegral disp
  pos <- getPosition
  st <- getState
  return $ Instr (jccname (b .&. 0xf)) OPNONE
                 [OpAddr (fromIntegral (disp' + sourceColumn pos - 1)
                          + (startAddr st)) OPNONE]

-- SETcc: the low four bits of the opcode byte select the condition.  A
-- register operand is renamed to the 8-bit register with the same number.
parseSETcc b = do
  (op1, _, _, _, _) <- parseAddress32 OP8
  let target = case op1 of
                 OpReg _ num -> OpReg (regnames8 !! fromIntegral num) num
                 _ -> op1
  return $ Instr (setccname (b .&. 0xf)) OPNONE [target]

-- SHLD with the shift count as immediate byte (0xa4) or in "cl" (0xa5).
parseSHLD 0xa4 = do
  opsize <- instrOperandSize
  (op1, op2, _, _, _) <- parseAddress32 opsize
  b <- anyWord8
  return $ Instr SHLD opsize [op1, op2, OpImm (fromIntegral b)]
parseSHLD 0xa5 = do
  opsize <- instrOperandSize
  (op1, op2, _, _, _) <- parseAddress32 opsize
  return $ Instr SHLD opsize [op1, op2, OpReg "cl" 1]

-- SHRD with the shift count as immediate byte (0xac) or in "cl" (0xad).
parseSHRD 0xac = do
  opsize <- instrOperandSize
  (op1, op2, _, _, _) <- parseAddress32 opsize
  b <- anyWord8
  return $ Instr SHRD opsize [op1, op2, OpImm (fromIntegral b)]
parseSHRD 0xad = do
  opsize <- instrOperandSize
  (op1, op2, _, _, _) <- parseAddress32 opsize
  return $ Instr SHRD opsize [op1, op2, OpReg "cl" 1]

parseCMPPS = parseUnimplemented

parseMOVNTI = parseUnimplemented

parsePINSRW = parseUnimplemented

parsePEXTRW = parseUnimplemented

parseSHUFPS = parseUnimplemented

-- BSWAP: the register number is taken from the low four bits of the opcode
-- byte, minus 8.
parseBSWAP b = do
  let reg = (b .&. 0xf) - 8
  r <- registerName (fromIntegral reg)
  opsize <- instrOperandSize
  return $ Instr BSWAP opsize [OpReg r (fromIntegral reg)]

-- The constructor for the packed-single form is spelled ADDUBPS in the
-- Opcode type.
parseADDSUBPS = parseXmmVW InvalidOpcode InvalidOpcode ADDSUBPD ADDUBPS

-- MMX/SSE instruction with operands [register from the reg field, ModRM
-- operand].  With the 0x66 prefix the XMM registers and OP128 are used,
-- otherwise the MMX registers and OP64.  A register ModRM operand is renamed
-- to the register of the selected set with the same number.
parseMmxXmmPQVW opcode b = do
  st <- getState
  if hasPrefix 0x66 st
    then decode xmmregs OP128
    else decode mmxregs OP64
  where
    decode regs size = do
      (op1, _, _, reg, _) <- parseAddress32 size
      let dst = OpReg (regs !! (fromIntegral reg)) (fromIntegral reg)
          src = case op1 of
                  OpReg _ num -> OpReg (regs !! num) num
                  op -> op
      return $ Instr opcode size [dst, src]

-- MMX/SSE instruction with operands [ModRM operand, register from the reg
-- field].  opcode2 with the XMM registers is used when the 0x66 prefix is
-- present, opcode1 with the MMX registers otherwise.
parseMmxXmmMPMV opcode1 opcode2 b = do
  st <- getState
  if hasPrefix 0x66 st
    then do
      (op1, _, _, reg, _) <- parseAddress32 OP128
      let v = OpReg (xmmregs !! (fromIntegral reg)) (fromIntegral reg)
      return $ Instr opcode2 OP128 [op1, v]
    else do
      (op1, _, _, reg, _) <- parseAddress32 OP64
      let p = OpReg (mmxregs !! (fromIntegral reg)) (fromIntegral reg)
      return $ Instr opcode1 OP64 [op1, p]

-- MMX/SSE instruction with two register operands: [register from the reg
-- field, register from the rm field].  Each operand carries the number of
-- the field its name was taken from.
parseMmxXmmPNVU opcode b = do
  st <- getState
  if hasPrefix 0x66 st
    then do
      (_, reg, rm) <- parseModRM
      let v = OpReg (xmmregs !! (fromIntegral reg)) (fromIntegral reg)
          u = OpReg (xmmregs !! (fromIntegral rm)) (fromIntegral rm)
      return $ Instr opcode OP128 [v, u]
    else do
      (_, _, _, reg, rm) <- parseAddress32 OP64
      let p = OpReg (mmxregs !! (fromIntegral reg)) (fromIntegral reg)
          n = OpReg (mmxregs !! (fromIntegral rm)) (fromIntegral rm)
      return $ Instr opcode OP64 [p, n]

parsePSRLW = parseMmxXmmPQVW PSRLW

parsePSRLD = parseMmxXmmPQVW PSRLD

parsePSRLQ = parseMmxXmmPQVW PSRLQ

parsePADDQ = parseMmxXmmPQVW PADDQ

parsePMULLW = parseMmxXmmPQVW PMULLW

-- MOVQ.  Only opcode 0xd6 is decoded: with the 0x66 prefix both operands
-- come from parseAddress32; with 0xf3 the operands are [XMM register, MMX
-- register]; with 0xf2 they are [MMX register, XMM register]; without any of
-- these prefixes the opcode is invalid.
parseMOVQ b@0x6f = parseUnimplemented b
parseMOVQ b@0x7f = parseUnimplemented b
parseMOVQ b@0xd6 = do
  st <- getState
  if hasPrefix 0x66 st
    then do
      (op1, op2, _, _, _) <- parseAddress32 OP64
      return $ Instr MOVQ OP64 [op1, op2]
    else if hasPrefix 0xf3 st
      then do
        (_, reg, rm) <- parseModRM
        return $ Instr MOVQ OPNONE
                       [OpReg (xmmregs !! (fromIntegral reg)) (fromIntegral reg),
                        OpReg (mmxregs !! (fromIntegral rm)) (fromIntegral rm)]
      else if hasPrefix 0xf2 st
        then do
          (_, reg, rm) <- parseModRM
          return $ Instr MOVQ OPNONE
                         [OpReg (mmxregs !! (fromIntegral reg)) (fromIntegral reg),
                          OpReg (xmmregs !! (fromIntegral rm)) (fromIntegral rm)]
        else parseInvalidOpcode b

-- PMOVMSKB: operands are [32-bit general register from the reg field,
-- XMM (with 0x66 prefix) or MMX register from the rm field].
parsePMOVMSKB b = do
  st <- getState
  (_, reg, rm) <- parseModRM
  let srcRegs = if hasPrefix 0x66 st then xmmregs else mmxregs
  return $ Instr PMOVMSKB OPNONE
                 [OpReg (regnames32 !! (fromIntegral reg)) (fromIntegral reg),
                  OpReg (srcRegs !! (fromIntegral rm)) (fromIntegral rm)]

parsePSUBUSB = parseMmxXmmPQVW PSUBUSB

parsePSUBUSW = parseMmxXmmPQVW PSUBUSW

parsePMINUB = parseMmxXmmPQVW PMINUB

parsePAND = parseMmxXmmPQVW PAND

parsePADDUSB = parseMmxXmmPQVW PADDUSB

parsePADDUSW = parseMmxXmmPQVW PADDUSW

parsePMAXUB = parseMmxXmmPQVW PMAXUB

parsePANDN = parseMmxXmmPQVW PANDN

parsePAVGB = parseMmxXmmPQVW PAVGB

parsePSRAW = parseMmxXmmPQVW PSRAW

parsePSRAD = parseMmxXmmPQVW PSRAD

parsePAVGW = parseMmxXmmPQVW PAVGW

parseCVTPD2DQ = parseUnimplemented

parsePMULHUW = parseMmxXmmPQVW PMULHUW

parsePMULHW = parseMmxXmmPQVW PMULHW

parseMOVNTQ = parseMmxXmmMPMV MOVNTQ MOVNTDQ

parsePSUBSB = parseMmxXmmPQVW PSUBSB

parsePSUBSQ = parseMmxXmmPQVW PSUBSQ

parsePMINSW = parseMmxXmmPQVW PMINSW

parsePOR = parseMmxXmmPQVW POR

parsePADDSB = parseMmxXmmPQVW PADDSB

parsePADDSW = parseMmxXmmPQVW PADDSW

parsePMAXSW = parseMmxXmmPQVW PMAXSW

parsePXOR = parseMmxXmmPQVW PXOR

-- LDDQU is only valid with the 0xf2 prefix; operands are [XMM register from
-- the reg field, ModRM operand].
parseLDDQU b = do
  st <- getState
  if hasPrefix 0xf2 st
    then do
      (op1, _, _, reg, _) <- parseAddress32 OP128
      let v = OpReg (xmmregs !! (fromIntegral reg)) (fromIntegral reg)
      return $ Instr LDDQU OP128 [v, op1]
    else parseInvalidOpcode b

parsePSLLW = parseMmxXmmPQVW PSLLW

parsePSLLD = parseMmxXmmPQVW PSLLD

parsePSLLQ = parseMmxXmmPQVW PSLLQ

parsePMULUDQ = parseMmxXmmPQVW PMULUDQ

parsePMADDWD = parseMmxXmmPQVW PMADDWD

parsePSADBW = parseMmxXmmPQVW PSADBW

parseMASKMOVQ = parseMmxXmmPNVU MASKMOVQ

parsePSUBB = parseMmxXmmPQVW PSUBB

parsePSUBW = parseMmxXmmPQVW PSUBW

parsePSUBD = parseMmxXmmPQVW PSUBD

parsePSUBQ = parseMmxXmmPQVW PSUBQ

parsePADDB = parseMmxXmmPQVW PADDB

parsePADDW = parseMmxXmmPQVW PADDW

parsePADDD = parseMmxXmmPQVW PADDD