{-# LANGUAGE DeriveDataTypeable , ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Types provided by the disassembler. module Hdis86.Types ( -- * Instructions Instruction(..) , Prefix(..) , Operand(..), Memory(..), Pointer(..), Immediate(..) -- * Registers , Register(..), GPR(..), Half(..) , Segment(..), ControlRegister(..), DebugRegister(..) , MMXRegister(..), X87Register(..), XMMRegister(..) -- * Word sizes , WordSize(..), wordSize, bitsInWord -- * Instruction with metadata , Metadata(..) -- * Configuration , Config(..) , Vendor(..), CPUMode(..), Syntax(..) -- * Common configurations , intel32, intel64, amd32, amd64 -- * Opcodes , Opcode(..) ) where import Hdis86.Internal.Opcode import Data.Typeable ( Typeable ) import Data.Data ( Data ) import Data.Word import Data.Int import Control.Applicative hiding ( Const ) import qualified Data.ByteString as BS import qualified Text.Read as R import qualified Test.QuickCheck as Q -- | Machine word sizes. -- -- Some fields, such as immediate operands, come in different -- widths. We store the equivalent integer value in a @'Word64'@, -- along with a @'WordSize'@ to indicate the original width. data WordSize = Bits0 -- ^ Field not present, value will be 0 | Bits8 | Bits16 | Bits32 | Bits48 | Bits64 | Bits80 deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | Convert a number of bits to a @'WordSize'@. wordSize :: Word8 -> Maybe WordSize wordSize 0 = Just Bits0 wordSize 8 = Just Bits8 wordSize 16 = Just Bits16 wordSize 32 = Just Bits32 wordSize 48 = Just Bits48 wordSize 64 = Just Bits64 wordSize 80 = Just Bits80 wordSize _ = Nothing -- | Number of bits in a word of a given size. bitsInWord :: WordSize -> Word8 bitsInWord Bits0 = 0 bitsInWord Bits8 = 8 bitsInWord Bits16 = 16 bitsInWord Bits32 = 32 bitsInWord Bits48 = 48 bitsInWord Bits64 = 64 bitsInWord Bits80 = 80 -- | An x86 \/ AMD64 register. data Register = RegNone -- ^ No register specified. | Reg8 GPR Half -- ^ Either 8-bit half of the low 16 bits -- of a general-purpose register | Reg16 GPR -- ^ Low 16 bits of a general-purpose register -- (full register in 16-bit mode) | Reg32 GPR -- ^ Low 32 bits of a general-purpose register -- (full register in 32-bit mode) | Reg64 GPR -- ^ Full 64-bit general-purpose register | RegSeg Segment -- ^ Segment register | RegCtl ControlRegister -- ^ Control register | RegDbg DebugRegister -- ^ Debug register | RegMMX MMXRegister -- ^ MMX register | RegX87 X87Register -- ^ @x87@ floating point unit register | RegXMM XMMRegister -- ^ XMM register | RegIP -- ^ Instruction pointer deriving (Eq, Ord, Show, Read, Typeable, Data) -- | A general-purpose register. -- -- The names are taken from the 64-bit architecture, but they map onto -- other modes in the obvious way. data GPR = RAX | RCX | RDX | RBX | RSP | RBP | RSI | RDI | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | Indicates which half of a 16-bit register is used as an 8-bit register. data Half = L -- ^ Low or least significant 8 bits | H -- ^ High or most significant 8 bits deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | A segment register. data Segment = ES | CS | SS | DS | FS | GS deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | A control register. data ControlRegister = CR0 | CR1 | CR2 | CR3 | CR4 | CR5 | CR6 | CR7 | CR8 | CR9 | CR10 | CR11 | CR12 | CR13 | CR14 | CR15 deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | A debug register. data DebugRegister = DR0 | DR1 | DR2 | DR3 | DR4 | DR5 | DR6 | DR7 | DR8 | DR9 | DR10 | DR11 | DR12 | DR13 | DR14 | DR15 deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | An MMX register. data MMXRegister = MM0 | MM1 | MM2 | MM3 | MM4 | MM5 | MM6 | MM7 deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | An @x87@ floating-point unit register. data X87Register = ST0 | ST1 | ST2 | ST3 | ST4 | ST5 | ST6 | ST7 deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | An XMM register. data XMMRegister = XMM0 | XMM1 | XMM2 | XMM3 | XMM4 | XMM5 | XMM6 | XMM7 | XMM8 | XMM9 | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 deriving (Eq, Ord, Show, Read, Typeable, Data, Enum, Bounded) -- | An x86 \/ AMD64 CPU instruction. -- -- The destination @'Operand'@ (if any) will precede the source -- @'Operand'@s. data Instruction = Inst { inPrefixes :: [Prefix] , inOpcode :: Opcode , inOperands :: [Operand] } deriving (Eq, Ord, Typeable, Data) instance Show Instruction where showsPrec p (Inst pfx opc opr) = showParen (p >= 11) body where body = foldr (.) id [("Inst "++), showsPrec 11 pfx, (" "++), showsPrec 11 opc, (" "++), showsPrec 11 opr] instance Read Instruction where readsPrec d = R.readParen (d > 10) $ \r -> [(Inst pfx opc opr, xd) | ("Inst", xa) <- R.lex r , (pfx, xb) <- readsPrec 11 xa , (opc, xc) <- readsPrec 11 xb , (opr, xd) <- readsPrec 11 xc] -- | Prefixes, used to modify an instruction. data Prefix = Seg Segment -- ^ Segment override | Rex -- ^ @REX@ prefix; enables certain 64-bit features | OperSize -- ^ Operand size override | AddrSize -- ^ Address size override | Lock -- ^ Perform memory operations atomically | Rep -- ^ Repeat | RepE -- ^ Repeat while equal | RepNE -- ^ Repeat while not equal deriving (Eq, Ord, Show, Read, Typeable, Data) -- | Instruction operands. data Operand = Mem Memory -- ^ Memory access | Reg Register -- ^ Register | Ptr Pointer -- ^ Segmented pointer | Imm (Immediate Word64) -- ^ Immediate value | Jump (Immediate Int64 ) -- ^ Immediate value, for a relative jump | Const (Immediate Word64) -- ^ Constant value deriving (Eq, Ord, Show, Read, Typeable, Data) -- | A memory-access operand. data Memory = Memory { mSize :: WordSize -- ^ Size of the word in memory , mBase :: Register -- ^ Base register , mIndex :: Register -- ^ Index register , mScale :: Word8 -- ^ Scale of index , mOffset :: Immediate Int64 -- ^ Displacement / offset value } deriving (Eq, Ord, Show, Read, Typeable, Data) -- | A segmented pointer operand. data Pointer = Pointer { pSegment :: Word16 -- ^ Segment , pOffset :: Immediate Word32 -- ^ Offset, 16 or 32 bits } deriving (Eq, Ord, Show, Read, Typeable, Data) -- | An immediate operand. data Immediate t = Immediate { iSize :: WordSize -- ^ Size of the field , iValue :: t -- ^ Immediate value, e.g @'Int64'@ or @'Word64'@ } deriving (Eq, Ord, Show, Read, Typeable, Data) -- | An instruction with full metadata. data Metadata = Metadata { mdOffset :: Word64 -- ^ Offset of the start of this instruction , mdLength :: Word -- ^ Length of this instruction in bytes , mdHex :: String -- ^ Hexadecimal representation of this instruction , mdBytes :: BS.ByteString -- ^ Bytes that make up this instruction , mdAssembly :: String -- ^ Assembly code for this instruction , mdInst :: Instruction -- ^ The instruction itself } deriving (Eq, Ord, Show, Read, Typeable, Data) -- | CPU vendors, supporting slightly different instruction sets. data Vendor = Intel | AMD deriving (Eq, Ord, Show, Read, Enum, Bounded, Typeable, Data) -- | Selection of assembly output syntax. data Syntax = SyntaxNone -- ^ Don't generate assembly syntax | SyntaxIntel -- ^ Intel- / NASM-like syntax | SyntaxATT -- ^ AT&T- / @gas@-like syntax deriving (Eq, Ord, Show, Read, Enum, Bounded, Typeable, Data) -- | Overall configuration of the disassembler. data Config = Config { cfgVendor :: Vendor -- ^ CPU vendor; determines the instruction set used , cfgCPUMode :: CPUMode -- ^ Disassemble 16-, 32-, or 64-bit code , cfgSyntax :: Syntax -- ^ Syntax to use when generating assembly , cfgOrigin :: Word64 -- ^ Address where the first instruction would live in memory } deriving (Eq, Ord, Show, Read, Typeable, Data) -- | CPU execution mode. data CPUMode = Mode16 -- ^ 16-bit mode | Mode32 -- ^ 32-bit mode | Mode64 -- ^ 64-bit mode deriving (Eq, Ord, Show, Read, Enum, Bounded, Typeable, Data) intel32, intel64, amd32, amd64 :: Config intel32 = Config Intel Mode32 SyntaxNone 0 intel64 = Config Intel Mode64 SyntaxNone 0 amd32 = Config AMD Mode32 SyntaxNone 0 amd64 = Config AMD Mode64 SyntaxNone 0 -- QuickCheck properties _prop_Instruction_ReadShow :: Instruction -> Bool _prop_Instruction_ReadShow i = read (show i) == i -- QuickCheck instances arbEnum :: forall a. (Enum a, Bounded a) => Q.Gen a arbEnum = toEnum <$> Q.choose (fromEnum lb, fromEnum ub) where lb, ub :: a (lb, ub) = (minBound, maxBound) instance Q.Arbitrary GPR where arbitrary = arbEnum instance Q.Arbitrary Half where arbitrary = arbEnum instance Q.Arbitrary Segment where arbitrary = arbEnum instance Q.Arbitrary ControlRegister where arbitrary = arbEnum instance Q.Arbitrary DebugRegister where arbitrary = arbEnum instance Q.Arbitrary MMXRegister where arbitrary = arbEnum instance Q.Arbitrary X87Register where arbitrary = arbEnum instance Q.Arbitrary XMMRegister where arbitrary = arbEnum instance Q.Arbitrary WordSize where arbitrary = arbEnum instance Q.Arbitrary Vendor where arbitrary = arbEnum instance Q.Arbitrary CPUMode where arbitrary = arbEnum instance Q.Arbitrary Syntax where arbitrary = arbEnum instance Q.Arbitrary Opcode where arbitrary = arbEnum instance Q.Arbitrary Register where arbitrary = Q.oneof [ pure RegNone , pure RegIP , Reg8 <$> Q.arbitrary <*> Q.arbitrary , Reg16 <$> Q.arbitrary , Reg32 <$> Q.arbitrary , Reg64 <$> Q.arbitrary , RegSeg <$> Q.arbitrary , RegCtl <$> Q.arbitrary , RegDbg <$> Q.arbitrary , RegMMX <$> Q.arbitrary , RegX87 <$> Q.arbitrary , RegXMM <$> Q.arbitrary ] instance (Q.Arbitrary t) => Q.Arbitrary (Immediate t) where arbitrary = Immediate <$> Q.arbitrary <*> Q.arbitrary instance Q.Arbitrary Pointer where arbitrary = Pointer <$> Q.arbitrary <*> Q.arbitrary instance Q.Arbitrary Memory where arbitrary = Memory <$> Q.arbitrary <*> Q.arbitrary <*> Q.arbitrary <*> Q.arbitrary <*> Q.arbitrary instance Q.Arbitrary Operand where arbitrary = Q.oneof [ Mem <$> Q.arbitrary , Reg <$> Q.arbitrary , Ptr <$> Q.arbitrary , Imm <$> Q.arbitrary , Jump <$> Q.arbitrary , Const <$> Q.arbitrary ] instance Q.Arbitrary Prefix where arbitrary = Q.oneof ( (Seg <$> Q.arbitrary) : map pure [Rex, OperSize, AddrSize, Lock, Rep, RepE, RepNE] ) instance Q.Arbitrary Instruction where arbitrary = do np <- Q.choose (0,3) no <- Q.choose (0,3) Inst <$> Q.vectorOf np Q.arbitrary <*> Q.arbitrary <*> Q.vectorOf no Q.arbitrary instance Q.Arbitrary Config where arbitrary = Config <$> Q.arbitrary <*> Q.arbitrary <*> Q.arbitrary <*> Q.arbitrary