-- | Data.Macho is a module for parsing a ByteString of a Mach-O file into a Macho record.
module Data.Macho ( parseMacho
                  , Macho(..)
                  , MachoHeader(..)
                  , LC_COMMAND(..)
                  , CPU_TYPE(..)
                  , CPU_SUBTYPE(..)
                  , MH_FLAGS(..)
                  , VM_PROT(..)
                  , MachoSegment(..)
                  , SG_FLAG(..)
                  , MachoSection(..)
                  , S_TYPE(..)
                  , S_USER_ATTR(..)
                  , S_SYS_ATTR(..)
                  , N_TYPE(..)
                  , REFERENCE_FLAG(..)
                  , MachoSymbol(..)
                  , DylibModule(..)
                  , R_TYPE(..)
                  , Relocation(..)
                  , MachoDynamicSymbolTable(..)
                  , MH_FILETYPE(..)) where

import Data.Binary
import Data.Binary.Get
import Data.Bits
import Data.Word
import Data.Int
import Numeric
import Control.Monad
import Control.Applicative
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy as L

-- | Classification of the first header word, as decoded little-endian by
-- 'getMachoHeader'.  The @CIGAM@ variants are the byte-swapped magics.
data MH_MAGIC
    = MH_MAGIC32
    | MH_MAGIC64
    | MH_CIGAM32
    | MH_CIGAM64

-- | Map the first header word (read little-endian) to its magic class.
-- Anything that is not one of the four known magics is rejected with a
-- descriptive error instead of an anonymous pattern-match failure.
macho_magic :: Word32 -> MH_MAGIC
macho_magic 0xfeedface = MH_MAGIC32
macho_magic 0xfeedfacf = MH_MAGIC64
macho_magic 0xcefaedfe = MH_CIGAM32
macho_magic 0xcffaedfe = MH_CIGAM64
macho_magic other      =
    error $ "Data.Macho: not a Mach-O image (magic 0x" ++ showHex other ")"

-- | Extract a @sz@-bit field that starts @off@ bits above the least
-- significant bit of a 32-bit word.
bitfield_le :: Int -> Int -> Word32 -> Word32
bitfield_le off sz word = (word `shiftL` (32 - off - sz)) `shiftR` (32 - sz)

-- | Extract a @sz@-bit field that starts @off@ bits below the most
-- significant bit of a 32-bit word.
bitfield_be :: Int -> Int -> Word32 -> Word32
bitfield_be off sz word = (word `shiftL` off) `shiftR` (32 - sz)

-- | Endianness- and width-specific primitives chosen once from the magic
-- number and threaded through every sub-parser.
data MachoReader = MachoReader
    { is64bit   :: Bool                               -- ^ True for the 64-bit magics
    , getWord16 :: Get Word16                         -- ^ 16-bit read in file byte order
    , getWord32 :: Get Word32                         -- ^ 32-bit read in file byte order
    , getWord64 :: Get Word64                         -- ^ 64-bit read in file byte order
    , bitfield  :: Int -> Int -> Word32 -> Word32     -- ^ bit-field extraction (offset, size, word)
    }

-- | Build the reader matching a magic class: the plain magics select the
-- little-endian primitives, the @CIGAM@ magics the big-endian ones.
macho_reader :: MH_MAGIC -> MachoReader
macho_reader magic = case magic of
    MH_MAGIC32 -> littleEndian False
    MH_MAGIC64 -> littleEndian True
    MH_CIGAM32 -> bigEndian False
    MH_CIGAM64 -> bigEndian True
  where
    littleEndian wide = MachoReader
        { is64bit   = wide
        , getWord16 = getWord16le
        , getWord32 = getWord32le
        , getWord64 = getWord64le
        , bitfield  = bitfield_le
        }
    bigEndian wide = MachoReader
        { is64bit   = wide
        , getWord16 = getWord16be
        , getWord32 = getWord32be
        , getWord64 = getWord64be
        , bitfield  = bitfield_be
        }

data CPU_TYPE
    = CPU_TYPE_X86
    | CPU_TYPE_X86_64
    | CPU_TYPE_ARM
    | CPU_TYPE_POWERPC
    | CPU_TYPE_POWERPC64
    deriving (Show, Eq)

-- | Decode the @cputype@ header word.  Unknown CPU families raise an error
-- that names the offending value.
mach_cputype :: Word32 -> CPU_TYPE
mach_cputype 0x00000007 = CPU_TYPE_X86
mach_cputype 0x01000007 = CPU_TYPE_X86_64
mach_cputype 0x0000000c = CPU_TYPE_ARM
mach_cputype 0x00000012 = CPU_TYPE_POWERPC
mach_cputype 0x01000012 = CPU_TYPE_POWERPC64
mach_cputype other      =
    error $ "Data.Macho: unsupported CPU type 0x" ++ showHex other ""

data CPU_SUBTYPE
    = CPU_SUBTYPE_INTEL
    | CPU_SUBTYPE_I386_ALL
    | CPU_SUBTYPE_386
    | CPU_SUBTYPE_486
    | CPU_SUBTYPE_486SX
    | CPU_SUBTYPE_PENT
    | CPU_SUBTYPE_PENTPRO
    | CPU_SUBTYPE_PENTII_M3
    | CPU_SUBTYPE_PENTII_M5
    | CPU_SUBTYPE_CELERON
    | CPU_SUBTYPE_CELERON_MOBILE
    | CPU_SUBTYPE_PENTIUM_3
    | CPU_SUBTYPE_PENTIUM_3_M
    | CPU_SUBTYPE_PENTIUM_3_XEON
    | CPU_SUBTYPE_PENTIUM_M
    | CPU_SUBTYPE_PENTIUM_4
    | CPU_SUBTYPE_PENTIUM_4_M
    | CPU_SUBTYPE_ITANIUM
    | CPU_SUBTYPE_ITANIUM_2
    | CPU_SUBTYPE_XEON
    | CPU_SUBTYPE_XEON_MP
    | CPU_SUBTYPE_INTEL_FAMILY
    | CPU_SUBTYPE_INTEL_FAMILY_MAX
    | CPU_SUBTYPE_INTEL_MODEL
    | CPU_SUBTYPE_INTEL_MODEL_ALL
    | CPU_SUBTYPE_X86_ALL
    | CPU_SUBTYPE_X86_64_ALL
    | CPU_SUBTYPE_X86_ARCH1
    | CPU_SUBTYPE_POWERPC_ALL
    | CPU_SUBTYPE_POWERPC_601
    | CPU_SUBTYPE_POWERPC_602
    | CPU_SUBTYPE_POWERPC_603
    | CPU_SUBTYPE_POWERPC_603e
    | CPU_SUBTYPE_POWERPC_603ev
    | CPU_SUBTYPE_POWERPC_604
    | CPU_SUBTYPE_POWERPC_604e
    | CPU_SUBTYPE_POWERPC_620
    | CPU_SUBTYPE_POWERPC_750
    | CPU_SUBTYPE_POWERPC_7400
    | CPU_SUBTYPE_POWERPC_7450
    | CPU_SUBTYPE_POWERPC_970
    | CPU_SUBTYPE_ARM_ALL
    | CPU_SUBTYPE_ARM_V4T
    | CPU_SUBTYPE_ARM_V6
    deriving (Show, Eq)

-- | Decode the @cpusubtype@ header word for an already-decoded CPU family.
--
-- The top byte of the word holds capability flags rather than the subtype
-- number (64-bit executables commonly carry 0x80000000 there), so it is
-- masked off before the lookup.  Unknown subtypes raise a descriptive error.
mach_cpusubtype :: CPU_TYPE -> Word32 -> CPU_SUBTYPE
mach_cpusubtype cputype raw =
    case lookup subtype table of
        Just known -> known
        Nothing    -> error $ "Data.Macho: unsupported CPU subtype "
                           ++ show subtype ++ " for " ++ show cputype
  where
    subtype = raw .&. 0x00ffffff

    table = case cputype of
        CPU_TYPE_X86       -> x86
        CPU_TYPE_X86_64    -> [(3, CPU_SUBTYPE_X86_64_ALL)]
        CPU_TYPE_POWERPC   -> powerpc
        CPU_TYPE_POWERPC64 -> powerpc   -- same numbering as 32-bit PowerPC
        CPU_TYPE_ARM       -> arm

    x86 =
        [ (132, CPU_SUBTYPE_486SX)
        , (5,   CPU_SUBTYPE_PENT)
        , (22,  CPU_SUBTYPE_PENTPRO)
        , (54,  CPU_SUBTYPE_PENTII_M3)
        , (86,  CPU_SUBTYPE_PENTII_M5)
        , (103, CPU_SUBTYPE_CELERON)
        , (119, CPU_SUBTYPE_CELERON_MOBILE)
        , (8,   CPU_SUBTYPE_PENTIUM_3)
        , (24,  CPU_SUBTYPE_PENTIUM_3_M)
        , (40,  CPU_SUBTYPE_PENTIUM_3_XEON)
        , (9,   CPU_SUBTYPE_PENTIUM_M)
        , (10,  CPU_SUBTYPE_PENTIUM_4)
        , (26,  CPU_SUBTYPE_PENTIUM_4_M)
        , (11,  CPU_SUBTYPE_ITANIUM)
        , (27,  CPU_SUBTYPE_ITANIUM_2)
        , (12,  CPU_SUBTYPE_XEON)
        , (28,  CPU_SUBTYPE_XEON_MP)
        , (3,   CPU_SUBTYPE_X86_ALL)
        , (4,   CPU_SUBTYPE_X86_ARCH1)
        ]

    powerpc =
        [ (0,   CPU_SUBTYPE_POWERPC_ALL)
        , (1,   CPU_SUBTYPE_POWERPC_601)
        , (2,   CPU_SUBTYPE_POWERPC_602)
        , (3,   CPU_SUBTYPE_POWERPC_603)
        , (4,   CPU_SUBTYPE_POWERPC_603e)
        , (5,   CPU_SUBTYPE_POWERPC_603ev)
        , (6,   CPU_SUBTYPE_POWERPC_604)
        , (7,   CPU_SUBTYPE_POWERPC_604e)
        , (8,   CPU_SUBTYPE_POWERPC_620)
        , (9,   CPU_SUBTYPE_POWERPC_750)
        , (10,  CPU_SUBTYPE_POWERPC_7400)
        , (11,  CPU_SUBTYPE_POWERPC_7450)
        , (100, CPU_SUBTYPE_POWERPC_970)
        ]

    arm =
        [ (0, CPU_SUBTYPE_ARM_ALL)
        , (5, CPU_SUBTYPE_ARM_V4T)
        , (6, CPU_SUBTYPE_ARM_V6)
        ]

data MachoHeader = MachoHeader
    { mh_cputype    :: CPU_TYPE     -- ^ CPU family the Mach-O executes on.
    , mh_cpusubtype :: CPU_SUBTYPE  -- ^ Specific CPU type the Mach-O executes on.
    , mh_filetype   :: MH_FILETYPE  -- ^ Type of Mach-o file.
    , mh_flags      :: [MH_FLAGS]   -- ^ Flags.
    } deriving (Show, Eq)

-- | Decode the Mach-O header at the start of the input.
--
-- The magic word is always read little-endian; it selects the reader used for
-- every later field.  Returns the reader, the byte size of the load-command
-- area (@sizeofcmds@), the number of bytes the header itself occupied, and the
-- decoded header.
getMachoHeader :: Get (MachoReader, Int, Int, MachoHeader)
getMachoHeader = do
    magic <- macho_magic <$> getWord32le
    let reader = macho_reader magic
    cputype    <- mach_cputype <$> getWord32 reader
    cpusubtype <- mach_cpusubtype cputype <$> getWord32 reader
    filetype   <- mach_filetype <$> getWord32 reader
    _ncmds     <- getWord32 reader   -- command count is not needed: the area is bounded by sizeofcmds
    sizeofcmds <- fromIntegral <$> getWord32 reader
    flags      <- getMachHeaderFlags reader
    -- 64-bit headers carry one extra reserved word.
    when (is64bit reader) $ getWord32 reader >> return ()
    headerSize <- fromIntegral <$> bytesRead
    return (reader, sizeofcmds, headerSize, MachoHeader cputype cpusubtype filetype flags)

-- | Decode load commands until the input is exhausted.
--
-- Each command is an 8-byte prefix (@cmd@, @cmdsize@) followed by
-- @cmdsize - 8@ payload bytes.  The payload is handed to 'getLoadCommand'
-- both as the parser input and as a plain ByteString; @fl@ is the whole file
-- and @mh@ the decoded header.
getLoadCommands :: MachoReader -> B.ByteString -> MachoHeader -> Get [LC_COMMAND]
getLoadCommands mr fl mh = do
    done <- isEmpty
    if done
        then return []
        else do
            cmd     <- fromIntegral <$> getWord32 mr
            cmdsize <- fromIntegral <$> getWord32 mr
            lcdata  <- getByteString (cmdsize - 8)
            let lc = runGet (getLoadCommand cmd mr lcdata fl mh) (L.fromChunks [lcdata])
            rest <- getLoadCommands mr fl mh
            return (lc : rest)

data Macho = Macho
    { m_header   :: MachoHeader   -- ^ Header information.
    , m_commands :: [LC_COMMAND]  -- ^ List of load commands describing Mach-O contents.
    } deriving (Show, Eq)

-- | Parse a ByteString of a Mach-O object into a Macho record.
--
-- The header is decoded first; it yields the endian-aware reader together
-- with the position and size of the load-command area, which is then decoded
-- on its own.  The complete file is also passed down because load commands
-- refer to data by file offset.
parseMacho :: B.ByteString -> Macho
parseMacho b = Macho header commands
  where
    (mr, sizeofcmds, hdrSize, header) = runGet getMachoHeader (L.fromChunks [b])
    commandBytes = B.take sizeofcmds (B.drop hdrSize b)
    commands     = runGet (getLoadCommands mr b header) (L.fromChunks [commandBytes])

-- | Select, in table order, the value of every table entry whose bit
-- position is set in @word@.
decodeBits :: Bits w => [(Int, a)] -> w -> [a]
decodeBits table word = [value | (pos, value) <- table, testBit word pos]

data MH_FILETYPE
    = MH_OBJECT      -- ^ relocatable object file
    | MH_EXECUTE     -- ^ demand paged executable file
    | MH_CORE        -- ^ core file
    | MH_PRELOAD     -- ^ preloaded executable file
    | MH_DYLIB       -- ^ dynamically bound shared library
    | MH_DYLINKER    -- ^ dynamic link editor
    | MH_BUNDLE      -- ^ dynamically bound bundle file
    | MH_DYLIB_STUB  -- ^ shared library stub for static linking only, no section contents
    | MH_DSYM        -- ^ companion file with only debug sections
    deriving (Show, Eq)

-- | Decode the @filetype@ header word; unknown values raise a descriptive error.
mach_filetype :: Word32 -> MH_FILETYPE
mach_filetype 0x1 = MH_OBJECT
mach_filetype 0x2 = MH_EXECUTE
mach_filetype 0x4 = MH_CORE
mach_filetype 0x5 = MH_PRELOAD
mach_filetype 0x6 = MH_DYLIB
mach_filetype 0x7 = MH_DYLINKER
mach_filetype 0x8 = MH_BUNDLE
mach_filetype 0x9 = MH_DYLIB_STUB
mach_filetype 0xa = MH_DSYM
mach_filetype other =
    error $ "Data.Macho: unsupported file type 0x" ++ showHex other ""

data MH_FLAGS
    = MH_NOUNDEFS                 -- ^ the object file has no undefined references
    | MH_INCRLINK                 -- ^ the object file is the output of an incremental link against a base file and can't be link edited again
    | MH_DYLDLINK                 -- ^ the object file is input for the dynamic linker and can't be statically link edited again
    | MH_BINDATLOAD               -- ^ the object file's undefined references are bound by the dynamic linker when loaded.
    | MH_PREBOUND                 -- ^ the file has its dynamic undefined references prebound.
    | MH_SPLIT_SEGS               -- ^ the file has its read-only and read-write segments split
    | MH_TWOLEVEL                 -- ^ the image is using two-level name space bindings
    | MH_FORCE_FLAT               -- ^ the executable is forcing all images to use flat name space bindings
    | MH_NOMULTIDEFS              -- ^ this umbrella guarantees no multiple definitions of symbols in its sub-images so the two-level namespace hints can always be used.
    | MH_NOFIXPREBINDING          -- ^ do not have dyld notify the prebinding agent about this executable
    | MH_PREBINDABLE              -- ^ the binary is not prebound but can have its prebinding redone. only used when MH_PREBOUND is not set.
    | MH_ALLMODSBOUND             -- ^ indicates that this binary binds to all two-level namespace modules of its dependent libraries. only used when MH_PREBINDABLE and MH_TWOLEVEL are both set.
    | MH_SUBSECTIONS_VIA_SYMBOLS  -- ^ safe to divide up the sections into sub-sections via symbols for dead code stripping
    | MH_CANONICAL                -- ^ the binary has been canonicalized via the unprebind operation
    | MH_WEAK_DEFINES             -- ^ the final linked image contains external weak symbols
    | MH_BINDS_TO_WEAK            -- ^ the final linked image uses weak symbols
    | MH_ALLOW_STACK_EXECUTION    -- ^ When this bit is set, all stacks in the task will be given stack execution privilege. Only used in MH_EXECUTE filetypes.
    | MH_ROOT_SAFE                -- ^ When this bit is set, the binary declares it is safe for use in processes with uid zero
    | MH_SETUID_SAFE              -- ^ When this bit is set, the binary declares it is safe for use in processes when issetugid() is true
    | MH_NO_REEXPORTED_DYLIBS     -- ^ When this bit is set on a dylib, the static linker does not need to examine dependent dylibs to see if any are re-exported
    | MH_PIE                      -- ^ When this bit is set, the OS will load the main executable at a random address. Only used in MH_EXECUTE filetypes.
    deriving (Show, Eq)

-- | Read the header flags word and list the flags that are set, highest bit
-- first.  Bit 6 and bits above 21 have no constructor and are ignored.
getMachHeaderFlags :: MachoReader -> Get [MH_FLAGS]
getMachHeaderFlags mr = decodeBits flagBits <$> getWord32 mr
  where
    flagBits =
        [ (21, MH_PIE)
        , (20, MH_NO_REEXPORTED_DYLIBS)
        , (19, MH_SETUID_SAFE)
        , (18, MH_ROOT_SAFE)
        , (17, MH_ALLOW_STACK_EXECUTION)
        , (16, MH_BINDS_TO_WEAK)
        , (15, MH_WEAK_DEFINES)
        , (14, MH_CANONICAL)
        , (13, MH_SUBSECTIONS_VIA_SYMBOLS)
        , (12, MH_ALLMODSBOUND)
        , (11, MH_PREBINDABLE)
        , (10, MH_NOFIXPREBINDING)
        , (9,  MH_NOMULTIDEFS)
        , (8,  MH_FORCE_FLAT)
        , (7,  MH_TWOLEVEL)
        , (5,  MH_SPLIT_SEGS)
        , (4,  MH_PREBOUND)
        , (3,  MH_BINDATLOAD)
        , (2,  MH_DYLDLINK)
        , (1,  MH_INCRLINK)
        , (0,  MH_NOUNDEFS)
        ]

data LC_COMMAND
    = LC_SEGMENT MachoSegment                         -- ^ segment of this file to be mapped
    | LC_SYMTAB [MachoSymbol] B.ByteString            -- ^ static link-edit symbol table and stab info
    | LC_THREAD [(Word32, [Word32])]                  -- ^ thread state information (list of (flavor, [long]) pairs)
    | LC_UNIXTHREAD [(Word32, [Word32])]              -- ^ unix thread state information (includes a stack) (list of (flavor, [long]) pairs)
    | LC_DYSYMTAB MachoDynamicSymbolTable             -- ^ dynamic link-edit symbol table info
    | LC_LOAD_DYLIB String Word32 Word32 Word32       -- ^ load a dynamically linked shared library (name, timestamp, current version, compatibility version)
    | LC_ID_DYLIB String Word32 Word32 Word32         -- ^ dynamically linked shared lib ident (name, timestamp, current version, compatibility version)
    | LC_LOAD_DYLINKER String                         -- ^ load a dynamic linker (name of dynamic linker)
    | LC_ID_DYLINKER String                           -- ^ dynamic linker identification (name of dynamic linker)
    | LC_PREBOUND_DYLIB String [Word8]                -- ^ modules prebound for a dynamically linked shared library (name, list of module indices)
    | LC_ROUTINES Word32 Word32                       -- ^ image routines (virtual address of initialization routine, module index where it resides)
    | LC_SUB_FRAMEWORK String                         -- ^ sub framework (name)
    | LC_SUB_UMBRELLA String                          -- ^ sub umbrella (name)
    | LC_SUB_CLIENT String                            -- ^ sub client (name)
    | LC_SUB_LIBRARY String                           -- ^ sub library (name)
    | LC_TWOLEVEL_HINTS [(Word32, Word32)]            -- ^ two-level namespace lookup hints (list of (subimage index, symbol table index) pairs)
    | LC_PREBIND_CKSUM Word32                         -- ^ prebind checksum (checksum)
    | LC_LOAD_WEAK_DYLIB String Word32 Word32 Word32  -- ^ load a dynamically linked shared library that is allowed to be missing (symbols are weak imported) (name, timestamp, current version, compatibility version)
    | LC_SEGMENT_64 MachoSegment                      -- ^ 64-bit segment of this file to be mapped
    | LC_ROUTINES_64 Word64 Word64                    -- ^ 64-bit image routines (virtual address of initialization routine, module index where it resides)
    | LC_UUID [Word8]                                 -- ^ the uuid for an image or its corresponding dsym file (8 element list of bytes)
    | LC_RPATH String                                 -- ^ runpath additions (path)
    deriving (Show, Eq)

-- | Pick the payload parser for a load-command number.
--
-- @lc@ is the command payload (the bytes after @cmd@/@cmdsize@), @fl@ the
-- whole file and @mh@ the decoded header.  Command numbers without a parser
-- raise an error naming the command.
getLoadCommand :: Word32 -> MachoReader -> B.ByteString -> B.ByteString -> MachoHeader -> Get LC_COMMAND
getLoadCommand 0x00000001 mr lc fl mh = getSegmentCommand32 mr fl mh
getLoadCommand 0x00000002 mr lc fl mh = getSymTabCommand mr fl mh
getLoadCommand 0x00000004 mr lc fl mh = getThreadCommand mr LC_THREAD
getLoadCommand 0x00000005 mr lc fl mh = getThreadCommand mr LC_UNIXTHREAD
getLoadCommand 0x0000000b mr lc fl mh = getDySymTabCommand mr fl mh
getLoadCommand 0x0000000c mr lc fl mh = getDylibCommand mr lc LC_LOAD_DYLIB
getLoadCommand 0x0000000d mr lc fl mh = getDylibCommand mr lc LC_ID_DYLIB
getLoadCommand 0x0000000e mr lc fl mh = getDylinkerCommand mr lc LC_LOAD_DYLINKER
getLoadCommand 0x0000000f mr lc fl mh = getDylinkerCommand mr lc LC_ID_DYLINKER
getLoadCommand 0x00000010 mr lc fl mh = getPreboundDylibCommand mr lc
getLoadCommand 0x00000011 mr lc fl mh = getRoutinesCommand32 mr
getLoadCommand 0x00000012 mr lc fl mh = getSubFrameworkCommand mr lc
getLoadCommand 0x00000013 mr lc fl mh = getSubUmbrellaCommand mr lc
getLoadCommand 0x00000014 mr lc fl mh = getSubClientCommand mr lc
getLoadCommand 0x00000015 mr lc fl mh = getSubLibraryCommand mr lc
getLoadCommand 0x00000016 mr lc fl mh = getTwoLevelHintsCommand mr fl
getLoadCommand 0x00000017 mr lc fl mh = getPrebindCkSumCommand mr
getLoadCommand 0x80000018 mr lc fl mh = getDylibCommand mr lc LC_LOAD_WEAK_DYLIB
getLoadCommand 0x00000019 mr lc fl mh = getSegmentCommand64 mr fl mh
getLoadCommand 0x0000001a mr lc fl mh = getRoutinesCommand64 mr
getLoadCommand 0x0000001b mr lc fl mh = getUUIDCommand mr
getLoadCommand 0x8000001c mr lc fl mh = getRPathCommand mr lc
getLoadCommand other _ _ _ _ =
    error $ "Data.Macho: unsupported load command 0x" ++ showHex other ""

data VM_PROT
    = VM_PROT_READ     -- ^ read permission
    | VM_PROT_WRITE    -- ^ write permission
    | VM_PROT_EXECUTE  -- ^ execute permission
    deriving (Show, Eq)

-- | Read a protection word and list the permissions set in its low three
-- bits, highest bit first.
getVM_PROT :: MachoReader -> Get [VM_PROT]
getVM_PROT mr = decodeBits protBits <$> getWord32 mr
  where
    protBits =
        [ (2, VM_PROT_EXECUTE)
        , (1, VM_PROT_WRITE)
        , (0, VM_PROT_READ)
        ]

data MachoSegment = MachoSegment
    { seg_segname  :: String          -- ^ segment name
    , seg_vmaddr   :: Word64          -- ^ virtual address where the segment is loaded
    , seg_vmsize   :: Word64          -- ^ size of segment at runtime
    , seg_fileoff  :: Word64          -- ^ file offset of the segment
    , seg_filesize :: Word64          -- ^ size of segment in file
    , seg_maxprot  :: [VM_PROT]       -- ^ maximum virtual memory protection
    , seg_initprot :: [VM_PROT]       -- ^ initial virtual memory protection
    , seg_flags    :: [SG_FLAG]       -- ^ segment flags
    , seg_sections :: [MachoSection]  -- ^ sections owned by this segment
    } deriving (Show, Eq)

-- | Read a 16-byte name field and cut it at the first NUL byte.
getPaddedName :: Get String
getPaddedName = (takeWhile (/= '\0') . C.unpack) <$> getByteString 16

-- | Decode the payload of a 32-bit segment command, including the section
-- headers that follow it.  Address and size fields are widened to 64 bits.
getSegmentCommand32 :: MachoReader -> B.ByteString -> MachoHeader -> Get LC_COMMAND
getSegmentCommand32 mr fl mh = do
    segname  <- getPaddedName
    vmaddr   <- fromIntegral <$> getWord32 mr
    vmsize   <- fromIntegral <$> getWord32 mr
    fileoff  <- fromIntegral <$> getWord32 mr
    filesize <- fromIntegral <$> getWord32 mr
    maxprot  <- getVM_PROT mr
    initprot <- getVM_PROT mr
    nsects   <- fromIntegral <$> getWord32 mr
    flags    <- getSG_FLAG mr
    sects    <- replicateM nsects (getSection32 mr fl mh)
    return $ LC_SEGMENT MachoSegment
        { seg_segname  = segname
        , seg_vmaddr   = vmaddr
        , seg_vmsize   = vmsize
        , seg_fileoff  = fileoff
        , seg_filesize = filesize
        , seg_maxprot  = maxprot
        , seg_initprot = initprot
        , seg_flags    = flags
        , seg_sections = sects
        }

-- | Decode the payload of a 64-bit segment command, including the section
-- headers that follow it.
getSegmentCommand64 :: MachoReader -> B.ByteString -> MachoHeader -> Get LC_COMMAND
getSegmentCommand64 mr fl mh = do
    segname  <- getPaddedName
    vmaddr   <- getWord64 mr
    vmsize   <- getWord64 mr
    fileoff  <- getWord64 mr
    filesize <- getWord64 mr
    maxprot  <- getVM_PROT mr
    initprot <- getVM_PROT mr
    nsects   <- fromIntegral <$> getWord32 mr
    flags    <- getSG_FLAG mr
    sects    <- replicateM nsects (getSection64 mr fl mh)
    return $ LC_SEGMENT_64 MachoSegment
        { seg_segname  = segname
        , seg_vmaddr   = vmaddr
        , seg_vmsize   = vmsize
        , seg_fileoff  = fileoff
        , seg_filesize = filesize
        , seg_maxprot  = maxprot
        , seg_initprot = initprot
        , seg_flags    = flags
        , seg_sections = sects
        }

data SG_FLAG
    = SG_HIGHVM   -- ^ The file contents for this segment is for the high part of the VM space, the low part is zero filled (for stacks in core files).
    | SG_NORELOC  -- ^ This segment has nothing that was relocated in it and nothing relocated to it, that is it may be safely replaced without relocation.
    deriving (Show, Eq)

-- | Read a segment flags word and list the recognised flags (bits 2 and 0),
-- highest bit first.
getSG_FLAG :: MachoReader -> Get [SG_FLAG]
getSG_FLAG mr = decodeBits segBits <$> getWord32 mr
  where
    segBits =
        [ (2, SG_NORELOC)
        , (0, SG_HIGHVM)
        ]

data MachoSection = MachoSection
    { sec_sectname   :: String         -- ^ name of section
    , sec_segname    :: String         -- ^ name of segment that should own this section
    , sec_addr       :: Word64         -- ^ virtual memory address for section
    , sec_size       :: Word64         -- ^ size of section
    , sec_align      :: Int            -- ^ alignment required by section (literal form, not power of two, e.g. 8 not 3)
    , sec_relocs     :: [Relocation]   -- ^ relocations for this section
    , sec_type       :: S_TYPE         -- ^ type of section
    , sec_user_attrs :: [S_USER_ATTR]  -- ^ user attributes of section
    , sec_sys_attrs  :: [S_SYS_ATTR]   -- ^ system attributes of section
    } deriving (Show, Eq)

-- | Decode one 32-bit section header.  The relocation entries are not part of
-- the header: they are decoded from the whole file @fl@ at the recorded
-- relocation offset.
getSection32 :: MachoReader -> B.ByteString -> MachoHeader -> Get MachoSection
getSection32 mr fl mh = do
    sectname <- getPaddedName
    segname  <- getPaddedName
    addr     <- fromIntegral <$> getWord32 mr
    size     <- fromIntegral <$> getWord32 mr
    _offset  <- getWord32 mr
    align    <- (2 ^) <$> getWord32 mr       -- stored as a power-of-two exponent
    reloff   <- fromIntegral <$> getWord32 mr
    nreloc   <- fromIntegral <$> getWord32 mr
    flags    <- getWord32 mr
    _reserved1 <- getWord32 mr
    _reserved2 <- getWord32 mr
    return MachoSection
        { sec_sectname   = sectname
        , sec_segname    = segname
        , sec_addr       = addr
        , sec_size       = size
        , sec_align      = align
        , sec_relocs     = runGet (replicateM nreloc (getRel mr mh)) (L.fromChunks [B.drop reloff fl])
        , sec_type       = sectionType flags
        , sec_user_attrs = sectionUserAttribute flags
        , sec_sys_attrs  = sectionSystemAttribute flags
        }

-- | Decode one 64-bit section header (64-bit address and size, one extra
-- reserved word).  Relocations are decoded from the whole file @fl@ at the
-- recorded relocation offset.
getSection64 :: MachoReader -> B.ByteString -> MachoHeader -> Get MachoSection
getSection64 mr fl mh = do
    sectname <- getPaddedName
    segname  <- getPaddedName
    addr     <- getWord64 mr
    size     <- getWord64 mr
    _offset  <- getWord32 mr
    align    <- (2 ^) <$> getWord32 mr       -- stored as a power-of-two exponent
    reloff   <- fromIntegral <$> getWord32 mr
    nreloc   <- fromIntegral <$> getWord32 mr
    flags    <- getWord32 mr
    _reserved1 <- getWord32 mr
    _reserved2 <- getWord32 mr
    _reserved3 <- getWord32 mr
    return MachoSection
        { sec_sectname   = sectname
        , sec_segname    = segname
        , sec_addr       = addr
        , sec_size       = size
        , sec_align      = align
        , sec_relocs     = runGet (replicateM nreloc (getRel mr mh)) (L.fromChunks [B.drop reloff fl])
        , sec_type       = sectionType flags
        , sec_user_attrs = sectionUserAttribute flags
        , sec_sys_attrs  = sectionSystemAttribute flags
        }

data S_TYPE
    = S_REGULAR                     -- ^ regular section
    | S_ZEROFILL                    -- ^ zero fill on demand section
    | S_CSTRING_LITERALS            -- ^ section with only literal C strings
    | S_4BYTE_LITERALS              -- ^ section with only 4 byte literals
    | S_8BYTE_LITERALS              -- ^ section with only 8 byte literals
    | S_LITERAL_POINTERS            -- ^ section with only pointers to literals
    | S_NON_LAZY_SYMBOL_POINTERS    -- ^ section with only non-lazy symbol pointers
    | S_LAZY_SYMBOL_POINTERS        -- ^ section with only lazy symbol pointers
    | S_SYMBOL_STUBS                -- ^ section with only symbol stubs, byte size of stub in the reserved2 field
    | S_MOD_INIT_FUNC_POINTERS      -- ^ section with only function pointers for initialization
    | S_MOD_TERM_FUNC_POINTERS      -- ^ section with only function pointers for termination
    | S_COALESCED                   -- ^ section contains symbols that are to be coalesced
    | S_GB_ZEROFILL                 -- ^ zero fill on demand section (that can be larger than 4 gigabytes)
    | S_INTERPOSING                 -- ^ section with only pairs of function pointers for interposing
    | S_16BYTE_LITERALS             -- ^ section with only 16 byte literals
    | S_DTRACE_DOF                  -- ^ section contains DTrace Object Format
    | S_LAZY_DYLIB_SYMBOL_POINTERS  -- ^ section with only lazy symbol pointers to lazy loaded dylibs
    deriving (Show, Eq)

-- | Decode the section type held in the low byte of a section flags word.
-- Types without a constructor raise a descriptive error.
sectionType :: Word32 -> S_TYPE
sectionType flags = case flags .&. 0x000000ff of
    0x00  -> S_REGULAR
    0x01  -> S_ZEROFILL
    0x02  -> S_CSTRING_LITERALS
    0x03  -> S_4BYTE_LITERALS
    0x04  -> S_8BYTE_LITERALS
    0x05  -> S_LITERAL_POINTERS
    0x06  -> S_NON_LAZY_SYMBOL_POINTERS
    0x07  -> S_LAZY_SYMBOL_POINTERS
    0x08  -> S_SYMBOL_STUBS
    0x09  -> S_MOD_INIT_FUNC_POINTERS
    0x0a  -> S_MOD_TERM_FUNC_POINTERS
    0x0b  -> S_COALESCED
    0x0c  -> S_GB_ZEROFILL
    0x0d  -> S_INTERPOSING
    0x0e  -> S_16BYTE_LITERALS
    0x0f  -> S_DTRACE_DOF
    0x10  -> S_LAZY_DYLIB_SYMBOL_POINTERS
    other -> error $ "Data.Macho: unsupported section type 0x" ++ showHex other ""

data S_USER_ATTR
    = S_ATTR_PURE_INSTRUCTIONS    -- ^ section contains only true machine instructions
    | S_ATTR_NO_TOC               -- ^ section contains coalesced symbols that are not to be in a ranlib table of contents
    | S_ATTR_STRIP_STATIC_SYMS    -- ^ ok to strip static symbols in this section in files with the MH_DYLDLINK flag
    | S_ATTR_NO_DEAD_STRIP        -- ^ no dead stripping
    | S_ATTR_LIVE_SUPPORT         -- ^ blocks are live if they reference live blocks
    | S_ATTR_SELF_MODIFYING_CODE  -- ^ used with i386 code stubs written on by dyld
    | S_ATTR_DEBUG                -- ^ a debug section
    deriving (Show, Eq)

-- | List the user attributes set in the top byte of a section flags word,
-- highest bit first.
--
-- The attributes occupy bits 31 down to 25 (0x80000000 .. 0x02000000 in
-- loader.h).  Every test used to be one bit too low, which mislabelled each
-- attribute and made 'S_ATTR_DEBUG' unreachable.
sectionUserAttribute :: Word32 -> [S_USER_ATTR]
sectionUserAttribute flags = decodeBits userBits (flags .&. 0xff000000)
  where
    userBits =
        [ (31, S_ATTR_PURE_INSTRUCTIONS)
        , (30, S_ATTR_NO_TOC)
        , (29, S_ATTR_STRIP_STATIC_SYMS)
        , (28, S_ATTR_NO_DEAD_STRIP)
        , (27, S_ATTR_LIVE_SUPPORT)
        , (26, S_ATTR_SELF_MODIFYING_CODE)
        , (25, S_ATTR_DEBUG)
        ]

data S_SYS_ATTR
    = S_ATTR_SOME_INSTRUCTIONS  -- ^ section contains some machine instructions
    | S_ATTR_EXT_RELOC          -- ^ section has external relocation entries
    | S_ATTR_LOC_RELOC          -- ^ section has local relocation entries
    deriving (Show, Eq)

-- | List the system attributes set in the middle bytes of a section flags
-- word, highest bit first.
--
-- The attributes occupy bits 10, 9 and 8 (0x400, 0x200, 0x100 in loader.h).
-- The tests used to be one bit too low; the lowest one fell outside the mask
-- and could never fire.
sectionSystemAttribute :: Word32 -> [S_SYS_ATTR]
sectionSystemAttribute flags = decodeBits sysBits (flags .&. 0x00ffff00)
  where
    sysBits =
        [ (10, S_ATTR_SOME_INSTRUCTIONS)
        , (9,  S_ATTR_EXT_RELOC)
        , (8,  S_ATTR_LOC_RELOC)
        ]

-- | The bytes starting at @offset@, up to (not including) the first NUL byte.
nullStringAt :: Int -> B.ByteString -> B.ByteString
nullStringAt offset = B.takeWhile ((/=) 0) . B.drop offset

-- | The @size@ bytes starting at @offset@.
substringAt :: Int -> Int -> B.ByteString -> B.ByteString
substringAt offset size = B.take size . B.drop offset

-- | Translate an offset measured from the start of a load command into an
-- offset into the command payload.  The payload handed to the command parsers
-- no longer contains the 8-byte @cmd@/@cmdsize@ prefix, while offsets stored
-- inside a command are measured from the start of the command.
payloadOffset :: Int -> Int
payloadOffset offset = offset - 8

-- | Read an @lc_str@ offset word and return the NUL-terminated string it
-- points at inside the command payload @lc@.
--
-- The stored offset is rebased with 'payloadOffset'; indexing the payload with
-- the raw offset skipped the first eight characters of every string.
getLC_STR :: MachoReader -> B.ByteString -> Get String
getLC_STR mr lc = do
    offset <- fromIntegral <$> getWord32 mr
    return $ C.unpack $ nullStringAt (payloadOffset offset) lc

-- | Decode a dylib command payload (name, timestamp, current version,
-- compatibility version) and wrap it with the given constructor.
getDylibCommand :: MachoReader -> B.ByteString -> (String -> Word32 -> Word32 -> Word32 -> LC_COMMAND) -> Get LC_COMMAND
getDylibCommand mr lc con =
    con <$> getLC_STR mr lc
        <*> getWord32 mr
        <*> getWord32 mr
        <*> getWord32 mr

-- | Decode a sub-framework command: a single string.
getSubFrameworkCommand :: MachoReader -> B.ByteString -> Get LC_COMMAND
getSubFrameworkCommand mr lc = LC_SUB_FRAMEWORK <$> getLC_STR mr lc

-- | Decode a sub-client command: a single string.
getSubClientCommand :: MachoReader -> B.ByteString -> Get LC_COMMAND
getSubClientCommand mr lc = LC_SUB_CLIENT <$> getLC_STR mr lc

-- | Decode a sub-umbrella command: a single string.
getSubUmbrellaCommand :: MachoReader -> B.ByteString -> Get LC_COMMAND
getSubUmbrellaCommand mr lc = LC_SUB_UMBRELLA <$> getLC_STR mr lc

-- | Decode a sub-library command: a single string.
getSubLibraryCommand :: MachoReader -> B.ByteString -> Get LC_COMMAND
getSubLibraryCommand mr lc = LC_SUB_LIBRARY <$> getLC_STR mr lc

-- | Decode a dylinker command (a single string) and wrap it with the given
-- constructor.
getDylinkerCommand :: MachoReader -> B.ByteString -> (String -> LC_COMMAND) -> Get LC_COMMAND
getDylinkerCommand mr lc con = con <$> getLC_STR mr lc

-- | Decode a prebound-dylib command: the library name and the bytes of the
-- linked-modules bit vector.
--
-- The vector holds one bit per module, so it is @ceil(nmodules / 8)@ bytes
-- long (the old @div 8 + mod 8@ over-read for most counts).  Its offset is
-- rebased with 'payloadOffset' like every other in-command offset.
getPreboundDylibCommand :: MachoReader -> B.ByteString -> Get LC_COMMAND
getPreboundDylibCommand mr lc = do
    name          <- getLC_STR mr lc
    nmodules      <- fromIntegral <$> getWord32 mr
    modulesOffset <- fromIntegral <$> getWord32 mr
    let vectorBytes = (nmodules + 7) `div` 8
        modules     = B.unpack $ B.take vectorBytes $ B.drop (payloadOffset modulesOffset) lc
    return $ LC_PREBOUND_DYLIB name modules

-- | Decode a thread command: (flavor, count, count state words) groups
-- repeated until the payload is exhausted, wrapped with the given constructor.
getThreadCommand :: MachoReader -> ([(Word32, [Word32])] -> LC_COMMAND) -> Get LC_COMMAND
getThreadCommand mr con = con <$> flavours
  where
    flavours = do
        done <- isEmpty
        if done
            then return []
            else do
                flavor <- getWord32 mr
                count  <- fromIntegral <$> getWord32 mr
                state  <- replicateM count (getWord32 mr)
                rest   <- flavours
                return ((flavor, state) : rest)

-- | Decode a 32-bit routines command; the six reserved words are consumed and
-- discarded.
getRoutinesCommand32 :: MachoReader -> Get LC_COMMAND
getRoutinesCommand32 mr = do
    init_address <- getWord32 mr
    init_module  <- getWord32 mr
    replicateM_ 6 (getWord32 mr)
    return $ LC_ROUTINES init_address init_module

-- | Decode a 64-bit routines command; the six reserved words are consumed and
-- discarded.
getRoutinesCommand64 :: MachoReader -> Get LC_COMMAND
getRoutinesCommand64 mr = do
    init_address <- getWord64 mr
    init_module  <- getWord64 mr
    replicateM_ 6 (getWord64 mr)
    return $ LC_ROUTINES_64 init_address init_module

data N_TYPE
    = N_UNDF     -- ^ undefined symbol, n_sect is 0
    | N_ABS      -- ^ absolute symbol, does not need relocation, n_sect is 0
    | N_SECT     -- ^ symbol is defined in section n_sect
    | N_PBUD     -- ^ symbol is undefined and the image is using a prebound value for the symbol, n_sect is 0
    | N_INDR     -- ^ symbol is defined to be the same as another symbol. n_value is a string table offset indicating the name of that symbol
    | N_GSYM     -- ^ stab global symbol: name,,0,type,0
    | N_FNAME    -- ^ stab procedure name (f77 kludge): name,,0,0,0
    | N_FUN      -- ^ stab procedure: name,,n_sect,linenumber,address
    | N_STSYM    -- ^ stab static symbol: name,,n_sect,type,address
    | N_LCSYM    -- ^ stab .lcomm symbol: name,,n_sect,type,address
    | N_BNSYM    -- ^ stab begin nsect sym: 0,,n_sect,0,address
    | N_OPT      -- ^ stab emitted with gcc2_compiled and in gcc source
    | N_RSYM     -- ^ stab register sym: name,,0,type,register
    | N_SLINE    -- ^ stab src line: 0,,n_sect,linenumber,address
    | N_ENSYM    -- ^ stab end nsect sym: 0,,n_sect,0,address
    | N_SSYM     -- ^ stab structure elt: name,,0,type,struct_offset
    | N_SO       -- ^ stab source file name: name,,n_sect,0,address
    | N_OSO      -- ^ stab object file name: name,,0,0,st_mtime
    | N_LSYM     -- ^ stab local sym: name,,0,type,offset
    | N_BINCL    -- ^ stab include file beginning: name,,0,0,sum
    | N_SOL      -- ^ stab #included file name: name,,n_sect,0,address
    | N_PARAMS   -- ^ stab compiler parameters: name,,0,0,0
    | N_VERSION  -- ^ stab compiler version: name,,0,0,0
    | N_OLEVEL   -- ^ stab compiler -O level: name,,0,0,0
    | N_PSYM     -- ^ stab parameter: name,,0,type,offset
    | N_EINCL    -- ^ stab include file end: name,,0,0,0
    | N_ENTRY    -- ^ stab alternate entry: name,,n_sect,linenumber,address
    | N_LBRAC    -- ^ stab left bracket: 0,,0,nesting level,address
    | N_EXCL     -- ^ stab deleted include file: name,,0,0,sum
    | N_RBRAC    -- ^ stab right bracket: 0,,0,nesting level,address
    | N_BCOMM    -- ^ stab begin common: name,,0,0,0
    | N_ECOMM    -- ^ stab end common: name,,n_sect,0,0
    | N_ECOML    -- ^ stab end common (local name): 0,,n_sect,0,address
    | N_LENG     -- ^ stab second stab entry with length information
    | N_PC       -- ^ stab global pascal symbol: name,,0,subtype,line
    deriving (Show, Eq)

-- | Decode a symbol type number.
--
-- Values below 0x20 are the non-stab type field after 'n_types' has shifted it
-- right by one bit; the rest are raw stab type bytes.  Unknown values raise a
-- descriptive error.
n_type :: Word8 -> N_TYPE
n_type 0x00 = N_UNDF
n_type 0x01 = N_ABS
n_type 0x07 = N_SECT
n_type 0x06 = N_PBUD
n_type 0x05 = N_INDR
n_type 0x20 = N_GSYM
n_type 0x22 = N_FNAME
n_type 0x24 = N_FUN
n_type 0x26 = N_STSYM
n_type 0x28 = N_LCSYM
n_type 0x2e = N_BNSYM
n_type 0x3c = N_OPT
n_type 0x40 = N_RSYM
n_type 0x44 = N_SLINE
n_type 0x4e = N_ENSYM
n_type 0x60 = N_SSYM
n_type 0x64 = N_SO
n_type 0x66 = N_OSO
n_type 0x80 = N_LSYM
n_type 0x82 = N_BINCL
n_type 0x84 = N_SOL
n_type 0x86 = N_PARAMS
n_type 0x88 = N_VERSION
n_type 0x8a = N_OLEVEL
n_type 0xa0 = N_PSYM
n_type 0xa2 = N_EINCL
n_type 0xa4 = N_ENTRY
n_type 0xc0 = N_LBRAC
n_type 0xc2 = N_EXCL
n_type 0xe0 = N_RBRAC
n_type 0xe2 = N_BCOMM
n_type 0xe4 = N_ECOMM
n_type 0xe8 = N_ECOML
n_type 0xfe = N_LENG
n_type 0x30 = N_PC
n_type other =
    error $ "Data.Macho: unsupported symbol type 0x" ++ showHex other ""

data REFERENCE_FLAG
    = REFERENCE_FLAG_UNDEFINED_NON_LAZY          -- ^ reference to an external non-lazy symbol
    | REFERENCE_FLAG_UNDEFINED_LAZY              -- ^ reference to an external lazy symbol
    | REFERENCE_FLAG_DEFINED                     -- ^ symbol is defined in this module
    | REFERENCE_FLAG_PRIVATE_DEFINED             -- ^ symbol is defined in this module and visible only to modules within this shared library
    | REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY  -- ^ reference to an external non-lazy symbol and visible only to modules within this shared library
    | REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY      -- ^ reference to an external lazy symbol and visible only to modules within this shared library
    | REFERENCED_DYNAMICALLY                     -- ^ set for all symbols referenced by dynamic loader APIs
    | N_WEAK_REF                                 -- ^ indicates the symbol is a weak reference, set to 0 if definition cannot be found
    | N_WEAK_DEF                                 -- ^ indicates the symbol is a weak definition, will be overridden by a strong definition at link-time
    | LIBRARY_ORDINAL Word16                     -- ^ for two-level mach-o objects, specifies the index of the library in which this symbol is defined. zero specifies current image.
    deriving (Show, Eq)

-- | Decode the reference-type field of @n_desc@ (already masked by the
-- caller).  Values without a constructor raise a descriptive error.
reference_flag_lo16 :: Word16 -> REFERENCE_FLAG
reference_flag_lo16 0 = REFERENCE_FLAG_UNDEFINED_NON_LAZY
reference_flag_lo16 1 = REFERENCE_FLAG_UNDEFINED_LAZY
reference_flag_lo16 2 = REFERENCE_FLAG_DEFINED
reference_flag_lo16 3 = REFERENCE_FLAG_PRIVATE_DEFINED
reference_flag_lo16 4 = REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY
reference_flag_lo16 5 = REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY
reference_flag_lo16 other =
    error $ "Data.Macho: unsupported symbol reference type " ++ show other

-- | List the flag bits of a full @n_desc@ word, highest bit first.
--
-- Per nlist.h the flags sit at 0x0010 (REFERENCED_DYNAMICALLY), 0x0040
-- (N_WEAK_REF) and 0x0080 (N_WEAK_DEF), i.e. bits 4, 6 and 7.  The earlier
-- code tested bits 0, 2 and 3 (overlapping the reference type) and recursed
-- with a literal instead of the word, which produced spurious flags.
reference_flag_hi16 :: Word16 -> [REFERENCE_FLAG]
reference_flag_hi16 = decodeBits descBits
  where
    descBits =
        [ (7, N_WEAK_DEF)
        , (6, N_WEAK_REF)
        , (4, REFERENCED_DYNAMICALLY)
        ]

-- | Interpret the @n_desc@ word of a non-stab symbol.
--
-- For two-level-namespace images the result is the reference type plus the
-- library ordinal, which lives in the high byte of @n_desc@ (the previous
-- shift by 16 always produced 0 on a 16-bit word).  Otherwise it is the
-- reference type followed by the flag bits.  The reference type is the low
-- three bits; bit 3 is a separate flag in nlist.h and is no longer fed into
-- the type decoder.
reference_flags :: Word16 -> MachoHeader -> [REFERENCE_FLAG]
reference_flags word mh
    | MH_TWOLEVEL `elem` mh_flags mh = [refType, LIBRARY_ORDINAL ((word `shiftR` 8) .&. 0xff)]
    | otherwise                      = refType : reference_flag_hi16 word
  where
    refType = reference_flag_lo16 (word .&. 0x7)

-- | Split an @n_type@ byte into (is stab entry, private external, type,
-- external).  Any of the top three bits marks a stab entry, whose whole byte
-- is the type; otherwise bit 4 is the private-external flag, bits 1-3 the
-- type and bit 0 the external flag.
n_types :: Word8 -> (Bool, Bool, N_TYPE, Bool)
n_types n
    | n .&. 0xe0 /= 0 = (True, False, n_type n, False)
    | otherwise       = (False, privateExternal, symbolType, external)
  where
    privateExternal = n .&. 0x10 /= 0
    symbolType      = n_type ((n .&. 0x0e) `shiftR` 1)
    external        = n .&. 0x01 /= 0

data MachoSymbol = MachoSymbol
    { sym_name  :: String                          -- ^ symbol name
    , sym_type  :: N_TYPE                          -- ^ symbol type
    , sym_pext  :: Bool                            -- ^ true if limited global scope
    , sym_ext   :: Bool                            -- ^ true if external symbol
    , sym_sect  :: Word8                           -- ^ section index where the symbol can be found
    , sym_flags :: Either Word16 [REFERENCE_FLAG]  -- ^ for stab entries, Left Word16 is the uninterpreted flags field, otherwise Right [REFERENCE_FLAG] are the symbol flags
    , sym_value :: Word64                          -- ^ symbol value, 32-bit symbol values are promoted to 64-bit for simplicity
    } deriving (Show, Eq)

-- | Read a string-table offset word and return the NUL-terminated name found
-- at that offset in the string table @strsect@.
getSymbolName :: MachoReader -> B.ByteString -> Get String
getSymbolName mr strsect = do
    offset <- fromIntegral <$> getWord32 mr
    return $ C.unpack $ C.takeWhile (/= '\0') $ B.drop offset strsect

-- | Assemble a symbol from the raw nlist fields.  Stab entries keep the
-- description word uninterpreted; other entries have it decoded by
-- 'reference_flags'.
buildSymbol :: MachoHeader -> String -> Word8 -> Word8 -> Word16 -> Word64 -> MachoSymbol
buildSymbol mh name typeByte sect desc value =
    MachoSymbol name ntype npext next sect refFlags value
  where
    (stabs, npext, ntype, next) = n_types typeByte
    refFlags
        | stabs     = Left desc
        | otherwise = Right (reference_flags desc mh)

-- | Decode one 32-bit symbol table entry; the value is widened to 64 bits.
getNList32 :: MachoReader -> B.ByteString -> MachoHeader -> Get MachoSymbol
getNList32 mr strsect mh = do
    name     <- getSymbolName mr strsect
    typeByte <- getWord8
    sect     <- getWord8
    desc     <- getWord16 mr
    value    <- fromIntegral <$> getWord32 mr
    return $ buildSymbol mh name typeByte sect desc value

-- | Decode one 64-bit symbol table entry.
getNList64 :: MachoReader -> B.ByteString -> MachoHeader -> Get MachoSymbol
getNList64 mr strsect mh = do
    name     <- getSymbolName mr strsect
    typeByte <- getWord8
    sect     <- getWord8
    desc     <- getWord16 mr
    value    <- getWord64 mr
    return $ buildSymbol mh name typeByte sect desc value

getSymTabCommand mr fl mh = do symoff <- liftM fromIntegral $ getWord32 mr nsyms <- liftM fromIntegral $ getWord32 mr stroff <- liftM fromIntegral $ getWord32 mr strsize <- liftM fromIntegral $ getWord32 mr strsect <- return $ B.take strsize $ B.drop stroff fl symbols <- if is64bit mr then return $ runGet (sequence (replicate nsyms (getNList64 mr strsect mh))) $ L.fromChunks [B.drop symoff fl] else return $ runGet (sequence (replicate nsyms (getNList32 mr strsect mh))) $ L.fromChunks 
[B.drop symoff fl] return $ LC_SYMTAB symbols strsect getTOC mr = do symbol_index <- getWord32 mr module_index <- getWord32 mr return (symbol_index, module_index) data DylibModule = DylibModule { dylib_module_name_offset :: Word32 -- ^ module name string table offset , dylib_ext_def_sym :: (Word32, Word32) -- ^ (initial, count) pair of symbol table indices for externally defined symbols , dylib_ref_sym :: (Word32, Word32) -- ^ (initial, count) pair of symbol table indices for referenced symbols , dylib_local_sym :: (Word32, Word32) -- ^ (initial, count) pair of symbol table indices for local symbols , dylib_ext_rel :: (Word32, Word32) -- ^ (initial, count) pair of symbol table indices for externally referenced symbols , dylib_init :: (Word32, Word32) -- ^ (initial, count) pair of symbol table indices for the index of the module init section and the number of init pointers , dylib_term :: (Word32, Word32) -- ^ (initial, count) pair of symbol table indices for the index of the module term section and the number of term pointers , dylib_objc_module_info_addr :: Word32 -- ^ statically linked address of the start of the data for this module in the __module_info section in the __OBJC segment , dylib_objc_module_info_size :: Word64 -- ^ number of bytes of data for this module that are used in the __module_info section in the __OBJC segment } deriving (Show, Eq) getModule32 mr = do module_name <- getWord32 mr iextdefsym <- getWord32 mr nextdefsym <- getWord32 mr irefsym <- getWord32 mr nrefsym <- getWord32 mr ilocalsym <- getWord32 mr nlocalsym <- getWord32 mr iextrel <- getWord32 mr nextrel <- getWord32 mr iinit_iterm <- getWord32 mr iinit <- return $ iinit_iterm .&. 0x0000ffff iterm <- return $ (iinit_iterm .&. 0xffff0000) `shiftR` 16 ninit_nterm <- getWord32 mr ninit <- return $ (ninit_nterm .&. 0x0000ffff) nterm <- return $ (ninit_nterm .&. 
0xffff0000) `shiftR` 16 objc_module_info_addr <- getWord32 mr objc_module_info_size <- liftM fromIntegral $ getWord32 mr return $ DylibModule { dylib_module_name_offset = module_name , dylib_ext_def_sym = (iextdefsym, nextdefsym) , dylib_ref_sym = (irefsym, nrefsym) , dylib_local_sym = (ilocalsym, nlocalsym) , dylib_ext_rel = (iextrel, nextrel) , dylib_init = (iinit, ninit) , dylib_term = (iterm, nterm) , dylib_objc_module_info_addr = objc_module_info_addr , dylib_objc_module_info_size = objc_module_info_size } getModule64 mr = do module_name <- getWord32 mr iextdefsym <- getWord32 mr nextdefsym <- getWord32 mr irefsym <- getWord32 mr nrefsym <- getWord32 mr ilocalsym <- getWord32 mr nlocalsym <- getWord32 mr iextrel <- getWord32 mr nextrel <- getWord32 mr iinit_iterm <- getWord32 mr iinit <- return $ iinit_iterm .&. 0x0000ffff iterm <- return $ (iinit_iterm .&. 0xffff0000) `shiftR` 16 ninit_nterm <- getWord32 mr ninit <- return $ (ninit_nterm .&. 0x0000ffff) nterm <- return $ (ninit_nterm .&. 0xffff0000) `shiftR` 16 objc_module_info_addr <- getWord32 mr objc_module_info_size <- getWord64 mr return $ DylibModule { dylib_module_name_offset = module_name , dylib_ext_def_sym = (iextdefsym, nextdefsym) , dylib_ref_sym = (irefsym, nrefsym) , dylib_local_sym = (ilocalsym, nlocalsym) , dylib_ext_rel = (iextrel, nextrel) , dylib_init = (iinit, ninit) , dylib_term = (iterm, nterm) , dylib_objc_module_info_addr = objc_module_info_addr , dylib_objc_module_info_size = objc_module_info_size } -- | Platform-specific relocation types. 
data R_TYPE
    = GENERIC_RELOC_VANILLA
    | GENERIC_RELOC_PAIR
    | GENERIC_RELOC_SECTDIFF
    | GENERIC_RELOC_LOCAL_SECTDIFF
    | GENERIC_RELOC_PB_LA_PTR
    | X86_64_RELOC_BRANCH
    | X86_64_RELOC_GOT_LOAD
    | X86_64_RELOC_GOT
    | X86_64_RELOC_SIGNED
    | X86_64_RELOC_UNSIGNED
    | X86_64_RELOC_SUBTRACTOR
    | X86_64_RELOC_SIGNED_1
    | X86_64_RELOC_SIGNED_2
    | X86_64_RELOC_SIGNED_4
    | PPC_RELOC_VANILLA
    | PPC_RELOC_PAIR
    | PPC_RELOC_BR14
    | PPC_RELOC_BR24
    | PPC_RELOC_HI16
    | PPC_RELOC_LO16
    | PPC_RELOC_HA16
    | PPC_RELOC_LO14
    | PPC_RELOC_SECTDIFF
    | PPC_RELOC_LOCAL_SECTDIFF
    | PPC_RELOC_PB_LA_PTR
    | PPC_RELOC_HI16_SECTDIFF
    | PPC_RELOC_LO16_SECTDIFF
    | PPC_RELOC_HA16_SECTDIFF
    | PPC_RELOC_JBSR
    | PPC_RELOC_LO14_SECTDIFF
    deriving (Show, Eq)

-- | Map a numeric relocation type and the CPU type of the image to an
-- 'R_TYPE'.
--
-- Generic (x86) codes follow the @reloc_type_generic@ enumeration in
-- @<mach-o/reloc.h>@, where 3 is @GENERIC_RELOC_PB_LA_PTR@ and 4 is
-- @GENERIC_RELOC_LOCAL_SECTDIFF@ (these two used to be swapped here).
-- 'CPU_TYPE_POWERPC64' uses the same table as 'CPU_TYPE_POWERPC'.
--
-- Partial: a code/CPU combination without an equation (for example any
-- 'CPU_TYPE_ARM' relocation) raises a pattern-match failure.
r_type :: (Eq a, Num a) => a -> CPU_TYPE -> R_TYPE
r_type 0  CPU_TYPE_X86       = GENERIC_RELOC_VANILLA
r_type 1  CPU_TYPE_X86       = GENERIC_RELOC_PAIR
r_type 2  CPU_TYPE_X86       = GENERIC_RELOC_SECTDIFF
r_type 3  CPU_TYPE_X86       = GENERIC_RELOC_PB_LA_PTR
r_type 4  CPU_TYPE_X86       = GENERIC_RELOC_LOCAL_SECTDIFF
r_type 0  CPU_TYPE_X86_64    = X86_64_RELOC_UNSIGNED
r_type 1  CPU_TYPE_X86_64    = X86_64_RELOC_SIGNED
r_type 2  CPU_TYPE_X86_64    = X86_64_RELOC_BRANCH
r_type 3  CPU_TYPE_X86_64    = X86_64_RELOC_GOT_LOAD
r_type 4  CPU_TYPE_X86_64    = X86_64_RELOC_GOT
r_type 5  CPU_TYPE_X86_64    = X86_64_RELOC_SUBTRACTOR
r_type 6  CPU_TYPE_X86_64    = X86_64_RELOC_SIGNED_1
r_type 7  CPU_TYPE_X86_64    = X86_64_RELOC_SIGNED_2
r_type 8  CPU_TYPE_X86_64    = X86_64_RELOC_SIGNED_4
r_type 0  CPU_TYPE_POWERPC   = PPC_RELOC_VANILLA
r_type 1  CPU_TYPE_POWERPC   = PPC_RELOC_PAIR
r_type 2  CPU_TYPE_POWERPC   = PPC_RELOC_BR14
r_type 3  CPU_TYPE_POWERPC   = PPC_RELOC_BR24
r_type 4  CPU_TYPE_POWERPC   = PPC_RELOC_HI16
r_type 5  CPU_TYPE_POWERPC   = PPC_RELOC_LO16
r_type 6  CPU_TYPE_POWERPC   = PPC_RELOC_HA16
r_type 7  CPU_TYPE_POWERPC   = PPC_RELOC_LO14
r_type 8  CPU_TYPE_POWERPC   = PPC_RELOC_SECTDIFF
r_type 9  CPU_TYPE_POWERPC   = PPC_RELOC_PB_LA_PTR
r_type 10 CPU_TYPE_POWERPC   = PPC_RELOC_HI16_SECTDIFF
r_type 11 CPU_TYPE_POWERPC   = PPC_RELOC_LO16_SECTDIFF
r_type 12 CPU_TYPE_POWERPC   = PPC_RELOC_HA16_SECTDIFF
r_type 13 CPU_TYPE_POWERPC   = PPC_RELOC_JBSR
r_type 14 CPU_TYPE_POWERPC   = PPC_RELOC_LO14_SECTDIFF
r_type 15 CPU_TYPE_POWERPC   = PPC_RELOC_LOCAL_SECTDIFF
r_type n  CPU_TYPE_POWERPC64 = r_type n CPU_TYPE_POWERPC

-- | A relocation entry, either plain or scattered.
data Relocation
    = RelocationInfo
        { ri_address   :: Int32  -- ^ offset from start of section to place to be relocated
        , ri_symbolnum :: Word32 -- ^ index into symbol or section table
        , ri_pcrel     :: Bool   -- ^ indicates if the item to be relocated is part of an instruction containing PC-relative addressing
        , ri_length    :: Word32 -- ^ length of item containing address to be relocated (literal form (4) instead of power of two (2))
        , ri_extern    :: Bool   -- ^ indicates whether symbolnum is an index into the symbol table (True) or section table (False)
        , ri_type      :: R_TYPE -- ^ relocation type
        }
    | ScatteredRelocationInfo
        { rs_pcrel   :: Bool   -- ^ indicates if the item to be relocated is part of an instruction containing PC-relative addressing
        , rs_length  :: Word32 -- ^ length of item containing address to be relocated (literal form (4) instead of power of two (2))
        , rs_type    :: R_TYPE -- ^ relocation type
        , rs_address :: Word32 -- ^ offset from start of section to place to be relocated
        , rs_value   :: Int32  -- ^ address of the relocatable expression for the item in the file that needs to be updated if the address is changed
        }
    deriving (Show, Eq)

-- | Read one 8-byte relocation entry (two 32-bit words).
--
-- The top bit of the first word marks a scattered entry. Once the word
-- has been read in the file's byte order, the fields of a scattered entry
-- sit at fixed bit positions (address in bits 0-23, type in bits 24-27,
-- length in bits 28-29, pcrel in bit 30), so they are extracted with
-- plain shifts and masks. Going through the endian-dependent 'bitfield'
-- accessor, as before, mis-decoded scattered entries of little-endian
-- files.
--
-- Plain entries keep using 'bitfield', because their second word is laid
-- out in bit-field allocation order and therefore depends on endianness.
-- Lengths are stored as a power-of-two exponent and returned in bytes.
getRel mr mh = do
    r_address <- getWord32 mr
    r_value   <- getWord32 mr
    let cpu          = mh_cputype mh
        field off sz = bitfield mr off sz r_value
    return $
        if testBit r_address 31
            then ScatteredRelocationInfo
                { rs_pcrel   = testBit r_address 30
                , rs_length  = 2 ^ ((r_address `shiftR` 28) .&. 0x3)
                , rs_type    = r_type ((r_address `shiftR` 24) .&. 0xf) cpu
                , rs_address = r_address .&. 0x00ffffff
                , rs_value   = fromIntegral r_value
                }
            else RelocationInfo
                { ri_address   = fromIntegral r_address
                , ri_symbolnum = field 0 24
                , ri_pcrel     = field 24 1 == 1
                , ri_length    = 2 ^ field 25 2
                , ri_extern    = field 27 1 == 1
                , ri_type      = r_type (field 28 4) cpu
                }

-- | Contents of the dynamic symbol table load command.
data MachoDynamicSymbolTable = MachoDynamicSymbolTable
    { localSyms    :: (Word32, Word32)   -- ^ symbol table index and count for local symbols
    , extDefSyms   :: (Word32, Word32)   -- ^ symbol table index and count for externally defined symbols
    , undefSyms    :: (Word32, Word32)   -- ^ symbol table index and count for undefined symbols
    , tocEntries   :: [(Word32, Word32)] -- ^ list of symbol index and module index pairs
    , modules      :: [DylibModule]      -- ^ modules
    , extRefSyms   :: [Word32]           -- ^ list of external reference symbol indices
    , indirectSyms :: [Word32]           -- ^ list of indirect symbol indices
    , extRels      :: [Relocation]       -- ^ external relocations
    , locRels      :: [Relocation]       -- ^ local relocations
    } deriving (Show, Eq)

-- | Parse the body of the dynamic symbol table load command.
--
-- The command holds three (index, count) pairs followed by six
-- (offset, count) pairs. Each of the latter describes a table that is
-- decoded out of @fl@ (the strict 'B.ByteString' the offsets are applied
-- to -- presumably the whole file image).
getDySymTabCommand mr fl mh = do
    ilocalsym      <- getWord32 mr
    nlocalsym      <- getWord32 mr
    iextdefsym     <- getWord32 mr
    nextdefsym     <- getWord32 mr
    iundefsym      <- getWord32 mr
    nundefsym      <- getWord32 mr
    tocoff         <- getInt
    ntoc           <- getInt
    modtaboff      <- getInt
    nmodtab        <- getInt
    extrefsymoff   <- getInt
    nextrefsyms    <- getInt
    indirectsymoff <- getInt
    nindirectsyms  <- getInt
    extreloff      <- getInt
    nextrel        <- getInt
    locreloff      <- getInt
    nlocrel        <- getInt
    let getModule = if is64bit mr then getModule64 mr else getModule32 mr
    return $ LC_DYSYMTAB $ MachoDynamicSymbolTable
        { localSyms    = (ilocalsym, nlocalsym)
        , extDefSyms   = (iextdefsym, nextdefsym)
        , undefSyms    = (iundefsym, nundefsym)
        , tocEntries   = tableAt tocoff ntoc (getTOC mr)
        , modules      = tableAt modtaboff nmodtab getModule
        , extRefSyms   = tableAt extrefsymoff nextrefsyms (getWord32 mr)
        , indirectSyms = tableAt indirectsymoff nindirectsyms (getWord32 mr)
        , extRels      = tableAt extreloff nextrel (getRel mr mh)
        , locRels      = tableAt locreloff nlocrel (getRel mr mh)
        }
  where
    -- a 32-bit field used as a byte offset or element count
    getInt :: Get Int
    getInt = liftM fromIntegral $ getWord32 mr
    -- decode @count@ consecutive entries starting @off@ bytes into @fl@
    tableAt off count getEntry =
        runGet (replicateM count getEntry) $ L.fromChunks [B.drop off fl]

-- | Read one two-level namespace hint as @(sub-image index, TOC index)@;
-- both values are bit fields of a single 32-bit word.
getTwoLevelHint :: MachoReader -> Get (Word32, Word32)
getTwoLevelHint mr = do
    word <- getWord32 mr
    return (bitfield mr 0 8 word, bitfield mr 8 24 word)

-- | Parse the body of the two-level hints load command: an offset into
-- @fl@ and the number of hints stored there.
getTwoLevelHintsCommand mr fl = do
    offset <- liftM fromIntegral $ getWord32 mr
    nhints <- liftM fromIntegral $ getWord32 mr
    return $ LC_TWOLEVEL_HINTS $
        runGet (replicateM nhints (getTwoLevelHint mr)) $ L.fromChunks [B.drop offset fl]

-- | Parse the body of the prebind checksum load command (one 32-bit word).
getPrebindCkSumCommand mr = liftM LC_PREBIND_CKSUM $ getWord32 mr

-- | Parse the body of the UUID load command. A UUID is 128 bits wide, so
-- 16 bytes are read (only the first 8 were read previously). The reader
-- argument is unused because single bytes have no byte order.
getUUIDCommand mr = liftM LC_UUID $ replicateM 16 getWord8

-- | Parse the body of the run-path load command: a 32-bit offset, then
-- the NUL-terminated path located at that offset within @lc@.
getRPathCommand mr lc = do
    name_offset <- liftM fromIntegral $ getWord32 mr
    return $ LC_RPATH $ C.unpack $ nullStringAt name_offset lc