From: Kevin Enderby Date: Thu, 6 Nov 2014 19:00:13 +0000 (+0000) Subject: Plumb in the ARM thumb symbolizer in llvm-objdump’s Mach-O disassembler and X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3b80486e3a6ce2fb1ced1e3cda90c90adbfa5cf6;p=llvm Plumb in the ARM thumb symbolizer in llvm-objdump’s Mach-O disassembler and add the code and test cases for 32-bit ARM symbolizer. Also fixed the printing of data in code as it was not using the table correctly and needed to fix one of the test cases too. This will break lld’s test/mach-o/arm-interworking-movw.yaml until the tweak for that is made, which I’ll be committing immediately after this commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221470 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/Object/ARM/macho-data-in-code.test b/test/Object/ARM/macho-data-in-code.test index dca084c2cab..2bfb6c11864 100644 --- a/test/Object/ARM/macho-data-in-code.test +++ b/test/Object/ARM/macho-data-in-code.test @@ -3,5 +3,5 @@ RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data CHECK: 12: 80 bd pop {r7, pc} CHECK: 14: 38 00 00 00 .long 56 @ KIND_DATA -CHECK: 16: 00 00 movs r0, r0 +CHECK: 18: 70 47 bx lr diff --git a/test/tools/llvm-objdump/ARM/Inputs/hello.exe.macho-arm b/test/tools/llvm-objdump/ARM/Inputs/hello.exe.macho-arm new file mode 100755 index 00000000000..40d657b35c0 Binary files /dev/null and b/test/tools/llvm-objdump/ARM/Inputs/hello.exe.macho-arm differ diff --git a/test/tools/llvm-objdump/ARM/Inputs/hello.obj.macho-arm b/test/tools/llvm-objdump/ARM/Inputs/hello.obj.macho-arm new file mode 100644 index 00000000000..fb8706b2857 Binary files /dev/null and b/test/tools/llvm-objdump/ARM/Inputs/hello.obj.macho-arm differ diff --git a/test/tools/llvm-objdump/ARM/macho-symbolized-disassembly.test b/test/tools/llvm-objdump/ARM/macho-symbolized-disassembly.test new file mode 100644 index 00000000000..eeeab526cf8 --- /dev/null +++ 
b/test/tools/llvm-objdump/ARM/macho-symbolized-disassembly.test @@ -0,0 +1,8 @@ +// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.obj.macho-arm | FileCheck %s -check-prefix=OBJ +// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.exe.macho-arm | FileCheck %s -check-prefix=EXE + +OBJ: 00000006 movw r3, :lower16:((54-14)-4) +OBJ: 0000000a movt r3, :upper16:((54-14)-4) +OBJ: 00000024 bl _printf + +EXE: 0000bfa8 blx 0xbffc @ symbol stub for: _printf diff --git a/test/tools/llvm-objdump/ARM/macho-symbolized-subtractor.test b/test/tools/llvm-objdump/ARM/macho-symbolized-subtractor.test new file mode 100644 index 00000000000..65df2a984cd --- /dev/null +++ b/test/tools/llvm-objdump/ARM/macho-symbolized-subtractor.test @@ -0,0 +1,15 @@ +# RUN: llvm-mc < %s -triple armv7-apple-darwin -filetype=obj | llvm-objdump -m -d - | FileCheck %s + .thumb + .thumb_func _t +_t: + movw r3, :lower16:(Str-(PCinst+4)) + movt r3, :upper16:(Str-(PCinst+4)) + .thumb_func PCinst +PCinst: + add r3, pc + + .section __TEXT,__cstring,cstring_literals +Str: + .asciz "Hello world\n" +# CHECK: movw r3, :lower16:((Str-PCinst)-4) +# CHECK: movt r3, :upper16:((Str-PCinst)-4) diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 362a1bafe7c..566a59769a8 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -124,49 +124,80 @@ typedef std::pair DiceTableEntry; typedef std::vector DiceTable; typedef DiceTable::iterator dice_table_iterator; +// This is used to search for a data in code table entry for the PC being +// disassembled. The j parameter has the PC in j.first. A single data in code +// table entry can cover many bytes for each of its Kind's. So if the offset, +// aka the i.first value, of the data in code table entry plus its Length +// covers the PC being searched for this will return true. If not it will +// return false. 
static bool compareDiceTableEntries(const DiceTableEntry &i, const DiceTableEntry &j) { - return i.first == j.first; + uint16_t Length; + i.second.getLength(Length); + + return j.first >= i.first && j.first < i.first + Length; } -static void DumpDataInCode(const char *bytes, uint64_t Size, - unsigned short Kind) { - uint64_t Value; +static uint64_t DumpDataInCode(const char *bytes, uint64_t Length, + unsigned short Kind) { + uint32_t Value, Size = 1; switch (Kind) { + default: case MachO::DICE_KIND_DATA: - switch (Size) { - case 4: + if (Length >= 4) { + if (!NoShowRawInsn) + DumpBytes(StringRef(bytes, 4)); Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0]; outs() << "\t.long " << Value; - break; - case 2: + Size = 4; + } else if (Length >= 2) { + if (!NoShowRawInsn) + DumpBytes(StringRef(bytes, 2)); Value = bytes[1] << 8 | bytes[0]; outs() << "\t.short " << Value; - break; - case 1: + Size = 2; + } else { + if (!NoShowRawInsn) + DumpBytes(StringRef(bytes, 2)); Value = bytes[0]; outs() << "\t.byte " << Value; - break; + Size = 1; } - outs() << "\t@ KIND_DATA\n"; + if (Kind == MachO::DICE_KIND_DATA) + outs() << "\t@ KIND_DATA\n"; + else + outs() << "\t@ data in code kind = " << Kind << "\n"; break; case MachO::DICE_KIND_JUMP_TABLE8: + if (!NoShowRawInsn) + DumpBytes(StringRef(bytes, 1)); Value = bytes[0]; - outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8"; + outs() << "\t.byte " << format("%3u", Value) << "\t@ KIND_JUMP_TABLE8\n"; + Size = 1; break; case MachO::DICE_KIND_JUMP_TABLE16: + if (!NoShowRawInsn) + DumpBytes(StringRef(bytes, 2)); Value = bytes[1] << 8 | bytes[0]; - outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16"; + outs() << "\t.short " << format("%5u", Value & 0xffff) + << "\t@ KIND_JUMP_TABLE16\n"; + Size = 2; break; case MachO::DICE_KIND_JUMP_TABLE32: + case MachO::DICE_KIND_ABS_JUMP_TABLE32: + if (!NoShowRawInsn) + DumpBytes(StringRef(bytes, 4)); Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0]; - 
outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32"; - break; - default: - outs() << "\t@ data in code kind = " << Kind << "\n"; + outs() << "\t.long " << Value; + if (Kind == MachO::DICE_KIND_JUMP_TABLE32) + outs() << "\t@ KIND_JUMP_TABLE32\n"; + else + outs() << "\t@ KIND_ABS_JUMP_TABLE32\n"; + Size = 4; break; } + return Size; } static void getSectionsAndSymbols(const MachO::mach_header Header, @@ -326,7 +357,7 @@ int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, MachO::any_relocation_info RENext; RENext = info->O->getRelocation(RelNext); if (info->O->isRelocationScattered(RENext)) - pair_r_value = info->O->getPlainRelocationSymbolNum(RENext); + pair_r_value = info->O->getScatteredRelocationValue(RENext); else return 0; } @@ -441,7 +472,157 @@ int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, // uint64_t seg_offset = (Pc + Offset); return 0; } else if (Arch == Triple::arm) { - return 0; + if (Offset != 0 || (Size != 4 && Size != 2)) + return 0; + // First search the section's relocation entries (if any) for an entry + // for this section offset. 
+ uint32_t sect_addr = info->S.getAddress(); + uint32_t sect_offset = (Pc + Offset) - sect_addr; + bool reloc_found = false; + DataRefImpl Rel; + MachO::any_relocation_info RE; + bool isExtern = false; + SymbolRef Symbol; + bool r_scattered = false; + uint32_t r_value, pair_r_value, r_type, r_length, other_half; + for (const RelocationRef &Reloc : info->S.relocations()) { + uint64_t RelocOffset; + Reloc.getOffset(RelocOffset); + if (RelocOffset == sect_offset) { + Rel = Reloc.getRawDataRefImpl(); + RE = info->O->getRelocation(Rel); + r_length = info->O->getAnyRelocationLength(RE); + r_scattered = info->O->isRelocationScattered(RE); + if (r_scattered) { + r_value = info->O->getScatteredRelocationValue(RE); + r_type = info->O->getScatteredRelocationType(RE); + } else { + r_type = info->O->getAnyRelocationType(RE); + isExtern = info->O->getPlainRelocationExternal(RE); + if (isExtern) { + symbol_iterator RelocSym = Reloc.getSymbol(); + Symbol = *RelocSym; + } + } + if (r_type == MachO::ARM_RELOC_HALF || + r_type == MachO::ARM_RELOC_SECTDIFF || + r_type == MachO::ARM_RELOC_LOCAL_SECTDIFF || + r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { + DataRefImpl RelNext = Rel; + info->O->moveRelocationNext(RelNext); + MachO::any_relocation_info RENext; + RENext = info->O->getRelocation(RelNext); + other_half = info->O->getAnyRelocationAddress(RENext) & 0xffff; + if (info->O->isRelocationScattered(RENext)) + pair_r_value = info->O->getScatteredRelocationValue(RENext); + } + reloc_found = true; + break; + } + } + if (reloc_found && isExtern) { + StringRef SymName; + Symbol.getName(SymName); + const char *name = SymName.data(); + op_info->AddSymbol.Present = 1; + op_info->AddSymbol.Name = name; + if (value != 0) { + switch (r_type) { + case MachO::ARM_RELOC_HALF: + if ((r_length & 0x1) == 1) { + op_info->Value = value << 16 | other_half; + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; + } else { + op_info->Value = other_half << 16 | value; + op_info->VariantKind = 
LLVMDisassembler_VariantKind_ARM_LO16; + } + break; + default: + break; + } + } else { + switch (r_type) { + case MachO::ARM_RELOC_HALF: + if ((r_length & 0x1) == 1) { + op_info->Value = value << 16 | other_half; + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; + } else { + op_info->Value = other_half << 16 | value; + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; + } + break; + default: + break; + } + } + return 1; + } + // If we have a branch that is not an external relocation entry then + // return 0 so the code in tryAddingSymbolicOperand() can use the + // SymbolLookUp call back with the branch target address to look up the + // symbol and possibly add an annotation for a symbol stub. + if (reloc_found && isExtern == 0 && (r_type == MachO::ARM_RELOC_BR24 || + r_type == MachO::ARM_THUMB_RELOC_BR22)) + return 0; + + uint32_t offset = 0; + if (reloc_found) { + if (r_type == MachO::ARM_RELOC_HALF || + r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { + if ((r_length & 0x1) == 1) + value = value << 16 | other_half; + else + value = other_half << 16 | value; + } + if (r_scattered && (r_type != MachO::ARM_RELOC_HALF && + r_type != MachO::ARM_RELOC_HALF_SECTDIFF)) { + offset = value - r_value; + value = r_value; + } + } + + if (reloc_found && r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { + if ((r_length & 0x1) == 1) + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; + else + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; + const char *add = GuessSymbolName(r_value, info); + const char *sub = GuessSymbolName(pair_r_value, info); + int32_t offset = value - (r_value - pair_r_value); + op_info->AddSymbol.Present = 1; + if (add != nullptr) + op_info->AddSymbol.Name = add; + else + op_info->AddSymbol.Value = r_value; + op_info->SubtractSymbol.Present = 1; + if (sub != nullptr) + op_info->SubtractSymbol.Name = sub; + else + op_info->SubtractSymbol.Value = pair_r_value; + op_info->Value = offset; + return 1; + } + + if 
(reloc_found == false) + return 0; + + op_info->AddSymbol.Present = 1; + op_info->Value = offset; + if (reloc_found) { + if (r_type == MachO::ARM_RELOC_HALF) { + if ((r_length & 0x1) == 1) + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; + else + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; + } + } + const char *add = GuessSymbolName(value, info); + if (add != nullptr) { + op_info->AddSymbol.Name = add; + return 1; + } + op_info->AddSymbol.Value = value; + return 1; } else if (Arch == Triple::aarch64) { return 0; } else { @@ -1342,9 +1523,12 @@ static void DisassembleInputMachO2(StringRef Filename, std::unique_ptr ThumbMRI; std::unique_ptr ThumbAsmInfo; std::unique_ptr ThumbSTI; - std::unique_ptr ThumbDisAsm; + std::unique_ptr ThumbDisAsm; std::unique_ptr ThumbIP; std::unique_ptr ThumbCtx; + std::unique_ptr ThumbSymbolizer; + struct DisassembleInfo ThumbSymbolizerInfo; + std::unique_ptr ThumbRelInfo; if (ThumbTarget) { ThumbMRI.reset(ThumbTarget->createMCRegInfo(ThumbTripleName)); ThumbAsmInfo.reset( @@ -1353,7 +1537,15 @@ static void DisassembleInputMachO2(StringRef Filename, ThumbTarget->createMCSubtargetInfo(ThumbTripleName, MCPU, FeaturesStr)); ThumbCtx.reset(new MCContext(ThumbAsmInfo.get(), ThumbMRI.get(), nullptr)); ThumbDisAsm.reset(ThumbTarget->createMCDisassembler(*ThumbSTI, *ThumbCtx)); - // TODO: add MCSymbolizer here for the ThumbTarget like above for TheTarget. 
+ MCContext *PtrThumbCtx = ThumbCtx.get(); + ThumbRelInfo.reset( + ThumbTarget->createMCRelocationInfo(ThumbTripleName, *PtrThumbCtx)); + if (ThumbRelInfo) { + ThumbSymbolizer.reset(ThumbTarget->createMCSymbolizer( + ThumbTripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp, + &ThumbSymbolizerInfo, PtrThumbCtx, ThumbRelInfo.release())); + ThumbDisAsm->setSymbolizer(std::move(ThumbSymbolizer)); + } int ThumbAsmPrinterVariant = ThumbAsmInfo->getAssemblerDialect(); ThumbIP.reset(ThumbTarget->createMCInstPrinter( ThumbAsmPrinterVariant, *ThumbAsmInfo, *ThumbInstrInfo, *ThumbMRI, @@ -1495,6 +1687,17 @@ static void DisassembleInputMachO2(StringRef Filename, SymbolizerInfo.method = nullptr; SymbolizerInfo.demangled_name = nullptr; SymbolizerInfo.bindtable = nullptr; + // Same for the ThumbSymbolizer + ThumbSymbolizerInfo.verbose = true; + ThumbSymbolizerInfo.O = MachOOF; + ThumbSymbolizerInfo.S = Sections[SectIdx]; + ThumbSymbolizerInfo.AddrMap = &AddrMap; + ThumbSymbolizerInfo.Sections = &Sections; + ThumbSymbolizerInfo.class_name = nullptr; + ThumbSymbolizerInfo.selector_name = nullptr; + ThumbSymbolizerInfo.method = nullptr; + ThumbSymbolizerInfo.demangled_name = nullptr; + ThumbSymbolizerInfo.bindtable = nullptr; // Disassemble symbol by symbol. 
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) { @@ -1575,10 +1778,12 @@ static void DisassembleInputMachO2(StringRef Filename, if (DTI != Dices.end()) { uint16_t Length; DTI->second.getLength(Length); - DumpBytes(StringRef(Bytes.data() + Index, Length)); uint16_t Kind; DTI->second.getKind(Kind); - DumpDataInCode(Bytes.data() + Index, Length, Kind); + Size = DumpDataInCode(Bytes.data() + Index, Length, Kind); + if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) && + (PC == (DTI->first + Length - 1)) && (Length & 1)) + Size++; continue; } @@ -1674,6 +1879,12 @@ static void DisassembleInputMachO2(StringRef Filename, free(SymbolizerInfo.demangled_name); if (SymbolizerInfo.bindtable != nullptr) delete SymbolizerInfo.bindtable; + if (ThumbSymbolizerInfo.method != nullptr) + free(ThumbSymbolizerInfo.method); + if (ThumbSymbolizerInfo.demangled_name != nullptr) + free(ThumbSymbolizerInfo.demangled_name); + if (ThumbSymbolizerInfo.bindtable != nullptr) + delete ThumbSymbolizerInfo.bindtable; } }