From 49db1b923504c8d6b97f38d1d06e60e4866ec5ff Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Sat, 2 Feb 2019 00:38:07 +0000 Subject: [PATCH] [llvm-objcopy] Add ability to copy MachO object files This diff implements first bits for copying (without modification) MachO object files. Test plan: make check-all Differential revision: https://reviews.llvm.org/D54674 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352944 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm-objcopy/MachO/Inputs/macho.32.s | 54 +++ .../llvm-objcopy/MachO/Inputs/macho.64.s | 47 +++ .../MachO/basic-big-endian-32-copy.test | 112 ++++++ .../MachO/basic-big-endian-64-copy.test | 124 ++++++ .../MachO/basic-little-endian-32-copy.test | 112 ++++++ .../MachO/basic-little-endian-64-copy.test | 124 ++++++ .../MachO/real-world-input-copy.test | 7 + tools/llvm-objcopy/CMakeLists.txt | 3 + tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 32 ++ tools/llvm-objcopy/MachO/MachOObjcopy.h | 32 ++ tools/llvm-objcopy/MachO/MachOReader.cpp | 225 +++++++++++ tools/llvm-objcopy/MachO/MachOReader.h | 49 +++ tools/llvm-objcopy/MachO/MachOWriter.cpp | 353 ++++++++++++++++++ tools/llvm-objcopy/MachO/MachOWriter.h | 55 +++ tools/llvm-objcopy/MachO/Object.h | 209 +++++++++++ tools/llvm-objcopy/llvm-objcopy.cpp | 4 + 16 files changed, 1542 insertions(+) create mode 100644 test/tools/llvm-objcopy/MachO/Inputs/macho.32.s create mode 100644 test/tools/llvm-objcopy/MachO/Inputs/macho.64.s create mode 100644 test/tools/llvm-objcopy/MachO/basic-big-endian-32-copy.test create mode 100644 test/tools/llvm-objcopy/MachO/basic-big-endian-64-copy.test create mode 100644 test/tools/llvm-objcopy/MachO/basic-little-endian-32-copy.test create mode 100644 test/tools/llvm-objcopy/MachO/basic-little-endian-64-copy.test create mode 100644 test/tools/llvm-objcopy/MachO/real-world-input-copy.test create mode 100644 tools/llvm-objcopy/MachO/MachOObjcopy.cpp create mode 100644 tools/llvm-objcopy/MachO/MachOObjcopy.h create mode 
100644 tools/llvm-objcopy/MachO/MachOReader.cpp create mode 100644 tools/llvm-objcopy/MachO/MachOReader.h create mode 100644 tools/llvm-objcopy/MachO/MachOWriter.cpp create mode 100644 tools/llvm-objcopy/MachO/MachOWriter.h create mode 100644 tools/llvm-objcopy/MachO/Object.h diff --git a/test/tools/llvm-objcopy/MachO/Inputs/macho.32.s b/test/tools/llvm-objcopy/MachO/Inputs/macho.32.s new file mode 100644 index 00000000000..66bcda8a938 --- /dev/null +++ b/test/tools/llvm-objcopy/MachO/Inputs/macho.32.s @@ -0,0 +1,54 @@ + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 10, 14 + .globl __Z1fi ## -- Begin function _Z1fi + .p2align 4, 0x90 +__Z1fi: ## @_Z1fi + .cfi_startproc +## %bb.0: + pushl %ebp + .cfi_def_cfa_offset 8 + .cfi_offset %ebp, -8 + movl %esp, %ebp + .cfi_def_cfa_register %ebp + pushl %eax + calll L0$pb +L0$pb: + popl %eax + movl 8(%ebp), %ecx + movl _x-L0$pb(%eax), %eax + addl 8(%ebp), %eax + movl %ecx, -4(%ebp) ## 4-byte Spill + addl $4, %esp + popl %ebp + retl + .cfi_endproc + ## -- End function + .globl _main ## -- Begin function main + .p2align 4, 0x90 +_main: ## @main + .cfi_startproc +## %bb.0: + pushl %ebp + .cfi_def_cfa_offset 8 + .cfi_offset %ebp, -8 + movl %esp, %ebp + .cfi_def_cfa_register %ebp + subl $24, %esp + movl $2, %eax + movl $0, -4(%ebp) + movl $2, (%esp) + movl %eax, -8(%ebp) ## 4-byte Spill + calll __Z1fi + addl $24, %esp + popl %ebp + retl + .cfi_endproc + ## -- End function + .section __DATA,__data + .globl _x ## @x + .p2align 2 +_x: + .long 1 ## 0x1 + + +.subsections_via_symbols diff --git a/test/tools/llvm-objcopy/MachO/Inputs/macho.64.s b/test/tools/llvm-objcopy/MachO/Inputs/macho.64.s new file mode 100644 index 00000000000..9d7e964972c --- /dev/null +++ b/test/tools/llvm-objcopy/MachO/Inputs/macho.64.s @@ -0,0 +1,47 @@ + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 10, 14 + .globl __Z1fi ## -- Begin function _Z1fi + .p2align 4, 0x90 +__Z1fi: ## @_Z1fi + .cfi_startproc +## 
%bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) + movl _x(%rip), %edi + addl -4(%rbp), %edi + movl %edi, %eax + popq %rbp + retq + .cfi_endproc + ## -- End function + .globl _main ## -- Begin function main + .p2align 4, 0x90 +_main: ## @main + .cfi_startproc +## %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $2, %edi + movl $0, -4(%rbp) + callq __Z1fi + addq $16, %rsp + popq %rbp + retq + .cfi_endproc + ## -- End function + .section __DATA,__data + .globl _x ## @x + .p2align 2 +_x: + .long 1 ## 0x1 + + +.subsections_via_symbols diff --git a/test/tools/llvm-objcopy/MachO/basic-big-endian-32-copy.test b/test/tools/llvm-objcopy/MachO/basic-big-endian-32-copy.test new file mode 100644 index 00000000000..692fb50f058 --- /dev/null +++ b/test/tools/llvm-objcopy/MachO/basic-big-endian-32-copy.test @@ -0,0 +1,112 @@ +# RUN: yaml2obj %s > %t +# RUN: llvm-objcopy %t %t2 +# RUN: cmp %t %t2 + +--- !mach-o +IsLittleEndian: false +FileHeader: + magic: 0xFEEDFACE + cputype: 0x00000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 380 + flags: 0x00002000 +LoadCommands: + - cmd: LC_SEGMENT + cmdsize: 260 + segname: '' + vmaddr: 0 + vmsize: 184 + fileoff: 408 + filesize: 184 + maxprot: 7 + initprot: 7 + nsects: 3 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 61 + offset: 0x00000198 + align: 4 + reloff: 0x00000250 + nreloc: 1 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __compact_unwind + segname: __LD + addr: 0x0000000000000040 + size: 40 + offset: 0x000001D8 + align: 2 + reloff: 0x00000258 + nreloc: 2 + flags: 0x02000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __eh_frame + segname: __TEXT + addr: 0x0000000000000068 + 
size: 80 + offset: 0x00000200 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x6800000B + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_VERSION_MIN_MACOSX + cmdsize: 16 + version: 658688 + sdk: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 616 + nsyms: 2 + stroff: 640 + strsize: 16 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 2 + iundefsym: 2 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 8 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 32 + StringTable: + - '' + - __Z1fv + - _main + - '' + - '' +... diff --git a/test/tools/llvm-objcopy/MachO/basic-big-endian-64-copy.test b/test/tools/llvm-objcopy/MachO/basic-big-endian-64-copy.test new file mode 100644 index 00000000000..5294f4e5be1 --- /dev/null +++ b/test/tools/llvm-objcopy/MachO/basic-big-endian-64-copy.test @@ -0,0 +1,124 @@ +# RUN: yaml2obj %s > %t +# RUN: llvm-objcopy %t %t2 +# RUN: cmp %t %t2 + +--- !mach-o +IsLittleEndian: false +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 512 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 392 + segname: '' + vmaddr: 0 + vmsize: 240 + fileoff: 544 + filesize: 240 + maxprot: 7 + initprot: 7 + nsects: 4 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 93 + offset: 0x00000220 + align: 4 + reloff: 0x00000310 + nreloc: 3 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __cstring + segname: __TEXT + addr: 0x000000000000005D + size: 48 + offset: 0x0000027D + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 
0x00000002 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __compact_unwind + segname: __LD + addr: 0x0000000000000090 + size: 32 + offset: 0x000002B0 + align: 3 + reloff: 0x00000328 + nreloc: 1 + flags: 0x02000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __eh_frame + segname: __TEXT + addr: 0x00000000000000B0 + size: 64 + offset: 0x000002D0 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x6800000B + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_VERSION_MIN_MACOSX + cmdsize: 16 + version: 658432 + sdk: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 816 + nsyms: 2 + stroff: 848 + strsize: 36 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 24 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 1 + n_type: 0x01 + n_sect: 0 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _compilerrt_abort_impl + - ___absvdi2 + - '' +... 
diff --git a/test/tools/llvm-objcopy/MachO/basic-little-endian-32-copy.test b/test/tools/llvm-objcopy/MachO/basic-little-endian-32-copy.test new file mode 100644 index 00000000000..c43aed7004b --- /dev/null +++ b/test/tools/llvm-objcopy/MachO/basic-little-endian-32-copy.test @@ -0,0 +1,112 @@ +# RUN: yaml2obj %s > %t +# RUN: llvm-objcopy %t %t2 +# RUN: cmp %t %t2 + +--- !mach-o +IsLittleEndian: true +FileHeader: + magic: 0xFEEDFACE + cputype: 0x00000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 380 + flags: 0x00002000 +LoadCommands: + - cmd: LC_SEGMENT + cmdsize: 260 + segname: '' + vmaddr: 0 + vmsize: 184 + fileoff: 408 + filesize: 184 + maxprot: 7 + initprot: 7 + nsects: 3 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 61 + offset: 0x00000198 + align: 4 + reloff: 0x00000250 + nreloc: 1 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __compact_unwind + segname: __LD + addr: 0x0000000000000040 + size: 40 + offset: 0x000001D8 + align: 2 + reloff: 0x00000258 + nreloc: 2 + flags: 0x02000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __eh_frame + segname: __TEXT + addr: 0x0000000000000068 + size: 80 + offset: 0x00000200 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x6800000B + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_VERSION_MIN_MACOSX + cmdsize: 16 + version: 658688 + sdk: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 616 + nsyms: 2 + stroff: 640 + strsize: 16 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 2 + iundefsym: 2 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + 
n_value: 0 + - n_strx: 8 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 32 + StringTable: + - '' + - __Z1fv + - _main + - '' + - '' +... diff --git a/test/tools/llvm-objcopy/MachO/basic-little-endian-64-copy.test b/test/tools/llvm-objcopy/MachO/basic-little-endian-64-copy.test new file mode 100644 index 00000000000..b395020b4bc --- /dev/null +++ b/test/tools/llvm-objcopy/MachO/basic-little-endian-64-copy.test @@ -0,0 +1,124 @@ +# RUN: yaml2obj %s > %t +# RUN: llvm-objcopy %t %t2 +# RUN: cmp %t %t2 + +--- !mach-o +IsLittleEndian: true +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 512 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 392 + segname: '' + vmaddr: 0 + vmsize: 240 + fileoff: 544 + filesize: 240 + maxprot: 7 + initprot: 7 + nsects: 4 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 93 + offset: 0x00000220 + align: 4 + reloff: 0x00000310 + nreloc: 3 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __cstring + segname: __TEXT + addr: 0x000000000000005D + size: 48 + offset: 0x0000027D + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000002 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __compact_unwind + segname: __LD + addr: 0x0000000000000090 + size: 32 + offset: 0x000002B0 + align: 3 + reloff: 0x00000328 + nreloc: 1 + flags: 0x02000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __eh_frame + segname: __TEXT + addr: 0x00000000000000B0 + size: 64 + offset: 0x000002D0 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x6800000B + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_VERSION_MIN_MACOSX + cmdsize: 16 + version: 658432 + sdk: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 816 + nsyms: 2 + stroff: 848 
+ strsize: 36 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 24 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 1 + n_type: 0x01 + n_sect: 0 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _compilerrt_abort_impl + - ___absvdi2 + - '' +... diff --git a/test/tools/llvm-objcopy/MachO/real-world-input-copy.test b/test/tools/llvm-objcopy/MachO/real-world-input-copy.test new file mode 100644 index 00000000000..4497d7d8c2c --- /dev/null +++ b/test/tools/llvm-objcopy/MachO/real-world-input-copy.test @@ -0,0 +1,7 @@ +# RUN: llvm-mc -assemble -triple i386-apple-macosx10.14.0 -filetype=obj %p/Inputs/macho.32.s -o %t.32.o +# RUN: llvm-objcopy %t.32.o %t.32.copy.o +# RUN: cmp %t.32.o %t.32.copy.o + +# RUN: llvm-mc -assemble -triple x86_64-apple-macosx10.14.0 -filetype=obj %p/Inputs/macho.64.s -o %t.64.o +# RUN: llvm-objcopy %t.64.o %t.64.copy.o +# RUN: cmp %t.64.o %t.64.copy.o diff --git a/tools/llvm-objcopy/CMakeLists.txt b/tools/llvm-objcopy/CMakeLists.txt index 1beb7374cf8..8406786e9e4 100644 --- a/tools/llvm-objcopy/CMakeLists.txt +++ b/tools/llvm-objcopy/CMakeLists.txt @@ -23,6 +23,9 @@ add_llvm_tool(llvm-objcopy COFF/Writer.cpp ELF/ELFObjcopy.cpp ELF/Object.cpp + MachO/MachOObjcopy.cpp + MachO/MachOReader.cpp + MachO/MachOWriter.cpp DEPENDS ObjcopyOptsTableGen StripOptsTableGen diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp new file mode 100644 index 00000000000..1f2d1180e46 --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -0,0 +1,32 @@ +//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed 
under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MachOObjcopy.h" +#include "../CopyConfig.h" +#include "../llvm-objcopy.h" +#include "MachOReader.h" +#include "MachOWriter.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +Error executeObjcopyOnBinary(const CopyConfig &Config, + object::MachOObjectFile &In, Buffer &Out) { + MachOReader Reader(In); + std::unique_ptr O = Reader.create(); + assert(O && "Unable to deserialize MachO object"); + MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out); + return Writer.write(); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.h b/tools/llvm-objcopy/MachO/MachOObjcopy.h new file mode 100644 index 00000000000..9e7a664a33a --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOObjcopy.h @@ -0,0 +1,32 @@ +//===- MachOObjcopy.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H +#define LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H + +namespace llvm { +class Error; + +namespace object { +class MachOObjectFile; +class MachOUniversalBinary; +} // end namespace object + +namespace objcopy { +struct CopyConfig; +class Buffer; + +namespace macho { +Error executeObjcopyOnBinary(const CopyConfig &Config, + object::MachOObjectFile &In, Buffer &Out); +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H diff --git a/tools/llvm-objcopy/MachO/MachOReader.cpp b/tools/llvm-objcopy/MachO/MachOReader.cpp new file mode 100644 index 00000000000..f9b3749ec46 --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -0,0 +1,225 @@ +//===- MachOReader.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MachOReader.h" +#include "../llvm-objcopy.h" +#include "Object.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" +#include + +namespace llvm { +namespace objcopy { +namespace macho { + +void MachOReader::readHeader(Object &O) const { + O.Header.Magic = MachOObj.getHeader().magic; + O.Header.CPUType = MachOObj.getHeader().cputype; + O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; + O.Header.FileType = MachOObj.getHeader().filetype; + O.Header.NCmds = MachOObj.getHeader().ncmds; + O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; + O.Header.Flags = MachOObj.getHeader().flags; +} + +template +Section constructSectionCommon(SectionType Sec) { + Section S; + memcpy(S.Sectname, Sec.sectname, sizeof(Sec.sectname)); + memcpy(S.Segname, Sec.segname, sizeof(Sec.segname)); + S.Addr = Sec.addr; + S.Size = Sec.size; + S.Offset = Sec.offset; + S.Align = Sec.align; + S.RelOff = Sec.reloff; + S.NReloc = Sec.nreloc; + S.Flags = Sec.flags; + S.Reserved1 = Sec.reserved1; + S.Reserved2 = Sec.reserved2; + S.Reserved3 = 0; + return S; +} + +template Section constructSection(SectionType Sec); + +template <> Section constructSection(MachO::section Sec) { + return constructSectionCommon(Sec); +} + +template <> Section constructSection(MachO::section_64 Sec) { + Section S = constructSectionCommon(Sec); + S.Reserved3 = Sec.reserved3; + return S; +} + +// TODO: get rid of reportError and make MachOReader return Expected<> instead. +template +std::vector
+extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, + const object::MachOObjectFile &MachOObj, + size_t &NextSectionIndex) { + auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; + const SectionType *Curr = + reinterpret_cast(LoadCmd.Ptr + sizeof(SegmentType)); + std::vector
Sections; + for (; reinterpret_cast(Curr) < End; Curr++) { + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { + SectionType Sec; + memcpy((void *)&Sec, Curr, sizeof(SectionType)); + MachO::swapStruct(Sec); + Sections.push_back(constructSection(Sec)); + } else { + Sections.push_back(constructSection(*Curr)); + } + + Section &S = Sections.back(); + + StringRef SectName(S.Sectname); + Expected SecRef = + MachOObj.getSection(NextSectionIndex++); + if (!SecRef) + reportError(MachOObj.getFileName(), SecRef.takeError()); + + StringRef Content; + if (auto EC = + MachOObj.getSectionContents(SecRef->getRawDataRefImpl(), Content)) + reportError(MachOObj.getFileName(), std::move(EC)); + S.Content = Content; + + S.Relocations.reserve(S.NReloc); + for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), + RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); + RI != RE; ++RI) + S.Relocations.push_back(MachOObj.getRelocation(RI->getRawDataRefImpl())); + assert(S.NReloc == S.Relocations.size() && + "Incorrect number of relocations"); + } + return Sections; +} + +void MachOReader::readLoadCommands(Object &O) const { + // For MachO, section indices start from 1. 
+ size_t NextSectionIndex = 1; + for (auto LoadCmd : MachOObj.load_commands()) { + LoadCommand LC; + switch (LoadCmd.C.cmd) { + case MachO::LC_SEGMENT: + LC.Sections = extractSections( + LoadCmd, MachOObj, NextSectionIndex); + break; + case MachO::LC_SEGMENT_64: + LC.Sections = + extractSections( + LoadCmd, MachOObj, NextSectionIndex); + break; + case MachO::LC_SYMTAB: + O.SymTabCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + O.DyLdInfoCommandIndex = O.LoadCommands.size(); + break; + } +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ + sizeof(MachO::LCStruct)); \ + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ + MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ + LC.Payload = ArrayRef( \ + reinterpret_cast(const_cast(LoadCmd.Ptr)) + \ + sizeof(MachO::LCStruct), \ + LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ + break; + + switch (LoadCmd.C.cmd) { + default: + memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, + sizeof(MachO::load_command)); + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(LC.MachOLoadCommand.load_command_data); + LC.Payload = ArrayRef( + reinterpret_cast(const_cast(LoadCmd.Ptr)) + + sizeof(MachO::load_command), + LoadCmd.C.cmdsize - sizeof(MachO::load_command)); + break; +#include "llvm/BinaryFormat/MachO.def" + } + O.LoadCommands.push_back(std::move(LC)); + } +} + +template NListEntry constructNameList(const nlist_t &nlist) { + NListEntry NL; + NL.n_strx = nlist.n_strx; + NL.n_type = nlist.n_type; + NL.n_sect = nlist.n_sect; + NL.n_desc = nlist.n_desc; + NL.n_value = nlist.n_value; + return NL; +} + +void MachOReader::readSymbolTable(Object &O) const { + for (auto Symbol : MachOObj.symbols()) { + NListEntry NLE = + MachOObj.is64Bit() + ? 
constructNameList( + MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) + : constructNameList( + MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())); + O.SymTable.NameList.push_back(NLE); + } +}; + +void MachOReader::readStringTable(Object &O) const { + StringRef Data = MachOObj.getStringTableData(); + SmallVector Strs; + Data.split(Strs, '\0'); + O.StrTable.Strings.reserve(Strs.size()); + for (auto S : Strs) + O.StrTable.Strings.push_back(S.str()); +}; + +void MachOReader::readRebaseInfo(Object &O) const { + O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); +} + +void MachOReader::readBindInfo(Object &O) const { + O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); +} + +void MachOReader::readWeakBindInfo(Object &O) const { + O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); +} + +void MachOReader::readLazyBindInfo(Object &O) const { + O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); +} + +void MachOReader::readExportInfo(Object &O) const { + O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); +} + +std::unique_ptr MachOReader::create() const { + auto Obj = llvm::make_unique(); + readHeader(*Obj); + readLoadCommands(*Obj); + readSymbolTable(*Obj); + readStringTable(*Obj); + readRebaseInfo(*Obj); + readBindInfo(*Obj); + readWeakBindInfo(*Obj); + readLazyBindInfo(*Obj); + readExportInfo(*Obj); + return Obj; +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/MachO/MachOReader.h b/tools/llvm-objcopy/MachO/MachOReader.h new file mode 100644 index 00000000000..e38afbd6b71 --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOReader.h @@ -0,0 +1,49 @@ +//===- MachOReader.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MachOObjcopy.h" +#include "Object.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" +#include + +namespace llvm { +namespace objcopy { +namespace macho { + +// The hierarchy of readers is responsible for parsing different inputs: +// raw binaries and regular MachO object files. +class Reader { +public: + virtual ~Reader(){}; + virtual std::unique_ptr create() const = 0; +}; + +class MachOReader : public Reader { + const object::MachOObjectFile &MachOObj; + + void readHeader(Object &O) const; + void readLoadCommands(Object &O) const; + void readSymbolTable(Object &O) const; + void readStringTable(Object &O) const; + void readRebaseInfo(Object &O) const; + void readBindInfo(Object &O) const; + void readWeakBindInfo(Object &O) const; + void readLazyBindInfo(Object &O) const; + void readExportInfo(Object &O) const; + +public: + explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} + + std::unique_ptr create() const override; +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/MachO/MachOWriter.cpp b/tools/llvm-objcopy/MachO/MachOWriter.cpp new file mode 100644 index 00000000000..10dce52566f --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -0,0 +1,353 @@ +//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MachOWriter.h" +#include "../llvm-objcopy.h" +#include "Object.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" +#include + +namespace llvm { +namespace objcopy { +namespace macho { + +size_t MachOWriter::headerSize() const { + return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); +} + +size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; } + +size_t MachOWriter::symTableSize() const { + return O.SymTable.NameList.size() * + (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); +} + +size_t MachOWriter::strTableSize() const { + size_t S = 0; + for (const auto &Str : O.StrTable.Strings) + S += Str.size(); + S += (O.StrTable.Strings.empty() ? 0 : O.StrTable.Strings.size() - 1); + return S; +} + +size_t MachOWriter::totalSize() const { + // Going from tail to head and looking for an appropriate "anchor" to + // calculate the total size assuming that all the offsets are either valid + // ("true") or 0 (0 indicates that the corresponding part is missing). 
+ + SmallVector Ends; + if (O.SymTabCommandIndex) { + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + if (SymTabCommand.symoff) { + assert((SymTabCommand.nsyms == O.SymTable.NameList.size()) && + "Incorrect number of symbols"); + Ends.push_back(SymTabCommand.symoff + symTableSize()); + } + if (SymTabCommand.stroff) { + assert((SymTabCommand.strsize == strTableSize()) && + "Incorrect string table size"); + Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize); + } + } + if (O.DyLdInfoCommandIndex) { + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + if (DyLdInfoCommand.rebase_off) { + assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && + "Incorrect rebase opcodes size"); + Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size); + } + if (DyLdInfoCommand.bind_off) { + assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && + "Incorrect bind opcodes size"); + Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size); + } + if (DyLdInfoCommand.weak_bind_off) { + assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && + "Incorrect weak bind opcodes size"); + Ends.push_back(DyLdInfoCommand.weak_bind_off + + DyLdInfoCommand.weak_bind_size); + } + if (DyLdInfoCommand.lazy_bind_off) { + assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && + "Incorrect lazy bind opcodes size"); + Ends.push_back(DyLdInfoCommand.lazy_bind_off + + DyLdInfoCommand.lazy_bind_size); + } + if (DyLdInfoCommand.export_off) { + assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && + "Incorrect trie size"); + Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size); + } + } + + // Otherwise, use the last section / relocation. 
+ for (const auto &LC : O.LoadCommands) + for (const auto &S : LC.Sections) { + Ends.push_back(S.Offset + S.Size); + if (S.RelOff) + Ends.push_back(S.RelOff + + S.NReloc * sizeof(MachO::any_relocation_info)); + } + + if (!Ends.empty()) + return *std::max_element(Ends.begin(), Ends.end()); + + // Otherwise, we have only Mach header and load commands. + return headerSize() + loadCommandsSize(); +} + +void MachOWriter::writeHeader() { + MachO::mach_header_64 Header; + + Header.magic = O.Header.Magic; + Header.cputype = O.Header.CPUType; + Header.cpusubtype = O.Header.CPUSubType; + Header.filetype = O.Header.FileType; + Header.ncmds = O.Header.NCmds; + Header.sizeofcmds = O.Header.SizeOfCmds; + Header.flags = O.Header.Flags; + Header.reserved = O.Header.Reserved; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Header); + + auto HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + memcpy(B.getBufferStart(), &Header, HeaderSize); +} + +void MachOWriter::writeLoadCommands() { + uint8_t *Begin = B.getBufferStart() + headerSize(); + MachO::macho_load_command MLC; + for (const auto &LC : O.LoadCommands) { +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ + LC.MachOLoadCommand.load_command_data.cmdsize); \ + MLC = LC.MachOLoadCommand; \ + if (IsLittleEndian != sys::IsLittleEndianHost) \ + MachO::swapStruct(MLC.LCStruct##_data); \ + memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ + Begin += sizeof(MachO::LCStruct); \ + memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ + Begin += LC.Payload.size(); \ + break; + + switch (LC.MachOLoadCommand.load_command_data.cmd) { + default: + assert(sizeof(MachO::load_command) + LC.Payload.size() == + LC.MachOLoadCommand.load_command_data.cmdsize); + MLC = LC.MachOLoadCommand; + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.load_command_data); + 
memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command)); + Begin += sizeof(MachO::load_command); + memcpy(Begin, LC.Payload.data(), LC.Payload.size()); + Begin += LC.Payload.size(); + break; +#include "llvm/BinaryFormat/MachO.def" + } + } +} + +void MachOWriter::writeSections() { + for (const auto &LC : O.LoadCommands) + for (const auto &Sec : LC.Sections) { + assert(Sec.Offset && "Section offset can not be zero"); + assert((Sec.Size == Sec.Content.size()) && "Incorrect section size"); + memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(), + Sec.Content.size()); + for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) { + MachO::any_relocation_info R = Sec.Relocations[Index]; + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(R); + memcpy(B.getBufferStart() + Sec.RelOff + + Index * sizeof(MachO::any_relocation_info), + &R, sizeof(R)); + } + } +} + +template +void writeNListEntry(const NListEntry &NLE, bool IsLittleEndian, char *&Out) { + NListType ListEntry; + ListEntry.n_strx = NLE.n_strx; + ListEntry.n_type = NLE.n_type; + ListEntry.n_sect = NLE.n_sect; + ListEntry.n_desc = NLE.n_desc; + ListEntry.n_value = NLE.n_value; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(ListEntry); + memcpy(Out, reinterpret_cast(&ListEntry), sizeof(NListType)); + Out += sizeof(NListType); +} + +void MachOWriter::writeSymbolTable() { + if (!O.SymTabCommandIndex) + return; + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + assert((SymTabCommand.nsyms == O.SymTable.NameList.size()) && + "Incorrect number of symbols"); + char *Out = (char *)B.getBufferStart() + SymTabCommand.symoff; + for (auto NLE : O.SymTable.NameList) { + if (Is64Bit) + writeNListEntry(NLE, IsLittleEndian, Out); + else + writeNListEntry(NLE, IsLittleEndian, Out); + } +} + +void MachOWriter::writeStringTable() { + if (!O.SymTabCommandIndex) + return; + const 
MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + char *Out = (char *)B.getBufferStart() + SymTabCommand.stroff; + assert((SymTabCommand.strsize == strTableSize()) && + "Incorrect string table size"); + for (size_t Index = 0; Index < O.StrTable.Strings.size(); ++Index) { + memcpy(Out, O.StrTable.Strings[Index].data(), + O.StrTable.Strings[Index].size()); + Out += O.StrTable.Strings[Index].size(); + if (Index + 1 != O.StrTable.Strings.size()) { /* not the last string */ + memcpy(Out, "\0", 1); /* NUL separator between entries */ + Out += 1; + } + } +} + +void MachOWriter::writeRebaseInfo() { /* raw copy to rebase_off */ + if (!O.DyLdInfoCommandIndex) + return; /* no dyld info load command recorded */ + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.rebase_off; + assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && + "Incorrect rebase opcodes size"); + memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size()); +} + +void MachOWriter::writeBindInfo() { /* raw copy to bind_off */ + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.bind_off; + assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && + "Incorrect bind opcodes size"); + memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size()); +} + +void MachOWriter::writeWeakBindInfo() { /* raw copy to weak_bind_off */ + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.weak_bind_off; + assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && + "Incorrect weak bind opcodes size"); + memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size()); +} + +void 
MachOWriter::writeLazyBindInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.lazy_bind_off; + assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && + "Incorrect lazy bind opcodes size"); + memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size()); +} + +void MachOWriter::writeExportInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.export_off; + assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && + "Incorrect export trie size"); + memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); +} + +void MachOWriter::writeTail() { + typedef void (MachOWriter::*WriteHandlerType)(void); + typedef std::pair<uint64_t, WriteHandlerType> WriteOperation; + SmallVector<WriteOperation, 7> Queue; /* <= 2 symtab + 5 dyld-info ops */ + + if (O.SymTabCommandIndex) { + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + if (SymTabCommand.symoff) + Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable}); + if (SymTabCommand.stroff) + Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable}); + } + + if (O.DyLdInfoCommandIndex) { + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + if (DyLdInfoCommand.rebase_off) + Queue.push_back( + {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo}); + if (DyLdInfoCommand.bind_off) + Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo}); + if (DyLdInfoCommand.weak_bind_off) + Queue.push_back( + {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo}); + if 
(DyLdInfoCommand.lazy_bind_off) + Queue.push_back( + {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo}); + if (DyLdInfoCommand.export_off) + Queue.push_back( + {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); + } + + llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { + return LHS.first < RHS.first; + }); /* ascending file offset */ + + for (auto WriteOp : Queue) + (this->*WriteOp.second)(); +} + +Error MachOWriter::write() { + if (Error E = B.allocate(totalSize())) + return E; + memset(B.getBufferStart(), 0, totalSize()); /* unwritten gaps stay zero */ + writeHeader(); + writeLoadCommands(); + writeSections(); + writeTail(); + if (auto E = B.commit()) + return E; + return Error::success(); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/MachO/MachOWriter.h b/tools/llvm-objcopy/MachO/MachOWriter.h new file mode 100644 index 00000000000..dedff114256 --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOWriter.h @@ -0,0 +1,55 @@ +//===- MachOWriter.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../Buffer.h" // NOTE(review): header has no include guard +#include "MachOObjcopy.h" +#include "Object.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" + +namespace llvm { +class Error; + +namespace objcopy { +namespace macho { + +class MachOWriter { + Object &O; // model to serialize (header, load commands, tables) + bool Is64Bit; // 64-bit vs. 32-bit on-disk structs (see writeHeader) + bool IsLittleEndian; // output byte order; swapped if host differs + Buffer &B; // destination; allocated and committed by write() + + size_t headerSize() const; + size_t loadCommandsSize() const; + size_t symTableSize() const; + size_t strTableSize() const; + + void writeHeader(); + void writeLoadCommands(); + void writeSections(); + void writeSymbolTable(); + void writeStringTable(); + void writeRebaseInfo(); + void writeBindInfo(); + void writeWeakBindInfo(); + void writeLazyBindInfo(); + void writeExportInfo(); + void writeTail(); // symtab/strtab/dyld-info writers, in file-offset order + +public: + MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B) + : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {} + + size_t totalSize() const; + Error write(); // allocate totalSize(), zero-fill, write all parts, commit +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/MachO/Object.h b/tools/llvm-objcopy/MachO/Object.h new file mode 100644 index 00000000000..d24306497b7 --- /dev/null +++ b/tools/llvm-objcopy/MachO/Object.h @@ -0,0 +1,209 @@ +//===- Object.h - Mach-O object file model ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJCOPY_MACHO_OBJECT_H +#define LLVM_OBJCOPY_MACHO_OBJECT_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/ObjectYAML/DWARFYAML.h" +#include "llvm/Support/YAMLTraits.h" +#include <cstdint> +#include <string> +#include <vector> + +namespace llvm { +namespace objcopy { +namespace macho { + +struct MachHeader { // copied field-by-field into the on-disk header + uint32_t Magic; + uint32_t CPUType; + uint32_t CPUSubType; + uint32_t FileType; + uint32_t NCmds; + uint32_t SizeOfCmds; + uint32_t Flags; + uint32_t Reserved = 0; +}; + +struct Section { + char Sectname[16]; + char Segname[16]; + uint64_t Addr; + uint64_t Size; + uint32_t Offset; + uint32_t Align; + uint32_t RelOff; + uint32_t NReloc; + uint32_t Flags; + uint32_t Reserved1; + uint32_t Reserved2; + uint32_t Reserved3; + + StringRef Content; // section bytes; the writer places them at Offset + std::vector<MachO::any_relocation_info> Relocations; // written at RelOff +}; + +struct LoadCommand { + // The type MachO::macho_load_command is defined in llvm/BinaryFormat/MachO.h + // and it is a union of all the structs corresponding to various load + // commands. + MachO::macho_load_command MachOLoadCommand; + + // The raw content of the payload of the load command (located right after the + // corresponding struct). In some cases it is either empty or can be + // copied-over without digging into its structure. + ArrayRef<uint8_t> Payload; + + // Some load commands can contain (inside the payload) an array of sections, + // though the contents of the sections are stored separately. The struct + // Section describes only sections' metadata and where to find the + // corresponding content inside the binary. + std::vector<Section>
Sections; +}; + +struct NListEntry { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; + uint64_t n_value; +}; + +/// The location of the symbol table inside the binary is described by LC_SYMTAB +/// load command. +struct SymbolTable { + std::vector<NListEntry> NameList; +}; + +/// The location of the string table inside the binary is described by LC_SYMTAB +/// load command. +struct StringTable { + std::vector<std::string> Strings; +}; + +/// The location of the rebase info inside the binary is described by +/// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at +/// an address different from its preferred address. The rebase information is +/// a stream of byte sized opcodes whose symbolic names start with +/// REBASE_OPCODE_. Conceptually the rebase information is a table of tuples: +/// <seg-index, seg-offset, type> +/// The opcodes are a compressed way to encode the table by only +/// encoding when a column changes. In addition simple patterns +/// like "every n'th offset for m times" can be encoded in a few +/// bytes. +struct RebaseInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the bind info inside the binary is described by +/// LC_DYLD_INFO load command. Dyld binds an image during the loading process, +/// if the image requires any pointers to be initialized to symbols in other +/// images. The bind information is a stream of byte sized opcodes whose +/// symbolic names start with BIND_OPCODE_. Conceptually the bind information is +/// a table of tuples: <seg-index, seg-offset, type, symbol-library-ordinal, +/// symbol-name, addend> The opcodes are a compressed way to encode the table by +/// only encoding when a column changes. In addition simple patterns like for +/// runs of pointers initialized to the same value can be encoded in a few +/// bytes. +struct BindInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. 
+ ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the weak bind info inside the binary is described by +/// LC_DYLD_INFO load command. Some C++ programs require dyld to unique symbols +/// so that all images in the process use the same copy of some code/data. This +/// step is done after binding. The content of the weak_bind info is an opcode +/// stream like the bind_info. But it is sorted alphabetically by symbol name. +/// This enables dyld to walk all images with weak binding information in order +/// and look for collisions. If there are no collisions, dyld does no updating. +/// That means that some fixups are also encoded in the bind_info. For +/// instance, all calls to "operator new" are first bound to libstdc++.dylib +/// using the information in bind_info. Then if some image overrides operator +/// new that is detected when the weak_bind information is processed and the +/// call to operator new is then rebound. +struct WeakBindInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the lazy bind info inside the binary is described by +/// LC_DYLD_INFO load command. Some uses of external symbols do not need to be +/// bound immediately. Instead they can be lazily bound on first use. The +/// lazy_bind contains a stream of BIND opcodes to bind all lazy symbols. Normal +/// use is that dyld ignores the lazy_bind section when loading an image. +/// Instead the static linker arranged for the lazy pointer to initially point +/// to a helper function which pushes the offset into the lazy_bind area for the +/// symbol needing to be bound, then jumps to dyld which simply adds the offset +/// to lazy_bind_off to get the information on what to bind. +struct LazyBindInfo { + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the export info inside the binary is described by +/// LC_DYLD_INFO load command. 
The symbols exported by a dylib are encoded in a +/// trie. This is a compact representation that factors out common prefixes. It +/// also reduces LINKEDIT pages in RAM because it encodes all information (name, +/// address, flags) in one small, contiguous range. The export area is a stream +/// of nodes. The first node sequentially is the start node for the trie. Nodes +/// for a symbol start with a uleb128 that is the length of the exported symbol +/// information for the string so far. If there is no exported symbol, the node +/// starts with a zero byte. If there is exported info, it follows the length. +/// First is a uleb128 containing flags. Normally, it is followed by +/// a uleb128 encoded offset which is the location of the content named +/// by the symbol from the mach_header for the image. If the flags +/// is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is +/// a uleb128 encoded library ordinal, then a zero terminated +/// UTF8 string. If the string is zero length, then the symbol +/// is re-exported from the specified dylib with the same name. +/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following +/// the flags are two uleb128s: the stub offset and the resolver offset. +/// The stub is used by non-lazy pointers. The resolver is used +/// by lazy pointers and must be called to get the actual address to use. +/// After the optional exported symbol information is a byte of +/// how many edges (0-255) that this node has leaving it, +/// followed by each edge. +/// Each edge is a zero terminated UTF8 string of the additional chars +/// in the symbol, followed by a uleb128 offset for the node that +/// edge points to. 
+struct ExportInfo { + ArrayRef Trie; +}; + +struct Object { + MachHeader Header; + std::vector LoadCommands; + + SymbolTable SymTable; + StringTable StrTable; + + RebaseInfo Rebases; + BindInfo Binds; + WeakBindInfo WeakBinds; + LazyBindInfo LazyBinds; + ExportInfo Exports; + + /// The index of LC_SYMTAB load command if present. + Optional SymTabCommandIndex; + /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. + Optional DyLdInfoCommandIndex; +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_OBJCOPY_MACHO_OBJECT_H diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp index 2537c6289e9..b46ca9b5ec9 100644 --- a/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/tools/llvm-objcopy/llvm-objcopy.cpp @@ -11,6 +11,7 @@ #include "COFF/COFFObjcopy.h" #include "CopyConfig.h" #include "ELF/ELFObjcopy.h" +#include "MachO/MachOObjcopy.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -23,6 +24,7 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" @@ -142,6 +144,8 @@ static Error executeObjcopyOnBinary(const CopyConfig &Config, return elf::executeObjcopyOnBinary(Config, *ELFBinary, Out); else if (auto *COFFBinary = dyn_cast(&In)) return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out); + else if (auto *MachOBinary = dyn_cast(&In)) + return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out); else return createStringError(object_error::invalid_file_type, "Unsupported object file format"); -- 2.40.0