granicus.if.org Git - llvm/commitdiff
[WebAssembly] Fixed objdump not parsing function headers.
author Wouter van Oortmerssen <aardappel@gmail.com>
Thu, 17 Jan 2019 18:14:09 +0000 (18:14 +0000)
committer Wouter van Oortmerssen <aardappel@gmail.com>
Thu, 17 Jan 2019 18:14:09 +0000 (18:14 +0000)
Summary:
objdump was interpreting the function header containing the locals
declaration as instructions. To parse these without injecting target
specific code in objdump, MCDisassembler::onSymbolStart was added to
be implemented by the WebAssembly implementation.

WasmObjectFile now returns a code offset for the "address" of a symbol,
rather than the index. This is also more in line with what other
targets do.

Also ensured that the AsmParser correctly puts each function
in its own segment to enable this test case.

Reviewers: sbc100, dschuff

Subscribers: jgravelle-google, aheejin, sunfish, rupprecht, llvm-commits

Differential Revision: https://reviews.llvm.org/D56684

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351460 91177308-0d34-0410-b5e6-96231b3b80d8

12 files changed:
include/llvm/MC/MCDisassembler/MCDisassembler.h
include/llvm/Object/Wasm.h
lib/MC/MCDisassembler/MCDisassembler.cpp
lib/MC/MCParser/WasmAsmParser.cpp
lib/Object/WasmObjectFile.cpp
lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
lib/Target/WebAssembly/Disassembler/LLVMBuild.txt
lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
test/MC/WebAssembly/objdump.s [new file with mode: 0644]
test/MC/WebAssembly/weak-alias.ll
test/tools/llvm-objdump/WebAssembly/symbol-table.test
tools/llvm-objdump/llvm-objdump.cpp

index 7f09c05ccf2abfdbf6ec8cb7707dc209716a4161..daa741d0762b842a6000ad9b80bf6807df608752 100644 (file)
@@ -17,6 +17,7 @@
 namespace llvm {
 
 template <typename T> class ArrayRef;
+class StringRef;
 class MCContext;
 class MCInst;
 class MCSubtargetInfo;
@@ -80,6 +81,23 @@ public:
                                       raw_ostream &VStream,
                                       raw_ostream &CStream) const = 0;
 
+  /// May parse any prelude that precedes instructions after the start of a
+  /// symbol. Needed for some targets, e.g. WebAssembly.
+  ///
+  /// \param Name     - The name of the symbol.
+  /// \param Size     - The number of bytes consumed.
+  /// \param Address  - The address, in the memory space of region, of the first
+  ///                   byte of the symbol.
+  /// \param Bytes    - A reference to the actual bytes at the symbol location.
+  /// \param VStream  - The stream to print warnings and diagnostic messages on.
+  /// \param CStream  - The stream to print comments and annotations on.
+  /// \return         - MCDisassembler::Success if the bytes are valid,
+  ///                   MCDisassembler::Fail if the bytes were invalid.
+  virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
+                                     ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                     raw_ostream &VStream,
+                                     raw_ostream &CStream) const;
+
 private:
   MCContext &Ctx;
 
index 083fd2e9030d9b673076a1e2551cf0206ad5fcf0..5f43eeaa8e7d5df44fa188aa490ba06f8ab7a999 100644 (file)
@@ -222,6 +222,7 @@ private:
   bool isValidDataSymbol(uint32_t Index) const;
   bool isValidSectionSymbol(uint32_t Index) const;
   wasm::WasmFunction &getDefinedFunction(uint32_t Index);
+  const wasm::WasmFunction &getDefinedFunction(uint32_t Index) const;
   wasm::WasmGlobal &getDefinedGlobal(uint32_t Index);
   wasm::WasmEvent &getDefinedEvent(uint32_t Index);
 
index 2f1275d00b861f3db7a4c87273a915d486453b4f..0959881c97a049dab0fff1bb2e5960a0ad265805 100644 (file)
@@ -8,6 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
@@ -15,6 +17,13 @@ using namespace llvm;
 
 MCDisassembler::~MCDisassembler() = default;
 
+MCDisassembler::DecodeStatus MCDisassembler::onSymbolStart(
+    StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+    raw_ostream &VStream, raw_ostream &CStream) const {
+  Size = 0;
+  return MCDisassembler::Success;
+}
+
 bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
                                               uint64_t Address, bool IsBranch,
                                               uint64_t Offset,
index 93bb0cb3c72e476360da73d8e054f8bb8bcc0af2..9c41c474dae59201e4cdcd1d560b7b98930157ac 100644 (file)
@@ -22,6 +22,7 @@
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCSectionWasm.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolWasm.h"
@@ -83,8 +84,16 @@ public:
   }
 
   bool parseSectionDirective(StringRef, SMLoc) {
-    // FIXME: .section currently no-op.
-    while (Lexer->isNot(AsmToken::EndOfStatement)) Parser->Lex();
+    StringRef Name;
+    if (Parser->parseIdentifier(Name))
+      return TokError("expected identifier in directive");
+    // FIXME: currently requiring this very fixed format.
+    if (Expect(AsmToken::Comma, ",") || Expect(AsmToken::String, "string") ||
+        Expect(AsmToken::Comma, ",") || Expect(AsmToken::At, "@") ||
+        Expect(AsmToken::EndOfStatement, "eol"))
+      return true;
+    auto WS = getContext().getWasmSection(Name, SectionKind::getText());
+    getStreamer().SwitchSection(WS);
     return false;
   }
 
@@ -95,15 +104,13 @@ public:
     if (Parser->parseIdentifier(Name))
       return TokError("expected identifier in directive");
     auto Sym = getContext().getOrCreateSymbol(Name);
-    if (Lexer->isNot(AsmToken::Comma))
-      return TokError("unexpected token in directive");
-    Lex();
+    if (Expect(AsmToken::Comma, ","))
+      return true;
     const MCExpr *Expr;
     if (Parser->parseExpression(Expr))
       return true;
-    if (Lexer->isNot(AsmToken::EndOfStatement))
-      return TokError("unexpected token in directive");
-    Lex();
+    if (Expect(AsmToken::EndOfStatement, "eol"))
+      return true;
     // MCWasmStreamer implements this.
     getStreamer().emitELFSize(Sym, Expr);
     return false;
index fd6ad9bdefebb3bf33e4635ae9bf34785b752c54..12821748e915ddf8bb4161567021721bd507dc2f 100644 (file)
@@ -1055,6 +1055,12 @@ wasm::WasmFunction &WasmObjectFile::getDefinedFunction(uint32_t Index) {
   return Functions[Index - NumImportedFunctions];
 }
 
+const wasm::WasmFunction &
+WasmObjectFile::getDefinedFunction(uint32_t Index) const {
+  assert(isDefinedFunctionIndex(Index));
+  return Functions[Index - NumImportedFunctions];
+}
+
 wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) {
   assert(isDefinedGlobalIndex(Index));
   return Globals[Index - NumImportedGlobals];
@@ -1221,7 +1227,12 @@ Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
 }
 
 Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
-  return getSymbolValue(Symb);
+  auto &Sym = getWasmSymbol(Symb);
+  if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION &&
+      isDefinedFunctionIndex(Sym.Info.ElementIndex))
+    return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset;
+  else
+    return getSymbolValue(Symb);
 }
 
 uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
index 0a5908f4379060992b5f0719ebf0531ce43417e8..fdba4f55f61d394bfe194c863ce5ab3c480e468d 100644 (file)
@@ -298,6 +298,8 @@ public:
         Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
         Type == "f64x2")
       return wasm::ValType::V128;
+    if (Type == "except_ref")
+      return wasm::ValType::EXCEPT_REF;
     return Optional<wasm::ValType>();
   }
 
@@ -317,7 +319,7 @@ public:
     while (Lexer.is(AsmToken::Identifier)) {
       auto Type = parseType(Lexer.getTok().getString());
       if (!Type)
-        return true;
+        return error("unknown type: ", Lexer.getTok());
       Types.push_back(Type.getValue());
       Parser.Lex();
       if (!isNext(AsmToken::Comma))
@@ -561,6 +563,7 @@ public:
     auto &Out = getStreamer();
     auto &TOut =
         reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
+    auto &Ctx = Out.getContext();
 
     // TODO: any time we return an error, at least one token must have been
     // consumed, otherwise this will not signal an error to the caller.
@@ -578,8 +581,7 @@ public:
       if (!Type)
         return error("Unknown type in .globaltype directive: ", TypeTok);
       // Now set this symbol with the correct type.
-      auto WasmSym = cast<MCSymbolWasm>(
-          TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
       WasmSym->setGlobalType(
           wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
@@ -597,8 +599,7 @@ public:
       auto SymName = expectIdent();
       if (SymName.empty())
         return true;
-      auto WasmSym = cast<MCSymbolWasm>(
-          TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
       if (CurrentState == Label && WasmSym == LastLabel) {
         // This .functype indicates a start of a function.
         if (ensureEmptyNestingStack())
@@ -621,8 +622,7 @@ public:
       auto SymName = expectIdent();
       if (SymName.empty())
         return true;
-      auto WasmSym = cast<MCSymbolWasm>(
-          TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
       auto Signature = make_unique<wasm::WasmSignature>();
       if (parseRegTypeList(Signature->Params))
         return true;
index a452ca1acd04b17f04ec07ae5a13b849deec1b11..64fff6f7a272cf462ff96229c652e77e86f60f89 100644 (file)
@@ -19,5 +19,5 @@
 type = Library
 name = WebAssemblyDisassembler
 parent = WebAssembly
-required_libraries = MCDisassembler WebAssemblyInfo Support
+required_libraries = MCDisassembler WebAssemblyInfo WebAssemblyAsmPrinter Support
 add_to_library_groups = WebAssembly
index 6acc9b20eed205b152bd43a248b379233dd026ba..ce91a3444fb8e4633724bde08e8af7289d0c71e6 100644 (file)
@@ -15,6 +15,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "InstPrinter/WebAssemblyInstPrinter.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -45,6 +46,10 @@ class WebAssemblyDisassembler final : public MCDisassembler {
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &VStream,
                               raw_ostream &CStream) const override;
+  DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
+                             ArrayRef<uint8_t> Bytes, uint64_t Address,
+                             raw_ostream &VStream,
+                             raw_ostream &CStream) const override;
 
 public:
   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -77,7 +82,7 @@ static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
 }
 
 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
-                    bool Signed = false) {
+                    bool Signed) {
   unsigned N = 0;
   const char *Error = nullptr;
   Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
@@ -116,6 +121,41 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
   return true;
 }
 
+MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
+    StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+    raw_ostream &VStream, raw_ostream &CStream) const {
+  Size = 0;
+  if (Address == 0) {
+    // Start of a code section: we're parsing only the function count.
+    int64_t FunctionCount;
+    if (!nextLEB(FunctionCount, Bytes, Size, false))
+      return MCDisassembler::Fail;
+    outs() << "        # " << FunctionCount << " functions in section.";
+  } else {
+    // Parse the start of a single function.
+    int64_t BodySize, LocalEntryCount;
+    if (!nextLEB(BodySize, Bytes, Size, false) ||
+        !nextLEB(LocalEntryCount, Bytes, Size, false))
+      return MCDisassembler::Fail;
+    if (LocalEntryCount) {
+      outs() << "        .local ";
+      for (int64_t I = 0; I < LocalEntryCount; I++) {
+        int64_t Count, Type;
+        if (!nextLEB(Count, Bytes, Size, false) ||
+            !nextLEB(Type, Bytes, Size, false))
+          return MCDisassembler::Fail;
+        for (int64_t J = 0; J < Count; J++) {
+          if (I || J)
+            outs() << ", ";
+          outs() << WebAssembly::anyTypeToString(Type);
+        }
+      }
+    }
+  }
+  outs() << "\n";
+  return MCDisassembler::Success;
+}
+
 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
     MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
     raw_ostream & /*OS*/, raw_ostream &CS) const {
@@ -138,7 +178,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
     if (!WasmInst)
       return MCDisassembler::Fail;
     int64_t PrefixedOpc;
-    if (!nextLEB(PrefixedOpc, Bytes, Size))
+    if (!nextLEB(PrefixedOpc, Bytes, Size, false))
       return MCDisassembler::Fail;
     if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
       return MCDisassembler::Fail;
diff --git a/test/MC/WebAssembly/objdump.s b/test/MC/WebAssembly/objdump.s
new file mode 100644 (file)
index 0000000..fc87174
--- /dev/null
@@ -0,0 +1,34 @@
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -o %t.o -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s
+# RUN: llvm-objdump -triple=wasm32-unknown-unknown -disassemble %t.o | FileCheck %s
+
+    .section .text.main1,"",@
+    .type    test0,@function
+test0:
+    .functype   test0 (i32, i64) -> (i32)
+    .local      f32, f64, v128, v128
+    local.get   2
+    end_function
+.Lfunc_end0:
+    .size      test0, .Lfunc_end0-test0
+
+    .section .text.main2,"",@
+    .type    test1,@function
+test1:
+    .functype   test1 (i32, i64) -> (i32)
+    .local      i32, i64, except_ref
+    local.get   3
+    end_function
+.Lfunc_end1:
+    .size      test1, .Lfunc_end1-test1
+
+
+# CHECK-LABEL: CODE:
+# CHECK:             # 2 functions in section.
+# CHECK-LABEL: test0:
+# CHECK-NEXT:        .local  f32, f64, v128, v128
+# CHECK-NEXT:       9:       20 02  local.get  2
+# CHECK-NEXT:       b:       0b     end_block
+# CHECK-LABEL: test1:
+# CHECK-NEXT:        .local  i32, i64, except_ref
+# CHECK-NEXT:      14:       20 03  local.get  3
+# CHECK-NEXT:      16:       0b     end_block
index ea496db505261fb99cacc3474273675801bafe70..9f0fd18cf979662052797dc9e11e08ee56214cfa 100644 (file)
@@ -210,13 +210,13 @@ entry:
 ; CHECK-NEXT: ...
 
 ; CHECK-SYMS: SYMBOL TABLE:
-; CHECK-SYMS-NEXT: 00000000 g     F CODE       .hidden foo
-; CHECK-SYMS-NEXT: 00000001 g     F CODE       .hidden call_direct
-; CHECK-SYMS-NEXT: 00000002 g     F CODE       .hidden call_alias
-; CHECK-SYMS-NEXT: 00000000 gw    F CODE       .hidden foo_alias
-; CHECK-SYMS-NEXT: 00000003 g     F CODE       .hidden call_direct_ptr
+; CHECK-SYMS-NEXT: 00000001 g     F CODE       .hidden foo
+; CHECK-SYMS-NEXT: 00000006 g     F CODE       .hidden call_direct
+; CHECK-SYMS-NEXT: 0000000f g     F CODE       .hidden call_alias
+; CHECK-SYMS-NEXT: 00000001 gw    F CODE       .hidden foo_alias
+; CHECK-SYMS-NEXT: 00000018 g     F CODE       .hidden call_direct_ptr
 ; CHECK-SYMS-NEXT: 00000008 g     O DATA       direct_address
-; CHECK-SYMS-NEXT: 00000004 g     F CODE       .hidden call_alias_ptr
+; CHECK-SYMS-NEXT: 0000002b g     F CODE       .hidden call_alias_ptr
 ; CHECK-SYMS-NEXT: 00000010 g     O DATA       alias_address
 ; CHECK-SYMS-NEXT: 00000000 g     O DATA       bar
 ; CHECK-SYMS-NEXT: 00000000 gw    O DATA       .hidden bar_alias
index b8455aaf5aea9239d703302ca052c6ac13dba59a..b67b0a366b1bd964a7989241a54e907b84e51894 100644 (file)
@@ -1,9 +1,9 @@
 RUN: llvm-objdump -t %p/../Inputs/trivial.obj.wasm | FileCheck %s
 
 CHECK:      SYMBOL TABLE:
-CHECK-NEXT: 00000002 g     F CODE      main
+CHECK-NEXT: 00000001 g     F CODE      main
 CHECK-NEXT: 00000000 l     O DATA      .L.str
 CHECK-NEXT: 00000000 g     F *UND*     puts
-CHECK-NEXT: 00000003 l     F CODE      .LSomeOtherFunction_bitcast
+CHECK-NEXT: 00000019 l     F CODE      .LSomeOtherFunction_bitcast
 CHECK-NEXT: 00000000 g     F *UND*     SomeOtherFunction
 CHECK-NEXT: 00000010 g     O DATA      var
index a4dbd07612f422e05b3ad385b5dea9e8762e2439..7d0e7e9406867f5db0a2e157346b43be330d1306 100644 (file)
@@ -1605,6 +1605,12 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
       raw_ostream &DebugOut = nulls();
 #endif
 
+      // Some targets (like WebAssembly) have a special prelude at the start
+      // of each symbol.
+      DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
+                            SectionAddr + Start, DebugOut, CommentStream);
+      Start += Size;
+
       for (Index = Start; Index < End; Index += Size) {
         MCInst Inst;