From 98d16d99132a46cbcd23d3150cbb18aeb7c201e2 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Sat, 2 Mar 2019 00:12:57 +0000 Subject: [PATCH] [tblgen] Track CodeInit origins when possible Summary: Add an SMLoc to CodeInit that records the source line it originated from. This allows tablegen to point precisely at portions of code when reporting errors within the CodeInit. For example, in the upcoming GlobalISel combiner, it can report undefined expansions and point at the instance of the expansion. This is achieved using something like: SMLoc::getFromPointer(SMLoc::getPointer() + (StringRef - CodeInit::getValue())) The location is lost when producing a CodeInit by string concatenation so a fallback SMLoc is required (e.g. the Record::getLoc()) but that's pretty rare for CodeInits. There's a reasonable case for extending tracking of a couple other Init objects, for example StringInit's are often parsed and it would be good to point inside the string when reporting errors about that. However, location tracking also harms de-duplication. This is fine for CodeInit where there's only a few hundred of them (~160 for X86) and it may be worth it for StringInit (~86k up to ~1.9M for roughly 15MB increase for X86). However the origin tracking would be a _terrible_ idea for IntInit, BitInit, and UnsetInit. I haven't measured either of those three but BitInit would most likely be on the order of increasing the current 2 BitInit values up to billions. Reviewers: volkan, aditya_nandakumar, bogner, paquette, aemerson Reviewed By: paquette Subscribers: javed.absar, kristof.beyls, dexonsmith, llvm-commits, kristina Tags: #llvm Differential Revision: https://reviews.llvm.org/D58141 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355245 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/StringSet.h | 1 + include/llvm/TableGen/Record.h | 8 +++++--- lib/TableGen/Record.cpp | 28 +++++++++++++++++++++------- lib/TableGen/TGParser.cpp | 2 +- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index fcf9519b40e..25ad359b035 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -33,6 +33,7 @@ namespace llvm { for (StringRef X : S) insert(X); } + explicit StringSet(AllocatorTy A) : base(A) {} std::pair insert(StringRef Key) { assert(!Key.empty()); diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index cdb4deb22a8..2cb0548a4ca 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -623,10 +623,11 @@ public: class CodeInit : public TypedInit { StringRef Value; + SMLoc Loc; - explicit CodeInit(StringRef V) + explicit CodeInit(StringRef V, const SMLoc &Loc) : TypedInit(IK_CodeInit, static_cast(CodeRecTy::get())), - Value(V) {} + Value(V), Loc(Loc) {} public: CodeInit(const StringInit &) = delete; @@ -636,9 +637,10 @@ public: return I->getKind() == IK_CodeInit; } - static CodeInit *get(StringRef); + static CodeInit *get(StringRef, const SMLoc &Loc); StringRef getValue() const { return Value; } + const SMLoc &getLoc() const { return Loc; } Init *convertInitializerTo(RecTy *Ty) const override; diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index cf70be67e3c..480a002b64a 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -15,9 +15,11 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" @@ -36,8 +38,13 @@ using namespace llvm; +#define DEBUG_TYPE "tblgen-records" + static BumpPtrAllocator Allocator; +STATISTIC(CodeInitsConstructed, + "The total number of unique CodeInits constructed"); + //===----------------------------------------------------------------------===// // Type implementations //===----------------------------------------------------------------------===// @@ -506,13 +513,20 @@ IntInit::convertInitializerBitRange(ArrayRef Bits) const { return BitsInit::get(NewBits); } -CodeInit *CodeInit::get(StringRef V) { - static StringMap ThePool(Allocator); +CodeInit *CodeInit::get(StringRef V, const SMLoc &Loc) { + static StringSet ThePool(Allocator); - auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first; - if (!Entry.second) - Entry.second = new(Allocator) CodeInit(Entry.getKey()); - return Entry.second; + CodeInitsConstructed++; + + // Unlike StringMap, StringSet doesn't accept empty keys. + if (V.empty()) + return new (Allocator) CodeInit("", Loc); + + // Location tracking prevents us from de-duping CodeInits as we're never + // called with the same string and same location twice. However, we can at + // least de-dupe the strings for a modest saving. + auto &Entry = *ThePool.insert(V).first; + return new(Allocator) CodeInit(Entry.getKey(), Loc); } StringInit *StringInit::get(StringRef V) { @@ -528,7 +542,7 @@ Init *StringInit::convertInitializerTo(RecTy *Ty) const { if (isa(Ty)) return const_cast(this); if (isa(Ty)) - return CodeInit::get(getValue()); + return CodeInit::get(getValue(), SMLoc()); return nullptr; } diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 97a19a91d6d..58343bda275 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -1749,7 +1749,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, break; } case tgtok::CodeFragment: - R = CodeInit::get(Lex.getCurStrVal()); + R = CodeInit::get(Lex.getCurStrVal(), Lex.getLoc()); Lex.Lex(); break; case tgtok::question: -- 2.50.1