From: John McCall <rjmccall@apple.com>
Date: Tue, 8 Mar 2011 07:59:04 +0000 (+0000)
Subject: Fix my earlier commit to work with escaped newlines and leave breadcrumbs
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=834e3f6c77d9ac03997a3f0c56934edcf406a355;p=clang

Fix my earlier commit to work with escaped newlines and leave breadcrumbs
in case we want to make a world where we can check intermediate instantiations
for this kind of breadcrumb.



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@127221 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index fc9a8de434..91fcd6700d 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -236,6 +236,20 @@ public:
                                  const SourceManager &SourceMgr,
                                  const LangOptions &Features, 
                                  bool *Invalid = 0);
+
+  /// getSpelling - Get the spelling of the token at the given source
+  /// location.  The provided buffer is used only when the token needs
+  /// cleaning; otherwise (the usual case) no data is copied and the
+  /// returned string does not point into the buffer.  If the file buffer
+  /// cannot be loaded, *invalid is set (when non-null) and an empty string
+  /// is returned.  This method lexes at the instantiation depth of the
+  /// given location and does not jump to the instantiation or spelling
+  /// location.
+  static llvm::StringRef getSpelling(SourceLocation loc,
+                                     llvm::SmallVectorImpl<char> &buffer,
+                                     const SourceManager &SourceMgr,
+                                     const LangOptions &Features,
+                                     bool *invalid = 0);
   
   /// MeasureTokenLength - Relex the token at the specified location and return
   /// its length in bytes in the input file.  If the token needs cleaning (e.g.
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index 0b01710abf..42af489d90 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -644,10 +644,18 @@ public:
     return Diags->Report(Tok.getLocation(), DiagID);
   }
 
-  /// getSpelling() - Return the 'spelling' of the token at the given location.
+  /// getSpelling() - Return the 'spelling' of the token at the given
+  /// location; does not go up to the spelling location or down to the
+  /// instantiation location.
   ///
+  /// \param buffer Scratch storage; the result points into it only if the
+  ///   token requires "cleaning", e.g. has trigraphs or escaped newlines.
   /// \param invalid If non-null, will be set \c true if an error occurs.
-  llvm::StringRef getSpelling(SourceLocation loc, bool *invalid = 0) const;
+  llvm::StringRef getSpelling(SourceLocation loc,
+                              llvm::SmallVectorImpl<char> &buffer,
+                              bool *invalid = 0) const {
+    return Lexer::getSpelling(loc, buffer, SourceMgr, Features, invalid);
+  }
 
   /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
   /// token is the characters used to represent the token in the source file
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index 70ec778289..0a55228494 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -678,6 +678,8 @@ public:
   /// \brief Build a partial diagnostic.
   PartialDiagnostic PDiag(unsigned DiagID = 0); // in SemaInternal.h
 
+  bool findMacroSpelling(SourceLocation &loc, llvm::StringRef name);
+
   ExprResult Owned(Expr* E) { return E; }
   ExprResult Owned(ExprResult R) { return R; }
   StmtResult Owned(Stmt* S) { return S; }
@@ -1704,6 +1706,7 @@ public:
   /// initialization.
   void CollectIvarsToConstructOrDestruct(ObjCInterfaceDecl *OI,
                                   llvm::SmallVectorImpl<ObjCIvarDecl*> &Ivars);
+
   //===--------------------------------------------------------------------===//
   // Statement Parsing Callbacks: SemaStmt.cpp.
 public:
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 9643d8e978..b511421ee7 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -216,6 +216,54 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
 // Token Spelling
 //===----------------------------------------------------------------------===//
 
+/// getSpelling() - Return the 'spelling' of the token at the given location.
+/// The spelling of a token is the characters used to represent the token in
+/// the source file after trigraph expansion and escaped-newline folding.  In
+/// particular, this wants to get the true, uncanonicalized, spelling of
+/// things like digraphs, UCNs, etc.
+llvm::StringRef Lexer::getSpelling(SourceLocation loc,
+                                   llvm::SmallVectorImpl<char> &buffer,
+                                   const SourceManager &SM,
+                                   const LangOptions &options,
+                                   bool *invalid) {
+  // Break the location down into a (file, offset) pair.
+  std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
+
+  // Try to load the file buffer; on failure report it and return empty.
+  bool invalidTemp = false;
+  llvm::StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
+  if (invalidTemp) {
+    if (invalid) *invalid = true;
+    return llvm::StringRef();
+  }
+
+  const char *tokenBegin = file.data() + locInfo.second;
+
+  // Raw-lex a single token starting at the given location.
+  Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options,
+              file.begin(), tokenBegin, file.end());
+  Token token;
+  lexer.LexFromRawLexer(token);
+
+  unsigned length = token.getLength();
+
+  // Common case:  no need for cleaning; return a view of the file buffer.
+  if (!token.needsCleaning())
+    return llvm::StringRef(tokenBegin, length);
+  
+  // Hard case, we need to relex the characters into the caller's buffer.
+  buffer.clear();
+  buffer.reserve(length);
+  
+  for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) {
+    unsigned charSize;
+    buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options));
+    ti += charSize;
+  }
+
+  return llvm::StringRef(buffer.data(), buffer.size());
+}
+
 /// getSpelling() - Return the 'spelling' of this token.  The spelling of a
 /// token are the characters used to represent the token in the source file
 /// after trigraph expansion and escaped-newline folding.  In particular, this
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 40227dc5e6..f5cdb015e9 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -278,17 +278,6 @@ void Preprocessor::CodeCompleteNaturalLanguage() {
     CodeComplete->CodeCompleteNaturalLanguage();
 }
 
-llvm::StringRef Preprocessor::getSpelling(SourceLocation loc,
-                                          bool *invalid) const {
-  bool invalidTemp = false;
-  if (!invalid) invalid = &invalidTemp;
-  const char *begin = SourceMgr.getCharacterData(loc, invalid);
-  if (*invalid) return llvm::StringRef();
-
-  unsigned length = Lexer::MeasureTokenLength(loc, SourceMgr, Features);
-  return llvm::StringRef(begin, length);
-}
-
 /// getSpelling - This method is used to get the spelling of a token into a
 /// SmallVector. Note that the returned StringRef may not point to the
 /// supplied buffer if a copy can be avoided.
diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp
index 4bfb388081..0846845e26 100644
--- a/lib/Sema/Sema.cpp
+++ b/lib/Sema/Sema.cpp
@@ -592,6 +592,27 @@ Sema::Diag(SourceLocation Loc, const PartialDiagnostic& PD) {
   return Builder;
 }
 
+/// \brief Checks whether the given location is within a macro
+/// instantiation whose instantiation location is spelled with the given
+/// name.  If so, returns true and sets the location to that
+/// instantiation loc; otherwise returns false and leaves it unchanged.
+bool Sema::findMacroSpelling(SourceLocation &locref, llvm::StringRef name) {
+  SourceLocation loc = locref;
+  if (!loc.isMacroID()) return false;
+
+  // There's no good way right now to look at the intermediate
+  // instantiations, so just jump to the instantiation location.
+  loc = getSourceManager().getInstantiationLoc(loc);
+
+  // If that's written with the name, stop here.
+  llvm::SmallVector<char, 16> buffer;
+  if (getPreprocessor().getSpelling(loc, buffer) == name) {
+    locref = loc;
+    return true;
+  }
+  return false;
+}
+
 /// \brief Determines the active Scope associated with the given declaration
 /// context.
 ///
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index c4218c3365..8f79428eae 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -5261,9 +5261,8 @@ bool Sema::DiagnoseConditionalForNull(Expr *LHS, Expr *RHS,
     // In this case, check to make sure that we got here from a "NULL"
     // string in the source code.
     NullExpr = NullExpr->IgnoreParenImpCasts();
-    SourceLocation Loc =
-      getSourceManager().getInstantiationLoc(NullExpr->getExprLoc());
-    if (getPreprocessor().getSpelling(Loc) != "NULL")
+    SourceLocation loc = NullExpr->getExprLoc();
+    if (!findMacroSpelling(loc, "NULL"))
       return false;
   }
 
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index 772a557545..11e7c31382 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -95,12 +95,10 @@ static void diagnoseBadTypeAttribute(Sema &S, const AttributeList &attr,
 
   // The GC attributes are usually written with macros;  special-case them.
   if (useInstantiationLoc && loc.isMacroID() && attr.getParameterName()) {
-    SourceLocation instLoc = S.getSourceManager().getInstantiationLoc(loc);
-    llvm::StringRef macro = S.getPreprocessor().getSpelling(instLoc);
-    if ((macro == "__strong" && attr.getParameterName()->isStr("strong")) ||
-        (macro == "__weak" && attr.getParameterName()->isStr("weak"))) {
-      loc = instLoc;
-      name = macro;
+    if (attr.getParameterName()->isStr("strong")) {
+      if (S.findMacroSpelling(loc, "__strong")) name = "__strong";
+    } else if (attr.getParameterName()->isStr("weak")) {
+      if (S.findMacroSpelling(loc, "__weak")) name = "__weak";
     }
   }
 
diff --git a/test/SemaObjC/attr-objc-gc.m b/test/SemaObjC/attr-objc-gc.m
index cfe59516ee..a826317415 100644
--- a/test/SemaObjC/attr-objc-gc.m
+++ b/test/SemaObjC/attr-objc-gc.m
@@ -10,3 +10,10 @@ static id __attribute((objc_gc(hello))) f; // expected-warning{{'objc_gc' attrib
 static int __attribute__((objc_gc(weak))) g; // expected-warning {{'objc_gc' only applies to pointer types; type here is 'int'}}
 
 static __weak int h; // expected-warning {{'__weak' only applies to pointer types; type here is 'int'}}
+
+// TODO: it would be great if this were reported as __weak
+#define WEAK __weak
+static WEAK int h; // expected-warning {{'objc_gc' only applies to pointer types; type here is 'int'}}
+
+/* expected-warning {{'__weak' only applies to pointer types; type here is 'int'}}*/ static __we\
+ak int i;