From: Kaelyn Uhrain Date: Tue, 14 Feb 2012 18:56:48 +0000 (+0000) Subject: Use several weighted factors to determine typo candidate viablity. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=63aae82bb12bbbe9028e597fb77e40fa8d348c12;p=clang Use several weighted factors to determine typo candidate viablity. Replace the simple Levenshtein edit distance for typo correction candidates--and the hacky way adding namespace qualifiers would affect the edit distance--with a synthetic "edit distance" comprised of several factors and their relative weights. This also allows the typo correction callback object to convey more information about the viability of a correction candidate than simply viable or not viable. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@150495 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Sema/TypoCorrection.h b/include/clang/Sema/TypoCorrection.h index 24d7d5f387..a8f6e1178b 100644 --- a/include/clang/Sema/TypoCorrection.h +++ b/include/clang/Sema/TypoCorrection.h @@ -23,32 +23,46 @@ namespace clang { /// @brief Simple class containing the result of Sema::CorrectTypo class TypoCorrection { public: + // "Distance" for unusable corrections + static const unsigned InvalidDistance = ~0U; + // The largest distance still considered valid (larger edit distances are + // mapped to InvalidDistance by getEditDistance). + static const unsigned MaximumDistance = 10000U; + + // Relative weightings of the "edit distance" components. The higher the + // weight, the more of a penalty to fitness the component will give (higher + // weights mean greater contribution to the total edit distance, with the + // best correction candidates having the lowest edit distance). + static const unsigned CharDistanceWeight = 100U; + static const unsigned QualifierDistanceWeight = 110U; + static const unsigned CallbackDistanceWeight = 150U; + TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl, - NestedNameSpecifier *NNS=0, unsigned distance=0) - : CorrectionName(Name), - CorrectionNameSpec(NNS), - EditDistance(distance) { + NestedNameSpecifier *NNS=0, unsigned CharDistance=0, + unsigned QualifierDistance=0) + : CorrectionName(Name), CorrectionNameSpec(NNS), + CharDistance(CharDistance), QualifierDistance(QualifierDistance), + CallbackDistance(0) { if (NameDecl) CorrectionDecls.push_back(NameDecl); } TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS=0, - unsigned distance=0) - : CorrectionName(Name->getDeclName()), - CorrectionNameSpec(NNS), - EditDistance(distance) { + unsigned CharDistance=0) + : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS), + CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) { if (Name) CorrectionDecls.push_back(Name); } TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS=0, - unsigned distance=0) - : CorrectionName(Name), - CorrectionNameSpec(NNS), - EditDistance(distance) {} + unsigned CharDistance=0) + : CorrectionName(Name), CorrectionNameSpec(NNS), + CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {} TypoCorrection() - : CorrectionNameSpec(0), EditDistance(0) {} + : CorrectionNameSpec(0), CharDistance(0), QualifierDistance(0), + CallbackDistance(0) {} /// \brief Gets the DeclarationName of the typo correction DeclarationName getCorrection() const { return CorrectionName; } @@ -64,8 +78,41 @@ public: CorrectionNameSpec = NNS; } - /// \brief Gets the "edit distance" of the typo correction from the typo - unsigned getEditDistance() const { return EditDistance; } + void setQualifierDistance(unsigned ED) { + QualifierDistance = ED; + } + + void setCallbackDistance(unsigned ED) { + CallbackDistance = ED; + } + + // Convert the given weighted edit distance to a roughly equivalent number of + // single-character edits (typically for comparison to the length of the + // string being edited). + static unsigned NormalizeEditDistance(unsigned ED) { + if (ED > MaximumDistance) + return InvalidDistance; + return (ED + CharDistanceWeight / 2) / CharDistanceWeight; + } + + /// \brief Gets the "edit distance" of the typo correction from the typo. + /// If Normalized is true, scale the distance down by the CharDistanceWeight + /// to return the edit distance in terms of single-character edits. + unsigned getEditDistance(bool Normalized = true) const { + if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance || + CallbackDistance > MaximumDistance) + return InvalidDistance; + unsigned ED = + CharDistance * CharDistanceWeight + + QualifierDistance * QualifierDistanceWeight + + CallbackDistance * CallbackDistanceWeight; + if (ED > MaximumDistance) + return InvalidDistance; + // Half the CharDistanceWeight is added to ED to simulate rounding since + // integer division truncates the value (i.e. round-to-nearest-int instead + // of round-to-zero). + return Normalized ? NormalizeEditDistance(ED) : ED; + } /// \brief Gets the pointer to the declaration of the typo correction NamedDecl* getCorrectionDecl() const { @@ -143,13 +190,17 @@ private: DeclarationName CorrectionName; NestedNameSpecifier *CorrectionNameSpec; llvm::SmallVector CorrectionDecls; - unsigned EditDistance; + unsigned CharDistance; + unsigned QualifierDistance; + unsigned CallbackDistance; }; /// @brief Base class for callback objects used by Sema::CorrectTypo to check /// the validity of a potential typo correction. class CorrectionCandidateCallback { public: + static const unsigned InvalidDistance = TypoCorrection::InvalidDistance; + CorrectionCandidateCallback() : WantTypeSpecifiers(true), WantExpressionKeywords(true), WantCXXNamedCasts(true), WantRemainingKeywords(true), @@ -158,10 +209,26 @@ class CorrectionCandidateCallback { virtual ~CorrectionCandidateCallback() {} + /// \brief Simple predicate used by the default RankCandidate to + /// determine whether to return an edit distance of 0 or InvalidDistance. + /// This can be overrided by validators that only need to determine if a + /// candidate is viable, without ranking potentially viable candidates. + /// Only ValidateCandidate or RankCandidate need to be overriden by a + /// callback wishing to check the viability of correction candidates. virtual bool ValidateCandidate(const TypoCorrection &candidate) { return true; } + /// \brief Method used by Sema::CorrectTypo to assign an "edit distance" rank + /// to a candidate (where a lower value represents a better candidate), or + /// returning InvalidDistance if the candidate is not at all viable. For + /// validation callbacks that only need to determine if a candidate is viable, + /// the default RankCandidate returns either 0 or InvalidDistance depending + /// whether ValidateCandidate returns true or false. + virtual unsigned RankCandidate(const TypoCorrection &candidate) { + return ValidateCandidate(candidate) ? 0 : InvalidDistance; + } + // Flags for context-dependent keywords. // TODO: Expand these to apply to non-keywords or possibly remove them. bool WantTypeSpecifiers; diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp index a65ad729e4..f2d49872bc 100644 --- a/lib/Sema/SemaLookup.cpp +++ b/lib/Sema/SemaLookup.cpp @@ -3109,9 +3109,12 @@ public: return (*BestResults.begin()->second)[Name]; } - unsigned getBestEditDistance() { - return (BestResults.empty()) ? - (std::numeric_limits::max)() : BestResults.begin()->first; + unsigned getBestEditDistance(bool Normalized) { + if (BestResults.empty()) + return (std::numeric_limits::max)(); + + unsigned BestED = BestResults.begin()->first; + return Normalized ? TypoCorrection::NormalizeEditDistance(BestED) : BestED; } }; @@ -3167,7 +3170,7 @@ void TypoCorrectionConsumer::addName(StringRef Name, void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) { StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName(); - TypoResultsMap *& Map = BestResults[Correction.getEditDistance()]; + TypoResultsMap *& Map = BestResults[Correction.getEditDistance(false)]; if (!Map) Map = new TypoResultsMap; @@ -3478,6 +3481,12 @@ static void AddKeywordsToConsumer(Sema &SemaRef, } } +static bool isCandidateViable(CorrectionCandidateCallback &CCC, + TypoCorrection &Candidate) { + Candidate.setCallbackDistance(CCC.RankCandidate(Candidate)); + return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance; +} + /// \brief Try to "correct" a typo in the source code by finding /// visible declarations whose names are similar to the name that was /// present in the source code. @@ -3545,10 +3554,10 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, TypoCorrectionConsumer Consumer(*this, Typo); - // If a callback object returns true for an empty typo correction candidate, - // assume it does not do any actual validation of the candidates. + // If a callback object considers an empty typo correction candidate to be + // viable, assume it does not do any actual validation of the candidates. TypoCorrection EmptyCorrection; - bool ValidatingCallback = !CCC.ValidateCandidate(EmptyCorrection); + bool ValidatingCallback = !isCandidateViable(CCC, EmptyCorrection); // Perform name lookup to find visible, similarly-named entities. bool IsUnqualifiedLookup = false; @@ -3584,7 +3593,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, // keyword case, we'll end up adding the keyword below. if (Cached->second) { if (!Cached->second.isKeyword() && - CCC.ValidateCandidate(Cached->second)) + isCandidateViable(CCC, Cached->second)) Consumer.addCorrection(Cached->second); } else { // Only honor no-correction cache hits when a callback that will validate @@ -3637,7 +3646,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, // Make sure that the user typed at least 3 characters for each correction // made. Otherwise, we don't even both looking at the results. - unsigned ED = Consumer.getBestEditDistance(); + unsigned ED = Consumer.getBestEditDistance(true); if (ED > 0 && Typo->getName().size() / ED < 3) { // If this was an unqualified lookup, note that no correction was found. if (IsUnqualifiedLookup) @@ -3666,7 +3675,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, // Weed out any names that could not be found by name lookup or, if a // CorrectionCandidateCallback object was provided, failed validation. - llvm::SmallPtrSet QualifiedResults; + llvm::SmallVector QualifiedResults; LookupResult TmpRes(*this, TypoName, LookupKind); TmpRes.suppressDiagnostics(); while (!Consumer.empty()) { @@ -3681,7 +3690,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, if (I->second.isResolved()) { TypoCorrectionConsumer::result_iterator Prev = I; ++I; - if (!CCC.ValidateCandidate(Prev->second)) + if (!isCandidateViable(CCC, Prev->second)) DI->second->erase(Prev); continue; } @@ -3695,7 +3704,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, case LookupResult::NotFound: case LookupResult::NotFoundInCurrentInstantiation: case LookupResult::FoundUnresolvedValue: - QualifiedResults.insert(Name); + QualifiedResults.push_back(I->second); // We didn't find this name in our scope, or didn't like what we found; // ignore it. { @@ -3718,7 +3727,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, TRD != TRDEnd; ++TRD) I->second.addCorrectionDecl(*TRD); ++I; - if (!CCC.ValidateCandidate(Prev->second)) + if (!isCandidateViable(CCC, Prev->second)) DI->second->erase(Prev); break; } @@ -3727,7 +3736,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, TypoCorrectionConsumer::result_iterator Prev = I; I->second.setCorrectionDecl(TmpRes.getAsSingle()); ++I; - if (!CCC.ValidateCandidate(Prev->second)) + if (!isCandidateViable(CCC, Prev->second)) DI->second->erase(Prev); break; } @@ -3744,7 +3753,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, // Only perform the qualified lookups for C++ if (getLangOptions().CPlusPlus) { TmpRes.suppressDiagnostics(); - for (llvm::SmallPtrSet::iterator QRI = QualifiedResults.begin(), QRIEnd = QualifiedResults.end(); QRI != QRIEnd; ++QRI) { @@ -3752,33 +3761,33 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, NIEnd = Namespaces.end(); NI != NIEnd; ++NI) { DeclContext *Ctx = NI->DeclCtx; - unsigned QualifiedED = ED + NI->EditDistance; // FIXME: Stop searching once the namespaces are too far away to create // acceptable corrections for this identifier (since the namespaces // are sorted in ascending order by edit distance). TmpRes.clear(); - TmpRes.setLookupName(*QRI); + TmpRes.setLookupName(QRI->getCorrectionAsIdentifierInfo()); if (!LookupQualifiedName(TmpRes, Ctx)) continue; // Any corrections added below will be validated in subsequent // iterations of the main while() loop over the Consumer's contents. switch (TmpRes.getResultKind()) { case LookupResult::Found: - Consumer.addName((*QRI)->getName(), TmpRes.getAsSingle(), - QualifiedED, NI->NameSpecifier); + QRI->setCorrectionDecl(TmpRes.getAsSingle()); + QRI->setCorrectionSpecifier(NI->NameSpecifier); + QRI->setQualifierDistance(NI->EditDistance); + Consumer.addCorrection(*QRI); break; - case LookupResult::FoundOverloaded: { - TypoCorrection corr(&Context.Idents.get((*QRI)->getName()), NULL, - NI->NameSpecifier, QualifiedED); + case LookupResult::FoundOverloaded: + QRI->setCorrectionSpecifier(NI->NameSpecifier); + QRI->setQualifierDistance(NI->EditDistance); for (LookupResult::iterator TRD = TmpRes.begin(), TRDEnd = TmpRes.end(); TRD != TRDEnd; ++TRD) - corr.addCorrectionDecl(*TRD); - Consumer.addCorrection(corr); + QRI->addCorrectionDecl(*TRD); + Consumer.addCorrection(*QRI); break; - } case LookupResult::NotFound: case LookupResult::NotFoundInCurrentInstantiation: case LookupResult::Ambiguous: @@ -3796,7 +3805,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, if (Consumer.empty()) return TypoCorrection(); TypoResultsMap &BestResults = *Consumer.begin()->second; - ED = Consumer.begin()->first; + ED = TypoCorrection::NormalizeEditDistance(Consumer.begin()->first); if (ED > 0 && Typo->getName().size() / ED < 3) { // If this was an unqualified lookup and we believe the callback @@ -3808,22 +3817,6 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, return TypoCorrection(); } - // If we have multiple possible corrections, eliminate the ones where we - // added namespace qualifiers to try to resolve the ambiguity (and to favor - // corrections without additional namespace qualifiers) - if (getLangOptions().CPlusPlus && BestResults.size() > 1) { - TypoCorrectionConsumer::distance_iterator DI = Consumer.begin(); - for (TypoCorrectionConsumer::result_iterator I = DI->second->begin(), - IEnd = DI->second->end(); - I != IEnd; /* Increment in loop. */) { - if (I->second.getCorrectionSpecifier() != NULL) { - TypoCorrectionConsumer::result_iterator Cur = I; - ++I; - DI->second->erase(Cur); - } else ++I; - } - } - // If only a single name remains, return that result. if (BestResults.size() == 1) { const llvm::StringMapEntry &Correction = *(BestResults.begin()); diff --git a/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp b/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp index adfd3430df..780a4efcbd 100644 --- a/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp +++ b/test/SemaCXX/missing-namespace-qualifier-typo-corrections.cpp @@ -74,3 +74,24 @@ void bar() { (void)new llvm::GraphWriter; // expected-error {{expected a type}} (void)new llvm::Graphwriter; // expected-error {{no template named 'Graphwriter' in namespace 'llvm'; did you mean 'GraphWriter'?}} } + +// If namespace prefixes and character edits have the same weight, correcting +// "fimish" to "N::famish" would have the same edit distance as correcting +// "fimish" to "Finish". The result would be no correction being suggested +// unless one of the corrections is given precedence (e.g. by filtering out +// suggestions with added namespace qualifiers). +namespace N { void famish(int); } +void Finish(int); // expected-note {{'Finish' declared here}} +void Start() { + fimish(7); // expected-error {{use of undeclared identifier 'fimish'; did you mean 'Finish'?}} +} + +// But just eliminating the corrections containing added namespace qualifiers +// won't work if both of the tied corrections have namespace qualifiers added. +namespace N { +void someCheck(int); // expected-note {{'N::someCheck' declared here}} +namespace O { void somechock(int); } +} +void confusing() { + somechick(7); // expected-error {{use of undeclared identifier 'somechick'; did you mean 'N::someCheck'?}} +}