From: Chris Lattner
Date: Mon, 23 Jul 2007 06:09:34 +0000 (+0000)
Subject: change the concatenation avoidance algorithm to be partially table-driven
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f0f2b295437efecc0b52220b3f4a469fbb9aeac8;p=clang

change the concatenation avoidance algorithm to be partially table-driven
and avoid computing the spelling of tokens when not needed.  This speeds up
-E on 447.dealII by 2.2%

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@40421 91177308-0d34-0410-b5e6-96231b3b80d8
---

Review notes on this copy of the patch (the text differs from the copy it was
restored from in the following places; the blob ids on the "index" line were
left untouched and therefore describe the unmodified patch):

  * AvoidConcat: after the '=' / '==' check, only the aci_avoid_equal bit is
    cleared (was "ConcatInfo &= ~ConcatInfo", which zeroed the whole mask and
    made every token that also needs a first-character check return false).
  * InitAvoidConcatTokenInfo: the ">>=" entry marks tok::greatergreater (was
    tok::greaterequal, which is ">=" and contradicted its own comment).
  * DoPrintPreprocessedInput: AvoidConcat is consulted only when a token HAS
    already been emitted on the line (the condition was negated, contradicting
    the comment above it and the early-out it replaces).
  * The removed tok::less / tok::greater lines in the fourth hunk were garbled
    in the received text; they are reconstructed so that the hunk matches its
    "-343,52" line count.  TODO: verify against the repository.
  * Line breaks and column alignment were restored by hand from a
    whitespace-collapsed copy; exact whitespace is not guaranteed.

diff --git a/Driver/PrintPreprocessedOutput.cpp b/Driver/PrintPreprocessedOutput.cpp
index cb8bfec3c4..a7d55df34b 100644
--- a/Driver/PrintPreprocessedOutput.cpp
+++ b/Driver/PrintPreprocessedOutput.cpp
@@ -122,6 +122,7 @@ public:
   }
 
   void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
+  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
 
   virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                            DirectoryLookup::DirType FileType);
@@ -305,6 +306,67 @@ struct UnknownPragmaHandler : public PragmaHandler {
 };
 } // end anonymous namespace
 
+
+enum AvoidConcatInfo {
+  /// By default, a token never needs to avoid concatenation.  Most tokens (e.g.
+  /// ',', ')', etc) don't cause a problem when concatenated.
+  aci_never_avoid_concat = 0,
+
+  /// aci_custom_firstchar - AvoidConcat contains custom code to handle this
+  /// token's requirements, and it needs to know the first character of the
+  /// token.
+  aci_custom_firstchar = 1,
+
+  /// aci_custom - AvoidConcat contains custom code to handle this token's
+  /// requirements, but it doesn't need to know the first character of the
+  /// token.
+  aci_custom = 2,
+
+  /// aci_avoid_equal - Many tokens cannot be safely followed by an '='
+  /// character.  For example, "<<" turns into "<<=" when followed by an =.
+  aci_avoid_equal = 4
+};
+
+/// This array contains information for each token on what action to take when
+/// avoiding concatenation of tokens in the AvoidConcat method.
+static char TokenInfo[tok::NUM_TOKENS];
+
+/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
+/// marked by this function.
+static void InitAvoidConcatTokenInfo() {
+  // These tokens have custom code in AvoidConcat.
+  TokenInfo[tok::identifier      ] |= aci_custom;
+  TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
+  TokenInfo[tok::period          ] |= aci_custom_firstchar;
+  TokenInfo[tok::amp             ] |= aci_custom_firstchar;
+  TokenInfo[tok::plus            ] |= aci_custom_firstchar;
+  TokenInfo[tok::minus           ] |= aci_custom_firstchar;
+  TokenInfo[tok::slash           ] |= aci_custom_firstchar;
+  TokenInfo[tok::less            ] |= aci_custom_firstchar;
+  TokenInfo[tok::greater         ] |= aci_custom_firstchar;
+  TokenInfo[tok::pipe            ] |= aci_custom_firstchar;
+  TokenInfo[tok::percent         ] |= aci_custom_firstchar;
+  TokenInfo[tok::colon           ] |= aci_custom_firstchar;
+  TokenInfo[tok::hash            ] |= aci_custom_firstchar;
+  TokenInfo[tok::arrow           ] |= aci_custom_firstchar;
+
+  // These tokens change behavior if followed by an '='.
+  TokenInfo[tok::amp             ] |= aci_avoid_equal;           // &=
+  TokenInfo[tok::plus            ] |= aci_avoid_equal;           // +=
+  TokenInfo[tok::minus           ] |= aci_avoid_equal;           // -=
+  TokenInfo[tok::slash           ] |= aci_avoid_equal;           // /=
+  TokenInfo[tok::less            ] |= aci_avoid_equal;           // <=
+  TokenInfo[tok::greater         ] |= aci_avoid_equal;           // >=
+  TokenInfo[tok::pipe            ] |= aci_avoid_equal;           // |=
+  TokenInfo[tok::percent         ] |= aci_avoid_equal;           // %=
+  TokenInfo[tok::star            ] |= aci_avoid_equal;           // *=
+  TokenInfo[tok::exclaim         ] |= aci_avoid_equal;           // !=
+  TokenInfo[tok::lessless        ] |= aci_avoid_equal;           // <<=
+  TokenInfo[tok::greatergreater  ] |= aci_avoid_equal;           // >>=
+  TokenInfo[tok::caret           ] |= aci_avoid_equal;           // ^=
+  TokenInfo[tok::equal           ] |= aci_avoid_equal;           // ==
+}
+
 /// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
 /// the two individual tokens to be lexed as a single token, return true (which
 /// causes a space to be printed between them).  This allows the output of -E
@@ -320,16 +382,35 @@ bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
                                            const Token &Tok) {
   char Buffer[256];
 
-  // If we haven't emitted a token on this line yet, PrevTok isn't useful to
-  // look at and no concatenation could happen anyway.
-  if (!EmittedTokensOnThisLine)
-    return false;
+  tok::TokenKind PrevKind = PrevTok.getKind();
+  if (PrevTok.getIdentifierInfo())  // Language keyword or named operator.
+    PrevKind = tok::identifier;
+
+  // Look up information on when we should avoid concatenation with prevtok.
+  unsigned ConcatInfo = TokenInfo[PrevKind];
+
+  // If prevtok never causes a problem for anything after it, return quickly.
+  if (ConcatInfo == 0) return false;
 
+  if (ConcatInfo & aci_avoid_equal) {
+    // If the next token is '=' or '==', avoid concatenation.
+    if (Tok.getKind() == tok::equal ||
+        Tok.getKind() == tok::equalequal)
+      return true;
+    ConcatInfo &= ~aci_avoid_equal;
+  }
+
+  if (ConcatInfo == 0) return false;
+
+
+
   // Basic algorithm: we look at the first character of the second token, and
   // determine whether it, if appended to the first token, would form (or would
   // contribute) to a larger token if concatenated.
-  char FirstChar;
-  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+  char FirstChar = 0;
+  if (ConcatInfo & aci_custom) {
+    // If the token does not need to know the first character, don't get it.
+  } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
     // Avoid spelling identifiers, the most common form of token.
     FirstChar = II->getName()[0];
   } else if (!Tok.needsCleaning()) {
@@ -343,52 +424,56 @@ bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
   } else {
     FirstChar = PP.getSpelling(Tok)[0];
   }
-
-  tok::TokenKind PrevKind = PrevTok.getKind();
-  if (PrevTok.getIdentifierInfo())  // Language keyword or named operator.
-    PrevKind = tok::identifier;
-
+
   switch (PrevKind) {
-  default: return false;
+  default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
   case tok::identifier:   // id+id or id+number or id+L"foo".
-    return isalnum(FirstChar) || FirstChar == '_';
+    if (Tok.getKind() == tok::numeric_constant || Tok.getIdentifierInfo() ||
+        Tok.getKind() == tok::wide_string_literal /* ||
+        Tok.getKind() == tok::wide_char_literal*/)
+      return true;
+    if (Tok.getKind() != tok::char_constant)
+      return false;
+
+    // FIXME: need a wide_char_constant!
+    if (!Tok.needsCleaning()) {
+      SourceManager &SrcMgr = PP.getSourceManager();
+      return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
+               == 'L';
+    } else if (Tok.getLength() < 256) {
+      const char *TokPtr = Buffer;
+      PP.getSpelling(Tok, TokPtr);
+      return TokPtr[0] == 'L';
+    } else {
+      return PP.getSpelling(Tok)[0] == 'L';
+    }
   case tok::numeric_constant:
     return isalnum(FirstChar) || Tok.getKind() == tok::numeric_constant ||
            FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
   case tok::period:          // ..., .*, .1234
     return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
-  case tok::amp:             // &&, &=
-    return FirstChar == '&' || FirstChar == '=';
-  case tok::plus:            // ++, +=
-    return FirstChar == '+' || FirstChar == '=';
-  case tok::minus:           // --, ->, -=, ->*
-    return FirstChar == '-' || FirstChar == '>' || FirstChar == '=';
-  case tok::slash:           // /=, /*, //
-    return FirstChar == '=' || FirstChar == '*' || FirstChar == '/';
-  case tok::less:            // <<, <<=, <=, <?=, <?, <:, <%
-    return FirstChar == '<' || FirstChar == '?' || FirstChar == '=' ||
-           FirstChar == ':' || FirstChar == '%';
-  case tok::greater:         // >>, >=, >>=, >?=, >?
-    return FirstChar == '>' || FirstChar == '?' || FirstChar == '=';
-  case tok::pipe:            // ||, |=
-    return FirstChar == '|' || FirstChar == '=';
-  case tok::percent:         // %=, %>, %:
-    return FirstChar == '=' || FirstChar == '>' || FirstChar == ':';
+  case tok::amp:             // &&
+    return FirstChar == '&';
+  case tok::plus:            // ++
+    return FirstChar == '+';
+  case tok::minus:           // --, ->, ->*
+    return FirstChar == '-' || FirstChar == '>';
+  case tok::slash:           //, /*, //
+    return FirstChar == '*' || FirstChar == '/';
+  case tok::less:            // <<, <<=, <:, <%
+    return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
+  case tok::greater:         // >>, >>=
+    return FirstChar == '>';
+  case tok::pipe:            // ||
+    return FirstChar == '|';
+  case tok::percent:         // %>, %:
+    return FirstChar == '>' || FirstChar == ':';
   case tok::colon:           // ::, :>
     return FirstChar == ':' || FirstChar == '>';
   case tok::hash:            // ##, #@, %:%:
     return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
   case tok::arrow:           // ->*
     return FirstChar == '*';
-
-  case tok::star:            // *=
-  case tok::exclaim:         // !=
-  case tok::lessless:        // <<=
-  case tok::greaterequal:    // >>=
-  case tok::caret:           // ^=
-  case tok::equal:           // ==
-    // Cases that concatenate only if the next char is =.
-    return FirstChar == '=';
   }
 }
 
@@ -401,6 +486,7 @@ void clang::DoPrintPreprocessedInput(unsigned MainFileID, Preprocessor &PP,
   PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
 
   InitOutputBuffer();
+  InitAvoidConcatTokenInfo();
 
   Token Tok, PrevTok;
   char Buffer[256];
@@ -423,8 +509,11 @@ void clang::DoPrintPreprocessedInput(unsigned MainFileID, Preprocessor &PP,
     if (Tok.isAtStartOfLine()) {
       Callbacks->HandleFirstTokOnLine(Tok);
     } else if (Tok.hasLeadingSpace() ||
-               // Don't print "-" next to "-", it would form "--".
-               Callbacks->AvoidConcat(PrevTok, Tok)) {
+               // If we haven't emitted a token on this line yet, PrevTok isn't
+               // useful to look at and no concatenation could happen anyway.
+               (Callbacks->hasEmittedTokensOnThisLine() &&
+                // Don't print "-" next to "-", it would form "--".
+                Callbacks->AvoidConcat(PrevTok, Tok))) {
       OutputChar(' ');
     }
 