granicus.if.org Git - python/commitdiff
Cleanup of tokenizer.c.
author: Guido van Rossum <guido@python.org>
Fri, 16 Nov 2007 00:51:45 +0000 (00:51 +0000)
committer: Guido van Rossum <guido@python.org>
Fri, 16 Nov 2007 00:51:45 +0000 (00:51 +0000)
Parser/tokenizer.c

index 710c566b34e13e2763cf1f93c8754d4ada7dbd87..1c2b8e8e107e4c8f1bfb5cff68d0f19c095191a2 100644 (file)
@@ -1269,30 +1269,24 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
        /* Identifier (most frequent token!) */
        nonascii = 0;
        if (is_potential_identifier_start(c)) {
-               /* Process r"", u"" and ur"" */
-               switch (c) {
-               case 'r':
-               case 'R':
+               /* Process b"", r"" and br"" */
+               if (c == 'b' || c == 'B') {
                        c = tok_nextc(tok);
                        if (c == '"' || c == '\'')
                                goto letter_quote;
-                       break;
-               case 'b':
-               case 'B':
+               }
+               if (c == 'r' || c == 'R') {
                        c = tok_nextc(tok);
-                       if (c == 'r' || c == 'R')
-                               c = tok_nextc(tok);
                        if (c == '"' || c == '\'')
                                goto letter_quote;
-                       break;
-               }
+           }
                while (is_potential_identifier_char(c)) {
                        if (c >= 128)
                                nonascii = 1;
                        c = tok_nextc(tok);
                }
                tok_backup(tok, c);
-               if (nonascii && 
+               if (nonascii &&
                    !verify_identifier(tok->start, tok->cur)) {
                        tok->done = E_IDENTIFIER;
                        return ERRORTOKEN;
@@ -1322,7 +1316,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                        c = tok_nextc(tok);
                        if (c == '.') {
                                *p_start = tok->start;
-                               *p_end = tok->cur; 
+                               *p_end = tok->cur;
                                return ELLIPSIS;
                        } else {
                                tok_backup(tok, c);
@@ -1436,55 +1430,47 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
   letter_quote:
        /* String */
        if (c == '\'' || c == '"') {
-               Py_ssize_t quote2 = tok->cur - tok->start + 1;
-               int quote = c;
-               int triple = 0;
-               int tripcount = 0;
-               for (;;) {
-                       c = tok_nextc(tok);
-                       if (c == '\n') {
-                               if (!triple) {
-                                       tok->done = E_EOLS;
-                                       tok_backup(tok, c);
-                                       return ERRORTOKEN;
-                               }
-                               tripcount = 0;
-                                tok->cont_line = 1; /* multiline string. */
-                       }
-                       else if (c == EOF) {
-                               if (triple)
-                                       tok->done = E_EOFS;
-                               else
-                                       tok->done = E_EOLS;
-                               tok->cur = tok->inp;
-                               return ERRORTOKEN;
-                       }
-                       else if (c == quote) {
-                               tripcount++;
-                               if (tok->cur - tok->start == quote2) {
-                                       c = tok_nextc(tok);
-                                       if (c == quote) {
-                                               triple = 1;
-                                               tripcount = 0;
-                                               continue;
-                                       }
-                                       tok_backup(tok, c);
-                               }
-                               if (!triple || tripcount == 3)
-                                       break;
-                       }
-                       else if (c == '\\') {
-                               tripcount = 0;
-                               c = tok_nextc(tok);
-                               if (c == EOF) {
-                                       tok->done = E_EOLS;
-                                       tok->cur = tok->inp;
-                                       return ERRORTOKEN;
-                               }
-                       }
+               int quote = c;
+               int quote_size = 1;             /* 1 or 3 */
+               int end_quote_size = 0;
+
+               /* Find the quote size and start of string */
+               c = tok_nextc(tok);
+               if (c == quote) {
+                       c = tok_nextc(tok);
+                       if (c == quote)
+                               quote_size = 3;
                        else
-                               tripcount = 0;
+                               end_quote_size = 1;     /* empty string found */
                }
+               if (c != quote)
+                   tok_backup(tok, c);
+
+               /* Get rest of string */
+               while (end_quote_size != quote_size) {
+                       c = tok_nextc(tok);
+                       if (c == EOF) {
+                               if (quote_size == 3)
+                                       tok->done = E_EOFS;
+                               else
+                                       tok->done = E_EOLS;
+                               tok->cur = tok->inp;
+                               return ERRORTOKEN;
+                       }
+                       if (quote_size == 1 && c == '\n') {
+                           tok->done = E_EOLS;
+                           tok->cur = tok->inp;
+                           return ERRORTOKEN;
+                       }
+                       if (c == quote)
+                           end_quote_size += 1;
+                       else {
+                           end_quote_size = 0;
+                           if (c == '\\')
+                               c = tok_nextc(tok);  /* skip escaped char */
+                       }
+               }
+
                *p_start = tok->start;
                *p_end = tok->cur;
                return STRING;
@@ -1619,7 +1605,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
 /* Get -*- encoding -*- from a Python file.
 
    PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
-   the first or second line of the file (in which case the encoding 
+   the first or second line of the file (in which case the encoding
    should be assumed to be PyUnicode_GetDefaultEncoding()).
 
    The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed