Ignore encoding declarations inside strings. Fixes #603509.

author Martin v. Löwis <martin@v.loewis.de>

Tue, 3 Sep 2002 11:52:44 +0000 (11:52 +0000)

committer Martin v. Löwis <martin@v.loewis.de>

Tue, 3 Sep 2002 11:52:44 +0000 (11:52 +0000)
author Martin v. Löwis <martin@v.loewis.de>
Tue, 3 Sep 2002 11:52:44 +0000 (11:52 +0000)
committer Martin v. Löwis <martin@v.loewis.de>
Tue, 3 Sep 2002 11:52:44 +0000 (11:52 +0000)
diff --git a/Doc/ref/ref2.tex b/Doc/ref/ref2.tex

index 33199492dfe345a76bc9509befe164e7c8856522..4947650a1dd36060fe9a23d11cb9c3fb9a2bc04b 100644 (file)
--- a/Doc/ref/ref2.tex
+++ b/Doc/ref/ref2.tex
@@ -101,7 +101,7 @@ The encoding is used for all lexical analysis, in particular to find
  the end of a string, and to interpret the contents of Unicode literals.
  String literals are converted to Unicode for syntactical analysis,
  then converted back to their original encoding before interpretation
-starts.
+starts. The encoding declaration must appear on a line of its own.
  
  \subsection{Explicit line joining\label{explicit-joining}}
  
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index 4119c43d5d3eeb05431341036cf7108df107fdda..c58aadbed751a7bc66a49e2b9200f146c75cd7d4 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -128,6 +128,7 @@ tok_new(void)
         tok->read_coding_spec = 0;
         tok->issued_encoding_warning = 0;
         tok->encoding = NULL;
+        tok->cont_line = 0;
  #ifndef PGEN
         tok->decoding_readline = NULL;
         tok->decoding_buffer = NULL;
@@ -207,7 +208,15 @@ static char *
  get_coding_spec(const char *s, int size)
  {
         int i;
-       for (i = 0; i < size - 6; i++) { /* XXX inefficient search */
+       /* Coding spec must be in a comment, and that comment must be
+         * the only statement on the source code line. */
+        for (i = 0; i < size - 6; i++) {
+               if (s[i] == '#')
+                       break;
+               if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
+                       return NULL;
+       }
+       for (; i < size - 6; i++) { /* XXX inefficient search */
                 const char* t = s + i;
                 if (strncmp(t, "coding", 6) == 0) {
                         const char* begin = NULL;
@@ -247,6 +256,9 @@ check_coding_spec(const char* line, int size, struct tok_state *tok,
                   int set_readline(struct tok_state *, const char *))
  {
         int r = 1;
+        if (tok->cont_line)
+               /* It's a continuation line, so it can't be a coding spec. */
+               return 1;
         char* cs = get_coding_spec(line, size);
         if (cs != NULL) {
                 tok->read_coding_spec = 1;
@@ -1158,6 +1170,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                         goto nextline;
                 *p_start = tok->start;
                 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
+                tok->cont_line = 0;
                 return NEWLINE;
         }
         
@@ -1292,6 +1305,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                                         return ERRORTOKEN;
                                 }
                                 tripcount = 0;
+                                tok->cont_line = 1; /* multiline string. */
                         }
                         else if (c == EOF) {
                                 if (triple)
@@ -1340,6 +1354,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                         tok->cur = tok->inp;
                         return ERRORTOKEN;
                 }
+                tok->cont_line = 1;
                 goto again; /* Read next line */
         }
         
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h

index f3bac74acd6c9e814b321e20b6ed9bdff50b22e7..b3d456a27d8c4c52c0ed83d699d271562bc497d1 100644 (file)
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -45,6 +45,7 @@ struct tok_state {
         int read_coding_spec;   /* whether 'coding:...' has been read  */
         int issued_encoding_warning; /* whether non-ASCII warning was issued */
         char *encoding;
+       int cont_line;          /* whether we are in a continuation line. */
  #ifndef PGEN
         PyObject *decoding_readline; /* codecs.open(...).readline */
         PyObject *decoding_buffer;
author	Martin v. Löwis <martin@v.loewis.de>
	Tue, 3 Sep 2002 11:52:44 +0000 (11:52 +0000)
committer	Martin v. Löwis <martin@v.loewis.de>
	Tue, 3 Sep 2002 11:52:44 +0000 (11:52 +0000)
Doc/ref/ref2.tex		patch | blob | history
Parser/tokenizer.c		patch | blob | history
Parser/tokenizer.h		patch | blob | history