SF #941229: Decode source code with sys.stdin.encoding in interactive mode

author Hye-Shik Chang <hyeshik@gmail.com>

Wed, 4 Aug 2004 17:36:41 +0000 (17:36 +0000)

committer Hye-Shik Chang <hyeshik@gmail.com>

Wed, 4 Aug 2004 17:36:41 +0000 (17:36 +0000)
author Hye-Shik Chang <hyeshik@gmail.com>
Wed, 4 Aug 2004 17:36:41 +0000 (17:36 +0000)
committer Hye-Shik Chang <hyeshik@gmail.com>
Wed, 4 Aug 2004 17:36:41 +0000 (17:36 +0000)
diff --git a/Misc/NEWS b/Misc/NEWS

index 630c85ecf0faeb2dbcc595b36a887f70ca04e194..aea6867ad82fad57816239ba5a75f329ed79b593 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -70,6 +70,10 @@ Core and builtins
  - unicode.iswide() and unicode.width() is dropped and the East Asian
    Width support is moved to unicodedata extension module.
  
+- Patch #941229: The source code encoding in interactive mode now
+  follows sys.stdin.encoding instead of always being ISO-8859-1.  This
+  allows non-Latin-1 users to write unicode strings directly.
+
  Extension modules
  -----------------
  
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index 4fdc2e672f33d2a42db44ee8b304d2ef31806a6f..8fc2c267deaba4e42fb03ab292368f5bab2356d7 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -651,6 +651,63 @@ PyTokenizer_Free(struct tok_state *tok)
         PyMem_DEL(tok);
  }
  
+#if !defined(PGEN) && defined(Py_USING_UNICODE)
+static int  /* Recode the input line *inp from sys.stdin's encoding to UTF-8. */
+tok_stdin_decode(struct tok_state *tok, char **inp)
+{      /* Returns 0 if *inp is usable (recoded or left as-is); -1 with tok->done = E_NOMEM if new_string() fails. */
+       PyObject *enc, *sysstdin, *decoded, *utf8;
+       const char *encoding;
+       char *converted;
+       /* Only act when sys.stdin is a file object whose FILE* is the C stdin. */
+       if (PySys_GetFile((char *)"stdin", NULL) != stdin)
+               return 0;
+       sysstdin = PySys_GetObject("stdin");
+       if (sysstdin == NULL || !PyFile_Check(sysstdin))
+               return 0;
+       /* Without a string-valued f_encoding the line is left untouched. */
+       enc = ((PyFileObject *)sysstdin)->f_encoding;
+       if (enc == NULL || !PyString_Check(enc))
+               return 0;
+       Py_INCREF(enc);  /* hold our own reference; every exit path below releases it */
+       /* Step 1: decode the raw line using the stdin encoding. */
+       encoding = PyString_AsString(enc);
+       decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL);
+       if (decoded == NULL)
+               goto error_clear;
+       /* Step 2: encode the unicode object as UTF-8. */
+       utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);
+       Py_DECREF(decoded);
+       if (utf8 == NULL)
+               goto error_clear;
+       /* Step 3: copy the bytes with new_string() before utf8 is released. */
+       converted = new_string(PyString_AsString(utf8), PyString_Size(utf8));
+       Py_DECREF(utf8);
+       if (converted == NULL)
+               goto error_nomem;
+       /* Step 4: swap the caller's buffer for the copy and record the encoding name. */
+       PyMem_FREE(*inp);
+       *inp = converted;
+       if (tok->encoding != NULL)  /* discard any previously recorded name */
+               PyMem_DEL(tok->encoding);
+       tok->encoding = new_string(encoding, strlen(encoding));
+       if (tok->encoding == NULL)
+               goto error_nomem;
+
+       Py_DECREF(enc);
+       return 0;
+       /* new_string() failed; *inp is still a valid buffer that the caller frees (see tok_nextc). */
+error_nomem:
+       Py_DECREF(enc);
+       tok->done = E_NOMEM;
+       return -1;
+       /* Decoding or encoding failed: discard the exception and keep *inp as read. */
+error_clear:
+       /* Fall back to iso-8859-1 for backward compatibility */
+       Py_DECREF(enc);
+       PyErr_Clear();
+       return 0;
+}
+#endif
  
  /* Get next char, updating state; error code goes into tok->done */
  
@@ -690,6 +747,10 @@ tok_nextc(register struct tok_state *tok)
                                 PyMem_FREE(new);
                                 tok->done = E_EOF;
                         }
+#if !defined(PGEN) && defined(Py_USING_UNICODE)
+                       else if (tok_stdin_decode(tok, &new) != 0)
+                               PyMem_FREE(new);
+#endif
                         else if (tok->start != NULL) {
                                 size_t start = tok->start - tok->buf;
                                 size_t oldlen = tok->cur - tok->buf;
author	Hye-Shik Chang <hyeshik@gmail.com>
	Wed, 4 Aug 2004 17:36:41 +0000 (17:36 +0000)
committer	Hye-Shik Chang <hyeshik@gmail.com>
	Wed, 4 Aug 2004 17:36:41 +0000 (17:36 +0000)
Misc/NEWS		patch \| blob \| history
Parser/tokenizer.c		patch \| blob \| history