granicus.if.org Git - python/commitdiff
zipimport: read_directory() uses cp437 or utf-8 (in strict mode), depending on
author Victor Stinner <victor.stinner@haypocalc.com>
Mon, 18 Oct 2010 12:13:46 +0000 (12:13 +0000)
committer Victor Stinner <victor.stinner@haypocalc.com>
Mon, 18 Oct 2010 12:13:46 +0000 (12:13 +0000)
the unicode flag, to decode the filename, instead of the filesystem encoding.
Use the same choice as the zipfile module.

Modules/zipimport.c

index 5f2e16076c40bd0c3b690ffc80f39d32d6487648..0b9ad181f4d7bc99a7add6a440ae88585e51d329 100644 (file)
@@ -714,6 +714,7 @@ read_directory(PyObject *archive_obj)
     /* FIXME: work on Py_UNICODE* instead of char* */
     PyObject *files = NULL;
     FILE *fp;
+    unsigned short flags;
     long compress, crc, data_size, file_size, file_offset, date, time;
     long header_offset, name_size, header_size, header_position;
     long i, l, count;
@@ -724,6 +725,7 @@ read_directory(PyObject *archive_obj)
     char *p, endof_central_dir[22];
     long arc_offset; /* offset from beginning of file to start of zip-archive */
     PyObject *pathobj;
+    const char *charset;
 
     if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) {
         PyErr_SetString(PyExc_OverflowError,
@@ -776,7 +778,8 @@ read_directory(PyObject *archive_obj)
         l = PyMarshal_ReadLongFromFile(fp);
         if (l != 0x02014B50)
             break;              /* Bad: Central Dir File Header */
-        fseek(fp, header_offset + 10, 0);
+        fseek(fp, header_offset + 8, 0);
+        flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
         compress = PyMarshal_ReadShortFromFile(fp);
         time = PyMarshal_ReadShortFromFile(fp);
         date = PyMarshal_ReadShortFromFile(fp);
@@ -802,7 +805,11 @@ read_directory(PyObject *archive_obj)
         *p = 0;         /* Add terminating null byte */
         header_offset += header_size;
 
-        nameobj = PyUnicode_DecodeFSDefaultAndSize(name, name_size);
+        if (flags & 0x0800)
+            charset = "utf-8";
+        else
+            charset = "cp437";
+        nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
         if (nameobj == NULL)
             goto error;
         Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1);