]> granicus.if.org Git - php/commitdiff
Add API hooks and unicode.filesystem_encoding for handling unicode
authorSara Golemon <pollita@php.net>
Fri, 31 Mar 2006 22:51:37 +0000 (22:51 +0000)
committerSara Golemon <pollita@php.net>
Fri, 31 Mar 2006 22:51:37 +0000 (22:51 +0000)
conversions of filename entries.

Normal path conversions will simply use this converter.
Certain other protocols (such as http), which specify a
required character set (utf8), may override the conversion
by defining a path_encode() and/or path_decode() wrapper ops method.

Zend/zend.c
Zend/zend_globals.h
ext/standard/file.c
main/php_streams.h
main/streams/streams.c

index c4382c47e3848e813fdfa5b643dbc3a0387e6042..b432c05b8ee856b1440f7a6b3ed23baa1a5f7b82 100644 (file)
@@ -179,6 +179,7 @@ ZEND_INI_BEGIN()
        STD_ZEND_INI_ENTRY("unicode.runtime_encoding",  NULL, ZEND_INI_ALL, OnUpdateEncoding,   runtime_encoding_conv, zend_unicode_globals, unicode_globals)
        STD_ZEND_INI_ENTRY("unicode.script_encoding",  NULL, ZEND_INI_ALL, OnUpdateEncoding,   script_encoding_conv, zend_unicode_globals, unicode_globals)
        STD_ZEND_INI_ENTRY("unicode.http_input_encoding",  NULL, ZEND_INI_ALL, OnUpdateEncoding,   http_input_encoding_conv, zend_unicode_globals, unicode_globals)
+       STD_ZEND_INI_ENTRY("unicode.filesystem_encoding",  NULL, ZEND_INI_ALL, OnUpdateEncoding,   filesystem_encoding_conv, zend_unicode_globals, unicode_globals)
 ZEND_INI_END()
 
 
index 43059d4fd14dec72310ddba0178b50cdb3733b45..be512bb15e94da8e5eab2ab00e8e3901f47fdd5e 100644 (file)
@@ -299,6 +299,7 @@ struct _zend_unicode_globals {
        UConverter *output_encoding_conv;    /* output layer converter */
        UConverter *script_encoding_conv;    /* default script encoding converter */
        UConverter *http_input_encoding_conv;/* http input encoding converter */
+       UConverter *filesystem_encoding_conv;/* default filesystem converter (entries, not contents) */ 
        UConverter *utf8_conv;                           /* all-purpose UTF-8 converter */
 
        uint16_t from_error_mode;
index 115171d06d24cf6e8c1cb8b7d8cbbb248db7bd48..09ba20dfa110019bec514813d4030e3db10ad8ed 100644 (file)
@@ -866,25 +866,34 @@ PHP_NAMED_FUNCTION(php_if_tmpfile)
 }
 /* }}} */
 
-/* {{{ proto resource fopen(string filename, string mode [, bool use_include_path [, resource context]])
+/* {{{ proto resource fopen(string filename, string mode [, bool use_include_path [, resource context]]) U
    Open a file or a URL and return a file pointer */
 PHP_NAMED_FUNCTION(php_if_fopen)
 {
        char *filename, *mode;
        int filename_len, mode_len;
+       zend_uchar filename_type;
        zend_bool use_include_path = 0;
        zval *zcontext = NULL;
        php_stream *stream;
        php_stream_context *context = NULL;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|br", &filename, &filename_len,
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts|br", &filename, &filename_len, &filename_type,
                                &mode, &mode_len, &use_include_path, &zcontext) == FAILURE) {
                RETURN_FALSE;
        }
 
        context = php_stream_context_from_zval(zcontext, 0);
-       
+
+       if (filename_type == IS_UNICODE) {
+               if (php_stream_path_encode(NULL, &filename, &filename_len, filename, filename_len, REPORT_ERRORS, context) == FAILURE) {
+                       RETURN_FALSE;
+               }
+       }
        stream = php_stream_open_wrapper_ex(filename, mode, (use_include_path ? USE_PATH : 0) | REPORT_ERRORS, NULL, context);
+       if (filename_type == IS_UNICODE) {
+               efree(filename);
+       }
        if (stream == NULL) {
                RETURN_FALSE;
        }
index 47fafc8bbd69af24d193dffa6523abbed3ca1d6e..7a80ee529e7f7d03eb295f3b73ab2a47495a0468 100755 (executable)
@@ -154,6 +154,12 @@ typedef struct _php_stream_wrapper_ops {
        /* Create/Remove directory */
        int (*stream_mkdir)(php_stream_wrapper *wrapper, char *url, int mode, int options, php_stream_context *context TSRMLS_DC);
        int (*stream_rmdir)(php_stream_wrapper *wrapper, char *url, int options, php_stream_context *context TSRMLS_DC);
+
+       /* Unicode path manipulation -- Leave NULL to use UG(filesystem_encoding_conv) for conversion */
+       int (*path_encode)(php_stream_wrapper *wrapper, char **encpath, int *encpath_len, UChar *path, int path_len,
+                                                                                                                       int options, php_stream_context *context TSRMLS_DC);
+       int (*path_decode)(php_stream_wrapper *wrapper, UChar **decpath, int *decpath_len, char *path, int path_len,
+                                                                                                                       int options, php_stream_context *context TSRMLS_DC);
 } php_stream_wrapper_ops;
 
 struct _php_stream_wrapper     {
@@ -367,6 +373,18 @@ PHPAPI int _php_stream_set_option(php_stream *stream, int option, int value, voi
 
 #define php_stream_set_chunk_size(stream, size) _php_stream_set_option((stream), PHP_STREAM_OPTION_SET_CHUNK_SIZE, (size), NULL TSRMLS_CC)
 
+PHPAPI int _php_stream_path_encode(php_stream_wrapper *wrapper,
+                               char **pathenc, int *pathenc_len, UChar *path, int path_len,
+                               int options, php_stream_context *context TSRMLS_DC); /* unicode path in, encoded char string out */
+#define  php_stream_path_encode(wrapper, pathenc, pathenc_len, path, path_len, options, context) \
+               _php_stream_path_encode((wrapper), (pathenc), (pathenc_len), (path), (path_len), (options), (context) TSRMLS_CC)
+/* NOTE(review): the decode prototype below is typed char ** out and UChar * in, while the path_decode wrapper op takes UChar ** out and char * in -- confirm intended types */
+PHPAPI int _php_stream_path_decode(php_stream_wrapper *wrapper,
+                               char **pathdec, int *pathdec_len, UChar *path, int path_len,
+                               int options, php_stream_context *context TSRMLS_DC);
+#define  php_stream_path_decode(wrapper, pathdec, pathdec_len, path, path_len, options, context) \
+               _php_stream_path_decode((wrapper), (pathdec), (pathdec_len), (path), (path_len), (options), (context) TSRMLS_CC)
+
 END_EXTERN_C()
 
 
index e6a666bd1f26e89f082a765c86c01ce8ca3d569c..3b49d780f3b29f431f1f3d9d1fbdcb900596bd45 100755 (executable)
@@ -2526,6 +2526,131 @@ PHPAPI int _php_stream_scandir(char *dirname, char **namelist[], int flags, php_
 }
 /* }}} */
 
+/* {{{ php_stream_path_encode
+Encode a unicode filepath (path, path_len) into the character set expected by the wrapper that will handle it.
+If the wrapper defines wops->path_encode() the conversion is dispatched to it; otherwise the converter bound to the unicode.filesystem_encoding INI setting is used.
+If wrapper == NULL, the path is searched for a scheme delimiter to look the wrapper up by scheme; when none is present the plain files wrapper is used.
+Returns SUCCESS or FAILURE; every FAILURE path sets *pathenc = NULL and *pathenc_len = 0. REPORT_ERRORS in options enables conversion error reporting.
+*/
+PHPAPI int _php_stream_path_encode(php_stream_wrapper *wrapper,
+                               char **pathenc, int *pathenc_len, UChar *path, int path_len,
+                               int options, php_stream_context *context TSRMLS_DC)
+{
+       UErrorCode status = U_ZERO_ERROR;
+       int num_conv;
+
+       if (!wrapper) {
+               UChar *p;
+               U_STRING_DECL(delim, "://", 3);
+               int delim_len = 3;
+
+               U_STRING_INIT(delim, "://", 3);
+
+               p = u_strFindFirst(path, path_len, delim, delim_len);
+               if (p) {
+                       char *scheme = NULL;
+                       int scheme_len = 0;
+
+                       /* Convert just the scheme (delimiter included, see the length argument) using utf8 in order to look it up in the registry */
+                       num_conv = zend_convert_from_unicode(UG(utf8_conv), &scheme, &scheme_len, path, (p - path) + delim_len, &status);
+                       if (U_FAILURE(status)) { /* NOTE(review): scheme is not freed on this path -- confirm the converter leaves it unallocated on failure */
+                               if (options & REPORT_ERRORS) {
+                                       zend_raise_conversion_error_ex("Unable to convert filepath", UG(utf8_conv), ZEND_FROM_UNICODE,
+                                                                                       num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+                               }
+                               *pathenc = NULL;
+                               *pathenc_len = 0;
+
+                               return FAILURE;
+                       }
+                       wrapper = php_stream_locate_url_wrapper(scheme, NULL, options TSRMLS_CC);
+                       efree(scheme);
+                       if (!wrapper) { /* lookup by scheme returned no wrapper */
+                               *pathenc = NULL;
+                               *pathenc_len = 0;
+
+                               return FAILURE;
+                       }                       
+               } else {
+                       wrapper = &php_plain_files_wrapper; /* no scheme delimiter found in path */
+               }
+       }
+
+       if (wrapper->wops->path_encode) {
+               if (wrapper->wops->path_encode(wrapper, pathenc, pathenc_len, path, path_len, options, context TSRMLS_CC) == FAILURE) {
+                       *pathenc = NULL;
+                       *pathenc_len = 0;
+
+                       return FAILURE;
+               }
+
+               return SUCCESS;
+       }
+
+       /* Otherwise, fall back on filesystem_encoding */
+       status = U_ZERO_ERROR; /* reset: status was also passed to the scheme conversion above */
+
+       num_conv = zend_convert_from_unicode(ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
+                               pathenc, pathenc_len, path, path_len, &status);
+       if (U_FAILURE(status)) {
+               if (options & REPORT_ERRORS) {
+                       zend_raise_conversion_error_ex("Unable to convert filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
+                                                       ZEND_FROM_UNICODE, num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+               }
+
+               *pathenc = NULL;
+               *pathenc_len = 0;
+
+               return FAILURE;
+       }
+
+       return SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ php_stream_path_decode
+Decode a filepath from its character set to unicode. Returns SUCCESS, or FAILURE with *pathdec = NULL and *pathdec_len = 0.
+If the wrapper supports its own decoding rules it will be dispatched to wrapper->wops->path_decode(); otherwise (or if wrapper == NULL) the INI defined filesystem_encoding converter will be used.
+NOTE(review): parameters are typed char ** out and UChar * in, but the path_decode wrapper op takes UChar ** out and char * in, as a to-unicode conversion would suggest -- confirm intended types.
+*/
+PHPAPI int _php_stream_path_decode(php_stream_wrapper *wrapper,
+                               char **pathdec, int *pathdec_len, UChar *path, int path_len,
+                               int options, php_stream_context *context TSRMLS_DC)
+{
+       int num_conv;
+       UErrorCode status = U_ZERO_ERROR;
+
+       if (wrapper && wrapper->wops->path_decode) {
+               if (wrapper->wops->path_decode(wrapper, pathdec, pathdec_len, path, path_len, options, context TSRMLS_CC) == FAILURE) {
+                       *pathdec = NULL;
+                       *pathdec_len = 0;
+
+                       return FAILURE;
+               }
+               return SUCCESS;
+       }
+
+       /* Otherwise fall back on filesystem_encoding; unlike the encode path, no wrapper lookup is attempted here */
+       num_conv = zend_convert_to_unicode(ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
+                               pathdec, pathdec_len, path, path_len, &status);
+       if (U_FAILURE(status)) {
+               if (options & REPORT_ERRORS) {
+                       zend_raise_conversion_error_ex("Unable to convert filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
+                                                       ZEND_TO_UNICODE, num_conv, (UG(to_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+               }
+
+               *pathdec = NULL;
+               *pathdec_len = 0;
+
+               return FAILURE;
+       }
+
+       return SUCCESS;
+
+}
+/* }}} */
+
 /*
  * Local variables:
  * tab-width: 4