From: Sara Golemon Date: Fri, 31 Mar 2006 22:51:37 +0000 (+0000) Subject: Add API hooks and unicode.filesystem_encoding for handling unicode X-Git-Tag: RELEASE_1_3~179 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fd606a8d7817310a72d56589d64c611a09323c83;p=php Add API hooks and unicode.filesystem_encoding for handling unicode conversions of filename entries. Normal path conversions will simply use this converter, Certain other protocols (such as http) which specify a required character set (utf8), may override the conversion by defining a path_encode() and/or path_decode() wrapper ops method. --- diff --git a/Zend/zend.c b/Zend/zend.c index c4382c47e3..b432c05b8e 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -179,6 +179,7 @@ ZEND_INI_BEGIN() STD_ZEND_INI_ENTRY("unicode.runtime_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, runtime_encoding_conv, zend_unicode_globals, unicode_globals) STD_ZEND_INI_ENTRY("unicode.script_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, script_encoding_conv, zend_unicode_globals, unicode_globals) STD_ZEND_INI_ENTRY("unicode.http_input_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, http_input_encoding_conv, zend_unicode_globals, unicode_globals) + STD_ZEND_INI_ENTRY("unicode.filesystem_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, filesystem_encoding_conv, zend_unicode_globals, unicode_globals) ZEND_INI_END() diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 43059d4fd1..be512bb15e 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -299,6 +299,7 @@ struct _zend_unicode_globals { UConverter *output_encoding_conv; /* output layer converter */ UConverter *script_encoding_conv; /* default script encoding converter */ UConverter *http_input_encoding_conv;/* http input encoding converter */ + UConverter *filesystem_encoding_conv;/* default filesystem converter (entries, not contents) */ UConverter *utf8_conv; /* all-purpose UTF-8 converter */ uint16_t from_error_mode; diff --git 
a/ext/standard/file.c b/ext/standard/file.c
index 115171d06d..09ba20dfa1 100644
--- a/ext/standard/file.c
+++ b/ext/standard/file.c
@@ -866,25 +866,34 @@ PHP_NAMED_FUNCTION(php_if_tmpfile)
 }
 /* }}} */
 
-/* {{{ proto resource fopen(string filename, string mode [, bool use_include_path [, resource context]])
+/* {{{ proto resource fopen(string filename, string mode [, bool use_include_path [, resource context]]) U
    Open a file or a URL and return a file pointer */
 PHP_NAMED_FUNCTION(php_if_fopen)
 {
 	char *filename, *mode;
 	int filename_len, mode_len;
+	zend_uchar filename_type; /* type of filename as reported by zend_parse_parameters(); compared against IS_UNICODE below */
 	zend_bool use_include_path = 0;
 	zval *zcontext = NULL;
 	php_stream *stream;
 	php_stream_context *context = NULL;
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|br", &filename, &filename_len,
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts|br", &filename, &filename_len, &filename_type,
 				&mode, &mode_len, &use_include_path, &zcontext) == FAILURE) {
 		RETURN_FALSE;
 	}
 
 	context = php_stream_context_from_zval(zcontext, 0);
-	
+
+	if (filename_type == IS_UNICODE) { /* unicode filename: encode it for the target wrapper before opening */
+		if (php_stream_path_encode(NULL, &filename, &filename_len, (UChar *)filename, filename_len, REPORT_ERRORS, context) == FAILURE) { /* path is declared UChar *, filename is char *: cast explicitly */
+			RETURN_FALSE;
+		}
+	}
 	stream = php_stream_open_wrapper_ex(filename, mode, (use_include_path ? USE_PATH : 0) | REPORT_ERRORS, NULL, context);
+	if (filename_type == IS_UNICODE) { /* filename now points at the buffer written by php_stream_path_encode(); release it */
+		efree(filename);
+	}
 	if (stream == NULL) {
 		RETURN_FALSE;
 	}
diff --git a/main/php_streams.h b/main/php_streams.h
index 47fafc8bbd..7a80ee529e 100755
--- a/main/php_streams.h
+++ b/main/php_streams.h
@@ -154,6 +154,12 @@ typedef struct _php_stream_wrapper_ops {
 	/* Create/Remove directory */
 	int (*stream_mkdir)(php_stream_wrapper *wrapper, char *url, int mode, int options, php_stream_context *context TSRMLS_DC);
 	int (*stream_rmdir)(php_stream_wrapper *wrapper, char *url, int options, php_stream_context *context TSRMLS_DC);
+
+	/* Unicode path manipulation -- Leave NULL to use UG(filesystem_encoding_conv) for conversion */
+	int (*path_encode)(php_stream_wrapper *wrapper, char **encpath, int *encpath_len, UChar *path, int path_len,
+							int options, php_stream_context *context TSRMLS_DC);
+	int (*path_decode)(php_stream_wrapper *wrapper, UChar **decpath, int *decpath_len, char *path, int path_len,
+							int options, php_stream_context *context TSRMLS_DC);
 } php_stream_wrapper_ops;
 
 struct _php_stream_wrapper {
@@ -367,6 +373,18 @@ PHPAPI int _php_stream_set_option(php_stream *stream, int option, int value, voi
 
 #define php_stream_set_chunk_size(stream, size) _php_stream_set_option((stream), PHP_STREAM_OPTION_SET_CHUNK_SIZE, (size), NULL TSRMLS_CC)
 
+PHPAPI int _php_stream_path_encode(php_stream_wrapper *wrapper,
+				char **pathenc, int *pathenc_len, UChar *path, int path_len,
+				int options, php_stream_context *context TSRMLS_DC);
+#define php_stream_path_encode(wrapper, pathenc, pathenc_len, path, path_len, options, context) \
+	_php_stream_path_encode((wrapper), (pathenc), (pathenc_len), (path), (path_len), (options), (context) TSRMLS_CC)
+
+PHPAPI int _php_stream_path_decode(php_stream_wrapper *wrapper,
+				char **pathdec, int *pathdec_len, UChar *path, int path_len,
+				int options, php_stream_context *context TSRMLS_DC);
+#define php_stream_path_decode(wrapper, pathdec, pathdec_len, path, 
path_len, options, context) \
+	_php_stream_path_decode((wrapper), (pathdec), (pathdec_len), (path), (path_len), (options), (context) TSRMLS_CC)
+
 END_EXTERN_C()
 
 
diff --git a/main/streams/streams.c b/main/streams/streams.c
index e6a666bd1f..3b49d780f3 100755
--- a/main/streams/streams.c
+++ b/main/streams/streams.c
@@ -2526,6 +2526,131 @@ PHPAPI int _php_stream_scandir(char *dirname, char **namelist[], int flags, php_
 }
 /* }}} */
 
+/* {{{ php_stream_path_encode
+Encode a unicode filepath (path, path_len) to the appropriate character set, storing the result in *pathenc / *pathenc_len.
+If the wrapper defines wops->path_encode() the conversion is dispatched to it; otherwise the INI defined filesystem_encoding converter is used.
+If wrapper == NULL, the path is searched for a scheme delimiter: when one is found the scheme is looked up with php_stream_locate_url_wrapper(), otherwise php_plain_files_wrapper is used.
+Returns SUCCESS, or FAILURE with *pathenc = NULL and *pathenc_len = 0; conversion errors are only raised when options contains REPORT_ERRORS.
+*/
+PHPAPI int _php_stream_path_encode(php_stream_wrapper *wrapper,
+				char **pathenc, int *pathenc_len, UChar *path, int path_len,
+				int options, php_stream_context *context TSRMLS_DC)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	int num_conv;
+
+	if (!wrapper) { /* caller did not name a wrapper: work it out from the path itself */
+		UChar *p;
+		U_STRING_DECL(delim, "://", 3);
+		int delim_len = 3; /* length of the scheme delimiter, in UChars */
+
+		U_STRING_INIT(delim, "://", 3);
+
+		p = u_strFindFirst(path, path_len, delim, delim_len);
+		if (p) {
+			char *scheme = NULL;
+			int scheme_len = 0;
+
+			/* Convert just the scheme (plus the delimiter) using utf8 in order to look it up in the registry */
+			num_conv = zend_convert_from_unicode(UG(utf8_conv), &scheme, &scheme_len, path, (p - path) + delim_len, &status);
+			if (U_FAILURE(status)) {
+				if (options & REPORT_ERRORS) {
+					zend_raise_conversion_error_ex("Unable to convert filepath", UG(utf8_conv), ZEND_FROM_UNICODE,
+									num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+				}
+				*pathenc = NULL;
+				*pathenc_len = 0;
+
+				return FAILURE;
+			}
+			wrapper = php_stream_locate_url_wrapper(scheme, NULL, options TSRMLS_CC);
+			efree(scheme); /* the converted scheme was only needed for the lookup */
+			if (!wrapper) {
+				*pathenc = NULL;
+				*pathenc_len = 0;
+
+				return FAILURE;
+			}
+		} else {
+			wrapper = &php_plain_files_wrapper; /* no scheme delimiter in the path */
+		}
+	}
+
+	if (wrapper->wops->path_encode) { /* wrapper supplies its own encoder */
+		if (wrapper->wops->path_encode(wrapper, pathenc, pathenc_len, path, path_len, options, context TSRMLS_CC) == FAILURE) {
+			*pathenc = NULL;
+			*pathenc_len = 0;
+
+			return FAILURE;
+		}
+
+		return SUCCESS;
+	}
+
+	/* Otherwise, fallback on filesystem_encoding */
+	status = U_ZERO_ERROR; /* start the fallback conversion from a clean status */
+
+	num_conv = zend_convert_from_unicode(ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
+				pathenc, pathenc_len, path, path_len, &status);
+	if (U_FAILURE(status)) {
+		if (options & REPORT_ERRORS) {
+			zend_raise_conversion_error_ex("Unable to convert filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
+							ZEND_FROM_UNICODE, num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+		}
+
+		*pathenc = NULL;
+		*pathenc_len = 0;
+
+		return FAILURE;
+	}
+
+	return SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ php_stream_path_decode
+Decode a filepath from its character set to unicode
+If the wrapper supports its own decoding rules it will be dispatched to wrapper->wops->path_decode()
+Otherwise (or if wrapper == NULL) the INI defined filesystem_encoding converter will be used. 
+Returns SUCCESS, or FAILURE with *pathdec = NULL and *pathdec_len = 0. NOTE(review): pathdec/path are declared char ** / UChar * here, but the path_decode wrapper op in php_streams.h takes UChar **decpath / char *path -- the types look swapped; confirm and fix prototype and definition together. */
+PHPAPI int _php_stream_path_decode(php_stream_wrapper *wrapper,
+				char **pathdec, int *pathdec_len, UChar *path, int path_len,
+				int options, php_stream_context *context TSRMLS_DC)
+{
+	int num_conv;
+	UErrorCode status = U_ZERO_ERROR;
+
+	if (wrapper && wrapper->wops->path_decode) { /* wrapper supplies its own decoder */
+		if (wrapper->wops->path_decode(wrapper, pathdec, pathdec_len, path, path_len, options, context TSRMLS_CC) == FAILURE) {
+			*pathdec = NULL;
+			*pathdec_len = 0;
+
+			return FAILURE;
+		}
+		return SUCCESS;
+	}
+
+	/* No wrapper, or no wrapper-specific decoder: fall back on the filesystem_encoding converter */
+	num_conv = zend_convert_to_unicode(ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
+				pathdec, pathdec_len, path, path_len, &status);
+	if (U_FAILURE(status)) {
+		if (options & REPORT_ERRORS) { /* conversion errors are only raised on request */
+			zend_raise_conversion_error_ex("Unable to convert filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
+							ZEND_TO_UNICODE, num_conv, (UG(to_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+		}
+
+		*pathdec = NULL;
+		*pathdec_len = 0;
+
+		return FAILURE;
+	}
+
+	return SUCCESS;
+
+}
+/* }}} */
+
 /*
  * Local variables:
  * tab-width: 4