. Added multibyte support by default. Previously PHP had to be compiled
with --enable-zend-multibyte. Now it can be enabled or disabled through the
zend.multibyte directive in php.ini (Dmitry)
+ . Removed compile time dependency from ext/mbstring (Dmitry)
. Added scalar typehints to the parser and the reflection API. (Ilia, Derick)
. Added support for Traits. (Stefan)
. Added closure $this support back. (Stas)
CG(script_encoding_list) = NULL;
CG(script_encoding_list_size) = 0;
CG(internal_encoding) = NULL;
- CG(encoding_detector) = NULL;
- CG(encoding_converter) = NULL;
- CG(encoding_oddlen) = NULL;
CG(encoding_declared) = 0;
}
/* }}} */
zend_encoding *internal_encoding;
- /* multibyte utility functions */
- zend_encoding_detector encoding_detector;
- zend_encoding_converter encoding_converter;
- zend_encoding_oddlen encoding_oddlen;
-
#ifdef ZTS
zval ***static_members_table;
int last_static_member;
NULL
};
+/* Placeholder encoding detector: the initial value of
+ * zend_multibyte_encoding_detector. It ignores its arguments and always
+ * returns NULL (no encoding detected). */
+static char* dummy_encoding_detector(const unsigned char *string, size_t length, char *list TSRMLS_DC)
+{
+ return NULL;
+}
+
+/* Placeholder encoding converter: the initial value of
+ * zend_multibyte_encoding_converter. It performs no conversion, leaves *to and
+ * *to_length untouched and always returns -1; callers treat any non-zero
+ * result as "not converted". */
+static int dummy_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
+{
+ return -1;
+}
+/* Placeholder "odd length" callback: the initial value of
+ * zend_multibyte_encoding_oddlen. It always returns 0, so callers that
+ * subtract the result from the input length leave that length unchanged. */
+static size_t dummy_encoding_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
+{
+ return 0;
+}
+
+/* Placeholder encoding-list checker: the initial value of
+ * zend_multibyte_check_encoding_list.
+ *
+ * Callers treat a zero result as "illegal encoding list" (the exif INI
+ * handlers emit a warning and return FAILURE on !result). With no multibyte
+ * provider registered there is nothing to validate against, so accept every
+ * list. Returning 0 here would make those INI settings fail whenever
+ * mbstring is not loaded, whereas the previous compile-time variant simply
+ * skipped the check in that configuration. */
+static int dummy_encoding_list_checker(const char *encoding_list TSRMLS_DC)
+{
+ return 1;
+}
+
+/* Placeholder internal-encoding getter: the initial value of
+ * zend_multibyte_get_internal_encoding. It always returns NULL, i.e. no
+ * internal encoding name is available. */
+static const char* dummy_get_internal_encoding(TSRMLS_D)
+{
+ return NULL;
+}
+
+/* Process-wide multibyte callbacks. They start out pointing at the dummy_*
+ * placeholders above and are replaced by zend_multibyte_set_functions(). */
+ZEND_API zend_encoding_detector zend_multibyte_encoding_detector = dummy_encoding_detector;
+ZEND_API zend_encoding_converter zend_multibyte_encoding_converter = dummy_encoding_converter;
+ZEND_API zend_encoding_oddlen zend_multibyte_encoding_oddlen = dummy_encoding_oddlen;
+ZEND_API zend_encoding_list_checker zend_multibyte_check_encoding_list = dummy_encoding_list_checker;
+ZEND_API zend_encoding_name_getter zend_multibyte_get_internal_encoding = dummy_get_internal_encoding;
ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list,
size_t encoding_list_size TSRMLS_DC)
return 0;
}
-ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen TSRMLS_DC)
+/* Install the multibyte callbacks used by the engine.
+ *
+ * Each argument replaces the corresponding zend_multibyte_* function pointer.
+ * A NULL argument selects the built-in dummy implementation instead of being
+ * stored: the pointers are called without NULL checks and are compared
+ * against the dummies to decide whether multibyte support is active, so a
+ * stored NULL would be dereferenced. Always returns 0. */
+ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker encoding_list_checker, zend_encoding_name_getter get_internal_encoding TSRMLS_DC)
{
- CG(encoding_detector) = encoding_detector;
- CG(encoding_converter) = encoding_converter;
- CG(encoding_oddlen) = encoding_oddlen;
+ zend_multibyte_encoding_detector = encoding_detector ? encoding_detector : dummy_encoding_detector;
+ zend_multibyte_encoding_converter = encoding_converter ? encoding_converter : dummy_encoding_converter;
+ zend_multibyte_encoding_oddlen = encoding_oddlen ? encoding_oddlen : dummy_encoding_oddlen;
+ zend_multibyte_check_encoding_list = encoding_list_checker ? encoding_list_checker : dummy_encoding_list_checker;
+ zend_multibyte_get_internal_encoding = get_internal_encoding ? get_internal_encoding : dummy_get_internal_encoding;
return 0;
}
{
size_t oddlen;
- if (!CG(encoding_converter)) {
+ if (zend_multibyte_encoding_converter == dummy_encoding_converter) {
return 0;
}
- if (CG(encoding_oddlen)) {
- oddlen = CG(encoding_oddlen)(from, from_length, from_encoding TSRMLS_CC);
- if (oddlen > 0) {
- from_length -= oddlen;
- }
+ oddlen = zend_multibyte_encoding_oddlen(from, from_length, from_encoding TSRMLS_CC);
+ if (oddlen > 0) {
+ from_length -= oddlen;
}
- if (CG(encoding_converter)(to, to_length, from, from_length, to_encoding, from_encoding TSRMLS_CC) != 0) {
+ if (zend_multibyte_encoding_converter(to, to_length, from, from_length, to_encoding, from_encoding TSRMLS_CC) != 0) {
return 0;
}
}
/* if multiple encodings specified, detect automagically */
- if (CG(script_encoding_list_size) > 1 && CG(encoding_detector)) {
+ if (CG(script_encoding_list_size) > 1 &&
+ zend_multibyte_encoding_detector != dummy_encoding_detector) {
list = zend_multibyte_assemble_encoding_list(CG(script_encoding_list),
CG(script_encoding_list_size));
- name = CG(encoding_detector)(LANG_SCNG(script_org),
+ name = zend_multibyte_encoding_detector(LANG_SCNG(script_org),
LANG_SCNG(script_org_size), list TSRMLS_CC);
if (list) {
efree(list);
typedef size_t (*zend_encoding_oddlen)(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
+/* Validates a textual encoding list; callers treat a zero result as
+ * "illegal encoding list". */
+typedef int (*zend_encoding_list_checker)(const char *encoding_list TSRMLS_DC);
+
+/* Returns the name of the current internal encoding, or NULL when none is
+ * available. */
+typedef const char* (*zend_encoding_name_getter)(TSRMLS_D);
+
typedef struct _zend_encoding {
zend_encoding_filter input_filter; /* escape input filter */
zend_encoding_filter output_filter; /* escape output filter */
* zend multibyte APIs
*/
BEGIN_EXTERN_C()
+
+/* multibyte utility functions */
+ZEND_API extern zend_encoding_detector zend_multibyte_encoding_detector;
+ZEND_API extern zend_encoding_converter zend_multibyte_encoding_converter;
+ZEND_API extern zend_encoding_oddlen zend_multibyte_encoding_oddlen;
+ZEND_API extern zend_encoding_list_checker zend_multibyte_check_encoding_list;
+ZEND_API extern zend_encoding_name_getter zend_multibyte_get_internal_encoding;
+
ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list,
size_t encoding_list_size TSRMLS_DC);
ZEND_API int zend_multibyte_set_internal_encoding(const char *encoding_name TSRMLS_DC);
-ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen TSRMLS_DC);
+ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker encoding_list_checker, zend_encoding_name_getter get_internal_encoding TSRMLS_DC);
ZEND_API int zend_multibyte_set_filter(zend_encoding *onetime_encoding TSRMLS_DC);
ZEND_API zend_encoding* zend_multibyte_fetch_encoding(const char *encoding_name);
ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t
#include "ext/standard/php_image.h"
#include "ext/standard/info.h"
-#if defined(PHP_WIN32) || (HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING))
-#define EXIF_USE_MBSTRING 1
-#else
-#define EXIF_USE_MBSTRING 0
-#endif
-
-#if EXIF_USE_MBSTRING
-#include "ext/mbstring/mbstring.h"
-#endif
-
/* needed for ssize_t definition */
#include <sys/types.h>
+/* INI update handler: a non-empty new value must pass
+ * zend_multibyte_check_encoding_list(); otherwise a warning is raised and
+ * FAILURE is returned. NULL or empty values skip the check. Accepted values
+ * are stored through OnUpdateString. */
ZEND_INI_MH(OnUpdateEncode)
{
-#if EXIF_USE_MBSTRING
- if (new_value && strlen(new_value) && !php_mb_check_encoding_list(new_value TSRMLS_CC)) {
+ if (new_value && strlen(new_value) && !zend_multibyte_check_encoding_list(new_value TSRMLS_CC)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal encoding ignored: '%s'", new_value);
return FAILURE;
}
-#endif
return OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
}
+/* INI update handler: the new value must pass
+ * zend_multibyte_check_encoding_list(); otherwise a warning is raised and
+ * FAILURE is returned. Accepted values are stored through OnUpdateString.
+ * NOTE(review): unlike OnUpdateEncode there is no NULL/empty guard before the
+ * check - confirm new_value can never be NULL here. */
ZEND_INI_MH(OnUpdateDecode)
{
-#if EXIF_USE_MBSTRING
- if (!php_mb_check_encoding_list(new_value TSRMLS_CC)) {
+ if (!zend_multibyte_check_encoding_list(new_value TSRMLS_CC)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal encoding ignored: '%s'", new_value);
return FAILURE;
}
-#endif
return OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
}
PHP_MINIT_FUNCTION(exif)
{
REGISTER_INI_ENTRIES();
- REGISTER_LONG_CONSTANT("EXIF_USE_MBSTRING", EXIF_USE_MBSTRING, CONST_CS | CONST_PERSISTENT);
+ /* EXIF_USE_MBSTRING is decided at startup: 1 when a module named
+ * "mbstring" is present in the module registry, 0 otherwise. */
+ REGISTER_LONG_CONSTANT("EXIF_USE_MBSTRING", zend_hash_exists(&module_registry, "mbstring", sizeof("mbstring")) ? 1 : 0, CONST_CS | CONST_PERSISTENT);
return SUCCESS;
}
/* }}} */
/* {{{ exif dependencies */
+/* Module dependency table: "standard" is required, "mbstring" is declared
+ * as an optional dependency only. */
static const zend_module_dep exif_module_deps[] = {
ZEND_MOD_REQUIRED("standard")
-#if EXIF_USE_MBSTRING
- ZEND_MOD_REQUIRED("mbstring")
-#endif
+ ZEND_MOD_OPTIONAL("mbstring")
{NULL, NULL, NULL}
};
/* }}} */
/* {{{ exif_process_string_raw
* Copy a string in Exif header to a character string returns length of allocated buffer if any. */
-#if !EXIF_USE_MBSTRING
static int exif_process_string_raw(char **result, char *value, size_t byte_count) {
/* we cannot use strlcpy - here the problem is that we have to copy NUL
* chars up to byte_count, we also have to add a single NUL character to
}
return 0;
}
-#endif
/* }}} */
/* {{{ exif_process_string
static int exif_process_user_comment(image_info_type *ImageInfo, char **pszInfoPtr, char **pszEncoding, char *szValuePtr, int ByteCount TSRMLS_DC)
{
int a;
-
-#if EXIF_USE_MBSTRING
char *decode;
size_t len;;
-#endif
*pszEncoding = NULL;
/* Copy the comment */
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
-#if EXIF_USE_MBSTRING
/* First try to detect BOM: ZERO WIDTH NOBREAK SPACE (FEFF 16)
* since we have no encoding support for the BOM yet we skip that.
*/
} else {
decode = ImageInfo->decode_unicode_le;
}
- *pszInfoPtr = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_unicode, decode, &len TSRMLS_CC);
+ /* Convert through the registered multibyte converter; on any non-zero
+ * result fall back to a raw copy. TSRMLS_CC (not TSRMLS_DC) is the
+ * call-site form of the thread-safety argument; the casts match the
+ * converter's unsigned char signature. */
+ if (zend_multibyte_encoding_converter(
+ (unsigned char **)pszInfoPtr,
+ &len,
+ (const unsigned char *)szValuePtr,
+ ByteCount,
+ ImageInfo->encode_unicode,
+ decode
+ TSRMLS_CC) != 0) {
+ len = exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
+ }
return len;
-#else
- return exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
-#endif
- } else
- if (!memcmp(szValuePtr, "ASCII\0\0\0", 8)) {
+ } else if (!memcmp(szValuePtr, "ASCII\0\0\0", 8)) {
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
- } else
- if (!memcmp(szValuePtr, "JIS\0\0\0\0\0", 8)) {
+ } else if (!memcmp(szValuePtr, "JIS\0\0\0\0\0", 8)) {
/* JIS should be tanslated to MB or we leave it to the user - leave it to the user */
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
-#if EXIF_USE_MBSTRING
- if (ImageInfo->motorola_intel) {
- *pszInfoPtr = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_jis, ImageInfo->decode_jis_be, &len TSRMLS_CC);
- } else {
- *pszInfoPtr = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_jis, ImageInfo->decode_jis_le, &len TSRMLS_CC);
+ /* Convert the JIS payload using the byte-order specific decode setting;
+ * on any non-zero result fall back to a raw copy. TSRMLS_CC (not
+ * TSRMLS_DC) is the call-site form of the thread-safety argument; the
+ * casts match the converter's unsigned char signature. */
+ if (zend_multibyte_encoding_converter(
+ (unsigned char **)pszInfoPtr,
+ &len,
+ (const unsigned char *)szValuePtr,
+ ByteCount,
+ ImageInfo->encode_jis,
+ ImageInfo->motorola_intel ? ImageInfo->decode_jis_be : ImageInfo->decode_jis_le
+ TSRMLS_CC) != 0) {
+ len = exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
}
return len;
-#else
- return exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
-#endif
- } else
- if (!memcmp(szValuePtr, "\0\0\0\0\0\0\0\0", 8)) {
+ } else if (!memcmp(szValuePtr, "\0\0\0\0\0\0\0\0", 8)) {
/* 8 NULL means undefined and should be ASCII... */
*pszEncoding = estrdup("UNDEFINED");
szValuePtr = szValuePtr+8;
xp_field->tag = tag;
/* Copy the comment */
-#if EXIF_USE_MBSTRING
-/* What if MS supports big-endian with XP? */
-/* if (ImageInfo->motorola_intel) {
- xp_field->value = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_unicode, ImageInfo->decode_unicode_be, &xp_field->size TSRMLS_CC);
- } else {
- xp_field->value = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_unicode, ImageInfo->decode_unicode_le, &xp_field->size TSRMLS_CC);
- }*/
- xp_field->value = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_unicode, ImageInfo->decode_unicode_le, &xp_field->size TSRMLS_CC);
- return xp_field->size;
-#else
- xp_field->size = exif_process_string_raw(&xp_field->value, szValuePtr, ByteCount);
+ /* Convert through the registered multibyte converter; on any non-zero
+ * result fall back to a raw copy. TSRMLS_CC (not TSRMLS_DC) is the
+ * call-site form of the thread-safety argument.
+ * NOTE(review): the converter takes unsigned char ** / size_t * -
+ * confirm the declared types of xp_field->value and xp_field->size. */
+ if (zend_multibyte_encoding_converter(
+ &xp_field->value,
+ &xp_field->size,
+ szValuePtr,
+ ByteCount,
+ ImageInfo->encode_unicode,
+ ImageInfo->motorola_intel ? ImageInfo->decode_unicode_be : ImageInfo->decode_unicode_le
+ TSRMLS_CC) != 0) {
+ xp_field->size = exif_process_string_raw(&xp_field->value, szValuePtr, ByteCount);
+ }
return xp_field->size;
-#endif
}
/* }}} */
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);
+static const char* php_mb_internal_encoding_name(TSRMLS_D);
static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
/* }}} */
/* {{{ MBSTRING_API php_mb_check_encoding_list */
-MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
+/* Thin wrapper around php_mb_parse_encoding_list(): parses the
+ * NUL-terminated encoding_list without requesting the parsed result (both
+ * output pointers are NULL) and returns the parser's status.
+ * encoding_list must not be NULL because strlen() is applied to it. */
+MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC)
+{
return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
}
/* }}} */
}
/* }}} */
+/* Copy at most len bytes from start into a newly emalloc()ed,
+ * NUL-terminated buffer, stopping early at the first unescaped `quote`
+ * byte (with quote == 0 this means stopping at a NUL byte).
+ * A backslash followed by a backslash or by the quote character is
+ * collapsed to the escaped character; everything else is copied one
+ * multibyte character at a time, with the character width taken from
+ * php_mb_gpc_mbchar_bytes(). The caller owns the returned buffer. */
+static char *php_mb_rfc1867_substring(char *start, int len, char quote TSRMLS_DC)
+{
+ char *result = emalloc(len + 2);
+ char *resp = result;
+ int i;
+
+ for (i = 0; i < len && start[i] != quote; ++i) {
+ if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
+ /* escaped backslash/quote: emit only the escaped character */
+ *resp++ = start[++i];
+ } else {
+ size_t j = php_mb_gpc_mbchar_bytes(start+i TSRMLS_CC);
+
+ /* copy the whole multibyte character, never reading past len */
+ while (j-- > 0 && i < len) {
+ *resp++ = start[i++];
+ }
+ /* compensate for the ++i performed by the for loop */
+ --i;
+ }
+ }
+
+ *resp = '\0';
+ return result;
+}
+
+/* Extract the first word of str as a newly allocated string.
+ * Leading whitespace is skipped. A word that starts with a single or double
+ * quote is handed to php_mb_rfc1867_substring() together with that quote
+ * character and the length of the remaining input; an unquoted word runs up
+ * to the next whitespace byte. An input with no word yields an empty string. */
+static char *php_mb_rfc1867_getword(char *str TSRMLS_DC) /* {{{ */
+{
+	char *word_end;
+	char delim;
+
+	/* skip leading whitespace */
+	for (; *str && isspace(*str); ++str) {
+	}
+
+	if (*str == '\0') {
+		return estrdup("");
+	}
+
+	delim = *str;
+	if (delim == '"' || delim == '\'') {
+		/* quoted word: step over the opening quote */
+		++str;
+		return php_mb_rfc1867_substring(str, strlen(str), delim TSRMLS_CC);
+	}
+
+	/* unquoted word: find the next whitespace byte or the end of input */
+	for (word_end = str; *word_end && !isspace(*word_end); ++word_end) {
+	}
+	return php_mb_rfc1867_substring(str, word_end - str, 0 TSRMLS_CC);
+}
+/* }}} */
+
+/* Return a pointer to the part of filename that follows the last path
+ * separator ('\\' or '/'), or filename itself when it contains neither.
+ * The result points into filename; nothing is allocated. */
+static char *php_mb_rfc1867_basename(char *filename TSRMLS_DC) /* {{{ */
+{
+	/* The backslash is checked on every platform, not only on win32 where it
+	 * is a valid path separator: IE always sends the full client-side path of
+	 * the uploaded file, so without this the user would get a bogus file
+	 * name unless they call basename() themselves. */
+	char *last_backslash = php_mb_strrchr(filename, '\\' TSRMLS_CC);
+	char *last_slash = php_mb_strrchr(filename, '/' TSRMLS_CC);
+	char *sep = (last_slash > last_backslash) ? last_slash : last_backslash;
+
+	return sep ? sep + 1 : filename;
+}
+/* }}} */
+
/* {{{ php.ini directive handler */
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
static PHP_INI_MH(OnUpdate_mbstring_language)
#if HAVE_MBREGEX
PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
+
+ zend_multibyte_set_functions(
+ php_mb_encoding_detector,
+ php_mb_encoding_converter,
+ php_mb_oddlen,
+ php_mb_check_encoding_list,
+ php_mb_internal_encoding_name TSRMLS_CC);
+
+ php_rfc1867_set_multibyte_callbacks(
+ php_mb_encoding_translation,
+ php_mb_gpc_encoding_detector,
+ php_mb_gpc_encoding_converter,
+ php_mb_rfc1867_getword,
+ php_mb_rfc1867_basename);
+
return SUCCESS;
}
/* }}} */
/* 'd better use mbfl_memory_device? */
char *name, *list = NULL;
int n, *entry, list_size = 0;
- zend_encoding_detector encoding_detector;
- zend_encoding_converter encoding_converter;
- zend_encoding_oddlen encoding_oddlen;
/* notify script encoding to Zend Engine */
entry = MBSTRG(script_encoding_list);
if (list) {
efree(list);
}
- encoding_detector = php_mb_encoding_detector;
- encoding_converter = php_mb_encoding_converter;
- encoding_oddlen = php_mb_oddlen;
/* TODO: make independent from mbstring.encoding_translation? */
if (MBSTRG(encoding_translation)) {
zend_multibyte_set_internal_encoding(name TSRMLS_CC);
}
- zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
-
return 0;
}
/* }}} */
}
/* }}} */
+/* {{{ const char* php_mb_internal_encoding_name()
+ * returns name of internal encoding
+ */
+static const char* php_mb_internal_encoding_name(TSRMLS_D)
+{
+	const char *enc_name = mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
+
+	/* no name at all, or an empty one */
+	if (enc_name == NULL || enc_name[0] == '\0') {
+		return NULL;
+	}
+
+	/* the pseudo encodings "pass", "auto" and "none" are reported as NULL */
+	if (strlen(enc_name) == 4) {
+		if (memcmp("pass", enc_name, sizeof("pass") - 1) == 0 ||
+			memcmp("auto", enc_name, sizeof("auto") - 1) == 0 ||
+			memcmp("none", enc_name, sizeof("none") - 1) == 0) {
+			return NULL;
+		}
+	}
+
+	return enc_name;
+}
+/* }}} */
+
+
#endif /* HAVE_MBSTRING */
/*
#include <langinfo.h>
#endif
-#if HAVE_MBSTRING
-# include "ext/mbstring/mbstring.h"
-ZEND_EXTERN_MODULE_GLOBALS(mbstring)
-#endif
-
#include <zend_hash.h>
#include "html_tables.h"
int i;
enum entity_charset charset = cs_utf_8;
int len = 0;
- zval *uf_result = NULL;
/* Default is now UTF-8 */
if (charset_hint == NULL)
if ((len = strlen(charset_hint)) != 0) {
goto det_charset;
}
-#if HAVE_MBSTRING
-#if !defined(COMPILE_DL_MBSTRING)
- /* XXX: Ugly things. Why don't we look for a more sophisticated way? */
- switch (MBSTRG(current_internal_encoding)) {
- case mbfl_no_encoding_8859_1:
- return cs_8859_1;
-
- case mbfl_no_encoding_utf8:
- return cs_utf_8;
-
- case mbfl_no_encoding_euc_jp:
- case mbfl_no_encoding_eucjp_win:
- return cs_eucjp;
-
- case mbfl_no_encoding_sjis:
- case mbfl_no_encoding_sjis_open:
- case mbfl_no_encoding_cp932:
- return cs_sjis;
-
- case mbfl_no_encoding_cp1252:
- return cs_cp1252;
-
- case mbfl_no_encoding_8859_15:
- return cs_8859_15;
-
- case mbfl_no_encoding_big5:
- return cs_big5;
- case mbfl_no_encoding_euc_cn:
- case mbfl_no_encoding_hz:
- case mbfl_no_encoding_cp936:
- return cs_gb2312;
-
- case mbfl_no_encoding_koi8r:
- return cs_koi8r;
-
- case mbfl_no_encoding_cp866:
- return cs_cp866;
-
- case mbfl_no_encoding_cp1251:
- return cs_cp1251;
-
- case mbfl_no_encoding_8859_5:
- return cs_8859_5;
-
- default:
- ;
- }
-#else
- {
- zval nm_mb_internal_encoding;
-
- ZVAL_STRING(&nm_mb_internal_encoding, "mb_internal_encoding", 0);
-
- if (call_user_function_ex(CG(function_table), NULL, &nm_mb_internal_encoding, &uf_result, 0, NULL, 1, NULL TSRMLS_CC) != FAILURE) {
-
- charset_hint = Z_STRVAL_P(uf_result);
- len = Z_STRLEN_P(uf_result);
-
- if ((len == 4) && /* sizeof(none|auto|pass)-1 */
- (!memcmp("pass", charset_hint, sizeof("pass") - 1) ||
- !memcmp("auto", charset_hint, sizeof("auto") - 1) ||
- !memcmp("none", charset_hint, sizeof("none") - 1))) {
-
- charset_hint = NULL;
- len = 0;
- } else {
- goto det_charset;
- }
- }
+ charset_hint = (char*)zend_multibyte_get_internal_encoding(TSRMLS_C);
+ if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
+ goto det_charset;
}
-#endif
-#endif
charset_hint = SG(default_charset);
if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
charset_hint);
}
}
- if (uf_result != NULL) {
- zval_ptr_dtor(&uf_result);
- }
return charset;
}
/* }}} */
#define DEBUG_FILE_UPLOAD ZEND_DEBUG
-PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void **extra TSRMLS_DC) = NULL;
+/* Default "is encoding translation enabled?" callback: always returns 0,
+ * so the multipart handler takes its non-translating code paths until
+ * php_rfc1867_set_multibyte_callbacks() installs a real implementation. */
+static int dummy_encoding_translation(TSRMLS_D)
+{
+ return 0;
+}
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
-#include "ext/mbstring/mbstring.h"
+/* Multibyte hooks for the multipart handler, replaced by
+ * php_rfc1867_set_multibyte_callbacks(). Only the translation check has a
+ * default implementation; the other four start out as NULL and are used in
+ * this file only on paths guarded by php_rfc1867_encoding_translation(). */
+static php_rfc1867_encoding_translation_t php_rfc1867_encoding_translation = dummy_encoding_translation;
+static php_rfc1867_encoding_detector_t php_rfc1867_encoding_detector = NULL;
+static php_rfc1867_encoding_converter_t php_rfc1867_encoding_converter = NULL;
+static php_rfc1867_getword_t php_rfc1867_getword = NULL;
+static php_rfc1867_basename_t php_rfc1867_basename = NULL;
+
+PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void **extra TSRMLS_DC) = NULL;
static void safe_php_register_variable(char *var, char *strval, int val_len, zval *track_vars_array, zend_bool override_protection TSRMLS_DC);
+/* Register the stacked form variables and release them.
+ * val_list/len_list hold name/value pairs: entry i is the name and entry
+ * i+1 the value. If the encoding detector reports SUCCESS for the whole
+ * list, the converter is run over it first. Each pair is then passed
+ * through the SAPI input filter and, if set, the upload progress callback
+ * before being registered; both strings of every pair are efree()d whether
+ * or not the variable ends up registered. */
static void php_flush_gpc_variables(int num_vars, char **val_list, int *len_list, zval *array_ptr TSRMLS_DC) /* {{{ */
{
int i;
+ unsigned int new_val_len;
if (num_vars > 0 &&
- php_mb_gpc_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
- php_mb_gpc_encoding_converter(val_list, len_list, num_vars, NULL, NULL TSRMLS_CC);
+ php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
+ php_rfc1867_encoding_converter(val_list, len_list, num_vars, NULL, NULL TSRMLS_CC);
}
for (i = 0; i<num_vars; i += 2) {
- safe_php_register_variable(val_list[i], val_list[i+1], len_list[i+1], array_ptr, 0 TSRMLS_CC);
+ /* filtering, callback and registration were postponed until after conversion */
+ if (sapi_module.input_filter(PARSE_POST, val_list[i], &val_list[i+1], len_list[i+1], &new_val_len TSRMLS_CC)) {
+ if (php_rfc1867_callback != NULL) {
+ multipart_event_formdata event_formdata;
+ void *event_extra_data = NULL;
+
+ event_formdata.post_bytes_processed = SG(read_post_bytes);
+ event_formdata.name = val_list[i];
+ event_formdata.value = &val_list[i+1];
+ event_formdata.length = new_val_len;
+ event_formdata.newlength = &new_val_len;
+ if (php_rfc1867_callback(MULTIPART_EVENT_FORMDATA, &event_formdata, &event_extra_data TSRMLS_CC) == FAILURE) {
+ /* callback rejected the variable: free the pair and skip registration */
+ efree(val_list[i]);
+ efree(val_list[i+1]);
+ continue;
+ }
+ }
+ safe_php_register_variable(val_list[i], val_list[i+1], new_val_len, array_ptr, 0 TSRMLS_CC);
+ }
efree(val_list[i]);
efree(val_list[i+1]);
}
}
/* }}} */
-#endif
-
/* The longest property name we use in an uploaded file array */
#define MAX_SIZE_OF_INDEX sizeof("[tmp_name]")
static char *substring_conf(char *start, int len, char quote TSRMLS_DC)
{
- char *result = emalloc(len + 2);
+ char *result = emalloc(len + 1);
char *resp = result;
int i;
- for (i = 0; i < len; ++i) {
+ for (i = 0; i < len && start[i] != quote; ++i) {
if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
*resp++ = start[++i];
} else {
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
- size_t j = php_mb_gpc_mbchar_bytes(start+i TSRMLS_CC);
- while (j-- > 0 && i < len) {
- *resp++ = start[i++];
- }
- --i;
- } else {
- *resp++ = start[i];
- }
-#else
*resp++ = start[i];
-#endif
}
}
return result;
}
-static char *php_ap_getword_conf(char **line TSRMLS_DC)
+/* Extract the first word of str as a newly allocated string (non-multibyte
+ * variant). Leading whitespace is skipped. A word that starts with a single
+ * or double quote is handed to substring_conf() together with that quote
+ * character and the length of the remaining input; an unquoted word runs up
+ * to the next whitespace byte. An input with no word yields an empty string.
+ * The function no longer advances the caller's line pointer: str is taken
+ * by value. */
+static char *php_ap_getword_conf(char *str TSRMLS_DC)
{
- char *str = *line, *strend, *res, quote;
-
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
- int len=strlen(str);
- php_mb_gpc_encoding_detector(&str, &len, 1, NULL TSRMLS_CC);
- }
-#endif
-
while (*str && isspace(*str)) {
++str;
}
if (!*str) {
- *line = str;
return estrdup("");
}
- if ((quote = *str) == '"' || quote == '\'') {
- strend = str + 1;
-look_for_quote:
- while (*strend && *strend != quote) {
- if (*strend == '\\' && strend[1] && strend[1] == quote) {
- strend += 2;
- } else {
- ++strend;
- }
- }
- if (*strend && *strend == quote) {
- char p = *(strend + 1);
- if (p != '\r' && p != '\n' && p != '\0') {
- strend++;
- goto look_for_quote;
- }
- }
-
- res = substring_conf(str + 1, strend - str - 1, quote TSRMLS_CC);
-
- if (*strend == quote) {
- ++strend;
- }
+ if (*str == '"' || *str == '\'') {
+ char quote = *str;
+ /* step over the opening quote; substring_conf() receives the rest of the input */
+ str++;
+ return substring_conf(str, strlen(str), quote TSRMLS_CC);
} else {
+ char *strend = str;
- strend = str;
while (*strend && !isspace(*strend)) {
++strend;
}
- res = substring_conf(str, strend - str, 0 TSRMLS_CC);
- }
-
- while (*strend && isspace(*strend)) {
- ++strend;
+ return substring_conf(str, strend - str, 0 TSRMLS_CC);
}
-
- *line = strend;
- return res;
}
/*
int max_file_size = 0, skip_upload = 0, anonindex = 0, is_anonymous;
zval *http_post_files = NULL;
HashTable *uploaded_files = NULL;
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
int str_len = 0, num_vars = 0, num_vars_max = 2*10, *len_list = NULL;
char **val_list = NULL;
-#endif
multipart_buffer *mbuff;
zval *array_ptr = (zval *) arg;
int fd = -1;
INIT_PZVAL(http_post_files);
PG(http_globals)[TRACK_VARS_FILES] = http_post_files;
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
val_list = (char **)ecalloc(num_vars_max+2, sizeof(char *));
len_list = (int *)ecalloc(num_vars_max+2, sizeof(int));
}
-#endif
+
zend_llist_init(&header, sizeof(mime_header_entry), (llist_dtor_func_t) php_free_hdr_entry, 0);
if (php_rfc1867_callback != NULL) {
if (param) {
efree(param);
}
- param = php_ap_getword_conf(&pair TSRMLS_CC);
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
+ if (num_vars >= num_vars_max) {
+ php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
+ }
+ val_list[num_vars] = pair;
+ len_list[num_vars] = strlen(pair);
+ num_vars++;
+ php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC);
+ num_vars--;
+ param = php_rfc1867_getword(pair TSRMLS_CC);
+ } else {
+ param = php_ap_getword_conf(pair TSRMLS_CC);
+ }
} else if (!strcasecmp(key, "filename")) {
if (filename) {
efree(filename);
}
- filename = php_ap_getword_conf(&pair TSRMLS_CC);
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
+ if (num_vars >= num_vars_max) {
+ php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
+ }
+ val_list[num_vars] = pair;
+ len_list[num_vars] = strlen(pair);
+ num_vars++;
+ php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC);
+ num_vars--;
+ filename = php_rfc1867_getword(pair TSRMLS_CC);
+ } else {
+ filename = php_ap_getword_conf(pair TSRMLS_CC);
+ }
}
}
if (key) {
value = estrdup("");
}
- if (sapi_module.input_filter(PARSE_POST, param, &value, value_len, &new_val_len TSRMLS_CC)) {
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
+ /* postpone filtering, callback call and registration */
+ php_gpc_stack_variable(param, value, &val_list, &len_list, &num_vars, &num_vars_max TSRMLS_CC);
+ } else if (sapi_module.input_filter(PARSE_POST, param, &value, value_len, &new_val_len TSRMLS_CC)) {
if (php_rfc1867_callback != NULL) {
multipart_event_formdata event_formdata;
size_t newlength = new_val_len;
}
new_val_len = newlength;
}
-
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
- php_gpc_stack_variable(param, value, &val_list, &len_list, &num_vars, &num_vars_max TSRMLS_CC);
- } else {
- safe_php_register_variable(param, value, new_val_len, array_ptr, 0 TSRMLS_CC);
- }
-#else
safe_php_register_variable(param, value, new_val_len, array_ptr, 0 TSRMLS_CC);
-#endif
} else if (php_rfc1867_callback != NULL) {
multipart_event_formdata event_formdata;
snprintf(lbuf, llen, "%s_name", param);
}
- /* The \ check should technically be needed for win32 systems only where
- * it is a valid path separator. However, IE in all it's wisdom always sends
- * the full path of the file on the user's filesystem, which means that unless
- * the user does basename() they get a bogus file name. Until IE's user base drops
- * to nill or problem is fixed this code must remain enabled for all systems. */
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
if (num_vars >= num_vars_max) {
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
}
val_list[num_vars] = filename;
len_list[num_vars] = strlen(filename);
num_vars++;
- if (php_mb_gpc_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
+ if (php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
str_len = strlen(filename);
- php_mb_gpc_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC);
- }
- s = php_mb_strrchr(filename, '\\' TSRMLS_CC);
- if ((tmp = php_mb_strrchr(filename, '/' TSRMLS_CC)) > s) {
- s = tmp;
+ php_rfc1867_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC);
}
+ s = php_rfc1867_basename(filename TSRMLS_CC);
num_vars--;
} else {
-#endif
+ /* The \ check should technically be needed for win32 systems only where
+ * it is a valid path separator. However, IE in all it's wisdom always sends
+ * the full path of the file on the user's filesystem, which means that unless
+ * the user does basename() they get a bogus file name. Until IE's user base drops
+ * to nill or problem is fixed this code must remain enabled for all systems. */
s = strrchr(filename, '\\');
if ((tmp = strrchr(filename, '/')) > s) {
s = tmp;
s = tmp > s ? tmp : s;
}
#endif
-
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
+ if (s) {
+ s++;
+ } else {
+ s = filename;
+ }
}
-#endif
if (!is_anonymous) {
- if (s && s > filename) {
- safe_php_register_variable(lbuf, s+1, strlen(s+1), NULL, 0 TSRMLS_CC);
- } else {
- safe_php_register_variable(lbuf, filename, strlen(filename), NULL, 0 TSRMLS_CC);
- }
+ safe_php_register_variable(lbuf, s, strlen(s), NULL, 0 TSRMLS_CC);
}
/* Add $foo[name] */
} else {
snprintf(lbuf, llen, "%s[name]", param);
}
- if (s && s > filename) {
- register_http_post_files_variable(lbuf, s+1, http_post_files, 0 TSRMLS_CC);
- } else {
- register_http_post_files_variable(lbuf, filename, http_post_files, 0 TSRMLS_CC);
- }
+ register_http_post_files_variable(lbuf, s, http_post_files, 0 TSRMLS_CC);
efree(filename);
s = NULL;
php_rfc1867_callback(MULTIPART_EVENT_END, &event_end, &event_extra_data TSRMLS_CC);
}
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
php_flush_gpc_variables(num_vars, val_list, len_list, array_ptr TSRMLS_CC);
}
-#endif
if (lbuf) efree(lbuf);
if (abuf) efree(abuf);
}
/* }}} */
+/* Install the multibyte hooks used by the multipart/form-data handler.
+ * All five pointers are stored exactly as given, with no NULL checks.
+ * NOTE(review): the handler calls php_rfc1867_encoding_translation
+ * unconditionally and the other hooks whenever it returns non-zero, so
+ * passing NULL looks unsafe - confirm callers always pass valid functions. */
+SAPI_API void php_rfc1867_set_multibyte_callbacks(
+ php_rfc1867_encoding_translation_t encoding_translation,
+ php_rfc1867_encoding_detector_t encoding_detector,
+ php_rfc1867_encoding_converter_t encoding_converter,
+ php_rfc1867_getword_t getword,
+ php_rfc1867_basename_t basename) /* {{{ */
+{
+ php_rfc1867_encoding_translation = encoding_translation;
+ php_rfc1867_encoding_detector = encoding_detector;
+ php_rfc1867_encoding_converter = encoding_converter;
+ php_rfc1867_getword = getword;
+ php_rfc1867_basename = basename;
+}
+/* }}} */
+
/*
* Local variables:
* tab-width: 4
size_t post_bytes_processed;
} multipart_event_end;
+/* Signatures of the multibyte hooks installed through
+ * php_rfc1867_set_multibyte_callbacks(). */
+/* Non-zero result: request encoding translation is enabled. */
+typedef int (*php_rfc1867_encoding_translation_t)(TSRMLS_D);
+/* Receives parallel arrays of num strings and lengths; callers compare the result against SUCCESS. */
+typedef int (*php_rfc1867_encoding_detector_t)(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC);
+/* Receives parallel arrays of num strings and lengths (presumably converted in place - confirm). */
+typedef int (*php_rfc1867_encoding_converter_t)(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC);
+/* Multibyte-aware word extraction; the handler later efree()s the returned string. */
+typedef char* (*php_rfc1867_getword_t)(char *str TSRMLS_DC);
+/* Multibyte-aware basename; the handler uses the result as the file name string. */
+typedef char* (*php_rfc1867_basename_t)(char *str TSRMLS_DC);
+
SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler);
void destroy_uploaded_files_hash(TSRMLS_D);
void php_rfc1867_register_constants(TSRMLS_D);
extern PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void **extra TSRMLS_DC);
+SAPI_API void php_rfc1867_set_multibyte_callbacks(
+ php_rfc1867_encoding_translation_t encoding_translation,
+ php_rfc1867_encoding_detector_t encoding_detector,
+ php_rfc1867_encoding_converter_t encoding_converter,
+ php_rfc1867_getword_t getword,
+ php_rfc1867_basename_t basename);
+
#endif /* RFC1867_H */
--- /dev/null
+--TEST--
+RFC1867 character quoting
+--INI--
+file_uploads=1
+--POST_RAW--
+Content-Type: multipart/form-data; boundary=---------------------------20896060251896012921717172737
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name1
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name2'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name3"
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name\4
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name\\5
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name\'6
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name\"7
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name\8'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name\\9'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name\'10'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name\"11'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name\12"
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name\\13"
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name\'14"
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name\"15"
+
+testname
+-----------------------------20896060251896012921717172737--
+--FILE--
+<?php
+var_dump($_POST);
+?>
+--EXPECTF--
+array(15) {
+ ["name1"]=>
+ string(8) "testname"
+ ["name2"]=>
+ string(8) "testname"
+ ["name3"]=>
+ string(8) "testname"
+ ["name\\4"]=>
+ string(8) "testname"
+ ["name\\5"]=>
+ string(8) "testname"
+ ["name\\\'6"]=>
+ string(8) "testname"
+ ["name\\\"7"]=>
+ string(8) "testname"
+ ["name\\8"]=>
+ string(8) "testname"
+ ["name\\9"]=>
+ string(8) "testname"
+ ["name\'10"]=>
+ string(8) "testname"
+ ["name\\\"11"]=>
+ string(8) "testname"
+ ["name\\12"]=>
+ string(8) "testname"
+ ["name\\13"]=>
+ string(8) "testname"
+ ["name\\\'14"]=>
+ string(8) "testname"
+ ["name\"15"]=>
+ string(8) "testname"
+}
--- /dev/null
+--TEST--
+Shift_JIS request
+--SKIPIF--
+<?php
+if (!extension_loaded("mbstring")) {
+ die("skip Requires mbstring extension");
+}
+?>
+--INI--
+file_uploads=1
+mbstring.encoding_translation=1
+mbstring.http_input=Shift_JIS
+mbstring.internal_encoding=UTF-8
+--POST_RAW--
+Content-Type: multipart/form-data; boundary=---------------------------20896060251896012921717172737
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="\97\\8e\\94\"
+
+\83h\83\8c\83~\83t\83@\83\
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="pics"; filename="file1.txt"
+Content-Type: text/plain
+
+file1
+
+-----------------------------20896060251896012921717172737--
+--FILE--
+<?php
+var_dump($_FILES);
+var_dump($_POST);
+?>
+--EXPECTF--
+array(1) {
+ ["pics"]=>
+ array(5) {
+ ["name"]=>
+ string(9) "file1.txt"
+ ["type"]=>
+ string(10) "text/plain"
+ ["tmp_name"]=>
+ string(%d) "%s"
+ ["error"]=>
+ int(0)
+ ["size"]=>
+ int(6)
+ }
+}
+array(1) {
+ ["予蚕能"]=>
+ string(18) "ドレミファソ"
+}