} php_iconv_enc_scheme_t;
/* }}} */
+#define PHP_ICONV_MIME_DECODE_STRICT (1<<0)
+#define PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR (1<<1)
+
#ifdef HAVE_LIBICONV
#define iconv libiconv
#endif
static php_iconv_err_t _php_iconv_mime_encode(smart_str *pretval, const char *fname, size_t fname_nbytes, const char *fval, size_t fval_nbytes, unsigned int max_line_len, const char *lfchars, php_iconv_enc_scheme_t enc_scheme, const char *out_charset, const char *enc);
-static php_iconv_err_t _php_iconv_mime_decode(smart_str *pretval, const char *str, size_t str_nbytes, const char *enc);
+static php_iconv_err_t _php_iconv_mime_decode(smart_str *pretval, const char *str, size_t str_nbytes, const char *enc, int mode);
/* }}} */
/* {{{ static globals */
#endif
REGISTER_STRING_CONSTANT("ICONV_VERSION", version, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("ICONV_MIME_DECODE_STRICT", PHP_ICONV_MIME_DECODE_STRICT, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("ICONV_MIME_DECODE_CONTINUE_ON_ERROR", PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR, CONST_CS | CONST_PERSISTENT);
+
return SUCCESS;
}
/* }}} */
/* }}} */
/* {{{ _php_iconv_mime_decode() */
-static php_iconv_err_t _php_iconv_mime_decode(smart_str *pretval, const char *str, size_t str_nbytes, const char *enc)
+static php_iconv_err_t _php_iconv_mime_decode(smart_str *pretval, const char *str, size_t str_nbytes, const char *enc, int mode)
{
php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS;
size_t csname_len;
const char *encoded_text = NULL;
size_t encoded_text_len;
+ const char *encoded_word = NULL;
+ const char *spaces = NULL;
php_iconv_enc_scheme_t enc_scheme;
p1 = str;
for (str_left = str_nbytes; str_left > 0; str_left--, p1++) {
+ int eos = 0;
switch (scan_stat) {
case 0:
case '\n':
scan_stat = 8;
break;
-
+
case '=':
+ encoded_word = p1;
scan_stat = 1;
break;
-
+
+ case ' ': case '\t':
+ spaces = p1;
+ scan_stat = 11;
+ break;
+
default:
_php_iconv_appendc(pretval, *p1, cd_pl);
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ }
+ break;
}
break;
case 1:
if (*p1 != '?') {
- err = PHP_ICONV_ERR_MALFORMED;
- goto out;
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ } else {
+ scan_stat = 0;
+ }
+ break;
+ } else {
+ err = PHP_ICONV_ERR_MALFORMED;
+ goto out;
+ }
}
csname = p1 + 1;
scan_stat = 2;
csname_len = (size_t)(p1 - csname);
if (csname_len > sizeof(tmpbuf) - 1) {
- err = PHP_ICONV_ERR_MALFORMED;
- goto out;
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ } else {
+ scan_stat = 0;
+ }
+ break;
+ } else {
+ err = PHP_ICONV_ERR_MALFORMED;
+ goto out;
+ }
}
memcpy(tmpbuf, csname, csname_len);
cd = iconv_open(enc, tmpbuf);
if (cd == (iconv_t)(-1)) {
-#if ICONV_SUPPORTS_ERRNO
- if (errno == EINVAL) {
- err = PHP_ICONV_ERR_WRONG_CHARSET;
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ /* iconv_open() failed, so cd is (iconv_t)(-1) here and must not be
+  * used for conversion; pass the raw encoded-word through cd_pl, the
+  * descriptor every other pass-through append in this function uses */
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ } else {
+ scan_stat = 0;
+ }
+ break;
} else {
- err = PHP_ICONV_ERR_CONVERTER;
- }
+#if ICONV_SUPPORTS_ERRNO
+ if (errno == EINVAL) {
+ err = PHP_ICONV_ERR_WRONG_CHARSET;
+ } else {
+ err = PHP_ICONV_ERR_CONVERTER;
+ }
#else
- err = PHP_ICONV_ERR_UNKNOWN;
+ err = PHP_ICONV_ERR_UNKNOWN;
#endif
- goto out;
+ goto out;
+ }
}
}
break;
switch (*p1) {
case 'B':
enc_scheme = PHP_ICONV_ENC_SCHEME_BASE64;
+ scan_stat = 4;
break;
case 'Q':
enc_scheme = PHP_ICONV_ENC_SCHEME_QPRINT;
+ scan_stat = 4;
break;
default:
- err = PHP_ICONV_ERR_MALFORMED;
- goto out;
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ } else {
+ scan_stat = 0;
+ }
+ break;
+ } else {
+ err = PHP_ICONV_ERR_MALFORMED;
+ goto out;
+ }
}
- scan_stat = 4;
break;
case 4:
if (*p1 != '?') {
- err = PHP_ICONV_ERR_MALFORMED;
- goto out;
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ } else {
+ scan_stat = 0;
+ }
+ break;
+ } else {
+ err = PHP_ICONV_ERR_MALFORMED;
+ goto out;
+ }
}
encoded_text = p1 + 1;
scan_stat = 5;
case 5:
if (*p1 == '?') {
- char *decoded_text;
- size_t decoded_text_len;
-
- if (encoded_text == NULL) {
- err = PHP_ICONV_ERR_MALFORMED;
- goto out;
- }
-
encoded_text_len = (size_t)(p1 - encoded_text);
- switch (enc_scheme) {
- case PHP_ICONV_ENC_SCHEME_BASE64:
- decoded_text = (char *)php_base64_decode((unsigned char*)encoded_text, (int)encoded_text_len, &decoded_text_len);
- break;
-
- case PHP_ICONV_ENC_SCHEME_QPRINT:
- decoded_text = (char *)php_quot_print_decode((unsigned char*)encoded_text, (int)encoded_text_len, &decoded_text_len);
- break;
- }
-
- if (decoded_text == NULL) {
- err = PHP_ICONV_ERR_UNKNOWN;
- goto out;
- }
-
- err = _php_iconv_appendl(pretval, decoded_text, decoded_text_len, cd);
- efree(decoded_text);
- if (err != PHP_ICONV_ERR_SUCCESS) {
- goto out;
- }
scan_stat = 6;
}
break;
- case 6:
- if (*p1 != '=') {
- err = PHP_ICONV_ERR_MALFORMED;
- goto out;
- }
- scan_stat = 0;
- break;
-
case 7:
if (*p1 == '\n') {
scan_stat = 8;
/* bare CR */
_php_iconv_appendc(pretval, '\r', cd_pl);
_php_iconv_appendc(pretval, *p1, cd_pl);
+ scan_stat = 0;
}
break;
err = PHP_ICONV_ERR_MALFORMED;
goto out;
}
- scan_stat = 9;
+ /* compare, do not assign: with `=` the condition is always false, the
+  * space is never emitted and encoded_word is silently reset to NULL */
+ if (encoded_word == NULL) {
+ _php_iconv_appendc(pretval, ' ', cd_pl);
+ }
+ spaces = NULL;
+ scan_stat = 11;
break;
+ case 6:
+ if (*p1 != '=') {
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ } else {
+ scan_stat = 0;
+ }
+ break;
+ } else {
+ err = PHP_ICONV_ERR_MALFORMED;
+ goto out;
+ }
+ }
+ scan_stat = 9;
+ if (str_left == 1) {
+ eos = 1;
+ } else {
+ break;
+ }
+
case 9:
- if (*p1 == '=') {
- scan_stat = 1;
- } else if (*p1 != ' ' && *p1 != '\t') {
- err = PHP_ICONV_ERR_MALFORMED;
- goto out;
+ switch (*p1) {
+ default:
+ if (!eos) {
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ scan_stat = 12;
+ break;
+ }
+ }
+ /* break is omitted intentionally */
+
+ case '\r': case '\n': case ' ': case '\t': {
+ char *decoded_text;
+ size_t decoded_text_len;
+
+ switch (enc_scheme) {
+ case PHP_ICONV_ENC_SCHEME_BASE64:
+ decoded_text = (char *)php_base64_decode((unsigned char*)encoded_text, (int)encoded_text_len, &decoded_text_len);
+ break;
+
+ case PHP_ICONV_ENC_SCHEME_QPRINT:
+ decoded_text = (char *)php_quot_print_decode((unsigned char*)encoded_text, (int)encoded_text_len, &decoded_text_len);
+ break;
+ }
+
+ if (decoded_text == NULL) {
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ } else {
+ scan_stat = 0;
+ }
+ break;
+ } else {
+ err = PHP_ICONV_ERR_UNKNOWN;
+ goto out;
+ }
+ }
+
+ err = _php_iconv_appendl(pretval, decoded_text, decoded_text_len, cd);
+ efree(decoded_text);
+
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ err = _php_iconv_appendl(pretval, encoded_word, (size_t)((p1 + 1) - encoded_word), cd_pl);
+ if (err != PHP_ICONV_ERR_SUCCESS) {
+ goto out;
+ }
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ } else {
+ scan_stat = 0;
+ }
+ break;
+ } else {
+ goto out;
+ }
+ }
+
+ if (eos) {
+ scan_stat = 0;
+ break;
+ }
+
+ switch (*p1) {
+ case '\r':
+ scan_stat = 7;
+ break;
+
+ case '\n':
+ scan_stat = 8;
+ break;
+
+ case '=':
+ scan_stat = 1;
+ break;
+
+ case ' ': case '\t':
+ scan_stat = 11;
+ break;
+
+ default:
+ _php_iconv_appendc(pretval, *p1, cd_pl);
+ scan_stat = 12;
+ break;
+ }
+ } break;
}
break;
scan_stat = 3;
}
break;
- }
- }
- if (scan_stat != 0) {
- err = PHP_ICONV_ERR_MALFORMED;
- goto out;
+ case 11:
+ switch (*p1) {
+ case '\r':
+ scan_stat = 7;
+ break;
+
+ case '\n':
+ scan_stat = 8;
+ break;
+
+ case '=':
+ if (spaces != NULL) {
+ _php_iconv_appendl(pretval, spaces, (size_t)(p1 - spaces), cd_pl);
+ spaces = NULL;
+ }
+ encoded_word = p1;
+ scan_stat = 1;
+ break;
+
+ case ' ': case '\t':
+ break;
+
+ default: /* beginning of a word delimited by white spaces */
+ if (spaces != NULL) {
+ _php_iconv_appendl(pretval, spaces, (size_t)(p1 - spaces), cd_pl);
+ spaces = NULL;
+ }
+ _php_iconv_appendc(pretval, *p1, cd_pl);
+ encoded_word = NULL;
+ if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
+ scan_stat = 12;
+ }
+ break;
+ }
+ break;
+
+ case 12:
+ switch (*p1) {
+ case '\r':
+ scan_stat = 7;
+ break;
+
+ case '\n':
+ scan_stat = 8;
+ break;
+
+ case ' ': case '\t':
+ spaces = p1;
+ scan_stat = 11;
+ break;
+
+ default:
+ _php_iconv_appendc(pretval, *p1, cd_pl);
+ break;
+ }
+ break;
+ }
}
- if (cd != (iconv_t)(-1)) {
- if ((err = _php_iconv_appendl(pretval, NULL, 0, cd)) != PHP_ICONV_ERR_SUCCESS) {
+ if (scan_stat != 0 && scan_stat != 11 && scan_stat != 12) {
+ if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
+ if (scan_stat == 1) {
+ _php_iconv_appendc(pretval, '=', cd_pl);
+ }
+ err = 0;
+ } else {
+ err = PHP_ICONV_ERR_MALFORMED;
goto out;
}
}
+
smart_str_0(pretval);
out:
if (cd != (iconv_t)(-1)) {
break;
case PHP_ICONV_ERR_WRONG_CHARSET:
- php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Wrong charset, cannot convert from `%s' to `%s'",
+ php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Wrong charset, conversion from `%s' to `%s' is not allowed",
in_charset, out_charset);
break;
case PHP_ICONV_ERR_ILLEGAL_CHAR:
- php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Detected incomplete character in input string");
+ php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Detected an incomplete multibyte character in input string");
break;
case PHP_ICONV_ERR_ILLEGAL_SEQ:
- php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Detected illegal character in input string");
+ php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Detected an illegal character in input string");
break;
case PHP_ICONV_ERR_TOO_BIG:
/* should not happen */
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Run out buffer");
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Run out of buffer");
+ break;
+
+ case PHP_ICONV_ERR_MALFORMED:
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Malformed string");
break;
default:
}
/* }}} */
-/* {{{ proto string iconv_mime_decode(string encoded_string [, string charset])
+/* {{{ proto string iconv_mime_decode(string encoded_string [, int mode, string charset])
Decodes a mime header field */
PHP_FUNCTION(iconv_mime_decode)
{
int encoded_str_len;
char *charset;
int charset_len;
+ long mode = 0; /* optional ("s|ls"): must have a defined value when the caller omits it */
smart_str retval = {0};
charset = ICONVG(internal_encoding);
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
- &encoded_str, &encoded_str_len, &charset, &charset_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls",
+ &encoded_str, &encoded_str_len, &mode, &charset, &charset_len) == FAILURE) {
RETURN_FALSE;
}
- err = _php_iconv_mime_decode(&retval, encoded_str, encoded_str_len, charset);
+ err = _php_iconv_mime_decode(&retval, encoded_str, encoded_str_len, charset, mode);
_php_iconv_show_error(err, charset, "???" TSRMLS_CC);
if (err == PHP_ICONV_ERR_SUCCESS) {
iconv.internal_charset=iso-8859-1
--FILE--
<?php
-$header = <<< HERE
+function my_error_handler($errno, $errmsg, $filename, $linenum, $vars)
+{
+ echo "$errno: $errmsg\n";
+}
+set_error_handler('my_error_handler');
+
+function do_single_test($header)
+{
+ global $mode;
+
+ $result = iconv_mime_decode($header, $mode, "UTF-8");
+ printf("(%d) \"%s\"\n", iconv_strlen($result, "UTF-8"), $result);
+}
+
+function do_regression_test()
+{
+ do_single_test(<<< HERE
Subject: =?ISO-8859-1?Q?Pr=FCfung?=
- =?ISO-8859-1*de_DE?Q?=20Pr=FCfung?=
+ =?ISO-8859-1*de_DE?Q?Pr=FCfung?=\t
=?ISO-8859-2?Q?k=F9=D4=F1=D3let?=
-HERE;
-$result = iconv_mime_decode($header, "UTF-8");
-printf("(%d) \"%s\"\n", iconv_strlen($result, "UTF-8"), $result);
+HERE
+);
+ do_single_test(<<< HERE
+Subject: =?ISO-8859-1?Q?Pr=FCfung?= =?ISO-8859-1*de_DE?Q?=20Pr=FCfung?= \t =?ISO-8859-2?Q?k=F9=D4=F1=D3let?=
+HERE
+);
+ do_single_test(<<< HERE
+Subject: =?ISO-8859-1?Q?Pr=FCfung?==?ISO-8859-1*de_DE?Q?Pr=FCfung?==?ISO-8859-2?Q?k=F9=D4=F1=D3let?=
+HERE
+);
+ do_single_test(<<< HERE
+Subject: =?ISO-8859-1?Q?Pr=FCfung?= =?ISO-8859-1*de_DE?Q?Pr=FCfung?? =?ISO-8859-2?X?k=F9=D4=F1=D3let?=
+HERE
+);
+}
+
+$mode = 0;
+do_regression_test();
+$mode = ICONV_MIME_DECODE_STRICT;
+do_regression_test();
+$mode = ICONV_MIME_DECODE_CONTINUE_ON_ERROR;
+do_regression_test();
+$mode = ICONV_MIME_DECODE_STRICT | ICONV_MIME_DECODE_CONTINUE_ON_ERROR;
+do_regression_test();
--EXPECT--
-(33) "Subject: Prüfung Prüfung kůÔńÓlet"
+(31) "Subject: PrüfungPrüfungkůÔńÓlet"
+(32) "Subject: Prüfung PrüfungkůÔńÓlet"
+(31) "Subject: PrüfungPrüfungkůÔńÓlet"
+2: iconv_mime_decode(): Malformed string
+(0) ""
+(31) "Subject: PrüfungPrüfungkůÔńÓlet"
+(32) "Subject: Prüfung PrüfungkůÔńÓlet"
+(100) "Subject: =?ISO-8859-1?Q?Pr=FCfung?==?ISO-8859-1*de_DE?Q?Pr=FCfung?==?ISO-8859-2?Q?k=F9=D4=F1=D3let?="
+2: iconv_mime_decode(): Malformed string
+(0) ""
+(31) "Subject: PrüfungPrüfungkůÔńÓlet"
+(32) "Subject: Prüfung PrüfungkůÔńÓlet"
+(31) "Subject: PrüfungPrüfungkůÔńÓlet"
+(84) "Subject: Prüfung=?ISO-8859-1*de_DE?Q?Pr=FCfung?? =?ISO-8859-2?X?k=F9=D4=F1=D3let?="
+(31) "Subject: PrüfungPrüfungkůÔńÓlet"
+(32) "Subject: Prüfung PrüfungkůÔńÓlet"
+(100) "Subject: =?ISO-8859-1?Q?Pr=FCfung?==?ISO-8859-1*de_DE?Q?Pr=FCfung?==?ISO-8859-2?Q?k=F9=D4=F1=D3let?="
+(84) "Subject: Prüfung=?ISO-8859-1*de_DE?Q?Pr=FCfung?? =?ISO-8859-2?X?k=F9=D4=F1=D3let?="