]> granicus.if.org Git - php/commitdiff
- Support for UTS #46.
authorGustavo André dos Santos Lopes <cataphract@php.net>
Thu, 24 Nov 2011 17:54:50 +0000 (17:54 +0000)
committerGustavo André dos Santos Lopes <cataphract@php.net>
Thu, 24 Nov 2011 17:54:50 +0000 (17:54 +0000)
ext/intl/common/common_error.c
ext/intl/idn/idn.c
ext/intl/php_intl.c
ext/intl/tests/idn_uts46_basic.phpt [new file with mode: 0644]
ext/intl/tests/idn_uts46_errors.phpt [new file with mode: 0644]

index 3ab7fdfbdd5d3ae9e8effaad33a2d7547a267890..14d9cebfe7e1a5ab28f34eb3a6b16341f370d21b 100755 (executable)
@@ -232,7 +232,6 @@ void intl_expose_icu_error_codes( INIT_FUNC_ARGS )
        INTL_EXPOSE_CONST( U_REGEX_ERROR_LIMIT );
 
        /* The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes */
-#if defined(U_IDNA_PROHIBITED_ERROR)
        INTL_EXPOSE_CONST( U_IDNA_PROHIBITED_ERROR );
        INTL_EXPOSE_CONST( U_IDNA_ERROR_START );
        INTL_EXPOSE_CONST( U_IDNA_UNASSIGNED_ERROR );
@@ -242,8 +241,8 @@ void intl_expose_icu_error_codes( INIT_FUNC_ARGS )
        INTL_EXPOSE_CONST( U_IDNA_VERIFICATION_ERROR );
        INTL_EXPOSE_CONST( U_IDNA_LABEL_TOO_LONG_ERROR );
        INTL_EXPOSE_CONST( U_IDNA_ZERO_LENGTH_LABEL_ERROR );
+       INTL_EXPOSE_CONST( U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR );
        INTL_EXPOSE_CONST( U_IDNA_ERROR_LIMIT );
-#endif
 
        /* Aliases for StringPrep */
        INTL_EXPOSE_CONST( U_STRINGPREP_PROHIBITED_ERROR );
index 23cd0ea87244735b7bd7ebee54e20c0ab218df9c..833718e75b3605e51757869e879251d64d046002 100644 (file)
 #include "ext/standard/php_string.h"
 
 #include "intl_error.h"
- #include "intl_convert.h"
+#include "intl_convert.h"
 /* }}} */
 
+#ifdef UIDNA_INFO_INITIALIZER
+#define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */
+#endif
+
+enum { /* values accepted for the "variant" argument; exposed to userland as INTL_IDNA_VARIANT_* (note IDN vs. IDNA) */
+       INTL_IDN_VARIANT_2003 = 0, /* dispatched to php_intl_idn_to() */
+       INTL_IDN_VARIANT_UTS46 /* dispatched to php_intl_idn_to_46(); only usable when HAVE_46_API is defined */
+};
+
 /* {{{ idn_register_constants
  * Register API constants
  */
 void idn_register_constants( INIT_FUNC_ARGS )
 {
+       /* OPTIONS */
+
        /* Option to prohibit processing of unassigned codepoints in the input and
           do not check if the input conforms to STD-3 ASCII rules. */
        REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
@@ -46,6 +57,50 @@ void idn_register_constants( INIT_FUNC_ARGS )
 
        /* Option to check if input conforms to STD-3 ASCII rules */
        REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
+
+#ifdef HAVE_46_API
+
+       /* Option to check for whether the input conforms to the BiDi rules.
+        * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
+       REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
+
+       /* Option to check for whether the input conforms to the CONTEXTJ rules.
+        * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
+       REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
+
+       /* Option for nontransitional processing in ToASCII().
+        * By default, ToASCII() uses transitional processing.
+        * Ignored by the IDNA2003 implementation. */
+       REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
+
+       /* Option for nontransitional processing in ToUnicode().
+        * By default, ToUnicode() uses transitional processing.
+        * Ignored by the IDNA2003 implementation. */
+       REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
+#endif
+
+       /* VARIANTS */
+       REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
+#ifdef HAVE_46_API
+       REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
+#endif
+
+#ifdef HAVE_46_API
+       /* PINFO ERROR CODES */
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
+#endif
 }
 /* }}} */
 
@@ -54,11 +109,100 @@ enum {
        INTL_IDN_TO_UTF8
 };
 
-static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
+/* Function form of INTL_CHECK_STATUS whose message prefix depends on `mode`.
+ *
+ * Always stores `err` as the global intl error code. If `err` is a failure
+ * code, also stores "<function>: <msg>" as the custom error message, where
+ * <function> is "idn_to_ascii" for INTL_IDN_TO_ASCII and "idn_to_utf8" for any
+ * other mode.
+ *
+ * Returns SUCCESS when `err` is not a failure code, FAILURE otherwise. */
+static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC)
+{
+       const char *func_name;
+       char *full_msg;
+
+       intl_error_set_code(NULL, err TSRMLS_CC);
+       if (!U_FAILURE(err)) {
+               return SUCCESS;
+       }
+
+       func_name = (mode == INTL_IDN_TO_ASCII) ? "idn_to_ascii" : "idn_to_utf8";
+       spprintf(&full_msg, 0, "%s: %s", func_name, msg);
+       /* third argument 1: the message is copied, so our buffer can go right away */
+       intl_error_set_custom_msg(NULL, full_msg, 1 TSRMLS_CC);
+       efree(full_msg);
+
+       return FAILURE;
+}
+
+/* Reports an invalid-argument condition: forwards `msg` and `mode` to
+ * php_intl_idn_check_status() with the code U_ILLEGAL_ARGUMENT_ERROR.
+ * The status returned by that call is not needed here. */
+static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC)
+{
+       (void)php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode TSRMLS_CC);
+}
+
+#ifdef HAVE_46_API
+/* UTS #46 implementation behind idn_to_ascii()/idn_to_utf8().
+ *
+ * Converts `domain` (domain_len bytes, UTF-8) with ICU's UTS #46 API:
+ * uidna_nameToASCII_UTF8() when mode is INTL_IDN_TO_ASCII,
+ * uidna_nameToUnicodeUTF8() otherwise. `option` is handed unchanged to
+ * uidna_openUTS46().
+ *
+ * return_value is set to the converted string when ICU reports no IDNA errors
+ * (info.errors == 0) and to FALSE otherwise. An ICU failure status sets the
+ * intl error and also yields FALSE.
+ *
+ * If `idna_info` is non-NULL (the caller has already made it an array), it
+ * receives the keys "result" (the converted string, stored even when errors
+ * were reported), "isTransitionalDifferent" and "errors".
+ */
+static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
+               const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info)
+{
+       UErrorCode        status = U_ZERO_ERROR;
+       UIDNA             *uts46;
+       int32_t           len;
+       int32_t           buffer_capac = 255; /* no domain name may exceed this */
+       char              *buffer = emalloc(buffer_capac);
+       UIDNAInfo         info = UIDNA_INFO_INITIALIZER;
+       int                       buffer_used = 0; /* set once ownership of buffer moved into a zval */
+
+       uts46 = uidna_openUTS46(option, &status);
+       if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
+                       mode TSRMLS_CC) == FAILURE) {
+               efree(buffer);
+               RETURN_FALSE;
+       }
+
+       if (mode == INTL_IDN_TO_ASCII) {
+               len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
+                               buffer, buffer_capac, &info, &status);
+       } else {
+               len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len,
+                               buffer, buffer_capac, &info, &status);
+       }
+       if (php_intl_idn_check_status(status, "failed to convert name",
+                       mode TSRMLS_CC) == FAILURE) {
+               uidna_close(uts46);
+               efree(buffer);
+               RETURN_FALSE;
+       }
+       if (len >= buffer_capac) {
+               /* No room left for the terminating NUL. E_ERROR is fatal and does
+                * not come back here, so release the ICU object and the buffer
+                * first; otherwise the UIDNA instance would be leaked. */
+               uidna_close(uts46);
+               efree(buffer);
+               php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an unexpected length");
+               RETURN_FALSE; /* defensive; not expected to be reached */
+       }
+
+       buffer[len] = '\0';
+
+       if (info.errors == 0) {
+               /* last argument 0: the zval takes over buffer instead of copying it */
+               RETVAL_STRINGL(buffer, len, 0);
+               buffer_used = 1;
+       } else {
+               RETVAL_FALSE;
+       }
+
+       if (idna_info) {
+               if (buffer_used) { /* used in return_value then */
+                       /* share the return value zval rather than duplicating the string */
+                       zval_addref_p(return_value);
+                       add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value);
+               } else {
+                       /* conversion had errors: the string is still reported in the details */
+                       zval *zv;
+                       ALLOC_INIT_ZVAL(zv);
+                       ZVAL_STRINGL(zv, buffer, len, 0);
+                       buffer_used = 1;
+                       add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv);
+               }
+               add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
+                               sizeof("isTransitionalDifferent"), info.isTransitionalDifferent);
+               add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors);
+       }
+
+       if (!buffer_used) {
+               efree(buffer);
+       }
+
+       uidna_close(uts46);
+}
+#endif
+
+static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
+               const char *domain, int domain_len, uint32_t option, int mode)
 {
-       unsigned char* domain;
-       int domain_len;
-       long option = 0;
        UChar* ustring = NULL;
        int ustring_len = 0;
        UErrorCode status;
@@ -67,18 +211,9 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
        UChar     converted[MAXPATHLEN];
        int32_t   converted_ret_len;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", (char **)&domain, &domain_len, &option) == FAILURE) {
-               return;
-       }
-
-       if (domain_len < 1) {
-               intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: empty domain name", 0 TSRMLS_CC );
-               RETURN_FALSE;
-       }
-
        /* convert the string to UTF-16. */
        status = U_ZERO_ERROR;
-       intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status );
+       intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);
 
        if (U_FAILURE(status)) {
                intl_error_set_code(NULL, status TSRMLS_CC);
@@ -123,11 +258,75 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
        RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
 }
 
+static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
+{
+       char *domain;
+       int domain_len;
+       long option = 0,
+                variant = INTL_IDN_VARIANT_2003;
+       zval *idna_info = NULL;
+
+       intl_error_reset(NULL TSRMLS_CC);
+
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz",
+                       &domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
+               php_intl_bad_args("bad arguments", mode TSRMLS_CC);
+               RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
+       }
+
+#ifdef HAVE_46_API
+       if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
+               php_intl_bad_args("invalid variant, must be one of {"
+                       "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC);
+               RETURN_FALSE;
+       }
+#else
+       if (variant != INTL_IDN_VARIANT_2003) {
+               php_intl_bad_args("invalid variant, PHP was compiled against "
+                       "an old version of ICU and only supports INTL_IDN_VARIANT_2003",
+                       mode TSRMLS_CC);
+               RETURN_FALSE;
+       }
+#endif
+
+       if (domain_len < 1) {
+               php_intl_bad_args("empty domain name", mode TSRMLS_CC);
+               RETURN_FALSE;
+       }
+       if (domain_len > INT32_MAX - 1) {
+               php_intl_bad_args("domain name too large", mode TSRMLS_CC);
+               RETURN_FALSE;
+       }
+       /* don't check options; it wasn't checked before */
+
+       if (idna_info != NULL) {
+               if (variant == INTL_IDN_VARIANT_2003) {
+                       php_error_docref0(NULL TSRMLS_CC, E_NOTICE,
+                               "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
+                               "takes 3 - extra argument ignored");
+               } else {
+                       zval_dtor(idna_info);
+                       array_init(idna_info);
+               }
+       }
+       
+       if (variant == INTL_IDN_VARIANT_2003) {
+               php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
+                               domain, domain_len, (uint32_t)option, mode);
+       }
+#ifdef HAVE_46_API
+       else {
+               php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len,
+                               (uint32_t)option, mode, idna_info);
+       }
+#endif
+}
+
 /* {{{ proto string idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
    Converts a Unicode domain to its ASCII representation, as defined in the IDNA RFC */
 PHP_FUNCTION(idn_to_ascii)
 {
-       php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
+       php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
 }
 /* }}} */
 
@@ -136,7 +335,7 @@ PHP_FUNCTION(idn_to_ascii)
    Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
 PHP_FUNCTION(idn_to_utf8)
 {
-       php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
+       php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
 }
 /* }}} */
 
index de5226b3d84bad1ae4be96116fe398a28c183a90..efe0ddd242319a0803daf045e2f839ce6fdf780a 100755 (executable)
@@ -335,13 +335,15 @@ ZEND_END_ARG_INFO()
 ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_ascii, 0, 0, 1)
        ZEND_ARG_INFO(0, domain)
        ZEND_ARG_INFO(0, option)
-       ZEND_ARG_INFO(0, status)
+       ZEND_ARG_INFO(0, variant)
+       ZEND_ARG_INFO(1, idn_info)
 ZEND_END_ARG_INFO()
 
 ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_utf8, 0, 0, 1)
        ZEND_ARG_INFO(0, domain)
        ZEND_ARG_INFO(0, option)
-       ZEND_ARG_INFO(0, status)
+       ZEND_ARG_INFO(0, variant)
+       ZEND_ARG_INFO(1, idn_info)
 ZEND_END_ARG_INFO()
 
 ZEND_BEGIN_ARG_INFO_EX( arginfo_resourcebundle_create_proc, 0, 0, 2 )
diff --git a/ext/intl/tests/idn_uts46_basic.phpt b/ext/intl/tests/idn_uts46_basic.phpt
new file mode 100644 (file)
index 0000000..2ca1850
--- /dev/null
@@ -0,0 +1,53 @@
+--TEST--
+IDN UTS #46 API basic tests
+--SKIPIF--
+<?php
+       if (!extension_loaded('intl'))
+               die('skip');
+       if (!defined('INTL_IDNA_VARIANT_UTS46'))
+               die('skip no UTS #46 API');
+--FILE--
+<?php
+$utf8dn = "www.fußball.com"; // UTF-8 name containing "ß"
+$asciiNonTrans = "www.xn--fuball-cta.com"; // ASCII form of $utf8dn under nontransitional processing
+
+echo "all ok, no details:", "\n";
+var_dump(idn_to_ascii($utf8dn, 
+       IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46));
+       
+echo "all ok, no details, transitional:", "\n";
+var_dump(idn_to_ascii($utf8dn, 0, INTL_IDNA_VARIANT_UTS46)); // options 0: transitional processing, "ß" comes out as "ss"
+
+echo "all ok, with details:", "\n";
+var_dump(idn_to_ascii($utf8dn, IDNA_NONTRANSITIONAL_TO_ASCII,
+       INTL_IDNA_VARIANT_UTS46, $info)); // $info is filled with result / isTransitionalDifferent / errors
+var_dump($info);
+
+echo "reverse, ok, with details:", "\n";
+var_dump(idn_to_utf8($asciiNonTrans, 0, INTL_IDNA_VARIANT_UTS46, $info)); // opposite direction: ASCII form back to UTF-8
+var_dump($info);
+--EXPECT--
+all ok, no details:
+string(22) "www.xn--fuball-cta.com"
+all ok, no details, transitional:
+string(16) "www.fussball.com"
+all ok, with details:
+string(22) "www.xn--fuball-cta.com"
+array(3) {
+  ["result"]=>
+  string(22) "www.xn--fuball-cta.com"
+  ["isTransitionalDifferent"]=>
+  bool(true)
+  ["errors"]=>
+  int(0)
+}
+reverse, ok, with details:
+string(16) "www.fußball.com"
+array(3) {
+  ["result"]=>
+  string(16) "www.fußball.com"
+  ["isTransitionalDifferent"]=>
+  bool(false)
+  ["errors"]=>
+  int(0)
+}
diff --git a/ext/intl/tests/idn_uts46_errors.phpt b/ext/intl/tests/idn_uts46_errors.phpt
new file mode 100644 (file)
index 0000000..a336e69
--- /dev/null
@@ -0,0 +1,89 @@
+--TEST--
+IDN UTS #46 API error tests
+--SKIPIF--
+<?php
+       if (!extension_loaded('intl'))
+               die('skip');
+       if (!defined('INTL_IDNA_VARIANT_UTS46'))
+               die('skip no UTS #46 API');
+--FILE--
+<?php
+ini_set("intl.error_level", E_WARNING); // the expected output relies on intl errors showing up as warnings
+echo "=> PHP level errors", "\n";
+
+echo "bad args:", "\n";
+var_dump(idn_to_ascii("", 0, array())); // third argument (variant) must be an integer
+var_dump(idn_to_ascii("", 0, INTL_IDNA_VARIANT_UTS46, $foo, null)); // one argument too many
+
+echo "bad variant:", "\n";
+var_dump(idn_to_ascii("", 0, INTL_IDNA_VARIANT_UTS46 + 10)); // not one of the known variants
+
+echo "empty domain:", "\n";
+var_dump(idn_to_ascii("", 0, INTL_IDNA_VARIANT_UTS46));
+
+echo "fourth arg for 2003 variant (only notice raised):", "\n";
+var_dump(idn_to_ascii("foo.com", 0, INTL_IDNA_VARIANT_2003, $foo)); // conversion still succeeds
+
+echo "with error, but no details arg:", "\n";
+var_dump(idn_to_ascii("www.fußball.com-", 0, INTL_IDNA_VARIANT_UTS46)); // last label ends with a hyphen
+
+echo "with error, with details arg:", "\n";
+var_dump(idn_to_ascii("www.fußball.com-", IDNA_NONTRANSITIONAL_TO_ASCII,
+       INTL_IDNA_VARIANT_UTS46, $foo)); // returns false, but $foo["result"] still holds the converted string
+var_dump($foo);
+
+echo "with error, with details arg, contextj:", "\n";
+var_dump(idn_to_ascii(
+               html_entity_decode("www.a&#x200D;b.com", 0, "UTF-8"), // puts U+200D between "a" and "b"
+               IDNA_NONTRANSITIONAL_TO_ASCII | IDNA_CHECK_CONTEXTJ,
+               INTL_IDNA_VARIANT_UTS46, $foo));
+var_dump($foo);
+var_dump($foo["errors"]==IDNA_ERROR_CONTEXTJ); // CONTEXTJ is the only error bit expected
+--EXPECTF--
+=> PHP level errors
+bad args:
+
+Warning: idn_to_ascii() expects parameter 3 to be long, array given in %s on line %d
+
+Warning: idn_to_ascii(): idn_to_ascii: bad arguments in %s on line %d
+NULL
+
+Warning: idn_to_ascii() expects at most 4 parameters, 5 given in %s on line %d
+
+Warning: idn_to_ascii(): idn_to_ascii: bad arguments in %s on line %d
+NULL
+bad variant:
+
+Warning: idn_to_ascii(): idn_to_ascii: invalid variant, must be one of {INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46} in %s on line %d
+bool(false)
+empty domain:
+
+Warning: idn_to_ascii(): idn_to_ascii: empty domain name in %s on line %d
+bool(false)
+fourth arg for 2003 variant (only notice raised):
+
+Notice: idn_to_ascii(): 4 arguments were provided, but INTL_IDNA_VARIANT_2003 only takes 3 - extra argument ignored in %s on line %d
+string(7) "foo.com"
+with error, but no details arg:
+bool(false)
+with error, with details arg:
+bool(false)
+array(3) {
+  ["result"]=>
+  string(23) "www.xn--fuball-cta.com-"
+  ["isTransitionalDifferent"]=>
+  bool(true)
+  ["errors"]=>
+  int(16)
+}
+with error, with details arg, contextj:
+bool(false)
+array(3) {
+  ["result"]=>
+  string(18) "www.xn--ab-m1t.com"
+  ["isTransitionalDifferent"]=>
+  bool(true)
+  ["errors"]=>
+  int(4096)
+}
+bool(true)