]> granicus.if.org Git - php/commitdiff
- [DOC] add IDN support, idn_to_ascii and idn_to_utf8
authorPierre Joye <pajoye@php.net>
Mon, 26 Jan 2009 22:30:57 +0000 (22:30 +0000)
committerPierre Joye <pajoye@php.net>
Mon, 26 Jan 2009 22:30:57 +0000 (22:30 +0000)
  tests and MFB will follow Wednesday

ext/intl/config.m4
ext/intl/idn/idn.c [new file with mode: 0644]
ext/intl/idn/idn.h [new file with mode: 0644]
ext/intl/php_intl.c

index 214b23c23e9a3e38adeda7b787da8916e2916de5..9735b4450cf660e8e1b6588aac440d71e5189eb9 100755 (executable)
@@ -51,7 +51,8 @@ if test "$PHP_INTL" != "no"; then
     msgformat/msgformat_helpers.cpp \
     msgformat/msgformat_parse.c \
     grapheme/grapheme_string.c \
-    grapheme/grapheme_util.c,$ext_shared,,$ICU_INCS)
+    grapheme/grapheme_util.c \
+    idn/idn.c, $ext_shared,,$ICU_INCS)
 
   PHP_ADD_BUILD_DIR($ext_builddir/collator)
   PHP_ADD_BUILD_DIR($ext_builddir/common)
@@ -61,4 +62,5 @@ if test "$PHP_INTL" != "no"; then
   PHP_ADD_BUILD_DIR($ext_builddir/locale)
   PHP_ADD_BUILD_DIR($ext_builddir/msgformat)
   PHP_ADD_BUILD_DIR($ext_builddir/grapheme)
+  PHP_ADD_BUILD_DIR($ext_builddir/idn)
 fi
diff --git a/ext/intl/idn/idn.c b/ext/intl/idn/idn.c
new file mode 100644 (file)
index 0000000..34d9c1e
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+   +----------------------------------------------------------------------+
+   | PHP Version 5                                                        |
+   +----------------------------------------------------------------------+
+   | Copyright (c) 2009 The PHP Group                                     |
+   +----------------------------------------------------------------------+
+   | This source file is subject to version 3.01 of the PHP license,      |
+   | that is bundled with this package in the file LICENSE, and is        |
+   | available through the world-wide-web at the following url:           |
+   | http://www.php.net/license/3_01.txt                                  |
+   | If you did not receive a copy of the PHP license and are unable to   |
+   | obtain it through the world-wide-web, please send a note to          |
+   | license@php.net so we can mail you a copy immediately.               |
+   +----------------------------------------------------------------------+
+   | Author: Pierre A. Joye <pierre@php.net>                              |
+   +----------------------------------------------------------------------+
+ */
+/* $Id$ */
+
+/* {{{ includes */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <php.h>
+
+#include <unicode/uidna.h>
+#include <unicode/ustring.h>
+#include "ext/standard/php_string.h"
+
+#include "intl_error.h"
+ #include "intl_convert.h"
+/* }}} */
+
+/* {{{ idn_register_constants
+ * Register API constants
+ */
+void idn_register_constants( INIT_FUNC_ARGS )
+{
+       /* All IDNA constants are registered with the same flag mask. */
+       const int idna_const_flags = CONST_CS | CONST_PERSISTENT;
+
+       /* Default options: unassigned codepoints in the input are prohibited
+          and conformance to STD-3 ASCII rules is not checked. */
+       REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, idna_const_flags);
+
+       /* Allow unassigned codepoints in the input to be processed. */
+       REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, idna_const_flags);
+
+       /* Check that the input conforms to STD-3 ASCII rules. */
+       REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, idna_const_flags);
+}
+/* }}} */
+
+/* Conversion direction passed as the `mode` argument of php_intl_idn_to(). */
+enum {
+       INTL_IDN_TO_ASCII = 0, /* convert with uidna_IDNToASCII() */
+       INTL_IDN_TO_UTF8  = 1  /* convert with uidna_IDNToUnicode() */
+};
+
+/*
+ * Shared implementation behind idn_to_ascii() and idn_to_utf8().
+ *
+ * PHP-level arguments (parse spec "s|ll"):
+ *   domain - domain name to convert; must not be empty
+ *   option - optional IDNA option bitmask handed to ICU (defaults to 0)
+ *   status - optional; still accepted so existing three-argument calls keep
+ *            working, but its value is ignored
+ *
+ * mode - INTL_IDN_TO_ASCII selects uidna_IDNToASCII(); any other value
+ *        selects uidna_IDNToUnicode().
+ *
+ * On success the converted name is returned as a UTF-8 string. If the domain
+ * is empty or a conversion step fails, the global intl error is set and FALSE
+ * is returned. If argument parsing fails the function just returns.
+ */
+static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
+{
+       unsigned char* domain;
+       int domain_len;
+       long option = 0;
+       long unused_status = 0;
+       UChar* ustring = NULL;
+       int ustring_len = 0;
+       UErrorCode status;
+       UParseError parse_error;
+       char     *converted_utf8 = NULL;
+       int32_t   converted_utf8_len = 0;
+       UChar     converted[MAXPATHLEN];
+       int32_t   converted_ret_len;
+       /* Static messages (passed with copy flag 0), named after the PHP
+          function that is actually running. */
+       char     *empty_msg = (mode == INTL_IDN_TO_ASCII)
+               ? "idn_to_ascii: empty domain name"
+               : "idn_to_utf8: empty domain name";
+       char     *convert_msg = (mode == INTL_IDN_TO_ASCII)
+               ? "idn_to_ascii: cannot convert to ASCII"
+               : "idn_to_utf8: cannot convert to Unicode";
+
+       /* Each "l" stores through a long*. The third value used to be written
+          through a pointer to the UErrorCode variable, which is the wrong
+          type and can overrun it where long is wider; parse it into a real
+          long and discard it instead. */
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ll", (char **)&domain, &domain_len, &option, &unused_status) == FAILURE) {
+               return;
+       }
+
+       if (domain_len < 1) {
+               intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, empty_msg, 0 TSRMLS_CC );
+               RETURN_FALSE;
+       }
+
+       /* convert the string to UTF-16. */
+       status = U_ZERO_ERROR;
+       intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status );
+
+       if (U_FAILURE(status)) {
+               /* Set global error code. */
+               intl_error_set_code(NULL, status TSRMLS_CC);
+
+               /* Set error messages. */
+               intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 1 TSRMLS_CC );
+
+               /* ustring is still NULL if the helper never assigned a buffer. */
+               if (ustring) {
+                       efree(ustring);
+               }
+               RETURN_FALSE;
+       }
+
+       /* Run the requested IDNA conversion into the fixed-size stack buffer. */
+       status = U_ZERO_ERROR;
+       if (mode == INTL_IDN_TO_ASCII) {
+               converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
+       } else {
+               converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
+       }
+       efree(ustring);
+
+       if (U_FAILURE(status)) {
+               intl_error_set( NULL, status, convert_msg, 0 TSRMLS_CC );
+               RETURN_FALSE;
+       }
+
+       /* convert the result back to UTF-8. */
+       status = U_ZERO_ERROR;
+       intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);
+
+       if (U_FAILURE(status)) {
+               /* Set global error code. */
+               intl_error_set_code(NULL, status TSRMLS_CC);
+
+               /* Set error messages. */
+               intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 1 TSRMLS_CC );
+
+               /* Whether the helper assigns the output pointer on failure is
+                  not visible here, so only free a buffer that was really set. */
+               if (converted_utf8) {
+                       efree(converted_utf8);
+               }
+               RETURN_FALSE;
+       }
+
+       /* return the allocated string, not a duplicate */
+       RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
+}
+
+/* {{{ proto string idn_to_ascii(string domain [, int option [, int status]])
+   Converts a UTF-8 domain to ASCII, as defined in the IDNA RFC; returns FALSE on failure */
+PHP_FUNCTION(idn_to_ascii)
+{
+       php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
+}
+/* }}} */
+
+
+/* {{{ proto string idn_to_utf8(string domain [, int option [, int status]])
+   Converts a domain name to its IDNA Unicode form, returned as UTF-8; returns FALSE on failure */
+PHP_FUNCTION(idn_to_utf8)
+{
+       php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
+}
+/* }}} */
+
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: fdm=marker
+ * vim: noet sw=4 ts=4
+ */
diff --git a/ext/intl/idn/idn.h b/ext/intl/idn/idn.h
new file mode 100644 (file)
index 0000000..1fa4f8f
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+   +----------------------------------------------------------------------+
+   | PHP Version 5                                                        |
+   +----------------------------------------------------------------------+
+   | Copyright (c) 2009 The PHP Group                                     |
+   +----------------------------------------------------------------------+
+   | This source file is subject to version 3.01 of the PHP license,      |
+   | that is bundled with this package in the file LICENSE, and is        |
+   | available through the world-wide-web at the following url:           |
+   | http://www.php.net/license/3_01.txt                                  |
+   | If you did not receive a copy of the PHP license and are unable to   |
+   | obtain it through the world-wide-web, please send a note to          |
+   | license@php.net so we can mail you a copy immediately.               |
+   +----------------------------------------------------------------------+
+   | Author: Pierre A. Joye <pierre@php.net>                              |
+   +----------------------------------------------------------------------+
+ */
+/* $Id$ */
+
+#ifndef IDN_IDN_H
+#define IDN_IDN_H
+
+#include <php.h>
+
+PHP_FUNCTION(idn_to_ascii); /* PHP function idn_to_ascii(), defined in idn.c */
+PHP_FUNCTION(idn_to_utf8); /* PHP function idn_to_utf8(), defined in idn.c */
+
+void idn_register_constants(INIT_FUNC_ARGS); /* registers the IDNA_* constants; called from the intl MINIT */
+
+#endif /* IDN_IDN_H */
index 889250bdb98c9963b23948a8b04378671ad753aa..13f36fd03c1bb60a9172a22af7fcca4ac0bb2bd1 100755 (executable)
@@ -62,6 +62,8 @@
 #include "dateformat/dateformat_parse.h"
 #include "dateformat/dateformat_data.h"
 
+#include "idn/idn.h"
+
 #include "msgformat/msgformat.h"
 #include "common/common_error.h"
 
@@ -316,6 +318,18 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_datefmt_create, 0, 0, 3)
        ZEND_ARG_INFO(0, calendar)
        ZEND_ARG_INFO(0, pattern)
 ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_ascii, 0, 0, 1) /* argument names for idn_to_ascii(); the trailing 1 is presumably the required-argument count - confirm against the Zend macro */
+       ZEND_ARG_INFO(0, domain) /* domain name to convert */
+       ZEND_ARG_INFO(0, option) /* optional IDNA option value forwarded to ICU */
+       ZEND_ARG_INFO(0, status) /* optional; parsed by php_intl_idn_to() but overwritten before it is read */
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_utf8, 0, 0, 1) /* NOTE(review): the idn_to_utf8 entry added to the function table in this patch passes arginfo_idn_to_ascii, so this table appears unused */
+       ZEND_ARG_INFO(0, domain) /* domain name to convert */
+       ZEND_ARG_INFO(0, option) /* optional IDNA option value forwarded to ICU */
+       ZEND_ARG_INFO(0, status) /* optional; parsed by php_intl_idn_to() but overwritten before it is read */
+ZEND_END_ARG_INFO()
 /* }}} */
 
 /* {{{ intl_functions
@@ -422,6 +436,10 @@ zend_function_entry intl_functions[] = {
        PHP_FE( grapheme_stristr, grapheme_strstr_args )
        PHP_FE( grapheme_extract, grapheme_extract_args )
 
+       /* IDN functions */
+       PHP_FE(idn_to_ascii, arginfo_idn_to_ascii)
+       PHP_FE(idn_to_utf8, arginfo_idn_to_ascii)
+
        /* common functions */
        PHP_FE( intl_get_error_code, intl_0_args )
        PHP_FE( intl_get_error_message, intl_0_args )
@@ -521,12 +539,15 @@ PHP_MINIT_FUNCTION( intl )
        /* Expose ICU error codes to PHP scripts. */
        intl_expose_icu_error_codes( INIT_FUNC_ARGS_PASSTHRU );
 
+       /* Expose IDN constants to PHP scripts. */
+       idn_register_constants(INIT_FUNC_ARGS_PASSTHRU);
+
        /* Global error handling. */
        intl_error_init( NULL TSRMLS_CC );
 
        /* Set the default_locale value */
-       if( INTL_G(default_locale) == NULL ) {
-               INTL_G(default_locale) = pestrdup(uloc_getDefault(), 1) ;
+       if( INTL_G(default_locale) == NULL ) {
+               INTL_G(default_locale) = pestrdup(uloc_getDefault(), 1) ;
        }
 
        return SUCCESS;