bool skip_shebang;
bool increment_lineno;
+ bool variable_width_locale; /* UTF-8, Shift-JIS, Big5, ISO 2022, EUC, etc */
+ bool ascii_compatible_locale; /* locale uses ASCII characters as singletons */
+ /* and doesn't use them as lead/trail units */
+
zend_string *doc_comment;
uint32_t extra_fn_flags;
#include "zend_exceptions.h"
#include "zend_closures.h"
+#include <locale.h>
+#ifdef HAVE_LANGINFO_H
+# include <langinfo.h>
+#endif
+
#ifdef __SSE2__
#include <emmintrin.h>
#endif
+#if defined(ZEND_WIN32) && !defined(ZTS) && defined(_MSC_VER)
+/* This performance improvement of tolower() on Windows gives 10-18% on bench.php */
+#define ZEND_USE_TOLOWER_L 1
+#endif
+
#ifdef ZEND_USE_TOLOWER_L
-#include <locale.h>
static _locale_t current_locale = NULL;
/* This is a true global! It may lead to strange effects on ZTS, but so may setlocale(). */
#define zend_tolower(c) _tolower_l(c, current_locale)
}
/* }}} */
-#ifdef ZEND_USE_TOLOWER_L
ZEND_API void zend_update_current_locale(void) /* {{{ */
{
+#ifdef ZEND_USE_TOLOWER_L
+# if defined(ZEND_WIN32) && defined(_MSC_VER)
current_locale = _get_current_locale();
+# else
+ current_locale = uselocale(0);
+# endif
+#endif
+#if defined(ZEND_WIN32) && defined(_MSC_VER)
+ if (MB_CUR_MAX > 1) {
+ unsigned int cp = ___lc_codepage_func();
+ CG(variable_width_locale) = 1;
+ // TODO: EUC-* are also ASCII compatible ???
+ CG(ascii_compatible_locale) =
+ cp == 65001; /* UTF-8 */
+ } else {
+ CG(variable_width_locale) = 0;
+ CG(ascii_compatible_locale) = 1;
+ }
+#elif defined(MB_CUR_MAX)
+ /* Check if current locale uses variable width encoding */
+ if (MB_CUR_MAX > 1) {
+#if HAVE_NL_LANGINFO
+ const char *charmap = nl_langinfo(CODESET);
+#else
+ char buf[16];
+ const char *charmap = NULL;
+ const char *locale = setlocale(LC_CTYPE, NULL);
+
+ if (locale) {
+ const char *dot = strchr(locale, '.');
+ const char *modifier;
+
+ if (dot) {
+ dot++;
+ modifier = strchr(dot, '@');
+ if (!modifier) {
+ charmap = dot;
+ } else if (modifier - dot < sizeof(buf)) {
+ memcpy(buf, dot, modifier - dot);
+ buf[modifier - dot] = '\0';
+ charmap = buf;
+ }
+ }
+ }
+#endif
+ CG(variable_width_locale) = 1;
+ CG(ascii_compatible_locale) = 0;
+
+ if (charmap) {
+ size_t len = strlen(charmap);
+ static const char *ascii_compatible_charmaps[] = {
+ "utf-8",
+ "utf8",
+ // TODO: EUC-* are also ASCII compatible ???
+ NULL
+ };
+ const char **p;
+ /* Check if current locale is ASCII compatible */
+ for (p = ascii_compatible_charmaps; *p; p++) {
+ if (zend_binary_strcasecmp(charmap, len, *p, strlen(*p)) == 0) {
+ CG(ascii_compatible_locale) = 1;
+ break;
+ }
+ }
+ }
+
+ } else {
+ CG(variable_width_locale) = 0;
+ CG(ascii_compatible_locale) = 1;
+ }
+#else
+ /* We can't determine current charset. Assume the worst case */
+ CG(variable_width_locale) = 1;
+ CG(ascii_compatible_locale) = 0;
+#endif
}
/* }}} */
-#endif
static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str, size_t length) /* {{{ */ {
unsigned char *p = (unsigned char*)str;
#define convert_to_object_ex(zv) convert_to_object(zv)
#define convert_scalar_to_number_ex(zv) convert_scalar_to_number(zv)
-#if defined(ZEND_WIN32) && !defined(ZTS) && defined(_MSC_VER)
-/* This performance improvement of tolower() on Windows gives 10-18% on bench.php */
-#define ZEND_USE_TOLOWER_L 1
-#endif
-
-#ifdef ZEND_USE_TOLOWER_L
ZEND_API void zend_update_current_locale(void);
-#else
-#define zend_update_current_locale()
-#endif
/* The offset in bytes between the value and type fields of a zval */
#define ZVAL_OFFSETOF_TYPE \
/* {{{ php_basename */
PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
{
- /* State 0 is directly after a directory separator (or at the start of the string).
- * State 1 is everything else. */
- int state = 0;
- const char *basename_start = s;
- const char *basename_end = s;
- while (len > 0) {
- int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));
+ const char *basename_start;
+ const char *basename_end;
+
+ if (CG(ascii_compatible_locale)) {
+#ifdef ZEND_WIN32
+ if ((len >= 2) && isalpha((int)((unsigned char *)s)[0]) && (s[1] == ':')) {
+ s += 2;
+ len -= 2;
+ }
+#endif
+
+ basename_end = s + len - 1;
+
+ /* Strip trailing slashes */
+ while (basename_end >= s && IS_SLASH_P(basename_end)) {
+ basename_end--;
+ }
+ if (basename_end < s) {
+ return ZSTR_EMPTY_ALLOC();
+ }
+
+ /* Extract filename */
+ basename_start = basename_end;
+ basename_end++;
+ while (basename_start > s && !IS_SLASH_P(basename_start - 1)) {
+ basename_start--;
+ }
+ } else {
+ /* State 0 is directly after a directory separator (or at the start of the string).
+ * State 1 is everything else. */
+ int state = 0;
+
+ basename_start = s;
+ basename_end = s;
+ while (len > 0) {
+ int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));
- switch (inc_len) {
- case 0:
- goto quit_loop;
- case 1:
+ switch (inc_len) {
+ case 0:
+ goto quit_loop;
+ case 1:
#if defined(PHP_WIN32)
- if (*s == '/' || *s == '\\') {
+ if (*s == '/' || *s == '\\') {
#else
- if (*s == '/') {
+ if (*s == '/') {
#endif
- if (state == 1) {
- state = 0;
- basename_end = s;
- }
+ if (state == 1) {
+ state = 0;
+ basename_end = s;
+ }
#if defined(PHP_WIN32)
- /* Catch relative paths in c:file.txt style. They're not to confuse
- with the NTFS streams. This part ensures also, that no drive
- letter traversing happens. */
- } else if ((*s == ':' && (s - basename_start == 1))) {
- if (state == 0) {
- basename_start = s;
- state = 1;
+ /* Catch relative paths in c:file.txt style. They're not to confuse
+ with the NTFS streams. This part ensures also, that no drive
+ letter traversing happens. */
+ } else if ((*s == ':' && (s - basename_start == 1))) {
+ if (state == 0) {
+ basename_start = s;
+ state = 1;
+ } else {
+ basename_end = s;
+ state = 0;
+ }
+#endif
} else {
- basename_end = s;
- state = 0;
+ if (state == 0) {
+ basename_start = s;
+ state = 1;
+ }
+ }
+ break;
+ default:
+ if (inc_len < 0) {
+ /* If character is invalid, treat it like other non-significant characters. */
+ inc_len = 1;
+ php_mb_reset();
}
-#endif
- } else {
if (state == 0) {
basename_start = s;
state = 1;
}
- }
- break;
- default:
- if (inc_len < 0) {
- /* If character is invalid, treat it like other non-significant characters. */
- inc_len = 1;
- php_mb_reset();
- }
- if (state == 0) {
- basename_start = s;
- state = 1;
- }
- break;
+ break;
+ }
+ s += inc_len;
+ len -= inc_len;
}
- s += inc_len;
- len -= inc_len;
- }
quit_loop:
- if (state == 1) {
- basename_end = s;
+ if (state == 1) {
+ basename_end = s;
+ }
}
if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
retval = setlocale(cat, NULL);
}
# endif
- zend_update_current_locale();
if (!retval) {
return NULL;
}
BG(locale_changed) = 1;
if (cat == LC_CTYPE || cat == LC_ALL) {
+ zend_update_current_locale();
if (BG(ctype_string)) {
zend_string_release_ex(BG(ctype_string), 0);
}