From: William A. Rowe Jr Date: Fri, 5 Apr 2002 04:22:45 +0000 (+0000) Subject: One for 2.0.34 - to fix the problem of utf-8 configuration files saved X-Git-Tag: 2.0.35~22 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=95270aa043f94c5d9fc8528e4d6def97119638e0;p=apache One for 2.0.34 - to fix the problem of utf-8 configuration files saved from MeSsed up editors that believe zero width no-break space (byte order mark) lead bytes signify a utf-8 file. Someone point me at an RFC ... gheesh. FWIW; fixes bug report 10125 git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@94446 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/server/util.c b/server/util.c index e66f291da5..86a3c9f302 100644 --- a/server/util.c +++ b/server/util.c @@ -916,6 +916,29 @@ AP_DECLARE(apr_status_t) ap_pcfg_openfile(ap_configfile_t **ret_cfg, apr_pool_t return APR_EBADF; } +#ifdef WIN32 + /* Some twisted character [no pun intended] at MS decided that a + * zero width no-break space (U+FEFF, the byte order mark) as the lead wide character would be ideal for + * describing Unicode text files. This was further convoluted to + * another MSism that the same character mapped into utf-8, EF BB BF + * would signify utf-8 text files. + * + * Since MS configuration files are all protecting utf-8 encoded + * Unicode path, file and resource names, we already have the correct + * WinNT encoding. But at least eat the stupid three bytes up front. + */ + { + unsigned char buf[4]; + apr_size_t len = 3; + status = apr_file_read(file, buf, &len); + if ((status != APR_SUCCESS) || (len < 3) + || memcmp(buf, "\xEF\xBB\xBF", 3) != 0) { + apr_off_t zero = 0; + apr_file_seek(file, APR_SET, &zero); + } + } +#endif + new_cfg = apr_palloc(p, sizeof(*new_cfg)); new_cfg->param = file; new_cfg->name = apr_pstrdup(p, name);