echo "ERROR: Cannot run tests without CLI sapi."; \
fi
+# utest: run $(top_srcdir)/run-tests.php over $(TESTS) with the CLI binary at
+# $(SAPI_CLI_PATH), passing -d 'unicode_semantics=on' and
+# -d 'unicode.output_encoding=utf-8' to the test runner. If the CLI binary is
+# not executable, an error message is printed instead. The leading '-' makes
+# make ignore a non-zero exit status; '@' keeps the command from being echoed.
+utest:
+ -@if test -x $(SAPI_CLI_PATH) && test ! -z $(SAPI_CLI_PATH); then \
+ TEST_PHP_EXECUTABLE=$(top_builddir)/$(SAPI_CLI_PATH) \
+ TEST_PHP_SRCDIR=$(top_srcdir) \
+ CC="$(CC)" \
+ $(top_builddir)/$(SAPI_CLI_PATH) -d 'open_basedir=' -d 'safe_mode=0' -d 'output_buffering=0' $(top_srcdir)/run-tests.php -d 'unicode_semantics=on' -d 'unicode.output_encoding=utf-8' $(TESTS); \
+ else \
+ echo "ERROR: Cannot run tests without CLI sapi."; \
+ fi
+
clean:
find . -name \*.lo -o -name \*.o | xargs rm -f
find . -name \*.la -o -name \*.a | xargs rm -f
--- /dev/null
+Introduction
+============
+
+As successful as PHP has proven to be in the past several years, it is still
+the only remaining member of the P-trinity of scripting languages - Perl and
+Python being the other two - that remains blithely ignorant of the
+multilingual and multinational environment around it. The software
+development community has been moving towards the Unicode Standard for some time
+now, and PHP can no longer afford to be outside of this movement. Surely,
+some steps have been taken recently to allow for easier processing of
+multibyte data with the mbstring extension, but it is not enabled in PHP by
+default and is not as intuitive or transparent as it could be.
+
+The basic goal of this document is to describe how PHP 6 will support the
+Unicode Standard natively. Since the full implementation of the Unicode
+Standard is very involved, the idea is to use the already existing,
+well-tested, full-featured, and freely available ICU (International
+Components for Unicode) library. This will allow us to concentrate on the
+details of PHP integration and speed up the implementation.
+
+General Remarks
+===============
+
+Backwards Compatibility
+-----------------------
+Throughout the design and implementation of Unicode support, backwards
+compatibility must be of paramount concern. PHP is used on an enormous number of
+sites and the upgrade to Unicode-enabled PHP has to be transparent. This means
+that the existing data types and functions must work as they have always
+done. However, the speed of certain operations may be affected, due to
+increased complexity of the code overall.
+
+Unicode Encoding
+----------------
+The initial version will not support Byte Order Mark. Characters are
+expected to be composed, Normalization Form C. Later versions will support
+BOM, and decomposed and other characters.
+
+
+Implementation Approach
+=======================
+
+The implementation is done in phases. This allows for more basic and
+low-level implementation issues to be ironed out and tested before
+proceeding to more advanced topics.
+
+Legend:
+ - TODO
+ + finished
+ * in progress
+
+ Phase I
+ -------
+ + Basic Unicode string support, including instantiation, concatenation,
+ indexing
+
+ + Simple output of Unicode strings via 'print' and 'echo' statements
+ with appropriate output encoding conversion
+
+ + Conversion of Unicode strings to/from various encodings via encode() and
+ decode() functions
+
+ + Determining length of Unicode strings via strlen() function, some
+ simple string functions ported (substr).
+
+
+ Phase II
+ --------
+ * HTTP input request decoding
+
+ + Fixing remaining string-aware operators (assignment to {}, etc)
+
+ + Comparison (collation) of Unicode strings with built-in operators
+
+ * Support for Unicode and binary strings in PHP streams
+
+ + Support for Unicode identifiers
+
+ * Configurable handling of conversion failures
+
+ + \C{} escape sequence in strings
+
+
+ Phase III
+ ---------
+ * Exposing ICU API
+
+ - Porting all remaining functions to support Unicode and/or binary
+ strings
+
+
+Encoding Names
+==============
+All the encoding settings discussed in this document accept any valid
+encoding name supported by ICU. See ICU online documentation for the full
+list of encodings.
+
+
+Internal Encoding
+=================
+
+UTF-16 is the internal encoding used for Unicode strings. UTF-16 consumes
+two bytes for any Unicode character in the Basic Multilingual Plane, which
+is where most of the current world's languages are represented. While being
+less memory efficient for basic ASCII text it simplifies the processing and
+makes interfacing with ICU easier, since ICU uses UTF-16 for its internal
+processing as well.
+
+
+Fallback Encoding
+=================
+
+This setting specifies the "fallback" encoding for all the other ones. So if
+a specific encoding setting is not set, PHP defaults it to the fallback
+encoding. If the fallback_encoding is not specified either, it is set to
+UTF-8.
+
+ fallback_encoding = "iso-8859-1"
+
+
+Runtime Encoding
+================
+
+Currently PHP neither specifies nor cares what the encoding of its strings
+is. However, the Unicode implementation needs to know what this encoding is
+for several reasons, including type coercion and encoding conversion for
+strings generated at runtime via function calls and casting. This setting
+specifies this runtime encoding.
+
+ runtime_encoding = "iso-8859-1"
+
+
+Output Encoding
+===============
+
+Automatic output encoding conversion is supported on the standard output
+stream. Therefore, commands such as 'print' and 'echo' automatically convert
+their arguments to the specified encoding. No automatic output encoding is
+performed for anything else. Therefore, when writing to files or external
+resources, the developer has to manually encode the data using functions
+provided by the unicode extension or rely on stream encoding filters. The
+unicode extension provides necessary stream filters to make developers'
+lives easier.
+
+The existing default_charset setting so far has been used only for
+specifying the charset portion of the Content-Type MIME header. For several
+reasons, this setting is deprecated. Now it is only used when the Unicode
+semantics switch is disabled and does not affect the actual transcoding of
+the output stream. The output encoding setting takes precedence in all other
+cases.
+
+ output_encoding = "utf-8"
+
+
+HTTP Input Encoding
+===================
+
+To make accessing HTTP input variables easier, PHP automatically decodes
+HTTP GET and POST requests based on the specified encoding. If the HTTP
+request contains the encoding specification in the headers, then it will be
+used instead of this setting. If the HTTP input encoding setting is not
+specified, PHP falls back onto the output encoding setting, because modern
+browsers are supposed to return the data in the same encoding as they
+received it in.
+
+If the actual encoding is passed in the request itself or is found
+elsewhere, then the application can ask PHP to re-decode the raw input
+explicitly.
+
+ http_input_encoding = "utf-8"
+
+
+Script Encoding
+===============
+
+PHP scripts may be written in any encoding supported by ICU. The encoding
+of the scripts can be specified site-wide via an INI directive
+script_encoding, or with a 'declare' pragma at the beginning of the script.
+The reason for pragma is that an application written in Shift-JIS, for
+example, should be executable on a system where the INI directive cannot be
+changed by the application itself. The pragma setting is valid only for the
+script it occurs in, and does not propagate to the included files.
+
+ pragma:
+ <?php declare(encoding = 'utf-8'); ?>
+
+ INI setting:
+ script_encoding = utf-8
+
+
+Conversion Semantics
+====================
+
+Not all characters can be converted between Unicode and legacy encodings.
+Normally, when downconverting from Unicode, the default behavior of ICU
+converters is to substitute the missing sequence with the appropriate
+substitution sequence for that codepage, such as 0x1A (Control-Z) in
+ISO-8859-1. When upconverting to Unicode, if an encoding has a character
+which cannot be converted into Unicode, that sequence is replaced by the
+Unicode substitution character (U+FFFD).
+
+The conversion failure behavior can be customized:
+
+ - perform substitution as described above with a custom substitution
+ character
+ - skip any invalid characters
+ - stop the conversion, raise an error, and return partial conversion
+ results
+ - replace the missing character with a diagnostic character and continue,
+ e.g. [U+hhhh]
+
+There are two INI settings that control this.
+
+ unicode.from_error_mode = U_INVALID_SUBSTITUTE
+ U_INVALID_SKIP
+ U_INVALID_STOP
+ U_INVALID_ESCAPE
+
+ unicode.from_error_subst_char = a2
+
+The second setting is supposed to contain the Unicode code point value for
+the substitution character. This value has to be representable in the target
+encoding.
+
+Note that PHP always tries to convert as much of the data as possible and
+returns the converted results even if an error happens.
+
+
+Unicode Switch
+==============
+
+Obviously, PHP cannot simply impose new Unicode support on everyone. There
+are many applications that do not care about Unicode and do not need it.
+Consequently, there is a switch that enables certain fundamental language
+changes related to Unicode. This switch is available as a site-wide, or
+per-dir INI setting only.
+
+Note that having the switch turned off does not imply that PHP is unaware of
+Unicode at all and that no Unicode string can exist. It only affects certain
+aspects of the language, and Unicode strings can always be created
+programmatically.
+
+ unicode_semantics = On
+
+ [TODO: list areas that are affected by this switch]
+
+
+Unicode String Type
+===================
+
+Unicode string type (IS_UNICODE) is supposed to contain text data encoded in
+UTF-16 format. It is the main string type in PHP when Unicode semantics
+switch is turned on. Unicode strings can exist when the switch is off, but
+they have to be produced programmatically, via calls to functions that
+return Unicode type.
+
+The operational unit when working with Unicode strings is a code point, not
+code unit or byte. One code point in UTF-16 may be comprised of 1 or 2 code
+units, each of which is a 16-bit word. Working on the code point level is
+necessary because doing otherwise would mean offloading the processing of
+surrogate pairs onto PHP users, and that is less than desirable.
+
+The repercussions are that one cannot expect code point N to be at offset
+N in the Unicode string. Instead, one has to iterate from the beginning of
+the string using the U16_FWD() macro until the desired codepoint is reached.
+
+The codepoint access is one of the primary areas targeted for optimization.
+
+
+Native Encoding String Type
+===========================
+
+Native encoding string type (IS_STRING) serves two purposes: backwards
+compatibility when Unicode semantics switch is off, and for representing
+strings in non-Unicode encodings (native encodings) when it is on. It is
+processed on the byte level.
+
+
+Binary String Type
+==================
+
+Binary string type (IS_BINARY) can be used for storing images, PDFs, or
+other binary data intended to be processed on a byte-level and that cannot
+be interpreted as text.
+
+Binary data type does not participate in implicit conversions, and cannot be
+explicitly upconverted to other string types, although the inverse is
+possible.
+
+Printing binary data to the standard output passes it through as-is,
+independent of the output encoding.
+
+When Unicode semantics switch is off, binary string literals and binary
+strings returned by functions actually resolve to IS_STRING type, for
+backwards compatibility reasons.
+
+
+Zval Structure Changes
+======================
+
+PHP is a type-agnostic language. Its data values are encapsulated in a zval
+(Zend value) structure that can change as necessary to accommodate various types.
+
+struct _zval_struct {
+ /* Variable information */
+ union {
+ long lval; /* long value */
+ double dval; /* double value */
+ struct {
+ char *val;
+ int len;
+ } str; /* string value */
+ HashTable *ht; /* hash table value */
+ zend_object_value obj; /* object value */
+ } value;
+ zend_uint refcount;
+ zend_uchar type; /* active type */
+ zend_uchar is_ref;
+};
+
+The type field determines what is stored in the union, IS_STRING being the only
+data type pertinent to this discussion. In the current version, the strings
+are binary-safe, but, for all intents and purposes, are assumed to be
+comprised of 8-bit characters. It is possible to treat the string value as
+an opaque type containing arbitrary binary data, and in fact that is how
+mbstring extension uses it, in order to store multibyte strings. However,
+many extensions and the Zend engine itself manipulate the string value
+directly without regard to its internals. Needless to say, this can lead to
+problems.
+
+For IS_UNICODE type, we need to add another structure to the union:
+
+ union {
+ ....
+ struct {
+ UChar *val; /* Unicode string value */
+ int32_t len; /* number of UChar's */
+ ....
+ } value;
+
+This cleanly separates the two types of strings and helps preserve backwards
+compatibility. For IS_BINARY type, we can re-use the existing str structure.
+
+
+Language Modifications
+======================
+
+If the Unicode switch is turned on, PHP string literals - single-quoted,
+double-quoted, and heredocs - become Unicode strings (IS_UNICODE type).
+They support all the same escape sequences and variable interpolations as
+previously, with the addition of some new escape sequences.
+
+The contents of the strings are interpreted as follows:
+
+ - all non-escaped characters are interpreted as a corresponding Unicode
+ codepoint based on the current script encoding, e.g. ASCII 'a' (0x61) =>
+ U+0061, Shift-JIS (0x92 0x69) => U+4E2D
+
+ - existing PHP escape sequences are also interpreted as Unicode codepoints,
+ including \xXX (hex) and \OOO (octal) numbers, e.g. "\x20" => U+0020
+
+ - two new escape sequences, \uXXXX and \UXXXXXX are interpreted as a 4 or
+ 6-hex Unicode codepoint value, e.g. \u0221 => U+0221, \U010410 =>
+ U+10410
+
+ - a new escape sequence allows specifying a character by its full
+ Unicode name, e.g. \C{THAI CHARACTER PHO SAMPHAO} => U+0E20
+
+The single-quoted string is more restrictive than the other two types: so
+far the only escape sequence allowed inside of it was \', which specifies
+a literal single quote. However, single quoted strings now support the new
+Unicode character escape sequences as well.
+
+PHP allows variable interpolation inside the double-quoted and heredoc strings.
+However, the parser separates the string into literal and variable chunks during
+compilation, e.g. "abc $var def" -> "abc" . $var . "def". This means that the
+literal chunks can be handled in the normal way as far as Unicode
+support is concerned.
+
+Since all string literals become Unicode by default, one loses the ability
+to specify byte-oriented or binary strings. In order to create binary string
+literals, a new syntax is necessary: prefixing a string literal with letter
+'b' creates a binary string.
+
+ $var = b'abc\001';
+ $var = b"abc\001";
+ $var = b<<<EOD
+ abc\001
+ EOD;
+
+The binary string literals support the same escape sequences as the current
+PHP strings. If the Unicode switch is turned off, then the binary string
+literals generate normal string (IS_STRING) type internally, without any
+effect on the application.
+
+The string operators have been changed to accommodate the new IS_UNICODE and
+IS_BINARY types. In more detail:
+
+ - The concatenation (.) operator has been changed to automatically coerce
+ IS_STRING type to the more precise IS_UNICODE if its operands are of two
+ different string types. It does not perform coercion for IS_BINARY type,
+ however, since binary data is not considered to be in any encoding. To
+ concatenate string with binary data, strings have to be cast to binary
+ type first. The coercion uses the conversion matrix specified later in
+ this document.
+
+ - The concatenation assignment operator (.=) has been changed similarly.
+
+ - The string indexing operators {}/[] have been changed to accommodate
+ IS_UNICODE type strings and extract the specified character. Note that
+ the index specifies a code point, not a byte, or a code unit, thus
+ supporting supplementary characters as well.
+
+ - Both Unicode and binary string types can be used as array keys. If the
+ Unicode switch is on, the native encoding strings are converted to
+ Unicode, if they are used as hash keys, but binary strings are not.
+ Note that this means if Unicode switch is off, then Unicode string "abc"
+ and native string "abc" do not hash to the same value.
+
+ - Bitwise operators and increment/decrement operators do not work on
+ Unicode strings. They do work on binary strings.
+
+ - Two new casting operators are introduced, (unicode) and (binary).
+ They use the conversion matrix specified later in this document.
+
+ - The comparison operators when applied to Unicode strings, perform
+ comparison in binary code point order. They also do appropriate coercion
+ if the strings are of differing types.
+
+ - The arithmetic operators use the same semantics as today for converting
+ strings to numbers. A Unicode string is considered numeric if it
+ represents a long or a double number in en_US_POSIX locale.
+
+
+Inline HTML
+===========
+Because inline HTML blocks are intermixed with PHP ones, they are also
+written in the script encoding. PHP transcodes the HTML blocks to the output
+encoding as needed, resulting in direct passthrough if the script encoding
+matches output encoding.
+
+
+Identifiers
+===========
+Considering that scripts may be written in various encodings, we do not
+restrict identifiers to be ASCII-only. PHP allows any valid identifier based
+on the Unicode Standard Annex #31. The identifiers are case folded when
+necessary (class and function names) and converted to normalization form
+NFKC, so that two identifiers written in two compatible ways refer to the
+same thing.
+
+
+Numbers
+=======
+Unlike identifiers, we restrict numbers to consist only of ASCII digits and
+do not interpret them as written in a specific locale. The numbers are
+expected to adhere to en_US_POSIX or C locale, i.e. having no thousands
+separator and fractional separator being (.) "full stop". Numeric strings
+are supposed to adhere to the same rules, i.e. "10,3" is not interpreted as
+a number even if the current locale's fractional separator is comma.
+
+
+Parameter Parsing API Modifications
+===================================
+
+Internal PHP functions largely use the zend_parse_parameters() API in order to
+obtain the parameters passed to them by the user. For example:
+
+ char *str;
+ int len;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &len) == FAILURE) {
+ return;
+ }
+
+This forces the input parameter to be a string, and its value and length are
+stored in the variables specified by the caller.
+
+There are now three new specifiers: 't', 'u', and 'T'.
+
+ 't' specifier
+ -------------
+ This specifier indicates that the caller requires the incoming parameter
+ to be string data (IS_STRING, IS_UNICODE, IS_BINARY). The caller has to provide
+ the storage for string value, length, and type.
+
+ void *str;
+ int len;
+ zend_uchar type;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t", &str, &len, &type) == FAILURE) {
+ return;
+ }
+ if (type == IS_UNICODE) {
+ /* process UTF-16 data */
+ } else {
+ /* process native string or binary data */
+ }
+
+ For IS_STRING and IS_BINARY types, the length represents the number of
+ bytes, and for IS_UNICODE the number of UChar's. When converting other
+ types (numbers, booleans, etc) to strings, the exact behavior depends on
+ the Unicode semantics switch: if on, they are converted to IS_UNICODE,
+ otherwise to IS_STRING.
+
+
+ 'u' specifier
+ -------------
+ This specifier indicates that the caller requires the incoming parameter
+ to be a Unicode UTF-16 encoded string. If a non-Unicode string is passed,
+ the engine creates a copy of the string and automatically converts it
+ to Unicode type before passing it to the internal function. No such
+ conversion is necessary for Unicode strings, obviously. Binary type cannot
+ be upconverted, and the engine issues an error in such case.
+
+ UChar *str;
+ int32_t len;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "u", &str, &len) == FAILURE) {
+ return;
+ }
+ /* process UTF-16 data */
+
+
+ 'T' specifier
+ -------------
+ This specifier is useful when the function takes two or more strings and
+ operates on them. Using 't' specifier for each one would be somewhat
+ problematic if the passed-in strings are of mixed types, and multiple
+ checks need to be performed in order to do anything. All parameters
+ marked by the 'T' specifier are promoted to the same type.
+
+ Binary type is generally speaking the most precise one. However, we do not
+ want to convert Unicode strings to binary ones, so an error is thrown
+ if the incoming list of parameters has both Unicode and binary strings in
+ it.
+
+ If there are no binary strings, and at least one of the strings is of
+ Unicode type, then all the rest of the strings are upconverted to Unicode.
+
+ Otherwise the promotion is to IS_STRING type.
+
+
+ void *str1, *str2;
+ int len1, len2;
+ zend_uchar type1, type2;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT", &str1, &len1,
+ &type1, &str2, &len2, &type2) == FAILURE) {
+ return;
+ }
+ if (type1 == IS_UNICODE) {
+ /* process as Unicode, str2 is guaranteed to be Unicode as well */
+ } else {
+ /* process as native string, str2 is guaranteed to be the same */
+ }
+
+
+The existing 's' specifier has been modified as well. If a Unicode string is
+passed in, it automatically copies and converts the string to the runtime
+encoding, and issues a warning. If a binary type is passed-in, no conversion
+is necessary.
+
+
+Upgrading Existing Functions
+============================
+
+Upgrading functions to work with new data types will be a deliberate and
+involved process, because one needs to consider not only the mechanisms for
+processing Unicode characters, for example, but also the semantics of
+the function.
+
+The main tenet of the upgrade process should be that when processing Unicode
+strings, the unit of operation is a code point, not a code unit or a byte.
+For example, strlen() returns the number of code points in the string.
+
+ strlen('abc') = 3
+ strlen('ab\U010000') = 3
+ strlen('ab\uD800\uDC00') = 3 /* not 4 */
+
+Function upgrade guidelines are available in a separate document.
+
+
+Unicode Extension
+=================
+
+There will be one or more extensions that provide Unicode and i18n services
+to PHP. In phase I only the conversion service is necessary. The Unicode
+extension is 'ext/unicode' and its functions should be prefixed with 'unicode'
+or 'icu'.
+
+ Conversion Functions
+ --------------------
+
+ string unicode_encode(unicode $input, text $encoding)
+
+ Takes a UTF-16 Unicode string and converts it to the target
+ encoding, returning the result.
+
+ unicode unicode_decode(string $input, text $encoding)
+
+ Takes a string in the source encoding and converts it to a UTF-16
+ Unicode string, returning the result.
+
+
+Type Conversion Matrix
+======================
+
+ to | IS_STRING | IS_UNICODE | IS_BINARY
+from | | |
+-------------------------------------------------------------------
+ | | |
+IS_STRING | n/a | implicit=yes | explicit=yes
+ | | explicit=yes | implicit=no
+ | | |
+-------------------------------------------------------------------
+ | | |
+IS_UNICODE | explicit=yes | n/a | explicit=yes
+ | implicit=no | | implicit=no
+ | | |
+-------------------------------------------------------------------
+ | | |
+IS_BINARY | explicit=no | explicit=no | n/a
+ | implicit=no | implicit=no |
+ | | |
+
+explicit = casting
+implicit = for concatenation, etc
+
+IS_STRING <-> IS_UNICODE uses runtime-encoding
+IS_UNICODE -> IS_BINARY converts to runtime encoding first, then to binary
+
+
+Implementation Details That Need Expanding
+==========================================
+- Streams support for Unicode - What stream filters will we be providing?
+- Conversion errors behavior - Need to define the default.
+- INI files encoding - Do we support BOMs?
+- There are likely to be other issues which are missing from this document
+
+
+Build System
+============
+
+Unicode support in PHP is always enabled. The only configuration option
+during development should be the location of the ICU headers and libraries.
+
+ --with-icu-dir=<dir> <dir> parameter specifies the location of ICU
+ header and library files.
+
+After the initial development we have to repackage ICU library for our needs
+and bundle it with PHP.
+
+
+Document History
+================
+ 0.5: Updated per latest discussions. Removed tentative language in several
+ places, since we have decided on everything described here already.
+ Clarified details according to Phase II progress.
+
+ 0.4: Updated to include all the latest discussions. Updated development
+ phases.
+
+ 0.3: Updated to include all the latest discussions.
+
+ 0.2: Updated Phase I design proposal per discussion on unicode@php.net.
+ Modified Internal Encoding section to contain only UTF-16 info.
+ Expanded Script Encoding section.
+ Added Binary Data Type section.
+ Amended Language Modifications section to describe string literals
+ behavior.
+ Amended Build System section.
+
+ 0.1: Phase I design proposal
+
+
+References
+==========
+
+ Unicode
+ http://www.unicode.org
+
+ Unicode Glossary
+ http://www.unicode.org/glossary/
+
+ UTF-8
+ http://www.utf-8.com/
+
+ UTF-16
+ http://www.ietf.org/rfc/rfc2781.txt
+
+ ICU Homepage
+ http://www.ibm.com/software/globalization/icu/
+
+ ICU User Guide and API Reference
+ http://icu.sourceforge.net/
+
+ Unicode Annex #31
+ http://www.unicode.org/reports/tr31/
+
+ PHP Parameter Parsing API
+ http://www.php.net/manual/en/zend.arguments.retrieval.php
+
+
+Authors
+=======
+ Andrei Zmievski <andrei@gravitonic.com>
+
+vim: set et :
utime.h \
sys/utsname.h \
sys/ipc.h \
-dlfcn.h
+dlfcn.h \
+assert.h
],[],[],[
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
])
AC_MSG_RESULT([$PHP_VERSIONING])
+dnl ## check for ICU library location
+dnl ## until we bundle it
+dnl ## Sets ICU_DIR to a prefix that contains include/unicode/utypes.h and
+dnl ## aborts configure when no such prefix can be found.
+PHP_ARG_WITH(icu-dir,[location of ICU headers and libraries],
+[ --with-icu-dir=DIR Specify where ICU libraries and headers can be found], DEFAULT, no)
+dnl ## No directory supplied: probe /usr/local first, then /usr.
+if test "$PHP_ICU_DIR" = "DEFAULT"; then
+ AC_MSG_CHECKING(for ICU in default path)
+ for i in /usr/local /usr; do
+ if test -r $i/include/unicode/utypes.h; then
+ ICU_DIR=$i
+ AC_MSG_RESULT(found in $i)
+ break
+ fi
+ done
+else
+dnl ## Directory supplied: accept it only if the ICU header is present there.
+ if test -f $PHP_ICU_DIR/include/unicode/utypes.h; then
+ ICU_DIR=$PHP_ICU_DIR
+ fi
+fi
+dnl ## ICU_DIR is still empty when neither branch above located the header.
+if test -z "$ICU_DIR"; then
+ AC_MSG_RESULT(not found)
+ AC_MSG_ERROR(Please specify where ICU libraries and header file are located)
+fi
+dnl ## Prefer the icu-config found under ICU_DIR/bin; otherwise fall back to
+dnl ## whichever icu-config the shell resolves by name.
+ICU_CONFIG="icu-config"
+if ${ICU_DIR}/bin/icu-config --ldflags > /dev/null 2>&1; then
+ ICU_CONFIG=${ICU_DIR}/bin/icu-config
+fi
+AC_MSG_CHECKING(for ICU 3.4 or greater)
+icu_version_full=`$ICU_CONFIG --version`
+dnl ## Fold "major.minor" into one integer, major * 1000 + minor, so that
+dnl ## version 3.4 becomes 3004 and can be compared numerically.
+icu_version=`echo ${icu_version_full} | awk 'BEGIN { FS = "."; } { printf "%d", ($1 * 1000 + $2) ;}'`
+if test "$icu_version" -ge 3004; then
+ AC_MSG_RESULT($icu_version_full found)
+ ICU_LIBS=`$ICU_CONFIG --ldflags --ldflags-icuio`
+else
+dnl ## The message is quoted so that its comma is not taken as an argument
+dnl ## separator, and it reports the variable that was actually set above.
+ AC_MSG_ERROR([ICU version 3.4 or later is required, $icu_version_full found])
+fi
+dnl ## Export the ICU include path and link flags to the rest of the build.
+PHP_ADD_INCLUDE($ICU_DIR/include)
+PHP_EVAL_LIBLINE($ICU_LIBS, ICU_SHARED_LIBADD)
+PHP_SUBST(ICU_SHARED_LIBADD)
+
+
divert(5)
dnl ## In diversion 5 we check which extensions should be compiled.
PHP_ADD_SOURCES(Zend, \
zend_alloc.c zend_compile.c zend_constants.c zend_dynamic_array.c \
- zend_execute_API.c zend_highlight.c zend_llist.c \
+ zend_execute_API.c zend_unicode.c zend_highlight.c zend_llist.c \
zend_opcode.c zend_operators.c zend_ptr_stack.c zend_stack.c \
zend_variables.c zend.c zend_API.c zend_extensions.c zend_hash.c \
zend_list.c zend_indent.c zend_builtin_functions.c zend_sprintf.c \
zend_ini.c zend_qsort.c zend_multibyte.c zend_ts_hash.c zend_stream.c \
- zend_iterators.c zend_interfaces.c zend_exceptions.c zend_strtod.c)
+ zend_iterators.c zend_interfaces.c zend_exceptions.c zend_strtod.c zend_strtol.c)
if test -r "$abs_srcdir/Zend/zend_objects.c"; then
PHP_ADD_SOURCES(Zend, zend_objects.c zend_object_handlers.c zend_objects_API.c zend_mm.c \
while (buckets_in->head) {
size_t bin = 0, desired;
- bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
- while (bin < bucket->buflen) {
- desired = bucket->buflen - bin;
+ bucket = buckets_in->head;
+
+ if (bucket->is_unicode) {
+ /* decompression not allowed for unicode data */
+ return PSFS_ERR_FATAL;
+ }
+
+ bucket = php_stream_bucket_make_writeable(bucket TSRMLS_CC);
+ while (bin < bucket->buf.str.len) {
+ desired = bucket->buf.str.len - bin;
if (desired > data->inbuf_len) {
desired = data->inbuf_len;
}
- memcpy(data->strm.next_in, bucket->buf + bin, desired);
+ memcpy(data->strm.next_in, bucket->buf.str.val + bin, desired);
data->strm.avail_in = desired;
status = BZ2_bzDecompress(&(data->strm));
static php_stream_filter_ops php_bz2_decompress_ops = {
php_bz2_decompress_filter,
php_bz2_decompress_dtor,
- "bzip2.decompress"
+ "bzip2.decompress",
+ PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING
};
/* }}} */
while (buckets_in->head) {
size_t bin = 0, desired;
- bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
+ bucket = buckets_in->head;
+
+ if (bucket->is_unicode) {
+ /* compression not allowed for unicode data */
+ return PSFS_ERR_FATAL;
+ }
+
+ bucket = php_stream_bucket_make_writeable(bucket TSRMLS_CC);
- while (bin < bucket->buflen) {
- desired = bucket->buflen - bin;
+ while (bin < bucket->buf.str.len) {
+ desired = bucket->buf.str.len - bin;
if (desired > data->inbuf_len) {
desired = data->inbuf_len;
}
- memcpy(data->strm.next_in, bucket->buf + bin, desired);
+ memcpy(data->strm.next_in, bucket->buf.str.val + bin, desired);
data->strm.avail_in = desired;
status = BZ2_bzCompress(&(data->strm), flags & PSFS_FLAG_FLUSH_CLOSE ? BZ_FINISH : (flags & PSFS_FLAG_FLUSH_INC ? BZ_FLUSH : BZ_RUN));
static php_stream_filter_ops php_bz2_compress_ops = {
php_bz2_compress_filter,
php_bz2_compress_dtor,
- "bzip2.compress"
+ "bzip2.compress",
+ PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING
};
/* }}} */
obj = &EG(objects_store).object_buckets[handle].bucket.obj;
if (obj->clone == NULL) {
- php_error(E_ERROR, "Trying to clone an uncloneable object of class %s", Z_OBJCE_P(zobject)->name);
+ php_error(E_ERROR, "Trying to clone an uncloneable object of class %v", Z_OBJCE_P(zobject)->name);
}
obj->clone(obj->object, &new_object TSRMLS_CC);
zend_object_value dom_objects_ze1_clone_obj(zval *zobject TSRMLS_DC)
{
- php_error(E_ERROR, "Cannot clone object of class %s due to 'zend.ze1_compatibility_mode'", Z_OBJCE_P(zobject)->name);
+ php_error(E_ERROR, "Cannot clone object of class %v due to 'zend.ze1_compatibility_mode'", Z_OBJCE_P(zobject)->name);
/* Return zobject->value.obj just to satisfy compiler */
return zobject->value.obj;
}
#define DOM_GET_OBJ(__ptr, __id, __prtype, __intern) { \
__intern = (dom_object *)zend_object_store_get_object(__id TSRMLS_CC); \
if (__intern->ptr == NULL || !(__ptr = (__prtype)((php_libxml_node_ptr *)__intern->ptr)->node)) { \
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Couldn't fetch %s", __intern->std.ce->name);\
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Couldn't fetch %v", __intern->std.ce->name);\
RETURN_NULL();\
} \
}
}
/* }}} */
+#ifdef SMG_0 /* UTODO: Needs updating for unicode */
+
/* {{{ iconv stream filter */
typedef struct _php_iconv_stream_filter {
iconv_t cd;
return PHP_ICONV_ERR_SUCCESS;
}
/* }}} */
+#else /* Make dummy bypasses for the register/unregister loop */
+/* Stub compiled when SMG_0 is not defined, i.e. when the iconv stream filter
+ * code above is disabled: registers nothing and reports success. */
+static php_iconv_err_t php_iconv_stream_filter_register_factory(TSRMLS_D)
+{
+ return PHP_ICONV_ERR_SUCCESS;
+}
+/* Stub compiled when SMG_0 is not defined, i.e. when the iconv stream filter
+ * code above is disabled: unregisters nothing and reports success. */
+static php_iconv_err_t php_iconv_stream_filter_unregister_factory(TSRMLS_D)
+{
+ return PHP_ICONV_ERR_SUCCESS;
+}
+#endif /* unicode bypass */
+
+
/* }}} */
#endif
fcc.object_pp = &return_value;
if (zend_call_function(&fci, &fcc TSRMLS_CC) == FAILURE) {
- zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Could not execute %s::%s()", ce->name, ce->constructor->common.function_name);
+ zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Could not execute %v::%v()", ce->name, ce->constructor->common.function_name);
} else {
if (retval_ptr) {
zval_ptr_dtor(&retval_ptr);
efree(fci.params);
}
} else if (ctor_params) {
- zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Class %s does not have a constructor hence you cannot use ctor_params", ce->name);
+ zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Class %v does not have a constructor hence you cannot use ctor_params", ce->name);
}
}
}
static zend_object_value sxe_object_ze1_clone(zval *zobject TSRMLS_DC)
{
- php_error(E_ERROR, "Cannot clone object of class %s due to 'zend.ze1_compatibility_mode'", Z_OBJCE_P(zobject)->name);
+ php_error(E_ERROR, "Cannot clone object of class %v due to 'zend.ze1_compatibility_mode'", Z_OBJCE_P(zobject)->name);
/* Return zobject->value.obj just to satisfy compiler */
return zobject->value.obj;
}
switch(Z_TYPE_P(offset)) {
case IS_STRING:
- if (zend_symtable_find(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1, (void **) &retval) == FAILURE) {
- zend_error(E_NOTICE, "Undefined index: %s", Z_STRVAL_P(offset));
+ case IS_BINARY:
+ case IS_UNICODE:
+ if (zend_u_symtable_find(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset)+1, (void **) &retval) == FAILURE) {
+ zend_error(E_NOTICE, "Undefined index: %R", Z_TYPE_P(offset), Z_UNIVAL_P(offset)); /* pair %R with Z_UNIVAL_P, same accessor as the lookup above */
return &EG(uninitialized_zval_ptr);
} else {
return retval;
}
switch(Z_TYPE_P(offset)) {
case IS_STRING:
+ case IS_BINARY:
+ case IS_UNICODE:
value->refcount++;
- zend_symtable_update(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1, (void**)&value, sizeof(void*), NULL);
+ zend_u_symtable_update(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset)+1, (void**)&value, sizeof(void*), NULL);
return;
case IS_DOUBLE:
case IS_RESOURCE:
switch(Z_TYPE_P(offset)) {
case IS_STRING:
+ case IS_BINARY:
+ case IS_UNICODE: /* unset accepts the same string-like key types as the read/write/exists switches */
if (spl_array_get_hash_table(intern, 0 TSRMLS_CC) == &EG(symbol_table)) {
- if (zend_delete_global_variable(Z_STRVAL_P(offset), Z_STRLEN_P(offset) TSRMLS_CC)) {
+ if (zend_u_delete_global_variable(Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset) TSRMLS_CC)) {
- zend_error(E_NOTICE,"Undefined index: %s", Z_STRVAL_P(offset));
+ zend_error(E_NOTICE,"Undefined index: %R", Z_TYPE_P(offset), Z_UNIVAL_P(offset));
}
} else {
- if (zend_symtable_del(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1) == FAILURE) {
- zend_error(E_NOTICE,"Undefined index: %s", Z_STRVAL_P(offset));
+ if (zend_u_symtable_del(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset)+1) == FAILURE) {
+ zend_error(E_NOTICE,"Undefined index: %R", Z_TYPE_P(offset), Z_UNIVAL_P(offset));
}
}
break;
switch(Z_TYPE_P(offset)) {
case IS_STRING:
- return zend_symtable_exists(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1);
+ case IS_BINARY:
+ case IS_UNICODE:
+ return zend_u_symtable_exists(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset)+1);
case IS_DOUBLE:
case IS_RESOURCE:
case IS_BOOL:
}
if (Z_TYPE_P(intern->array) == IS_OBJECT) {
- php_error_docref(NULL TSRMLS_CC, E_ERROR, "Cannot append properties to objects, use %s::offsetSet() instead", Z_OBJCE_P(object)->name);
+ php_error_docref(NULL TSRMLS_CC, E_ERROR, "Cannot append properties to objects, use %v::offsetSet() instead", Z_OBJCE_P(object)->name);
}
spl_array_write_dimension(object, NULL, append_value TSRMLS_CC);
if (Z_TYPE_P(intern->array) == IS_OBJECT) {
do {
- if (zend_hash_get_current_key_ex(aht, &string_key, &string_length, &num_key, 0, &intern->pos) == HASH_KEY_IS_STRING) {
- if (!string_length || string_key[0]) {
+ if (zend_hash_get_current_key_ex(aht, &string_key, &string_length, &num_key, 0, &intern->pos) == (UG(unicode)?HASH_KEY_IS_UNICODE:HASH_KEY_IS_STRING)) { /* parenthesized: == binds tighter than ?: */
+ if (!string_length ||
+ ((UG(unicode) && ((UChar*)string_key)[0]) ||
+ (!UG(unicode) && string_key[0]))) {
return SUCCESS;
}
} else {
case HASH_KEY_IS_STRING:
RETVAL_STRINGL(string_key, string_length - 1, 0);
break;
+ case HASH_KEY_IS_BINARY:
+ RETVAL_BINARYL(string_key, string_length - 1, 0);
+ break;
+ case HASH_KEY_IS_UNICODE:
+ RETVAL_UNICODEL((UChar*)string_key, string_length - 1, 0);
+ break;
case HASH_KEY_IS_LONG:
RETVAL_LONG(num_key);
break;
void spl_add_class_name(zval *list, zend_class_entry * pce, int allow, int ce_flags TSRMLS_DC)
{
if (!allow || (allow > 0 && pce->ce_flags & ce_flags) || (allow < 0 && !(pce->ce_flags & ce_flags))) {
- size_t len = strlen(pce->name);
+ size_t len = pce->name_length;
zval *tmp;
if (zend_hash_find(Z_ARRVAL_P(list), pce->name, len+1, (void*)&tmp) == FAILURE) {
static zend_function_entry spl_funcs_RecursiveFilterIterator[] = {
SPL_ME(RecursiveFilterIterator, __construct, arginfo_parent_it___construct, ZEND_ACC_PUBLIC)
+ SPL_MA(ParentIterator, accept, RecursiveFilterIterator, hasChildren, NULL, ZEND_ACC_PUBLIC)
SPL_ME(RecursiveFilterIterator, hasChildren, NULL, ZEND_ACC_PUBLIC)
SPL_ME(RecursiveFilterIterator, getChildren, NULL, ZEND_ACC_PUBLIC)
{NULL, NULL, NULL}
intern = (spl_dual_it_object*)zend_object_store_get_object(getThis() TSRMLS_CC);
if (!(intern->u.caching.flags & CIT_CALL_TOSTRING)) {
- zend_throw_exception_ex(spl_ce_BadMethodCallException, 0 TSRMLS_CC, "%s does not fetch string value (see CachingIterator::__construct)", Z_OBJCE_P(getThis())->name);
+ zend_throw_exception_ex(spl_ce_BadMethodCallException, 0 TSRMLS_CC, "%v does not fetch string value (see CachingIterator::__construct)", Z_OBJCE_P(getThis())->name);
}
if (intern->u.caching.zstr) {
RETURN_STRINGL(Z_STRVAL_P(intern->u.caching.zstr), Z_STRLEN_P(intern->u.caching.zstr), 1);
fcc.object_pp = &return_value;
if (zend_call_function(&fci, &fcc TSRMLS_CC) == FAILURE) {
- zend_throw_exception_ex(sqlite_ce_exception, 0 TSRMLS_CC, "Could not execute %s::%s()", class_name, ce->constructor->common.function_name);
+ zend_throw_exception_ex(sqlite_ce_exception, 0 TSRMLS_CC, "Could not execute %s::%v()", class_name, ce->constructor->common.function_name);
} else {
if (retval_ptr) {
zval_ptr_dtor(&retval_ptr);
if (f->nKeyLength == 0) {
Z_TYPE(first) = IS_LONG;
Z_LVAL(first) = f->h;
+ } else if (f->key.type == IS_UNICODE) {
+ Z_TYPE(first) = IS_UNICODE;
+ Z_USTRVAL(first) = f->key.u.unicode;
+ Z_USTRLEN(first) = f->nKeyLength-1;
} else {
- Z_TYPE(first) = IS_STRING;
- Z_STRVAL(first) = f->arKey;
+ Z_TYPE(first) = f->key.type;
+ Z_STRVAL(first) = f->key.u.string;
Z_STRLEN(first) = f->nKeyLength-1;
}
if (s->nKeyLength == 0) {
Z_TYPE(second) = IS_LONG;
Z_LVAL(second) = s->h;
+ } else if (s->key.type == IS_UNICODE) {
+ Z_TYPE(second) = IS_UNICODE;
+ Z_USTRVAL(second) = s->key.u.unicode;
+ Z_USTRLEN(second) = s->nKeyLength-1;
} else {
- Z_TYPE(second) = IS_STRING;
- Z_STRVAL(second) = s->arKey;
+ Z_TYPE(second) = s->key.type;
+ Z_STRVAL(second) = s->key.u.string;
Z_STRLEN(second) = s->nKeyLength-1;
}
f = *((Bucket **) a);
s = *((Bucket **) b);
- if (f->nKeyLength) {
- Z_STRVAL(key1) = estrndup(f->arKey, f->nKeyLength-1);
- Z_STRLEN(key1) = f->nKeyLength-1;
- Z_TYPE(key1) = IS_STRING;
- } else {
+ if (f->nKeyLength == 0) {
Z_LVAL(key1) = f->h;
Z_TYPE(key1) = IS_LONG;
- }
- if (s->nKeyLength) {
- Z_STRVAL(key2) = estrndup(s->arKey, s->nKeyLength-1);
- Z_STRLEN(key2) = s->nKeyLength-1;
- Z_TYPE(key2) = IS_STRING;
+ } else if (f->key.type == IS_UNICODE) {
+ Z_USTRVAL(key1) = eustrndup(f->key.u.unicode, f->nKeyLength-1);
+ Z_USTRLEN(key1) = f->nKeyLength-1;
+ Z_TYPE(key1) = IS_UNICODE;
} else {
- Z_LVAL(key2) = s->h;
- Z_TYPE(key2) = IS_LONG;
+ Z_STRVAL(key1) = estrndup(f->key.u.string, f->nKeyLength-1);
+ Z_STRLEN(key1) = f->nKeyLength-1;
+ Z_TYPE(key1) = f->key.type;
}
+ if (s->nKeyLength == 0) {
+ Z_LVAL(key2) = s->h;
+ Z_TYPE(key2) = IS_LONG;
+ } else if (s->key.type == IS_UNICODE) {
+ Z_USTRVAL(key2) = eustrndup(s->key.u.unicode, s->nKeyLength-1);
+ Z_USTRLEN(key2) = s->nKeyLength-1;
+ Z_TYPE(key2) = IS_UNICODE;
+ } else {
+ Z_STRVAL(key2) = estrndup(s->key.u.string, s->nKeyLength-1);
+ Z_STRLEN(key2) = s->nKeyLength-1;
+ Z_TYPE(key2) = s->key.type;
+ }
status = call_user_function(EG(function_table), NULL, *BG(user_compare_func_name), &retval, 2, args TSRMLS_CC);
case HASH_KEY_IS_STRING:
RETVAL_STRINGL(string_key, string_length - 1, 1);
break;
+ case HASH_KEY_IS_BINARY:
+ RETVAL_BINARYL(string_key, string_length - 1, 1);
+ break;
+ case HASH_KEY_IS_UNICODE:
+ RETVAL_UNICODEL(string_key, string_length - 1, 1);
+ break;
case HASH_KEY_IS_LONG:
RETVAL_LONG(num_key);
break;
MAKE_STD_ZVAL(key);
/* Set up the key */
- if (zend_hash_get_current_key_ex(target_hash, &string_key, &string_key_len, &num_key, 0, &pos) == HASH_KEY_IS_LONG) {
- Z_TYPE_P(key) = IS_LONG;
- Z_LVAL_P(key) = num_key;
- } else {
- ZVAL_STRINGL(key, string_key, string_key_len-1, 1);
+ switch (zend_hash_get_current_key_ex(target_hash, &string_key, &string_key_len, &num_key, 0, &pos)) {
+ case HASH_KEY_IS_LONG:
+ Z_TYPE_P(key) = IS_LONG;
+ Z_LVAL_P(key) = num_key;
+ break;
+ case HASH_KEY_IS_STRING:
+ ZVAL_STRINGL(key, string_key, string_key_len-1, 1);
+ break;
+ case HASH_KEY_IS_BINARY:
+ ZVAL_BINARYL(key, string_key, string_key_len-1, 1);
+ break;
+ case HASH_KEY_IS_UNICODE:
+ ZVAL_UNICODEL(key, string_key, string_key_len-1, 1);
+ break;
}
fci.size = sizeof(fci);
RETURN_FALSE;
}
if (Z_TYPE_PP(BG(array_walk_func_name)) != IS_ARRAY &&
- Z_TYPE_PP(BG(array_walk_func_name)) != IS_STRING) {
+ Z_TYPE_PP(BG(array_walk_func_name)) != IS_STRING &&
+ Z_TYPE_PP(BG(array_walk_func_name)) != IS_UNICODE) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Wrong syntax for function name");
BG(array_walk_func_name) = old_walk_func_name;
RETURN_FALSE;
BG(array_walk_func_name) = old_walk_func_name;
RETURN_FALSE;
}
- if (Z_TYPE_PP(BG(array_walk_func_name)) != IS_ARRAY && Z_TYPE_PP(BG(array_walk_func_name)) != IS_STRING) {
+ if (Z_TYPE_PP(BG(array_walk_func_name)) != IS_ARRAY &&
+ Z_TYPE_PP(BG(array_walk_func_name)) != IS_STRING &&
+ Z_TYPE_PP(BG(array_walk_func_name)) != IS_UNICODE) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Wrong syntax for function name");
BG(array_walk_func_name) = old_walk_func_name;
RETURN_FALSE;
case HASH_KEY_IS_STRING:
RETURN_STRINGL(string_key, str_key_len-1, 1);
break;
+ case HASH_KEY_IS_BINARY:
+ RETURN_BINARYL(string_key, str_key_len-1, 1);
+ break;
+ case HASH_KEY_IS_UNICODE:
+ RETURN_UNICODEL(string_key, str_key_len-1, 1);
+ break;
case HASH_KEY_IS_LONG:
RETURN_LONG(num_key);
break;
key_type = zend_hash_get_current_key_ex(Z_ARRVAL_PP(var_array), &var_name, &var_name_len, &num_key, 0, &pos);
var_exists = 0;
- if (key_type == HASH_KEY_IS_STRING) {
+ if (key_type == HASH_KEY_IS_STRING ||
+ key_type == HASH_KEY_IS_UNICODE) {
+ if (key_type == HASH_KEY_IS_STRING) {
+ key_type = IS_STRING;
+ } else {
+ key_type = IS_UNICODE;
+ }
var_name_len--;
- var_exists = zend_hash_exists(EG(active_symbol_table), var_name, var_name_len + 1);
+ var_exists = zend_u_hash_exists(EG(active_symbol_table), key_type, var_name, var_name_len + 1);
} else if (extract_type == EXTR_PREFIX_ALL || extract_type == EXTR_PREFIX_INVALID) {
smart_str_appendl(&final_name, Z_STRVAL_PP(prefix), Z_STRLEN_PP(prefix));
smart_str_appendc(&final_name, '_');
{
zval **value_ptr, *value, *data;
- if (Z_TYPE_P(entry) == IS_STRING) {
- if (zend_hash_find(eg_active_symbol_table, Z_STRVAL_P(entry),
- Z_STRLEN_P(entry)+1, (void **)&value_ptr) != FAILURE) {
+ if (Z_TYPE_P(entry) == IS_STRING ||
+ Z_TYPE_P(entry) == IS_BINARY ||
+ Z_TYPE_P(entry) == IS_UNICODE) {
+ if (zend_u_hash_find(eg_active_symbol_table, Z_TYPE_P(entry), Z_UNIVAL_P(entry),
+ Z_UNILEN_P(entry)+1, (void **)&value_ptr) != FAILURE) {
value = *value_ptr;
ALLOC_ZVAL(data);
*data = *value;
zval_copy_ctor(data);
INIT_PZVAL(data);
- zend_hash_update(Z_ARRVAL_P(return_value), Z_STRVAL_P(entry),
- Z_STRLEN_P(entry)+1, &data, sizeof(zval *), NULL);
+ zend_u_hash_update(Z_ARRVAL_P(return_value), Z_TYPE_P(entry), Z_UNIVAL_P(entry),
+ Z_UNILEN_P(entry)+1, &data, sizeof(zval *), NULL);
}
}
else if (Z_TYPE_P(entry) == IS_ARRAY) {
switch (Z_TYPE_PP(start_key)) {
case IS_STRING:
+ case IS_UNICODE:
case IS_LONG:
case IS_DOUBLE:
/* allocate an array for return */
entry->refcount++;
/* Update output hash depending on key type */
- if (p->nKeyLength)
- zend_hash_update(out_hash, p->arKey, p->nKeyLength, &entry, sizeof(zval *), NULL);
- else
+ if (p->nKeyLength == 0) {
zend_hash_next_index_insert(out_hash, &entry, sizeof(zval *), NULL);
+ } else {
+ zend_u_hash_update(out_hash, p->key.type, &p->key.u, p->nKeyLength, &entry, sizeof(zval *), NULL);
+ }
}
/* If hash for removed entries exists, go until offset+length and copy the entries to it */
for ( ; pos<offset+length && p; pos++, p=p->pListNext) {
entry = *((zval **)p->pData);
entry->refcount++;
- if (p->nKeyLength)
- zend_hash_update(*removed, p->arKey, p->nKeyLength, &entry, sizeof(zval *), NULL);
- else
+ if (p->nKeyLength == 0) {
zend_hash_next_index_insert(*removed, &entry, sizeof(zval *), NULL);
+ } else {
+ zend_u_hash_update(*removed, p->key.type, &p->key.u, p->nKeyLength, &entry, sizeof(zval *), NULL);
+ }
}
} else /* otherwise just skip those entries */
for ( ; pos<offset+length && p; pos++, p=p->pListNext);
for ( ; p ; p=p->pListNext) {
entry = *((zval **)p->pData);
entry->refcount++;
- if (p->nKeyLength)
- zend_hash_update(out_hash, p->arKey, p->nKeyLength, &entry, sizeof(zval *), NULL);
- else
+ if (p->nKeyLength == 0) {
zend_hash_next_index_insert(out_hash, &entry, sizeof(zval *), NULL);
+ } else {
+ zend_u_hash_update(out_hash, p->key.type, &p->key.u, p->nKeyLength, &entry, sizeof(zval *), NULL);
+ }
}
zend_hash_internal_pointer_reset(out_hash);
char *key = NULL;
int key_len = 0;
ulong index;
+ zend_uchar key_type;
/* Get the arguments and do error-checking */
if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &stack) == FAILURE) {
RETVAL_ZVAL(*val, 1, 0);
/* Delete the first or last value */
- zend_hash_get_current_key_ex(Z_ARRVAL_PP(stack), &key, &key_len, &index, 0, NULL);
+ key_type = zend_hash_get_current_key_ex(Z_ARRVAL_PP(stack), &key, &key_len, &index, 0, NULL);
if (key && Z_ARRVAL_PP(stack) == &EG(symbol_table)) {
- zend_delete_global_variable(key, key_len-1 TSRMLS_CC);
+ if (key_type == HASH_KEY_IS_UNICODE) key_type = IS_UNICODE;
+ else if (key_type == HASH_KEY_IS_STRING) key_type = IS_STRING;
+ else key_type = IS_BINARY;
+ zend_u_delete_global_variable(key_type, key, key_len-1 TSRMLS_CC);
} else {
zend_hash_del_key_or_index(Z_ARRVAL_PP(stack), key, key_len, index, (key) ? HASH_DEL_KEY : HASH_DEL_INDEX);
}
/* Copy elements from input array to the one that's returned */
while (pos < offset_val+length_val && zend_hash_get_current_data_ex(Z_ARRVAL_PP(input), (void **)&entry, &hpos) == SUCCESS) {
+ zend_uchar utype;
(*entry)->refcount++;
switch (zend_hash_get_current_key_ex(Z_ARRVAL_PP(input), &string_key, &string_key_len, &num_key, 0, &hpos)) {
case HASH_KEY_IS_STRING:
- zend_hash_update(Z_ARRVAL_P(return_value), string_key, string_key_len,
+ utype = IS_STRING;
+ goto ukey;
+ case HASH_KEY_IS_BINARY:
+ utype = IS_BINARY;
+ goto ukey;
+ case HASH_KEY_IS_UNICODE:
+ utype = IS_UNICODE;
+ukey:
+ zend_u_hash_update(Z_ARRVAL_P(return_value), utype, string_key, string_key_len,
entry, sizeof(zval *), NULL);
break;
zend_hash_internal_pointer_reset_ex(src, &pos);
while (zend_hash_get_current_data_ex(src, (void **)&src_entry, &pos) == SUCCESS) {
+ zend_uchar utype;
+
switch (zend_hash_get_current_key_ex(src, &string_key, &string_key_len, &num_key, 0, &pos)) {
case HASH_KEY_IS_STRING:
+ utype = IS_STRING;
+ goto ukey;
+ case HASH_KEY_IS_BINARY:
+ utype = IS_BINARY;
+ goto ukey;
+ case HASH_KEY_IS_UNICODE:
+ utype = IS_UNICODE;
+ukey:
if (recursive &&
- zend_hash_find(dest, string_key, string_key_len, (void **)&dest_entry) == SUCCESS) {
+ zend_u_hash_find(dest, utype, string_key, string_key_len, (void **)&dest_entry) == SUCCESS) {
if (*src_entry == *dest_entry && ((*dest_entry)->refcount % 2)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "recursion detected");
return 0;
} else {
(*src_entry)->refcount++;
- zend_hash_update(dest, string_key, strlen(string_key)+1,
+ zend_u_hash_update(dest, utype, string_key, string_key_len, /* length already counts the terminator, same value the find above uses */
src_entry, sizeof(zval *), NULL);
}
break;
switch (zend_hash_get_current_key_ex(Z_ARRVAL_PP(input), &string_key, &string_key_len, &num_key, 1, &pos)) {
case HASH_KEY_IS_STRING:
- Z_TYPE_P(new_val) = IS_STRING;
- Z_STRVAL_P(new_val) = string_key;
- Z_STRLEN_P(new_val) = string_key_len-1;
+ ZVAL_STRINGL(new_val, string_key, string_key_len-1, 0);
+ goto ukey;
+ case HASH_KEY_IS_BINARY:
+ ZVAL_BINARYL(new_val, string_key, string_key_len-1, 0);
+ goto ukey;
+ case HASH_KEY_IS_UNICODE:
+ ZVAL_UNICODEL(new_val, string_key, string_key_len-1, 0);
+ukey:
zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &new_val,
sizeof(zval *), NULL);
break;
} else {
Z_LVAL_PP(tmp)++;
}
- } else if (Z_TYPE_PP(entry) == IS_STRING) {
+ } else if (Z_TYPE_PP(entry) == IS_STRING ||
+ Z_TYPE_PP(entry) == IS_BINARY ||
+ Z_TYPE_PP(entry) == IS_UNICODE) {
/* make sure our array does not end up with numeric string keys */
- if (is_numeric_string(Z_STRVAL_PP(entry), Z_STRLEN_PP(entry), NULL, NULL, 0) == IS_LONG) {
+ if ((Z_TYPE_PP(entry) == IS_STRING && is_numeric_string(Z_STRVAL_PP(entry), Z_STRLEN_PP(entry), NULL, NULL, 0) == IS_LONG) ||
+ (Z_TYPE_PP(entry) == IS_UNICODE && is_numeric_unicode(Z_USTRVAL_PP(entry), Z_USTRLEN_PP(entry), NULL, NULL, 0) == IS_LONG)) { /* unicode branch uses the unicode accessors */
zval tmp_entry;
tmp_entry = **entry;
continue;
}
- if (zend_hash_find(Z_ARRVAL_P(return_value), Z_STRVAL_PP(entry), Z_STRLEN_PP(entry)+1, (void**)&tmp) == FAILURE) {
+ if (zend_u_hash_find(Z_ARRVAL_P(return_value), Z_TYPE_PP(entry), Z_UNIVAL_PP(entry), Z_UNILEN_PP(entry)+1, (void**)&tmp) == FAILURE) {
zval *data;
MAKE_STD_ZVAL(data);
Z_TYPE_P(data) = IS_LONG;
Z_LVAL_P(data) = 1;
- zend_hash_update(Z_ARRVAL_P(return_value), Z_STRVAL_PP(entry), Z_STRLEN_PP(entry) + 1, &data, sizeof(data), NULL);
+ zend_u_hash_update(Z_ARRVAL_P(return_value), Z_TYPE_PP(entry), Z_UNIVAL_PP(entry), Z_UNILEN_PP(entry) + 1, &data, sizeof(data), NULL);
} else {
Z_LVAL_PP(tmp)++;
}
zend_hash_internal_pointer_end_ex(Z_ARRVAL_PP(input), &pos);
while (zend_hash_get_current_data_ex(Z_ARRVAL_PP(input), (void **)&entry, &pos) == SUCCESS) {
+ zend_uchar utype;
+
(*entry)->refcount++;
switch (zend_hash_get_current_key_ex(Z_ARRVAL_PP(input), &string_key, &string_key_len, &num_key, 0, &pos)) {
case HASH_KEY_IS_STRING:
- zend_hash_update(Z_ARRVAL_P(return_value), string_key, string_key_len, entry, sizeof(zval *), NULL);
+ utype = IS_STRING;
+ goto ukey;
+ case HASH_KEY_IS_BINARY:
+ utype = IS_BINARY;
+ goto ukey;
+ case HASH_KEY_IS_UNICODE:
+ utype = IS_UNICODE;
+ukey:
+ zend_u_hash_update(Z_ARRVAL_P(return_value), utype, string_key, string_key_len, entry, sizeof(zval *), NULL);
break;
case HASH_KEY_IS_LONG:
MAKE_STD_ZVAL(data);
switch (zend_hash_get_current_key_ex(target_hash, &string_key, &str_key_len, &num_key, 1, &pos)) {
case HASH_KEY_IS_STRING:
- Z_STRVAL_P(data) = string_key;
- Z_STRLEN_P(data) = str_key_len-1;
- Z_TYPE_P(data) = IS_STRING;
+ ZVAL_STRINGL(data, string_key, str_key_len-1, 0);
+ break;
+ case HASH_KEY_IS_BINARY:
+ ZVAL_BINARYL(data, string_key, str_key_len-1, 0);
+ break;
+ case HASH_KEY_IS_UNICODE:
+ ZVAL_UNICODEL(data, string_key, str_key_len-1, 0);
break;
case HASH_KEY_IS_LONG:
Z_TYPE_P(data) = IS_LONG;
if (Z_TYPE_PP(entry) == IS_LONG) {
zend_hash_index_update(Z_ARRVAL_P(return_value), Z_LVAL_PP(entry), &data, sizeof(data), NULL);
- } else if (Z_TYPE_PP(entry) == IS_STRING) {
- zend_symtable_update(Z_ARRVAL_P(return_value), Z_STRVAL_PP(entry), Z_STRLEN_PP(entry) + 1, &data, sizeof(data), NULL);
+ } else if (Z_TYPE_PP(entry) == IS_STRING ||
+ Z_TYPE_PP(entry) == IS_BINARY ||
+ Z_TYPE_PP(entry) == IS_UNICODE) {
+ zend_u_symtable_update(Z_ARRVAL_P(return_value), Z_TYPE_PP(entry), Z_UNIVAL_PP(entry), Z_UNILEN_PP(entry) + 1, &data, sizeof(data), NULL);
} else {
zval_ptr_dtor(&data); /* will free also zval structure */
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Can only flip STRING and INTEGER values!");
}
/* }}} */
+
/* {{{ proto array array_change_key_case(array input [, int case=CASE_LOWER])
Retuns an array with all string keys lowercased [or uppercased] */
PHP_FUNCTION(array_change_key_case)
php_strtoupper(new_key, str_key_len - 1);
else
php_strtolower(new_key, str_key_len - 1);
- zend_hash_update(Z_ARRVAL_P(return_value), new_key, str_key_len, entry, sizeof(entry), NULL);
+ zend_u_hash_update(Z_ARRVAL_P(return_value), IS_STRING, new_key, str_key_len, entry, sizeof(entry), NULL);
+ efree(new_key);
+ break;
+ case HASH_KEY_IS_UNICODE:
+ new_key=eustrndup(string_key,str_key_len - 1);
+ str_key_len--;
+ if (change_to_upper)
+ new_key = php_u_strtoupper(&new_key, &str_key_len, UG(default_locale));
+ else
+ new_key = php_u_strtolower(&new_key, &str_key_len, UG(default_locale));
+ str_key_len++;
+ zend_u_hash_update(Z_ARRVAL_P(return_value), IS_UNICODE, new_key, str_key_len, entry, sizeof(entry), NULL);
efree(new_key);
break;
}
} else {
p = cmpdata->b;
}
- if (p->nKeyLength) {
+ if (p->nKeyLength == 0) {
+ zend_hash_index_del(Z_ARRVAL_P(return_value), p->h);
+ } else {
if (Z_ARRVAL_P(return_value) == &EG(symbol_table)) {
- zend_delete_global_variable(p->arKey, p->nKeyLength-1 TSRMLS_CC);
+ zend_u_delete_global_variable(p->key.type, &p->key.u, p->nKeyLength-1 TSRMLS_CC);
} else {
- zend_hash_del(Z_ARRVAL_P(return_value), p->arKey, p->nKeyLength);
+ zend_u_hash_del(Z_ARRVAL_P(return_value), p->key.type, &p->key.u, p->nKeyLength);
}
- } else {
- zend_hash_index_del(Z_ARRVAL_P(return_value), p->h);
}
}
}
if (!p) {
goto out;
}
- if (p->nKeyLength) {
- zend_hash_del(Z_ARRVAL_P(return_value), p->arKey, p->nKeyLength);
- } else {
+ if (p->nKeyLength == 0) {
zend_hash_index_del(Z_ARRVAL_P(return_value), p->h);
+ } else {
+ zend_u_hash_del(Z_ARRVAL_P(return_value), p->key.type, &p->key.u, p->nKeyLength);
}
}
}
/* with value < value of ptrs[i] */
for (;;) {
p = *ptrs[0];
- if (p->nKeyLength) {
- zend_hash_del(Z_ARRVAL_P(return_value), p->arKey, p->nKeyLength);
- } else {
+ if (p->nKeyLength == 0) {
zend_hash_index_del(Z_ARRVAL_P(return_value), p->h);
+ } else {
+ zend_u_hash_del(Z_ARRVAL_P(return_value), p->key.type, &p->key.u, p->nKeyLength);
}
if (!*++ptrs[0]) {
goto out;
/* delete all entries with value as ptrs[0] */
for (;;) {
p = *ptrs[0];
- if (p->nKeyLength) {
- zend_hash_del(Z_ARRVAL_P(return_value), p->arKey, p->nKeyLength);
- } else {
+ if (p->nKeyLength == 0) {
zend_hash_index_del(Z_ARRVAL_P(return_value), p->h);
+ } else {
+ zend_u_hash_del(Z_ARRVAL_P(return_value), p->key.type, &p->key.u, p->nKeyLength);
}
if (!*++ptrs[0]) {
goto out;
if (Z_TYPE_P(return_value) != IS_ARRAY) {
if (key_type == HASH_KEY_IS_STRING) {
RETURN_STRINGL(string_key, string_key_len-1, 1);
+ } else if (key_type == HASH_KEY_IS_BINARY) {
+ RETURN_BINARYL(string_key, string_key_len-1, 1);
+ } else if (key_type == HASH_KEY_IS_UNICODE) {
+ RETURN_UNICODEL(string_key, string_key_len-1, 1);
} else {
RETURN_LONG(num_key);
}
/* Append the result to the return value. */
if (key_type == HASH_KEY_IS_STRING)
add_next_index_stringl(return_value, string_key, string_key_len-1, 1);
+ else if (key_type == HASH_KEY_IS_BINARY)
+ add_next_index_binaryl(return_value, string_key, string_key_len-1, 1);
+ else if (key_type == HASH_KEY_IS_UNICODE)
+ add_next_index_unicodel(return_value, string_key, string_key_len-1, 1);
else
add_next_index_long(return_value, num_key);
}
for (zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(input), &pos);
zend_hash_get_current_data_ex(Z_ARRVAL_PP(input), (void **)&operand, &pos) == SUCCESS;
zend_hash_move_forward_ex(Z_ARRVAL_PP(input), &pos)) {
+ zend_uchar utype;
if (callback) {
zend_fcall_info fci;
zval_add_ref(operand);
switch (zend_hash_get_current_key_ex(Z_ARRVAL_PP(input), &string_key, &string_key_len, &num_key, 0, &pos)) {
case HASH_KEY_IS_STRING:
- zend_hash_update(Z_ARRVAL_P(return_value), string_key, string_key_len, operand, sizeof(zval *), NULL);
+ utype = IS_STRING;
+ goto ukey;
+ case HASH_KEY_IS_BINARY:
+ utype = IS_BINARY;
+ goto ukey;
+ case HASH_KEY_IS_UNICODE:
+ utype = IS_UNICODE;
+ukey:
+ zend_u_hash_update(Z_ARRVAL_P(return_value), utype, string_key, string_key_len, operand, sizeof(zval *), NULL);
break;
case HASH_KEY_IS_LONG:
if (ZEND_NUM_ARGS() > 2) {
add_next_index_zval(return_value, result);
} else {
- if (key_type == HASH_KEY_IS_STRING) {
- add_assoc_zval_ex(return_value, str_key, str_key_len, result);
- } else {
- add_index_zval(return_value, num_key, result);
+ zend_uchar utype;
+
+ switch (key_type) {
+ case HASH_KEY_IS_STRING:
+ utype = IS_STRING;
+ goto ukey;
+ case HASH_KEY_IS_BINARY:
+ utype = IS_BINARY;
+ goto ukey;
+ case HASH_KEY_IS_UNICODE:
+ utype = IS_UNICODE;
+ukey:
+ add_u_assoc_zval_ex(return_value, utype, str_key, str_key_len, result);
+ break;
+ default:
+ add_index_zval(return_value, num_key, result);
}
}
}
switch (Z_TYPE_PP(key)) {
case IS_STRING:
- if (zend_symtable_exists(HASH_OF(*array), Z_STRVAL_PP(key), Z_STRLEN_PP(key)+1)) {
+ case IS_UNICODE:
+ case IS_BINARY:
+ if (zend_u_symtable_exists(HASH_OF(*array), Z_TYPE_PP(key), Z_UNIVAL_PP(key), Z_UNILEN_PP(key)+1)) {
RETURN_TRUE;
}
RETURN_FALSE;
zval_add_ref(entry);
if (preserve_keys) {
+ zend_uchar utype;
+
key_type = zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &str_key,
&str_key_len, &num_key, 0, &pos);
- if (key_type == HASH_KEY_IS_STRING) {
- add_assoc_zval_ex(chunk, str_key, str_key_len, *entry);
- } else {
- add_index_zval(chunk, num_key, *entry);
+ switch (key_type) {
+ case HASH_KEY_IS_STRING:
+ utype = IS_STRING;
+ goto ukey;
+ case HASH_KEY_IS_BINARY:
+ utype = IS_BINARY;
+ goto ukey;
+ case HASH_KEY_IS_UNICODE:
+ utype = IS_UNICODE;
+ukey:
+ add_u_assoc_zval_ex(chunk, utype, str_key, str_key_len, *entry);
+ break;
+ default:
+ add_index_zval(chunk, num_key, *entry);
+ break;
}
} else {
add_next_index_zval(chunk, *entry);
WRONG_PARAM_COUNT;
}
- if (Z_TYPE_PP(assertion) == IS_STRING) {
+ if (Z_TYPE_PP(assertion) == IS_STRING || Z_TYPE_PP(assertion) == IS_UNICODE) {
zval retval;
+ zval tmp;
int old_error_reporting = 0; /* shut up gcc! */
-
- myeval = Z_STRVAL_PP(assertion);
+ int free_tmp = 0;
+
+ if (Z_TYPE_PP(assertion) == IS_UNICODE) {
+ tmp = **assertion;
+ zval_copy_ctor(&tmp);
+ convert_to_string(&tmp);
+ myeval = Z_STRVAL(tmp);
+ free_tmp = 1;
+ } else {
+ myeval = Z_STRVAL_PP(assertion);
+ }
if (ASSERTG(quiet_eval)) {
old_error_reporting = EG(error_reporting);
php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failure evaluating code:\n%s", myeval);
/* php_error_docref() does not return in this case. */
}
+ if (free_tmp) {
+ zval_dtor(&tmp);
+ }
efree(compiled_string_description);
if (ASSERTG(quiet_eval)) {
ZVAL_STRING(args[0], SAFE_STRING(filename), 1);
ZVAL_LONG (args[1], lineno);
- ZVAL_STRING(args[2], SAFE_STRING(myeval), 1);
+ if (Z_TYPE_PP(assertion) == IS_UNICODE) {
+ ZVAL_UNICODEL(args[2], Z_USTRVAL_PP(assertion), Z_USTRLEN_PP(assertion), 1);
+ } else {
+ ZVAL_STRING(args[2], SAFE_STRING(myeval), 1);
+ }
MAKE_STD_ZVAL(retval);
ZVAL_FALSE(retval);
PHP_FE(unserialize, NULL)
PHP_FE(var_dump, NULL)
+ PHP_FE(var_inspect, NULL)
PHP_FE(var_export, NULL)
PHP_FE(debug_zval_dump, NULL)
PHP_FE(print_r, NULL)
RETURN_FALSE;
}
- if (Z_TYPE_PP(params[0]) != IS_STRING && Z_TYPE_PP(params[0]) != IS_ARRAY) {
+ if (Z_TYPE_PP(params[0]) != IS_STRING &&
+ Z_TYPE_PP(params[0]) != IS_UNICODE &&
+ Z_TYPE_PP(params[0]) != IS_ARRAY) {
SEPARATE_ZVAL(params[0]);
convert_to_string_ex(params[0]);
}
SEPARATE_ZVAL(params);
convert_to_array_ex(params);
- if (Z_TYPE_PP(func) != IS_STRING && Z_TYPE_PP(func) != IS_ARRAY) {
+ if (Z_TYPE_PP(func) != IS_STRING &&
+ Z_TYPE_PP(func) != IS_UNICODE &&
+ Z_TYPE_PP(func) != IS_ARRAY) {
SEPARATE_ZVAL(func);
convert_to_string_ex(func);
}
efree(params);
RETURN_FALSE;
}
- if (Z_TYPE_PP(params[1]) != IS_OBJECT && Z_TYPE_PP(params[1]) != IS_STRING) {
+ if (Z_TYPE_PP(params[1]) != IS_OBJECT &&
+ Z_TYPE_PP(params[1]) != IS_STRING &&
+ Z_TYPE_PP(params[1]) != IS_UNICODE) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument is not an object or class name");
efree(params);
RETURN_FALSE;
WRONG_PARAM_COUNT;
}
- if (Z_TYPE_PP(obj) != IS_OBJECT && Z_TYPE_PP(obj) != IS_STRING) {
+ if (Z_TYPE_PP(obj) != IS_OBJECT &&
+ Z_TYPE_PP(obj) != IS_STRING &&
+ Z_TYPE_PP(obj) != IS_UNICODE) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument is not an object or class name");
RETURN_FALSE;
}
&& zend_hash_index_find(Z_ARRVAL_P(function), 1, (void **) &method) == SUCCESS
&& Z_TYPE_PP(obj) == IS_OBJECT
&& Z_TYPE_PP(method) == IS_STRING ) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call %s::%s() - function does not exist", Z_OBJCE_PP(obj)->name, Z_STRVAL_PP(method));
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call %v::%R() - function does not exist", Z_OBJCE_PP(obj)->name, Z_TYPE_PP(method), Z_UNIVAL_PP(method));
} else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call tick function");
}
return 0;
}
- if (hash_key->nKeyLength == 0 || hash_key->arKey[0] != 0) {
+ if (hash_key->nKeyLength == 0 ||
+ hash_key->type != IS_STRING ||
+ hash_key->u.string[0] != 0) {
MAKE_STD_ZVAL(option);
array_init(option);
new_key = (char *) emalloc(new_key_len);
memcpy(new_key, prefix, prefix_len);
- memcpy(new_key+prefix_len, hash_key->arKey, hash_key->nKeyLength);
+ memcpy(new_key+prefix_len, hash_key->u.string, hash_key->nKeyLength);
zend_delete_global_variable(new_key, new_key_len-1 TSRMLS_CC);
ZEND_SET_SYMBOL_WITH_LENGTH(&EG(symbol_table), new_key, new_key_len, *var, (*var)->refcount+1, 0);
CREDIT_LINE("Streams Abstraction Layer", "Wez Furlong, Sara Golemon");
CREDIT_LINE("PHP Data Objects Layer", "Wez Furlong, Marcus Boerger, Sterling Hughes, George Schlossnagle");
php_info_print_table_end();
+
+ /* Unicode support */
+ php_info_print_table_start();
+ php_info_print_table_colspan_header(2, "Unicode Support");
+ php_info_print_table_header(2, "Contribution", "Authors");
+ CREDIT_LINE("Design and Architecture", "Andrei Zmievski");
+ CREDIT_LINE("Zend Engine implementation", "Andrei Zmievski, Dmitry Stogov");
+ php_info_print_table_end();
}
if (flag & PHP_CREDITS_SAPI) {
REGISTER_LONG_CONSTANT("FILE_SKIP_EMPTY_LINES", PHP_FILE_SKIP_EMPTY_LINES, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FILE_APPEND", PHP_FILE_APPEND, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FILE_NO_DEFAULT_CONTEXT", PHP_FILE_NO_DEFAULT_CONTEXT, CONST_CS | CONST_PERSISTENT);
-
+ REGISTER_LONG_CONSTANT("FILE_TEXT", PHP_FILE_TEXT, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("FILE_BINARY", PHP_FILE_BINARY, CONST_CS | CONST_PERSISTENT);
+
#ifdef HAVE_FNMATCH
REGISTER_LONG_CONSTANT("FNM_NOESCAPE", FNM_NOESCAPE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FNM_PATHNAME", FNM_PATHNAME, CONST_CS | CONST_PERSISTENT);
/* {{{ proto string file_get_contents(string filename [, bool use_include_path [, resource context [, long offset [, long maxlen]]]])
Read the entire file into a string */
+/* UTODO: Accept unicode contents -- Maybe? Perhaps a binary fetch leaving the script to icu_ucnv_toUnicode() on its own is best? */
PHP_FUNCTION(file_get_contents)
{
char *filename;
long flags = 0;
zval *zcontext = NULL;
php_stream_context *context = NULL;
+ char mode[3] = { 'w', 0, 0 };
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz/|lr!", &filename, &filename_len,
&data, &flags, &zcontext) == FAILURE) {
context = php_stream_context_from_zval(zcontext, flags & PHP_FILE_NO_DEFAULT_CONTEXT);
- stream = php_stream_open_wrapper_ex(filename, (flags & PHP_FILE_APPEND) ? "ab" : "wb",
+ if (flags & PHP_FILE_APPEND) {
+ mode[0] = 'a';
+ }
+ if (flags & PHP_FILE_BINARY) {
+ mode[1] = 'b';
+ } else if (flags & PHP_FILE_TEXT) {
+ mode[1] = 't';
+ }
+ stream = php_stream_open_wrapper_ex(filename, mode,
((flags & PHP_FILE_USE_INCLUDE_PATH) ? USE_PATH : 0) | ENFORCE_SAFE_MODE | REPORT_ERRORS, NULL, context);
if (stream == NULL) {
RETURN_FALSE;
break;
}
- case IS_NULL:
- case IS_LONG:
- case IS_DOUBLE:
- case IS_BOOL:
- case IS_CONSTANT:
- convert_to_string_ex(&data);
-
- case IS_STRING:
- if (Z_STRLEN_P(data)) {
- numbytes = php_stream_write(stream, Z_STRVAL_P(data), Z_STRLEN_P(data));
- if (numbytes != Z_STRLEN_P(data)) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d bytes written, possibly out of free disk space", numbytes, Z_STRLEN_P(data));
- numbytes = -1;
- }
- }
- break;
-
case IS_ARRAY:
if (zend_hash_num_elements(Z_ARRVAL_P(data))) {
int bytes_written;
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(data), &pos);
while (zend_hash_get_current_data_ex(Z_ARRVAL_P(data), (void **) &tmp, &pos) == SUCCESS) {
- if ((*tmp)->type != IS_STRING) {
- SEPARATE_ZVAL(tmp);
- convert_to_string(*tmp);
- }
- if (Z_STRLEN_PP(tmp)) {
- numbytes += Z_STRLEN_PP(tmp);
- bytes_written = php_stream_write(stream, Z_STRVAL_PP(tmp), Z_STRLEN_PP(tmp));
- if (bytes_written < 0 || bytes_written != Z_STRLEN_PP(tmp)) {
- if (bytes_written < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %d bytes to %s", Z_STRLEN_PP(tmp), filename);
- } else {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d bytes written, possibly out of free disk space", bytes_written, Z_STRLEN_PP(tmp));
- }
+ /* Each array element is written in turn: unicode elements go through the
+  * unicode-aware writer, everything else is written as a binary string. */
+ if (Z_TYPE_PP(tmp) == IS_UNICODE) {
+ int wrote_bytes = php_stream_u_write(stream, Z_USTRVAL_PP(tmp), Z_USTRLEN_PP(tmp));
+ if (wrote_bytes < 0) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %d characters to %s", Z_USTRLEN_PP(tmp), filename);
+ numbytes = -1;
+ break;
+ } else if (wrote_bytes != UBYTES(Z_USTRLEN_PP(tmp))) {
+ /* Short write: report progress in code points rather than UTF-16 code units */
+ int32_t ustrlen = u_countChar32(Z_USTRVAL_PP(tmp), Z_USTRLEN_PP(tmp));
+ int32_t numchars = u_countChar32(Z_USTRVAL_PP(tmp), wrote_bytes / UBYTES(1));
+
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d characters written, possibly out of free disk space", numchars, ustrlen);
numbytes = -1;
break;
}
+ numbytes += wrote_bytes;
+ } else { /* non-unicode */
+ int free_val = 0;
+ zval strval = **tmp;
+
+ /* Convert a private copy so the caller's array element is left untouched */
+ if (Z_TYPE(strval) != IS_STRING) {
+ zval_copy_ctor(&strval);
+ convert_to_string(&strval);
+ free_val = 1;
+ }
+ if (Z_STRLEN(strval)) {
+ numbytes += Z_STRLEN(strval);
+ bytes_written = php_stream_write(stream, Z_STRVAL(strval), Z_STRLEN(strval));
+ if (bytes_written < 0 || bytes_written != Z_STRLEN(strval)) {
+ if (bytes_written < 0) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %d bytes to %s", Z_STRLEN(strval), filename);
+ } else {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d bytes written, possibly out of free disk space", bytes_written, Z_STRLEN(strval));
+ }
+ /* The break below bypasses the regular cleanup at the end of this
+  * branch, so release the converted temporary here to avoid a leak. */
+ if (free_val) {
+ zval_dtor(&strval);
+ }
+ numbytes = -1;
+ break;
+ }
+ }
+ if (free_val) {
+ zval_dtor(&strval);
+ }
}
zend_hash_move_forward_ex(Z_ARRVAL_P(data), &pos);
}
}
break;
+ case IS_OBJECT:
+ /* TODO */
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "2nd parameter must be non-object (for now)");
+ numbytes = -1;
+ break;
+ case IS_UNICODE:
+ if (Z_USTRLEN_P(data)) {
+ numbytes = php_stream_u_write(stream, Z_USTRVAL_P(data), Z_USTRLEN_P(data));
+ if (numbytes < 0) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %d characters to %s", Z_USTRLEN_P(data), filename);
+ numbytes = -1;
+ } else if (numbytes != UBYTES(Z_USTRLEN_P(data))) {
+ int32_t ustrlen = u_countChar32(Z_USTRVAL_P(data), Z_USTRLEN_P(data));
+ int32_t numchars = u_countChar32(Z_USTRVAL_P(data), numbytes / UBYTES(1));
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d characters written, possibly out of free disk space", numchars, ustrlen);
+ numbytes = -1;
+ }
+ }
+ break;
+ case IS_NULL:
+ case IS_LONG:
+ case IS_DOUBLE:
+ case IS_BOOL:
+ case IS_CONSTANT:
+ case IS_STRING:
default:
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "The 2nd parameter should be either a string or an array");
- numbytes = -1;
+ if (Z_TYPE_P(data) != IS_STRING) {
+ convert_to_string_ex(&data);
+ }
+ if (Z_STRLEN_P(data)) {
+ numbytes = php_stream_write(stream, Z_STRVAL_P(data), Z_STRLEN_P(data));
+ if (numbytes != Z_STRLEN_P(data)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d bytes written, possibly out of free disk space", numbytes, Z_STRLEN_P(data));
+ numbytes = -1;
+ }
+ }
break;
}
#define PHP_FILE_BUF_SIZE 80
+/* UTODO: Accept unicode contents */
PHP_FUNCTION(file)
{
char *filename;
context = php_stream_context_from_zval(zcontext, 0);
stream = php_stream_open_wrapper_ex(filename, mode, (use_include_path ? USE_PATH : 0) | ENFORCE_SAFE_MODE | REPORT_ERRORS, NULL, context);
-
if (stream == NULL) {
RETURN_FALSE;
}
Get a line from file pointer */
PHPAPI PHP_FUNCTION(fgets)
{
- zval **arg1, **arg2;
- int len = 1024;
- char *buf = NULL;
- int argc = ZEND_NUM_ARGS();
- size_t line_len = 0;
php_stream *stream;
+ zval *zstream;
+ int argc = ZEND_NUM_ARGS();
+ long length = -1;
+ UChar *buf = NULL;
+ int32_t num_chars = -1, num_bytes = -1;
+ int is_unicode;
- if (argc<1 || argc>2 || zend_get_parameters_ex(argc, &arg1, &arg2) == FAILURE) {
- WRONG_PARAM_COUNT;
+ if (zend_parse_parameters(argc TSRMLS_CC, "r|l", &zstream, &length) == FAILURE) {
+ RETURN_NULL();
}
- PHP_STREAM_TO_ZVAL(stream, arg1);
+ php_stream_from_zval(stream, &zstream);
- if (argc == 1) {
- /* ask streams to give us a buffer of an appropriate size */
- buf = php_stream_get_line(stream, NULL, 0, &line_len);
- if (buf == NULL) {
- goto exit_failed;
- }
- } else if (argc > 1) {
- convert_to_long_ex(arg2);
- len = Z_LVAL_PP(arg2);
-
- if (len <= 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Length parameter must be greater than 0");
- RETURN_FALSE;
- }
+ if (length > 0) {
+ /* Don't try to short circuit this by just using num_chars in parse_parameters, long doesn't always mean 32-bit */
+ num_chars = length;
+ }
- buf = ecalloc(len + 1, sizeof(char));
- if (php_stream_get_line(stream, buf, len, &line_len) == NULL) {
- goto exit_failed;
- }
+ if ((buf = php_stream_u_get_line(stream, NULL, &num_bytes, &num_chars, &is_unicode)) == NULL) {
+ RETURN_FALSE;
}
-
- if (PG(magic_quotes_runtime)) {
- Z_STRVAL_P(return_value) = php_addslashes(buf, line_len, &Z_STRLEN_P(return_value), 1 TSRMLS_CC);
- Z_TYPE_P(return_value) = IS_STRING;
+
+ if (is_unicode) {
+ /* UTODO: magic_quotes_runtime */
+ RETURN_UNICODEL(buf, num_chars, 0);
} else {
- ZVAL_STRINGL(return_value, buf, line_len, 0);
- /* resize buffer if it's much larger than the result.
- * Only needed if the user requested a buffer size. */
- if (argc > 1 && Z_STRLEN_P(return_value) < len / 2) {
- Z_STRVAL_P(return_value) = erealloc(buf, line_len + 1);
- }
- }
- return;
+ if (PG(magic_quotes_runtime)) {
+ int len;
+ char *str;
-exit_failed:
- RETVAL_FALSE;
- if (buf) {
- efree(buf);
+ str = php_addslashes((char*)buf, num_bytes, &len, 1 TSRMLS_CC);
+ RETURN_STRINGL(str, len, 0);
+ } else {
+ RETURN_STRINGL((char*)buf, num_bytes, 0);
+ }
}
}
/* }}} */
PHPAPI PHP_FUNCTION(fgetc)
{
zval **arg1;
- char buf[2];
- int result;
+ /* Scratch space for a single character. It is declared as UChar so it is
+  * correctly aligned for UTF-16 access, and sized for one surrogate pair
+  * (two code units) plus a terminating NUL. buf is a byte view of the same
+  * storage for binary streams. */
+ UChar ubuf[3];
+ char *buf = (char *) ubuf;
+ int is_unicode;
php_stream *stream;
+ int32_t num_bytes = UBYTES(2), num_chars = 1;
if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg1) == FAILURE) {
WRONG_PARAM_COUNT;
PHP_STREAM_TO_ZVAL(stream, arg1);
- result = php_stream_getc(stream);
-
- if (result == EOF) {
+ if (!php_stream_u_read(stream, buf, &num_bytes, &num_chars, &is_unicode)) {
RETVAL_FALSE;
} else {
- buf[0] = result;
- buf[1] = '\0';
-
- RETURN_STRINGL(buf, 1, 1);
+ /* The scratch buffer lives on the stack, so the result must be
+  * duplicated (last argument 1); handing the zval a non-duplicated
+  * pointer would leave it referencing dead stack memory. */
+ if (is_unicode) {
+ int32_t num_u16 = num_bytes >> 1;
+ ubuf[num_u16] = 0;
+ RETURN_UNICODEL(ubuf, num_u16, 1);
+ } else {
+ buf[1] = 0;
+ RETURN_STRINGL(buf, 1, 1);
+ }
}
}
/* }}} */
/* }}} */
/* {{{ proto string fgetss(resource fp [, int length, string allowable_tags])
Get a line from file pointer and strip HTML tags */
+/* UTODO: Accept unicode contents */
PHPAPI PHP_FUNCTION(fgetss)
{
zval **fd, **bytes = NULL, **allow=NULL;
/* {{{ proto mixed fscanf(resource stream, string format [, string ...])
Implements a mostly ANSI compatible fscanf() */
+/* UTODO: Accept unicode contents */
PHP_FUNCTION(fscanf)
{
int result;
Binary-safe file write */
PHPAPI PHP_FUNCTION(fwrite)
{
- zval **arg1, **arg2, **arg3=NULL;
- int ret;
- int num_bytes;
- char *buffer = NULL;
+ int ret, argc = ZEND_NUM_ARGS();
+ long write_len = -1;
php_stream *stream;
+ zval *zstream, *zstring;
- switch (ZEND_NUM_ARGS()) {
- case 2:
- if (zend_get_parameters_ex(2, &arg1, &arg2)==FAILURE) {
- RETURN_FALSE;
- }
- convert_to_string_ex(arg2);
- num_bytes = Z_STRLEN_PP(arg2);
- break;
-
- case 3:
- if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3)==FAILURE) {
- RETURN_FALSE;
- }
- convert_to_string_ex(arg2);
- convert_to_long_ex(arg3);
- num_bytes = MAX(0, MIN(Z_LVAL_PP(arg3), Z_STRLEN_PP(arg2)));
- break;
-
- default:
- WRONG_PARAM_COUNT;
- /* NOTREACHED */
- break;
+ if (zend_parse_parameters(argc TSRMLS_CC, "rz|l", &zstream, &zstring, &write_len) == FAILURE) {
+ RETURN_NULL();
}
- if (!num_bytes) {
+ if (!write_len) {
RETURN_LONG(0);
}
- PHP_STREAM_TO_ZVAL(stream, arg1);
+ php_stream_from_zval(stream, &zstream);
- if (PG(magic_quotes_runtime)) {
- buffer = estrndup(Z_STRVAL_PP(arg2), num_bytes);
- php_stripslashes(buffer, &num_bytes TSRMLS_CC);
- }
+ if (Z_TYPE_P(zstring) == IS_UNICODE) {
+ if (write_len >= 0) {
+ /* write_len counts code points; convert it to the matching number of UTF-16 code units */
+ int32_t write_uchars = 0;
- ret = php_stream_write(stream, buffer ? buffer : Z_STRVAL_PP(arg2), num_bytes);
- if (buffer) {
- efree(buffer);
+ U16_FWD_N(Z_USTRVAL_P(zstring), write_uchars, Z_USTRLEN_P(zstring), write_len);
+ write_len = write_uchars;
+ }
+
+ if (write_len < 0 || write_len > Z_USTRLEN_P(zstring)) {
+ write_len = Z_USTRLEN_P(zstring);
+ }
+
+ /* UTODO Handle magic_quotes_runtime for unicode strings */
+
+ ret = php_stream_u_write(stream, Z_USTRVAL_P(zstring), write_len);
+
+ /* Convert the written length (treated here as UTF-16 code units) back to code points */
+ if (ret > 0) {
+ ret = u_countChar32(Z_USTRVAL_P(zstring), ret);
+ }
+ } else {
+ char *buffer = NULL;
+ int num_bytes;
+
+ convert_to_string(zstring);
+ if (write_len < 0 || write_len > Z_STRLEN_P(zstring)) {
+ write_len = Z_STRLEN_P(zstring);
+ }
+
+ num_bytes = write_len;
+ if (argc < 3 && PG(magic_quotes_runtime)) {
+ buffer = estrndup(Z_STRVAL_P(zstring), num_bytes);
+ php_stripslashes(buffer, &num_bytes TSRMLS_CC);
+ }
+ ret = php_stream_write(stream, buffer ? buffer : Z_STRVAL_P(zstring), num_bytes);
+ if (buffer) {
+ efree(buffer);
+ }
}
RETURN_LONG(ret);
/* {{{ proto int readfile(string filename [, bool use_include_path[, resource context]])
Output a file or a URL */
+/* UTODO: Accept unicode contents */
PHP_FUNCTION(readfile)
{
char *filename;
/* {{{ proto int fpassthru(resource fp)
Output all remaining data from a file pointer */
+/* UTODO: Accept unicode contents */
PHPAPI PHP_FUNCTION(fpassthru)
{
zval **arg1;
Binary-safe file read */
PHPAPI PHP_FUNCTION(fread)
{
- zval **arg1, **arg2;
- int len;
+ zval *zstream;
+ char *buf;
+ long len;
php_stream *stream;
-
- if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) {
- WRONG_PARAM_COUNT;
+ int is_unicode;
+ int32_t num_bytes, num_chars;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rl", &zstream, &len) == FAILURE) {
+ RETURN_NULL();
}
- PHP_STREAM_TO_ZVAL(stream, arg1);
+ php_stream_from_zval(stream, &zstream);
- convert_to_long_ex(arg2);
- len = Z_LVAL_PP(arg2);
if (len <= 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Length parameter must be greater than 0");
RETURN_FALSE;
}
+ /* The buffer size, UBYTES(len) plus one terminating code unit, is kept in
+  * an int32_t. A short read is always a valid fread() result, so oversized
+  * requests are clamped instead of letting the size computation overflow. */
+ if (len > (long) ((0x7fffffff - UBYTES(1)) / UBYTES(1))) {
+ len = (long) ((0x7fffffff - UBYTES(1)) / UBYTES(1));
+ }
+
- Z_STRVAL_P(return_value) = emalloc(len + 1);
- Z_STRLEN_P(return_value) = php_stream_read(stream, Z_STRVAL_P(return_value), len);
+ num_chars = len;
+ num_bytes = UBYTES(len);
+ /* Sized for the unicode case; a binary read only needs part of it */
+ buf = emalloc(num_bytes + UBYTES(1));
- /* needed because recv/read/gzread doesnt put a null at the end*/
- Z_STRVAL_P(return_value)[Z_STRLEN_P(return_value)] = 0;
+ if (!php_stream_u_read(stream, buf, &num_bytes, &num_chars, &is_unicode)) {
+ efree(buf);
+ RETURN_FALSE;
+ }
+
+ if (is_unicode) {
+ /* UTODO - magic_quotes_runtime */
- if (PG(magic_quotes_runtime)) {
- Z_STRVAL_P(return_value) = php_addslashes(Z_STRVAL_P(return_value),
- Z_STRLEN_P(return_value), &Z_STRLEN_P(return_value), 1 TSRMLS_CC);
+ /* Terminate with a full two-byte UTF-16 NUL */
+ buf[num_bytes] = 0;
+ buf[num_bytes + 1] = 0;
+ /* buf holds UTF-16 data here; hand it over as UChar* (ownership passes to the zval) */
+ RETURN_UNICODEL((UChar *) buf, num_bytes >> 1, 0);
+ } else {
+ buf[num_bytes] = 0;
+ if (PG(magic_quotes_runtime)) {
+ buf = php_addslashes(buf, num_bytes, &num_bytes, 1 TSRMLS_CC);
+ }
+ RETURN_STRINGL(buf, num_bytes, 0);
}
- Z_TYPE_P(return_value) = IS_STRING;
}
/* }}} */
/* }}} */
/* {{{ proto int fputcsv(resource fp, array fields [, string delimiter [, string enclosure]])
Format line as CSV and write to file pointer */
+/* UTODO: Output unicode contents */
PHP_FUNCTION(fputcsv)
{
char delimiter = ','; /* allow this to be set as parameter */
/* {{{ proto array fgetcsv(resource fp [,int length [, string delimiter [, string enclosure]]])
Get line from file pointer and parse for CSV fields */
+/* UTODO: Accept unicode contents */
PHP_FUNCTION(fgetcsv)
{
char *temp, *tptr, *bptr, *line_end, *limit;
#define PHP_FILE_APPEND 8
#define PHP_FILE_NO_DEFAULT_CONTEXT 16
+/* Specified as explicit values so that file_put_contents() can override context param default mode */
+#define PHP_FILE_TEXT 32
+#define PHP_FILE_BINARY 64
+
typedef enum _php_meta_tags_token {
TOK_EOF = 0,
TOK_OPENTAG,
while (buckets_in->head) {
+ /* Reject unicode data while the bucket is still linked into buckets_in.
+  * Checking after php_stream_bucket_make_writeable() would leak the
+  * bucket that call has just taken out of the brigade. */
+ if (buckets_in->head->is_unicode) {
+ /* rot13 is silly enough, don't apply it to unicode data */
+ return PSFS_ERR_FATAL;
+ }
+
bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
-
- php_strtr(bucket->buf, bucket->buflen, rot13_from, rot13_to, 52);
- consumed += bucket->buflen;
-
+
+ php_strtr(bucket->buf.str.val, bucket->buf.str.len, rot13_from, rot13_to, 52);
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
+ consumed += bucket->buf.str.len;
}
if (bytes_consumed) {
static php_stream_filter_ops strfilter_rot13_ops = {
strfilter_rot13_filter,
NULL,
- "string.rot13"
+ "string.rot13",
+ PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING
};
static php_stream_filter *strfilter_rot13_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
size_t consumed = 0;
while (buckets_in->head) {
- bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
-
- php_strtr(bucket->buf, bucket->buflen, lowercase, uppercase, 26);
- consumed += bucket->buflen;
-
+ bucket = buckets_in->head;
+ if (bucket->is_unicode) {
+ /* Case mapping can change the length of a unicode string, so the
+  * result is built in a fresh buffer and wrapped in a new bucket. */
+ UErrorCode errCode = U_ZERO_ERROR;
+ int32_t outbuflen = bucket->buf.ustr.len;
+ int32_t destlen;
+ int is_persistent = php_stream_is_persistent(stream);
+ UChar *outbuf = peumalloc(outbuflen + 1, is_persistent);
+
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ while (1) {
+ if (!outbuf) {
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ return PSFS_ERR_FATAL;
+ }
+ /* The return value is the length of the converted text, or the
+  * capacity required when the destination is too small. */
+ destlen = u_strToUpper(outbuf, outbuflen, bucket->buf.ustr.val, bucket->buf.ustr.len, NULL, &errCode);
+ if (errCode != U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ }
+ /* ICU functions return immediately when entered with a failure
+  * code, so the error must be cleared before retrying; grow the
+  * buffer straight to the size ICU asked for. */
+ errCode = U_ZERO_ERROR;
+ outbuflen = destlen;
+ outbuf = peurealloc(outbuf, outbuflen + 1, is_persistent);
+ }
+ if (U_FAILURE(errCode)) {
+ pefree(outbuf, is_persistent);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ return PSFS_ERR_FATAL;
+ }
+ /* Account for the input exactly once per bucket */
+ consumed += UBYTES(bucket->buf.ustr.len);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+
+ /* Use the converted length, not the buffer capacity */
+ outbuf[destlen] = 0;
+ bucket = php_stream_bucket_new_unicode(stream, outbuf, destlen, 1, is_persistent TSRMLS_CC);
+ } else {
+ bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
+ php_strtr(bucket->buf.str.val, bucket->buf.str.len, lowercase, uppercase, 26);
+ consumed += bucket->buf.str.len;
+ }
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
}
if (bytes_consumed) {
*bytes_consumed = consumed;
}
-
+
return PSFS_PASS_ON;
}
size_t consumed = 0;
while (buckets_in->head) {
- bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
-
- php_strtr(bucket->buf, bucket->buflen, uppercase, lowercase, 26);
- consumed += bucket->buflen;
-
+ bucket = buckets_in->head;
+ if (bucket->is_unicode) {
+ /* Case mapping can change the length of a unicode string, so the
+  * result is built in a fresh buffer and wrapped in a new bucket. */
+ UErrorCode errCode = U_ZERO_ERROR;
+ int32_t outbuflen = bucket->buf.ustr.len;
+ int32_t destlen;
+ int is_persistent = php_stream_is_persistent(stream);
+ UChar *outbuf = peumalloc(outbuflen + 1, is_persistent);
+
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ while (1) {
+ if (!outbuf) {
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ return PSFS_ERR_FATAL;
+ }
+ /* The return value is the length of the converted text, or the
+  * capacity required when the destination is too small. */
+ destlen = u_strToLower(outbuf, outbuflen, bucket->buf.ustr.val, bucket->buf.ustr.len, NULL, &errCode);
+ if (errCode != U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ }
+ /* ICU functions return immediately when entered with a failure
+  * code, so the error must be cleared before retrying; grow the
+  * buffer straight to the size ICU asked for. */
+ errCode = U_ZERO_ERROR;
+ outbuflen = destlen;
+ outbuf = peurealloc(outbuf, outbuflen + 1, is_persistent);
+ }
+ if (U_FAILURE(errCode)) {
+ pefree(outbuf, is_persistent);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ return PSFS_ERR_FATAL;
+ }
+ /* Account for the input exactly once per bucket */
+ consumed += UBYTES(bucket->buf.ustr.len);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+
+ /* Use the converted length, not the buffer capacity */
+ outbuf[destlen] = 0;
+ bucket = php_stream_bucket_new_unicode(stream, outbuf, destlen, 1, is_persistent TSRMLS_CC);
+ } else {
+ bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
+ php_strtr(bucket->buf.str.val, bucket->buf.str.len, uppercase, lowercase, 26);
+ consumed += bucket->buf.str.len;
+ }
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
}
if (bytes_consumed) {
*bytes_consumed = consumed;
}
-
+
return PSFS_PASS_ON;
}
static php_stream_filter_ops strfilter_toupper_ops = {
strfilter_toupper_filter,
NULL,
- "string.toupper"
+ "string.toupper",
+ PSFO_FLAG_OUTPUTS_SAME
};
static php_stream_filter_ops strfilter_tolower_ops = {
strfilter_tolower_filter,
NULL,
- "string.tolower"
+ "string.tolower",
+ PSFO_FLAG_OUTPUTS_SAME
};
static php_stream_filter *strfilter_toupper_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
};
/* }}} */
+/* UTODO: Extend to handle unicode data */
+
/* {{{ strip_tags filter implementation */
typedef struct _php_strip_tags_filter {
const char *allowed_tags;
php_strip_tags_filter *inst = (php_strip_tags_filter *) thisfilter->abstract;
while (buckets_in->head) {
+ /* Inspect the head of the input brigade directly: 'bucket' is not
+  * assigned until the make_writeable call below, so testing it here
+  * would read an uninitialized (or stale) pointer. Bailing out before
+  * the bucket is unlinked also leaves it owned by buckets_in. */
+ if (buckets_in->head->is_unicode) {
+ /* Uh oh! */
+ return PSFS_ERR_FATAL;
+ }
+
bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
- consumed = bucket->buflen;
+ consumed = bucket->buf.str.len;
- bucket->buflen = php_strip_tags(bucket->buf, bucket->buflen, &(inst->state), (char *)inst->allowed_tags, inst->allowed_tags_len);
+ bucket->buf.str.len = php_strip_tags(bucket->buf.str.val, bucket->buf.str.len, &(inst->state), (char *)inst->allowed_tags, inst->allowed_tags_len);
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
}
static php_stream_filter_ops strfilter_strip_tags_ops = {
strfilter_strip_tags_filter,
strfilter_strip_tags_dtor,
- "string.strip_tags"
+ "string.strip_tags",
+ PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING
};
static php_stream_filter *strfilter_strip_tags_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
while (buckets_in->head != NULL) {
bucket = buckets_in->head;
+ if (bucket->is_unicode) {
+ /* Not a unicode capable filter */
+ return PSFS_ERR_FATAL;
+ }
php_stream_bucket_unlink(bucket TSRMLS_CC);
if (strfilter_convert_append_bucket(inst, stream, thisfilter,
- buckets_out, bucket->buf, bucket->buflen, &consumed,
+ buckets_out, bucket->buf.str.val, bucket->buf.str.len, &consumed,
php_stream_is_persistent(stream) TSRMLS_CC) != SUCCESS) {
goto out_failure;
}
static php_stream_filter_ops strfilter_convert_ops = {
strfilter_convert_filter,
strfilter_convert_dtor,
- "convert.*"
+ "convert.*",
+ PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING
};
static php_stream_filter *strfilter_convert_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
ZEND_EXTERN_MODULE_GLOBALS(iconv)
#endif
+#include <unicode/uversion.h>
+
#define SECTION(name) if (!sapi_module.phpinfo_as_text) { \
PUTS("<h2>" name "</h2>\n"); \
} else { \
php_info_print_table_row(2, "Zend Memory Manager", "disabled" );
#endif
+ {
+ char buf[1024];
+ snprintf(buf, sizeof(buf), "Based on%s. ICU Version %s.", U_COPYRIGHT_STRING, U_ICU_VERSION);
+ php_info_print_table_row(2, "Unicode Support", buf);
+ }
#if HAVE_IPV6
php_info_print_table_row(2, "IPv6 Support", "enabled" );
#else
PHPAPI char *php_strtoupper(char *s, size_t len);
PHPAPI char *php_strtolower(char *s, size_t len);
+PHPAPI UChar *php_u_strtoupper(UChar **s, int32_t *len, const char *locale);
+PHPAPI UChar *php_u_strtolower(UChar **s, int32_t *len, const char *locale);
PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trlen);
PHPAPI char *php_addslashes(char *str, int length, int *new_length, int freeit TSRMLS_DC);
PHPAPI char *php_addslashes_ex(char *str, int length, int *new_length, int freeit, int ignore_sybase TSRMLS_DC);
PHPAPI int php_char_to_str_ex(char *str, uint len, char from, char *to, int to_len, pval *result, int case_sensitivity, int *replace_count);
PHPAPI int php_char_to_str(char *str, uint len, char from, char *to, int to_len, pval *result);
PHPAPI void php_implode(zval *delim, zval *arr, zval *return_value);
-PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, int limit);
+PHPAPI void php_explode(char *delim, uint delim_len, char *str, uint str_len, zend_uchar str_type, zval *return_value, int limit);
PHPAPI size_t php_strspn(char *s1, char *s2, char *s1_end, char *s2_end);
PHPAPI size_t php_strcspn(char *s1, char *s2, char *s1_end, char *s2_end);
#include "ext/standard/php_smart_str_public.h"
PHP_FUNCTION(var_dump);
+PHP_FUNCTION(var_inspect);
PHP_FUNCTION(var_export);
PHP_FUNCTION(debug_zval_dump);
PHP_FUNCTION(serialize);
PHP_FUNCTION(memory_get_usage);
#endif
-PHPAPI void php_var_dump(zval **struc, int level TSRMLS_DC);
+PHPAPI void php_var_dump(zval **struc, int level, int verbose TSRMLS_DC);
PHPAPI void php_var_export(zval **struc, int level TSRMLS_DC);
-PHPAPI void php_debug_zval_dump(zval **struc, int level TSRMLS_DC);
+PHPAPI void php_debug_zval_dump(zval **struc, int level, int verbose TSRMLS_DC);
/* typdef HashTable php_serialize_data_t; */
#define php_serialize_data_t HashTable
add_assoc_string(return_value, "mode", stream->mode, 1);
-#if 0 /* TODO: needs updating for new filter API */
- if (stream->filterhead) {
+ if (stream->readfilters.head) {
php_stream_filter *filter;
MAKE_STD_ZVAL(newval);
array_init(newval);
- for (filter = stream->filterhead; filter != NULL; filter = filter->next) {
+ for (filter = stream->readfilters.head; filter != NULL; filter = filter->next) {
add_next_index_string(newval, (char *)filter->fops->label, 1);
}
- add_assoc_zval(return_value, "filters", newval);
+ add_assoc_zval(return_value, "read_filters", newval);
+ }
+
+ if (stream->writefilters.head) {
+ php_stream_filter *filter;
+
+ MAKE_STD_ZVAL(newval);
+ array_init(newval);
+
+ for (filter = stream->writefilters.head; filter != NULL; filter = filter->next) {
+ add_next_index_string(newval, (char *)filter->fops->label, 1);
+ }
+
+ add_assoc_zval(return_value, "write_filters", newval);
}
-#endif
- add_assoc_long(return_value, "unread_bytes", stream->writepos - stream->readpos);
+ add_assoc_long(return_value, "unread_bytes", stream->readbuf_avail);
add_assoc_bool(return_value, "seekable", (stream->ops->seek) && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0);
if (stream->orig_path) {
if (stream == NULL) {
continue;
}
- if ((stream->writepos - stream->readpos) > 0) {
+ if ((stream->readbuf_avail) > 0) {
/* allow readable non-descriptor based streams to participate in stream_select.
* Non-descriptor streams will only "work" if they have previously buffered the
* data. Not ideal, but better than nothing.
}
}
-static int parse_context_options(php_stream_context *context, zval *options)
+static int parse_context_options(php_stream_context *context, zval *options TSRMLS_DC)
{
HashPosition pos, opos;
zval **wval, **oval;
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(options), &pos);
while (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_P(options), (void**)&wval, &pos)) {
- if (HASH_KEY_IS_STRING == zend_hash_get_current_key_ex(Z_ARRVAL_P(options), &wkey, &wkey_len, &num_key, 0, &pos)
+ int wtype = zend_hash_get_current_key_ex(Z_ARRVAL_P(options), &wkey, &wkey_len, &num_key, 0, &pos);
+ if (((HASH_KEY_IS_STRING == wtype) || (HASH_KEY_IS_UNICODE == wtype))
&& Z_TYPE_PP(wval) == IS_ARRAY) {
+ if (HASH_KEY_IS_UNICODE == wtype) {
+ /* fold to string */
+ UErrorCode errCode = 0;
+
+ zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &wkey, &wkey_len, (UChar*)wkey, wkey_len, &errCode);
+ }
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(wval), &opos);
while (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(wval), (void**)&oval, &opos)) {
-
- if (HASH_KEY_IS_STRING == zend_hash_get_current_key_ex(Z_ARRVAL_PP(wval), &okey, &okey_len, &num_key, 0, &opos)) {
+ int otype = zend_hash_get_current_key_ex(Z_ARRVAL_PP(wval), &okey, &okey_len, &num_key, 0, &opos);
+ if (HASH_KEY_IS_UNICODE == otype) {
+ /* fold to string */
+ UErrorCode errCode = 0;
+
+ zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &okey, &okey_len, (UChar*)okey, okey_len, &errCode);
+ php_stream_context_set_option(context, wkey, okey, *oval);
+ efree(okey);
+ }
+ if (HASH_KEY_IS_STRING == otype) {
php_stream_context_set_option(context, wkey, okey, *oval);
}
zend_hash_move_forward_ex(Z_ARRVAL_PP(wval), &opos);
}
-
+ if (wtype == HASH_KEY_IS_UNICODE) {
+ efree(wkey);
+ }
} else {
- zend_error(E_WARNING, "options should have the form [\"wrappername\"][\"optionname\"] = $value");
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "options should have the form [\"wrappername\"][\"optionname\"] = $value");
}
zend_hash_move_forward_ex(Z_ARRVAL_P(options), &pos);
}
return ret;
}
-static int parse_context_params(php_stream_context *context, zval *params)
+static int parse_context_params(php_stream_context *context, zval *params TSRMLS_DC)
{
int ret = SUCCESS;
zval **tmp;
-
- if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "notification", sizeof("notification"), (void**)&tmp)) {
+ U_STRING_DECL(u_notification, "notification", 12);
+ U_STRING_DECL(u_options, "options", 7);
+ U_STRING_DECL(u_input_encoding, "input_encoding", 14);
+ U_STRING_DECL(u_output_encoding, "output_encoding", 15);
+ U_STRING_DECL(u_default_mode, "default_mode", 12);
+
+ U_STRING_INIT(u_notification, "notification", 12);
+ U_STRING_INIT(u_options, "options", 7);
+ U_STRING_INIT(u_input_encoding, "input_encoding", 14);
+ U_STRING_INIT(u_output_encoding, "output_encoding", 15);
+ U_STRING_INIT(u_default_mode, "default_mode", 12);
+
+ if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "notification", sizeof("notification"), (void**)&tmp) ||
+ SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_notification, sizeof("notification"), (void**)&tmp)) {
if (context->notifier) {
php_stream_notification_free(context->notifier);
ZVAL_ADDREF(*tmp);
context->notifier->dtor = user_space_stream_notifier_dtor;
}
- if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "options", sizeof("options"), (void**)&tmp)) {
- parse_context_options(context, *tmp);
+ if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "options", sizeof("options"), (void**)&tmp) ||
+ SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_options, sizeof("options"), (void**)&tmp)) {
+ parse_context_options(context, *tmp TSRMLS_CC);
+ }
+ if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "input_encoding", sizeof("input_encoding"), (void**)&tmp) ||
+ SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_input_encoding, sizeof("input_encoding"), (void**)&tmp)) {
+ zval strval = **tmp;
+
+ if (context->input_encoding) {
+ efree(context->input_encoding);
+ }
+
+ zval_copy_ctor(&strval);
+ convert_to_string(&strval);
+ context->input_encoding = Z_STRVAL(strval);
+ }
+ if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "output_encoding", sizeof("output_encoding"), (void**)&tmp) ||
+ SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_output_encoding, sizeof("output_encoding"), (void**)&tmp)) {
+ zval strval = **tmp;
+
+ if (context->output_encoding) {
+ efree(context->output_encoding);
+ }
+
+ zval_copy_ctor(&strval);
+ convert_to_string(&strval);
+ context->output_encoding = Z_STRVAL(strval);
+ }
+ if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "default_mode", sizeof("default_mode"), (void**)&tmp) ||
+ SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_default_mode, sizeof("default_mode"), (void**)&tmp)) {
+ zval longval = **tmp;
+
+ zval_copy_ctor(&longval);
+ convert_to_long(&longval);
+ context->default_mode = Z_LVAL(longval);
+ zval_dtor(&longval);
}
-
return ret;
}
if (options) {
/* handle the array syntax */
- RETVAL_BOOL(parse_context_options(context, options) == SUCCESS);
+ RETVAL_BOOL(parse_context_options(context, options TSRMLS_CC) == SUCCESS);
} else {
php_stream_context_set_option(context, wrappername, optionname, zvalue);
RETVAL_TRUE;
RETURN_FALSE;
}
- RETVAL_BOOL(parse_context_params(context, params) == SUCCESS);
+ RETVAL_BOOL(parse_context_params(context, params TSRMLS_CC) == SUCCESS);
}
/* }}} */
context = FG(default_context);
if (params) {
- parse_context_options(context, params);
+ parse_context_options(context, params TSRMLS_CC);
}
php_stream_context_to_zval(context, return_value);
context = php_stream_context_alloc();
if (params) {
- parse_context_options(context, params);
+ parse_context_options(context, params TSRMLS_CC);
}
php_stream_context_to_zval(context, return_value);
RETURN_FALSE;
}
- if (append) {
+ if (append) {
php_stream_filter_append(&stream->readfilters, filter);
} else {
php_stream_filter_prepend(&stream->readfilters, filter);
}
+ if (FAILURE == php_stream_filter_check_chain(&stream->readfilters)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Readfilter chain unstable -- unresolvable unicode/string conversion conflict");
+ }
}
if (read_write & PHP_STREAM_FILTER_WRITE) {
} else {
php_stream_filter_prepend(&stream->writefilters, filter);
}
+ if (FAILURE == php_stream_filter_check_chain(&stream->writefilters)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Writefilter chain unstable -- unresolvable unicode/string conversion conflict");
+ }
}
if (filter) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not invalidate filter, not removing");
RETURN_FALSE;
} else {
+ if (FAILURE == php_stream_filter_check_chain(filter->chain)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filterchain unstable -- unresolvable unicode/string conversion conflict");
+ }
+
php_stream_filter_remove(filter, 1 TSRMLS_CC);
RETURN_TRUE;
}
/* {{{ proto string stream_get_line(resource stream, int maxlen [, string ending])
Read up to maxlen bytes from a stream or until the ending string is found */
+/* UTODO */
PHP_FUNCTION(stream_get_line)
{
char *str = NULL;
#include "TSRM.h"
#endif
+#include "unicode/uchar.h"
+
#define STR_PAD_LEFT 0
#define STR_PAD_RIGHT 1
#define STR_PAD_BOTH 2
}
/* }}} */
+/* {{{ php_expand_u_trim_range()
+ * Expands ranges of the form 'a..b' (a <= b in code-point order) found in a
+ * trim character list.
+ *
+ * range     - in/out: UTF-16 character list. It is erealloc()'ed when at
+ *             least one range was expanded, so the caller must own the buffer.
+ * range_len - in/out: length of *range in UTF-16 code units.
+ *
+ * Returns SUCCESS, or FAILURE when a malformed '..' sequence was reported or
+ * an ICU conversion failed.
+ */
+static int php_expand_u_trim_range(UChar **range, int32_t *range_len)
+{
+	UChar32 *codepts, *tmp, *input, *end, c;
+	int32_t len, tmp_len, idx;
+	UErrorCode err;
+	int expanded = 0;
+	int result = SUCCESS;
+	TSRMLS_FETCH(); /* php_error_docref() needs the TSRM context; the signature carries none */
+
+	/* First, convert UTF-16 to UTF-32 (never needs more units than the input) */
+	len = *range_len;
+	codepts = (UChar32 *)emalloc((len+1)*sizeof(UChar32));
+	err = U_ZERO_ERROR;
+	u_strToUTF32((UChar32 *)codepts, len+1, &len, *range, len, &err);
+	if ( U_FAILURE(err) ) { /* e.g. unpaired surrogate: leave the list untouched */
+		efree(codepts);
+		return FAILURE;
+	}
+
+	/* Expand ranges, if any - taken from php_charmask() */
+	tmp_len = len;
+	tmp = (UChar32 *)emalloc((tmp_len+1)*sizeof(UChar32));
+	input = codepts;
+	for ( idx = 0, end = input+len ; input < end ; input++ ) {
+		c = input[0];
+		if ( (input+3 < end) && input[1] == '.' && input[2] == '.' && input[3] >= c ) {
+			/* grow by the size of the range; elements are UChar32, not UChar */
+			tmp_len += (input[3] - c + 1);
+			tmp = (UChar32 *)erealloc(tmp, (tmp_len+1)*sizeof(UChar32));
+			for ( ; c <= input[3] ; c++ ) {
+				if ( U_IS_UNICODE_CHAR(c) ) tmp[idx++] = c;
+			}
+			input += 3;
+			expanded++;
+		} else if ( (input+1 < end) && input[0] == '.' && input[1] == '.' ) {
+			/* Error, try to be as helpful as possible:
+			   (a range ending/starting with '.' won't be captured here) */
+			if ( input == codepts ) { /* There is no 'left' char */
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
+				result = FAILURE;
+				continue;
+			}
+			if ( input+2 >= end ) { /* There is no 'right' char */
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
+				result = FAILURE;
+				continue;
+			}
+			if ( input[-1] > input[2] ) { /* Wrong order */
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
+				result = FAILURE;
+				continue;
+			}
+			/* FIXME: Better error (a..b..c is the only left possibility?) */
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid '..'-range");
+			result = FAILURE;
+			continue;
+		} else {
+			tmp[idx++] = c;
+		}
+	}
+
+	/* If any ranges were expanded, convert the expanded results back to UTF-16.
+	   Only the first idx entries of tmp were written. */
+	if ( expanded > 0 ) {
+		len = idx;
+		*range = (UChar *)erealloc(*range, (len+1)*sizeof(UChar));
+		err = U_ZERO_ERROR;
+		u_strFromUTF32(*range, len+1, &len, tmp, idx, &err);
+		if ( err == U_BUFFER_OVERFLOW_ERROR ) {
+			/* supplementary code points take two code units each;
+			   len now holds the required length */
+			err = U_ZERO_ERROR;
+			*range = (UChar *)erealloc(*range, (len+1)*sizeof(UChar));
+			u_strFromUTF32(*range, len+1, &len, tmp, idx, &err);
+		}
+		if ( U_FAILURE(err) ) { /* Internal ICU error: hand back an empty list */
+			result = FAILURE;
+			len = 0;
+			(*range)[0] = 0;
+		}
+		*range_len = len;
+	}
+
+	efree(tmp);
+	efree(codepts);
+
+	return result;
+}
+/* }}} */
+
+/* {{{ php_u_trim_matches()
+ * Returns non-zero when code point ch has to be trimmed: it is listed in
+ * what (what_len UTF-16 code units), or, when what is NULL, it is whitespace
+ * according to u_isWhitespace().
+ */
+static int php_u_trim_matches(UChar32 ch, UChar *what, int32_t what_len)
+{
+	int32_t j;
+	UChar32 wh;
+
+	if ( !what ) {
+		return u_isWhitespace(ch) != FALSE;
+	}
+	for ( j = 0 ; j < what_len ; ) {
+		U16_NEXT(what, j, what_len, wh);
+		if ( wh == ch ) return 1;
+	}
+	return 0;
+}
+/* }}} */
+
+/* {{{ php_u_trim()
+ * Unicode capable version of php_trim()
+ *
+ * c, len         - string to trim and its length in UTF-16 code units
+ * what, what_len - characters to strip ('a..b' ranges are expanded), or
+ *                  NULL to strip whitespace
+ * return_value   - when non-NULL receives a copy of the trimmed string
+ * mode           - bit 1: trim the left side, bit 2: trim the right side
+ *
+ * Returns a newly allocated trimmed string when return_value is NULL,
+ * NULL otherwise.
+ */
+static UChar *php_u_trim(UChar *c, int32_t len, UChar *what, int32_t what_len, zval *return_value, int mode TSRMLS_DC)
+{
+	int32_t i, k;
+	UChar32 ch = 0; /* U16_NEXT/U16_PREV yield full code points, not code units */
+	int32_t start = 0, end = len;
+
+	if ( what ) {
+		/* the range expander reallocates its argument, so work on a private copy */
+		what = eustrndup(what, what_len);
+		php_expand_u_trim_range(&what, &what_len);
+	}
+
+	if ( mode & 1 ) {
+		/* i trails k, so it always sits on the start of the character just read */
+		for ( i = k = 0 ; i < end ; ) {
+			U16_NEXT(c, k, end, ch);
+			if ( !php_u_trim_matches(ch, what, what_len) ) break;
+			i = k;
+		}
+		start = i;
+	}
+	if ( mode & 2 ) {
+		/* i stays one past the last kept code unit */
+		for ( i = k = end ; i > start ; ) {
+			U16_PREV(c, 0, k, ch);
+			if ( !php_u_trim_matches(ch, what, what_len) ) break;
+			i = k;
+		}
+		end = i;
+	}
+
+	if ( what ) {
+		efree(what);
+	}
+
+	if ( start < end ) {
+		if ( return_value ) {
+			RETVAL_UNICODEL(c+start, end-start, 1);
+		} else {
+			return eustrndup(c+start, end-start);
+		}
+	} else { /* Trimmed the whole string */
+		if ( return_value ) {
+			RETURN_EMPTY_UNICODE();
+		} else {
+			return (USTR_MAKE(""));
+		}
+	}
+
+	return NULL;
+}
+/* }}} */
+
/* {{{ php_do_trim
* Base for trim(), rtrim() and ltrim() functions.
*/
static void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
- zval **str;
- zval **what = NULL;
- int argc = ZEND_NUM_ARGS();
-
- if (argc < 1 || argc > 2 || zend_get_parameters_ex(argc, &str, &what) == FAILURE) {
- WRONG_PARAM_COUNT;
+ void *str;
+ int32_t str_len;
+ zend_uchar str_type;
+ void *what;
+ int32_t what_len;
+ zend_uchar what_type;
+ int argc = ZEND_NUM_ARGS();
+
+ if ( zend_parse_parameters(argc TSRMLS_CC, "T|T", &str, &str_len, &str_type,
+ &what, &what_len, &what_type) == FAILURE ) {
+ return;
}
- convert_to_string_ex(str);
-
- if (argc > 1) {
- convert_to_string_ex(what);
- php_trim(Z_STRVAL_PP(str), Z_STRLEN_PP(str), Z_STRVAL_PP(what), Z_STRLEN_PP(what), return_value, mode TSRMLS_CC);
+ if ( argc > 1 ) {
+ if ( str_type == IS_UNICODE ) {
+ php_u_trim(str, str_len, what, what_len, return_value, mode TSRMLS_CC);
+ } else {
+ php_trim(str, str_len, what, what_len, return_value, mode TSRMLS_CC);
+ }
} else {
- php_trim(Z_STRVAL_PP(str), Z_STRLEN_PP(str), NULL, 0, return_value, mode TSRMLS_CC);
+ if ( str_type == IS_UNICODE ) {
+ php_u_trim(str, str_len, NULL, 0, return_value, mode TSRMLS_CC);
+ } else {
+ php_trim(str, str_len, NULL, 0, return_value, mode TSRMLS_CC);
+ }
}
}
/* }}} */
/* {{{ php_explode
*/
-PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, int limit)
+PHPAPI void php_explode(char *delim, uint delim_len, char *str, uint str_len, zend_uchar str_type, zval *return_value, int limit)
{
char *p1, *p2, *endp;
- endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);
-
- p1 = Z_STRVAL_P(str);
- p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp);
+ endp = str + str_len;
+ p1 = str;
+ p2 = php_memnstr(str, delim, delim_len, endp);
- if (p2 == NULL) {
- add_next_index_stringl(return_value, p1, Z_STRLEN_P(str), 1);
+ if ( p2 == NULL ) {
+ if ( str_type == IS_BINARY ) {
+ add_next_index_binaryl(return_value, p1, str_len, 1);
+ } else {
+ add_next_index_stringl(return_value, p1, str_len, 1);
+ }
} else {
do {
- add_next_index_stringl(return_value, p1, p2 - p1, 1);
- p1 = p2 + Z_STRLEN_P(delim);
- } while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL &&
- (limit == -1 || --limit > 1));
+ if ( str_type == IS_BINARY ) {
+ add_next_index_binaryl(return_value, p1, p2-p1, 1);
+ } else {
+ add_next_index_stringl(return_value, p1, p2-p1, 1);
+ }
+ p1 = p2 + delim_len;
+ } while ( (p2 = php_memnstr(p1, delim, delim_len, endp)) != NULL &&
+ (limit == -1 || --limit > 1) );
- if (p1 <= endp)
- add_next_index_stringl(return_value, p1, endp-p1, 1);
+ if ( p1 <= endp ) {
+ if ( str_type == IS_BINARY ) {
+ add_next_index_binaryl(return_value, p1, endp-p1, 1);
+ } else {
+ add_next_index_stringl(return_value, p1, endp-p1, 1);
+ }
+ }
}
}
/* }}} */
/* {{{ php_explode_negative_limit
*/
-PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_value, int limit)
+PHPAPI void php_explode_negative_limit(char *delim, uint delim_len, char *str, uint str_len, zend_uchar str_type, zval *return_value, int limit)
{
#define EXPLODE_ALLOC_STEP 50
char *p1, *p2, *endp;
int allocated = EXPLODE_ALLOC_STEP, found = 0, i = 0, to_return = 0;
char **positions = safe_emalloc(allocated, sizeof(char *), 0);
-
- endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);
- p1 = Z_STRVAL_P(str);
- p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp);
+ endp = str + str_len;
+ p1 = str;
+ p2 = php_memnstr(str, delim, delim_len, endp);
- if (p2 == NULL) {
+ if ( p2 == NULL ) {
/*
do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
by doing nothing we return empty array
} else {
positions[found++] = p1;
do {
- if (found >= allocated) {
+ if ( found >= allocated ) {
allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
positions = erealloc(positions, allocated*sizeof(char *));
}
- positions[found++] = p1 = p2 + Z_STRLEN_P(delim);
- } while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL);
+ positions[found++] = p1 = p2 + delim_len;
+ } while ( (p2 = php_memnstr(p1, delim, delim_len, endp)) != NULL );
to_return = limit + found;
/* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
- for (i = 0;i < to_return;i++) { /* this checks also for to_return > 0 */
- add_next_index_stringl(return_value, positions[i],
- (positions[i+1] - Z_STRLEN_P(delim)) - positions[i],
- 1
- );
+ for ( i = 0 ; i < to_return ; i++ ) { /* this checks also for to_return > 0 */
+ if ( str_type == IS_BINARY ) {
+ add_next_index_binaryl(return_value, positions[i],
+ (positions[i+1]-delim_len) - positions[i], 1);
+ } else {
+ add_next_index_stringl(return_value, positions[i],
+ (positions[i+1]-delim_len) - positions[i], 1);
+ }
}
}
efree(positions);
}
/* }}} */
+/* {{{ php_u_explode
+ * Unicode capable version of php_explode()
+ *
+ * Splits str (str_len code units) on delim and appends every piece to the
+ * array in return_value. A limit of -1 splits on every delimiter; otherwise
+ * the counter is decremented after each split and splitting stops once it
+ * drops to 1, the rest of the string being appended as the final element.
+ */
+static void php_u_explode(UChar *delim, uint delim_len, UChar *str, uint str_len, zval *return_value, int limit)
+{
+	UChar *endp = str + str_len;
+	UChar *chunk = str;
+	UChar *hit = zend_u_memnstr(str, delim, delim_len, endp);
+
+	/* no delimiter at all: the whole input is the only element */
+	if ( hit == NULL ) {
+		add_next_index_unicodel(return_value, chunk, str_len, 1);
+		return;
+	}
+
+	for (;;) {
+		add_next_index_unicodel(return_value, chunk, hit-chunk, 1);
+		chunk = hit + delim_len;
+
+		hit = zend_u_memnstr(chunk, delim, delim_len, endp);
+		if ( hit == NULL ) {
+			break;
+		}
+		/* the limit is only consumed while further delimiters exist */
+		if ( limit != -1 && --limit <= 1 ) {
+			break;
+		}
+	}
+
+	/* whatever follows the last split point */
+	if ( chunk <= endp ) {
+		add_next_index_unicodel(return_value, chunk, endp-chunk, 1);
+	}
+}
+/* }}} */
+
+/* {{{ php_u_explode_negative_limit
+ * Unicode capable version of php_explode_negative_limit()
+ *
+ * Records the start of every piece of str, then appends the first
+ * (pieces + limit) of them to return_value. Nothing is appended when the
+ * delimiter does not occur.
+ */
+static void php_u_explode_negative_limit(UChar *delim, uint delim_len, UChar *str, uint str_len, zval *return_value, int limit)
+{
+#define EXPLODE_ALLOC_STEP 50
+	UChar *endp = str + str_len;
+	UChar *cur = str;
+	UChar *hit;
+	int capacity = EXPLODE_ALLOC_STEP, count = 0, n = 0, keep = 0;
+	UChar **starts = safe_emalloc(capacity, sizeof(UChar *), 0);
+
+	hit = zend_u_memnstr(str, delim, delim_len, endp);
+
+	/* without a delimiter there is a single piece, and limit <= -1 drops it */
+	if ( hit != NULL ) {
+		starts[count++] = cur;
+		while ( hit != NULL ) {
+			if ( count >= capacity ) {
+				/* make sure we have enough memory */
+				capacity = count + EXPLODE_ALLOC_STEP;
+				starts = erealloc(starts, capacity*sizeof(UChar *));
+			}
+			cur = hit + delim_len;
+			starts[count++] = cur;
+			hit = zend_u_memnstr(cur, delim, delim_len, endp);
+		}
+
+		/* limit is at least -1, so n+1 always stays below count */
+		keep = limit + count;
+		for ( n = 0 ; n < keep ; n++ ) { /* also covers keep <= 0 */
+			add_next_index_unicodel(return_value, starts[n],
+					(starts[n+1]-delim_len) - starts[n], 1);
+		}
+	}
+
+	efree(starts);
+#undef EXPLODE_ALLOC_STEP
+}
+/* }}} */
/* {{{ proto array explode(string separator, string str [, int limit])
Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */
PHP_FUNCTION(explode)
{
- zval **str, **delim, **zlimit = NULL;
- int limit = -1;
- int argc = ZEND_NUM_ARGS();
+ void *str, *delim;
+ int32_t str_len, delim_len;
+ zend_uchar str_type, delim_type;
+ /* must be a long: the "l" specifier below stores a long through this pointer */
+ long limit = -1;
+ int argc = ZEND_NUM_ARGS();
- if (argc < 2 || argc > 3 || zend_get_parameters_ex(argc, &delim, &str, &zlimit) == FAILURE) {
+ if ( argc < 2 || argc > 3 ) {
WRONG_PARAM_COUNT;
}
- convert_to_string_ex(str);
- convert_to_string_ex(delim);
- if (argc > 2) {
- convert_to_long_ex(zlimit);
- limit = Z_LVAL_PP(zlimit);
+ if ( zend_parse_parameters(argc TSRMLS_CC, "TT|l", &delim, &delim_len, &delim_type,
+ &str, &str_len, &str_type, &limit) == FAILURE) {
+ return;
}
- if (! Z_STRLEN_PP(delim)) {
+ if ( delim_len == 0 ) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter.");
RETURN_FALSE;
}
array_init(return_value);
- if (! Z_STRLEN_PP(str)) {
- add_next_index_stringl(return_value, "", sizeof("") - 1, 1);
+ /* an empty input yields an array holding one empty string of the input's type */
+ if ( str_len == 0 ) {
+ if ( str_type == IS_UNICODE ) {
+ /* hand the USTR_MAKE() buffer over (duplicate=0) instead of copying and leaking it */
+ add_next_index_unicodel(return_value, USTR_MAKE(""), sizeof("")-1, 0);
+ } else if ( str_type == IS_BINARY ) {
+ add_next_index_binaryl(return_value, "", sizeof("")-1, 1);
+ } else {
+ add_next_index_stringl(return_value, "", sizeof("")-1, 1);
+ }
return;
}
if (limit == 0 || limit == 1) {
- add_index_stringl(return_value, 0, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
+ if ( str_type == IS_UNICODE ) {
+ add_index_unicodel(return_value, 0, (UChar *)str, str_len, 1);
+ } else if ( str_type == IS_BINARY ) {
+ add_index_binaryl(return_value, 0, (char *)str, str_len, 1);
+ } else {
+ add_index_stringl(return_value, 0, (char *)str, str_len, 1);
+ }
} else if (limit < 0 && argc == 3) {
- php_explode_negative_limit(*delim, *str, return_value, limit);
+ if ( str_type == IS_UNICODE ) {
+ php_u_explode_negative_limit((UChar *)delim, delim_len, (UChar *)str, str_len, return_value, limit);
+ } else {
+ php_explode_negative_limit((char *)delim, delim_len, (char *)str, str_len, str_type, return_value, limit);
+ }
} else {
- php_explode(*delim, *str, return_value, limit);
+ if ( str_type == IS_UNICODE ) {
+ php_u_explode((UChar *)delim, delim_len, (UChar *)str, str_len, return_value, limit);
+ } else {
+ php_explode((char *)delim, delim_len, (char *)str, str_len, str_type, return_value, limit);
+ }
}
}
/* }}} */
}
/* }}} */
+/* {{{ php_u_strtoupper
+ * Upper-cases the Unicode string *s (*len code units) using u_strToUpper()
+ * with the given locale. On success the old buffer is freed and *s / *len
+ * are replaced by the converted, NUL-terminated string; on failure both are
+ * left untouched. Returns *s either way.
+ */
+PHPAPI UChar* php_u_strtoupper(UChar **s, int32_t *len, const char* locale)
+{
+	UChar *upper = NULL;
+	int32_t upper_len = *len;
+	UErrorCode status;
+
+	/* retry with the length ICU asked for until the result fits */
+	do {
+		status = U_ZERO_ERROR;
+		upper = eurealloc(upper, upper_len+1);
+		upper_len = u_strToUpper(upper, upper_len, *s, *len, locale, &status);
+	} while (status == U_BUFFER_OVERFLOW_ERROR);
+
+	if (U_FAILURE(status)) {
+		efree(upper);
+		return *s;
+	}
+
+	efree(*s);
+	upper[upper_len] = 0;
+	*s = upper;
+	*len = upper_len;
+
+	return *s;
+}
+/* }}} */
+
/* {{{ proto string strtoupper(string str)
Makes a string uppercase */
PHP_FUNCTION(strtoupper)
if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg)) {
WRONG_PARAM_COUNT;
}
- convert_to_string_ex(arg);
+ if (Z_TYPE_PP(arg) != IS_STRING && Z_TYPE_PP(arg) != IS_UNICODE) {
+ if (UG(unicode)) {
+ convert_to_unicode_ex(arg);
+ } else {
+ convert_to_string_ex(arg);
+ }
+ }
RETVAL_ZVAL(*arg, 1, 0);
- php_strtoupper(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value));
+ if (Z_TYPE_P(return_value) == IS_UNICODE) {
+ php_u_strtoupper(&Z_USTRVAL_P(return_value), &Z_USTRLEN_P(return_value), UG(default_locale));
+ } else {
+ php_strtoupper(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value));
+ }
+}
+/* }}} */
+
+/* {{{ php_u_strtolower
+ * Lower-cases the Unicode string *s (*len code units) using u_strToLower()
+ * with the given locale. On success the old buffer is freed and *s / *len
+ * are replaced by the converted, NUL-terminated string; on failure both are
+ * left untouched. Returns *s either way.
+ */
+PHPAPI UChar *php_u_strtolower(UChar **s, int32_t *len, const char* locale)
+{
+	UChar *lower = NULL;
+	int32_t lower_len = *len;
+	UErrorCode status = U_ZERO_ERROR;
+
+	/* grow the buffer to the length ICU reports until the conversion fits */
+	for (;;) {
+		status = U_ZERO_ERROR;
+		lower = eurealloc(lower, lower_len+1);
+		lower_len = u_strToLower(lower, lower_len, *s, *len, locale, &status);
+		if (status == U_BUFFER_OVERFLOW_ERROR) {
+			continue;
+		}
+		break;
+	}
+
+	if (!U_SUCCESS(status)) {
+		efree(lower);
+	} else {
+		efree(*s);
+		lower[lower_len] = 0;
+		*s = lower;
+		*len = lower_len;
+	}
+	return *s;
}
/* }}} */
PHP_FUNCTION(strtolower)
{
zval **str;
- char *ret;
if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &str)) {
WRONG_PARAM_COUNT;
}
- convert_to_string_ex(str);
+ if (Z_TYPE_PP(str) != IS_STRING && Z_TYPE_PP(str) != IS_UNICODE) {
+ if (UG(unicode)) {
+ convert_to_unicode_ex(str);
+ } else {
+ convert_to_string_ex(str);
+ }
+ }
RETVAL_ZVAL(*str, 1, 0);
- ret = php_strtolower(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value));
+ if (Z_TYPE_P(return_value) == IS_UNICODE) {
+ php_u_strtolower(&Z_USTRVAL_P(return_value), &Z_USTRLEN_P(return_value), UG(default_locale));
+ } else {
+ php_strtolower(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value));
+ }
}
/* }}} */
Finds first occurrence of a string within another */
PHP_FUNCTION(strstr)
{
- char *haystack;
- long haystack_len;
- zval *needle;
+ void *haystack;
+ int32_t haystack_len;
+ zend_uchar haystack_type;
+ zval **needle;
+ void *found = NULL;
+ char needle_char[2];
+ UChar u_needle_char[3];
+ int32_t n_len = 0;
+ size_t found_offset;
zend_bool part = 0;
- char *found = NULL;
- char needle_char[2];
- long found_offset;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz|b", &haystack, &haystack_len, &needle, &part) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tZ|b", &haystack, &haystack_len, &haystack_type, &needle, &part) == FAILURE) {
return;
}
- if (Z_TYPE_P(needle) == IS_STRING) {
- if (!Z_STRLEN_P(needle)) {
+ if (Z_TYPE_PP(needle) == IS_STRING || Z_TYPE_PP(needle) == IS_UNICODE || Z_TYPE_PP(needle) == IS_BINARY) {
+ if (!Z_STRLEN_PP(needle)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter.");
RETURN_FALSE;
}
- found = php_memnstr(haystack,
- Z_STRVAL_P(needle),
- Z_STRLEN_P(needle),
- haystack + haystack_len);
+ /* haystack type determines the needle type */
+ if (haystack_type == IS_UNICODE) {
+ convert_to_unicode_ex(needle);
+ found = zend_u_memnstr((UChar*)haystack,
+ Z_USTRVAL_PP(needle),
+ Z_USTRLEN_PP(needle),
+ (UChar*)haystack + haystack_len);
+ } else {
+ convert_to_string_ex(needle);
+ found = php_memnstr((char*)haystack,
+ Z_STRVAL_PP(needle),
+ Z_STRLEN_PP(needle),
+ (char*)haystack + haystack_len);
+ }
} else {
- convert_to_long_ex(&needle);
- needle_char[0] = (char) Z_LVAL_P(needle);
- needle_char[1] = 0;
+ convert_to_long_ex(needle);
+ if (haystack_type == IS_UNICODE) {
+ if (Z_LVAL_PP(needle) < 0 || Z_LVAL_PP(needle) > 0x10FFFF) {
+ php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
+ RETURN_FALSE;
+ }
+ /* supplementary codepoint values may require 2 UChar's */
+ if (U_IS_BMP(Z_LVAL_PP(needle))) {
+ u_needle_char[n_len++] = (UChar) Z_LVAL_PP(needle);
+ u_needle_char[n_len] = 0;
+ } else {
+ u_needle_char[n_len++] = (UChar) U16_LEAD(Z_LVAL_PP(needle));
+ u_needle_char[n_len++] = (UChar) U16_TRAIL(Z_LVAL_PP(needle));
+ u_needle_char[n_len] = 0;
+ }
- found = php_memnstr(haystack,
- needle_char,
- 1,
- haystack + haystack_len);
+ found = zend_u_memnstr((UChar*)haystack,
+ u_needle_char,
+ n_len,
+ (UChar*)haystack + haystack_len);
+ } else {
+ needle_char[0] = (char) Z_LVAL_PP(needle);
+ needle_char[1] = 0;
+
+ found = php_memnstr((char*)haystack,
+ needle_char,
+ 1,
+ (char*)haystack + haystack_len);
+ }
}
if (found) {
- found_offset = found - haystack;
- if (part) {
- char *ret;
- ret = emalloc(found_offset + 1);
- strncpy(ret, haystack, found_offset);
- ret[found_offset] = '\0';
- RETURN_STRINGL(ret , found_offset, 0);
- } else {
- RETURN_STRINGL(found, haystack_len - found_offset, 1);
+ switch (haystack_type) {
+ case IS_UNICODE:
+ found_offset = (UChar*)found - (UChar*)haystack;
+ if (part) {
+ char *ret;
+ ret = eumalloc(found_offset + 1);
+ u_strncpy(ret, haystack, found_offset);
+ ret[found_offset] = '\0';
+ RETURN_UNICODEL(ret , found_offset, 0);
+ } else {
+ RETURN_UNICODEL(found, haystack_len - found_offset, 1);
+ }
+ break;
+
+ case IS_STRING:
+ found_offset = (char *)found - (char *)haystack;
+ if (part) {
+ char *ret;
+ ret = emalloc(found_offset + 1);
+ strncpy(ret, haystack, found_offset);
+ ret[found_offset] = '\0';
+ RETURN_STRINGL(ret , found_offset, 0);
+ } else {
+ RETURN_STRINGL(found, haystack_len - found_offset, 1);
+ }
+ break;
+
+ case IS_BINARY:
+ found_offset = (char *)found - (char *)haystack;
+ if (part) {
+ char *ret;
+ ret = emalloc(found_offset + 1);
+ strncpy(ret, haystack, found_offset);
+ ret[found_offset] = '\0';
+ RETURN_BINARYL(ret , found_offset, 0);
+ } else {
+ RETURN_BINARYL(found, haystack_len - found_offset, 1);
+ }
+ break;
}
} else {
RETURN_FALSE;
An alias for strstr */
/* }}} */
-/* {{{ proto int strpos(string haystack, string needle [, int offset])
+/* {{{ proto int strpos(text haystack, mixed needle [, int offset])
Finds position of first occurrence of a string within another */
PHP_FUNCTION(strpos)
{
- zval **haystack, **needle, **z_offset;
- char *found = NULL;
- char needle_char[2];
+ void *haystack;
+ int32_t haystack_len;
+ zend_uchar haystack_type;
+ zval **needle;
int offset = 0;
- int argc = ZEND_NUM_ARGS();
-
- if (argc < 2 || argc > 3 || zend_get_parameters_ex(argc, &haystack, &needle, &z_offset) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
- convert_to_string_ex(haystack);
+ void *found = NULL;
+ char needle_char[2];
+ UChar u_needle_char[3];
+ int32_t n_len = 0;
- if (argc > 2) {
- convert_to_long_ex(z_offset);
- offset = Z_LVAL_PP(z_offset);
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tZ|l", &haystack,
+ &haystack_len, &haystack_type, &needle, &offset) == FAILURE) {
+ return;
}
- if (offset < 0 || offset > Z_STRLEN_PP(haystack)) {
+ /*
+ * Unicode note: it's okay to not convert offset to codepoint offset here.
+ * We'll just do a rough check that the offset does not exceed length in
+ * code units, and leave the rest to zend_u_memnstr().
+ */
+ if (offset < 0 || offset > haystack_len) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string.");
RETURN_FALSE;
}
- if (Z_TYPE_PP(needle) == IS_STRING) {
+ if (Z_TYPE_PP(needle) == IS_STRING || Z_TYPE_PP(needle) == IS_UNICODE || Z_TYPE_PP(needle) == IS_BINARY) {
if (!Z_STRLEN_PP(needle)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter.");
RETURN_FALSE;
}
- found = php_memnstr(Z_STRVAL_PP(haystack) + offset,
- Z_STRVAL_PP(needle),
- Z_STRLEN_PP(needle),
- Z_STRVAL_PP(haystack) + Z_STRLEN_PP(haystack));
+ /* haystack type determines the needle type */
+ if (haystack_type == IS_UNICODE) {
+ int32_t cp_offset = 0;
+ convert_to_unicode_ex(needle);
+ /* locate the codepoint at the specified offset */
+ U16_FWD_N((UChar*)haystack, cp_offset, haystack_len, offset);
+ found = zend_u_memnstr((UChar*)haystack + cp_offset,
+ Z_USTRVAL_PP(needle),
+ Z_USTRLEN_PP(needle),
+ (UChar*)haystack + haystack_len);
+ } else {
+ convert_to_string_ex(needle);
+ found = php_memnstr((char*)haystack + offset,
+ Z_STRVAL_PP(needle),
+ Z_STRLEN_PP(needle),
+ (char*)haystack + haystack_len);
+ }
} else {
convert_to_long_ex(needle);
- needle_char[0] = (char) Z_LVAL_PP(needle);
- needle_char[1] = 0;
+ if (haystack_type == IS_UNICODE) {
+ int32_t cp_offset = 0;
+ if (Z_LVAL_PP(needle) < 0 || Z_LVAL_PP(needle) > 0x10FFFF) {
+ php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
+ RETURN_FALSE;
+ }
+ /* supplementary codepoint values may require 2 UChar's */
+ if (U_IS_BMP(Z_LVAL_PP(needle))) {
+ u_needle_char[n_len++] = (UChar) Z_LVAL_PP(needle);
+ u_needle_char[n_len] = 0;
+ } else {
+ u_needle_char[n_len++] = (UChar) U16_LEAD(Z_LVAL_PP(needle));
+ u_needle_char[n_len++] = (UChar) U16_TRAIL(Z_LVAL_PP(needle));
+ u_needle_char[n_len] = 0;
+ }
- found = php_memnstr(Z_STRVAL_PP(haystack) + offset,
- needle_char,
- 1,
- Z_STRVAL_PP(haystack) + Z_STRLEN_PP(haystack));
+ /* locate the codepoint at the specified offset */
+ U16_FWD_N((UChar*)haystack, cp_offset, haystack_len, offset);
+ found = zend_u_memnstr((UChar*)haystack + cp_offset,
+ u_needle_char,
+ n_len,
+ (UChar*)haystack + haystack_len);
+ } else {
+ needle_char[0] = (char) Z_LVAL_PP(needle);
+ needle_char[1] = 0;
+
+ found = php_memnstr((char*)haystack + offset,
+ needle_char,
+ 1,
+ (char*)haystack + haystack_len);
+ }
}
if (found) {
- RETURN_LONG(found - Z_STRVAL_PP(haystack));
+ if (haystack_type == IS_UNICODE) {
+ /* simple subtraction will not suffice, since there may be
+ supplementary codepoints */
+ RETURN_LONG(u_countChar32(haystack, ((char *)found - (char *)haystack)/sizeof(UChar)));
+ } else {
+ RETURN_LONG((char *)found - (char *)haystack);
+ }
} else {
RETURN_FALSE;
}
Returns part of a string */
PHP_FUNCTION(substr)
{
- zval **str, **from, **len;
- int l;
+ void *str;
+ int32_t str_len, cp_len;
+ zend_uchar str_type;
+ int l = -1;
int f;
- int argc = ZEND_NUM_ARGS();
- if (argc < 2 || argc > 3 || zend_get_parameters_ex(argc, &str, &from, &len) == FAILURE) {
- WRONG_PARAM_COUNT;
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tl|l", &str, &str_len, &str_type, &f, &l) == FAILURE) {
+ return;
}
- convert_to_string_ex(str);
- convert_to_long_ex(from);
-
- if (argc > 2) {
- convert_to_long_ex(len);
- l = Z_LVAL_PP(len);
+ if (str_type == IS_UNICODE) {
+ cp_len = u_countChar32(str, str_len);
} else {
- l = Z_STRLEN_PP(str);
+ cp_len = str_len;
}
-
- f = Z_LVAL_PP(from);
+ if (ZEND_NUM_ARGS() == 2) {
+ l = cp_len;
+ }
+
/* if "from" position is negative, count start position from the end
* of the string
*/
if (f < 0) {
- f = Z_STRLEN_PP(str) + f;
+ f = cp_len + f;
if (f < 0) {
f = 0;
}
* needed to stop that many chars from the end of the string
*/
if (l < 0) {
- l = (Z_STRLEN_PP(str) - f) + l;
+ l = (cp_len - f) + l;
if (l < 0) {
l = 0;
}
}
- if (f >= Z_STRLEN_PP(str)) {
+ if (f >= cp_len) {
RETURN_FALSE;
}
- if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(str)) {
- l = Z_STRLEN_PP(str) - f;
+ if (((unsigned) f + (unsigned) l) > cp_len) {
+ l = cp_len - f;
}
- RETURN_STRINGL(Z_STRVAL_PP(str) + f, l, 1);
+ if (str_type == IS_UNICODE) {
+ int32_t start = 0, end = 0;
+ U16_FWD_N((UChar*)str, end, str_len, f);
+ start = end;
+ U16_FWD_N((UChar*)str, end, str_len, l);
+ RETURN_UNICODEL((UChar*)str + start, end-start, 1);
+ } else {
+ RETURN_STRINGL((char*)str + f, l, 1);
+ }
}
/* }}} */
Returns the input string repeat mult times */
PHP_FUNCTION(str_repeat)
{
- zval **input_str; /* Input string */
- zval **mult; /* Multiplier */
- char *result; /* Resulting string */
- int result_len; /* Length of the resulting string */
-
- if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &input_str, &mult) == FAILURE) {
- WRONG_PARAM_COUNT;
+ void *input_str; /* Input string */
+ int32_t input_str_len;
+ int32_t input_str_chars;
+ zend_uchar input_str_type;
+ long mult; /* Multiplier */
+ void *result; /* Resulting string */
+ int32_t result_len; /* Size of the result buffer in bytes, terminator included */
+ int32_t result_chars; /* Chars/UChars in the result buffer, terminator included */
+
+ if ( zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tl", &input_str,
+ &input_str_chars, &input_str_type, &mult) == FAILURE ) {
+ return;
}
-
- /* Make sure we're dealing with proper types */
- convert_to_string_ex(input_str);
- convert_to_long_ex(mult);
-
- if (Z_LVAL_PP(mult) < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument has to be greater than or equal to 0.");
+
+ if ( mult < 0 ) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument has to be greater than or equal to 0");
return;
}
- /* Don't waste our time if it's empty */
- if (Z_STRLEN_PP(input_str) == 0)
- RETURN_STRINGL("", 0, 1);
-
- /* ... or if the multiplier is zero */
- if (Z_LVAL_PP(mult) == 0)
- RETURN_STRINGL("", 0, 1);
-
+ /* Don't waste our time if input is empty or if the multiplier is zero */
+ if ( input_str_chars == 0 || mult == 0 ) {
+ if ( input_str_type == IS_UNICODE ) {
+ RETURN_UNICODEL(USTR_MAKE(""), 0, 0);
+ } else if ( input_str_type == IS_STRING ) {
+ RETURN_STRINGL("", 0, 1);
+ } else {
+ RETURN_BINARYL("", 0, 1);
+ }
+ }
+
/* Initialize the result string */
- result_len = Z_STRLEN_PP(input_str) * Z_LVAL_PP(mult);
- if (result_len < 1 || result_len > 2147483647) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "You may not create strings longer then 2147483647 bytes");
- RETURN_FALSE;
+ /* the +1 reserves room for the terminator; it is not part of the returned length */
+ result_chars = (input_str_chars * mult) + 1;
+ if ( input_str_type == IS_UNICODE ) {
+ input_str_len = UBYTES(input_str_chars);
+ result_len = UBYTES(result_chars);
+ if ( result_chars < 1 || result_chars > (2147483647/UBYTES(1)) ) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "You may not create strings longer then %ld characters", 2147483647/UBYTES(1));
+ RETURN_FALSE;
+ }
+ } else {
+ input_str_len = input_str_chars;
+ result_len = result_chars;
+ if ( result_chars < 1 || result_chars > 2147483647 ) {
+ if ( input_str_type == IS_STRING ) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "You may not create strings longer then 2147483647 characters");
+ } else {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "You may not create strings longer then 2147483647 bytes");
+ }
+ RETURN_FALSE;
+ }
}
- result = (char *)emalloc(result_len + 1);
+ result = emalloc(result_len);
/* Heavy optimization for situations where input string is 1 byte long */
- if (Z_STRLEN_PP(input_str) == 1) {
- memset(result, *(Z_STRVAL_PP(input_str)), Z_LVAL_PP(mult));
+ if ( input_str_len == 1 ) {
+ memset(result, *((char *)input_str), mult);
} else {
char *s, *e, *ee;
int l=0;
- memcpy(result, Z_STRVAL_PP(input_str), Z_STRLEN_PP(input_str));
+ memcpy(result, input_str, input_str_len);
s = result;
- e = result + Z_STRLEN_PP(input_str);
+ e = result + input_str_len;
ee = result + result_len;
-
- while (e<ee) {
+
+ while ( e < ee ) {
l = (e-s) < (ee-e) ? (e-s) : (ee-e);
memmove(e, s, l);
e += l;
}
}
-
- result[result_len] = '\0';
- RETURN_STRINGL(result, result_len, 0);
+ /* terminate in the last slot and return the length without the terminator */
+ if ( input_str_type == IS_UNICODE ) {
+ *(((UChar *)result)+result_chars-1) = 0;
+ RETURN_UNICODEL((UChar *)result, result_chars-1, 0);
+ } else {
+ *(((char *)result)+result_chars-1) = '\0';
+ if ( input_str_type == IS_BINARY ) {
+ RETURN_BINARYL((char *)result, result_chars-1, 0);
+ } else {
+ RETURN_STRINGL((char *)result, result_chars-1, 0);
+ }
+ }
}
/* }}} */
Crash inside stream_get_line(), when length=0
--FILE--
<?php
+die("Temporary unavailable in unicode PHP. Remove this line.");
$path = dirname(__FILE__) . '/test.html';
file_put_contents($path, "foo<br>bar<br>foo");
RETVAL_STRING("string", 1);
break;
+ case IS_BINARY:
+ RETVAL_STRING("binary", 1);
+ break;
+
+ case IS_UNICODE:
+ RETVAL_STRING("unicode", 1);
+ break;
+
case IS_ARRAY:
RETVAL_STRING("array", 1);
break;
case IS_DOUBLE:
case IS_LONG:
case IS_STRING:
+ case IS_BINARY:
+ case IS_UNICODE:
RETURN_TRUE;
break;
php_stream_filter *thisfilter,
php_stream_bucket_brigade *buckets_in,
php_stream_bucket_brigade *buckets_out,
- size_t *bytes_consumed,
+ size_t *consumed,
int flags
TSRMLS_DC)
{
args[1] = &zout;
ALLOC_INIT_ZVAL(zconsumed);
- if (bytes_consumed) {
- ZVAL_LONG(zconsumed, *bytes_consumed);
- } else {
- ZVAL_NULL(zconsumed);
- }
+ ZVAL_NULL(zconsumed);
args[2] = &zconsumed;
ALLOC_INIT_ZVAL(zclosing);
if (call_result == SUCCESS && retval != NULL) {
convert_to_long(retval);
+ if (consumed) {
+ convert_to_long(zconsumed);
+ *consumed = Z_LVAL_P(zconsumed);
+ }
ret = Z_LVAL_P(retval);
} else if (call_result == FAILURE) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "failed to call filter function");
}
- if (bytes_consumed) {
- *bytes_consumed = Z_LVAL_P(zconsumed);
- }
-
if (retval)
zval_ptr_dtor(&retval);
zval_ptr_dtor(&zclosing);
static php_stream_filter_ops userfilter_ops = {
userfilter_filter,
userfilter_dtor,
- "user-filter"
+ "user-filter",
+ PSFO_FLAG_OUTPUTS_SAME
};
static php_stream_filter *user_filter_factory_create(const char *filtername,
add_property_zval(return_value, "bucket", zbucket);
/* add_property_zval increments the refcount which is unwanted here */
zval_ptr_dtor(&zbucket);
- add_property_stringl(return_value, "data", bucket->buf, bucket->buflen, 1);
- add_property_long(return_value, "datalen", bucket->buflen);
+ if (bucket->is_unicode) {
+ zval *unicode_data;
+
+ ALLOC_INIT_ZVAL(unicode_data);
+ ZVAL_UNICODEL(unicode_data, bucket->buf.ustr.val, bucket->buf.ustr.len, 1);
+ add_property_zval(return_value, "data", unicode_data);
+ add_property_long(return_value, "datalen", bucket->buf.str.len);
+ } else {
+ add_property_stringl(return_value, "data", bucket->buf.str.val, bucket->buf.str.len, 1);
+ add_property_long(return_value, "datalen", bucket->buf.str.len);
+ }
}
}
/* }}} */
ZEND_FETCH_RESOURCE(brigade, php_stream_bucket_brigade *, &zbrigade, -1, PHP_STREAM_BRIGADE_RES_NAME, le_bucket_brigade);
ZEND_FETCH_RESOURCE(bucket, php_stream_bucket *, pzbucket, -1, PHP_STREAM_BUCKET_RES_NAME, le_bucket);
- if (SUCCESS == zend_hash_find(Z_OBJPROP_P(zobject), "data", 5, (void**)&pzdata) && (*pzdata)->type == IS_STRING) {
+ if (SUCCESS == zend_hash_find(Z_OBJPROP_P(zobject), "data", 5, (void**)&pzdata)) {
if (!bucket->own_buf) {
bucket = php_stream_bucket_make_writeable(bucket TSRMLS_CC);
}
- if (bucket->buflen != Z_STRLEN_PP(pzdata)) {
- bucket->buf = perealloc(bucket->buf, Z_STRLEN_PP(pzdata), bucket->is_persistent);
- bucket->buflen = Z_STRLEN_PP(pzdata);
+ if (Z_TYPE_PP(pzdata) == IS_UNICODE) {
+ if (!bucket->is_unicode) {
+ pefree(bucket->buf.str.val, bucket->is_persistent);
+ bucket->buf.ustr.len = Z_USTRLEN_PP(pzdata);
+ bucket->buf.ustr.val = safe_pemalloc(sizeof(UChar), bucket->buf.ustr.len, 0, bucket->is_persistent);
+ bucket->is_unicode = 1;
+ }
+ if (bucket->buf.ustr.len < Z_USTRLEN_PP(pzdata)) {
+ pefree(bucket->buf.ustr.val, bucket->is_persistent);
+ bucket->buf.ustr.len = Z_USTRLEN_PP(pzdata);
+ bucket->buf.ustr.val = safe_pemalloc(sizeof(UChar), bucket->buf.ustr.len, 0, bucket->is_persistent);
+ }
+ bucket->buf.ustr.len = Z_USTRLEN_PP(pzdata);
+ memcpy(bucket->buf.ustr.val, Z_USTRVAL_PP(pzdata), bucket->buf.ustr.len * sizeof(UChar));
+ } else { /* string -- or at least string expressable */
+ SEPARATE_ZVAL_IF_NOT_REF(pzdata);
+ convert_to_string_ex(pzdata);
+ if (bucket->is_unicode) {
+ pefree(bucket->buf.ustr.val, bucket->is_persistent);
+ bucket->buf.str.len = Z_STRLEN_PP(pzdata);
+ bucket->buf.str.val = pemalloc(bucket->buf.str.len, bucket->is_persistent);
+ bucket->is_unicode = 0;
+ }
+ if (bucket->buf.str.len < Z_STRLEN_PP(pzdata)) {
+ bucket->buf.str.len = Z_STRLEN_PP(pzdata);
+ bucket->buf.str.val = perealloc(bucket->buf.str.val, bucket->buf.str.len, bucket->is_persistent);
+ }
+ bucket->buf.str.len = Z_STRLEN_PP(pzdata);
+ memcpy(bucket->buf.str.val, Z_STRVAL_PP(pzdata), bucket->buf.str.len);
}
- memcpy(bucket->buf, Z_STRVAL_PP(pzdata), bucket->buflen);
}
if (append) {
{
zval *zstream, *zbucket;
php_stream *stream;
- char *buffer;
+ zval *buffer;
char *pbuffer;
- int buffer_len;
php_stream_bucket *bucket;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs", &zstream, &buffer, &buffer_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &zstream, &buffer) == FAILURE) {
RETURN_FALSE;
}
php_stream_from_zval(stream, &zstream);
- if (!(pbuffer = pemalloc(buffer_len, php_stream_is_persistent(stream)))) {
- RETURN_FALSE;
- }
-
- memcpy(pbuffer, buffer, buffer_len);
-
- bucket = php_stream_bucket_new(stream, pbuffer, buffer_len, 1, php_stream_is_persistent(stream) TSRMLS_CC);
+ object_init(return_value);
+ if (Z_TYPE_P(buffer) == IS_UNICODE) {
+ bucket = php_stream_bucket_new_unicode(stream, Z_USTRVAL_P(buffer), Z_USTRLEN_P(buffer), 0, php_stream_is_persistent(stream) TSRMLS_CC);
+ ZVAL_ADDREF(buffer);
+ add_property_zval(return_value, "data", buffer);
+ add_property_long(return_value, "datalen", Z_USTRLEN_P(buffer));
+ } else {
+ convert_to_string(buffer);
+ bucket = php_stream_bucket_new(stream, Z_STRVAL_P(buffer), Z_STRLEN_P(buffer), 0, php_stream_is_persistent(stream) TSRMLS_CC);
+
+ add_property_zval(return_value, "data", buffer);
+ add_property_long(return_value, "datalen", Z_STRLEN_P(buffer));
+ }
ALLOC_INIT_ZVAL(zbucket);
ZEND_REGISTER_RESOURCE(zbucket, bucket, le_bucket);
- object_init(return_value);
add_property_zval(return_value, "bucket", zbucket);
/* add_property_zval increments the refcount which is unwanted here */
zval_ptr_dtor(&zbucket);
- add_property_stringl(return_value, "data", bucket->buf, bucket->buflen, 1);
- add_property_long(return_value, "datalen", bucket->buflen);
}
/* }}} */
/* }}} */
/* {{{ php_var_dump */
+/* temporary, for debugging */
+/*
+ * Print a UTF-16 string, double-quoted, after converting it with the
+ * output-encoding converter (UG(output_encoding_conv)).
+ *
+ * ustr    - UTF-16 code units to print
+ * length  - number of code units in ustr; a negative value means ustr is
+ *           NUL-terminated and its length is measured with u_strlen()
+ * verbose - when non-zero, the quoted text is followed by the list of code
+ *           points in hex, e.g.  "ab" { 0061 0062 }
+ *
+ * If the conversion fails, a diagnostic line is printed instead of the string.
+ */
+static void php_var_dump_unicode(UChar *ustr, int32_t length, int verbose TSRMLS_DC)
+{
+	UChar32 c;
+	int32_t i;
+	UErrorCode status = U_ZERO_ERROR;
+	int32_t clen;
+	char *out = NULL;
+
+	if (length < 0) {
+		/* resolve the NUL-terminated form first, so the buffer size
+		 * computed below is never derived from a negative length */
+		length = u_strlen(ustr);
+	}
+
+	if (length == 0) {
+		php_printf("\"\"");
+		return;
+	}
+
+	clen = length * ucnv_getMaxCharSize(ZEND_U_CONVERTER(UG(output_encoding_conv))) + 1;
+	while (1) {
+		/* on overflow ucnv_fromUChars() returns the size it needs; retry with that */
+		status = U_ZERO_ERROR;
+		out = erealloc(out, clen+1);
+		clen = ucnv_fromUChars(ZEND_U_CONVERTER(UG(output_encoding_conv)), out, clen+1, ustr, length, &status);
+		if (status != U_BUFFER_OVERFLOW_ERROR) {
+			break;
+		}
+	}
+	if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+		php_printf("problem converting string from Unicode: %s\n", u_errorName(status));
+		efree(out);
+		return;
+	}
+
+	/* write exactly clen bytes: the converted text may contain NUL bytes,
+	 * which a "%s" format would treat as the end of the string */
+	php_printf("\"");
+	PHPWRITE(out, clen);
+	php_printf("\"");
+
+	if (verbose) {
+		php_printf(" {");
+		/* output the code points (not code units) */
+		for (i = 0; i < length; /* U16_NEXT post-increments */) {
+			U16_NEXT(ustr, i, length, c);
+			php_printf(" %04x", c);
+		}
+		php_printf(" }");
+	}
+	efree(out);
+}
+
static int php_array_element_dump(zval **zv, int num_args, va_list args, zend_hash_key *hash_key)
{
int level;
+ int verbose;
TSRMLS_FETCH();
level = va_arg(args, int);
+ verbose = va_arg(args, int);
if (hash_key->nKeyLength==0) { /* numeric key */
php_printf("%*c[%ld]=>\n", level + 1, ' ', hash_key->h);
} else { /* string key */
- if (va_arg(args, int) && hash_key->arKey[0] == '\0') {
+ if (va_arg(args, int) &&
+ ((hash_key->type == IS_STRING && hash_key->u.string[0] == 0) ||
+ (hash_key->type == IS_UNICODE && hash_key->u.unicode[0] == '\0'))) {
/* XXX: perhaps when we are inside the class we should permit access to
* private & protected values
*/
return 0;
}
- php_printf("%*c[\"", level + 1, ' ');
- PHPWRITE(hash_key->arKey, hash_key->nKeyLength - 1);
- php_printf("\"]=>\n");
+ php_printf("%*c[", level + 1, ' ');
+ if (hash_key->type == IS_STRING) {
+ php_printf("\"");
+ PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1);
+ php_printf("\"");
+ } else if (hash_key->type == IS_BINARY) {
+ php_printf("b\"");
+ PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1);
+ php_printf("\"");
+ } else if (hash_key->type == IS_UNICODE) {
+ php_printf("u");
+ php_var_dump_unicode(hash_key->u.unicode, hash_key->nKeyLength-1, verbose TSRMLS_CC);
+ }
+ php_printf("]=>\n");
}
- php_var_dump(zv, level + 2 TSRMLS_CC);
+ php_var_dump(zv, level + 2, 0 TSRMLS_CC);
return 0;
}
{
int level;
char *prop_name, *class_name;
+ int verbose;
TSRMLS_FETCH();
level = va_arg(args, int);
+ verbose = va_arg(args, int);
if (hash_key->nKeyLength ==0 ) { /* numeric key */
php_printf("%*c[%ld]=>\n", level + 1, ' ', hash_key->h);
} else { /* string key */
- zend_unmangle_property_name(hash_key->arKey, &class_name, &prop_name);
+ zend_u_unmangle_property_name(hash_key->type, hash_key->u.string, &class_name, &prop_name);
+ php_printf("%*c[", level + 1, ' ');
+
if (class_name) {
- php_printf("%*c[\"%s", level + 1, ' ', prop_name);
+ if (hash_key->type == IS_STRING) {
+ php_printf("\"");
+ PHPWRITE(prop_name, strlen(prop_name));
+ php_printf("\"");
+ } else if (hash_key->type == IS_UNICODE) {
+ php_printf("u");
+ php_var_dump_unicode((UChar*)prop_name, u_strlen((UChar*)prop_name), verbose TSRMLS_CC);
+ }
if (class_name[0]=='*') {
ZEND_PUTS(":protected");
} else {
ZEND_PUTS(":private");
}
} else {
- php_printf("%*c[\"%s", level + 1, ' ', hash_key->arKey);
-#ifdef ANDREY_0
+ if (hash_key->type == IS_STRING) {
+ php_printf("\"");
+ PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1);
+ php_printf("\"");
+ } else if (hash_key->type == IS_UNICODE) {
+ php_printf("u");
+ php_var_dump_unicode(hash_key->u.unicode, hash_key->nKeyLength-1, verbose TSRMLS_CC);
+ }
ZEND_PUTS(":public");
-#endif
}
+#ifdef ANDREY_0
+#endif
ZEND_PUTS("\"]=>\n");
}
- php_var_dump(zv, level + 2 TSRMLS_CC);
+ php_var_dump(zv, level + 2, verbose TSRMLS_CC);
return 0;
}
-PHPAPI void php_var_dump(zval **struc, int level TSRMLS_DC)
+PHPAPI void php_var_dump(zval **struc, int level, int verbose TSRMLS_DC)
{
HashTable *myht = NULL;
char *class_name;
PHPWRITE(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc));
PUTS("\"\n");
break;
+ case IS_BINARY:
+ php_printf("%sbinary(%d) \"", COMMON, Z_STRLEN_PP(struc));
+ PHPWRITE(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc));
+ PUTS("\"\n");
+ break;
+ case IS_UNICODE:
+ /* temporary, for debugging */
+ php_printf("%sunicode(%d) ", COMMON, u_countChar32((*struc)->value.ustr.val, (*struc)->value.ustr.len));
+ php_var_dump_unicode((*struc)->value.ustr.val, (*struc)->value.ustr.len, verbose TSRMLS_CC);
+ PUTS("\n");
+ break;
case IS_ARRAY:
myht = Z_ARRVAL_PP(struc);
if (myht->nApplyCount > 1) {
}
Z_OBJ_HANDLER(**struc, get_class_name)(*struc, &class_name, &class_name_len, 0 TSRMLS_CC);
- php_printf("%sobject(%s)#%d (%d) {\n", COMMON, class_name, Z_OBJ_HANDLE_PP(struc), myht ? zend_hash_num_elements(myht) : 0);
+ php_printf("%sobject(%v)#%d (%d) {\n", COMMON, class_name, Z_OBJ_HANDLE_PP(struc), myht ? zend_hash_num_elements(myht) : 0);
efree(class_name);
php_element_dump_func = php_object_property_dump;
head_done:
if (myht) {
- zend_hash_apply_with_arguments(myht, (apply_func_args_t) php_element_dump_func, 1, level, (Z_TYPE_PP(struc) == IS_ARRAY ? 0 : 1));
+ zend_hash_apply_with_arguments(myht, (apply_func_args_t) php_element_dump_func, 3, level, verbose, (Z_TYPE_PP(struc) == IS_ARRAY ? 0 : 1));
}
if (level > 1) {
php_printf("%*c", level-1, ' ');
}
for (i=0; i<argc; i++)
- php_var_dump(args[i], 1 TSRMLS_CC);
+ php_var_dump(args[i], 1, 0 TSRMLS_CC);
+
+ efree(args);
+}
+/* }}} */
+
+
+/* {{{ proto void var_inspect(mixed var)
+ Dumps a string representation of variable to output (verbose form) */
+PHP_FUNCTION(var_inspect)
+{
+ zval ***args;
+ int argc;
+ int i;
+
+ argc = ZEND_NUM_ARGS();
+
+ args = (zval ***)safe_emalloc(argc, sizeof(zval **), 0);
+ if (ZEND_NUM_ARGS() == 0 || zend_get_parameters_array_ex(argc, args) == FAILURE) {
+ efree(args);
+ WRONG_PARAM_COUNT;
+ }
+
+ for (i=0; i<argc; i++)
+ php_var_dump(args[i], 1, 1 TSRMLS_CC);
efree(args);
}
/* XXX: perphaps when we are inside the class we should permit access to
* private & protected values
*/
- if (va_arg(args, int) && hash_key->arKey[0] == '\0') {
+ if (va_arg(args, int) &&
+ ((hash_key->type == IS_STRING && hash_key->u.string[0] == 0) ||
+ (hash_key->type == IS_UNICODE && hash_key->u.unicode[0] == '\0'))) {
return 0;
}
- php_printf("%*c[\"", level + 1, ' ');
- PHPWRITE(hash_key->arKey, hash_key->nKeyLength - 1);
- php_printf("\"]=>\n");
+ php_printf("%*c[", level + 1, ' ');
+ if (hash_key->type == IS_STRING) {
+ php_printf("\"");
+ PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1);
+ php_printf("\"");
+ } else if (hash_key->type == IS_BINARY) {
+ php_printf("b\"");
+ PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1);
+ php_printf("\"");
+ } else if (hash_key->type == IS_UNICODE) {
+ php_printf("u");
+ php_var_dump_unicode(hash_key->u.unicode, hash_key->nKeyLength-1, 1 TSRMLS_CC);
+ }
+ php_printf("]=>\n");
}
- php_debug_zval_dump(zv, level + 2 TSRMLS_CC);
+ php_debug_zval_dump(zv, level + 2, 1 TSRMLS_CC);
return 0;
}
-PHPAPI void php_debug_zval_dump(zval **struc, int level TSRMLS_DC)
+PHPAPI void php_debug_zval_dump(zval **struc, int level, int verbose TSRMLS_DC)
{
HashTable *myht = NULL;
char *class_name;
PHPWRITE(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc));
php_printf("\" refcount(%u)\n", Z_REFCOUNT_PP(struc));
break;
+ case IS_BINARY:
+ php_printf("%sbinary(%d) \"", COMMON, Z_STRLEN_PP(struc));
+ PHPWRITE(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc));
+ php_printf("\" refcount(%u)\n", Z_REFCOUNT_PP(struc));
+ break;
+ case IS_UNICODE:
+ /* temporary, for debugging */
+ php_printf("%sunicode(%d) ", COMMON, u_countChar32((*struc)->value.ustr.val, (*struc)->value.ustr.len));
+ php_var_dump_unicode((*struc)->value.ustr.val, (*struc)->value.ustr.len, verbose TSRMLS_CC);
+ php_printf("\" refcount(%u)\n", Z_REFCOUNT_PP(struc));
+ break;
case IS_ARRAY:
myht = Z_ARRVAL_PP(struc);
if (myht->nApplyCount > 1) {
}
ce = Z_OBJCE(**struc);
Z_OBJ_HANDLER(**struc, get_class_name)(*struc, &class_name, &class_name_len, 0 TSRMLS_CC);
- php_printf("%sobject(%s)#%d (%d) refcount(%u){\n", COMMON, class_name, Z_OBJ_HANDLE_PP(struc), myht ? zend_hash_num_elements(myht) : 0, Z_REFCOUNT_PP(struc));
+ php_printf("%sobject(%v)#%d (%d) refcount(%u){\n", COMMON, class_name, Z_OBJ_HANDLE_PP(struc), myht ? zend_hash_num_elements(myht) : 0, Z_REFCOUNT_PP(struc));
efree(class_name);
head_done:
if (myht) {
}
for (i=0; i<argc; i++)
- php_debug_zval_dump(args[i], 1 TSRMLS_CC);
+ php_debug_zval_dump(args[i], 1, 1 TSRMLS_CC);
efree(args);
}
if (hash_key->nKeyLength==0) { /* numeric key */
php_printf("%*c%ld => ", level + 1, ' ', hash_key->h);
} else { /* string key */
- char *key;
- int key_len;
- key = php_addcslashes(hash_key->arKey, hash_key->nKeyLength - 1, &key_len, 0, "'\\", 2 TSRMLS_CC);
php_printf("%*c'", level + 1, ' ');
- PHPWRITE(key, key_len);
+ if (hash_key->type == IS_UNICODE) {
+ php_printf("%r", hash_key->u.unicode);
+ } else {
+ char *key;
+ int key_len;
+
+ key = php_addcslashes(hash_key->u.string, hash_key->nKeyLength - 1, &key_len, 0, "'\\", 2 TSRMLS_CC);
+ PHPWRITE(key, key_len);
+ efree(key);
+ }
php_printf("' => ");
- efree(key);
}
php_var_export(zv, level + 2 TSRMLS_CC);
PUTS (",\n");
if (hash_key->nKeyLength != 0) {
php_printf("%*c", level + 1, ' ');
- zend_unmangle_property_name(hash_key->arKey, &class_name, &prop_name);
+ zend_u_unmangle_property_name(hash_key->type, hash_key->u.string, &class_name, &prop_name);
if (class_name) {
if (class_name[0] == '*') {
php_printf("protected");
} else {
php_printf("public");
}
- php_printf(" $%s = ", prop_name);
+ php_printf(" $%R = ", hash_key->type, prop_name);
php_var_export(zv, level + 2 TSRMLS_CC);
PUTS (";\n");
}
case IS_DOUBLE:
php_printf("%.*G", (int) EG(precision), Z_DVAL_PP(struc));
break;
+ case IS_BINARY:
+ PUTS ("b");
case IS_STRING:
tmp_str = php_addcslashes(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc), &tmp_len, 0, "'\\", 2 TSRMLS_CC);
PUTS ("'");
PUTS ("'");
efree (tmp_str);
break;
+ case IS_UNICODE:
+/* TODO
+ tmp_str = php_addcslashes(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc), &tmp_len, 0, "'\\", 2 TSRMLS_CC);
+*/
+ PUTS ("'");
+ php_printf("%r", Z_USTRVAL_PP(struc));
+ PUTS ("'");
+ break;
case IS_ARRAY:
myht = Z_ARRVAL_PP(struc);
if (level > 1) {
php_printf("\n%*c", level - 1, ' ');
}
Z_OBJ_HANDLER(**struc, get_class_name)(*struc, &class_name, &class_name_len, 0 TSRMLS_CC);
- php_printf ("class %s {\n", class_name);
+ php_printf ("class %v {\n", class_name);
efree(class_name);
if (myht) {
zend_hash_apply_with_arguments(myht, (apply_func_args_t) php_object_element_export, 1, level);
smart_str_appendl(buf, "\";", 2);
}
+/*
+ * Append a binary string to buf in the form  B:<len>:"<bytes>";
+ * where <len> is the byte count and <bytes> are copied verbatim.
+ */
+static inline void php_var_serialize_binary(smart_str *buf, char *str, int len)
+{
+	/* type tag followed by the byte count */
+	smart_str_appendl(buf, "B:", sizeof("B:") - 1);
+	smart_str_append_long(buf, len);
+
+	/* quoted payload, closed with the ';' terminator */
+	smart_str_appendl(buf, ":\"", sizeof(":\"") - 1);
+	smart_str_appendl(buf, str, len);
+	smart_str_appendl(buf, "\";", sizeof("\";") - 1);
+}
+
+/*
+ * Append len UTF-16 code units of ustr to buf, each written as a \uXXXX
+ * escape with four lowercase hex digits.
+ *
+ * The string is walked per code unit, not per code point: a four-digit
+ * escape cannot hold a supplementary code point (above U+FFFF), and len is
+ * a code-unit count, so decoding surrogate pairs would both lose the high
+ * bits and emit fewer escapes than len announces. A surrogate pair is
+ * therefore written as two escapes.
+ */
+static inline void php_var_serialize_ustr(smart_str *buf, UChar *ustr, int len)
+{
+	static const char hex[] = "0123456789abcdef";
+	UChar unit;
+	int i;
+
+	for (i = 0; i < len; i++) {
+		unit = ustr[i];
+		smart_str_appendl(buf, "\\u", 2);
+		smart_str_appendc(buf, hex[(unit >> 12) & 0xf]);
+		smart_str_appendc(buf, hex[(unit >> 8) & 0xf]);
+		smart_str_appendc(buf, hex[(unit >> 4) & 0xf]);
+		smart_str_appendc(buf, hex[(unit >> 0) & 0xf]);
+	}
+}
+
+/*
+ * Append a Unicode string to buf in the form  U:<len>:"<escaped text>";
+ * where <len> is the value of len as passed in and the text is produced by
+ * php_var_serialize_ustr().
+ */
+static inline void php_var_serialize_unicode(smart_str *buf, UChar *ustr, int len)
+{
+	/* type tag followed by the length */
+	smart_str_appendl(buf, "U:", sizeof("U:") - 1);
+	smart_str_append_long(buf, len);
+
+	/* quoted, escaped payload, closed with the ';' terminator */
+	smart_str_appendl(buf, ":\"", sizeof(":\"") - 1);
+	php_var_serialize_ustr(buf, ustr, len);
+	smart_str_appendl(buf, "\";", sizeof("\";") - 1);
+}
+
static inline zend_bool php_var_serialize_class_name(smart_str *buf, zval **struc TSRMLS_DC)
{
PHP_CLASS_ATTRIBUTES;
smart_str_appendl(buf, "O:", 2);
smart_str_append_long(buf, name_len);
smart_str_appendl(buf, ":\"", 2);
- smart_str_appendl(buf, class_name, name_len);
+ if (UG(unicode)) {
+ php_var_serialize_ustr(buf, (UChar*)class_name, name_len);
+ } else {
+ smart_str_appendl(buf, class_name, name_len);
+ }
smart_str_appendl(buf, "\":", 2);
PHP_CLEANUP_CLASS_ATTRIBUTES();
return incomplete_class;
zend_hash_get_current_data_ex(HASH_OF(retval_ptr),
(void **) &name, &pos);
- if (Z_TYPE_PP(name) != IS_STRING) {
+ if (Z_TYPE_PP(name) != (UG(unicode)?IS_UNICODE:IS_STRING)) {
php_error_docref(NULL TSRMLS_CC, E_NOTICE, "__sleep should return an array only "
"containing the names of instance-variables to "
"serialize.");
smart_str_appendl(buf,"N;", 2);
continue;
}
- if (zend_hash_find(Z_OBJPROP_PP(struc), Z_STRVAL_PP(name),
- Z_STRLEN_PP(name) + 1, (void *) &d) == SUCCESS) {
- php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name));
+ if (zend_u_hash_find(Z_OBJPROP_PP(struc), Z_TYPE_PP(name), Z_UNIVAL_PP(name),
+ Z_UNILEN_PP(name) + 1, (void *) &d) == SUCCESS) {
+ if (Z_TYPE_PP(name) == IS_UNICODE) {
+ php_var_serialize_unicode(buf, Z_USTRVAL_PP(name), Z_USTRLEN_PP(name));
+ } else {
+ php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name));
+ }
php_var_serialize_intern(buf, d, var_hash TSRMLS_CC);
} else {
zend_class_entry *ce;
int prop_name_length;
do {
- zend_mangle_property_name(&priv_name, &prop_name_length, ce->name, ce->name_length,
+ zend_u_mangle_property_name(&priv_name, &prop_name_length, Z_TYPE_PP(name), ce->name, ce->name_length,
Z_STRVAL_PP(name), Z_STRLEN_PP(name), ce->type & ZEND_INTERNAL_CLASS);
- if (zend_hash_find(Z_OBJPROP_PP(struc), priv_name, prop_name_length+1, (void *) &d) == SUCCESS) {
- php_var_serialize_string(buf, priv_name, prop_name_length);
+ if (zend_u_hash_find(Z_OBJPROP_PP(struc), Z_TYPE_PP(name), priv_name, prop_name_length, (void *) &d) == SUCCESS) {
+ if (Z_TYPE_PP(name) == IS_UNICODE) {
+ php_var_serialize_unicode(buf, priv_name, prop_name_length-1);
+ } else {
+ php_var_serialize_string(buf, priv_name, prop_name_length-1);
+ }
efree(priv_name);
php_var_serialize_intern(buf, d, var_hash TSRMLS_CC);
break;
}
efree(priv_name);
- zend_mangle_property_name(&prot_name, &prop_name_length, "*", 1,
+ zend_u_mangle_property_name(&prot_name, &prop_name_length, Z_TYPE_PP(name), "*", 1,
Z_STRVAL_PP(name), Z_STRLEN_PP(name), ce->type & ZEND_INTERNAL_CLASS);
- if (zend_hash_find(Z_OBJPROP_PP(struc), prot_name, prop_name_length+1, (void *) &d) == SUCCESS) {
- php_var_serialize_string(buf, prot_name, prop_name_length);
+ if (zend_u_hash_find(Z_OBJPROP_PP(struc), Z_TYPE_PP(name), prot_name, prop_name_length, (void *) &d) == SUCCESS) {
+ if (Z_TYPE_PP(name) == IS_UNICODE) {
+ php_var_serialize_unicode(buf, prot_name, prop_name_length-1);
+ } else {
+ php_var_serialize_string(buf, prot_name, prop_name_length-1);
+ }
efree(prot_name);
php_var_serialize_intern(buf, d, var_hash TSRMLS_CC);
break;
}
efree(prot_name);
php_error_docref(NULL TSRMLS_CC, E_NOTICE, "\"%s\" returned as member variable from __sleep() but does not exist", Z_STRVAL_PP(name));
- php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name));
+ if (Z_TYPE_PP(name) == IS_UNICODE) {
+ php_var_serialize_unicode(buf, Z_USTRVAL_PP(name), Z_USTRLEN_PP(name));
+ } else {
+ php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name));
+ }
php_var_serialize_intern(buf, &nvalp, var_hash TSRMLS_CC);
} while (0);
} else {
- php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name));
+ if (Z_TYPE_PP(name) == IS_UNICODE) {
+ php_var_serialize_unicode(buf, Z_USTRVAL_PP(name), Z_USTRLEN_PP(name));
+ } else {
+ php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name));
+ }
php_var_serialize_intern(buf, &nvalp, var_hash TSRMLS_CC);
}
}
php_var_serialize_string(buf, Z_STRVAL_PP(struc), Z_STRLEN_PP(struc));
return;
+ case IS_BINARY:
+ php_var_serialize_binary(buf, Z_STRVAL_PP(struc), Z_STRLEN_PP(struc));
+ return;
+
+ case IS_UNICODE:
+ php_var_serialize_unicode(buf, Z_USTRVAL_PP(struc), Z_USTRLEN_PP(struc));
+ return;
+
case IS_OBJECT: {
zval *retval_ptr = NULL;
zval fname;
case HASH_KEY_IS_STRING:
php_var_serialize_string(buf, key, key_len - 1);
break;
+ case HASH_KEY_IS_BINARY:
+ php_var_serialize_binary(buf, key, key_len - 1);
+ break;
+ case HASH_KEY_IS_UNICODE:
+ php_var_serialize_unicode(buf, (UChar*)key, key_len - 1);
+ break;
}
/* we should still add element even if it's not OK,
long datalen;
if(ce->unserialize == NULL) {
- zend_error(E_WARNING, "Class %s has no unserializer", ce->name);
+ zend_error(E_WARNING, "Class %v has no unserializer", ce->name);
return 0;
}
long datalen;
if(ce->unserialize == NULL) {
- zend_error(E_WARNING, "Class %s has no unserializer", ce->name);
+ zend_error(E_WARNING, "Class %v has no unserializer", ce->name);
return 0;
}
--- /dev/null
+unicode
+Andrei Zmievski
--- /dev/null
+This extension is experimental.
+Its functions may change their names
+or move to another extension altogether,
+so do not rely too much on them.
+You have been warned!
--- /dev/null
+dnl
+dnl $ Id: $
+dnl
+
+dnl The option is advertised as --disable-unicode, so the extension must be
+dnl enabled unless the user turns it off: pass "yes" as the default value.
+PHP_ARG_ENABLE(unicode, whether to enable unicode functions,
+[ --disable-unicode Disable Unicode API support], yes)
+
+if test "$PHP_UNICODE" != "no"; then
+ PHP_SUBST(UNICODE_SHARED_LIBADD)
+ AC_DEFINE(HAVE_UNICODE, 1, [ ])
+ PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_filter.c, $ext_shared)
+fi
+
--- /dev/null
+// $ Id: $
+// vim:ft=javascript
+
+ARG_ENABLE('unicode' , 'ICU API extension', 'no');
+if (PHP_UNICODE) {
+
+ EXTENSION("unicode", "unicode.c");
+ AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension');
+}
--- /dev/null
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.0 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_0.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Andrei Zmievski <andrei@php.net> |
+ +----------------------------------------------------------------------+
+*/
+
+/* $ Id: $ */
+
+#include "php_unicode.h"
+
+#if HAVE_UNICODE
+
+/*
+ * Canonicalize an ICU locale identifier with uloc_canonicalize().
+ *
+ * target     - receives a newly allocated, NUL-terminated buffer
+ * target_len - receives the length returned by uloc_canonicalize()
+ * locale     - locale identifier to canonicalize
+ * status     - receives the ICU status of the last attempt
+ *
+ * The buffer starts at 128 bytes and is resized to the length reported by
+ * ICU for as long as ICU reports U_BUFFER_OVERFLOW_ERROR. The buffer is
+ * handed to the caller whatever the final status is, so the caller owns
+ * it in the failure case too.
+ */
+static void php_canonicalize_locale_id(char **target, int32_t *target_len, char *locale, UErrorCode *status)
+{
+	char *buf = NULL;
+	int32_t buf_len = 128;
+
+	do {
+		*status = U_ZERO_ERROR;
+		/* one spare byte for the terminator written below */
+		buf = erealloc(buf, buf_len + 1);
+		buf_len = uloc_canonicalize(locale, buf, buf_len, status);
+	} while (*status == U_BUFFER_OVERFLOW_ERROR);
+
+	buf[buf_len] = 0;
+	*target_len = buf_len;
+	*target = buf;
+}
+
+/* {{{ proto string icu_loc_get_default()
+   Returns a copy of the current default locale, UG(default_locale) */
+PHP_FUNCTION(icu_loc_get_default)
+{
+	/* takes no arguments; on a parse failure the return value is left untouched */
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") != FAILURE) {
+		RETVAL_STRING(UG(default_locale), 1);
+	}
+}
+/* }}} */
+
+/* {{{ proto bool icu_loc_set_default(string locale)
+   Canonicalizes locale and makes it the default locale. Returns TRUE when
+   the default was changed, FALSE when canonicalization failed or the
+   canonical form equals the current default. */
+PHP_FUNCTION(icu_loc_set_default)
+{
+	char *locale;
+	int locale_len;
+	char *canonicalized = NULL;
+	/* separate variable: the helper expects an int32_t *, not an int * */
+	int32_t canonicalized_len = 0;
+	UErrorCode status = U_ZERO_ERROR;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &locale, &locale_len) == FAILURE) {
+		return;
+	}
+
+	php_canonicalize_locale_id(&canonicalized, &canonicalized_len, locale, &status);
+	/*
+	 * UTODO: is this right? canonicalization does not seem to perform locale
+	 * validation.
+	 */
+	if (U_FAILURE(status)) {
+		php_error(E_WARNING, "Invalid locale: %s\n", locale);
+		/* the helper hands its buffer back even on failure; release it */
+		if (canonicalized) {
+			efree(canonicalized);
+		}
+		RETURN_FALSE;
+	}
+	/* don't bother if locales are identical */
+	if (!strcmp(UG(default_locale), canonicalized)) {
+		efree(canonicalized);
+		RETURN_FALSE;
+	}
+	/* the global takes ownership of the canonicalized buffer */
+	efree(UG(default_locale));
+	UG(default_locale) = canonicalized;
+	zend_reset_locale_deps(TSRMLS_C);
+	RETURN_TRUE;
+}
+/* }}} */
+
+#endif /* HAVE_UNICODE */
+
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
--- /dev/null
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE package SYSTEM "http://pear.php.net/dtd/package-1.0">
+<package>
+ <name>unicode</name>
+ <summary>ICU API extension</summary>
+ <license>PHP</license>
+
+ <maintainers>
+ <maintainer>
+ <user>andrei</user>
+ <name>Andrei Zmievski</name>
+ <email>andrei@php.net</email>
+ <role>lead</role>
+ </maintainer>
+ </maintainers>
+
+ <release>
+ <version>1.0</version>
+ <date>2005-04-18</date>
+ <state>unknown</state>
+ </release>
+
+ <filelist>
+ <dir role="doc" name="/">
+ <file role="doc">EXPERIMENTAL</file>
+ <file role="doc">CREDITS</file>
+ <file role="src">config.m4</file>
+ <file role="src">unicode.dsp</file>
+ <file role="src">config.w32</file>
+ <file role="src">unicode.c</file>
+ <!-- the remaining sources that config.m4 compiles into the extension -->
+ <file role="src">locale.c</file>
+ <file role="src">unicode_filter.c</file>
+ <file role="src">php_unicode.h</file>
+ </dir>
+ </filelist>
+</package>
--- /dev/null
+/*
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.0 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_0.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Andrei Zmievski <andrei@php.net> |
+ +----------------------------------------------------------------------+
+*/
+
+/* $ Id: $ */
+
+#ifndef PHP_UNICODE_H
+#define PHP_UNICODE_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <php.h>
+
+#ifdef HAVE_UNICODE
+
+#include <php_ini.h>
+#include <SAPI.h>
+#include <ext/standard/info.h>
+#include <zend_unicode.h>
+#include <unicode/uloc.h>
+
+#ifdef ZTS
+#include "TSRM.h"
+#endif
+
+/*
+ * The extern "C" block is opened and closed inside the same HAVE_UNICODE
+ * conditional, so a C++ translation unit built without HAVE_UNICODE never
+ * sees an unbalanced brace.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern zend_module_entry unicode_module_entry;
+#define phpext_unicode_ptr &unicode_module_entry
+
+#ifdef PHP_WIN32
+#define PHP_UNICODE_API __declspec(dllexport)
+#else
+#define PHP_UNICODE_API
+#endif
+
+/* module life-cycle hooks, defined in unicode.c */
+PHP_MINIT_FUNCTION(unicode);
+PHP_MSHUTDOWN_FUNCTION(unicode);
+PHP_RINIT_FUNCTION(unicode);
+PHP_RSHUTDOWN_FUNCTION(unicode);
+PHP_MINFO_FUNCTION(unicode);
+
+/* userland functions, defined in locale.c */
+PHP_FUNCTION(icu_loc_get_default);
+PHP_FUNCTION(icu_loc_set_default);
+
+/* stream filter factory registered for "unicode.*" by PHP_MINIT */
+extern php_stream_filter_factory php_unicode_filter_factory;
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* HAVE_UNICODE */
+
+#endif /* PHP_UNICODE_H */
+
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
--- /dev/null
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.0 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_0.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Andrei Zmievski <andrei@php.net> |
+ +----------------------------------------------------------------------+
+*/
+
+/* $ Id: $ */
+
+#include "php_unicode.h"
+
+#if HAVE_UNICODE
+
+/* {{{ unicode_functions[] */
+/* Userland functions exported by the extension; neither entry supplies
+ * argument information (second PHP_FE argument is NULL). */
+function_entry unicode_functions[] = {
+ PHP_FE(icu_loc_get_default, NULL)
+ PHP_FE(icu_loc_set_default, NULL)
+ { NULL, NULL, NULL } /* end-of-table sentinel */
+};
+/* }}} */
+
+
+/* {{{ unicode_module_entry
+ */
+zend_module_entry unicode_module_entry = {
+ STANDARD_MODULE_HEADER,
+ "unicode", /* extension name */
+ unicode_functions, /* exported userland functions */
+ PHP_MINIT(unicode), /* registers the "unicode.*" stream filter factory */
+ PHP_MSHUTDOWN(unicode), /* unregisters the "unicode.*" stream filter factory */
+ PHP_RINIT(unicode), /* currently a no-op that returns SUCCESS */
+ PHP_RSHUTDOWN(unicode), /* currently a no-op that returns SUCCESS */
+ PHP_MINFO(unicode), /* prints the extension's info box */
+ "1.0", /* extension version */
+ STANDARD_MODULE_PROPERTIES
+};
+/* }}} */
+
+#ifdef COMPILE_DL_UNICODE
+ZEND_GET_MODULE(unicode)
+#endif
+
+
+/* {{{ PHP_MINIT_FUNCTION */
+PHP_MINIT_FUNCTION(unicode)
+{
+	/* register the factory that serves stream filters named "unicode.*";
+	 * module startup fails if the registration fails */
+	int registered = php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC);
+
+	return (registered == FAILURE) ? FAILURE : SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ PHP_MSHUTDOWN_FUNCTION */
+PHP_MSHUTDOWN_FUNCTION(unicode)
+{
+	/* drop the "unicode.*" stream filter factory registered at startup;
+	 * a failed unregistration is reported as a shutdown failure */
+	int unregistered = php_stream_filter_unregister_factory("unicode.*" TSRMLS_CC);
+
+	return (unregistered == FAILURE) ? FAILURE : SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ PHP_RINIT_FUNCTION */
+PHP_RINIT_FUNCTION(unicode)
+{
+ /* nothing is set up per request; always reports success */
+ return SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ PHP_RSHUTDOWN_FUNCTION */
+PHP_RSHUTDOWN_FUNCTION(unicode)
+{
+ /* nothing is torn down per request; always reports success */
+ return SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ PHP_MINFO_FUNCTION */
+PHP_MINFO_FUNCTION(unicode)
+{
+ /* a single info box with the extension title, ICU's copyright string
+  * (U_COPYRIGHT_STRING) and the ICU version (U_ICU_VERSION) */
+ php_info_print_box_start(0);
+ php_printf("ICU API extension\n");
+ php_printf("Based on ICU library %s\n", U_COPYRIGHT_STRING);
+ php_printf("ICU Version %s\n", U_ICU_VERSION);
+ php_info_print_box_end();
+
+}
+/* }}} */
+
+
+#endif /* HAVE_UNICODE */
+
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
--- /dev/null
+# Microsoft Developer Studio Project File - Name="unicode" - Package Owner=<4>\r
+# Microsoft Developer Studio Generated Build File, Format Version 6.00\r
+# ** DO NOT EDIT **\r
+\r
+# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102\r
+\r
+CFG=unicode - Win32 Debug_TS\r
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,\r
+!MESSAGE use the Export Makefile command and run\r
+!MESSAGE \r
+!MESSAGE NMAKE /f "unicode.mak".\r
+!MESSAGE \r
+!MESSAGE You can specify a configuration when running NMAKE\r
+!MESSAGE by defining the macro CFG on the command line. For example:\r
+!MESSAGE \r
+!MESSAGE NMAKE /f "unicode.mak" CFG="unicode - Win32 Debug_TS"\r
+!MESSAGE \r
+!MESSAGE Possible choices for configuration are:\r
+!MESSAGE \r
+!MESSAGE "unicode - Win32 Release_TS" (based on "Win32 (x86) Dynamic-Link Library")\r
+!MESSAGE "unicode - Win32 Debug_TS" (based on "Win32 (x86) Dynamic-Link Library")\r
+!MESSAGE \r
+\r
+# Begin Project\r
+# PROP AllowPerConfigDependencies 0\r
+# PROP Scc_ProjName ""\r
+# PROP Scc_LocalPath ""\r
+CPP=cl.exe\r
+MTL=midl.exe\r
+RSC=rc.exe\r
+\r
+!IF "$(CFG)" == "unicode - Win32 Release_TS"\r
+\r
+# PROP BASE Use_MFC 0\r
+# PROP BASE Use_Debug_Libraries 0\r
+# PROP BASE Output_Dir "Release_TS"\r
+# PROP BASE Intermediate_Dir "Release_TS"\r
+# PROP BASE Target_Dir ""\r
+# PROP Use_MFC 0\r
+# PROP Use_Debug_Libraries 0\r
+# PROP Output_Dir "Release_TS"\r
+# PROP Intermediate_Dir "Release_TS"\r
+# PROP Ignore_Export_Lib 0\r
+# PROP Target_Dir ""\r
+# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "UNICODE_EXPORTS" /YX /FD /c\r
+# ADD CPP /nologo /MT /W3 /GX /O2 /I "..\.." /I "..\..\Zend" /I "..\..\TSRM" /I "..\..\main" /D "WIN32" /D "PHP_EXPORTS" /D "COMPILE_DL_UNICODE" /D ZTS=1 /D HAVE_UNICODE=1 /D ZEND_DEBUG=0 /D "NDEBUG" /D "_WINDOWS" /D "ZEND_WIN32" /D "PHP_WIN32" /YX /FD /c\r
+# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32\r
+# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32\r
+# ADD BASE RSC /l 0x407 /d "NDEBUG"\r
+# ADD RSC /l 0x407 /d "NDEBUG"\r
+BSC32=bscmake.exe\r
+# ADD BASE BSC32 /nologo\r
+# ADD BSC32 /nologo\r
+LINK32=link.exe\r
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386\r
+# ADD LINK32 php4ts.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386 /out:"..\..\Release_TS\php_unicode.dll" /libpath:"..\..\Release_TS" /libpath:"..\..\Release_TS_Inline"\r
+\r
+!ELSEIF "$(CFG)" == "unicode - Win32 Debug_TS"\r
+\r
+# PROP BASE Use_MFC 0\r
+# PROP BASE Use_Debug_Libraries 1\r
+# PROP BASE Output_Dir "Debug_TS"\r
+# PROP BASE Intermediate_Dir "Debug_TS"\r
+# PROP BASE Target_Dir ""\r
+# PROP Use_MFC 0\r
+# PROP Use_Debug_Libraries 1\r
+# PROP Output_Dir "Debug_TS"\r
+# PROP Intermediate_Dir "Debug_TS"\r
+# PROP Ignore_Export_Lib 0\r
+# PROP Target_Dir ""\r
+# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "UNICODE_EXPORTS" /YX /FD /GZ /c\r
+# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /I "..\.." /I "..\..\Zend" /I "..\..\TSRM" /I "..\..\main" /D ZEND_DEBUG=1 /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "PHP_EXPORTS" /D "COMPILE_DL_UNICODE" /D ZTS=1 /D "ZEND_WIN32" /D "PHP_WIN32" /D HAVE_UNICODE=1 /YX /FD /GZ /c\r
+# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32\r
+# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32\r
+# ADD BASE RSC /l 0x407 /d "_DEBUG"\r
+# ADD RSC /l 0x407 /d "_DEBUG"\r
+BSC32=bscmake.exe\r
+# ADD BASE BSC32 /nologo\r
+# ADD BSC32 /nologo\r
+LINK32=link.exe\r
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept\r
+# ADD LINK32 php4ts_debug.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /out:"..\..\Debug_TS\php_unicode.dll" /pdbtype:sept /libpath:"..\..\Debug_TS"\r
+\r
+!ENDIF \r
+\r
+# Begin Target\r
+\r
+# Name "unicode - Win32 Release_TS"\r
+# Name "unicode - Win32 Debug_TS"\r
+\r
+# Begin Group "Source Files"\r
+\r
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"\r
+\r
+# Begin Source File\r
+\r
+SOURCE=.\unicode.c\r
+# End Source File\r
+\r
+# End Group\r
+\r
+# Begin Group "Header Files"\r
+\r
+# PROP Default_Filter "h;hpp;hxx;hm;inl"\r
+\r
+# Begin Source File\r
+\r
+SOURCE=.\php_unicode.h\r
+# End Source File\r
+# End Group\r
+# End Target\r
+# End Project\r
--- /dev/null
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.0 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_0.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Sara Golemon (pollita@php.net) |
+ +----------------------------------------------------------------------+
+*/
+
+/* $Id$ */
+
+
+#include "php.h"
+#include <unicode/ucnv.h>
+
+/* {{{ data structure */
+/* State attached to a unicode.* stream filter through thisfilter->abstract. */
+typedef struct _php_unicode_filter_data {
+ /* persistence flag handed to pemalloc()/pefree() for the output buffers and for this struct */
+ char is_persistent;
+ /* ICU converter obtained from ucnv_open() in the factory; released with ucnv_close() in the dtor */
+ UConverter *conv;
+
+ /* non-zero while the converter is being used in the to-Unicode direction;
+ * the factory sets it for "from." filters and the tidy filter flips it
+ * (resetting the converter) whenever the preferred output type changes */
+ char to_unicode;
+} php_unicode_filter_data;
+/* }}} */
+
+/* {{{ unicode.* filter implementation */
+
+/* unicode.to.* -- Expects Unicode -- Returns String
+ *
+ * Converts every Unicode bucket of buckets_in into binary string buckets with
+ * the filter's ICU converter (ucnv_fromUnicode) and appends them to
+ * buckets_out. Buckets that do not carry Unicode data are passed on untouched.
+ *
+ * stream         - stream the newly created buckets belong to
+ * thisfilter     - filter instance; ->abstract holds a php_unicode_filter_data
+ * buckets_in     - input brigade; every bucket is unlinked and handled
+ * buckets_out    - output brigade receiving converted / passed-through buckets
+ * bytes_consumed - optional out parameter: number of input bytes handled
+ * flags          - with PSFS_FLAG_FLUSH_CLOSE the converter is flushed as well
+ *
+ * Returns PSFS_PASS_ON if at least one bucket was appended to buckets_out,
+ * PSFS_FEED_ME if nothing was produced, and PSFS_ERR_FATAL if the filter
+ * state is missing or the converter cannot make progress on the input.
+ */
+static php_stream_filter_status_t php_unicode_to_string_filter(
+	php_stream *stream,
+	php_stream_filter *thisfilter,
+	php_stream_bucket_brigade *buckets_in,
+	php_stream_bucket_brigade *buckets_out,
+	size_t *bytes_consumed,
+	int flags
+	TSRMLS_DC)
+{
+	php_unicode_filter_data *data;
+	php_stream_filter_status_t exit_status = PSFS_FEED_ME;
+	size_t consumed = 0;
+
+	if (!thisfilter || !thisfilter->abstract) {
+		/* Should never happen */
+		return PSFS_ERR_FATAL;
+	}
+
+	data = (php_unicode_filter_data *)(thisfilter->abstract);
+	while (buckets_in->head) {
+		php_stream_bucket *bucket = buckets_in->head;
+		UChar *src, *src_end;
+
+		php_stream_bucket_unlink(bucket TSRMLS_CC);
+		if (!bucket->is_unicode) {
+			/* Already a binary string, can't really do anything with it */
+			consumed += bucket->buf.str.len;
+			php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
+			exit_status = PSFS_PASS_ON;
+			continue;
+		}
+
+		/* only look at the Unicode view of the buffer once we know it is Unicode */
+		src = bucket->buf.ustr.val;
+		src_end = src + bucket->buf.ustr.len;
+
+		while (src < src_end) {
+			UChar *chunk_start = src;
+			int remaining = (int)(src_end - src);
+			char *destp, *destbuf;
+			int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv));
+			UErrorCode errCode = U_ZERO_ERROR;
+			php_stream_bucket *new_bucket;
+
+			destp = destbuf = (char *)pemalloc(destlen, data->is_persistent);
+
+			ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src_end, NULL, FALSE, &errCode);
+
+			if (destp == destbuf && src == chunk_start) {
+				/* The converter neither consumed input nor produced output:
+				 * retrying would loop forever, so give up on this stream. */
+				pefree(destbuf, data->is_persistent);
+				php_stream_bucket_delref(bucket TSRMLS_CC);
+				return PSFS_ERR_FATAL;
+			}
+
+			new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
+			php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
+			exit_status = PSFS_PASS_ON;
+		}
+		consumed += UBYTES(bucket->buf.ustr.len);
+		php_stream_bucket_delref(bucket TSRMLS_CC);
+	}
+
+	if (flags & PSFS_FLAG_FLUSH_CLOSE) {
+		UErrorCode errCode = U_ZERO_ERROR;
+		char d[64], *dest = d;
+		UChar no_input = 0;
+		const UChar *srcp = &no_input;
+		/* Spit it out! */
+
+		/* ICU rejects a NULL source pointer with U_ILLEGAL_ARGUMENT_ERROR, so
+		 * flush by converting an empty source range (srcp == limit). */
+		ucnv_fromUnicode(data->conv, &dest, d + sizeof(d), &srcp, srcp, NULL, TRUE, &errCode);
+		if (dest > d) {
+			size_t outlen = dest - d;
+			/* The bucket outlives this stack frame, so it must own a heap copy
+			 * instead of pointing at the local array d. */
+			char *outbuf = (char *)pemalloc(outlen, data->is_persistent);
+			php_stream_bucket *bucket;
+
+			memcpy(outbuf, d, outlen);
+			bucket = php_stream_bucket_new(stream, outbuf, outlen, 1, data->is_persistent TSRMLS_CC);
+			php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
+			exit_status = PSFS_PASS_ON;
+		}
+	}
+
+	if (bytes_consumed) {
+		*bytes_consumed = consumed;
+	}
+
+	return exit_status;
+}
+
+/* unicode.from.* -- Expects String -- Returns Unicode
+ *
+ * Converts every binary string bucket of buckets_in into Unicode buckets with
+ * the filter's ICU converter (ucnv_toUnicode) and appends them to buckets_out.
+ * Buckets that already carry Unicode data are passed on untouched.
+ *
+ * stream         - stream the newly created buckets belong to
+ * thisfilter     - filter instance; ->abstract holds a php_unicode_filter_data
+ * buckets_in     - input brigade; every bucket is unlinked and handled
+ * buckets_out    - output brigade receiving converted / passed-through buckets
+ * bytes_consumed - optional out parameter: number of input bytes handled
+ * flags          - with PSFS_FLAG_FLUSH_CLOSE the converter is flushed as well
+ *
+ * Returns PSFS_PASS_ON if at least one bucket was appended to buckets_out,
+ * PSFS_FEED_ME if nothing was produced, and PSFS_ERR_FATAL if the filter
+ * state is missing or the converter cannot make progress on the input.
+ */
+static php_stream_filter_status_t php_unicode_from_string_filter(
+	php_stream *stream,
+	php_stream_filter *thisfilter,
+	php_stream_bucket_brigade *buckets_in,
+	php_stream_bucket_brigade *buckets_out,
+	size_t *bytes_consumed,
+	int flags
+	TSRMLS_DC)
+{
+	php_unicode_filter_data *data;
+	php_stream_filter_status_t exit_status = PSFS_FEED_ME;
+	size_t consumed = 0;
+
+	if (!thisfilter || !thisfilter->abstract) {
+		/* Should never happen */
+		return PSFS_ERR_FATAL;
+	}
+
+	data = (php_unicode_filter_data *)(thisfilter->abstract);
+	while (buckets_in->head) {
+		php_stream_bucket *bucket = buckets_in->head;
+		char *src, *src_end;
+
+		php_stream_bucket_unlink(bucket TSRMLS_CC);
+		if (bucket->is_unicode) {
+			/* already in unicode, nothing to do */
+			consumed += UBYTES(bucket->buf.ustr.len);
+			php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
+			exit_status = PSFS_PASS_ON;
+			continue;
+		}
+
+		/* only look at the binary view of the buffer once we know it is binary */
+		src = bucket->buf.str.val;
+		src_end = src + bucket->buf.str.len;
+
+		while (src < src_end) {
+			char *chunk_start = src;
+			int remaining = (int)(src_end - src);
+			UChar *destp, *destbuf;
+			/* capacity of the output buffer, counted in UChars */
+			int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv));
+			UErrorCode errCode = U_ZERO_ERROR;
+			php_stream_bucket *new_bucket;
+
+			/* destbuf + destlen below is a UChar limit, so the allocation must
+			 * hold destlen UChars, not destlen bytes. */
+			destp = destbuf = (UChar *)pemalloc(UBYTES(destlen), data->is_persistent);
+
+			ucnv_toUnicode(data->conv, &destp, destbuf + destlen, (const char**)&src, src_end, NULL, FALSE, &errCode);
+
+			if (destp == destbuf && src == chunk_start) {
+				/* The converter neither consumed input nor produced output:
+				 * retrying would loop forever, so give up on this stream. */
+				pefree(destbuf, data->is_persistent);
+				php_stream_bucket_delref(bucket TSRMLS_CC);
+				return PSFS_ERR_FATAL;
+			}
+
+			new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
+			php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
+			exit_status = PSFS_PASS_ON;
+		}
+		consumed += bucket->buf.str.len;
+		php_stream_bucket_delref(bucket TSRMLS_CC);
+	}
+
+	if (flags & PSFS_FLAG_FLUSH_CLOSE) {
+		UErrorCode errCode = U_ZERO_ERROR;
+		UChar d[64], *dest = d;
+		char no_input = 0;
+		const char *srcp = &no_input;
+		/* Spit it out! */
+
+		/* ICU rejects a NULL source pointer with U_ILLEGAL_ARGUMENT_ERROR, so
+		 * flush by converting an empty source range (srcp == limit). */
+		ucnv_toUnicode(data->conv, &dest, d + (sizeof(d) / sizeof(d[0])), &srcp, srcp, NULL, TRUE, &errCode);
+		if (dest > d) {
+			int32_t outlen = (int32_t)(dest - d);
+			/* The bucket outlives this stack frame, so it must own a heap copy
+			 * instead of pointing at the local array d. */
+			UChar *outbuf = (UChar *)pemalloc(UBYTES(outlen), data->is_persistent);
+			php_stream_bucket *bucket;
+
+			memcpy(outbuf, d, UBYTES(outlen));
+			bucket = php_stream_bucket_new_unicode(stream, outbuf, outlen, 1, data->is_persistent TSRMLS_CC);
+			php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
+			exit_status = PSFS_PASS_ON;
+		}
+	}
+
+	if (bytes_consumed) {
+		*bytes_consumed = consumed;
+	}
+
+	return exit_status;
+}
+
+/* unicode.tidy.* -- Expects anything -- Returns whatever is preferred by subsequent filters
+   Can be used to "magically" fix-up bucket messes
+
+   Asks php_stream_filter_output_prefer_unicode() which output type is wanted
+   and delegates to php_unicode_from_string_filter() (Unicode wanted) or
+   php_unicode_to_string_filter() (binary wanted). The shared converter is
+   reset for the new direction whenever the preference differs from the
+   direction recorded in data->to_unicode.
+
+   Returns the status of the delegated filter, or PSFS_ERR_FATAL when the
+   filter state is missing. */
+static php_stream_filter_status_t php_unicode_tidy_filter(
+	php_stream *stream,
+	php_stream_filter *thisfilter,
+	php_stream_bucket_brigade *buckets_in,
+	php_stream_bucket_brigade *buckets_out,
+	size_t *bytes_consumed,
+	int flags
+	TSRMLS_DC)
+{
+	php_unicode_filter_data *data;
+	int prefer_unicode;
+
+	if (!thisfilter || !thisfilter->abstract) {
+		/* Should never happen */
+		return PSFS_ERR_FATAL;
+	}
+
+	data = (php_unicode_filter_data *)(thisfilter->abstract);
+
+	/* query the preference only after thisfilter is known to be non-NULL */
+	prefer_unicode = php_stream_filter_output_prefer_unicode(thisfilter);
+
+	if (prefer_unicode) {
+		if (!data->to_unicode) {
+			ucnv_resetToUnicode(data->conv);
+			/* store a plain 1: to_unicode is a char and must not be fed a truncated int */
+			data->to_unicode = 1;
+		}
+		return php_unicode_from_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC);
+	}
+
+	if (data->to_unicode) {
+		ucnv_resetFromUnicode(data->conv);
+		data->to_unicode = 0;
+	}
+	return php_unicode_to_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC);
+}
+
+/* Filter destructor: closes the ICU converter and frees the filter state
+ * stored in thisfilter->abstract. Does nothing when either is missing. */
+static void php_unicode_filter_dtor(php_stream_filter *thisfilter TSRMLS_DC)
+{
+	php_unicode_filter_data *filter_state;
+
+	if (!thisfilter || !thisfilter->abstract) {
+		return;
+	}
+
+	filter_state = (php_unicode_filter_data *)thisfilter->abstract;
+	ucnv_close(filter_state->conv);
+	pefree(filter_state, filter_state->is_persistent);
+}
+
+/* Ops table for unicode.to.*: filter callback, destructor, label, and flags
+ * declaring Unicode input and binary string output. */
+static php_stream_filter_ops php_unicode_to_string_filter_ops = {
+ php_unicode_to_string_filter,
+ php_unicode_filter_dtor,
+ "unicode.to.*",
+ PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING
+};
+
+/* Ops table for unicode.from.*: flags declare binary string input and
+ * Unicode output. */
+static php_stream_filter_ops php_unicode_from_string_filter_ops = {
+ php_unicode_from_string_filter,
+ php_unicode_filter_dtor,
+ "unicode.from.*",
+ PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE
+};
+
+/* Ops table for unicode.tidy.*: flags declare that any input is accepted and
+ * any output may be produced. */
+static php_stream_filter_ops php_unicode_tidy_filter_ops = {
+ php_unicode_tidy_filter,
+ php_unicode_filter_dtor,
+ "unicode.tidy.*",
+ PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY
+};
+/* }}} */
+
+
+/* {{{ unicode.* factory */
+
+/* Factory callback for the unicode.* filter family.
+ *
+ * filtername   - "unicode.to.<charset>", "unicode.from.<charset>",
+ *                "unicode.tidy.<charset>" or plain "unicode.tidy" (uses "utf8")
+ * filterparams - not used by this factory
+ * persistent   - allocation mode for the filter state and its buffers
+ *
+ * Opens an ICU converter for the requested charset and returns a filter bound
+ * to the matching ops table. Returns NULL when the name is not recognised or
+ * when the converter cannot be opened (an E_WARNING is raised in that case).
+ */
+static php_stream_filter *php_unicode_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
+{
+	php_unicode_filter_data *data;
+	const char *charset, *direction;
+	php_stream_filter_ops *fops;
+	UErrorCode ucnvError = U_ZERO_ERROR;
+	char to_unicode = 0;
+
+	if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) {
+		/* Never happens */
+		return NULL;
+	}
+
+	direction = filtername + sizeof("unicode.") - 1;
+	if (strncmp(direction, "to.", sizeof("to.") - 1) == 0) {
+		fops = &php_unicode_to_string_filter_ops;
+		charset = direction + sizeof("to.") - 1;
+	} else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) {
+		fops = &php_unicode_from_string_filter_ops;
+		to_unicode = 1;
+		charset = direction + sizeof("from.") - 1;
+	} else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) {
+		fops = &php_unicode_tidy_filter_ops;
+		charset = direction + sizeof("tidy.") - 1;
+	} else if (strcmp(direction, "tidy") == 0) {
+		fops = &php_unicode_tidy_filter_ops;
+		charset = "utf8";
+	} else {
+		/* Shouldn't happen */
+		return NULL;
+	}
+
+	/* Create this filter */
+	data = (php_unicode_filter_data *)pecalloc(1, sizeof(php_unicode_filter_data), persistent);
+	if (!data) {
+		/* sizeof yields size_t; cast so the argument matches the %d conversion */
+		php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failed allocating %d bytes.", (int) sizeof(php_unicode_filter_data));
+		return NULL;
+	}
+
+	/* Remember the allocation mode: the filters and the dtor use this flag for
+	 * pemalloc()/pefree(), so it has to match how data itself was allocated. */
+	data->is_persistent = persistent ? 1 : 0;
+	data->conv = ucnv_open(charset, &ucnvError);
+	data->to_unicode = to_unicode;
+	if (!data->conv) {
+		char *reason = "Unknown Error";
+		pefree(data, persistent);
+		switch (ucnvError) {
+			case U_MEMORY_ALLOCATION_ERROR:
+				reason = "unable to allocate memory";
+				break;
+			case U_FILE_ACCESS_ERROR:
+				reason = "file access error";
+				break;
+			default:
+				;
+		}
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to open charset converter, %s", reason);
+		return NULL;
+	}
+
+	return php_stream_filter_alloc(fops, data, persistent);
+}
+
+/* Exported filter factory whose create callback is php_unicode_filter_create(). */
+php_stream_filter_factory php_unicode_filter_factory = {
+ php_unicode_filter_create
+};
+/* }}} */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: sw=4 ts=4 fdm=marker
+ * vim<600: sw=4 ts=4
+ */
+
zend_hash_index_find(Z_ARRVAL_P(handler), 1, (void **) &method) == SUCCESS &&
Z_TYPE_PP(obj) == IS_OBJECT &&
Z_TYPE_PP(method) == IS_STRING) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler %s::%s()", Z_OBJCE_PP(obj)->name, Z_STRVAL_PP(method));
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler %v::%R()", Z_OBJCE_PP(obj)->name, Z_TYPE_PP(method), Z_UNIVAL_PP(method));
} else
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler");
}
while (buckets_in->head) {
size_t bin = 0, desired;
+ bucket = buckets_in->head;
+
+ if (bucket->is_unicode) {
+ /* inflation not allowed for unicode data */
+ return PSFS_ERR_FATAL;
+ }
+
bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
- while (bin < bucket->buflen) {
- desired = bucket->buflen - bin;
+ while (bin < bucket->buf.str.len) {
+ desired = bucket->buf.str.len - bin;
if (desired > data->inbuf_len) {
desired = data->inbuf_len;
}
- memcpy(data->strm.next_in, bucket->buf + bin, desired);
+ memcpy(data->strm.next_in, bucket->buf.str.val + bin, desired);
data->strm.avail_in = desired;
status = inflate(&(data->strm), flags & PSFS_FLAG_FLUSH_CLOSE ? Z_FINISH : Z_SYNC_FLUSH);
desired -= data->strm.avail_in; /* desired becomes what we consumed this round through */
data->strm.next_in = data->inbuf;
data->strm.avail_in = 0;
- consumed += desired;
bin += desired;
if (data->strm.avail_out < data->outbuf_len) {
exit_status = PSFS_PASS_ON;
}
}
+ consumed += bucket->buf.str.len;
php_stream_bucket_delref(bucket TSRMLS_CC);
}
static php_stream_filter_ops php_zlib_inflate_ops = {
php_zlib_inflate_filter,
php_zlib_inflate_dtor,
- "zlib.inflate"
+ "zlib.inflate",
+ PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING
};
/* }}} */
while (buckets_in->head) {
size_t bin = 0, desired;
- bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
+ bucket = buckets_in->head;
- while (bin < bucket->buflen) {
- desired = bucket->buflen - bin;
+ if (bucket->is_unicode) {
+ /* inflation not allowed for unicode data */
+ return PSFS_ERR_FATAL;
+ }
+
+ bucket = php_stream_bucket_make_writeable(bucket TSRMLS_CC);
+
+ while (bin < bucket->buf.str.len) {
+ desired = bucket->buf.str.len - bin;
if (desired > data->inbuf_len) {
desired = data->inbuf_len;
}
- memcpy(data->strm.next_in, bucket->buf + bin, desired);
+ memcpy(data->strm.next_in, bucket->buf.str.val + bin, desired);
data->strm.avail_in = desired;
status = deflate(&(data->strm), flags & PSFS_FLAG_FLUSH_CLOSE ? Z_FULL_FLUSH : (flags & PSFS_FLAG_FLUSH_INC ? Z_SYNC_FLUSH : Z_NO_FLUSH));
desired -= data->strm.avail_in; /* desired becomes what we consumed this round through */
data->strm.next_in = data->inbuf;
data->strm.avail_in = 0;
- consumed += desired;
bin += desired;
if (data->strm.avail_out < data->outbuf_len) {
exit_status = PSFS_PASS_ON;
}
}
+ consumed += bucket->buf.str.len;
php_stream_bucket_delref(bucket TSRMLS_CC);
}
if (bytes_consumed) {
*bytes_consumed = consumed;
}
+
return exit_status;
}
static php_stream_filter_ops php_zlib_deflate_ops = {
php_zlib_deflate_filter,
php_zlib_deflate_dtor,
- "zlib.deflate"
+ "zlib.deflate",
+ PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING
};
/* }}} */
}
+/* {{{ sapi_update_default_charset */
+/* Refreshes SG(default_charset).
+ * With UG(unicode) enabled it becomes the "MIME" standard name of the output
+ * encoding converter as reported by ICU; otherwise it is read from the
+ * "default_charset" INI setting. */
+SAPI_API void sapi_update_default_charset(TSRMLS_D)
+{
+	UErrorCode icu_status = U_ZERO_ERROR;
+	const char *conv_name = NULL;
+
+	if (!UG(unicode)) {
+		SG(default_charset) = zend_ini_string("default_charset", sizeof("default_charset"), 0);
+		return;
+	}
+
+	conv_name = ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &icu_status);
+	SG(default_charset) = (char *)ucnv_getStandardName(conv_name, "MIME", &icu_status);
+}
+/* }}} */
+
+
SAPI_API char *sapi_get_default_content_type(TSRMLS_D)
{
char *mimetype, *charset, *content_type;
mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
+ /*
+ * Apache SAPI may invoke this function directly, before php_request_startup() is
+ * called, so we need to update the default charset explicitly.
+ */
+ sapi_update_default_charset(TSRMLS_C);
charset = SG(default_charset) ? SG(default_charset) : SAPI_DEFAULT_CHARSET;
if (strncasecmp(mimetype, "text/", 5) == 0 && *charset) {
{
char *charset, *newtype;
size_t newlen;
+
charset = SG(default_charset) ? SG(default_charset) : SAPI_DEFAULT_CHARSET;
if (*mimetype != NULL) {
SAPI_API char *sapi_get_default_content_type(TSRMLS_D);
SAPI_API void sapi_get_default_content_type_header(sapi_header_struct *default_header TSRMLS_DC);
SAPI_API size_t sapi_apply_default_charset(char **mimetype, size_t len TSRMLS_DC);
+SAPI_API void sapi_update_default_charset(TSRMLS_D);
SAPI_API void sapi_activate_headers_only(TSRMLS_D);
SAPI_API int sapi_get_fd(int *fd TSRMLS_DC);
zend_set_timeout(EG(timeout_seconds));
return SUCCESS;
}
+
+/* INI modification handler for unicode.output_encoding.
+ * A NULL value closes and clears the output converter. Any other value is
+ * applied with zend_set_converter_encoding(); on failure an E_CORE_ERROR is
+ * raised and FAILURE is returned. After a successful switch the converter
+ * gets the current error mode and substitution character, and at runtime
+ * stage the SAPI default charset is refreshed. */
+static ZEND_INI_MH(OnUpdateOutputEncoding)
+{
+	if (!new_value) {
+		if (UG(output_encoding_conv)) {
+			ucnv_close(UG(output_encoding_conv));
+		}
+		UG(output_encoding_conv) = NULL;
+		return SUCCESS;
+	}
+
+	if (zend_set_converter_encoding(&UG(output_encoding_conv), new_value) == FAILURE) {
+		zend_error(E_CORE_ERROR, "Unrecognized encoding '%s' used for %s", new_value ? new_value : "null", entry->name);
+		return FAILURE;
+	}
+
+	if (!UG(output_encoding_conv)) {
+		return SUCCESS;
+	}
+
+	zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_u_error_mode));
+	zend_set_converter_subst_char(UG(output_encoding_conv), UG(subst_char), UG(subst_char_len));
+	if (stage == ZEND_INI_STAGE_RUNTIME) {
+		sapi_update_default_charset(TSRMLS_C);
+	}
+
+	return SUCCESS;
+}
/* }}} */
/* Need to convert to strings and make use of:
STD_PHP_INI_ENTRY("doc_root", NULL, PHP_INI_SYSTEM, OnUpdateStringUnempty, doc_root, php_core_globals, core_globals)
STD_PHP_INI_ENTRY("default_charset", SAPI_DEFAULT_CHARSET, PHP_INI_ALL, OnUpdateString, default_charset, sapi_globals_struct,sapi_globals)
STD_PHP_INI_ENTRY("default_mimetype", SAPI_DEFAULT_MIMETYPE, PHP_INI_ALL, OnUpdateString, default_mimetype, sapi_globals_struct,sapi_globals)
+ ZEND_INI_ENTRY("unicode.output_encoding", NULL, ZEND_INI_ALL, OnUpdateOutputEncoding)
STD_PHP_INI_ENTRY("error_log", NULL, PHP_INI_ALL, OnUpdateString, error_log, php_core_globals, core_globals)
STD_PHP_INI_ENTRY("extension_dir", PHP_EXTENSION_DIR, PHP_INI_SYSTEM, OnUpdateStringUnempty, extension_dir, php_core_globals, core_globals)
STD_PHP_INI_ENTRY("include_path", PHP_INCLUDE_PATH, PHP_INI_ALL, OnUpdateStringUnempty, include_path, php_core_globals, core_globals)
int buffer_len = 0;
char *space;
char *class_name = get_active_class_name(&space TSRMLS_CC);
- char *function;
+ char *function = NULL;
char *origin;
char *message;
- int is_function = 0;
+ char *stage;
/* get error text into buffer and escape for html if necessary */
buffer_len = vspprintf(&buffer, 0, format, args);
/* which function caused the problem if any at all */
if (php_during_module_startup()) {
- function = "PHP Startup";
+ stage = "PHP Startup";
} else if (php_during_module_shutdown()) {
- function = "PHP Shutdown";
+ stage = "PHP Shutdown";
} else {
function = get_active_function_name(TSRMLS_C);
- if (!function || !strlen(function)) {
- function = "Unknown";
- } else {
- is_function = 1;
+ if (function && !USTR_LEN(function)) {
+ stage = "Unknown";
+ function = NULL;
}
}
/* if we still have memory then format the origin */
- if (is_function) {
- spprintf(&origin, 0, "%s%s%s(%s)", class_name, space, function, params);
+ if (function) {
+ spprintf(&origin, 0, "%v%s%v(%s)", class_name, space, function, params);
} else {
- spprintf(&origin, 0, "%s", function);
+ spprintf(&origin, 0, "%v", stage);
}
/* origin and buffer available, so lets come up with the error message */
}
/* no docref given but function is known (the default) */
- if (!docref && is_function) {
- spprintf(&docref_buf, 0, "function.%s", function);
+ if (!docref && function) {
+ spprintf(&docref_buf, 0, "function.%v", function);
while((p = strchr(docref_buf, '_')) != NULL) {
*p = '-';
}
* - we show erroes in html mode OR
* - the user wants to see the links anyway
*/
- if (docref && is_function && (PG(html_errors) || strlen(PG(docref_root)))) {
+ if (docref && function && (PG(html_errors) || strlen(PG(docref_root)))) {
if (strncmp(docref, "http://", 7)) {
/* We don't have 'http://' so we use docref_root */
/* We turn this off in php_execute_script() */
/* PG(during_request_startup) = 0; */
+ sapi_update_default_charset(TSRMLS_C);
+
php_hash_environment(TSRMLS_C);
zend_activate_modules(TSRMLS_C);
PG(modules_activated)=1;
zend_utility_values zuv;
int module_number=0; /* for REGISTER_INI_ENTRIES() */
char *php_os;
+ zend_bool orig_unicode;
#ifdef ZTS
zend_executor_globals *executor_globals;
void ***tsrm_ls;
REGISTER_INI_ENTRIES();
zend_register_standard_ini_entries(TSRMLS_C);
+ orig_unicode = UG(unicode);
+ UG(unicode) = 0;
+
/* Disable realpath cache if safe_mode or open_basedir are set */
if (PG(safe_mode) || (PG(open_basedir) && *PG(open_basedir))) {
CWDG(realpath_cache_size_limit) = 0;
/* start Zend extensions */
zend_startup_extensions();
-#ifdef ZTS
+ UG(unicode) = orig_unicode;
zend_post_startup(TSRMLS_C);
-#endif
module_initialized = 1;
sapi_deactivate(TSRMLS_C);
}
}
} else if (output_handler && output_handler->type == IS_OBJECT) {
- php_error_docref(NULL TSRMLS_CC, E_ERROR, "No method name given: use ob_start(array($object,'method')) to specify instance $object and the name of a method of class %s to use as output handler", Z_OBJCE_P(output_handler)->name);
+ php_error_docref(NULL TSRMLS_CC, E_ERROR, "No method name given: use ob_start(array($object,'method')) to specify instance $object and the name of a method of class %v to use as output handler", Z_OBJCE_P(output_handler)->name);
result = FAILURE;
} else {
result = php_ob_init_named(initial_size, block_size, OB_DEFAULT_HANDLER_NAME, NULL, chunk_size, erase TSRMLS_CC);
BEGIN_EXTERN_C()
void phperror(char *error);
PHPAPI int php_write(void *buf, uint size TSRMLS_DC);
-PHPAPI int php_printf(const char *format, ...) PHP_ATTRIBUTE_FORMAT(printf, 1,
- 2);
+PHPAPI int php_printf(const char *format, ...);
PHPAPI void php_log_err(char *log_message TSRMLS_DC);
-int Debug(char *format, ...) PHP_ATTRIBUTE_FORMAT(printf, 1, 2);
+int Debug(char *format, ...);
int cfgparse(void);
END_EXTERN_C()
PHPAPI void php_set_error_handling(error_handling_t error_handling, zend_class_entry *exception_class TSRMLS_DC);
#define php_std_error_handling() php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC)
-PHPAPI void php_verror(const char *docref, const char *params, int type, const char *format, va_list args TSRMLS_DC) PHP_ATTRIBUTE_FORMAT(printf, 4, 0);
+PHPAPI void php_verror(const char *docref, const char *params, int type, const char *format, va_list args TSRMLS_DC);
#ifdef ZTS
#define PHP_ATTR_FMT_OFFSET 1
#endif
/* PHPAPI void php_error(int type, const char *format, ...); */
-PHPAPI void php_error_docref0(const char *docref TSRMLS_DC, int type, const char *format, ...)
- PHP_ATTRIBUTE_FORMAT(printf, PHP_ATTR_FMT_OFFSET + 3, PHP_ATTR_FMT_OFFSET + 4);
-PHPAPI void php_error_docref1(const char *docref TSRMLS_DC, const char *param1, int type, const char *format, ...)
- PHP_ATTRIBUTE_FORMAT(printf, PHP_ATTR_FMT_OFFSET + 4, PHP_ATTR_FMT_OFFSET + 5);
-PHPAPI void php_error_docref2(const char *docref TSRMLS_DC, const char *param1, const char *param2, int type, const char *format, ...)
- PHP_ATTRIBUTE_FORMAT(printf, PHP_ATTR_FMT_OFFSET + 5, PHP_ATTR_FMT_OFFSET + 6);
+PHPAPI void php_error_docref0(const char *docref TSRMLS_DC, int type, const char *format, ...);
+PHPAPI void php_error_docref1(const char *docref TSRMLS_DC, const char *param1, int type, const char *format, ...);
+PHPAPI void php_error_docref2(const char *docref TSRMLS_DC, const char *param1, const char *param2, int type, const char *format, ...);
END_EXTERN_C()
#define php_error_docref php_error_docref0
/* buffer */
off_t position; /* of underlying stream */
- unsigned char *readbuf;
- size_t readbuflen;
- off_t readpos;
- off_t writepos;
+
+ php_stream_bucket_brigade readbuf;
+ off_t readbuf_ofs, readbuf_avail;
/* how much data to read when filling buffer */
size_t chunk_size;
PHPAPI off_t _php_stream_tell(php_stream *stream TSRMLS_DC);
#define php_stream_tell(stream) _php_stream_tell((stream) TSRMLS_CC)
+/* Convert using runtime_encoding if necessary -- return string */
PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t count TSRMLS_DC);
#define php_stream_read(stream, buf, count) _php_stream_read((stream), (buf), (count) TSRMLS_CC)
+/* Convert using runtime_encoding if necessary -- return unicode */
+PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int32_t size TSRMLS_DC);
+#define php_stream_read_unicode(stream, buf, size) _php_stream_read_unicode((stream), (buf), (size) TSRMLS_CC)
+
+/* Read count data points (char or UChar) until buffered type changes -- return single type without conversion */
+PHPAPI void *_php_stream_u_read(php_stream *stream, void *buf, int32_t *pnum_bytes, int32_t *pnum_chars, int *pis_unicode TSRMLS_DC);
+#define php_stream_u_read(stream, buf, pnum_bytes, pnum_chars, pis_unicode) \
+ _php_stream_u_read((stream), (buf), (pnum_bytes), (pnum_chars), (pis_unicode) TSRMLS_CC)
+
PHPAPI size_t _php_stream_write(php_stream *stream, const char *buf, size_t count TSRMLS_DC);
#define php_stream_write_string(stream, str) _php_stream_write(stream, str, strlen(str) TSRMLS_CC)
#define php_stream_write(stream, buf, count) _php_stream_write(stream, (buf), (count) TSRMLS_CC)
+PHPAPI size_t _php_stream_u_write(php_stream *stream, const UChar *buf, int32_t count TSRMLS_DC);
+#define php_stream_u_write(stream, buf, count) _php_stream_u_write((stream), (buf), (count) TSRMLS_CC)
+
PHPAPI size_t _php_stream_printf(php_stream *stream TSRMLS_DC, const char *fmt, ...);
/* php_stream_printf macro & function require TSRMLS_CC */
#define php_stream_printf _php_stream_printf
PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC);
#define php_stream_flush(stream) _php_stream_flush((stream), 0 TSRMLS_CC)
+PHPAPI void _php_stream_flush_readbuf(php_stream *stream TSRMLS_DC);
+#define php_stream_flush_readbuf(stream) _php_stream_flush_readbuf((stream) TSRMLS_CC)
+
PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen, size_t *returned_len TSRMLS_DC);
#define php_stream_gets(stream, buf, maxlen) _php_stream_get_line((stream), (buf), (maxlen), NULL TSRMLS_CC)
-
#define php_stream_get_line(stream, buf, maxlen, retlen) _php_stream_get_line((stream), (buf), (maxlen), (retlen) TSRMLS_CC)
PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *returned_len, char *delim, size_t delim_len TSRMLS_DC);
+PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_u16, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC);
+#define php_stream_u_get_line(stream, buf, max_u16, max_chars, is_unicode) _php_stream_u_get_line((stream), (buf), (max_u16), (max_chars), (is_unicode) TSRMLS_CC)
+
/* CAREFUL! this is equivalent to puts NOT fputs! */
PHPAPI int _php_stream_puts(php_stream *stream, char *buf TSRMLS_DC);
#define php_stream_puts(stream, buf) _php_stream_puts((stream), (buf) TSRMLS_CC)
+/* php_stream_will_read_unicode(stream) forwards to
+ * _php_stream_will_read_unicode() with the TSRM context appended. The macro
+ * deliberately carries no trailing semicolon so that it can be used inside
+ * expressions such as if (...) conditions. */
+PHPAPI int _php_stream_will_read_unicode(php_stream *stream TSRMLS_DC);
+#define php_stream_will_read_unicode(stream)	_php_stream_will_read_unicode((stream) TSRMLS_CC)
+
PHPAPI int _php_stream_stat(php_stream *stream, php_stream_statbuf *ssb TSRMLS_DC);
#define php_stream_stat(stream, ssb) _php_stream_stat((stream), (ssb) TSRMLS_CC)
php_register_variable_ex(var, &new_entry, track_vars_array TSRMLS_CC);
}
+/* Registers a Unicode request variable.
+ * Wraps a duplicate of the first str_len code units of strval in an
+ * IS_UNICODE zval and hands it to php_u_register_variable_ex() under the
+ * name var. strval must not be NULL. */
+PHPAPI void php_u_register_variable_safe(UChar *var, UChar *strval, int32_t str_len, zval *track_vars_array TSRMLS_DC)
+{
+	zval new_entry;
+
+	assert(strval != NULL);
+
+	/* Prepare value */
+	/* UTODO implement php_u_addslashes(): until it exists the value is copied
+	 * verbatim whether or not magic_quotes_gpc is enabled. */
+	Z_TYPE(new_entry) = IS_UNICODE;
+	Z_USTRLEN(new_entry) = str_len;
+	Z_USTRVAL(new_entry) = eustrndup(strval, Z_USTRLEN(new_entry));
+
+	php_u_register_variable_ex(var, &new_entry, track_vars_array TSRMLS_CC);
+}
+
PHPAPI void php_register_variable_ex(char *var, zval *val, zval *track_vars_array TSRMLS_DC)
{
char *p = NULL;
}
}
+/* Registers the value val under the Unicode variable name var.
+ *
+ * The target table is track_vars_array's hash table when given, otherwise the
+ * active symbol table if register_globals is on; with neither, val is
+ * destroyed and nothing is registered.
+ *
+ * The name is modified in place: it is cut at the first '[', leading spaces
+ * are skipped, and spaces and dots are replaced by '_'. Bracketed parts
+ * ("a[b][c]", "a[]") are walked one level at a time, creating intermediate
+ * arrays as needed; an empty "[]" appends with the next numeric index. A '['
+ * without a closing ']' is turned into '_' and the name is stored as a plain
+ * variable. An empty name destroys val and returns.
+ */
+PHPAPI void php_u_register_variable_ex(UChar *var, zval *val, pval *track_vars_array TSRMLS_DC)
+{
+ UChar *p = NULL;
+ UChar *ip; /* index pointer */
+ UChar *index;
+ int32_t var_len, index_len;
+ zval *gpc_element, **gpc_element_p;
+ zend_bool is_array;
+ HashTable *symtable1=NULL;
+
+ assert(var != NULL);
+
+ if (track_vars_array) {
+ symtable1 = Z_ARRVAL_P(track_vars_array);
+ } else if (PG(register_globals)) {
+ symtable1 = EG(active_symbol_table);
+ }
+ if (!symtable1) {
+ /* Nothing to do */
+ zval_dtor(val);
+ return;
+ }
+
+ /*
+ * Prepare variable name
+ */
+ ip = u_strchr(var, 0x5b /*'['*/);
+ if (ip) {
+ is_array = 1;
+ /* terminate the base name at the '[' */
+ *ip = 0;
+ } else {
+ is_array = 0;
+ }
+ /* ignore leading spaces in the variable name */
+ while (*var && *var==0x20 /*' '*/) {
+ var++;
+ }
+ var_len = u_strlen(var);
+ if (var_len==0) { /* empty variable name, or variable name with a space in it */
+ zval_dtor(val);
+ return;
+ }
+ /* ensure that we don't have spaces or dots in the variable name (not binary safe) */
+ for (p=var; *p; p++) {
+ switch(*p) {
+ case 0x20: /*' '*/
+ case 0x2e: /*'.'*/
+ *p=0x5f; /*'_'*/
+ break;
+ }
+ }
+
+ index = var;
+ index_len = var_len;
+
+ /* each pass handles one name component; the loop ends after the plain (final) one */
+ while (1) {
+ if (is_array) {
+ UChar *escaped_index = NULL, *index_s;
+ int32_t new_idx_len = 0;
+
+ /* step past the position where the '[' was */
+ ip++;
+ index_s = ip;
+ if (u_isspace(*ip)) {
+ ip++;
+ }
+ if (*ip==0x5d /*']'*/) {
+ /* "[]": no key, the next level is appended */
+ index_s = NULL;
+ } else {
+ ip = u_strchr(ip, 0x5d /*']'*/);
+ if (!ip) {
+ /* PHP variables cannot contain '[' in their names, so we replace the character with a '_' */
+ *(index_s - 1) = 0x5f; /*'_'*/
+
+ index_len = var_len = 0;
+ if (index) {
+ index_len = var_len = u_strlen(index);
+ }
+ goto plain_var;
+ return;
+ }
+ /* terminate the key at the ']' */
+ *ip = 0;
+ new_idx_len = u_strlen(index_s);
+ }
+
+ if (!index) {
+ MAKE_STD_ZVAL(gpc_element);
+ array_init(gpc_element);
+ zend_hash_next_index_insert(symtable1, &gpc_element, sizeof(zval *), (void **) &gpc_element_p);
+ } else {
+ if (PG(magic_quotes_gpc) && (index!=var)) {
+ /* UTODO fix for magic_quotes_gpc case */
+ /* no need to addslashes() the index if it's the main variable name */
+ /* escaped_index = php_addslashes(index, index_len, &index_len, 0 TSRMLS_CC); */
+ escaped_index = index;
+ } else {
+ escaped_index = index;
+ }
+ /* reuse the existing entry only if it is an array; otherwise (re)create it */
+ if (zend_u_symtable_find(symtable1, IS_UNICODE, escaped_index, index_len+1, (void **) &gpc_element_p)==FAILURE
+ || Z_TYPE_PP(gpc_element_p) != IS_ARRAY) {
+ MAKE_STD_ZVAL(gpc_element);
+ array_init(gpc_element);
+ zend_u_symtable_update(symtable1, IS_UNICODE, escaped_index, index_len+1, &gpc_element, sizeof(zval *), (void **) &gpc_element_p);
+ }
+ if (index!=escaped_index) {
+ efree(escaped_index);
+ }
+ }
+ /* descend into the array for the next component */
+ symtable1 = Z_ARRVAL_PP(gpc_element_p);
+ /* ip pointed to the '[' character, now obtain the key */
+ index = index_s;
+ index_len = new_idx_len;
+
+ ip++;
+ if (*ip==0x5b /*'['*/) {
+ is_array = 1;
+ *ip = 0;
+ } else {
+ is_array = 0;
+ }
+ } else {
+plain_var:
+ /* final component: store a new zval carrying val's value and type */
+ MAKE_STD_ZVAL(gpc_element);
+ gpc_element->value = val->value;
+ Z_TYPE_P(gpc_element) = Z_TYPE_P(val);
+ if (!index) {
+ zend_hash_next_index_insert(symtable1, &gpc_element, sizeof(zval *), (void **) &gpc_element_p);
+ } else {
+ /* UTODO fix for php_addslashes case */
+ /* char *escaped_index = php_addslashes(index, index_len, &index_len, 0 TSRMLS_CC); */
+ UChar *escaped_index = index;
+ zend_u_symtable_update(symtable1, IS_UNICODE, escaped_index, index_len+1, &gpc_element, sizeof(zval *), (void **) &gpc_element_p);
+ /* efree(escaped_index); */
+ }
+ break;
+ }
+ }
+}
+
SAPI_API SAPI_POST_HANDLER_FUNC(php_std_post_handler)
{
char *var, *val;
char *strtok_buf = NULL;
zval *array_ptr = (zval *) arg;
+ UConverter *input_conv = UG(http_input_encoding_conv);
if (SG(request_info).post_data == NULL) {
return;
}
+ if (!input_conv) {
+ input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv));
+ }
+
var = php_strtok_r(SG(request_info).post_data, "&", &strtok_buf);
while (var) {
val = strchr(var, '=');
if (val) { /* have a value */
- unsigned int val_len, new_val_len;
-
- *val++ = '\0';
- php_url_decode(var, strlen(var));
- val_len = php_url_decode(val, strlen(val));
- val = estrndup(val, val_len);
- if (sapi_module.input_filter(PARSE_POST, var, &val, val_len, &new_val_len TSRMLS_CC)) {
- php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
+ if (UG(unicode)) {
+ UChar *u_var, *u_val;
+ int32_t u_var_len, u_val_len;
+ int32_t var_len;
+ int32_t val_len;
+ UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
+
+ *val++ = '\0';
+ var_len = strlen(var);
+ php_url_decode(var, var_len);
+ val_len = php_url_decode(val, strlen(val));
+ zend_convert_to_unicode(input_conv, &u_var, &u_var_len, var, var_len, &status1);
+ zend_convert_to_unicode(input_conv, &u_val, &u_val_len, val, val_len, &status2);
+ if (U_SUCCESS(status1) && U_SUCCESS(status2)) {
+ /* UTODO add input filtering */
+ php_u_register_variable_safe(u_var, u_val, u_val_len, array_ptr TSRMLS_CC);
+ } else {
+ /* UTODO set a user-accessible flag to indicate that conversion failed? */
+ }
+ efree(u_var);
+ efree(u_val);
+ } else {
+ unsigned int val_len, new_val_len;
+
+ *val++ = '\0';
+ php_url_decode(var, strlen(var));
+ val_len = php_url_decode(val, strlen(val));
+ val = estrndup(val, val_len);
+ if (sapi_module.input_filter(PARSE_POST, var, &val, val_len, &new_val_len TSRMLS_CC)) {
+ php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
+ }
+ efree(val);
}
- efree(val);
}
var = php_strtok_r(NULL, "&", &strtok_buf);
}
zval *array_ptr;
int free_buffer = 0;
char *strtok_buf = NULL;
+ UConverter *input_conv = UG(http_input_encoding_conv);
switch (arg) {
case PARSE_POST:
break;
}
+ if (!input_conv) {
+ input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv));
+ }
+
var = php_strtok_r(res, separator, &strtok_buf);
while (var) {
- val = strchr(var, '=');
- if (val) { /* have a value */
- int val_len;
- unsigned int new_val_len;
+ int32_t var_len;
+ val = strchr(var, '=');
+ if (val) {
*val++ = '\0';
- php_url_decode(var, strlen(var));
- val_len = php_url_decode(val, strlen(val));
- val = estrndup(val, val_len);
- if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) {
- php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
+ }
+ var_len = strlen(var);
+ php_url_decode(var, var_len);
+
+ if (UG(unicode)) {
+ UChar *u_var, *u_val;
+ int32_t u_var_len, u_val_len;
+ UErrorCode status = U_ZERO_ERROR;
+
+ zend_convert_to_unicode(input_conv, &u_var, &u_var_len, var, var_len, &status);
+ if (U_FAILURE(status)) {
+ /* UTODO set a user-accessible flag to indicate that conversion failed? */
+ efree(u_var);
+ goto next_var;
+ }
+
+ if (val) { /* have a value */
+ int val_len;
+ unsigned int new_val_len;
+
+ val_len = php_url_decode(val, strlen(val));
+ zend_convert_to_unicode(input_conv, &u_val, &u_val_len, val, val_len, &status);
+ if (U_FAILURE(status)) {
+ /* UTODO set a user-accessible flag to indicate that conversion failed? */
+ efree(u_var);
+ efree(u_val);
+ goto next_var;
+ }
+ php_u_register_variable_safe(u_var, u_val, u_val_len, array_ptr TSRMLS_CC);
+ /* UTODO need to make input_filter Unicode aware */
+ /*
+ if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) {
+ php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
+ }
+ */
+ efree(u_var);
+ efree(u_val);
+ } else {
+ u_val_len = 0;
+ u_val = USTR_MAKE("");
+ php_u_register_variable_safe(u_var, u_val, u_val_len, array_ptr TSRMLS_CC);
+ /*
+ if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) {
+ php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
+ }
+ */
+ efree(u_val);
}
- efree(val);
} else {
- int val_len;
- unsigned int new_val_len;
-
- php_url_decode(var, strlen(var));
- val_len = 0;
- val = estrndup("", val_len);
- if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) {
- php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
+ if (val) { /* have a value */
+ int val_len;
+ unsigned int new_val_len;
+
+ *val++ = '\0';
+ val_len = php_url_decode(val, strlen(val));
+ val = estrndup(val, val_len);
+ if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) {
+ php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
+ }
+ efree(val);
+ } else {
+ int val_len;
+ unsigned int new_val_len;
+
+ val_len = 0;
+ val = estrndup("", val_len);
+ if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) {
+ php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
+ }
+ efree(val);
}
- efree(val);
}
+next_var:
var = php_strtok_r(NULL, separator, &strtok_buf);
}
/* binary-safe version */
PHPAPI void php_register_variable_safe(char *var, char *val, int val_len, pval *track_vars_array TSRMLS_DC);
PHPAPI void php_register_variable_ex(char *var, zval *val, pval *track_vars_array TSRMLS_DC);
+PHPAPI void php_u_register_variable_safe(UChar *var, UChar *strval, int32_t str_len, zval *track_vars_array TSRMLS_DC);
+PHPAPI void php_u_register_variable_ex(UChar *var, zval *val, pval *track_vars_array TSRMLS_DC);
int php_hash_environment(TSRMLS_D);
END_EXTERN_C()
}
+/*
+ * Normalizes a Unicode (UChar) variable name in place:
+ *  - leading U+0020 spaces are removed;
+ *  - in the part before the first '[', every ' ' and '.' becomes '_';
+ *  - inside each "[...]" segment, leading space/CR/LF/TAB characters are
+ *    dropped by shifting the remainder of the string to the left;
+ *  - the string is terminated after the last processed segment, so anything
+ *    that follows a ']' and is not another '[' is cut off.
+ * The buffer is modified directly; nothing is allocated or returned.
+ */
+static void normalize_u_protected_variable(UChar *varname TSRMLS_DC)
+{
+ UChar *s=varname, *index=NULL, *indexend=NULL, *p;
+
+ /* skip over leading spaces */
+ while (*s == 0x20 /*' '*/) {
+ s++;
+ }
+
+ /* and remove them (the +1 moves the terminating NUL as well) */
+ if (s != varname) {
+ u_memmove(varname, s, u_strlen(s)+1);
+ }
+
+ /* replace ' ' and '.' with '_' up to, but not including, the first '[' */
+ for (p=varname; *p && *p != 0x5b /*'['*/; p++) {
+ switch(*p) {
+ case 0x20: /*' '*/
+ case 0x2e: /*'.'*/
+ *p=0x5f; /*'_'*/
+ break;
+ }
+ }
+
+ /* find index */
+ index = u_strchr(varname, 0x5b /*'['*/);
+ if (index) {
+ /* both the read cursor (index) and write cursor (s) start just past '[' */
+ index++;
+ s=index;
+ } else {
+ /* no array part: nothing more to normalize */
+ return;
+ }
+
+ /* process one "[...]" segment per iteration until no further '[' follows */
+ while (index) {
+
+ /* skip whitespace at the start of the index */
+ while (*index == 0x20 /*' '*/ ||
+ *index == 0x0d /*'\r'*/ ||
+ *index == 0x0a /*'\n'*/ ||
+ *index == 0x09 /*'\t'*/) {
+ index++;
+ }
+ /* indexend points one past the closing ']', or at the NUL if there is none */
+ indexend = u_strchr(index, 0x5d /*']'*/);
+ indexend = indexend ? indexend + 1 : index + u_strlen(index);
+
+ if (s != index) {
+ /* whitespace was skipped: shift the rest of the string left over it */
+ u_memmove(s, index, u_strlen(index)+1);
+ s += indexend-index;
+ } else {
+ s = indexend;
+ }
+
+ if (*s == 0x5b /*'['*/) {
+ s++;
+ index = s;
+ } else {
+ index = NULL;
+ }
+ }
+ /* terminate the name right after the last segment handled above */
+ *s++ = 0;
+}
+
static void add_protected_variable(char *varname TSRMLS_DC)
{
int dummy=1;
}
+/*
+ * Returns non-zero when the given Unicode variable name is present in
+ * PG(rfc1867_protected_variables). The name is normalized in place before
+ * the lookup, so the caller's buffer is modified.
+ */
+static zend_bool is_u_protected_variable(UChar *varname TSRMLS_DC)
+{
+	int32_t key_len;
+
+	/* normalization must happen first: it can shorten the string */
+	normalize_u_protected_variable(varname TSRMLS_CC);
+
+	/* key length counts the terminating NUL */
+	key_len = u_strlen(varname) + 1;
+
+	return zend_u_hash_exists(&PG(rfc1867_protected_variables), IS_UNICODE, varname, key_len);
+}
+
+
static void safe_php_register_variable(char *var, char *strval, zval *track_vars_array, zend_bool override_protection TSRMLS_DC)
{
if (override_protection || !is_protected_variable(var TSRMLS_CC)) {
}
+/*
+ * Registers a Unicode variable unless its name is protected.
+ * When override_protection is set the protection check is skipped entirely
+ * (and, with it, the in-place normalization of var that the check performs).
+ */
+static void safe_u_php_register_variable(UChar *var, UChar *str_val, int32_t str_len, zval *track_vars_array, zend_bool override_protection TSRMLS_DC)
+{
+	/* refuse silently when the name is protected and no override was requested */
+	if (!override_protection && is_u_protected_variable(var TSRMLS_CC)) {
+		return;
+	}
+
+	php_u_register_variable_safe(var, str_val, str_len, track_vars_array TSRMLS_CC);
+}
+
+
static void register_http_post_files_variable(char *strvar, char *val, zval *http_post_files, zend_bool override_protection TSRMLS_DC)
{
int register_globals = PG(register_globals);
}
+/*
+ * Converts a byte string taken from the request (e.g. a MIME header value)
+ * to a newly allocated UChar string.
+ *
+ * in       - source bytes
+ * in_len   - number of bytes in `in`
+ * out_len  - optional; receives the length of the result, or 0 on failure
+ *
+ * Returns the converted buffer (caller must efree() it), or NULL when the
+ * conversion reports an error.
+ *
+ * The HTTP input encoding converter is used when one is configured; the
+ * output encoding converter is only a fallback, matching the other request
+ * decoding paths.
+ */
+static inline UChar *php_ap_to_unicode(char *in, int32_t in_len, int32_t *out_len TSRMLS_DC)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UChar *buf;
+	int32_t buf_len = 0;
+	UConverter *input_conv = UG(http_input_encoding_conv);
+
+	/* fall back to the output encoding only if no input encoding is set;
+	 * do not overwrite a configured input converter */
+	if (!input_conv) {
+		input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv));
+	}
+
+	zend_convert_to_unicode(input_conv, &buf, &buf_len, in, in_len, &status);
+	if (U_SUCCESS(status)) {
+		if (out_len)
+			*out_len = buf_len;
+		return buf;
+	} else {
+		/* discard the partial result and report failure to the caller */
+		efree(buf);
+		if (out_len)
+			*out_len = 0;
+		return NULL;
+	}
+}
+
+
/*
* Following code is based on apache_multipart_buffer.c from libapreq-0.33 package.
*
static int fill_buffer(multipart_buffer *self TSRMLS_DC)
{
int bytes_to_read, total_read = 0, actual_read = 0;
+ static zend_bool done = 0;
/* shift the existing data if necessary */
if (self->bytes_in_buffer > 0 && self->buf_begin != self->buffer) {
total_read += actual_read;
bytes_to_read -= actual_read;
} else {
+ if (!done) {
+ fprintf(stderr, "\n###################\n%s\n#################\n", self->buffer);
+ done = 1;
+ }
break;
}
}
}
+/*
+ * Extracts the next word from *line, where words are separated by `stop`.
+ * A separator that appears inside a single- or double-quoted section does
+ * not end the word; inside quotes a backslash followed by the active quote
+ * character is stepped over as a pair.
+ *
+ * Returns a newly allocated copy of the word (without the separator) and
+ * advances *line past the word and any run of consecutive separators. If
+ * no separator is found the whole remaining string is returned and *line
+ * is left pointing at the terminating NUL.
+ */
+static UChar *php_u_ap_getword(UChar **line, UChar stop TSRMLS_DC)
+{
+	UChar *begin = *line;
+	UChar *cur = begin;
+	UChar *word;
+
+	for (;;) {
+		UChar ch = *cur;
+
+		if (ch == 0 || ch == stop) {
+			break;
+		}
+		if (ch != '"' && ch != '\'') {
+			cur++;
+			continue;
+		}
+
+		/* quoted section: run to the matching quote or end of string */
+		cur++;
+		while (*cur && *cur != ch) {
+			cur += (*cur == '\\' && cur[1] == ch) ? 2 : 1;
+		}
+		if (*cur) {
+			cur++;
+		}
+	}
+
+	if (*cur == '\0') {
+		/* no separator left: cur already sits on the terminating NUL */
+		word = eustrdup(begin);
+		*line = cur;
+		return word;
+	}
+
+	word = eustrndup(begin, cur - begin);
+
+	/* swallow the separator and any immediate repeats of it */
+	do {
+		cur++;
+	} while (*cur == stop);
+
+	*line = cur;
+	return word;
+}
+
+
static char *php_ap_getword(char **line, char stop)
{
char *pos = *line, quote;
}
+/*
+ * Copies `len` UChars starting at `start` into a newly allocated,
+ * NUL-terminated buffer, unescaping as it goes: a backslash followed by
+ * another backslash, or by `quote` (when `quote` is non-zero), is replaced
+ * by that following character alone.
+ *
+ * The caller owns the returned buffer.
+ */
+static UChar *substring_u_conf(UChar *start, int32_t len, UChar quote TSRMLS_DC)
+{
+	UChar *out = eumalloc(len + 2);
+	int32_t rd = 0;
+	int32_t wr = 0;
+
+	while (rd < len) {
+		UChar ch = start[rd];
+
+		if (ch == '\\') {
+			UChar next = start[rd + 1];
+
+			if (next == '\\' || (quote && next == quote)) {
+				/* drop the backslash, keep the escaped character */
+				ch = next;
+				rd++;
+			}
+		}
+		out[wr++] = ch;
+		rd++;
+	}
+
+	out[wr] = 0;
+	return out;
+}
+
+
static char *substring_conf(char *start, int len, char quote TSRMLS_DC)
{
char *result = emalloc(len + 2);
}
+/*
+ * Reads one configuration-style word from *line.
+ *
+ * Leading whitespace is skipped. If the word starts with ' or ", it runs to
+ * a matching quote that is followed by CR, LF or end of string (a quote
+ * followed by anything else is treated as part of the value); otherwise it
+ * runs to the next whitespace character. The result is unescaped through
+ * substring_u_conf() and returned as a newly allocated string. *line is
+ * advanced past the word and any trailing whitespace.
+ *
+ * When only whitespace remains, an empty string from USTR_MAKE("") is
+ * returned.
+ */
+static UChar *php_u_ap_getword_conf(UChar **line TSRMLS_DC)
+{
+	UChar *cursor = *line;
+	UChar *tail;
+	UChar *value;
+	UChar delim;
+
+	while (*cursor && u_isspace(*cursor)) {
+		cursor++;
+	}
+
+	if (*cursor == 0) {
+		*line = cursor;
+		return USTR_MAKE("");
+	}
+
+	delim = *cursor;
+	if (delim == '"' || delim == '\'') {
+		tail = cursor + 1;
+
+		for (;;) {
+			UChar after;
+
+			/* advance to the next unescaped quote or the end of the string */
+			while (*tail && *tail != delim) {
+				tail += (*tail == '\\' && tail[1] == delim) ? 2 : 1;
+			}
+			if (*tail != delim) {
+				break; /* ran out of input */
+			}
+
+			/* only a quote followed by CR, LF or NUL closes the value */
+			after = tail[1];
+			if (after == '\r' || after == '\n' || after == '\0') {
+				break;
+			}
+			tail++;
+		}
+
+		value = substring_u_conf(cursor + 1, tail - cursor - 1, delim TSRMLS_CC);
+
+		if (*tail == delim) {
+			tail++;
+		}
+	} else {
+		for (tail = cursor; *tail && !u_isspace(*tail); tail++) {
+			/* scanning only */
+		}
+		value = substring_u_conf(cursor, tail - cursor, 0 TSRMLS_CC);
+	}
+
+	while (*tail && u_isspace(*tail)) {
+		tail++;
+	}
+
+	*line = tail;
+	return value;
+}
+
+
static char *php_ap_getword_conf(char **line TSRMLS_DC)
{
char *str = *line, *strend, *res, quote;
return out;
}
-
-/*
- * The combined READER/HANDLER
- *
- */
-
-SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler)
+static SAPI_POST_HANDLER_FUNC(rfc1867_post_handler_unicode)
{
char *boundary, *s=NULL, *boundary_end = NULL, *start_arr=NULL, *array_index=NULL;
char *temp_filename=NULL, *lbuf=NULL, *abuf=NULL;
zend_bool magic_quotes_gpc;
multipart_buffer *mbuff;
zval *array_ptr = (zval *) arg;
- int fd=-1;
+ FILE *fp;
zend_llist header;
+ UConverter *input_conv = UG(http_input_encoding_conv);
+ U_STRING_DECL(name_key, "name", 4);
+ U_STRING_DECL(filename_key, "filename", 8);
+ U_STRING_DECL(maxfilesize_key, "MAX_FILE_SIZE", 13);
+ static zend_bool did_string_init = FALSE;
if (SG(request_info).content_length > SG(post_max_size)) {
sapi_module.sapi_error(E_WARNING, "POST Content-Length of %ld bytes exceeds the limit of %ld bytes", SG(request_info).content_length, SG(post_max_size));
}
/* Initialize $_FILES[] */
- zend_hash_init(&PG(rfc1867_protected_variables), 5, NULL, NULL, 0);
+ zend_u_hash_init(&PG(rfc1867_protected_variables), 5, NULL, NULL, 0, 1);
ALLOC_HASHTABLE(uploaded_files);
- zend_hash_init(uploaded_files, 5, NULL, (dtor_func_t) free_estring, 0);
+ zend_u_hash_init(uploaded_files, 5, NULL, (dtor_func_t) free_estring, 0, 1);
SG(rfc1867_uploaded_files) = uploaded_files;
ALLOC_ZVAL(http_post_files);
#endif
zend_llist_init(&header, sizeof(mime_header_entry), (llist_dtor_func_t) php_free_hdr_entry, 0);
+ if (!did_string_init) {
+ U_STRING_INIT(name_key, "name", 4);
+ U_STRING_INIT(filename_key, "filename", 8);
+ U_STRING_INIT(maxfilesize_key, "MAX_FILE_SIZE", 13);
+ did_string_init = TRUE;
+ }
+
+ if (!input_conv) {
+ input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv));
+ }
+
while (!multipart_buffer_eof(mbuff TSRMLS_CC))
{
char buff[FILLUNIT];
- char *cd=NULL,*param=NULL,*filename=NULL, *tmp=NULL;
+ char *cd=NULL, *tmp=NULL;
int blen=0, wlen=0;
+ UChar *param = NULL, *filename = NULL;
+ int32_t param_len;
zend_llist_clean(&header);
}
if ((cd = php_mime_get_hdr_value(header, "Content-Disposition"))) {
- char *pair=NULL;
+ UChar *pair = NULL;
+ UChar *ucd = NULL, *ucd_start = NULL;
int end=0;
-
+
while (isspace(*cd)) {
++cd;
}
- while (*cd && (pair = php_ap_getword(&cd, ';')))
+ ucd_start = php_ap_to_unicode(cd, strlen(cd), NULL TSRMLS_CC);
+ if (!ucd) {
+ /* UTODO error condition */
+ }
+ ucd = ucd_start;
+
+ while (*ucd && (pair = php_u_ap_getword(&ucd, ';' TSRMLS_CC)))
{
- char *key=NULL, *word = pair;
+ UChar *key=NULL, *word = pair;
- while (isspace(*cd)) {
- ++cd;
+ while (u_isspace(*ucd)) {
+ ++ucd;
}
- if (strchr(pair, '=')) {
- key = php_ap_getword(&pair, '=');
-
- if (!strcasecmp(key, "name")) {
+ if (u_strchr(pair, '=')) {
+ key = php_u_ap_getword(&pair, '=' TSRMLS_CC);
+
+ if (!u_strcasecmp(key, name_key, 0)) {
if (param) {
efree(param);
}
- param = php_ap_getword_conf(&pair TSRMLS_CC);
- } else if (!strcasecmp(key, "filename")) {
+ param = php_u_ap_getword_conf(&pair TSRMLS_CC);
+ } else if (!u_strcasecmp(key, filename_key, 0)) {
if (filename) {
efree(filename);
}
- filename = php_ap_getword_conf(&pair TSRMLS_CC);
+ filename = php_u_ap_getword_conf(&pair TSRMLS_CC);
}
}
if (key) {
}
efree(word);
}
+
+ efree(ucd_start);
/* Normal form variable, safe to read all data into memory */
if (!filename && param) {
+ UChar *u_val;
+ int32_t u_val_len;
+ UErrorCode status = U_ZERO_ERROR;
char *value = multipart_buffer_read_body(mbuff TSRMLS_CC);
unsigned int new_val_len; /* Dummy variable */
- if (!value) {
- value = estrdup("");
- }
-
- if (sapi_module.input_filter(PARSE_POST, param, &value, strlen(value), &new_val_len TSRMLS_CC)) {
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
- php_mb_gpc_stack_variable(param, value, &val_list, &len_list,
- &num_vars, &num_vars_max TSRMLS_CC);
- } else {
- safe_php_register_variable(param, value, array_ptr, 0 TSRMLS_CC);
+ if (value) {
+ /* UTODO use 'charset' parameter for conversion */
+ zend_convert_to_unicode(input_conv, &u_val, &u_val_len, value, strlen(value), &status);
+ if (U_FAILURE(status)) {
+ /* UTODO set a user-accessible flag to indicate that conversion failed? */
+ goto var_done;
}
-#else
- safe_php_register_variable(param, value, array_ptr, 0 TSRMLS_CC);
-#endif
+ } else {
+ u_val = USTR_MAKE("");
}
- if (!strcasecmp(param, "MAX_FILE_SIZE")) {
- max_file_size = atol(value);
+
+ /* UTODO use input filtering */
+ //if (sapi_module.input_filter(PARSE_POST, param, &value, strlen(value), &new_val_len TSRMLS_CC)) {
+ safe_u_php_register_variable(param, u_val, u_val_len, array_ptr, 0 TSRMLS_CC);
+ //}
+ if (!u_strcasecmp(param, maxfilesize_key, 0)) {
+ max_file_size = zend_u_strtol(u_val, NULL, 10);
}
+var_done:
efree(param);
efree(value);
+ efree(u_val);
continue;
}
sapi_module.sapi_error(E_WARNING, "File Upload Mime headers garbled");
SAFE_RETURN;
}
-
+
if (!param) {
is_anonymous = 1;
- param = emalloc(MAX_SIZE_ANONNAME);
- snprintf(param, MAX_SIZE_ANONNAME, "%u", anonindex++);
+ param = eumalloc(MAX_SIZE_ANONNAME);
+ u_snprintf(param, MAX_SIZE_ANONNAME, "%u", anonindex++);
} else {
is_anonymous = 0;
}
-
+ param_len = u_strlen(param);
+
/* New Rule: never repair potential malicious user input */
if (!skip_upload) {
- char *tmp = param;
- long c = 0;
-
- while (*tmp) {
- if (*tmp == '[') {
- c++;
- } else if (*tmp == ']') {
- c--;
- if (tmp[1] && tmp[1] != '[') {
+ UChar32 c = 0;
+ int32_t ic;
+ long l = 0;
+
+ for (ic = 0; ic < param_len; ) {
+ U16_NEXT(param, ic, param_len, c);
+ if (c == 0x5b /*'['*/) {
+ l++;
+ } else if (c == 0x5d /*']'*/) {
+ l--;
+ U16_NEXT(param, ic, param_len, c);
+ if (ic < param_len && c != 0x5b /*'['*/) {
skip_upload = 1;
break;
+ } else {
+ /* decrement index so that the same character is retrieved again */
+ ic--;
}
}
- if (c < 0) {
+ if (l < 0) {
skip_upload = 1;
break;
}
- tmp++;
}
}
if (!skip_upload) {
/* Handle file */
- fd = php_open_temporary_fd(PG(upload_tmp_dir), "php", &temp_filename TSRMLS_CC);
- if (fd==-1) {
+ fp = php_open_temporary_file(PG(upload_tmp_dir), "php", &temp_filename TSRMLS_CC);
+ if (!fp) {
sapi_module.sapi_error(E_WARNING, "File upload error - unable to create a temporary file");
cancel_upload = UPLOAD_ERROR_E;
}
efree(param);
efree(filename);
continue;
- }
+ }
- if(strlen(filename) == 0) {
+ if(u_strlen(filename) == 0) {
#if DEBUG_FILE_UPLOAD
sapi_module.sapi_error(E_NOTICE, "No file uploaded");
#endif
#endif
cancel_upload = UPLOAD_ERROR_B;
} else if (blen > 0) {
- wlen = write(fd, buff, blen);
-
+ wlen = fwrite(buff, 1, blen, fp);
+
if (wlen < blen) {
#if DEBUG_FILE_UPLOAD
sapi_module.sapi_error(E_NOTICE, "Only %d bytes were written, expected to write %d", wlen, blen);
#endif
- cancel_upload = UPLOAD_ERROR_F;
+ cancel_upload = UPLOAD_ERROR_C;
} else {
total_bytes += wlen;
}
}
}
- if (fd!=-1) { /* may not be initialized if file could not be created */
- close(fd);
+ if (fp) { /* may not be initialized if file could not be created */
+ fclose(fp);
}
if (!cancel_upload && !end) {
#if DEBUG_FILE_UPLOAD
}
array_index = estrndup(start_arr+1, array_len-2);
}
-
+
/* Add $foo_name */
if (lbuf) {
efree(lbuf);
}
lbuf = (char *) emalloc(strlen(param) + MAX_SIZE_OF_INDEX + 1);
-
+
if (is_arr_upload) {
if (abuf) efree(abuf);
abuf = estrndup(param, strlen(param)-array_len);
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
filedone:
#endif
-
+
if (!is_anonymous) {
if (s && s > filename) {
safe_php_register_variable(lbuf, s+1, NULL, 0 TSRMLS_CC);
}
efree(filename);
s = NULL;
-
+
/* Possible Content-Type: */
if (cancel_upload || !(cd = php_mime_get_hdr_value(header, "Content-Type"))) {
cd = "";
if (!is_anonymous) {
safe_php_register_variable(param, temp_filename, NULL, 1 TSRMLS_CC);
}
-
+
/* Add $foo[tmp_name] */
if (is_arr_upload) {
sprintf(lbuf, "%s[tmp_name][%s]", abuf, array_index);
file_size.value.lval = total_bytes;
file_size.type = IS_LONG;
}
-
+
if (is_arr_upload) {
sprintf(lbuf, "%s[error][%s]", abuf, array_index);
} else {
SAFE_RETURN;
}
+/*
+ * Non-Unicode (byte string) handler for multipart/form-data POST bodies.
+ *
+ * Extracts the MIME boundary from content_type_dup, then reads the body one
+ * part at a time. Parts without a filename are registered as ordinary form
+ * variables in the array passed through `arg`. Parts with a filename are
+ * written to a temporary file and described in the files array stored in
+ * PG(http_globals)[TRACK_VARS_FILES] (name, type, tmp_name, error, size),
+ * plus the flat "<param>_name" / "_type" / "_size" variables.
+ *
+ * Every exit after the buffer has been created goes through SAFE_RETURN.
+ */
+static SAPI_POST_HANDLER_FUNC(rfc1867_post_handler_legacy)
+{
+ char *boundary, *s=NULL, *boundary_end = NULL, *start_arr=NULL, *array_index=NULL;
+ char *temp_filename=NULL, *lbuf=NULL, *abuf=NULL;
+ int boundary_len=0, total_bytes=0, cancel_upload=0, is_arr_upload=0, array_len=0;
+ int max_file_size=0, skip_upload=0, anonindex=0, is_anonymous;
+ zval *http_post_files=NULL; HashTable *uploaded_files=NULL;
+#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
+ int str_len = 0, num_vars = 0, num_vars_max = 2*10, *len_list = NULL;
+ char **val_list = NULL;
+#endif
+ zend_bool magic_quotes_gpc;
+ multipart_buffer *mbuff;
+ zval *array_ptr = (zval *) arg;
+ int fd=-1;
+ zend_llist header;
+
+ /* refuse bodies larger than post_max_size before parsing anything */
+ if (SG(request_info).content_length > SG(post_max_size)) {
+ sapi_module.sapi_error(E_WARNING, "POST Content-Length of %ld bytes exceeds the limit of %ld bytes", SG(request_info).content_length, SG(post_max_size));
+ return;
+ }
+
+ /* Get the boundary */
+ boundary = strstr(content_type_dup, "boundary");
+ if (!boundary || !(boundary=strchr(boundary, '='))) {
+ sapi_module.sapi_error(E_WARNING, "Missing boundary in multipart/form-data POST data");
+ return;
+ }
+
+ boundary++;
+ boundary_len = strlen(boundary);
+
+ if (boundary[0] == '"') {
+ /* quoted boundary: it ends at the closing quote */
+ boundary++;
+ boundary_end = strchr(boundary, '"');
+ if (!boundary_end) {
+ sapi_module.sapi_error(E_WARNING, "Invalid boundary in multipart/form-data POST data");
+ return;
+ }
+ } else {
+ /* search for the end of the boundary */
+ boundary_end = strchr(boundary, ',');
+ }
+ if (boundary_end) {
+ /* cut the boundary string in place (content_type_dup is modified) */
+ boundary_end[0] = '\0';
+ boundary_len = boundary_end-boundary;
+ }
+
+ /* Initialize the buffer */
+ if (!(mbuff = multipart_buffer_new(boundary, boundary_len))) {
+ sapi_module.sapi_error(E_WARNING, "Unable to initialize the input buffer");
+ return;
+ }
+
+ /* Initialize $_FILES[] */
+ zend_hash_init(&PG(rfc1867_protected_variables), 5, NULL, NULL, 0);
+
+ ALLOC_HASHTABLE(uploaded_files);
+ zend_hash_init(uploaded_files, 5, NULL, (dtor_func_t) free_estring, 0);
+ SG(rfc1867_uploaded_files) = uploaded_files;
+
+ ALLOC_ZVAL(http_post_files);
+ array_init(http_post_files);
+ INIT_PZVAL(http_post_files);
+ PG(http_globals)[TRACK_VARS_FILES] = http_post_files;
+
+#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
+ if (php_mb_encoding_translation(TSRMLS_C)) {
+ val_list = (char **)ecalloc(num_vars_max+2, sizeof(char *));
+ len_list = (int *)ecalloc(num_vars_max+2, sizeof(int));
+ }
+#endif
+ zend_llist_init(&header, sizeof(mime_header_entry), (llist_dtor_func_t) php_free_hdr_entry, 0);
+
+ /* one iteration per MIME part */
+ while (!multipart_buffer_eof(mbuff TSRMLS_CC))
+ {
+ char buff[FILLUNIT];
+ char *cd=NULL,*param=NULL,*filename=NULL, *tmp=NULL;
+ int blen=0, wlen=0;
+
+ zend_llist_clean(&header);
+
+ if (!multipart_buffer_headers(mbuff, &header TSRMLS_CC)) {
+ SAFE_RETURN;
+ }
+
+ if ((cd = php_mime_get_hdr_value(header, "Content-Disposition"))) {
+ char *pair=NULL;
+ int end=0;
+
+ while (isspace(*cd)) {
+ ++cd;
+ }
+
+ /* walk the ';'-separated key=value pairs, picking up name= and filename= */
+ while (*cd && (pair = php_ap_getword(&cd, ';')))
+ {
+ /* php_ap_getword() advances `pair`, so keep the original pointer for efree() */
+ char *key=NULL, *word = pair;
+
+ while (isspace(*cd)) {
+ ++cd;
+ }
+
+ if (strchr(pair, '=')) {
+ key = php_ap_getword(&pair, '=');
+
+ if (!strcasecmp(key, "name")) {
+ if (param) {
+ efree(param);
+ }
+ param = php_ap_getword_conf(&pair TSRMLS_CC);
+ } else if (!strcasecmp(key, "filename")) {
+ if (filename) {
+ efree(filename);
+ }
+ filename = php_ap_getword_conf(&pair TSRMLS_CC);
+ }
+ }
+ if (key) {
+ efree(key);
+ }
+ efree(word);
+ }
+
+ /* Normal form variable, safe to read all data into memory */
+ if (!filename && param) {
+
+ char *value = multipart_buffer_read_body(mbuff TSRMLS_CC);
+ unsigned int new_val_len; /* Dummy variable */
+
+ if (!value) {
+ value = estrdup("");
+ }
+
+ if (sapi_module.input_filter(PARSE_POST, param, &value, strlen(value), &new_val_len TSRMLS_CC)) {
+#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
+ if (php_mb_encoding_translation(TSRMLS_C)) {
+ php_mb_gpc_stack_variable(param, value, &val_list, &len_list,
+ &num_vars, &num_vars_max TSRMLS_CC);
+ } else {
+ safe_php_register_variable(param, value, array_ptr, 0 TSRMLS_CC);
+ }
+#else
+ safe_php_register_variable(param, value, array_ptr, 0 TSRMLS_CC);
+#endif
+ }
+ /* a MAX_FILE_SIZE form field sets the per-request size limit used for later file parts */
+ if (!strcasecmp(param, "MAX_FILE_SIZE")) {
+ max_file_size = atol(value);
+ }
+
+ efree(param);
+ efree(value);
+ continue;
+ }
+
+ /* If file_uploads=off, skip the file part */
+ if (!PG(file_uploads)) {
+ skip_upload = 1;
+ }
+
+ /* Return with an error if the posted data is garbled */
+ if (!param && !filename) {
+ sapi_module.sapi_error(E_WARNING, "File Upload Mime headers garbled");
+ SAFE_RETURN;
+ }
+
+ if (!param) {
+ /* file part without a name=: use a running number as its name */
+ is_anonymous = 1;
+ param = emalloc(MAX_SIZE_ANONNAME);
+ snprintf(param, MAX_SIZE_ANONNAME, "%u", anonindex++);
+ } else {
+ is_anonymous = 0;
+ }
+
+ /* New Rule: never repair potential malicious user input */
+ if (!skip_upload) {
+ char *tmp = param;
+ long c = 0;
+
+ /* reject names with unbalanced brackets or with text between ']' and the next '[' */
+ while (*tmp) {
+ if (*tmp == '[') {
+ c++;
+ } else if (*tmp == ']') {
+ c--;
+ if (tmp[1] && tmp[1] != '[') {
+ skip_upload = 1;
+ break;
+ }
+ }
+ if (c < 0) {
+ skip_upload = 1;
+ break;
+ }
+ tmp++;
+ }
+ }
+
+ total_bytes = cancel_upload = 0;
+
+ if (!skip_upload) {
+ /* Handle file */
+ fd = php_open_temporary_fd(PG(upload_tmp_dir), "php", &temp_filename TSRMLS_CC);
+ if (fd==-1) {
+ sapi_module.sapi_error(E_WARNING, "File upload error - unable to create a temporary file");
+ cancel_upload = UPLOAD_ERROR_E;
+ }
+ }
+ if (skip_upload) {
+ efree(param);
+ efree(filename);
+ continue;
+ }
+
+ if(strlen(filename) == 0) {
+#if DEBUG_FILE_UPLOAD
+ sapi_module.sapi_error(E_NOTICE, "No file uploaded");
+#endif
+ cancel_upload = UPLOAD_ERROR_D;
+ }
+
+ /* copy the part body to the temporary file in FILLUNIT-sized chunks; size limits are checked before each write */
+ end = 0;
+ while (!cancel_upload && (blen = multipart_buffer_read(mbuff, buff, sizeof(buff), &end TSRMLS_CC)))
+ {
+ if (PG(upload_max_filesize) > 0 && total_bytes > PG(upload_max_filesize)) {
+#if DEBUG_FILE_UPLOAD
+ sapi_module.sapi_error(E_NOTICE, "upload_max_filesize of %ld bytes exceeded - file [%s=%s] not saved", PG(upload_max_filesize), param, filename);
+#endif
+ cancel_upload = UPLOAD_ERROR_A;
+ } else if (max_file_size && (total_bytes > max_file_size)) {
+#if DEBUG_FILE_UPLOAD
+ sapi_module.sapi_error(E_NOTICE, "MAX_FILE_SIZE of %ld bytes exceeded - file [%s=%s] not saved", max_file_size, param, filename);
+#endif
+ cancel_upload = UPLOAD_ERROR_B;
+ } else if (blen > 0) {
+ wlen = write(fd, buff, blen);
+
+ if (wlen < blen) {
+#if DEBUG_FILE_UPLOAD
+ sapi_module.sapi_error(E_NOTICE, "Only %d bytes were written, expected to write %d", wlen, blen);
+#endif
+ cancel_upload = UPLOAD_ERROR_F;
+ } else {
+ total_bytes += wlen;
+ }
+ }
+ }
+ if (fd!=-1) { /* may not be initialized if file could not be created */
+ close(fd);
+ }
+ if (!cancel_upload && !end) {
+#if DEBUG_FILE_UPLOAD
+ sapi_module.sapi_error(E_NOTICE, "Missing mime boundary at the end of the data for file %s", strlen(filename) > 0 ? filename : "");
+#endif
+ cancel_upload = UPLOAD_ERROR_C;
+ }
+#if DEBUG_FILE_UPLOAD
+ if(strlen(filename) > 0 && total_bytes == 0 && !cancel_upload) {
+ sapi_module.sapi_error(E_WARNING, "Uploaded file size 0 - file [%s=%s] not saved", param, filename);
+ cancel_upload = 5;
+ }
+#endif
+
+ if (cancel_upload) {
+ if (temp_filename) {
+ if (cancel_upload != UPLOAD_ERROR_E) { /* file creation failed */
+ unlink(temp_filename);
+ }
+ efree(temp_filename);
+ }
+ /* an empty tmp_name is what gets registered for a cancelled upload */
+ temp_filename="";
+ } else {
+ /* remember the temporary file in the per-request uploaded-files table */
+ zend_hash_add(SG(rfc1867_uploaded_files), temp_filename, strlen(temp_filename) + 1, &temp_filename, sizeof(char *), NULL);
+ }
+
+ /* is_arr_upload is true when name of file upload field
+ * ends in [.*]
+ * start_arr is set to point to 1st [
+ */
+ is_arr_upload = (start_arr = strchr(param,'[')) && (param[strlen(param)-1] == ']');
+
+ if (is_arr_upload) {
+ array_len = strlen(start_arr);
+ if (array_index) {
+ efree(array_index);
+ }
+ /* text between the first '[' and the final ']' */
+ array_index = estrndup(start_arr+1, array_len-2);
+ }
+
+ /* Add $foo_name */
+ if (lbuf) {
+ efree(lbuf);
+ }
+ /* scratch buffer reused below for every derived variable name of this part */
+ lbuf = (char *) emalloc(strlen(param) + MAX_SIZE_OF_INDEX + 1);
+
+ if (is_arr_upload) {
+ if (abuf) efree(abuf);
+ /* abuf = param without its "[...]" suffix */
+ abuf = estrndup(param, strlen(param)-array_len);
+ sprintf(lbuf, "%s_name[%s]", abuf, array_index);
+ } else {
+ sprintf(lbuf, "%s_name", param);
+ }
+
+#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
+ if (php_mb_encoding_translation(TSRMLS_C)) {
+ if (num_vars>=num_vars_max){
+ php_mb_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max,
+ 1 TSRMLS_CC);
+ }
+ val_list[num_vars] = filename;
+ len_list[num_vars] = strlen(filename);
+ num_vars++;
+ if(php_mb_gpc_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
+ str_len = strlen(filename);
+ php_mb_gpc_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC);
+ }
+ s = php_mb_strrchr(filename, '\\' TSRMLS_CC);
+ if ((tmp = php_mb_strrchr(filename, '/' TSRMLS_CC)) > s) {
+ s = tmp;
+ }
+ num_vars--;
+ goto filedone;
+ }
+#endif
+ /* The \ check should technically be needed for win32 systems only where
+ * it is a valid path separator. However, IE in all its wisdom always sends
+ * the full path of the file on the user's filesystem, which means that unless
+ * the user does basename() they get a bogus file name. Until IE's user base drops
+ * to nil or the problem is fixed this code must remain enabled for all systems.
+ */
+ s = strrchr(filename, '\\');
+ if ((tmp = strrchr(filename, '/')) > s) {
+ s = tmp;
+ }
+#ifdef PHP_WIN32
+ if (PG(magic_quotes_gpc)) {
+ s = s ? s : filename;
+ tmp = strrchr(s, '\'');
+ s = tmp > s ? tmp : s;
+ tmp = strrchr(s, '"');
+ s = tmp > s ? tmp : s;
+ }
+#endif
+
+#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
+filedone:
+#endif
+
+ /* s, when set past the start of filename, points at the last path separator: register only what follows it */
+ if (!is_anonymous) {
+ if (s && s > filename) {
+ safe_php_register_variable(lbuf, s+1, NULL, 0 TSRMLS_CC);
+ } else {
+ safe_php_register_variable(lbuf, filename, NULL, 0 TSRMLS_CC);
+ }
+ }
+
+ /* Add $foo[name] */
+ if (is_arr_upload) {
+ sprintf(lbuf, "%s[name][%s]", abuf, array_index);
+ } else {
+ sprintf(lbuf, "%s[name]", param);
+ }
+ if (s && s > filename) {
+ register_http_post_files_variable(lbuf, s+1, http_post_files, 0 TSRMLS_CC);
+ } else {
+ register_http_post_files_variable(lbuf, filename, http_post_files, 0 TSRMLS_CC);
+ }
+ efree(filename);
+ s = NULL;
+
+ /* Possible Content-Type: */
+ if (cancel_upload || !(cd = php_mime_get_hdr_value(header, "Content-Type"))) {
+ cd = "";
+ } else {
+ /* fix for Opera 6.01 */
+ /* temporarily cut the header value at the first ';' (restored further down) */
+ s = strchr(cd, ';');
+ if (s != NULL) {
+ *s = '\0';
+ }
+ }
+
+ /* Add $foo_type */
+ if (is_arr_upload) {
+ sprintf(lbuf, "%s_type[%s]", abuf, array_index);
+ } else {
+ sprintf(lbuf, "%s_type", param);
+ }
+ if (!is_anonymous) {
+ safe_php_register_variable(lbuf, cd, NULL, 0 TSRMLS_CC);
+ }
+
+ /* Add $foo[type] */
+ if (is_arr_upload) {
+ sprintf(lbuf, "%s[type][%s]", abuf, array_index);
+ } else {
+ sprintf(lbuf, "%s[type]", param);
+ }
+ register_http_post_files_variable(lbuf, cd, http_post_files, 0 TSRMLS_CC);
+
+ /* Restore Content-Type Header */
+ if (s != NULL) {
+ *s = ';';
+ }
+ s = "";
+
+ /* Initialize variables */
+ add_protected_variable(param TSRMLS_CC);
+
+ /* magic_quotes_gpc is switched off while the temporary file name is registered, then restored */
+ magic_quotes_gpc = PG(magic_quotes_gpc);
+ PG(magic_quotes_gpc) = 0;
+ /* if param is of form xxx[.*] this will cut it to xxx */
+ if (!is_anonymous) {
+ safe_php_register_variable(param, temp_filename, NULL, 1 TSRMLS_CC);
+ }
+
+ /* Add $foo[tmp_name] */
+ if (is_arr_upload) {
+ sprintf(lbuf, "%s[tmp_name][%s]", abuf, array_index);
+ } else {
+ sprintf(lbuf, "%s[tmp_name]", param);
+ }
+ add_protected_variable(lbuf TSRMLS_CC);
+ register_http_post_files_variable(lbuf, temp_filename, http_post_files, 1 TSRMLS_CC);
+
+ PG(magic_quotes_gpc) = magic_quotes_gpc;
+
+ {
+ zval file_size, error_type;
+
+ /* the error entry carries the cancel_upload code (0 when the upload succeeded) */
+ error_type.value.lval = cancel_upload;
+ error_type.type = IS_LONG;
+
+ /* Add $foo[error] */
+ if (cancel_upload) {
+ file_size.value.lval = 0;
+ file_size.type = IS_LONG;
+ } else {
+ file_size.value.lval = total_bytes;
+ file_size.type = IS_LONG;
+ }
+
+ if (is_arr_upload) {
+ sprintf(lbuf, "%s[error][%s]", abuf, array_index);
+ } else {
+ sprintf(lbuf, "%s[error]", param);
+ }
+ register_http_post_files_variable_ex(lbuf, &error_type, http_post_files, 0 TSRMLS_CC);
+
+ /* Add $foo_size */
+ if (is_arr_upload) {
+ sprintf(lbuf, "%s_size[%s]", abuf, array_index);
+ } else {
+ sprintf(lbuf, "%s_size", param);
+ }
+ if (!is_anonymous) {
+ safe_php_register_variable_ex(lbuf, &file_size, NULL, 0 TSRMLS_CC);
+ }
+
+ /* Add $foo[size] */
+ if (is_arr_upload) {
+ sprintf(lbuf, "%s[size][%s]", abuf, array_index);
+ } else {
+ sprintf(lbuf, "%s[size]", param);
+ }
+ register_http_post_files_variable_ex(lbuf, &file_size, http_post_files, 0 TSRMLS_CC);
+ }
+ efree(param);
+ }
+ }
+
+ SAFE_RETURN;
+}
+
+/*
+ * The combined READER/HANDLER
+ *
+ */
+
+/*
+ * Public multipart/form-data POST handler. Forwards the request to the
+ * byte-string (legacy) implementation unless Unicode mode, UG(unicode),
+ * is enabled, in which case the Unicode implementation is used.
+ */
+SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler)
+{
+	if (!UG(unicode)) {
+		rfc1867_post_handler_legacy(content_type_dup, arg TSRMLS_CC);
+		return;
+	}
+
+	rfc1867_post_handler_unicode(content_type_dup, arg TSRMLS_CC);
+}
+
/*
* Local variables:
* tab-width: 4
static void xbuf_format_converter(smart_str *xbuf, const char *fmt, va_list ap)
{
register char *s = NULL;
+ register UChar *u = NULL;
char *q;
int s_len;
+ int32_t u_len;
register int min_width = 0;
int precision = 0;
char num_buf[NUM_BUF_SIZE];
char char_buf[2]; /* for printing %% and %<unknown> */
+ zend_bool free_s; /* free string if allocated here */
/*
* Flag variables
boolean_e adjust_width;
bool_int is_negative;
+ TSRMLS_FETCH();
+
while (*fmt) {
if (*fmt != '%') {
INS_CHAR(xbuf, *fmt);
alternate_form = print_sign = print_blank = NO;
pad_char = ' ';
prefix_char = NUL;
+ free_s = 0;
fmt++;
}
break;
+ case 'v':
+ if (UG(unicode)) {
+ goto fmt_unicode;
+ } else {
+ goto fmt_string;
+ }
+ break;
+
+ case 'R':
+ {
+ int type = va_arg(ap, int);
+ if (type != IS_UNICODE) {
+ if (alternate_form) {
+ va_arg(ap, UConverter *);
+ }
+ goto fmt_string;
+ }
+ }
+ /* break omitted */
+
+ case 'r':
+fmt_unicode:
+ {
+ UConverter *conv = ZEND_U_CONVERTER(UG(output_encoding_conv));
+ UErrorCode status = U_ZERO_ERROR;
+ char *res = NULL;
+
+ if (alternate_form) {
+ conv = va_arg(ap, UConverter *);
+ }
+
+ u = va_arg(ap, UChar *);
+ if (u == NULL) {
+ s = S_NULL;
+ s_len = S_NULL_LEN;
+ break;
+ }
+
+ u_len = u_strlen(u);
+ zend_convert_from_unicode(conv, &res, &s_len, u, u_len, &status);
+ if (U_FAILURE(status)) {
+ php_error(E_WARNING, "Could not convert Unicode to printable form in s[np]printf call");
+ return;
+ }
+ s = res;
+ free_s = 1;
+ pad_char = ' ';
+ break;
+ }
case 's':
+fmt_string:
s = va_arg(ap, char *);
if (s != NULL) {
s_len = strlen(s);
pad_char = ' ';
break;
-
case 'f':
case 'e':
case 'E':
* Print the string s.
*/
INS_STRING(xbuf, s, s_len);
+ if (free_s) efree(s);
if (adjust_width && adjust == LEFT && min_width > s_len)
PAD(xbuf, min_width - s_len, pad_char);
#include "snprintf.h"
BEGIN_EXTERN_C()
-PHPAPI int spprintf( char **pbuf, size_t max_len, const char *format, ...) PHP_ATTRIBUTE_FORMAT(printf, 3, 4);
+PHPAPI int spprintf( char **pbuf, size_t max_len, const char *format, ...);
-PHPAPI int vspprintf(char **pbuf, size_t max_len, const char *format, va_list ap) PHP_ATTRIBUTE_FORMAT(printf, 3, 0);
+PHPAPI int vspprintf(char **pbuf, size_t max_len, const char *format, va_list ap);
END_EXTERN_C()
#endif /* SNPRINTF_H */
off_t dummy;
stream->ops->seek(stream, stream->position, SEEK_SET, &dummy TSRMLS_CC);
- stream->readpos = stream->writepos = 0;
+
+ php_stream_flush_readbuf(stream);
}
}
exit_success:
- if ((stream->writepos - stream->readpos) > 0 &&
+ if ((stream->readbuf_avail) > 0 &&
stream->fclose_stdiocast != PHP_STREAM_FCLOSE_FOPENCOOKIE &&
(flags & PHP_STREAM_CAST_INTERNAL) == 0) {
/* the data we have buffered will be lost to the third party library that
php_error_docref(NULL TSRMLS_CC, E_WARNING,
"%ld bytes of buffered data lost during stream conversion!",
- (long)(stream->writepos - stream->readpos));
+					/* cast explicitly so the argument always matches the %ld conversion,
+					 * as the line this replaces did */
+					(long)stream->readbuf_avail);
}
if (castas == PHP_STREAM_AS_STDIO && ret)
if (is_persistent && !buf_persistent) {
/* all data in a persistent bucket must also be persistent */
- bucket->buf = pemalloc(buflen, 1);
+ bucket->buf.str.val = pemalloc(buflen, 1);
- if (bucket->buf == NULL) {
+ if (bucket->buf.str.val == NULL) {
pefree(bucket, 1);
return NULL;
}
- memcpy(bucket->buf, buf, buflen);
- bucket->buflen = buflen;
+ memcpy(bucket->buf.str.val, buf, buflen);
+ bucket->buf.str.len = buflen;
bucket->own_buf = 1;
} else {
- bucket->buf = buf;
- bucket->buflen = buflen;
+ bucket->buf.str.val = buf;
+ bucket->buf.str.len = buflen;
bucket->own_buf = own_buf;
}
+ bucket->is_unicode = 0;
+ bucket->is_persistent = is_persistent;
+ bucket->refcount = 1;
+
+ return bucket;
+}
+
+/* Create a bucket that wraps a UChar buffer.
+ *
+ * buflen counts UChar code units, not bytes.
+ * If the stream is persistent and buf_persistent is 0, the data is copied
+ * into persistent memory and the bucket owns the copy; otherwise buf is
+ * referenced directly and own_buf is stored as given.
+ * Returns NULL when an allocation fails. */
+PHPAPI php_stream_bucket *php_stream_bucket_new_unicode(php_stream *stream, UChar *buf, int32_t buflen, int own_buf, int buf_persistent TSRMLS_DC)
+{
+	int is_persistent = php_stream_is_persistent(stream);
+	php_stream_bucket *bucket;
+
+	bucket = (php_stream_bucket*)pemalloc(sizeof(php_stream_bucket), is_persistent);
+
+	if (bucket == NULL) {
+		return NULL;
+	}
+
+	bucket->next = bucket->prev = NULL;
+
+	if (is_persistent && !buf_persistent) {
+		/* all data in a persistent bucket must also be persistent */
+		bucket->buf.ustr.val = safe_pemalloc(sizeof(UChar), buflen, 0, 1);
+
+		if (bucket->buf.ustr.val == NULL) {
+			pefree(bucket, 1);
+			return NULL;
+		}
+
+		/* memcpy() takes a byte count; buflen is in code units */
+		memcpy(bucket->buf.ustr.val, buf, buflen * sizeof(UChar));
+		bucket->buf.ustr.len = buflen;
+		bucket->own_buf = 1;
+	} else {
+		bucket->buf.ustr.val = buf;
+		bucket->buf.ustr.len = buflen;
+		bucket->own_buf = own_buf;
+	}
+	bucket->is_unicode = 1;
bucket->is_persistent = is_persistent;
bucket->refcount = 1;
retval = (php_stream_bucket*)pemalloc(sizeof(php_stream_bucket), bucket->is_persistent);
memcpy(retval, bucket, sizeof(*retval));
- retval->buf = pemalloc(retval->buflen, retval->is_persistent);
- memcpy(retval->buf, bucket->buf, retval->buflen);
+ if (bucket->is_unicode) {
+ retval->buf.ustr.val = safe_pemalloc(sizeof(UChar), retval->buf.ustr.len, 0, retval->is_persistent);
+ memcpy(retval->buf.ustr.val, bucket->buf.ustr.val, retval->buf.ustr.len * sizeof(UChar));
+ } else {
+ retval->buf.str.val = pemalloc(retval->buf.str.len, retval->is_persistent);
+ memcpy(retval->buf.str.val, bucket->buf.str.val, retval->buf.str.len);
+ }
retval->refcount = 1;
retval->own_buf = 1;
goto exit_fail;
}
- (*left)->buf = pemalloc(length, in->is_persistent);
- (*left)->buflen = length;
- memcpy((*left)->buf, in->buf, length);
+	if (in->is_unicode) {
+		/* length and ustr.len count UChar code units; allocation and copy
+		 * sizes below are therefore scaled to bytes explicitly */
+		(*left)->buf.ustr.val = safe_pemalloc(sizeof(UChar), length, 0, in->is_persistent);
+		(*left)->buf.ustr.len = length;
+		memcpy((*left)->buf.ustr.val, in->buf.ustr.val, length * sizeof(UChar));
+
+		(*right)->buf.ustr.len = in->buf.ustr.len - length;
+		(*right)->buf.ustr.val = safe_pemalloc(sizeof(UChar), (*right)->buf.ustr.len, 0, in->is_persistent);
+		/* in->buf.ustr.val is a UChar*, so "+ length" already skips length code units */
+		memcpy((*right)->buf.ustr.val, in->buf.ustr.val + length, (*right)->buf.ustr.len * sizeof(UChar));
+	} else {
+		(*left)->buf.str.val = pemalloc(length, in->is_persistent);
+		(*left)->buf.str.len = length;
+		memcpy((*left)->buf.str.val, in->buf.str.val, length);
+
+		(*right)->buf.str.len = in->buf.str.len - length;
+		(*right)->buf.str.val = pemalloc((*right)->buf.str.len, in->is_persistent);
+		memcpy((*right)->buf.str.val, in->buf.str.val + length, (*right)->buf.str.len);
+	}
+
+
(*left)->refcount = 1;
(*left)->own_buf = 1;
(*left)->is_persistent = in->is_persistent;
-
- (*right)->buflen = in->buflen - length;
- (*right)->buf = pemalloc((*right)->buflen, in->is_persistent);
- memcpy((*right)->buf, in->buf + length, (*right)->buflen);
+ (*left)->is_unicode = in->is_unicode;
+
(*right)->refcount = 1;
(*right)->own_buf = 1;
(*right)->is_persistent = in->is_persistent;
+ (*right)->is_unicode = in->is_unicode;
return SUCCESS;
exit_fail:
if (*right) {
- if ((*right)->buf) {
- pefree((*right)->buf, in->is_persistent);
+ if ((*right)->is_unicode) {
+ if ((*right)->buf.ustr.val) {
+ pefree((*right)->buf.ustr.val, in->is_persistent);
+ }
+ } else {
+ if ((*right)->buf.str.val) {
+ pefree((*right)->buf.str.val, in->is_persistent);
+ }
}
pefree(*right, in->is_persistent);
}
if (*left) {
- if ((*left)->buf) {
- pefree((*left)->buf, in->is_persistent);
+ if ((*left)->is_unicode) {
+ if ((*left)->buf.ustr.val) {
+ pefree((*left)->buf.ustr.val, in->is_persistent);
+ }
+ } else {
+ if ((*left)->buf.str.val) {
+ pefree((*left)->buf.str.val, in->is_persistent);
+ }
}
pefree(*left, in->is_persistent);
}
{
if (--bucket->refcount == 0) {
if (bucket->own_buf) {
- pefree(bucket->buf, bucket->is_persistent);
+ pefree(bucket->is_unicode ? bucket->buf.ustr.val : bucket->buf.str.val, bucket->is_persistent);
}
pefree(bucket, bucket->is_persistent);
}
chain->tail = filter;
filter->chain = chain;
- if (&(stream->readfilters) == chain && (stream->writepos - stream->readpos) > 0) {
+ if (&(stream->readfilters) == chain && (stream->readbuf_avail) > 0) {
/* Let's going ahead and wind anything in the buffer through this filter */
- php_stream_bucket_brigade brig_in = { NULL, NULL }, brig_out = { NULL, NULL };
- php_stream_bucket_brigade *brig_inp = &brig_in, *brig_outp = &brig_out;
+ php_stream_bucket_brigade brig_out = { NULL, NULL };
+ php_stream_bucket_brigade *brig_outp = &brig_out;
php_stream_filter_status_t status;
php_stream_bucket *bucket;
- size_t consumed = 0;
- bucket = php_stream_bucket_new(stream, stream->readbuf + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
- php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
- status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC);
-
- if (stream->readpos + consumed > stream->writepos || consumed < 0) {
- /* No behaving filter should cause this. */
- status = PSFS_ERR_FATAL;
+ if (stream->readbuf_ofs) {
+ /* Mask readbuf_ofs from filter */
+ bucket = stream->readbuf.head;
+			if (bucket->is_unicode) {
+				bucket->buf.ustr.len -= stream->readbuf_ofs;
+				/* pointer arithmetic on UChar* is already in code units;
+				 * only the byte count passed to memmove() needs scaling */
+				memmove(bucket->buf.ustr.val, bucket->buf.ustr.val + stream->readbuf_ofs, bucket->buf.ustr.len * sizeof(UChar));
+			} else {
+				bucket->buf.str.len -= stream->readbuf_ofs;
+				memmove(bucket->buf.str.val, bucket->buf.str.val + stream->readbuf_ofs, bucket->buf.str.len);
+			}
}
+ status = filter->fops->filter(stream, filter, &stream->readbuf, brig_outp, NULL, PSFS_FLAG_NORMAL TSRMLS_CC);
+
switch (status) {
case PSFS_ERR_FATAL:
- /* If this first cycle simply fails then there's something wrong with the filter.
- Pull the filter off the chain and leave the read buffer alone. */
+ /* filter is fundamentally broken, invalidate readbuf and strip the filter */
if (chain->head == filter) {
chain->head = NULL;
chain->tail = NULL;
filter->prev->next = NULL;
chain->tail = filter->prev;
}
- php_stream_bucket_unlink(bucket TSRMLS_CC);
- php_stream_bucket_delref(bucket TSRMLS_CC);
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data. Not adding to filterchain.");
+ php_stream_flush_readbuf(stream);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data");
+ /* Passthru -- Anything successfully filtered can go back on the readbuf */
+ case PSFS_PASS_ON:
+ stream->readbuf_ofs = stream->readbuf_avail = 0;
+
+ /* Merge brig_out */
+ while((bucket = brig_out.head)) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC);
+ }
+
+ /* Count available bytes */
+ for(bucket = stream->readbuf.head; bucket; bucket = bucket->next) {
+ stream->readbuf_avail += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len;
+ }
break;
case PSFS_FEED_ME:
/* We don't actually need data yet,
leave this filter in a feed me state until data is needed.
Reset stream's internal read buffer since the filter is "holding" it. */
- stream->readpos = 0;
- stream->writepos = 0;
+ stream->readbuf.head = stream->readbuf.tail = NULL;
+ stream->readbuf_avail = stream->readbuf_ofs = 0;
break;
- case PSFS_PASS_ON:
- /* Put any filtered data onto the readbuffer stack.
- Previously read data has been at least partially consumed. */
- stream->readpos += consumed;
-
- if (stream->writepos == stream->readpos) {
- /* Entirely consumed */
- stream->writepos = 0;
- stream->readpos = 0;
- }
+ }
+ }
+}
- while (brig_outp->head) {
- bucket = brig_outp->head;
- /* Grow buffer to hold this bucket if need be.
- TODO: See warning in main/stream/streams.c::php_stream_fill_read_buffer */
- if (stream->readbuflen - stream->writepos < bucket->buflen) {
- stream->readbuflen += bucket->buflen;
- stream->readbuf = perealloc(stream->readbuf, stream->readbuflen, stream->is_persistent);
- }
- memcpy(stream->readbuf + stream->writepos, bucket->buf, bucket->buflen);
- stream->writepos += bucket->buflen;
+/* Walk the chain from head to tail and verify that each filter accepts at
+ * least one bucket type that the filters before it may produce.
+ * last_output tracks the possible output types (PSFO_FLAG_OUTPUTS_* bits)
+ * of the chain so far; it starts as "any".
+ * Returns SUCCESS when the chain is consistent, FAILURE otherwise. */
+PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC)
+{
+	php_stream_filter *filter;
+	long last_output = PSFO_FLAG_OUTPUTS_ANY;
- php_stream_bucket_unlink(bucket TSRMLS_CC);
- php_stream_bucket_delref(bucket TSRMLS_CC);
- }
- break;
+
+	for (filter = chain->head; filter; filter = filter->next) {
+		int flags = filter->fops->flags;
+
+		/* shift the accept bits onto the output bit positions so they can be
+		 * intersected with what the previous filter may emit */
+		if ((((flags & PSFO_FLAG_ACCEPT_MASK) << PSFO_FLAG_ACCEPT_SHIFT) & last_output) == 0) {
+			/* Nothing which the last filter outputs is accepted by this filter */
+			return FAILURE;
+		}
+		/* OUTPUTS_SAME / OUTPUTS_OPPOSITE are composite masks that include every
+		 * accept and output bit, so a plain "flags & MASK" is non-zero for
+		 * almost any filter; compare against the full mask instead */
+		if ((flags & PSFO_FLAG_OUTPUTS_SAME) == PSFO_FLAG_OUTPUTS_SAME) {
+			/* output type follows input type: last_output is unchanged */
+			continue;
+		}
+		if ((flags & PSFO_FLAG_OUTPUTS_OPPOSITE) == PSFO_FLAG_OUTPUTS_OPPOSITE) {
+			/* swap the string and unicode possibilities */
+			last_output = ((last_output & PSFO_FLAG_OUTPUTS_STRING) ? PSFO_FLAG_OUTPUTS_UNICODE : 0) |
+						  ((last_output & PSFO_FLAG_OUTPUTS_UNICODE) ? PSFO_FLAG_OUTPUTS_STRING : 0);
+			continue;
+		}
+		/* filter declares its output types explicitly */
+		last_output = flags & PSFO_FLAG_OUTPUTS_ANY;
+	}
+
+	return SUCCESS;
+}
+
+/* Decide which bucket type `filter` should emit, based on what the filters
+ * after it in the chain accept.
+ * Returns non-zero to prefer unicode buckets, zero to prefer string buckets.
+ * `inverted` flips each time a following filter outputs the opposite type
+ * of its input. If no following filter settles the question, the default
+ * is unicode for the stream's read chain and string otherwise. */
+PHPAPI int _php_stream_filter_output_prefer_unicode(php_stream_filter *filter TSRMLS_DC)
+{
+	php_stream_filter_chain *chain = filter->chain;
+	php_stream_filter *f;
+	int inverted = 0;
+	/* compare (not assign): is this filter on the stream's read chain? */
+	int preferred = (chain == &chain->stream->readfilters) ? 1 : 0;
+
+	for (f = filter->next; f ; f = f->next) {
+		int flags = f->fops->flags;
+
+		if ((flags & PSFO_FLAG_ACCEPTS_STRING) == 0) {
+			return inverted ^ 1;
+		}
+		if ((flags & PSFO_FLAG_ACCEPTS_UNICODE) == 0) {
+			return inverted;
+		}
+		/* OUTPUTS_SAME / OUTPUTS_OPPOSITE are composite masks containing all
+		 * accept/output bits, so they must be matched in full */
+		if (((flags & PSFO_FLAG_OUTPUTS_SAME) != PSFO_FLAG_OUTPUTS_SAME) &&
+			((flags & PSFO_FLAG_OUTPUTS_OPPOSITE) != PSFO_FLAG_OUTPUTS_OPPOSITE)) {
+			/* Input type for next filter won't affect output -- Might as well go for unicode */
+			return inverted ^ 1;
+		}
+		if ((flags & PSFO_FLAG_OUTPUTS_SAME) == PSFO_FLAG_OUTPUTS_SAME) {
+			continue;
+		}
+		if ((flags & PSFO_FLAG_OUTPUTS_OPPOSITE) == PSFO_FLAG_OUTPUTS_OPPOSITE) {
+			inverted ^= 1;
+			continue;
}
}
+	return preferred ^ inverted;
}
PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS_DC)
Do something with it */
for(bucket = inp->head; bucket; bucket = bucket->next) {
- flushed_size += bucket->buflen;
+ flushed_size += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len;
}
if (flushed_size == 0) {
}
if (chain == &(stream->readfilters)) {
- /* Dump any newly flushed data to the read buffer */
- if (stream->readpos > 0) {
- /* Back the buffer up */
- memcpy(stream->readbuf, stream->readbuf + stream->readpos, stream->writepos - stream->readpos);
- stream->readpos = 0;
- stream->writepos -= stream->readpos;
- }
- if (flushed_size > (stream->readbuflen - stream->writepos)) {
- /* Grow the buffer */
- stream->readbuf = perealloc(stream->readbuf, stream->writepos + flushed_size + stream->chunk_size, stream->is_persistent);
- }
- while ((bucket = inp->head)) {
- memcpy(stream->readbuf + stream->writepos, bucket->buf, bucket->buflen);
- stream->writepos += bucket->buflen;
- php_stream_bucket_unlink(bucket TSRMLS_CC);
- php_stream_bucket_delref(bucket TSRMLS_CC);
+			if (!stream->readbuf.head) {
+				/* read buffer is empty: start its accounting from scratch */
+				stream->readbuf_avail = 0;
+				stream->readbuf_ofs = 0;
+			}
+			/* Move every flushed bucket onto the read buffer. Always re-read
+			 * inp->head: once a bucket has been unlinked and appended to readbuf,
+			 * its next pointer no longer leads through inp. Appending bucket by
+			 * bucket (rather than copying the brigade struct) also keeps the
+			 * buckets attached to &stream->readbuf. */
+			while ((bucket = inp->head)) {
+				php_stream_bucket_unlink(bucket TSRMLS_CC);
+				php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC);
+				stream->readbuf_avail += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len;
+			}
} else if (chain == &(stream->writefilters)) {
/* Send flushed data to the stream */
while ((bucket = inp->head)) {
- stream->ops->write(stream, bucket->buf, bucket->buflen TSRMLS_CC);
+ char *data;
+ int datalen;
+
+ if (bucket->is_unicode) {
+ data = bucket->buf.ustr.val;
+ datalen = bucket->buf.ustr.len * sizeof(UChar);
+ } else {
+ data = bucket->buf.str.val;
+ datalen = bucket->buf.str.len;
+ }
+ stream->ops->write(stream, data, datalen TSRMLS_CC);
php_stream_bucket_unlink(bucket TSRMLS_CC);
php_stream_bucket_delref(bucket TSRMLS_CC);
}
return filter;
}
+/* Replace *pbucket with a unicode bucket holding the same data, converted
+ * with the runtime-encoding converter. The new bucket takes the old one's
+ * place in its brigade (if any) and *pbucket is updated to point at it.
+ *
+ * Returns SUCCESS (also when the bucket is already unicode) or FAILURE.
+ * On FAILURE the original bucket is left linked and untouched.
+ *
+ * NOTE(review): offset is never read or adjusted here, although callers pass
+ * the read-buffer offset -- confirm whether it should be rescaled. */
+PHPAPI int php_stream_bucket_tounicode(php_stream *stream, php_stream_bucket **pbucket, off_t *offset TSRMLS_DC)
+{
+	int is_persistent = php_stream_is_persistent(stream);
+	php_stream_bucket *bucket = *pbucket, *prior = bucket->prev, *next = bucket->next;
+	php_stream_bucket *converted;
+	php_stream_bucket_brigade *brigade = bucket->brigade;
+	UErrorCode status = U_ZERO_ERROR;
+	UChar *val = NULL;
+	int32_t len = 0;
+
+	if (bucket->is_unicode) {
+		return SUCCESS;
+	}
+
+	zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &val, &len, bucket->buf.str.val, bucket->buf.str.len, &status);
+
+	if (U_FAILURE(status)) {
+		if (val) {
+			efree(val);
+		}
+		return FAILURE;
+	}
+
+	/* The converted buffer is non-persistent, so pass buf_persistent = 0:
+	 * a persistent stream then gets its own persistent copy, while a
+	 * non-persistent stream adopts val directly (own_buf = 1). */
+	converted = php_stream_bucket_new_unicode(stream, val, len, 1, 0 TSRMLS_CC);
+	if (val && (is_persistent || converted == NULL)) {
+		/* val was copied (persistent) or never adopted (allocation failure) */
+		efree(val);
+	}
+	if (converted == NULL) {
+		return FAILURE;
+	}
+
+	/* only drop the old bucket once its replacement exists */
+	php_stream_bucket_unlink(bucket TSRMLS_CC);
+	php_stream_bucket_delref(bucket TSRMLS_CC);
+
+	/* splice the replacement into the position the old bucket occupied */
+	converted->brigade = brigade;
+	converted->prev = prior;
+	converted->next = next;
+
+	if (prior) {
+		prior->next = converted;
+	} else if (brigade) {
+		brigade->head = converted;
+	}
+
+	if (next) {
+		next->prev = converted;
+	} else if (brigade) {
+		brigade->tail = converted;
+	}
+
+	*pbucket = converted;
+
+	return SUCCESS;
+}
+
+/* Replace *pbucket with a string (non-unicode) bucket holding the same data,
+ * converted with the runtime-encoding converter. The new bucket takes the
+ * old one's place in its brigade (if any) and *pbucket is updated.
+ *
+ * Returns SUCCESS (also when the bucket is already a string bucket) or
+ * FAILURE. On FAILURE the original bucket is left linked and untouched.
+ *
+ * NOTE(review): offset is never read or adjusted here, although callers pass
+ * the read-buffer offset -- confirm whether it should be rescaled. */
+PHPAPI int php_stream_bucket_tostring(php_stream *stream, php_stream_bucket **pbucket, off_t *offset TSRMLS_DC)
+{
+	int is_persistent = php_stream_is_persistent(stream);
+	php_stream_bucket *bucket = *pbucket, *prior = bucket->prev, *next = bucket->next;
+	php_stream_bucket *converted;
+	php_stream_bucket_brigade *brigade = bucket->brigade;
+	UErrorCode status = U_ZERO_ERROR;
+	char *val = NULL;
+	int len = 0;
+
+	if (!bucket->is_unicode) {
+		return SUCCESS;
+	}
+
+	zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &val, &len, bucket->buf.ustr.val, bucket->buf.ustr.len, &status);
+
+	if (U_FAILURE(status)) {
+		if (val) {
+			efree(val);
+		}
+		return FAILURE;
+	}
+
+	/* The converted buffer is non-persistent, so pass buf_persistent = 0:
+	 * a persistent stream then gets its own persistent copy, while a
+	 * non-persistent stream adopts val directly (own_buf = 1). */
+	converted = php_stream_bucket_new(stream, val, len, 1, 0 TSRMLS_CC);
+	if (val && (is_persistent || converted == NULL)) {
+		/* val was copied (persistent) or never adopted (allocation failure) */
+		efree(val);
+	}
+	if (converted == NULL) {
+		return FAILURE;
+	}
+
+	/* only drop the old bucket once its replacement exists */
+	php_stream_bucket_unlink(bucket TSRMLS_CC);
+	php_stream_bucket_delref(bucket TSRMLS_CC);
+
+	/* splice the replacement into the position the old bucket occupied */
+	converted->brigade = brigade;
+	converted->prev = prior;
+	converted->next = next;
+
+	if (prior) {
+		prior->next = converted;
+	} else if (brigade) {
+		brigade->head = converted;
+	}
+
+	if (next) {
+		next->prev = converted;
+	} else if (brigade) {
+		brigade->tail = converted;
+	}
+
+	*pbucket = converted;
+
+	return SUCCESS;
+}
+
/*
* Local variables:
* tab-width: 4
struct _php_stream_context {
php_stream_notifier *notifier;
+ char *output_encoding; /* unicode->string character set */
+ char *input_encoding; /* string->unicode character set */
+ int default_mode; /* default fopen mode -- PHP_FILE_BINARY vs. PHP_FILE_TEXT -- potentially support other fpc() flags later */
zval *options; /* hash keyed by wrapper family or specific wrapper */
zval *links; /* hash keyed by hostent for connection pooling */
int rsrc_id; /* used for auto-cleanup */
php_stream_bucket *next, *prev;
php_stream_bucket_brigade *brigade;
- char *buf;
- size_t buflen;
+ union {
+ struct {
+ char *val;
+ size_t len;
+ } str;
+ struct {
+ UChar *val;
+ int32_t len;
+ } ustr;
+ } buf;
+
/* if non-zero, buf should be pefreed when the bucket is destroyed */
- int own_buf;
- int is_persistent;
-
+ char own_buf;
+ char is_persistent;
+ char is_unicode;
+
/* destroy this struct when refcount falls to zero */
int refcount;
};
/* Buckets API. */
BEGIN_EXTERN_C()
PHPAPI php_stream_bucket *php_stream_bucket_new(php_stream *stream, char *buf, size_t buflen, int own_buf, int buf_persistent TSRMLS_DC);
+PHPAPI php_stream_bucket *php_stream_bucket_new_unicode(php_stream *stream, UChar *buf, int32_t buflen, int own_buf, int buf_persistent TSRMLS_DC);
PHPAPI int php_stream_bucket_split(php_stream_bucket *in, php_stream_bucket **left, php_stream_bucket **right, size_t length TSRMLS_DC);
PHPAPI void php_stream_bucket_delref(php_stream_bucket *bucket TSRMLS_DC);
#define php_stream_bucket_addref(bucket) (bucket)->refcount++
PHPAPI void php_stream_bucket_append(php_stream_bucket_brigade *brigade, php_stream_bucket *bucket TSRMLS_DC);
PHPAPI void php_stream_bucket_unlink(php_stream_bucket *bucket TSRMLS_DC);
PHPAPI php_stream_bucket *php_stream_bucket_make_writeable(php_stream_bucket *bucket TSRMLS_DC);
+PHPAPI int php_stream_bucket_tounicode(php_stream *stream, php_stream_bucket **pbucket, off_t *offset TSRMLS_DC);
+PHPAPI int php_stream_bucket_tostring(php_stream *stream, php_stream_bucket **pbucket, off_t *offset TSRMLS_DC);
END_EXTERN_C()
-#define PSFS_FLAG_NORMAL 0 /* regular read/write */
-#define PSFS_FLAG_FLUSH_INC 1 /* an incremental flush */
-#define PSFS_FLAG_FLUSH_CLOSE 2 /* final flush prior to closing */
+#define PSFS_FLAG_NORMAL 0 /* regular read/write */
+#define PSFS_FLAG_FLUSH_INC 1 /* an incremental flush */
+#define PSFS_FLAG_FLUSH_CLOSE 2 /* final flush prior to closing */
+
+/* Filter capability flags (php_stream_filter_ops.flags).
+ * Bits 0-1 describe which bucket types a filter accepts, bits 2-3 which
+ * types it can produce. */
+#define PSFO_FLAG_ACCEPTS_STRING (1<<0) /* can process non-unicode buckets */
+#define PSFO_FLAG_ACCEPTS_UNICODE (1<<1) /* can process unicode buckets */
+#define PSFO_FLAG_ACCEPTS_ANY (PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_ACCEPTS_UNICODE)
+
+#define PSFO_FLAG_OUTPUTS_STRING (1<<2) /* can produce non-unicode buckets */
+#define PSFO_FLAG_OUTPUTS_UNICODE (1<<3) /* can produce unicode buckets */
+#define PSFO_FLAG_OUTPUTS_ANY (PSFO_FLAG_OUTPUTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE)
+
+/* produces buckets of the same type as provided */
+/* NOTE: composite mask (marker bit 4 plus every accept and output bit).
+ * "flags & PSFO_FLAG_OUTPUTS_SAME" is non-zero for any filter that sets an
+ * accept or output bit; compare against the full value to detect it. */
+#define PSFO_FLAG_OUTPUTS_SAME ((1<<4) | PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY)
+
+/* produces buckets of the opposite type as provided */
+/* NOTE: composite mask (marker bit 5 plus every accept and output bit);
+ * the same full-value comparison caveat applies. */
+#define PSFO_FLAG_OUTPUTS_OPPOSITE ((1<<5) | PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY)
+
+/* Shifting the accept bits left by PSFO_FLAG_ACCEPT_SHIFT lines them up with
+ * the corresponding output bits of the preceding filter. */
+#define PSFO_FLAG_ACCEPT_MASK PSFO_FLAG_ACCEPTS_ANY
+#define PSFO_FLAG_ACCEPT_SHIFT 2 /* For comparing filter to filter bucket passing compatibility */
typedef struct _php_stream_filter_ops {
void (*dtor)(php_stream_filter *thisfilter TSRMLS_DC);
const char *label;
-
+
+ int flags;
} php_stream_filter_ops;
typedef struct _php_stream_filter_chain {
/* filters are auto_registered when they're applied */
int rsrc_id;
+ int flags;
};
/* stack filter onto a stream */
BEGIN_EXTERN_C()
PHPAPI void _php_stream_filter_prepend(php_stream_filter_chain *chain, php_stream_filter *filter TSRMLS_DC);
PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream_filter *filter TSRMLS_DC);
+PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC);
+PHPAPI int _php_stream_filter_output_prefer_unicode(php_stream_filter *filter TSRMLS_DC);
PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS_DC);
PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC);
PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC);
#define php_stream_filter_prepend(chain, filter) _php_stream_filter_prepend((chain), (filter) TSRMLS_CC)
#define php_stream_filter_append(chain, filter) _php_stream_filter_append((chain), (filter) TSRMLS_CC)
#define php_stream_filter_flush(filter, finish) _php_stream_filter_flush((filter), (finish) TSRMLS_CC)
+#define php_stream_filter_check_chain(chain) _php_stream_filter_check_chain((chain) TSRMLS_CC)
+#define php_stream_filter_output_prefer_unicode(filter) _php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC)
#define php_stream_is_filtered(stream) ((stream)->readfilters.head || (stream)->writefilters.head)
stream->wrapperdata = NULL;
}
- if (stream->readbuf) {
- pefree(stream->readbuf, stream->is_persistent);
- stream->readbuf = NULL;
+ while (stream->readbuf.head) {
+ php_stream_bucket *bucket = stream->readbuf.head;
+
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
}
if (stream->is_persistent && (close_options & PHP_STREAM_FREE_PERSISTENT)) {
static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_DC)
{
- /* allocate/fill the buffer */
-
if (stream->readfilters.head) {
char *chunk_buf;
int err_flag = 0;
/* allocate a buffer for reading chunks */
chunk_buf = emalloc(stream->chunk_size);
- while (!err_flag && (stream->writepos - stream->readpos < (off_t)size)) {
+ while (!err_flag && (stream->readbuf_avail < (off_t)size)) {
size_t justread = 0;
int flags;
php_stream_bucket *bucket;
/* we get here when the last filter in the chain has data to pass on.
* in this situation, we are passing the brig_in brigade into the
* stream read buffer */
- while (brig_inp->head) {
- bucket = brig_inp->head;
- /* grow buffer to hold this bucket
- * TODO: this can fail for persistent streams */
- if (stream->readbuflen - stream->writepos < bucket->buflen) {
- stream->readbuflen += bucket->buflen;
- stream->readbuf = perealloc(stream->readbuf, stream->readbuflen,
- stream->is_persistent);
- }
- memcpy(stream->readbuf + stream->writepos, bucket->buf, bucket->buflen);
- stream->writepos += bucket->buflen;
-
+ while ((bucket = brig_inp->head)) {
+ php_stream_bucket *tail = stream->readbuf.tail;
php_stream_bucket_unlink(bucket TSRMLS_CC);
- php_stream_bucket_delref(bucket TSRMLS_CC);
+						/* Re-join a surrogate pair that was split across two buckets:
+						 * if the new bucket starts with a trail surrogate and the read
+						 * buffer's tail ends with a lead surrogate, move the lead unit
+						 * to the front of the new bucket. Empty buckets are skipped so
+						 * that neither val[0] nor val[len - 1] is read out of bounds. */
+						if (bucket->is_unicode && bucket->buf.ustr.len > 0 &&
+							U16_IS_SURROGATE(*bucket->buf.ustr.val) &&
+							!U16_IS_SURROGATE_LEAD(*bucket->buf.ustr.val) &&
+							tail && tail->is_unicode && tail->buf.ustr.len > 0 &&
+							U16_IS_SURROGATE(tail->buf.ustr.val[tail->buf.ustr.len - 1]) &&
+							U16_IS_SURROGATE_LEAD(tail->buf.ustr.val[tail->buf.ustr.len - 1])) {
+							/* Surrogate pair got split between buckets -- Unlikely */
+							UChar *tmp;
+
+							/* assumes peumalloc() takes a UChar count, as the "len + 1"
+							 * argument implies -- TODO confirm */
+							tmp = peumalloc(bucket->buf.ustr.len + 1, bucket->is_persistent);
+							*tmp = tail->buf.ustr.val[--tail->buf.ustr.len];
+							/* the moved unit is re-counted below via the new bucket's length */
+							stream->readbuf_avail--;
+							/* tmp is a UChar*, so "+ 1" skips exactly the lead unit just stored */
+							memcpy(tmp + 1, bucket->buf.ustr.val, UBYTES(bucket->buf.ustr.len));
+							if (bucket->own_buf) {
+								/* only release the old buffer if this bucket owns it */
+								pefree(bucket->buf.ustr.val, bucket->is_persistent);
+							}
+							bucket->buf.ustr.val = tmp;
+							bucket->buf.ustr.len++;
+							bucket->own_buf = 1;
+
+							if (tail->buf.ustr.len <= 0) {
+								/* Tail was only a one UChar bucket */
+								php_stream_bucket_unlink(tail TSRMLS_CC);
+								php_stream_bucket_delref(tail TSRMLS_CC);
+							} else if (tail == stream->readbuf.head && (tail->buf.ustr.len <= stream->readbuf_ofs)) {
+								/* Tail was head and last char was only unused portion */
+								php_stream_bucket_unlink(tail TSRMLS_CC);
+								php_stream_bucket_delref(tail TSRMLS_CC);
+								stream->readbuf_ofs = 0;
+							}
+						}
+						php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC);
+						stream->readbuf_avail += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len;
}
-
break;
case PSFS_FEED_ME:
} else {
/* is there enough data in the buffer ? */
- if (stream->writepos - stream->readpos < (off_t)size) {
+ if (stream->readbuf_avail < (off_t)size) {
+ char *chunk_buf;
size_t justread = 0;
+ int is_persistent = php_stream_is_persistent(stream);
- /* reduce buffer memory consumption if possible, to avoid a realloc */
- if (stream->readbuf && stream->readbuflen - stream->writepos < stream->chunk_size) {
- memmove(stream->readbuf, stream->readbuf + stream->readpos, stream->readbuflen - stream->readpos);
- stream->writepos -= stream->readpos;
- stream->readpos = 0;
- }
-
- /* grow the buffer if required
- * TODO: this can fail for persistent streams */
- if (stream->readbuflen - stream->writepos < stream->chunk_size) {
- stream->readbuflen += stream->chunk_size;
- stream->readbuf = perealloc(stream->readbuf, stream->readbuflen,
- stream->is_persistent);
- }
+ chunk_buf = pemalloc(stream->chunk_size, is_persistent);
+ justread = stream->ops->read(stream, chunk_buf, stream->chunk_size TSRMLS_CC);
- justread = stream->ops->read(stream, stream->readbuf + stream->writepos,
- stream->readbuflen - stream->writepos
- TSRMLS_CC);
+ if (justread == (size_t)-1 || justread == 0) {
+ pefree(chunk_buf, is_persistent);
+ } else {
+ php_stream_bucket *bucket;
- if (justread != (size_t)-1) {
- stream->writepos += justread;
+ bucket = php_stream_bucket_new(stream, chunk_buf, justread, 1, is_persistent TSRMLS_CC);
+ php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC);
+ stream->readbuf_avail += justread;
}
}
}
PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS_DC)
{
+ php_stream_bucket *bucket;
size_t toread = 0, didread = 0;
while (size > 0) {
-
/* take from the read buffer first.
* It is possible that a buffered stream was switched to non-buffered, so we
* drain the remainder of the buffer before using the "raw" read mode for
* the excess */
- if (stream->writepos > stream->readpos) {
- toread = stream->writepos - stream->readpos;
+ while (size > 0 && (bucket = stream->readbuf.head)) {
+ if (bucket->is_unicode) {
+ /* This is a string read func, convert to string first */
+ php_stream_bucket_tostring(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC);
+ }
+ toread = bucket->buf.str.len - stream->readbuf_ofs;
if (toread > size) {
toread = size;
}
-
- memcpy(buf, stream->readbuf + stream->readpos, toread);
- stream->readpos += toread;
+ memcpy(buf, bucket->buf.str.val + stream->readbuf_ofs, toread);
+ stream->readbuf_ofs += toread;
+ stream->readbuf_avail -= toread;
+ if (stream->readbuf_ofs >= bucket->buf.str.len) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ }
size -= toread;
buf += toread;
didread += toread;
break;
}
+ /* just break anyway, to avoid greedy read */
+ if (didread > 0 && (stream->wrapper != &php_plain_files_wrapper)) {
+ break;
+ }
+
if (!stream->readfilters.head && (stream->flags & PHP_STREAM_FLAG_NO_BUFFER || stream->chunk_size == 1)) {
toread = stream->ops->read(stream, buf, size TSRMLS_CC);
- } else {
- php_stream_fill_read_buffer(stream, size TSRMLS_CC);
+ if (toread <= 0) {
+ break;
+ }
+ buf += toread;
+ size -= toread;
+ didread += toread;
+ continue;
+ }
+
+ php_stream_fill_read_buffer(stream, size TSRMLS_CC);
+ if (stream->readbuf_avail <= 0) {
+ /* EOF, or temporary end of data (for non-blocking mode). */
+ break;
+ }
+ }
+
+ if (didread > 0) {
+ stream->position += didread;
+ }
+ return didread;
+}
- toread = stream->writepos - stream->readpos;
+PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int32_t size TSRMLS_DC)
+{
+ php_stream_bucket *bucket;
+ size_t toread = 0, didread = 0;
+
+ while (size > 0) {
+ /* take from the read buffer first.
+ * It is possible that a buffered stream was switched to non-buffered, so we
+ * drain the remainder of the buffer before using the "raw" read mode for
+ * the excess */
+
+ while (size > 0 && (bucket = stream->readbuf.head)) {
+ UChar lastchar = 0;
+
+ if (!bucket->is_unicode) {
+ /* This is a unicode read func, convert to unicode first */
+ php_stream_bucket_tounicode(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC);
+ }
+ toread = bucket->buf.ustr.len - stream->readbuf_ofs;
if (toread > size) {
toread = size;
}
-
- if (toread > 0) {
- memcpy(buf, stream->readbuf + stream->readpos, toread);
- stream->readpos += toread;
+ lastchar = *(bucket->buf.ustr.val + stream->readbuf_ofs + toread - 1);
+ if (U16_IS_SURROGATE(lastchar) && U16_IS_SURROGATE_LEAD(lastchar)) {
+ toread--;
+ /* The only time we should encounter a split surrogate is when the buffer size is truncating the data
+ In this case, reduce size along with toread to avoid getting stuck */
+ size--;
+ }
+ memcpy(buf, bucket->buf.ustr.val + stream->readbuf_ofs, toread * sizeof(UChar));
+ stream->readbuf_ofs += toread;
+ stream->readbuf_avail -= toread;
+ if (stream->readbuf_ofs >= bucket->buf.ustr.len) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
}
- }
- if (toread > 0) {
- didread += toread;
- buf += toread;
size -= toread;
- } else {
- /* EOF, or temporary end of data (for non-blocking mode). */
+ buf += toread;
+ didread += toread;
+ }
+
+ /* ignore eof here; the underlying state might have changed */
+ if (size == 0) {
break;
}
/* just break anyway, to avoid greedy read */
- if (stream->wrapper != &php_plain_files_wrapper) {
+ if (didread > 0 && (stream->wrapper != &php_plain_files_wrapper)) {
+ break;
+ }
+
+ php_stream_fill_read_buffer(stream, size * sizeof(UChar) TSRMLS_CC);
+ if (stream->readbuf_avail <= 0) {
+ /* EOF, or temporary end of data (for non-blocking mode). */
break;
}
}
return didread;
}
+/* buf may be NULL (in which case it will be allocated)
+ num_bytes and num_chars must be initialized upon entry to maximum for each (-1 for no maximum)
+ num_bytes/num_chars will be set on exit to actual contents of buf
+ Will return unicode/string type dependent on the first character unit in the read buf
+ Will return as many characters as possible (and permitted by max lengths) without changing unicode/string type
+ Will not split surrogate pairs */
+PHPAPI void *_php_stream_u_read(php_stream *stream, void *buf, int32_t *pnum_bytes, int32_t *pnum_chars, int *pis_unicode TSRMLS_DC)
+{
+ int grow_mode = 0;
+ int32_t num_bytes = 0, num_chars = 0;
+ int32_t max_bytes = *pnum_bytes, max_chars = *pnum_chars;
+ int32_t buflen = buf ? max_bytes : 2048;
+ int32_t bufpos = 0;
+ int is_unicode;
+ php_stream_bucket *bucket;
+
+ /* It's possible that we have a readbuf, but that it's only half of a surrogate pair */
+ if (!stream->readbuf.head ||
+ (stream->readbuf.head == stream->readbuf.tail && stream->readbuf.head->is_unicode &&
+ (stream->readbuf.head->buf.ustr.len - stream->readbuf_ofs) == 1 &&
+ U16_IS_SURROGATE(stream->readbuf.head->buf.ustr.val[stream->readbuf.head->buf.ustr.len-1]))) {
+ php_stream_fill_read_buffer(stream, max_bytes ? max_bytes : (max_chars ? max_chars : stream->chunk_size) TSRMLS_CC);
+ }
+
+
+ if (!stream->readbuf.head ||
+ (stream->readbuf.head == stream->readbuf.tail && stream->readbuf.head->is_unicode &&
+ (stream->readbuf.head->buf.ustr.len - stream->readbuf_ofs) == 1 &&
+ U16_IS_SURROGATE(stream->readbuf.head->buf.ustr.val[stream->readbuf.head->buf.ustr.len-1]))) {
+ /* Nothing to return */
+ *pnum_bytes = 0;
+ *pnum_chars = 0;
+ *pis_unicode = 0;
+ return NULL;
+ }
+
+
+ if (!buf) {
+ grow_mode = 1;
+ buf = emalloc(buflen);
+ }
+
+ is_unicode = stream->readbuf.head->is_unicode;
+ if (is_unicode) {
+ /* normalize byte boundary */
+ if (max_bytes >= 0 && (max_bytes % sizeof(UChar))) {
+ max_bytes -= (max_bytes % sizeof(UChar));
+ }
+ if (max_bytes >= 0 && max_bytes < UBYTES(max_chars)) {
+ /* max_bytes needs to be at least twice max_chars when both are provided */
+ max_chars = (max_bytes / sizeof(UChar));
+ }
+ } else {
+ if (max_chars < 0 && max_bytes >= 0) {
+ max_chars = max_bytes;
+ } else if (max_chars >= 0 && grow_mode) {
+ max_bytes = max_chars;
+ }
+ }
+
+ for (;;) {
+ if (buflen - bufpos < 1024 && max_bytes >= 0 && max_bytes > buflen) {
+ buflen += 1024;
+ if (buflen > max_bytes) {
+ buflen = max_bytes;
+ }
+ buf = erealloc(buf, buflen);
+ }
+
+ if ((bucket = stream->readbuf.head)) {
+ if ((bucket->is_unicode && !is_unicode) ||
+ (!bucket->is_unicode && is_unicode)) {
+ /* data type swap, exit now */
+ break;
+ }
+ if (bucket->is_unicode) {
+ UChar *s = bucket->buf.ustr.val + stream->readbuf_ofs, *p;
+ int bytes_in_buf, chars_in_buf;
+ int32_t ofs = 0;
+
+ chars_in_buf = u_countChar32(s, bucket->buf.ustr.len - stream->readbuf_ofs);
+
+ if (chars_in_buf > max_chars && max_chars >= 0) {
+ chars_in_buf = max_chars;
+ }
+ /* u_countChar32 tells us that we won't overrun anyway */
+ U16_FWD_N_UNSAFE(s, ofs, chars_in_buf);
+ p = s + ofs;
+ bytes_in_buf = UBYTES(ofs);
+ if (bytes_in_buf > (max_bytes - num_bytes)) {
+ bytes_in_buf = max_bytes - num_bytes;
+ bytes_in_buf -= bytes_in_buf & 1; /* normalize */
+ p = s + (bytes_in_buf >> 1);
+ if (p > s && U16_IS_SURROGATE(p[-1]) && U16_IS_SURROGATE_LEAD(p[-1])) {
+ /* Don't split surrogate pairs */
+ p--;
+ bytes_in_buf -= UBYTES(1);
+ }
+ if (bytes_in_buf <= 0) {
+ /* No room to copy data (surrogate pair) */
+ break;
+ }
+ chars_in_buf = u_countChar32(s, p - s);
+ }
+ memcpy((char *)buf + num_bytes, s, bytes_in_buf);
+ num_bytes += bytes_in_buf;
+ num_chars += chars_in_buf;
+ stream->readbuf_ofs += p - s;
+ stream->readbuf_avail -= p - s;
+ if (stream->readbuf_ofs >= bucket->buf.ustr.len) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ } else if (stream->readbuf_ofs == (bucket->buf.ustr.len - 1) &&
+ U16_IS_SURROGATE(bucket->buf.ustr.val[bucket->buf.ustr.len - 1]) &&
+ bucket->next && bucket->next->is_unicode) {
+ /* Only one char left in the bucket, avoid already split surrogates getting "stuck" -- Should never happen thanks to fill_read_buffer */
+ php_stream_bucket *next_bucket = bucket->next;
+
+ bucket->buf.ustr.val = peurealloc(bucket->buf.ustr.val, next_bucket->buf.ustr.len + 1, bucket->is_persistent);
+ bucket->buf.ustr.val[0] = bucket->buf.ustr.val[bucket->buf.ustr.len - 1];
+ memcpy(bucket->buf.ustr.val + 1, next_bucket->buf.ustr.val, UBYTES(next_bucket->buf.ustr.len));
+ php_stream_bucket_unlink(next_bucket TSRMLS_CC);
+ php_stream_bucket_delref(next_bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ } else {
+ /* Reached max limits */
+ break;
+ }
+ } else {
+ int want = (max_chars < 0 || max_chars >= buflen) ? (buflen - num_bytes) : (max_chars - num_chars);
+ int avail = bucket->buf.str.len - stream->readbuf_ofs;
+
+ if (max_bytes >= 0 && want > max_bytes) {
+ want = max_bytes;
+ }
+
+ if (want > avail) {
+ want = avail;
+ }
+
+ memcpy((char *)buf + num_bytes, bucket->buf.str.val + stream->readbuf_ofs, want);
+ stream->readbuf_ofs += want;
+ stream->readbuf_avail -= want;
+ num_bytes += want;
+ num_chars += want;
+ if (stream->readbuf_ofs >= bucket->buf.str.len) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ } else {
+ /* Reached max limit */
+ break;
+ }
+ }
+ } else {
+ /* No more data */
+ break;
+ }
+ }
+ /* Successful exit */
+ *pnum_bytes = num_bytes;
+ *pnum_chars = num_chars;
+ *pis_unicode = is_unicode;
+
+ if (num_chars == 0 && grow_mode) {
+ efree(buf);
+ buf = NULL;
+ }
+ return buf;
+}
+
PHPAPI int _php_stream_eof(php_stream *stream TSRMLS_DC)
{
/* if there is data in the buffer, it's not EOF */
- if (stream->writepos - stream->readpos > 0) {
+ if (stream->readbuf_avail > 0) {
return 0;
}
return (stream->ops->stat)(stream, ssb TSRMLS_CC);
}
+/* buf != NULL Still used by file() in ext/standard/file.c
+ buf == NULL semantics no longer supported */
PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len TSRMLS_DC)
{
size_t avail;
char *readptr;
if (!buf) {
- readptr = stream->readbuf + stream->readpos;
- avail = stream->writepos - stream->readpos;
+ return NULL;
} else {
readptr = buf;
avail = buf_len;
/* If buf == NULL, the buffer will be allocated automatically and will be of an
* appropriate length to hold the line, regardless of the line length, memory
- * permitting */
+ * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL */
+/* Binary (char) line reader: unicode buckets are converted to strings before use.
+ * Line endings follow the stream flags: '\n' by default, '\r' when PHP_STREAM_FLAG_EOL_MAC
+ * is set, and either (settling on one) while PHP_STREAM_FLAG_DETECT_EOL is set.
+ * *returned_len, when given, receives the number of bytes stored, excluding the terminator.
+ * Returns NULL at EOF when nothing was read, or when a caller-supplied buffer has maxlen == 0. */
PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
size_t *returned_len TSRMLS_DC)
{
- size_t avail = 0;
- size_t current_buf_size = 0;
+ php_stream_bucket *bucket;
size_t total_copied = 0;
- int grow_mode = 0;
- char *bufstart = buf;
+ int growmode = 0;
+ /* Last byte copied so far. Kept across buffer refills so that a '\r' ending one
+ bucket can still be matched with a '\n' starting the next one */
+ char lastchar = 0;
- if (buf == NULL) {
- grow_mode = 1;
- } else if (maxlen == 0) {
- return NULL;
+ if (!buf) {
+ maxlen = stream->chunk_size + 1;
+ buf = emalloc(maxlen);
+ growmode = 1;
}
+ if (maxlen == 0) {
+ /* No room for even the terminator; decrementing maxlen below would wrap around */
+ return NULL;
+ }
- /*
- * If the underlying stream operations block when no new data is readable,
- * we need to take extra precautions.
- *
- * If there is buffered data available, we check for a EOL. If it exists,
- * we pass the data immediately back to the caller. This saves a call
- * to the read implementation and will not block where blocking
- * is not necessary at all.
- *
- * If the stream buffer contains more data than the caller requested,
- * we can also avoid that costly step and simply return that data.
- */
+ /* Leave room for NULL */
+ maxlen--;
- for (;;) {
- avail = stream->writepos - stream->readpos;
+ for(;;) {
+ /* Fill buf with buffered data
+ until no space is left in the buffer
+ or EOL is found */
- if (avail > 0) {
- size_t cpysz = 0;
- char *readptr;
+ /* consumed readbuf if possible */
+ while ((bucket = stream->readbuf.head)) {
char *eol;
- int done = 0;
+ size_t tocopy;
+ size_t wanted = maxlen - total_copied;
+ int bucket_consumed = 0;
- readptr = stream->readbuf + stream->readpos;
- eol = php_stream_locate_eol(stream, NULL, 0 TSRMLS_CC);
+ if (bucket->is_unicode) {
+ /* This is a string read func, convert to string first */
+ php_stream_bucket_tostring(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC);
+ }
- if (eol) {
- cpysz = eol - readptr + 1;
- done = 1;
+ if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && lastchar == '\r') {
+ /* Line ending was actually found in the last char of the last bucket
+ Since it was \r it could have been MAC or DOS */
+ stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL;
+ if (bucket->buf.str.val[stream->readbuf_ofs] == '\n') {
+ /* First byte here is a \n, put them together and you get DOS line endings */
+ stream->readbuf_ofs++;
+ stream->readbuf_avail--;
+ buf[total_copied++] = '\n';
+ /* unlikely -- It'd mean a one byte bucket -- possible though */
+ if (stream->readbuf_ofs >= bucket->buf.str.len) {
+ stream->readbuf_ofs = 0;
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ }
+ } else {
+ /* Seeing no \n in the first char of this bucket, we know it was MAC */
+ stream->flags |= PHP_STREAM_FLAG_EOL_MAC;
+ }
+ goto exit_getline;
+ } else if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
+ char *cr, *lf;
+ lf = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\n', bucket->buf.str.len - stream->readbuf_ofs);
+ cr = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\r', bucket->buf.str.len - stream->readbuf_ofs);
+ eol = (cr && (!lf || cr < (lf - 1))) ? cr : lf;
+ } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) {
+ eol = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\r', bucket->buf.str.len - stream->readbuf_ofs);
} else {
- cpysz = avail;
+ eol = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\n', bucket->buf.str.len - stream->readbuf_ofs);
}
- if (grow_mode) {
- /* allow room for a NUL. If this realloc is really a realloc
- * (ie: second time around), we get an extra byte. In most
- * cases, with the default chunk size of 8K, we will only
- * incur that overhead once. When people have lines longer
- * than 8K, we waste 1 byte per additional 8K or so.
- * That seems acceptable to me, to avoid making this code
- * hard to follow */
- bufstart = erealloc(bufstart, current_buf_size + cpysz + 1);
- current_buf_size += cpysz + 1;
- buf = bufstart + total_copied;
- } else {
- if (cpysz >= maxlen - 1) {
- cpysz = maxlen - 1;
- done = 1;
+ /* No \r or \n found in bucket -- grab it all */
+ if (!eol) {
+ eol = bucket->buf.str.val + bucket->buf.str.len - 1;
+ }
+ tocopy = eol - (bucket->buf.str.val + stream->readbuf_ofs) + 1;
+
+ /* maxlen exceeded */
+ if (tocopy > wanted && growmode) {
+ if (tocopy - wanted > stream->chunk_size) {
+ maxlen += tocopy - wanted;
+ } else {
+ maxlen += stream->chunk_size;
}
+ buf = erealloc(buf, maxlen + 1);
+ wanted = maxlen - total_copied;
}
- memcpy(buf, readptr, cpysz);
+ if (tocopy > wanted) {
+ tocopy = wanted;
+ }
- stream->position += cpysz;
- stream->readpos += cpysz;
- buf += cpysz;
- maxlen -= cpysz;
- total_copied += cpysz;
+ memcpy(buf + total_copied, bucket->buf.str.val + stream->readbuf_ofs, tocopy);
+ total_copied += tocopy;
+ stream->readbuf_ofs += tocopy;
+ stream->readbuf_avail -= tocopy;
+ lastchar = buf[total_copied-1];
- if (done) {
- break;
+ if (stream->readbuf_ofs >= bucket->buf.str.len) {
+ stream->readbuf_ofs = 0;
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ bucket_consumed = 1;
}
- } else if (stream->eof) {
- break;
- } else {
- /* XXX: Should be fine to always read chunk_size */
- size_t toread;
-
- if (grow_mode) {
- toread = stream->chunk_size;
- } else {
- toread = maxlen - 1;
- if (toread > stream->chunk_size) {
- toread = stream->chunk_size;
- }
+
+ if (total_copied >= maxlen) {
+ goto exit_getline;
}
- php_stream_fill_read_buffer(stream, toread TSRMLS_CC);
+ if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL &&
+ bucket_consumed && lastchar == '\r') {
+ /* Could be MAC, could be DOS...
+ Need to check the first char of the next bucket to be sure */
+ continue;
+ }
- if (stream->writepos - stream->readpos == 0) {
- break;
+ if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
+ if (lastchar == '\r' || lastchar == '\n') {
+ /* Line ending style is now known: stop detecting (clear the flag, never toggle it back on) */
+ stream->flags &= ~PHP_STREAM_FLAG_DETECT_EOL;
+ if (lastchar == '\r') {
+ /* if there were a \n in this bucket after the \r, we would be looking at it */
+ stream->flags |= PHP_STREAM_FLAG_EOL_MAC;
+ }
+ goto exit_getline;
+ }
+ } else if (lastchar == ((stream->flags & PHP_STREAM_FLAG_EOL_MAC) ? '\r' : '\n')) {
+ /* Only the configured line ending terminates the line; the flags are left alone */
+ goto exit_getline;
}
}
- }
- if (total_copied == 0) {
- if (grow_mode) {
- assert(bufstart == NULL);
+ if (stream->eof) {
+ if (total_copied == 0) {
+ if (growmode) {
+ efree(buf);
+ }
+ return NULL;
+ }
+ goto exit_getline;
}
- return NULL;
+
+ if (maxlen - total_copied) {
+ size_t bufneeded = maxlen - total_copied;
+
+ if (growmode) {
+ bufneeded = stream->chunk_size;
+ }
+ php_stream_fill_read_buffer(stream, bufneeded TSRMLS_CC);
+ }
+
}
- buf[0] = '\0';
+ exit_getline:
+
if (returned_len) {
*returned_len = total_copied;
}
+ buf[total_copied] = 0;
+ stream->position += total_copied;
+
+ return buf;
+}
+
+/* Unicode-aware counterpart of _php_stream_get_line().
+ * If buf == NULL, the buffer will be allocated automatically and will be of an
+ * appropriate length to hold the line, regardless of the line length, memory
+ * permitting -- the returned data is always followed by a terminating NULL.
+ * *pmax_bytes / *pmax_chars hold the limits on entry and are set on exit to the amount
+ * actually returned; *pis_unicode is set to the type of the returned data, taken from the
+ * first bucket in the read buffer.
+ * When that bucket holds binary data the call is forwarded to php_stream_get_line() and the
+ * result is really a char* cast to UChar*.
+ * Returns NULL when no data could be read. */
+PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC)
+{
+	php_stream_bucket *bucket;
+	int32_t num_bytes = 0, num_chars = 0;
+	int32_t max_bytes = *pmax_bytes, max_chars = *pmax_chars;
+	int growmode = 0, is_unicode;
+	/* Last code unit copied so far. Kept across buffer refills so that a '\r' ending one
+	   bucket can still be matched with a '\n' starting the next one */
+	UChar lastchar = 0;
+
+	while (!stream->readbuf.head) {
+		/* Nothing buffered, get an idea of the data type by polling */
+		int32_t fillsize = (max_chars > 0) ? max_chars : ((max_bytes > 0) ? max_bytes : stream->chunk_size);
+
+		php_stream_fill_read_buffer(stream, fillsize TSRMLS_CC);
+		if (!stream->readbuf.head) {
+			*pmax_bytes = 0;
+			*pmax_chars = 0;
+			*pis_unicode = 0;
+			return NULL;
+		}
+	}
+
+	*pis_unicode = is_unicode = stream->readbuf.head->is_unicode;
+
+	if (!is_unicode) {
+		/* Wrap normal get_line() */
+		/* size_t to match the callee's out-parameter; preset because the callee
+		   does not store a length when it returns NULL */
+		size_t returned_len = 0;
+		char *retbuf = php_stream_get_line(stream, (char*)buf, max_chars, &returned_len);
+
+		*pmax_chars = returned_len;
+		*pmax_bytes = returned_len;
+		return (UChar*)retbuf;
+	}
+
+	/* Now act like php_stream_u_read(), but stopping at 000A, 000D, or 000D 000A */
+
+	if (!buf) {
+		max_bytes = UBYTES(257);
+		buf = emalloc(max_bytes);
+		growmode = 1;
+	} else if (max_bytes > 0) {
+		/* Only whole UChars can be stored: ignore an odd trailing byte */
+		max_bytes -= max_bytes % (int32_t)sizeof(UChar);
+	}
+
+	/* Leave room for NULL */
+	max_bytes -= UBYTES(1);
+
+	for(;;) {
+		/* Fill buf with buffered data
+		   until no space is left in the buffer
+		   or EOL is found */
+
+		/* consumed readbuf if possible */
+		while ((bucket = stream->readbuf.head)) {
+			UChar *eol, *s;
+			int32_t want_chars = max_chars - num_chars;
+			int32_t want_bytes = max_bytes - num_bytes;
+			int32_t count_chars;
+			int32_t count_bytes;
+			int bucket_consumed = 0;
+
+			if (!bucket->is_unicode) {
+				/* Done with unicode data, bail as though EOL was reached (even though it wasn't) */
+				goto exit_ugetline;
+			}
+
+			if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && lastchar == '\r') {
+				/* Line ending was actually found in the last char of the last bucket
+				   Since it was \r it could have been MAC or DOS */
+				stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL;
+				if (bucket->buf.ustr.val[stream->readbuf_ofs] == '\n') {
+					/* First byte here is a \n, put them together and you get DOS line endings */
+					stream->readbuf_ofs++;
+					stream->readbuf_avail--;
+					buf[num_bytes >> 1] = '\n'; /* Can't use num_chars here, surrogate pairs will foul it up */
+					num_bytes += UBYTES(1);
+					num_chars++;
+					/* unlikely -- It'd mean a one UChar bucket -- possible though */
+					if (stream->readbuf_ofs >= bucket->buf.ustr.len) {
+						stream->readbuf_ofs = 0;
+						php_stream_bucket_unlink(bucket TSRMLS_CC);
+						php_stream_bucket_delref(bucket TSRMLS_CC);
+					}
+				} else {
+					/* Seeing no \n in the first char of this bucket, we know it was MAC */
+					stream->flags |= PHP_STREAM_FLAG_EOL_MAC;
+				}
+				goto exit_ugetline;
+			} else if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
+				UChar *cr, *lf;
+				lf = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\n', bucket->buf.ustr.len - stream->readbuf_ofs);
+				cr = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\r', bucket->buf.ustr.len - stream->readbuf_ofs);
+				eol = (cr && (!lf || cr < (lf - 1))) ? cr : lf;
+			} else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) {
+				eol = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\r', bucket->buf.ustr.len - stream->readbuf_ofs);
+			} else {
+				eol = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\n', bucket->buf.ustr.len - stream->readbuf_ofs);
+			}
+
+			/* No \r or \n found in bucket -- grab it all */
+			if (!eol) {
+				eol = bucket->buf.ustr.val + bucket->buf.ustr.len - 1;
+			}
+			s = bucket->buf.ustr.val + stream->readbuf_ofs;
+
+			count_bytes = UBYTES(eol - s + 1);
+			if (count_bytes > want_bytes && growmode) {
+				max_bytes = num_bytes + count_bytes + UBYTES(256);
+				want_bytes = max_bytes - num_bytes;
+				buf = erealloc(buf, max_bytes + UBYTES(1));
+			} else if (count_bytes > want_bytes) {
+				count_bytes = want_bytes;
+			}
+			/* Only look at the last unit when there is one: with no room left the index would be negative */
+			if (count_bytes > 0 &&
+				U16_IS_SURROGATE(s[(count_bytes >> 1) - 1]) &&
+				U16_IS_SURROGATE_LEAD(s[(count_bytes >> 1) - 1])) {
+				/* Don't split surrogate pairs */
+				count_bytes -= UBYTES(1);
+			}
+			if (count_bytes <= 0) {
+				/* Not enough space in buffer, just break out */
+				goto exit_ugetline;
+			}
+			count_chars = u_countChar32(s, count_bytes >> 1);
+
+			if (max_chars >= 0 && count_chars > want_chars) {
+				count_chars = want_chars;
+				count_bytes = 0;
+				U16_FWD_N_UNSAFE(s, count_bytes, count_chars);
+				count_bytes <<= 1; /* translate U16 to bytes */
+			}
+
+			/* num_bytes is a byte offset, so it must be applied to a byte pointer, not to the UChar pointer */
+			memcpy((char *)buf + num_bytes, s, count_bytes);
+			num_bytes += count_bytes;
+			num_chars += count_chars;
+			stream->readbuf_ofs += count_bytes >> 1;
+			stream->readbuf_avail -= count_bytes >> 1;
+
+			lastchar = buf[(num_bytes >> 1) - 1];
+
+			if (stream->readbuf_ofs >= bucket->buf.ustr.len) {
+				stream->readbuf_ofs = 0;
+				php_stream_bucket_unlink(bucket TSRMLS_CC);
+				php_stream_bucket_delref(bucket TSRMLS_CC);
+				bucket_consumed = 1;
+			}
+
+			if ((max_bytes >= 0 && num_bytes >= max_bytes) ||
+				(max_chars >= 0 && num_chars >= max_chars)) {
+				goto exit_ugetline;
+			}
+
+			if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL &&
+				bucket_consumed && lastchar == '\r') {
+				/* Could be MAC, could be DOS...
+				   Need to check the first char of the next bucket to be sure */
+				continue;
+			}
+
+			if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
+				if (lastchar == '\r' || lastchar == '\n') {
+					/* Line ending style is now known: stop detecting (clear the flag, never toggle it back on) */
+					stream->flags &= ~PHP_STREAM_FLAG_DETECT_EOL;
+					if (lastchar == '\r') {
+						/* if there were a \n in this bucket after the \r, we would be looking at it */
+						stream->flags |= PHP_STREAM_FLAG_EOL_MAC;
+					}
+					goto exit_ugetline;
+				}
+			} else if (lastchar == ((stream->flags & PHP_STREAM_FLAG_EOL_MAC) ? '\r' : '\n')) {
+				/* Only the configured line ending terminates the line; the flags are left alone */
+				goto exit_ugetline;
+			}
+		}
+
+		if (stream->eof) {
+			if (num_bytes == 0) {
+				if (growmode) {
+					efree(buf);
+				}
+				buf = NULL;
+			}
+			goto exit_ugetline;
+		}
+
+		if (max_bytes - num_bytes) {
+			int32_t want_bytes = max_bytes - num_bytes;
+
+			if (growmode) {
+				want_bytes = stream->chunk_size;
+			}
+			php_stream_fill_read_buffer(stream, want_bytes TSRMLS_CC);
+		}
+
+	}
+
+	exit_ugetline:
- return bufstart;
+	*pmax_chars = num_chars;
+	*pmax_bytes = num_bytes;
+	*pis_unicode = is_unicode;
+	if (buf) {
+		buf[num_bytes >> 1] = 0;
+	}
+	stream->position += num_bytes;
+
+	return buf;
}
PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *returned_len, char *delim, size_t delim_len TSRMLS_DC)
{
+ /* UTODO: Needs desperate rewriting for unicode conversion */
+ return NULL;
+
+#ifdef SMG_0
char *e, *buf;
size_t toread;
int skip = 0;
toread = maxlen;
} else {
if (delim_len == 1) {
- e = memchr(stream->readbuf + stream->readpos, *delim, stream->writepos - stream->readpos);
+ e = memchr(stream->readbuf, *delim, stream->readbuf_len);
} else {
- e = php_memnstr(stream->readbuf + stream->readpos, delim, delim_len, (stream->readbuf + stream->writepos));
+ e = php_memnstr(stream->readbuf, delim, delim_len, (stream->readbuf + stream->readbuflen));
}
if (!e) {
toread = maxlen;
} else {
- toread = e - (char *) stream->readbuf - stream->readpos;
+ toread = e - (char *) stream->readbuf;
skip = 1;
}
}
efree(buf);
return NULL;
}
+#endif
+}
+
+/* Discard everything held in the stream's read buffer: every bucket is unlinked and
+   released, then the read offset and the available count are reset to zero. */
+PHPAPI void _php_stream_flush_readbuf(php_stream *stream TSRMLS_DC)
+{
+	php_stream_bucket *head;
+
+	/* always take the current head: unlinking it promotes the next bucket */
+	for (head = stream->readbuf.head; head; head = stream->readbuf.head) {
+		php_stream_bucket_unlink(head TSRMLS_CC);
+		php_stream_bucket_delref(head TSRMLS_CC);
+	}
+	stream->readbuf_avail = 0;
+	stream->readbuf_ofs = 0;
}
/* Writes a buffer directly to a stream, using multiple of the chunk size */
/* if we have a seekable stream we need to ensure that data is written at the
* current stream->position. This means invalidating the read buffer and then
* performing a low-level seek */
+/* UTODO: FIX this
if (stream->ops->seek && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0 && stream->readpos != stream->writepos) {
- stream->readpos = stream->writepos = 0;
+*/
+ if (stream->ops->seek && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0) {
+ php_stream_flush_readbuf(stream);
stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position TSRMLS_CC);
}
* This may trigger a real write to the stream.
* Returns the number of bytes consumed from buf by the first filter in the chain.
* */
-static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, size_t count, int flags TSRMLS_DC)
+static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, size_t count, int flags, int is_unicode TSRMLS_DC)
{
size_t consumed = 0;
php_stream_bucket *bucket;
php_stream_filter *filter;
if (buf) {
- bucket = php_stream_bucket_new(stream, (char *)buf, count, 0, 0 TSRMLS_CC);
- php_stream_bucket_append(&brig_in, bucket TSRMLS_CC);
+ if (is_unicode) {
+ bucket = php_stream_bucket_new_unicode(stream, (UChar *)buf, count, 0, 0 TSRMLS_CC);
+ } else {
+ bucket = php_stream_bucket_new(stream, (char *)buf, count, 0, 0 TSRMLS_CC);
+ }
+ php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
}
for (filter = stream->writefilters.head; filter; filter = filter->next) {
/* for our return value, we are interested in the number of bytes consumed from
* the first filter in the chain */
- status = filter->fops->filter(stream, filter, brig_inp, brig_outp,
- filter == stream->writefilters.head ? &consumed : NULL, flags TSRMLS_CC);
-
+ status = filter->fops->filter(stream, filter, brig_inp, brig_outp, (filter == stream->writefilters.head) ? &consumed : NULL, flags TSRMLS_CC);
if (status != PSFS_PASS_ON) {
break;
}
* underlying stream */
while (brig_inp->head) {
bucket = brig_inp->head;
- _php_stream_write_buffer(stream, bucket->buf, bucket->buflen TSRMLS_CC);
+ if (bucket->is_unicode) {
+ _php_stream_write_buffer(stream, (char *)bucket->buf.ustr.val, UBYTES(bucket->buf.ustr.len) TSRMLS_CC);
+ } else {
+ _php_stream_write_buffer(stream, bucket->buf.str.val, bucket->buf.str.len TSRMLS_CC);
+ }
/* Potential error situation - eg: no space on device. Perhaps we should keep this brigade
* hanging around and try to write it later.
* At the moment, we just drop it on the floor
return consumed;
}
+/* Predict the type of data the next read on this stream will produce.
+   Returns 1 for unicode, 0 for string, -1 when it cannot be determined.
+   Buffered data decides when there is any; otherwise the read filter chain is walked
+   from its tail towards its head, tracking type inversions, until a filter with a
+   fixed output type is found. The unfiltered stream itself is taken to yield strings. */
+PHPAPI int _php_stream_will_read_unicode(php_stream *stream TSRMLS_DC)
+{
+	php_stream_filter *cur;
+	int flipped = 0;
+
+	/* If there are buckets available, what do they hold */
+	if (stream->readbuf.head) {
+		return stream->readbuf.head->is_unicode;
+	}
+
+	/* Not filtered == reads as string */
+	if (!stream->readfilters.head) {
+		return 0;
+	}
+
+	cur = stream->readfilters.tail;
+	while (cur) {
+		if (cur->flags & PSFO_FLAG_OUTPUTS_SAME) {
+			/* passes its input type through: look further up the chain */
+		} else if (cur->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) {
+			flipped = !flipped;
+		} else if (cur->flags & PSFO_FLAG_OUTPUTS_ANY) {
+			/* Indeterminate */
+			return -1;
+		} else if (cur->flags & PSFO_FLAG_OUTPUTS_STRING) {
+			/* If an inversion happens, it'll be unicode, otherwise string */
+			return flipped;
+		} else if (cur->flags & PSFO_FLAG_OUTPUTS_UNICODE) {
+			/* If an inversion happens, it'll be string, otherwise unicode */
+			return !flipped;
+		}
+		cur = cur->prev;
+	}
+
+	/* string comes from stream so apply same logic as filter outputting string */
+	return flipped;
+}
+
+
PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC)
{
int ret = 0;
if (stream->writefilters.head) {
- _php_stream_write_filtered(stream, NULL, 0, closing ? PSFS_FLAG_FLUSH_CLOSE : PSFS_FLAG_FLUSH_INC TSRMLS_CC);
+ _php_stream_write_filtered(stream, NULL, 0, closing ? PSFS_FLAG_FLUSH_CLOSE : PSFS_FLAG_FLUSH_INC, 0 TSRMLS_CC);
}
if (stream->ops->flush) {
}
if (stream->writefilters.head) {
- return _php_stream_write_filtered(stream, buf, count, PSFS_FLAG_NORMAL TSRMLS_CC);
+ return _php_stream_write_filtered(stream, buf, count, PSFS_FLAG_NORMAL, 0 TSRMLS_CC);
} else {
return _php_stream_write_buffer(stream, buf, count TSRMLS_CC);
}
}
+/* Write count UChar code units from buf to the stream.
+   With write filters installed the data is handed to the filter chain as a unicode bucket
+   and that call's result is returned unchanged; otherwise the raw UChar bytes are written
+   and the byte count that comes back is converted to code units.
+   Returns 0 without writing when buf is NULL, count is not positive, or the stream has no
+   write operation. */
+PHPAPI size_t _php_stream_u_write(php_stream *stream, const UChar *buf, int32_t count TSRMLS_DC)
+{
+	/* a negative count would turn into a huge byte length once passed through UBYTES() */
+	if (buf == NULL || count <= 0 || stream->ops->write == NULL) {
+		return 0;
+	}
+
+	if (stream->writefilters.head) {
+		return _php_stream_write_filtered(stream, (const char*)buf, count, PSFS_FLAG_NORMAL, 1 TSRMLS_CC);
+	} else {
+		int32_t ret;
+
+		ret = _php_stream_write_buffer(stream, (const char*)buf, UBYTES(count) TSRMLS_CC);
+
+		/* Return data points, not bytes */
+		if (ret > 0) {
+			ret >>= 1;
+		}
+		return ret;
+	}
+}
+
+
PHPAPI size_t _php_stream_printf(php_stream *stream TSRMLS_DC, const char *fmt, ...)
{
size_t count;
/* handle the case where we are in the buffer */
if ((stream->flags & PHP_STREAM_FLAG_NO_BUFFER) == 0) {
switch(whence) {
+ case SEEK_SET:
+ if (offset < stream->position ||
+ offset > stream->position + stream->readbuf_avail) {
+ break;
+ }
+ /* act like SEEK_CUR */
+ whence = SEEK_CUR;
+ offset -= stream->position;
+ /* fall through */
case SEEK_CUR:
- if (offset > 0 && offset < stream->writepos - stream->readpos) {
- stream->readpos += offset;
- stream->position += offset;
- stream->eof = 0;
+ if (offset == 0) {
+ /* nothing to do */
return 0;
}
- break;
- case SEEK_SET:
- if (offset > stream->position &&
- offset < stream->position + stream->writepos - stream->readpos) {
- stream->readpos += offset - stream->position;
- stream->position = offset;
+
+ if (offset > 0 && offset <= stream->readbuf_avail) {
+ php_stream_bucket *bucket;
+
+ while (offset && (bucket = stream->readbuf.head)) {
+ int consume = bucket->buf.str.len - stream->readbuf_ofs;
+
+ if (consume > offset) {
+ /* seeking within this bucket */
+ stream->readbuf_ofs += offset;
+ stream->readbuf_avail -= offset;
+ stream->position += offset;
+ break;
+ }
+
+ /* consume the remaining bucket */
+ stream->position += consume;
+ stream->readbuf_ofs = 0;
+ stream->readbuf_avail -= consume;
+ offset -= consume;
+
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ }
stream->eof = 0;
return 0;
}
if (stream->writefilters.head) {
_php_stream_flush(stream, 0 TSRMLS_CC);
}
-
+
switch(whence) {
case SEEK_CUR:
offset = stream->position + offset;
}
/* invalidate the buffer contents */
- stream->readpos = stream->writepos = 0;
+ php_stream_flush_readbuf(stream);
return ret;
}
char *path_to_open;
int persistent = options & STREAM_OPEN_PERSISTENT;
char *copy_of_path = NULL;
-
+ int implicit_mode[16];
+ int modelen = strlen(mode);
if (opened_path) {
*opened_path = NULL;
return NULL;
}
+ memcpy(implicit_mode, mode, modelen);
+ if (context && context->default_mode && modelen < 15 && !strchr(mode, 't') && !strchr(mode, 'b')) {
+ if (context->default_mode & PHP_FILE_BINARY) {
+ implicit_mode[modelen++] = 'b';
+ } else if (context->default_mode & PHP_FILE_TEXT) {
+ implicit_mode[modelen++] = 't';
+ }
+ implicit_mode[modelen] = 0;
+ }
+
if (wrapper) {
stream = wrapper->wops->stream_opener(wrapper,
- path_to_open, mode, options ^ REPORT_ERRORS,
+ path_to_open, implicit_mode, options ^ REPORT_ERRORS,
opened_path, context STREAMS_REL_CC TSRMLS_CC);
/* if the caller asked for a persistent stream but the wrapper did not
if (stream) {
stream->wrapper = wrapper;
+ memcpy(stream->mode, implicit_mode, modelen + 1);
}
}
}
}
+ /* Output encoding on text mode streams defaults to utf8 unless specified in context parameter */
+ if (stream && strchr(implicit_mode, 't') && (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+'))) {
+ php_stream_filter *filter;
+ char *encoding = (context && context->output_encoding) ? context->output_encoding : "utf8";
+ char *filtername;
+ int encoding_len = strlen(encoding);
+
+ filtername = emalloc(encoding_len + sizeof("unicode.to."));
+ memcpy(filtername, "unicode.to.", sizeof("unicode.to.") - 1);
+ memcpy(filtername + sizeof("unicode.to.") - 1, encoding, encoding_len + 1);
+
+ filter = php_stream_filter_create(filtername, NULL, persistent TSRMLS_CC);
+ if (!filter) {
+ php_stream_wrapper_log_error(wrapper, options TSRMLS_CC, "Failed applying output encoding");
+ } else {
+ php_stream_filter_append(&stream->writefilters, filter);
+ }
+ efree(filtername);
+ }
+
+ if (stream && strchr(implicit_mode, 't') && (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+'))) {
+ php_stream_filter *filter;
+ char *filtername;
+ char *encoding = (context && context->input_encoding) ? context->input_encoding : "utf8";
+ int input_encoding_len = strlen(encoding);
+
+ filtername = emalloc(input_encoding_len + sizeof("unicode.from."));
+ memcpy(filtername, "unicode.from.", sizeof("unicode.from.") - 1);
+ memcpy(filtername + sizeof("unicode.from.") - 1, encoding, input_encoding_len + 1);
+
+ filter = php_stream_filter_create(filtername, NULL, persistent TSRMLS_CC);
+ if (!filter) {
+ php_stream_wrapper_log_error(wrapper, options TSRMLS_CC, "Failed applying input encoding");
+ } else {
+ php_stream_filter_append(&stream->readfilters, filter);
+ }
+ efree(filtername);
+ }
+
if (stream == NULL && (options & REPORT_ERRORS)) {
php_stream_display_wrapper_errors(wrapper, path, "failed to open stream" TSRMLS_CC);
}
pefree(copy_of_path, persistent);
}
#endif
+
+
return stream;
}
/* }}} */
php_stream_notification_free(context->notifier);
context->notifier = NULL;
}
+ if (context->input_encoding) {
+ efree(context->input_encoding);
+ }
+ if (context->output_encoding) {
+ efree(context->output_encoding);
+ }
if (context->links) {
zval_ptr_dtor(&context->links);
context->links = NULL;
if ($just_save_results || !mail_qa_team($failed_tests_data, $compression, $status)) {
$output_file = $CUR_DIR . '/php_test_results_' . date('Ymd_Hi') . ( $compression ? '.txt.gz' : '.txt' );
- $fp = fopen($output_file, "w");
+ $fp = fopen($output_file, "wt");
fwrite($fp, $failed_tests_data);
fclose($fp);
{
global $DETAILED;
- $fp = @fopen($filename,'w') or error("Cannot open file '" . $filename . "' (save_text)");
+ $fp = @fopen($filename,'wt') or error("Cannot open file '" . $filename . "' (save_text)");
fwrite($fp,$text);
fclose($fp);
if (1 < $DETAILED) echo "
'ARGS' => '',
);
- $fp = @fopen($file, "r") or error("Cannot open test file: $file");
+ $fp = @fopen($file, "rt") or error("Cannot open test file: $file");
$borked = false;
$bork_info = '';
$line = fgets($fp);
// Match the beginning of a section.
- if (ereg('^--([A-Z]+)--',$line,$r)) {
+ // UTODO changed to use preg, because ereg was crapping out
+ if (preg_match('/^--([A-Z]+)--/',$line,$r)) {
$section = $r[1];
$section_text[$section] = '';
continue;
$tmp = realpath(dirname($file));
}
- $diff_filename = $tmp . DIRECTORY_SEPARATOR . ereg_replace('\.phpt$','.diff', basename($file));
- $log_filename = $tmp . DIRECTORY_SEPARATOR . ereg_replace('\.phpt$','.log', basename($file));
- $exp_filename = $tmp . DIRECTORY_SEPARATOR . ereg_replace('\.phpt$','.exp', basename($file));
- $output_filename = $tmp . DIRECTORY_SEPARATOR . ereg_replace('\.phpt$','.out', basename($file));
+ $diff_filename = $tmp . DIRECTORY_SEPARATOR . preg_replace('/\.phpt$/','.diff', basename($file));
+ $log_filename = $tmp . DIRECTORY_SEPARATOR . preg_replace('/\.phpt$/','.log', basename($file));
+ $exp_filename = $tmp . DIRECTORY_SEPARATOR . preg_replace('/\.phpt$/','.exp', basename($file));
+ $output_filename = $tmp . DIRECTORY_SEPARATOR . preg_replace('/\.phpt$/','.out', basename($file));
$tmp_skipif = $tmp . DIRECTORY_SEPARATOR . uniqid('/phpt.');
- $tmp_file = $tmp . DIRECTORY_SEPARATOR . ereg_replace('\.phpt$','.php',basename($file));
+ $tmp_file = $tmp . DIRECTORY_SEPARATOR . preg_replace('/\.phpt$/','.php',basename($file));
$tmp_post = $tmp . DIRECTORY_SEPARATOR . uniqid('/phpt.');
if (is_array($IN_REDIRECT)) {
// write .exp
if (strpos($log_format,'E') !== FALSE) {
- $log = fopen($exp_filename,'w') or error("Cannot create test log - $exp_filename");
+ $log = fopen($exp_filename,'wt') or error("Cannot create test log - $exp_filename");
fwrite($log,$wanted);
fclose($log);
}
// write .out
if (strpos($log_format,'O') !== FALSE) {
- $log = fopen($output_filename,'w') or error("Cannot create test log - $output_filename");
+ $log = fopen($output_filename,'wt') or error("Cannot create test log - $output_filename");
fwrite($log,$output);
fclose($log);
}
// write .diff
if (strpos($log_format,'D') !== FALSE) {
- $log = fopen($diff_filename,'w') or error("Cannot create test log - $diff_filename");
+ $log = fopen($diff_filename,'wt') or error("Cannot create test log - $diff_filename");
fwrite($log,generate_diff($wanted,$wanted_re,$output));
fclose($log);
}
// write .log
if (strpos($log_format,'L') !== FALSE) {
- $log = fopen($log_filename,'w') or error("Cannot create test log - $log_filename");
+ $log = fopen($log_filename,'wt') or error("Cannot create test log - $log_filename");
fwrite($log,"
---- EXPECTED OUTPUT
$wanted
{
char *mimetype;
- if (SG(default_mimetype) || SG(default_charset)) {
- /* Assume output will be of the default MIME type. Individual
- scripts may change this later. */
- char *tmpmimetype;
- tmpmimetype = sapi_get_default_content_type(TSRMLS_C);
- mimetype = pstrdup(r->pool, tmpmimetype);
- efree(tmpmimetype);
- } else {
- mimetype = SAPI_DEFAULT_MIMETYPE "; charset=" SAPI_DEFAULT_CHARSET;
- }
+ /* Assume output will be of the default MIME type. Individual
+ scripts may change this later. */
+ char *tmpmimetype;
+ tmpmimetype = sapi_get_default_content_type(TSRMLS_C);
+ mimetype = pstrdup(r->pool, tmpmimetype);
+ efree(tmpmimetype);
return mimetype;
}
/* }}} */
{
php_per_dir_entry *orig_per_dir_entry;
- if (zend_hash_find(target_ht, hash_key->arKey, hash_key->nKeyLength, (void **) &orig_per_dir_entry)==FAILURE) {
+ if (zend_u_hash_find(target_ht, hash_key->type, hash_key->u.string, hash_key->nKeyLength, (void **) &orig_per_dir_entry)==FAILURE) {
return 1; /* does not exist in dest, copy from source */
}
{
char *mimetype;
- if (SG(default_mimetype) || SG(default_charset)) {
- /* Assume output will be of the default MIME type. Individual
- scripts may change this later. */
- char *tmpmimetype;
- tmpmimetype = sapi_get_default_content_type(TSRMLS_C);
- mimetype = pstrdup(r->pool, tmpmimetype);
- efree(tmpmimetype);
- } else {
- mimetype = SAPI_DEFAULT_MIMETYPE "; charset=" SAPI_DEFAULT_CHARSET;
- }
+ /* Assume output will be of the default MIME type. Individual
+ scripts may change this later. */
+ char *tmpmimetype;
+ tmpmimetype = sapi_get_default_content_type(TSRMLS_C);
+ mimetype = pstrdup(r->pool, tmpmimetype);
+ efree(tmpmimetype);
return mimetype;
}
/* }}} */
--- /dev/null
+\ eþÿ<?php
+function \12µÁÂ() {
+ echo "µÁ - ok\n";
+}
+
+µÁÂ();
+?>
--- /dev/null
+--TEST--
+Script encoding autodetection (SCSU)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+include(dirname(__FILE__)."/autodetect-SCSU.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+Script encoding autodetection (UTF-16BE)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+include(dirname(__FILE__)."/autodetect-UTF16BE.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+Script encoding autodetection (UTF-16LE)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+include(dirname(__FILE__)."/autodetect-UTF16LE.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+Script encoding autodetection (UTF-32BE)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+include(dirname(__FILE__)."/autodetect-UTF32BE.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+Script encoding autodetection (UTF-32LE)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+include(dirname(__FILE__)."/autodetect-UTF32LE.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
++/v8<?php
+function +BEIENQRBBEI() {
+ echo "+BEIENQRBBEI - ok+AFw-n";
+}
+
++BEIENQRBBEI();
+?>
--- /dev/null
+--TEST--
+Script encoding autodetection (UTF7)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+include(dirname(__FILE__)."/autodetect-UTF7.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+<?php
+function тест() {
+ echo "тест - ok\n";
+}
+
+тест();
+?>
--- /dev/null
+--TEST--
+Script encoding autodetection (UTF8)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+include(dirname(__FILE__)."/autodetect-UTF8.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+~}<?php
+function ~{'d'V'c'd~}() {
+ echo "~{'d'V'c'd~} - ok\n";
+}
+
+~{'d'V'c'd~}();
+?>
--- /dev/null
+--TEST--
+Script encoding (HZ)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+ini_set("unicode.script_encoding", "HZ");
+include(dirname(__FILE__)."/enc-HZ.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+\e$)C<?php
+function \ e,d,V,c,d\ f() {
+ echo "\ e,d,V,c,d\ f - ok\82n";
+}
+
+\ e,d,V,c,d\ f();
+?>
--- /dev/null
+--TEST--
+Script encoding (ISO-2022-KR)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+ini_set("unicode.script_encoding", "ISO-2022-KR");
+include(dirname(__FILE__)."/enc-ISO-2022-KR.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+<?php
+function \e$B'd'V'c'd\e(B() {
+ echo "\e$B'd'V'c'd\e(B - ok\n";
+}
+
+\e$B'd'V'c'd\e(B();
+?>
--- /dev/null
+--TEST--
+Script encoding (JIS)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+ini_set("unicode.script_encoding", "JIS");
+include(dirname(__FILE__)."/enc-JIS.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+<?php
+function \84\84\84u\84\83\84\84() {
+ echo "\84\84\84u\84\83\84\84 - ok\n";
+}
+
+\84\84\84u\84\83\84\84();
+?>
--- /dev/null
+--TEST--
+Script encoding (SJIS)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+ini_set("unicode.script_encoding", "SJIS");
+include(dirname(__FILE__)."/enc-SJIS.inc");
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+declare script encoding (HZ)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+declare(encoding="HZ");
+
+function ~{'d'V'c'd~}() {
+ echo "~{'d'V'c'd~} - ok\n";
+}
+
+~{'d'V'c'd~}();
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+declare script encoding (ISO-2022-KR)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+declare(encoding="ISO-2022-KR");
+
+function \ e,d,V,c,d\ f() {
+ echo "\ e,d,V,c,d\ f - ok\82n";
+}
+
+\ e,d,V,c,d\ f();
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+declare script encoding (JIS)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+declare(encoding="JIS");
+
+function \e$B'd'V'c'd\e(B() {
+ echo "\e$B'd'V'c'd\e(B - ok\n";
+}
+
+\e$B'd'V'c'd\e(B();
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+declare script encoding (SJIS)
+--INI--
+unicode_semantics=on
+unicode.output_encoding=CP866
+--FILE--
+<?php
+declare(encoding="SJIS");
+
+function \84\84\84u\84\83\84\84() {
+ echo "\84\84\84u\84\83\84\84 - ok\n";
+}
+
+\84\84\84u\84\83\84\84();
+?>
+--EXPECT--
+â¥áâ - ok
--- /dev/null
+--TEST--
+Unicode: strpos() function test
+--FILE--
+<?php
+$a = "a™ᄒ\U020021z";
+var_dump(strpos($a, 'a'));
+var_dump(strpos($a, 'U'));
+var_dump(strpos($a, 'z'));
+var_dump(strpos($a, '\u2122'));
+var_dump(strpos($a, 0x1112));
+var_dump(strpos($a, 0x20021));
+
+$b = "\U020022z\U020021z";
+var_dump(strpos($b, 'z', 1));
+var_dump(strpos($b, 'z', 2));
+var_dump(strpos($b, 'z', 4));
+var_dump(strpos($b, 'z\U020021'));
+
+$c = "-A\u030a-Å-Å";
+var_dump(strpos($c, 'A'));
+var_dump(strpos($c, '\u030a'));
+var_dump(strpos($c, '\u00c5'));
+var_dump(strpos($c, '\u212b'));
+
+?>
+--EXPECT--
+int(0)
+bool(false)
+int(4)
+int(1)
+int(2)
+int(3)
+int(1)
+int(3)
+bool(false)
+int(1)
+int(1)
+int(2)
+int(4)
+int(6)
--- /dev/null
+--TEST--
+Unicode: strstr() function test
+--FILE--
+<?php
+$a = "a™ᄒ\U020021z";
+var_dump(strstr($a, 'a'));
+var_dump(strstr($a, 'U'));
+var_dump(strstr($a, 'z'));
+var_dump(strstr($a, '\u2122'));
+var_dump(strstr($a, '\udc21'));
+var_dump(strstr($a, 0x1112));
+var_dump(strstr($a, 0x20021));
+
+$b = "\U020022z\U020021z";
+var_dump(strstr($b, '\U020021'));
+var_dump(strstr($b, 'z\U020021'));
+
+$c = "-A\u030a-Å-Å";
+var_dump(strstr($c, 'A'));
+var_dump(strstr($c, '\u030a'));
+var_dump(strstr($c, '\u00c5'));
+var_dump(strstr($c, '\u212b'));
+?>
+--EXPECT--
+unicode(5) "a™ᄒ𠀡z"
+bool(false)
+unicode(1) "z"
+unicode(4) "™ᄒ𠀡z"
+bool(false)
+unicode(3) "ᄒ𠀡z"
+unicode(2) "𠀡z"
+unicode(2) "𠀡z"
+unicode(3) "z𠀡z"
+unicode(6) "Å-Å-Å"
+unicode(5) "̊-Å-Å"
+unicode(3) "Å-Å"
+unicode(1) "Å"
--- /dev/null
+--TEST--
+Unicode identifiers normalization (${})
+--INI--
+unicode_semantics=on
+--FILE--
+<?php
+${"\u212B"} = "ok\n";
+echo ${"\u00C5"};
+?>
+--EXPECT--
+ok
--- /dev/null
+--TEST--
+Unicode identifiers normalization ($$)
+--INI--
+unicode_semantics=on
+--FILE--
+<?php
+$a = "\u212B";
+$b = "\u00C5";
+$$a = "ok\n";
+echo $$b;
+?>
+--EXPECT--
+ok
--- /dev/null
+--TEST--
+Unicode identifiers normalization (indirect function call)
+--INI--
+unicode_semantics=on
+--FILE--
+<?php
+declare(encoding = "ISO-8859-1");
+
+function Å() {
+ echo "ok\n";
+}
+
+$f1 = "\u212B";
+$f1();
+$f2 = "\u00C5";
+$f2();
+?>
+--EXPECT--
+ok
+ok
--- /dev/null
+--TEST--
+Unicode identifiers normalization ($GLOBALS[])
+--INI--
+unicode_semantics=on
+--FILE--
+<?php
+$GLOBALS["\u212B"] = "ok\n";
+echo $GLOBALS["\u00C5"];
+?>
+--EXPECT--
+ok
--- /dev/null
+
+Doing a for loop to iterate over a string character-by-character may be
+slow, since accessing a character at offset n requires us to scan the string
+from the beginning until the required codepoint is accessed.
+
+strlen() will return the number of codepoints, which has to be calculated.
--- /dev/null
+* unicode in `..`?
+
+* EBCDIC support?
+
+* Discuss putting ZEND_ATTRIBUTE_FORMAT back on zend_error() or create a new
+ zend_error_ex() function that supports new specifiers
+
+* Finalize zend_hash_get_current_key_ex() discussion. We probably need to
+ have zend_u_hash_get_current_key_ex() and have the old function proxy to it.
+ The question is what happens when an IS_UNICODE key is present and the old
+ function is called.
+
+* use zend_literal_to_unicode when comparing "this" and such
+
+* fix string offset operator to work only on strings, and array offset one
+ to work only on arrays
+
+* Should we apply identifier validity checks in functions that deal with
+ identifiers, such as define(), get_class(), etc?
+
+* Resolve the differences in %G output format. sprintf() removes trailing
+ zeroes from the fractional part, and u_sprintf() doesn't.
+
+* Determine how to deal with filesystem and filenames when Unicode is
+ involved. This concerns both the extension functions and things like
+ compile_file(), open_file_for_scanning(), etc.
+
+* Use U_STRING_DECL/U_STRING/INIT for initializing literals possibly.
+
+* Measure performance difference when doing quickCheck + normalize versus
+ simple normalize.
+
+* Find all instances where unicode strings are compared with memcmp() and
+ replace either with u_memcmpCodePointOrder() or ucol_strcoll()
+
+* Opening a collator may return U_USING_DEFAULT_WARNING,
+ U_USING_FALLBACK_WARNING
+
+* Need to make http input work as described in the design doc.
+
+* Solve ZTS issues. Some extensions store pointers to internal
+ zend_class_entries during extension startup, but these pointers can be changed
+ from request to request (dependent on "unicode" setting). Right now these
+ pointers are reinitialized during request startup but they are still stored
+ in real global variables. Probably the problem should be solved in another way
+ or pointers to zend_class_entries should be stored in module globals.
+
+* output.c needs a lot of work as it passes char* around
+
+* Require and/or bundle ICU 3.4.
+
+* Right now if a function passes "TT" to zend_parse_parameters(), and one of
+ the arguments is IS_BINARY, then all of the rest of T arguments are
+ converted to IS_BINARY as well. I will modify it so that if one of the other
+ arguments is IS_UNICODE then we generate an error and abort parsing. If we
+ do run across a function that really needs to accept IS_BINARY as one
+ argument and IS_UNICODE as another, then we can use "bu" for the parsing
+ format.
+
+* Comparison operators should act similarly to the concatenation one: check the
+ types and coerce when necessary.
// General libs
// urlmon.lib ole32.lib oleaut32.lib uuid.lib gdi32.lib winspool.lib comdlg32.lib
-DEFINE("LIBS", "kernel32.lib ole32.lib user32.lib advapi32.lib shell32.lib ws2_32.lib");
+DEFINE("LIBS", "kernel32.lib ole32.lib user32.lib advapi32.lib shell32.lib ws2_32.lib icuuc.lib icuin.lib icuio.lib icule.lib iculx.lib");
// Set some debug/release specific options
if (PHP_DEBUG == "yes") {
zend_hash.c zend_list.c zend_indent.c zend_builtin_functions.c \
zend_sprintf.c zend_ini.c zend_qsort.c zend_multibyte.c zend_ts_hash.c \
zend_stream.c zend_iterators.c zend_interfaces.c zend_objects.c \
- zend_object_handlers.c zend_objects_API.c \
+ zend_object_handlers.c zend_objects_API.c zend_unicode.c zend_strtol.c \
zend_mm.c zend_default_classes.c zend_reflection_api.c zend_execute.c zend_strtod.c");
ADD_SOURCES("main", "main.c snprintf.c spprintf.c safe_mode.c fopen_wrappers.c \