/*
 * Flag values accepted by the preg_* matching/splitting routines. Each one is
 * also exported to userland under the same name via REGISTER_LONG_CONSTANT.
 * PREG_OFFSET_CAPTURE and PREG_UNMATCHED_AS_NULL are extracted from the same
 * `flags` value by bit-masking.
 */
#define PREG_PATTERN_ORDER 1 /* result-ordering mode for global matches; NOTE(review): presumably the default "pattern mode" (results grouped per subpattern) - confirm against the flag-parsing code */
#define PREG_SET_ORDER 2 /* alternative result-ordering mode; NOTE(review): presumably groups results per match set - confirm */
#define PREG_OFFSET_CAPTURE (1<<8) /* when set, each result is stored as a (match, offset) pair instead of a bare string */
+#define PREG_UNMATCHED_AS_NULL (1<<9) /* when set, subpatterns that did not participate (negative offset, or padding past the captured count) are reported as NULL; when clear they are reported as strings */
#define PREG_SPLIT_NO_EMPTY (1<<0) /* split-only flag; NOTE(review): presumably drives the `no_empty` checks that skip zero-length pieces - the flag parsing is not visible here */
#define PREG_SPLIT_DELIM_CAPTURE (1<<1) /* split-only flag; NOTE(review): presumably makes captured delimiter subpatterns part of the split result - confirm */
REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
/* }}} */
/* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
+static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
{
zval match_pair, tmp;
array_init_size(&match_pair, 2);
/* Add (match, offset) to the return value */
- if (offset < 0) { /* unset substring */
+ if (unmatched_as_null && offset < 0) {
ZVAL_NULL(&tmp);
} else {
ZVAL_STRINGL(&tmp, str, len);
{
zval result_set, /* Holds a set of subpatterns after
a global match */
- *match_sets = NULL; /* An array of sets of matches for each
+ *match_sets = NULL; /* An array of sets of matches for each
subpattern after a global match */
pcre_extra *extra = pce->extra;/* Holds results of studying */
pcre_extra extra_data; /* Used locally for exec options */
char **subpat_names; /* Array for named subpatterns */
int i;
int subpats_order; /* Order of subpattern matches */
- int offset_capture; /* Capture match offsets: yes/no */
- unsigned char *mark = NULL; /* Target for MARK name */
- zval marks; /* Array of marks for PREG_PATTERN_ORDER */
+ int offset_capture; /* Capture match offsets: yes/no */
+ int unmatched_as_null; /* Null non-matches: yes/no */
+ unsigned char *mark = NULL; /* Target for MARK name */
+ zval marks; /* Array of marks for PREG_PATTERN_ORDER */
ALLOCA_FLAG(use_heap);
ZVAL_UNDEF(&marks);
if (use_flags) {
offset_capture = flags & PREG_OFFSET_CAPTURE;
+ unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
/*
* subpats_order is pre-set to pattern mode so we change it only if
}
} else {
offset_capture = 0;
+ unmatched_as_null = 0;
}
/* Negative offset counts from the end of the string. */
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&match_sets[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&match_sets[i]);
} else {
add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
/*
* If the number of captured subpatterns on this run is
* less than the total possible number, pad the result
- * arrays with NULLs.
+ * arrays with NULLs or empty strings.
*/
if (count < num_subpats) {
for (; i < num_subpats; i++) {
- add_next_index_null(&match_sets[i]);
+ if (unmatched_as_null) {
+ add_next_index_null(&match_sets[i]);
+ } else {
+ add_next_index_string(&match_sets[i], "");
+ }
}
}
} else {
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&result_set, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
- if (offsets[i<<1] < 0) { /* unset substring */
- add_assoc_null(&result_set, subpat_names[i]);
- } else {
+ if (unmatched_as_null && offsets[i<<1] < 0) {
+ add_assoc_null(&result_set, subpat_names[i]);
+ } else {
add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
+ offsets[(i<<1)+1] - offsets[i<<1]);
+ }
}
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&result_set);
} else {
add_next_index_stringl(&result_set, (char *)stringlist[i],
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&result_set, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&result_set);
} else {
add_next_index_stringl(&result_set, (char *)stringlist[i],
for (i = 0; i < count; i++) {
add_offset_pair(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], subpat_names[i]);
+ offsets[i<<1], subpat_names[i], unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_assoc_null(subpats, subpat_names[i]);
} else {
add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(subpats);
} else {
add_next_index_stringl(subpats, (char *)stringlist[i],
for (i = 0; i < count; i++) {
add_offset_pair(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], NULL);
+ offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(subpats);
} else {
add_next_index_stringl(subpats, (char *)stringlist[i],
if (offset_capture) {
/* Add (match, offset) pair to the return value */
- add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
+ add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL, 0);
} else {
/* Add the piece to the return value */
ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
/* If we have matched a delimiter */
if (!no_empty || match_len > 0) {
if (offset_capture) {
- add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
+ add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
} else {
ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
- if (!no_empty || start_offset < subject_len)
- {
+ if (!no_empty || start_offset < subject_len) {
if (offset_capture) {
/* Add the last (match, offset) pair to the return value */
- add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
+ add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, 0);
} else {
/* Add the last piece to the return value */
ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);