From fd4025069d561b4196851c2e9a85b5bfdfe1465a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 5 May 2016 14:49:08 +0200 Subject: [PATCH] Enable HT RC assertions with escape-hatch HT functions that modify the array now assert that rc=1. As we don't respect this COW constraint everywhere, either for a good reason or because fixing it would take more work, we provide an escape hatch in the form of HT_ALLOW_COW_VIOLATION(ht). If this macro is called assertions on this ht are disabled. The macro is a no-op in release mode. --- Zend/zend_gc.c | 4 ++ Zend/zend_hash.c | 78 ++++++++++++++++---------------- Zend/zend_hash.h | 7 +++ ext/standard/var_unserializer.c | 24 ++++++---- ext/standard/var_unserializer.re | 6 +++ main/php_variables.c | 5 ++ 6 files changed, 77 insertions(+), 47 deletions(-) diff --git a/Zend/zend_gc.c b/Zend/zend_gc.c index a971681cf9..cd2cadc276 100644 --- a/Zend/zend_gc.c +++ b/Zend/zend_gc.c @@ -1162,6 +1162,10 @@ ZEND_API int zend_gc_collect_cycles(void) zend_array *arr = (zend_array*)p; GC_TYPE(arr) = IS_NULL; + + /* GC may destroy arrays with rc>1. This is valid and safe. */ + HT_ALLOW_COW_VIOLATION(arr); + zend_hash_destroy(arr); } current = GC_G(next_to_free); diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index 64668c73a8..5c629e29ee 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -24,13 +24,15 @@ #include "zend_globals.h" #include "zend_variables.h" -#define HT_DEBUG 0 -#if HT_DEBUG -# define HT_ASSERT(c) ZEND_ASSERT(c) +#if ZEND_DEBUG +# define HT_ASSERT(ht, expr) \ + ZEND_ASSERT((expr) || ((ht)->u.flags & HASH_FLAG_ALLOW_COW_VIOLATION)) #else -# define HT_ASSERT(c) +# define HT_ASSERT(ht, expr) #endif +#define HT_ASSERT_RC1(ht) HT_ASSERT(ht, GC_REFCOUNT(ht) == 1) + #define HT_POISONED_PTR ((HashTable *) (intptr_t) -1) #if ZEND_DEBUG @@ -127,7 +129,7 @@ static zend_always_inline uint32_t zend_hash_check_size(uint32_t nSize) static zend_always_inline void zend_hash_real_init_ex(HashTable *ht, int packed) { - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); ZEND_ASSERT(!((ht)->u.flags & HASH_FLAG_INITIALIZED)); if (packed) { HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT)); @@ -156,7 +158,7 @@ static zend_always_inline void zend_hash_real_init_ex(HashTable *ht, int packed) static zend_always_inline void zend_hash_check_init(HashTable *ht, int packed) { - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (UNEXPECTED(!((ht)->u.flags & HASH_FLAG_INITIALIZED))) { zend_hash_real_init_ex(ht, packed); } @@ -185,7 +187,7 @@ ZEND_API void ZEND_FASTCALL _zend_hash_init(HashTable *ht, uint32_t nSize, dtor_ static void ZEND_FASTCALL zend_hash_packed_grow(HashTable *ht) { - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (ht->nTableSize >= HT_MAX_SIZE) { zend_error_noreturn(E_ERROR, "Possible integer overflow in memory allocation (%u * %zu + %zu)", ht->nTableSize * 2, sizeof(Bucket), sizeof(Bucket)); } @@ -197,7 +199,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_real_init(HashTable *ht, zend_bool packed) { IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); zend_hash_real_init_ex(ht, packed); } @@ -206,7 +208,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_packed_to_hash(HashTable *ht) void *new_data, *old_data = HT_GET_DATA_ADDR(ht); Bucket *old_buckets = ht->arData; - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); ht->u.flags &= ~HASH_FLAG_PACKED; new_data = pemalloc(HT_SIZE_EX(ht->nTableSize, -ht->nTableSize), (ht)->u.flags & HASH_FLAG_PERSISTENT); ht->nTableMask = -ht->nTableSize; @@ -221,7 +223,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_to_packed(HashTable *ht) void *new_data, *old_data = HT_GET_DATA_ADDR(ht); Bucket *old_buckets = ht->arData; - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); new_data = pemalloc(HT_SIZE_EX(ht->nTableSize, HT_MIN_MASK), (ht)->u.flags & HASH_FLAG_PERSISTENT); ht->u.flags |= HASH_FLAG_PACKED | HASH_FLAG_STATIC_KEYS; ht->nTableMask = HT_MIN_MASK; @@ -241,7 +243,7 @@ ZEND_API void ZEND_FASTCALL _zend_hash_init_ex(HashTable *ht, uint32_t nSize, dt ZEND_API void ZEND_FASTCALL zend_hash_extend(HashTable *ht, uint32_t nSize, zend_bool packed) { - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (nSize == 0) return; if (UNEXPECTED(!((ht)->u.flags & HASH_FLAG_INITIALIZED))) { if (nSize > ht->nTableSize) { @@ -545,7 +547,7 @@ static zend_always_inline zval *_zend_hash_add_or_update_i(HashTable *ht, zend_s Bucket *p; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (UNEXPECTED(!(ht->u.flags & HASH_FLAG_INITIALIZED))) { CHECK_INIT(ht, 0); @@ -708,7 +710,7 @@ static zend_always_inline zval *_zend_hash_index_add_or_update_i(HashTable *ht, Bucket *p; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (UNEXPECTED(!(ht->u.flags & HASH_FLAG_INITIALIZED))) { CHECK_INIT(ht, h < ht->nTableSize); @@ -847,7 +849,7 @@ static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht) { IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { /* additional term is there to amortize the cost of compaction */ zend_hash_rehash(ht); @@ -1021,7 +1023,7 @@ static zend_always_inline void _zend_hash_del_el(HashTable *ht, uint32_t idx, Bu ZEND_API void ZEND_FASTCALL zend_hash_del_bucket(HashTable *ht, Bucket *p) { IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); _zend_hash_del_el(ht, HT_IDX_TO_HASH(p - ht->arData), p); } @@ -1034,7 +1036,7 @@ ZEND_API int ZEND_FASTCALL zend_hash_del(HashTable *ht, zend_string *key) Bucket *prev = NULL; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); h = zend_string_hash_val(key); nIndex = h | ht->nTableMask; @@ -1065,7 +1067,7 @@ ZEND_API int ZEND_FASTCALL zend_hash_del_ind(HashTable *ht, zend_string *key) Bucket *prev = NULL; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); h = zend_string_hash_val(key); nIndex = h | ht->nTableMask; @@ -1114,7 +1116,7 @@ ZEND_API int ZEND_FASTCALL zend_hash_str_del_ind(HashTable *ht, const char *str, Bucket *prev = NULL; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); h = zend_inline_hash_func(str, len); nIndex = h | ht->nTableMask; @@ -1158,7 +1160,7 @@ ZEND_API int ZEND_FASTCALL zend_hash_str_del(HashTable *ht, const char *str, siz Bucket *prev = NULL; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); h = zend_inline_hash_func(str, len); nIndex = h | ht->nTableMask; @@ -1187,7 +1189,7 @@ ZEND_API int ZEND_FASTCALL zend_hash_index_del(HashTable *ht, zend_ulong h) Bucket *prev = NULL; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (ht->u.flags & HASH_FLAG_PACKED) { if (h < ht->nNumUsed) { @@ -1219,7 +1221,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_destroy(HashTable *ht) Bucket *p, *end; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) <= 1); + HT_ASSERT(ht, GC_REFCOUNT(ht) <= 1); if (ht->nNumUsed) { p = ht->arData; @@ -1281,7 +1283,7 @@ ZEND_API void ZEND_FASTCALL zend_array_destroy(HashTable *ht) Bucket *p, *end; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) <= 1); + HT_ASSERT(ht, GC_REFCOUNT(ht) <= 1); /* break possible cycles */ GC_REMOVE_FROM_BUFFER(ht); @@ -1334,7 +1336,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_clean(HashTable *ht) Bucket *p, *end; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (ht->nNumUsed) { p = ht->arData; @@ -1403,7 +1405,7 @@ ZEND_API void ZEND_FASTCALL zend_symtable_clean(HashTable *ht) Bucket *p, *end; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (ht->nNumUsed) { p = ht->arData; @@ -1443,7 +1445,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_graceful_destroy(HashTable *ht) Bucket *p; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); p = ht->arData; for (idx = 0; idx < ht->nNumUsed; idx++, p++) { @@ -1463,7 +1465,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_graceful_reverse_destroy(HashTable *ht) Bucket *p; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); idx = ht->nNumUsed; p = ht->arData + ht->nNumUsed; @@ -1505,7 +1507,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_apply(HashTable *ht, apply_func_t apply_fu result = apply_func(&p->val); if (result & ZEND_HASH_APPLY_REMOVE) { - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); _zend_hash_del_el(ht, HT_IDX_TO_HASH(idx), p); } if (result & ZEND_HASH_APPLY_STOP) { @@ -1531,7 +1533,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_apply_with_argument(HashTable *ht, apply_f result = apply_func(&p->val, argument); if (result & ZEND_HASH_APPLY_REMOVE) { - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); _zend_hash_del_el(ht, HT_IDX_TO_HASH(idx), p); } if (result & ZEND_HASH_APPLY_STOP) { @@ -1564,7 +1566,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_apply_with_arguments(HashTable *ht, apply_ result = apply_func(&p->val, num_args, args, &hash_key); if (result & ZEND_HASH_APPLY_REMOVE) { - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); _zend_hash_del_el(ht, HT_IDX_TO_HASH(idx), p); } if (result & ZEND_HASH_APPLY_STOP) { @@ -1596,7 +1598,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_reverse_apply(HashTable *ht, apply_func_t result = apply_func(&p->val); if (result & ZEND_HASH_APPLY_REMOVE) { - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); _zend_hash_del_el(ht, HT_IDX_TO_HASH(idx), p); } if (result & ZEND_HASH_APPLY_STOP) { @@ -1616,7 +1618,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_copy(HashTable *target, HashTable *source, IS_CONSISTENT(source); IS_CONSISTENT(target); - HT_ASSERT(GC_REFCOUNT(target) == 1); + HT_ASSERT_RC1(target); setTargetPointer = (target->nInternalPointer == HT_INVALID_IDX); for (idx = 0; idx < source->nNumUsed; idx++) { @@ -1853,7 +1855,7 @@ ZEND_API void ZEND_FASTCALL _zend_hash_merge(HashTable *target, HashTable *sourc IS_CONSISTENT(source); IS_CONSISTENT(target); - HT_ASSERT(GC_REFCOUNT(target) == 1); + HT_ASSERT_RC1(target); if (overwrite) { for (idx = 0; idx < source->nNumUsed; idx++) { @@ -1924,7 +1926,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_merge_ex(HashTable *target, HashTable *sou IS_CONSISTENT(source); IS_CONSISTENT(target); - HT_ASSERT(GC_REFCOUNT(target) == 1); + HT_ASSERT_RC1(target); for (idx = 0; idx < source->nNumUsed; idx++) { p = source->arData + idx; @@ -2046,7 +2048,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_internal_pointer_reset_ex(HashTable *ht, H uint32_t idx; IS_CONSISTENT(ht); - HT_ASSERT(&ht->nInternalPointer != pos || GC_REFCOUNT(ht) == 1); + HT_ASSERT(ht, &ht->nInternalPointer != pos || GC_REFCOUNT(ht) == 1); for (idx = 0; idx < ht->nNumUsed; idx++) { if (Z_TYPE(ht->arData[idx].val) != IS_UNDEF) { @@ -2066,7 +2068,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_internal_pointer_end_ex(HashTable *ht, Has uint32_t idx; IS_CONSISTENT(ht); - HT_ASSERT(&ht->nInternalPointer != pos || GC_REFCOUNT(ht) == 1); + HT_ASSERT(ht, &ht->nInternalPointer != pos || GC_REFCOUNT(ht) == 1); idx = ht->nNumUsed; while (idx > 0) { @@ -2085,7 +2087,7 @@ ZEND_API int ZEND_FASTCALL zend_hash_move_forward_ex(HashTable *ht, HashPosition uint32_t idx = *pos; IS_CONSISTENT(ht); - HT_ASSERT(&ht->nInternalPointer != pos || GC_REFCOUNT(ht) == 1); + HT_ASSERT(ht, &ht->nInternalPointer != pos || GC_REFCOUNT(ht) == 1); if (idx != HT_INVALID_IDX) { while (1) { @@ -2109,7 +2111,7 @@ ZEND_API int ZEND_FASTCALL zend_hash_move_backwards_ex(HashTable *ht, HashPositi uint32_t idx = *pos; IS_CONSISTENT(ht); - HT_ASSERT(&ht->nInternalPointer != pos || GC_REFCOUNT(ht) == 1); + HT_ASSERT(ht, &ht->nInternalPointer != pos || GC_REFCOUNT(ht) == 1); if (idx != HT_INVALID_IDX) { while (idx > 0) { @@ -2246,7 +2248,7 @@ ZEND_API int ZEND_FASTCALL zend_hash_sort_ex(HashTable *ht, sort_func_t sort, co uint32_t i, j; IS_CONSISTENT(ht); - HT_ASSERT(GC_REFCOUNT(ht) == 1); + HT_ASSERT_RC1(ht); if (!(ht->nNumOfElements>1) && !(renumber && ht->nNumOfElements>0)) { /* Doesn't require sorting */ return SUCCESS; diff --git a/Zend/zend_hash.h b/Zend/zend_hash.h index 950968a9cf..fa63999c2a 100644 --- a/Zend/zend_hash.h +++ b/Zend/zend_hash.h @@ -41,6 +41,7 @@ #define HASH_FLAG_INITIALIZED (1<<3) #define HASH_FLAG_STATIC_KEYS (1<<4) /* long and interned strings */ #define HASH_FLAG_HAS_EMPTY_IND (1<<5) +#define HASH_FLAG_ALLOW_COW_VIOLATION (1<<6) #define HT_IS_PACKED(ht) \ (((ht)->u.flags & HASH_FLAG_PACKED) != 0) @@ -51,6 +52,12 @@ #define HT_HAS_STATIC_KEYS_ONLY(ht) \ (((ht)->u.flags & (HASH_FLAG_PACKED|HASH_FLAG_STATIC_KEYS)) != 0) +#if ZEND_DEBUG +# define HT_ALLOW_COW_VIOLATION(ht) (ht)->u.flags |= HASH_FLAG_ALLOW_COW_VIOLATION +#else +# define HT_ALLOW_COW_VIOLATION(ht) +#endif + typedef struct _zend_hash_key { zend_ulong h; zend_string *key; diff --git a/ext/standard/var_unserializer.c b/ext/standard/var_unserializer.c index 19cc5f3082..a8d93f6bc5 100644 --- a/ext/standard/var_unserializer.c +++ b/ext/standard/var_unserializer.c @@ -676,7 +676,7 @@ static int php_var_unserialize_internal(UNSERIALIZE_PARAMETER) yy2: ++YYCURSOR; yy3: -#line 999 "ext/standard/var_unserializer.re" +#line 1005 "ext/standard/var_unserializer.re" { return 0; } #line 682 "ext/standard/var_unserializer.c" yy4: @@ -725,7 +725,7 @@ yy14: goto yy3; yy15: ++YYCURSOR; -#line 993 "ext/standard/var_unserializer.re" +#line 999 "ext/standard/var_unserializer.re" { /* this is the case where we have less data than planned */ php_error_docref(NULL, E_NOTICE, "Unexpected end of serialized data"); @@ -1158,7 +1158,7 @@ yy81: goto yy18; yy82: ++YYCURSOR; -#line 841 "ext/standard/var_unserializer.re" +#line 847 "ext/standard/var_unserializer.re" { size_t len, len2, len3, maxlen; zend_long elements; @@ -1368,13 +1368,19 @@ yy86: zend_hash_real_init(Z_ARRVAL_P(rval), 0); } + /* The array may contain references to itself, in which case we'll be modifying an + * rc>1 array. This is okay, since the array is, ostensibly, only visible to + * unserialize (in practice unserialization handlers also see it). Ideally we should + * prohibit "r:" references to non-objects, as we only generate them for objects. */ + HT_ALLOW_COW_VIOLATION(Z_ARRVAL_P(rval)); + if (!process_nested_data(UNSERIALIZE_PASSTHRU, Z_ARRVAL_P(rval), elements, 0)) { return 0; } return finish_nested_data(UNSERIALIZE_PASSTHRU); } -#line 1378 "ext/standard/var_unserializer.c" +#line 1384 "ext/standard/var_unserializer.c" yy88: yych = *++YYCURSOR; if (yych <= ',') { @@ -1399,7 +1405,7 @@ yy91: goto yy18; yy92: ++YYCURSOR; -#line 830 "ext/standard/var_unserializer.re" +#line 836 "ext/standard/var_unserializer.re" { long elements; if (!var_hash) return 0; @@ -1410,7 +1416,7 @@ yy92: } return object_common2(UNSERIALIZE_PASSTHRU, elements); } -#line 1414 "ext/standard/var_unserializer.c" +#line 1420 "ext/standard/var_unserializer.c" yy94: ++YYCURSOR; #line 740 "ext/standard/var_unserializer.re" @@ -1445,7 +1451,7 @@ yy94: ZVAL_STRINGL(rval, str, len); return 1; } -#line 1449 "ext/standard/var_unserializer.c" +#line 1455 "ext/standard/var_unserializer.c" yy96: yych = *++YYCURSOR; if (yych <= '/') goto yy18; @@ -1469,9 +1475,9 @@ yy97: return 1; } -#line 1473 "ext/standard/var_unserializer.c" +#line 1479 "ext/standard/var_unserializer.c" } -#line 1001 "ext/standard/var_unserializer.re" +#line 1007 "ext/standard/var_unserializer.re" return 0; diff --git a/ext/standard/var_unserializer.re b/ext/standard/var_unserializer.re index d56cc2e014..04675c9981 100644 --- a/ext/standard/var_unserializer.re +++ b/ext/standard/var_unserializer.re @@ -820,6 +820,12 @@ use_double: zend_hash_real_init(Z_ARRVAL_P(rval), 0); } + /* The array may contain references to itself, in which case we'll be modifying an + * rc>1 array. This is okay, since the array is, ostensibly, only visible to + * unserialize (in practice unserialization handlers also see it). Ideally we should + * prohibit "r:" references to non-objects, as we only generate them for objects. */ + HT_ALLOW_COW_VIOLATION(Z_ARRVAL_P(rval)); + if (!process_nested_data(UNSERIALIZE_PASSTHRU, Z_ARRVAL_P(rval), elements, 0)) { return 0; } diff --git a/main/php_variables.c b/main/php_variables.c index 9dd1860880..ff43466014 100644 --- a/main/php_variables.c +++ b/main/php_variables.c @@ -795,6 +795,11 @@ static zend_bool php_auto_globals_create_server(zend_string *name) zend_hash_update(&EG(symbol_table), name, &PG(http_globals)[TRACK_VARS_SERVER]); Z_ADDREF(PG(http_globals)[TRACK_VARS_SERVER]); + /* TODO: TRACK_VARS_SERVER is modified in a number of places (e.g. phar) past this point, + * where rc>1 due to the $_SERVER global. Ideally this shouldn't happen, but for now we + * ignore this issue, as it would probably require larger changes. */ + HT_ALLOW_COW_VIOLATION(Z_ARRVAL(PG(http_globals)[TRACK_VARS_SERVER])); + return 0; /* don't rearm */ } -- 2.40.0