From: Dmitry Stogov
Date: Fri, 20 Feb 2015 13:08:57 +0000 (+0300)
Subject: Use fast SSE2 memcpy() for copying block of data from SHM to process memory
X-Git-Tag: PRE_PHP7_EREG_MYSQL_REMOVALS~104
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=623810eff40d82e6d7c9f17aeaacec43d741d4c6;p=php

Use fast SSE2 memcpy() for copying block of data from SHM to process memory
---

diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c
index 62e62e1821..cdcbfeda56 100644
--- a/ext/opcache/ZendAccelerator.c
+++ b/ext/opcache/ZendAccelerator.c
@@ -1163,7 +1163,13 @@ static zend_persistent_script *cache_script_in_shared_memory(zend_persistent_scr
 	memory_used = zend_accel_script_persist_calc(new_persistent_script, key, key_length);
 
 	/* Allocate shared memory */
+#ifdef __SSE2__
+	/* Align to 64-byte boundary */
+	ZCG(mem) = zend_shared_alloc(memory_used + 64);
+	ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L);
+#else
 	ZCG(mem) = zend_shared_alloc(memory_used);
+#endif
 	if (!ZCG(mem)) {
 		zend_accel_schedule_restart_if_necessary(ACCEL_RESTART_OOM);
 		zend_shared_alloc_unlock();
diff --git a/ext/opcache/zend_accelerator_util_funcs.c b/ext/opcache/zend_accelerator_util_funcs.c
index 8a52a53103..2e7f0c8da5 100644
--- a/ext/opcache/zend_accelerator_util_funcs.c
+++ b/ext/opcache/zend_accelerator_util_funcs.c
@@ -771,6 +771,34 @@ failure:
 	zend_error(E_ERROR, "Cannot redeclare class %s", ce1->name->val);
 }
 
+#ifdef __SSE2__
+#include <mmintrin.h>
+#include <emmintrin.h>
+
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size)
+{
+	__m128i *dqdest = (__m128i*)dest;
+	const __m128i *dqsrc = (const __m128i*)src;
+	const __m128i *end = (const __m128i*)((const char*)src + size);
+
+	do {
+		_mm_prefetch(dqsrc + 4, _MM_HINT_NTA);
+		_mm_prefetch(dqsrc + 6, _MM_HINT_NTA);
+
+		__m128i xmm0 = _mm_load_si128(dqsrc + 0);
+		__m128i xmm1 = _mm_load_si128(dqsrc + 1);
+		__m128i xmm2 = _mm_load_si128(dqsrc + 2);
+		__m128i xmm3 = _mm_load_si128(dqsrc + 3);
+		dqsrc += 4;
+		_mm_stream_si128(dqdest + 0, xmm0);
+		_mm_stream_si128(dqdest + 1, xmm1);
+		_mm_stream_si128(dqdest + 2, xmm2);
+		_mm_stream_si128(dqdest + 3, xmm3);
+		dqdest += 4;
+	} while (dqsrc != end);
+}
+#endif
+
 zend_op_array* zend_accel_load_script(zend_persistent_script *persistent_script, int from_shared_memory)
 {
 	zend_op_array *op_array;
@@ -784,8 +812,15 @@ zend_op_array* zend_accel_load_script(zend_persistent_script *persistent_script,
 		ZCG(current_persistent_script) = persistent_script;
 		ZCG(arena_mem) = NULL;
 		if (EXPECTED(persistent_script->arena_size)) {
+#ifdef __SSE2__
+			/* Target address must be aligned to 64-byte boundary */
+			ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size + 64);
+			ZCG(arena_mem) = (void*)(((zend_uintptr_t)ZCG(arena_mem) + 63L) & ~63L);
+			fast_memcpy(ZCG(arena_mem), persistent_script->arena_mem, persistent_script->arena_size);
+#else
 			ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size);
 			memcpy(ZCG(arena_mem), persistent_script->arena_mem, persistent_script->arena_size);
+#endif
 		}
 
 		/* Copy all the necessary stuff from shared memory to regular memory, and protect the shared script */
diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c
index 1c440eceae..eb442e9742 100644
--- a/ext/opcache/zend_persist.c
+++ b/ext/opcache/zend_persist.c
@@ -734,6 +734,11 @@ zend_persistent_script *zend_accel_script_persist(zend_persistent_script *script
 	*key = zend_accel_memdup(*key, key_length + 1);
 	zend_accel_store_string(script->full_path);
 
+#ifdef __SSE2__
+	/* Align to 64-byte boundary */
+	ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L);
+#endif
+
 	script->arena_mem = ZCG(arena_mem) = ZCG(mem);
 	ZCG(mem) = (void*)((char*)ZCG(mem) + script->arena_size);
 
diff --git a/ext/opcache/zend_persist_calc.c b/ext/opcache/zend_persist_calc.c
index 2f211e43cb..7d77d62b8d 100644
--- a/ext/opcache/zend_persist_calc.c
+++ b/ext/opcache/zend_persist_calc.c
@@ -361,10 +361,20 @@ uint zend_accel_script_persist_calc(zend_persistent_script *new_persistent_scrip
 	ADD_DUP_SIZE(key, key_length + 1);
 	ADD_STRING(new_persistent_script->full_path);
 
+#ifdef __SSE2__
+	/* Align size to 64-byte boundary */
+	new_persistent_script->size = (new_persistent_script->size + 63) & ~63;
+#endif
+
 	zend_accel_persist_class_table_calc(&new_persistent_script->class_table);
 	zend_hash_persist_calc(&new_persistent_script->function_table, zend_persist_op_array_calc);
 	zend_persist_op_array_calc_ex(&new_persistent_script->main_op_array);
 
+#ifdef __SSE2__
+	/* Align size to 64-byte boundary */
+	new_persistent_script->arena_size = (new_persistent_script->arena_size + 63) & ~63;
+#endif
+
 	new_persistent_script->size += new_persistent_script->arena_size;
 
 	ZCG(current_persistent_script) = NULL;