From c2c2a95708685156a643e920b497d48597e0267c Mon Sep 17 00:00:00 2001 From: Anton Mitrofanov Date: Mon, 25 Feb 2013 19:28:19 +0400 Subject: [PATCH] x86: don't use the red zone on win64 --- common/x86/cpu-a.asm | 3 +++ common/x86/deblock-a.asm | 6 +++++- common/x86/mc-a.asm | 4 ++++ common/x86/pixel-a.asm | 15 ++++++++------- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm index 9104db26..ea27158b 100644 --- a/common/x86/cpu-a.asm +++ b/common/x86/cpu-a.asm @@ -165,6 +165,9 @@ cglobal safe_intel_cpu_indicator_init %endif push rbp mov rbp, rsp +%if WIN64 + sub rsp, 32 ; shadow space +%endif and rsp, ~15 call intel_cpu_indicator_init leave diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm index 4e390646..9692621a 100644 --- a/common/x86/deblock-a.asm +++ b/common/x86/deblock-a.asm @@ -1429,7 +1429,11 @@ DEBLOCK_LUMA v, 16 %define t5 m11 %define mask0 m12 %define mask1p m13 +%if WIN64 + %define mask1q [rsp] +%else %define mask1q [rsp-24] +%endif %define mpb_0 m14 %define mpb_1 m15 %else @@ -1448,7 +1452,7 @@ DEBLOCK_LUMA v, 16 ;----------------------------------------------------------------------------- ; void deblock_v_luma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_%1_luma_intra, 4,6,16,ARCH_X86_64*0x50-0x50 +cglobal deblock_%1_luma_intra, 4,6,16,0-(1-ARCH_X86_64)*0x50-WIN64*0x10 lea r4, [r1*4] lea r5, [r1*3] ; 3*stride dec r2d ; alpha-1 diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm index 8161389f..9b46680b 100644 --- a/common/x86/mc-a.asm +++ b/common/x86/mc-a.asm @@ -1514,7 +1514,11 @@ ALIGN 4 mov t0, r0 mov t1, r1 mov t2, r3 +%if WIN64 + %define multy0 r4m +%else %define multy0 [rsp-8] +%endif mova multy0, m5 %else mov r3m, r3 diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm index 75c3b913..e71017ac 100644 --- a/common/x86/pixel-a.asm +++ b/common/x86/pixel-a.asm @@ -1913,15 +1913,16 @@ cglobal hadamard_load ; void intra_satd_x3_4x4( uint8_t *fenc, uint8_t *fdec, int *res ) ;----------------------------------------------------------------------------- cglobal intra_satd_x3_4x4, 3,3 -%if ARCH_X86_64 +%if UNIX64 ; stack is 16 byte aligned because abi says so %define top_1d rsp-8 ; size 8 %define left_1d rsp-16 ; size 8 %else - ; stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned - SUB esp, 16 - %define top_1d esp+8 - %define left_1d esp + ; WIN64: stack is 16 byte aligned because abi says so + ; X86_32: stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned + SUB rsp, 16 + %define top_1d rsp+8 + %define left_1d rsp %endif call hadamard_load @@ -1943,8 +1944,8 @@ cglobal intra_satd_x3_4x4, 3,3 movd [r2+0], m0 ; i4x4_v satd movd [r2+4], m4 ; i4x4_h satd movd [r2+8], m5 ; i4x4_dc satd -%if ARCH_X86_64 == 0 - ADD esp, 16 +%if UNIX64 == 0 + ADD rsp, 16 %endif RET -- 2.40.0