Globally define the t# size variants (t#q, t#d, t#w, t#b), so that only the base t# registers need to be defined locally when reorganizing register assignments
cextern x264_cabac_transition
cextern x264_cabac_renorm_shift
-%macro DEF_TMP 16
- %rep 8
- %define t%1d r%9d
- %define t%1b r%9b
- %define t%1 r%9
- %rotate 1
- %endrep
-%endmacro
-
; t3 must be ecx, since it's used for shift.
%ifdef ARCH_X86_64
- DEF_TMP 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,10
+ DECLARE_REG_TMP 0,1,2,3,4,5,6,10
%define pointer resq
%else
- DEF_TMP 0,1,2,3,4,5,6,7, 0,3,2,1,4,5,6,3
+ DECLARE_REG_TMP 0,3,2,1,4,5,6,3
%define pointer resd
%endif
; implicit bipred only:
; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64
%ifdef ARCH_X86_64
- %define t0 r0
- %define t1 r1
- %define t2 r2
- %define t3 r3
- %define t4 r4
- %define t5 r5
- %define t6d r10d
- %define t7d r11d
+ DECLARE_REG_TMP 0,1,2,3,4,5,10,11
%macro AVG_START 0
PROLOGUE 6,7
.height_loop:
%endmacro
%else
- %define t0 r1
- %define t1 r2
- %define t2 r3
- %define t3 r4
- %define t4 r5
- %define t5 r6
- %define t6d r1d
- %define t7d r2d
+ DECLARE_REG_TMP 1,2,3,4,5,6,1,2
%macro AVG_START 0
PROLOGUE 0,7
mov t0, r0m
; chroma MC
;=============================================================================
- %define t0d eax
- %define t0 rax
+ %define t0 rax
%ifdef ARCH_X86_64
- %define t1d r10d
+ %define t1 r10
%else
- %define t1d r1d
+ %define t1 r1
%endif
%macro MC_CHROMA_START 0
pxor m6, m6 ; sum squared
pxor m7, m7 ; zero
%ifdef ARCH_X86_64
- %define t3d r3d
+ %define t3 r3
%else
- %define t3d r2d
+ %define t3 r2
%endif
%endmacro
; stack is 16 byte aligned because abi says so
%define top_1d rsp-8 ; size 8
%define left_1d rsp-16 ; size 8
- %define t0 r10
- %define t0d r10d
+ %define t0 r10
%else
; stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned
SUB esp, 16
%define top_1d esp+8
%define left_1d esp
- %define t0 r2
- %define t0d r2d
+ %define t0 r2
%endif
call load_hadamard
RET
%ifdef ARCH_X86_64
- %define t0 r10
- %define t0d r10d
- %define t2 r11
- %define t2w r11w
- %define t2d r11d
+ %define t0 r10
+ %define t2 r11
%else
- %define t0 r0
- %define t0d r0d
- %define t2 r2
- %define t2w r2w
- %define t2d r2d
+ %define t0 r0
+ %define t2 r2
%endif
;-----------------------------------------------------------------------------
%macro ADS_START 1 ; unroll_size
%ifdef ARCH_X86_64
- %define t0 r6
+ %define t0 r6
mov r10, rsp
%else
- %define t0 r4
+ %define t0 r4
mov rbp, rsp
%endif
mov r0d, r5m
%endmacro
%ifdef ARCH_X86_64
- %define t0 r4
- %define t0d r4d
- %define t1 r3
- %define t1d r3d
- %define t2 r2
- %define t2d r2d
+ DECLARE_REG_TMP 4,3,2
%else
- %define t0 r2
- %define t0d r2d
- %define t1 r0
- %define t1d r0d
- %define t2 r1
- %define t2d r1d
+ DECLARE_REG_TMP 2,0,1
%endif
%macro DEQUANT_START 2
DECLARE_REG_SIZE di, dil
DECLARE_REG_SIZE bp, bpl
+; t# defines for when per-arch register allocation is more complex than just function arguments
+
+%macro DECLARE_REG_TMP 1-*
+ %assign %%i 0
+ %rep %0
+ CAT_XDEFINE t, %%i, r%1
+ %assign %%i %%i+1
+ %rotate 1
+ %endrep
+%endmacro
+
+%macro DECLARE_REG_TMP_SIZE 0-*
+ %rep %0
+ %define t%1q t%1 %+ q
+ %define t%1d t%1 %+ d
+ %define t%1w t%1 %+ w
+ %define t%1b t%1 %+ b
+ %rotate 1
+ %endrep
+%endmacro
+
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
+
%ifdef ARCH_X86_64
%define gprsize 8
%else