From: Janne Grunau Date: Tue, 18 Nov 2014 23:33:55 +0000 (+0100) Subject: aarch64: cabac_encode_{decision,bypass,terminal}_asm X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=59b9c252cfa6242c7fa6424a463e51913996fe6a;p=libx264 aarch64: cabac_encode_{decision,bypass,terminal}_asm benchmarks on a Nexus 9 (nvidia denver): 101.3 cycles in x264_cabac_encode_decision_c, 67105369 runs, 3495 skips 97.3 cycles in x264_cabac_encode_decision_asm, 67105493 runs, 3371 skips 132.8 cycles in x264_cabac_encode_terminal_c, 1046950 runs, 1626 skips 116.1 cycles in x264_cabac_encode_terminal_asm, 1048424 runs, 152 skips 92.4 cycles in x264_cabac_encode_bypass_c, 16776192 runs, 1024 skips 89.6 cycles in x264_cabac_encode_bypass_asm, 16776453 runs, 763 skips Cycle counts are not as stable as one would like. The dynamic code optimisation seems to produce different results for small changes in a binary. Repeated runs with the same binary produce stable results though (ignoring the first run). 
--- diff --git a/Makefile b/Makefile index f2935421..12c74e44 100644 --- a/Makefile +++ b/Makefile @@ -129,13 +129,15 @@ endif ifeq ($(ARCH),AARCH64) ifneq ($(AS),) ASMSRC += common/aarch64/bitstream-a.S \ + common/aarch64/cabac-a.S \ common/aarch64/dct-a.S \ common/aarch64/deblock-a.S \ common/aarch64/mc-a.S \ common/aarch64/pixel-a.S \ common/aarch64/predict-a.S \ common/aarch64/quant-a.S -SRCS += common/aarch64/mc-c.c \ +SRCS += common/aarch64/asm-offsets.c \ + common/aarch64/mc-c.c \ common/aarch64/predict-c.c OBJASM = $(ASMSRC:%.S=%.o) endif diff --git a/common/aarch64/asm-offsets.c b/common/aarch64/asm-offsets.c new file mode 100644 index 00000000..c0630d45 --- /dev/null +++ b/common/aarch64/asm-offsets.c @@ -0,0 +1,42 @@ +/***************************************************************************** + * asm-offsets.c: check asm offsets for aarch64 + ***************************************************************************** + * Copyright (C) 2014 x264 project + * + * Authors: Janne Grunau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "common/common.h" +#include "asm-offsets.h" + +#define X264_CHECK_OFFSET(s, m, o) struct check_##s##_##m \ +{ \ + int m_##m[2 * (offsetof(s, m) == o) - 1]; \ +} + +X264_CHECK_OFFSET(x264_cabac_t, i_low, CABAC_I_LOW); +X264_CHECK_OFFSET(x264_cabac_t, i_range, CABAC_I_RANGE); +X264_CHECK_OFFSET(x264_cabac_t, i_queue, CABAC_I_QUEUE); +X264_CHECK_OFFSET(x264_cabac_t, i_bytes_outstanding, CABAC_I_BYTES_OUTSTANDING); +X264_CHECK_OFFSET(x264_cabac_t, p_start, CABAC_P_START); +X264_CHECK_OFFSET(x264_cabac_t, p, CABAC_P); +X264_CHECK_OFFSET(x264_cabac_t, p_end, CABAC_P_END); +X264_CHECK_OFFSET(x264_cabac_t, f8_bits_encoded, CABAC_F8_BITS_ENCODED); +X264_CHECK_OFFSET(x264_cabac_t, state, CABAC_STATE); diff --git a/common/aarch64/asm-offsets.h b/common/aarch64/asm-offsets.h new file mode 100644 index 00000000..b35baae1 --- /dev/null +++ b/common/aarch64/asm-offsets.h @@ -0,0 +1,39 @@ +/***************************************************************************** + * asm-offsets.h: asm offsets for aarch64 + ***************************************************************************** + * Copyright (C) 2014 x264 project + * + * Authors: Janne Grunau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. 
+ * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_AARCH64_ASM_OFFSETS_H +#define X264_AARCH64_ASM_OFFSETS_H + +#define CABAC_I_LOW 0x00 +#define CABAC_I_RANGE 0x04 +#define CABAC_I_QUEUE 0x08 +#define CABAC_I_BYTES_OUTSTANDING 0x0c +#define CABAC_P_START 0x10 +#define CABAC_P 0x18 +#define CABAC_P_END 0x20 +#define CABAC_F8_BITS_ENCODED 0x30 +#define CABAC_STATE 0x34 + +#endif diff --git a/common/aarch64/cabac-a.S b/common/aarch64/cabac-a.S new file mode 100644 index 00000000..abd8b6a2 --- /dev/null +++ b/common/aarch64/cabac-a.S @@ -0,0 +1,122 @@ +/***************************************************************************** + * cabac-a.S: aarch64 cabac + ***************************************************************************** + * Copyright (C) 2014 x264 project + * + * Authors: Janne Grunau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "asm.S" +#include "asm-offsets.h" + +// w11 holds x264_cabac_t.i_low +// w12 holds x264_cabac_t.i_range + +function x264_cabac_encode_decision_asm, export=1 + movrel x8, X(x264_cabac_range_lps) + movrel x9, X(x264_cabac_transition) + add w10, w1, #CABAC_STATE + ldrb w3, [x0, x10] // i_state + ldr w12, [x0, #CABAC_I_RANGE] + and x4, x3, #~1 + asr w5, w12, #6 + add x8, x8, x4, lsl #1 + sub w5, w5, #4 + eor w6, w2, w3 // b ^ i_state + ldrb w4, [x8, x5] // i_range_lps + ldr w11, [x0, #CABAC_I_LOW] + sub w12, w12, w4 + tbz w6, #0, 1f // (b ^ i_state) & 1 + add w11, w11, w12 + mov w12, w4 +1: + orr w4, w2, w3, lsl #1 + ldrb w9, [x9, x4] + strb w9, [x0, x10] // i_state + +cabac_encode_renorm: + clz w5, w12 + ldr w2, [x0, #CABAC_I_QUEUE] + sub w5, w5, #23 + lsl w12, w12, w5 + lsl w11, w11, w5 +2: + adds w2, w2, w5 + str w12, [x0, #CABAC_I_RANGE] + b.lt 0f +cabac_putbyte: + mov w13, #0x400 + add w12, w2, #10 + lsl w13, w13, w2 + asr w4, w11, w12 // out + sub w2, w2, #8 + sub w13, w13, #1 + subs w5, w4, #0xff + and w11, w11, w13 + ldr w6, [x0, #CABAC_I_BYTES_OUTSTANDING] + str w2, [x0, #CABAC_I_QUEUE] + b.ne 1f + + add w6, w6, #1 + str w11, [x0, #CABAC_I_LOW] + str w6, [x0, #CABAC_I_BYTES_OUTSTANDING] + ret + +1: + ldr x7, [x0, #CABAC_P] + asr w5, w4, #8 // carry + ldrb w8, [x7, #-1] + add w8, w8, w5 + sub w5, w5, #1 + strb w8, [x7, #-1] + cbz w6, 3f +2: + subs w6, w6, #1 + strb w5, [x7], #1 + b.gt 2b +3: + strb w4, [x7], #1 + str wzr, [x0, #CABAC_I_BYTES_OUTSTANDING] + str x7, [x0, #CABAC_P] +0: + str w11, [x0, #CABAC_I_LOW] + str w2, [x0, #CABAC_I_QUEUE] + ret +endfunc + +function x264_cabac_encode_bypass_asm, export=1 + ldr w12, [x0, #CABAC_I_RANGE] + ldr w11, [x0, #CABAC_I_LOW] + ldr w2, [x0, #CABAC_I_QUEUE] + and w1, w1, w12 + add w11, w1, w11, lsl #1 + adds w2, w2, #1 + b.ge cabac_putbyte + str w11, [x0, #CABAC_I_LOW] + str w2, [x0, #CABAC_I_QUEUE] + ret +endfunc + +function 
x264_cabac_encode_terminal_asm, export=1 + ldr w12, [x0, #CABAC_I_RANGE] + ldr w11, [x0, #CABAC_I_LOW] + sub w12, w12, #2 + b cabac_encode_renorm +endfunc diff --git a/common/cabac.h b/common/cabac.h index dbe68206..cc277616 100644 --- a/common/cabac.h +++ b/common/cabac.h @@ -72,6 +72,10 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb ); #define x264_cabac_encode_decision x264_cabac_encode_decision_asm #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm +#elif defined(ARCH_AARCH64) +#define x264_cabac_encode_decision x264_cabac_encode_decision_asm +#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm +#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm #else #define x264_cabac_encode_decision x264_cabac_encode_decision_c #define x264_cabac_encode_bypass x264_cabac_encode_bypass_c diff --git a/tools/checkasm.c b/tools/checkasm.c index c90c0cfc..2ac5f0c7 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -2437,6 +2437,8 @@ static void run_cabac_terminal_##cpu( x264_t *h, uint8_t *dst )\ DECL_CABAC(c) #if HAVE_MMX DECL_CABAC(asm) +#elif defined(ARCH_AARCH64) +DECL_CABAC(asm) #else #define run_cabac_decision_asm run_cabac_decision_c #define run_cabac_bypass_asm run_cabac_bypass_c