From 4529dc848332b78421c04408c8f8fe698ea0c21e Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 15 Nov 2019 10:40:47 -0800 Subject: [PATCH] Disable -ftrivial-auto-var-init= for hot code This helps to improve some benchmarks by 10%, e.g. decode_time PCFullStackTest.VP9SVC_3SL_Low Bug: 1020220, 977230 Change-Id: Ic992f1eec369f46a08e19eb33bc3a7c15c1e7c87 --- vpx_dsp/x86/convolve.h | 12 ++++++------ vpx_ports/mem.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/vpx_dsp/x86/convolve.h b/vpx_dsp/x86/convolve.h index b75d4d721..6fd40fef9 100644 --- a/vpx_dsp/x86/convolve.h +++ b/vpx_dsp/x86/convolve.h @@ -107,7 +107,7 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, assert(x_step_q4 == 16); \ assert(y_step_q4 == 16); \ if (filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ h + 7); \ @@ -116,7 +116,7 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, y_step_q4, w, h); \ } else if (filter_x[2] | filter_x[5]) { \ const int num_taps = is_avg ? 
8 : 4; \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_convolve8_horiz_##opt( \ src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1); \ @@ -124,7 +124,7 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, dst, dst_stride, filter, x0_q4, \ x_step_q4, y0_q4, y_step_q4, w, h); \ } else { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65] VPX_UNINITIALIZED); \ vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \ x_step_q4, y0_q4, y_step_q4, w, h + 1); \ vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \ @@ -242,7 +242,7 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, if (x_step_q4 == 16 && y_step_q4 == 16) { \ if ((filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) || \ filter_x[3] == 128) { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ fdata2, 64, filter, x0_q4, x_step_q4, \ y0_q4, y_step_q4, w, h + 7, bd); \ @@ -251,7 +251,7 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, y0_q4, y_step_q4, w, h, bd); \ } else if (filter_x[2] | filter_x[5]) { \ const int num_taps = is_avg ? 
8 : 4; \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_highbd_convolve8_horiz_##opt( \ src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1, \ @@ -260,7 +260,7 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, fdata2 + 64 * (num_taps / 2 - 1), 64, dst, dst_stride, filter, \ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \ } else { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65] VPX_UNINITIALIZED); \ vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \ x0_q4, x_step_q4, y0_q4, y_step_q4, \ w, h + 1, bd); \ diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h index 317c6dc06..737e9e071 100644 --- a/vpx_ports/mem.h +++ b/vpx_ports/mem.h @@ -51,4 +51,18 @@ #define VPX_WITH_ASAN 0 #endif // __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) +#if __has_attribute(uninitialized) +// Attribute disables -ftrivial-auto-var-init=pattern for specific variables. +// -ftrivial-auto-var-init is security risk mitigation feature, so attribute +// should not be used "just in case", but only to fix real performance +// bottlenecks when other approaches do not work. In general compiler is quite +// effective eliminating unneeded initializations introduced by the flag, e.g. +// when they are followed by actual initialization by a program. +// However if compiler optimization fails and code refactoring is hard, the +// attribute can be used as a workaround. +#define VPX_UNINITIALIZED __attribute__((uninitialized)) +#else +#define VPX_UNINITIALIZED +#endif // __has_attribute(uninitialized) + #endif // VPX_VPX_PORTS_MEM_H_ -- 2.40.0