From: Florian Hahn Date: Thu, 15 Jun 2017 09:31:23 +0000 (+0000) Subject: [AArch64] Enable FeatureFuseAES for the generic processor model. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4127960e358b6a412e9b39a4d0c4dce16555946a;p=llvm [AArch64] Enable FeatureFuseAES for the generic processor model. Summary: Scheduling AESE/AESMC and AESD/AESIMC instruction pairs back-to-back gives a double-digit speedup on benchmarks using those instructions on Cortex-A processors. In GCC, this optimization is part of the generic processor model as well. This change should not have a major performance impact on processors that do not optimize AES instruction pairs, although I only had access to Cortex-A processors for benchmarking. Reviewers: rengolin, kristof.beyls, javed.absar, evandro, silviu.baranga, MatzeB, mcrosier, joelkevinjones, joel_k_jones, bmakam, t.p.northover Reviewed By: evandro Subscribers: sbaranga, aemerson, llvm-commits Differential Revision: https://reviews.llvm.org/D33836 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305457 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index abe28460c83..53eef79c4df 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -362,6 +362,7 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", def : ProcessorModel<"generic", NoSchedModel, [ FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler diff --git a/test/CodeGen/AArch64/misched-fusion-aes.ll b/test/CodeGen/AArch64/misched-fusion-aes.ll index bd7c69c910c..8ee4dbcee52 100644 --- a/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -1,7 +1,9 @@ -; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX -; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s 
--check-prefix=CHECK --check-prefix=CHECKCORTEX -; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX -; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKM1 declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k) @@ -74,22 +76,23 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ret void ; CHECK-LABEL: aesea: -; CHECKCORTEX: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VA]] -; CHECKCORTEX: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VB]] -; CHECKCORTEX: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VC]] -; CHECKCORTEX: aese [[VD:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VD]] -; CHECKCORTEX: aese [[VE:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VE]] -; CHECKCORTEX: aese [[VF:v[0-7].16b]], 
{{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VF]] -; CHECKCORTEX: aese [[VG:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VG]] -; CHECKCORTEX: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VH]] +; CHECKFUSEALLPAIRS: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VA]] +; CHECKFUSEALLPAIRS: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VB]] +; CHECKFUSEALLPAIRS: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VC]] +; CHECKFUSEALLPAIRS: aese [[VD:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VD]] +; CHECKFUSEALLPAIRS: aese [[VE:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VE]] +; CHECKFUSEALLPAIRS: aese [[VF:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VF]] +; CHECKFUSEALLPAIRS: aese [[VG:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VG]] +; CHECKFUSEALLPAIRS: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VH]] +; CHECKFUSEALLPAIRS-NOT: aesmc ; CHECKM1: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VA]] @@ -175,22 +178,23 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ret void ; CHECK-LABEL: aesda: -; CHECKCORTEX: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VA]] -; CHECKCORTEX: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VB]] -; CHECKCORTEX: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VC]] -; CHECKCORTEX: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VD]] -; CHECKCORTEX: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, 
[[VE]] -; CHECKCORTEX: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VF]] -; CHECKCORTEX: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VG]] -; CHECKCORTEX: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} -; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VH]] +; CHECKFUSEALLPAIRS: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VA]] +; CHECKFUSEALLPAIRS: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VB]] +; CHECKFUSEALLPAIRS: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VC]] +; CHECKFUSEALLPAIRS: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VD]] +; CHECKFUSEALLPAIRS: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VE]] +; CHECKFUSEALLPAIRS: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VF]] +; CHECKFUSEALLPAIRS: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VG]] +; CHECKFUSEALLPAIRS: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VH]] +; CHECKFUSEALLPAIRS-NOT: aesimc ; CHECKM1: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VA]] @@ -236,4 +240,5 @@ entry: ; CHECK-NEXT: aesmc {{v[0-7].16b}}, [[VA]] ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesmc {{v[0-7].16b}}, [[VB]] +; CHECK-NOT: aesmc }