granicus.if.org Git - llvm/commitdiff
[AArch64] Add new subtarget feature to fuse AES crypto operations
author: Evandro Menezes <e.menezes@samsung.com>
Wed, 1 Feb 2017 02:54:39 +0000 (02:54 +0000)
committer: Evandro Menezes <e.menezes@samsung.com>
Wed, 1 Feb 2017 02:54:39 +0000 (02:54 +0000)
This feature enables the fusion of AES crypto operations (AESE followed by
AESMC, and AESD followed by AESIMC) on Cortex-A57, as recommended in section
4.13 of its Software Optimization Guide, and on Exynos M1.

Differential revision: https://reviews.llvm.org/D28491

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293738 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AArch64/AArch64.td
lib/Target/AArch64/AArch64MacroFusion.cpp
lib/Target/AArch64/AArch64SchedA57.td
lib/Target/AArch64/AArch64SchedM1.td
lib/Target/AArch64/AArch64Subtarget.h

index cb6d8fc96688da34db7196789558115a6153d6d0..1f21da75681aaeb224b358a704740dff520aeb9c 100644 (file)
@@ -103,6 +103,10 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
     "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
     "CPU fuses arithmetic + cbz/cbnz operations">;
 
+def FeatureFuseAES : SubtargetFeature<
+    "fuse-aes", "HasFuseAES", "true",
+    "CPU fuses AES crypto operations">;
+
 def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
@@ -184,6 +188,7 @@ def ProcA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
                                    FeatureCrypto,
                                    FeatureCustomCheapAsMoveHandling,
                                    FeatureFPARMv8,
+                                   FeatureFuseAES,
                                    FeatureNEON,
                                    FeaturePerfMon,
                                    FeaturePostRAScheduler,
@@ -230,6 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
                                      FeatureCrypto,
                                      FeatureCustomCheapAsMoveHandling,
                                      FeatureFPARMv8,
+                                     FeatureFuseAES,
                                      FeatureNEON,
                                      FeaturePerfMon,
                                      FeaturePostRAScheduler,
index c1adc862fedf83d716c4b1ebe738d00a010c96a6..f6d693262c363e0ee1b21d9aa14e02c3118a8757 100644 (file)
@@ -116,6 +116,19 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
         return true;
       }
 
+  if (ST.hasFuseAES())
+    // Fuse AES crypto operations.
+    switch(FirstOpcode) {
+    // AES encode.
+    case AArch64::AESErr:
+      return SecondOpcode == AArch64::AESMCrr ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // AES decode.
+    case AArch64::AESDrr:
+      return SecondOpcode == AArch64::AESIMCrr ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    }
+
   return false;
 }
 
index 99c48d0146e473959b7f252cb92a77e925d1f71a..303398ea0b7f3b524b9419a3aecf45c34412642f 100644 (file)
@@ -162,7 +162,9 @@ def : InstRW<[A57Write_2cyc_1M],    (instregex "BFM")>;
 // Cryptography Extensions
 // -----------------------------------------------------------------------------
 
-def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+def A57ReadAES  : SchedReadAdvance<3, [A57Write_3cyc_1W]>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>;
+def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>;
 def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
 def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
 def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
index 14d6891253facf6856eb19d2f5fd5594cc611ab9..3fbbc0be682d739c4cf60a633565519fd5d8d1c0 100644 (file)
@@ -366,7 +366,8 @@ def : InstRW<[M1WriteNALU1],  (instregex "^ZIP[12]v")>;
 // Cryptography instructions.
 def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
 def M1ReadAES  : SchedReadAdvance<1, [M1WriteAES]>;
-def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AES")>;
+def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
+def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
 
 def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
 def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
index 48bcdbc82d09cbce3b4b0263664686e4bc2d7f39..26eac3c63493fb2f598926f6f0ba6b017b47cb6b 100644 (file)
@@ -84,6 +84,7 @@ protected:
   bool UseAlternateSExtLoadCVTF32Pattern = false;
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
+  bool HasFuseAES = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
   uint8_t MaxInterleaveFactor = 2;
@@ -197,6 +198,7 @@ public:
   }
   bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
   bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+  bool hasFuseAES() const { return HasFuseAES; }
   bool useRSqrt() const { return UseRSqrt; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   unsigned getVectorInsertExtractBaseCost() const {