From: Joel Jones Date: Tue, 7 Mar 2017 19:42:40 +0000 (+0000) Subject: [AArch64] Vulcan is now ThunderXT99 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=18b5c0bc713f0a74f7ddc82e9dda9d39c976a193;p=llvm [AArch64] Vulcan is now ThunderXT99 Broadcom Vulcan is now Cavium ThunderX2T99. LLVM Bugzilla: http://bugs.llvm.org/show_bug.cgi?id=32113 Minor fixes for the alignments of loops and functions for ThunderX T81/T83/T88 (better performance). Patch was tested with SpecCPU2006. Patch by Stefan Teleman Differential Revision: https://reviews.llvm.org/D30510 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297190 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def index 15c3f16d0de..46d253bf0ec 100644 --- a/include/llvm/Support/AArch64TargetParser.def +++ b/include/llvm/Support/AArch64TargetParser.def @@ -73,8 +73,9 @@ AARCH64_CPU_NAME("falkor", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) AARCH64_CPU_NAME("kryo", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) -AARCH64_CPU_NAME("vulcan", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) +AARCH64_CPU_NAME("thunderx2t99", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_SIMD | AArch64::AEK_LSE | AArch64::AEK_CRC | + AArch64::AEK_CRYPTO)) AARCH64_CPU_NAME("thunderx", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("thunderxt88", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 3df43e225a2..a6dbb2a1c39 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -161,7 +161,7 @@ include "AArch64SchedFalkor.td" include "AArch64SchedKryo.td" include "AArch64SchedM1.td" include "AArch64SchedThunderX.td" -include "AArch64SchedVulcan.td" +include "AArch64SchedThunderX2T99.td" def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", [ @@ -288,16 +288,18 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", FeatureZCZeroing ]>; -def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan", - "Broadcom Vulcan processors", [ - FeatureCRC, - FeatureCrypto, - FeatureFPARMv8, - FeatureArithmeticBccFusion, - FeatureNEON, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - HasV8_1aOps]>; +def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", + "ThunderX2T99", + "Cavium ThunderX2 processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureArithmeticBccFusion, + FeatureNEON, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureLSE, + HasV8_1aOps]>; def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", "Cavium ThunderX processors", [ @@ -363,12 +365,13 @@ def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>; def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM2]>; def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>; def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; -def : ProcessorModel<"vulcan", VulcanModel, [ProcVulcan]>; // Cavium ThunderX/ThunderX T8X Processors def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>; def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>; def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>; def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; +// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. +def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; //===----------------------------------------------------------------------===// // Assembly parser diff --git a/lib/Target/AArch64/AArch64SchedThunderX.td b/lib/Target/AArch64/AArch64SchedThunderX.td index 224ee3d0856..9a0cb702518 100644 --- a/lib/Target/AArch64/AArch64SchedThunderX.td +++ b/lib/Target/AArch64/AArch64SchedThunderX.td @@ -23,6 +23,7 @@ def ThunderXT8XModel : SchedMachineModel { let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order. let LoadLatency = 3; // Optimistic load latency. let MispredictPenalty = 8; // Branch mispredict penalty. + let PostRAScheduler = 1; // Use PostRA scheduler. let CompleteModel = 1; } diff --git a/lib/Target/AArch64/AArch64SchedVulcan.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td similarity index 64% rename from lib/Target/AArch64/AArch64SchedVulcan.td rename to lib/Target/AArch64/AArch64SchedThunderX2T99.td index 35a40c314bf..3654eeca530 100644 --- a/lib/Target/AArch64/AArch64SchedVulcan.td +++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -1,4 +1,4 @@ -//=- AArch64SchedVulcan.td - Vulcan Scheduling Defs ----------*- tablegen -*-=// +//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -6,23 +6,23 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// 1. Introduction // -// This file defines the machine model for Broadcom Vulcan to support -// instruction scheduling and other instruction cost heuristics. +// This file defines the scheduling model for Cavium ThunderX2T99 +// processors. +// Based on Broadcom Vulcan. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // 2. Pipeline Description. -def VulcanModel : SchedMachineModel { +def ThunderX2T99Model : SchedMachineModel { let IssueWidth = 4; // 4 micro-ops dispatched at a time. let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. let LoadLatency = 4; // Optimistic load latency. let MispredictPenalty = 12; // Extra cycles for mispredicted branch. // Determined via a mix of micro-arch details and experimentation. - let LoopMicroOpBufferSize = 32; + let LoopMicroOpBufferSize = 32; let PostRAScheduler = 1; // Using PostRA sched. let CompleteModel = 1; } @@ -30,155 +30,155 @@ def VulcanModel : SchedMachineModel { // Define the issue ports. // Port 0: ALU, FP/SIMD. -def VulcanP0 : ProcResource<1>; +def THX2T99P0 : ProcResource<1>; // Port 1: ALU, FP/SIMD, integer mul/div. -def VulcanP1 : ProcResource<1>; +def THX2T99P1 : ProcResource<1>; // Port 2: ALU, Branch. -def VulcanP2 : ProcResource<1>; +def THX2T99P2 : ProcResource<1>; // Port 3: Store data. -def VulcanP3 : ProcResource<1>; +def THX2T99P3 : ProcResource<1>; // Port 4: Load/store. -def VulcanP4 : ProcResource<1>; +def THX2T99P4 : ProcResource<1>; // Port 5: Load/store. -def VulcanP5 : ProcResource<1>; +def THX2T99P5 : ProcResource<1>; -let SchedModel = VulcanModel in { +let SchedModel = ThunderX2T99Model in { // Define groups for the functional units on each issue port. Each group // created will be used by a WriteRes later on. // // NOTE: Some groups only contain one member. This is a way to create names for // the various functional units that share a single issue port. For example, -// "VulcanI1" for ALU ops on port 1 and "VulcanF1" for FP ops on port 1. +// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1. // Integer divide and multiply micro-ops only on port 1. -def VulcanI1 : ProcResGroup<[VulcanP1]>; +def THX2T99I1 : ProcResGroup<[THX2T99P1]>; // Branch micro-ops only on port 2. -def VulcanI2 : ProcResGroup<[VulcanP2]>; +def THX2T99I2 : ProcResGroup<[THX2T99P2]>; // ALU micro-ops on ports 0, 1, and 2. -def VulcanI012 : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2]>; +def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>; // Crypto FP/SIMD micro-ops only on port 1. -def VulcanF1 : ProcResGroup<[VulcanP1]>; +def THX2T99F1 : ProcResGroup<[THX2T99P1]>; // FP/SIMD micro-ops on ports 0 and 1. -def VulcanF01 : ProcResGroup<[VulcanP0, VulcanP1]>; +def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>; // Store data micro-ops only on port 3. -def VulcanSD : ProcResGroup<[VulcanP3]>; +def THX2T99SD : ProcResGroup<[THX2T99P3]>; // Load/store micro-ops on ports 4 and 5. -def VulcanLS01 : ProcResGroup<[VulcanP4, VulcanP5]>; +def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; // 60 entry unified scheduler. -def VulcanAny : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2, - VulcanP3, VulcanP4, VulcanP5]> { +def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, + THX2T99P3, THX2T99P4, THX2T99P5]> { let BufferSize=60; } // Define commonly used write types for InstRW specializations. -// All definitions follow the format: VulcanWrite_Cyc_. +// All definitions follow the format: THX2T99Write_Cyc_. // 3 cycles on I1. -def VulcanWrite_3Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 3; } +def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; } // 4 cycles on I1. -def VulcanWrite_4Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 4; } +def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; } // 1 cycle on I0, I1, or I2. -def VulcanWrite_1Cyc_I012 : SchedWriteRes<[VulcanI012]> { let Latency = 1; } +def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; } // 5 cycles on F1. -def VulcanWrite_5Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 5; } +def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; } // 7 cycles on F1. -def VulcanWrite_7Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 7; } +def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; } // 4 cycles on F0 or F1. -def VulcanWrite_4Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 4; } +def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; } // 5 cycles on F0 or F1. -def VulcanWrite_5Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 5; } +def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; } // 6 cycles on F0 or F1. -def VulcanWrite_6Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 6; } +def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; } // 7 cycles on F0 or F1. -def VulcanWrite_7Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 7; } +def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; } // 8 cycles on F0 or F1. -def VulcanWrite_8Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 8; } +def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; } // 16 cycles on F0 or F1. -def VulcanWrite_16Cyc_F01 : SchedWriteRes<[VulcanF01]> { +def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 16; let ResourceCycles = [8]; } // 23 cycles on F0 or F1. -def VulcanWrite_23Cyc_F01 : SchedWriteRes<[VulcanF01]> { +def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 23; let ResourceCycles = [11]; } // 1 cycles on LS0 or LS1. -def VulcanWrite_1Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 1; } +def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; } // 4 cycles on LS0 or LS1. -def VulcanWrite_4Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 4; } +def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; } // 5 cycles on LS0 or LS1. -def VulcanWrite_5Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 5; } +def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; } // 6 cycles on LS0 or LS1. -def VulcanWrite_6Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 6; } +def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; } // 5 cycles on LS0 or LS1 and I0, I1, or I2. -def VulcanWrite_5Cyc_LS01_I012 : SchedWriteRes<[VulcanLS01, VulcanI012]> { +def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { let Latency = 5; let NumMicroOps = 2; } // 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. -def VulcanWrite_6Cyc_LS01_I012_I012 : - SchedWriteRes<[VulcanLS01, VulcanI012, VulcanI012]> { +def THX2T99Write_6Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { let Latency = 6; let NumMicroOps = 3; } // 1 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_1Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { +def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 1; let NumMicroOps = 2; } // 5 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_5Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { +def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 5; let NumMicroOps = 2; } // 6 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_6Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { +def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 6; let NumMicroOps = 2; } // 7 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_7Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { +def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 7; let NumMicroOps = 2; } // 8 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_8Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { +def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 8; let NumMicroOps = 2; } @@ -202,7 +202,7 @@ def : ReadAdvance; //===----------------------------------------------------------------------===// // 3. Instruction Tables. -let SchedModel = VulcanModel in { +let SchedModel = ThunderX2T99Model in { //--- // 3.1 Branch Instructions @@ -211,7 +211,7 @@ let SchedModel = VulcanModel in { // Branch, immed // Branch and link, immed // Compare and branch -def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } @@ -222,7 +222,7 @@ def : WriteRes { let Unsupported = 1; } // Branch, register // Branch and link, register != LR // Branch and link, register = LR -def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } //--- // 3.2 Arithmetic and Logical Instructions @@ -233,25 +233,25 @@ def : WriteRes { let Latency = 1; } // Conditional compare // Conditional select // Address generation -def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } def : InstRW<[WriteI], (instrs COPY)>; // ALU, extend and/or shift -def : WriteRes { +def : WriteRes { let Latency = 2; let ResourceCycles = [2]; } -def : WriteRes { +def : WriteRes { let Latency = 2; let ResourceCycles = [2]; } // Move immed -def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } // Variable shift -def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } //--- // 3.4 Divide and Multiply Instructions @@ -259,33 +259,33 @@ def : WriteRes { let Latency = 1; } // Divide, W-form // Latency range of 13-23. Take the average. -def : WriteRes { +def : WriteRes { let Latency = 18; let ResourceCycles = [18]; } // Divide, X-form // Latency range of 13-39. Take the average. -def : WriteRes { +def : WriteRes { let Latency = 26; let ResourceCycles = [26]; } // Multiply accumulate, W-form -def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 5; } // Multiply accumulate, X-form -def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 5; } // Bitfield extract, two reg -def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } // Bitfield move, basic // Bitfield move, insert // NOTE: Handled by WriteIS. // Count leading -def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$", +def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>; // Reverse bits/bytes @@ -300,13 +300,13 @@ def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$", // Load register, unscaled immed // Load register, immed unprivileged // Load register, unsigned immed -def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } // Load register, immed post-index // NOTE: Handled by WriteLD, WriteI. // Load register, immed pre-index // NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } // Load register offset, basic // Load register, register offset, scale by 4/8 @@ -314,15 +314,15 @@ def : WriteRes { let Latency = 1; } // Load register offset, extend // Load register, register offset, extend, scale by 4/8 // Load register, register offset, extend, scale by 2 -def VulcanWriteLDIdx : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; +def THX2T99WriteLDIdx : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; -def VulcanReadAdrBase : SchedReadVariant<[ +def THX2T99ReadAdrBase : SchedReadVariant<[ SchedVar, SchedVar]>; -def : SchedAlias; +def : SchedAlias; // Load pair, immed offset, normal // Load pair, immed offset, signed words, base != SP @@ -347,7 +347,7 @@ def : WriteRes { // Store register, unscaled immed // Store register, immed unprivileged // Store register, unsigned immed -def : WriteRes { +def : WriteRes { let Latency = 1; let NumMicroOps = 2; } @@ -364,14 +364,14 @@ def : WriteRes { // Store register, register offset, extend // Store register, register offset, extend, scale by 4/8 // Store register, register offset, extend, scale by 1 -def : WriteRes { +def : WriteRes { let Latency = 1; let NumMicroOps = 3; } // Store pair, immed offset, W-form // Store pair, immed offset, X-form -def : WriteRes { +def : WriteRes { let Latency = 1; let NumMicroOps = 2; } @@ -389,35 +389,35 @@ def : WriteRes { // FP absolute value // FP min/max // FP negate -def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 5; } // FP arithmetic -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADD", "^FSUB")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; // FP compare -def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 5; } // FP divide, S-form // FP square root, S-form -def : WriteRes { +def : WriteRes { let Latency = 16; let ResourceCycles = [8]; } // FP divide, D-form // FP square root, D-form -def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>; // FP multiply // FP multiply accumulate -def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } // FP round to integral -def : InstRW<[VulcanWrite_7Cyc_F01], +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; // FP select -def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>; +def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; //--- // 3.9 FP Miscellaneous Instructions @@ -426,16 +426,16 @@ def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>; // FP convert, from vec to vec reg // FP convert, from gen to vec reg // FP convert, from vec to gen reg -def : WriteRes { let Latency = 7; } +def : WriteRes { let Latency = 7; } // FP move, immed // FP move, register -def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } // FP transfer, from gen to vec reg // FP transfer, from vec to gen reg -def : WriteRes { let Latency = 4; } -def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; +def : WriteRes { let Latency = 4; } +def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; //--- // 3.12 ASIMD Integer Instructions @@ -470,39 +470,39 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; // ASIMD shift by register, basic, Q-form // ASIMD shift by register, complex, D-form // ASIMD shift by register, complex, Q-form -def : WriteRes { let Latency = 7; } +def : WriteRes { let Latency = 7; } // ASIMD arith, reduce, 4H/4S // ASIMD arith, reduce, 8B/8H // ASIMD arith, reduce, 16B -def : InstRW<[VulcanWrite_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; // ASIMD logical (MOV, MVN, ORN, ORR) -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>; // ASIMD polynomial (8x8) multiply long -def : InstRW<[VulcanWrite_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>; //--- // 3.13 ASIMD Floating-point Instructions //--- // ASIMD FP absolute value -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FABSv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; // ASIMD FP arith, normal, D-form // ASIMD FP arith, normal, Q-form -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>; // ASIMD FP arith,pairwise, D-form // ASIMD FP arith, pairwise, Q-form -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADDPv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; // ASIMD FP compare, D-form // ASIMD FP compare, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", "^FCMGTv", "^FCMLEv", "^FCMLTv")>; @@ -513,42 +513,42 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", // NOTE: Handled by WriteV. // ASIMD FP divide, D-form, F32 -def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv2f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; // ASIMD FP divide, Q-form, F32 -def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv4f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; // ASIMD FP divide, Q-form, F64 -def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVv2f64)>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; // ASIMD FP max/min, normal, D-form // ASIMD FP max/min, normal, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv", +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv", "^FMINv", "^FMINNMv")>; // ASIMD FP max/min, pairwise, D-form // ASIMD FP max/min, pairwise, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv", +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv", "^FMINPv", "^FMINNMPv")>; // ASIMD FP max/min, reduce -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", "^FMINVv", "^FMINNMVv")>; // ASIMD FP multiply, D-form, FZ // ASIMD FP multiply, D-form, no FZ // ASIMD FP multiply, Q-form, FZ // ASIMD FP multiply, Q-form, no FZ -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; // ASIMD FP multiply accumulate, Dform, FZ // ASIMD FP multiply accumulate, Dform, no FZ // ASIMD FP multiply accumulate, Qform, FZ // ASIMD FP multiply accumulate, Qform, no FZ -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; // ASIMD FP negate -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; // ASIMD FP round, D-form // ASIMD FP round, Q-form @@ -559,39 +559,39 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>; //-- // ASIMD bit reverse -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^RBITv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; // ASIMD bitwise insert, D-form // ASIMD bitwise insert, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>; // ASIMD count, D-form // ASIMD count, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>; // ASIMD duplicate, gen reg // ASIMD duplicate, element -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^DUPv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; // ASIMD extract -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^EXTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; // ASIMD extract narrow // ASIMD extract narrow, saturating // NOTE: Handled by WriteV. // ASIMD insert, element to element -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; // ASIMD move, integer immed -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>; // ASIMD move, FP immed -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMOVv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; // ASIMD reciprocal estimate, D-form // ASIMD reciprocal estimate, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", "^FRSQRTEv", "^URSQRTEv")>; @@ -599,31 +599,31 @@ def : InstRW<[VulcanWrite_5Cyc_F01], // ASIMD reciprocal step, D-form, no FZ // ASIMD reciprocal step, Q-form, FZ // ASIMD reciprocal step, Q-form, no FZ -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; // ASIMD reverse -def : InstRW<[VulcanWrite_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^REV16v", "^REV32v", "^REV64v")>; // ASIMD table lookup, D-form // ASIMD table lookup, Q-form -def : InstRW<[VulcanWrite_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; +def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; // ASIMD transfer, element to word or word -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^UMOVv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>; // ASIMD transfer, element to gen reg -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>; // ASIMD transfer gen reg to element -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; // ASIMD transpose -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; // ASIMD unzip/zip -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; //-- // 3.15 ASIMD Load Instructions @@ -631,114 +631,114 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; // ASIMD load, 1 element, multiple, 1 reg, D-form // ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[VulcanWrite_4Cyc_LS01], +def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, D-form // ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[VulcanWrite_4Cyc_LS01], +def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, D-form // ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[VulcanWrite_5Cyc_LS01], +def : InstRW<[THX2T99Write_5Cyc_LS01], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, D-form // ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[VulcanWrite_6Cyc_LS01], +def : InstRW<[THX2T99Write_6Cyc_LS01], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_6Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, one lane, B/H/S // ASIMD load, 1 element, one lane, D -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1i(8|16|32|64)_POST$")>; // ASIMD load, 1 element, all lanes, D-form, B/H/S // ASIMD load, 1 element, all lanes, D-form, D // ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, multiple, D-form, B/H/S // ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, one lane, B/H // ASIMD load, 2 element, one lane, S // ASIMD load, 2 element, one lane, D -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2i(8|16|32|64)_POST$")>; // ASIMD load, 2 element, all lanes, D-form, B/H/S // ASIMD load, 2 element, all lanes, D-form, D // ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, multiple, D-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_8Cyc_LS01_F01], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, one lone, B/H // ASIMD load, 3 element, one lane, S // ASIMD load, 3 element, one lane, D -def : InstRW<[VulcanWrite_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3i(8|16|32|64)_POST$")>; // ASIMD load, 3 element, all lanes, D-form, B/H/S // ASIMD load, 3 element, all lanes, D-form, D // ASIMD load, 3 element, all lanes, Q-form, B/H/S // ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[VulcanWrite_7Cyc_LS01_F01], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, multiple, D-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_8Cyc_LS01_F01], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, one lane, B/H // ASIMD load, 4 element, one lane, S // ASIMD load, 4 element, one lane, D -def : InstRW<[VulcanWrite_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4i(8|16|32|64)_POST$")>; // ASIMD load, 4 element, all lanes, D-form, B/H/S // ASIMD load, 4 element, all lanes, D-form, D // ASIMD load, 4 element, all lanes, Q-form, B/H/S // ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[VulcanWrite_6Cyc_LS01_F01], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; //-- @@ -747,82 +747,82 @@ def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr], // ASIMD store, 1 element, multiple, 1 reg, D-form // ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[VulcanWrite_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, D-form // ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[VulcanWrite_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, D-form // ASIMD store, 1 element, multiple, 3 reg, Q-form -def : InstRW<[VulcanWrite_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, D-form // ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[VulcanWrite_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, one lane, B/H/S // ASIMD store, 1 element, one lane, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST1i(8|16|32|64)_POST$")>; // ASIMD store, 2 element, multiple, D-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 2 element, one lane, B/H/S // ASIMD store, 2 element, one lane, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2i(8|16|32|64)_POST$")>; // ASIMD store, 3 element, multiple, D-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 3 element, one lane, B/H // ASIMD store, 3 element, one lane, S // ASIMD store, 3 element, one lane, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3i(8|16|32|64)_POST$")>; // ASIMD store, 4 element, multiple, D-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 4 element, one lane, B/H // ASIMD store, 4 element, one lane, S // ASIMD store, 4 element, one lane, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4i(8|16|32|64)_POST$")>; //-- @@ -830,23 +830,23 @@ def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], //-- // Crypto AES ops -def : InstRW<[VulcanWrite_5Cyc_F1], (instregex "^AES")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>; // Crypto polynomial (64x64) multiply long -def : InstRW<[VulcanWrite_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>; // Crypto SHA1 xor ops // Crypto SHA1 schedule acceleration ops // Crypto SHA256 schedule acceleration op (1 u-op) // Crypto SHA256 schedule acceleration op (2 u-ops) // Crypto SHA256 hash acceleration ops -def : InstRW<[VulcanWrite_7Cyc_F1], (instregex "^SHA")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>; //-- // 3.18 CRC //-- // CRC checksum ops -def : InstRW<[VulcanWrite_4Cyc_I1], (instregex "^CRC32")>; +def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>; -} // SchedModel = VulcanModel +} // SchedModel = ThunderX2T99Model diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 2ee0193cdeb..a9222c43476 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -81,16 +81,22 @@ void AArch64Subtarget::initializeProperties() { MinPrefetchStride = 1024; MaxPrefetchIterationsAhead = 11; break; - case Vulcan: + case ThunderX2T99: + CacheLineSize = 64; + PrefFunctionAlignment = 3; + PrefLoopAlignment = 2; MaxInterleaveFactor = 4; + PrefetchDistance = 128; + MinPrefetchStride = 1024; + MaxPrefetchIterationsAhead = 4; break; case ThunderX: case ThunderXT88: case ThunderXT81: case ThunderXT83: CacheLineSize = 128; - PrefFunctionAlignment = 4; - PrefLoopAlignment = 4; + PrefFunctionAlignment = 3; + PrefLoopAlignment = 2; break; case CortexA35: break; case CortexA53: break; diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 8bc65aa7b48..d491b33b56a 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -45,7 +45,7 @@ public: ExynosM1, Falkor, Kryo, - Vulcan, + ThunderX2T99, ThunderX, ThunderXT81, ThunderXT83, diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll index 50685cf5d34..f65144def24 100644 --- a/test/CodeGen/AArch64/cpus.ll +++ b/test/CodeGen/AArch64/cpus.ll @@ -12,7 +12,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m3 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=falkor 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=kryo 2>&1 | FileCheck %s -; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=vulcan 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx2t99 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID ; CHECK-NOT: {{.*}} is not a recognized processor for this target diff --git a/test/CodeGen/AArch64/machine-combiner-madd.ll b/test/CodeGen/AArch64/machine-combiner-madd.ll index ea311378946..4efe4e9cfb0 100644 --- a/test/CodeGen/AArch64/machine-combiner-madd.ll +++ b/test/CodeGen/AArch64/machine-combiner-madd.ll @@ -6,7 +6,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m1 < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m2 < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=kryo < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=vulcan < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=thunderx2t99 < %s | FileCheck %s ; Make sure that inst-combine fuses the multiply add in the addressing mode of ; the load. diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll index 5081a9da340..80a054beb2a 100644 --- a/test/CodeGen/AArch64/remat.ll +++ b/test/CodeGen/AArch64/remat.ll @@ -8,7 +8,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m3 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=falkor -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=vulcan -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx2t99 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s %X = type { i64, i64, i64 } diff --git a/unittests/Support/TargetParserTest.cpp b/unittests/Support/TargetParserTest.cpp index 347128cbc07..f0bfe7dbde9 100644 --- a/unittests/Support/TargetParserTest.cpp +++ b/unittests/Support/TargetParserTest.cpp @@ -643,8 +643,9 @@ TEST(TargetParserTest, testAArch64CPU) { "kryo", "armv8-a", "crypto-neon-fp-armv8", AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD, "8-A")); EXPECT_TRUE(testAArch64CPU( - "vulcan", "armv8.1-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD, "8.1-A")); + "thunderx2t99", "armv8.1-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE | + AArch64::AEK_SIMD, "8.1-A")); EXPECT_TRUE(testAArch64CPU( "thunderx", "armv8-a", "crypto-neon-fp-armv8", AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD | @@ -700,7 +701,7 @@ TEST(TargetParserTest, testAArch64Extension) { EXPECT_FALSE(testAArch64Extension("cyclone", 0, "ras")); EXPECT_FALSE(testAArch64Extension("exynos-m1", 0, "ras")); EXPECT_FALSE(testAArch64Extension("kryo", 0, "ras")); - EXPECT_FALSE(testAArch64Extension("vulcan", 0, "ras")); + EXPECT_FALSE(testAArch64Extension("thunderx2t99", 0, "ras")); EXPECT_FALSE(testAArch64Extension("thunderx", 0, "lse")); EXPECT_FALSE(testAArch64Extension("thunderxt81", 0, "lse")); EXPECT_FALSE(testAArch64Extension("thunderxt83", 0, "lse"));