From 8ff4fe417f7993462cf4e16a0eb43b09bc26ad36 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Sat, 18 Mar 2017 05:08:58 +0000 Subject: [PATCH] ExecutionDepsFix: Let targets specialize the pass; NFC Let targets specialize the pass with the register class so we can get a parameterless default constructor and can put the pass into the pass registry to enable testing with -run-pass=. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298184 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ExecutionDepsFix.h | 221 ++++++++++++++++++++++++ include/llvm/CodeGen/Passes.h | 6 - lib/CodeGen/ExecutionDepsFix.cpp | 214 +---------------------- lib/Target/ARM/ARMTargetMachine.cpp | 21 ++- lib/Target/X86/X86TargetMachine.cpp | 20 ++- 5 files changed, 261 insertions(+), 221 deletions(-) create mode 100644 include/llvm/CodeGen/ExecutionDepsFix.h diff --git a/include/llvm/CodeGen/ExecutionDepsFix.h b/include/llvm/CodeGen/ExecutionDepsFix.h new file mode 100644 index 00000000000..629683d333d --- /dev/null +++ b/include/llvm/CodeGen/ExecutionDepsFix.h @@ -0,0 +1,221 @@ +//===- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Execution Dependency Fix pass. +/// +/// Some X86 SSE instructions like mov, and, or, xor are available in different +/// variants for different operand types. These variant instructions are +/// equivalent, but on Nehalem and newer cpus there is extra latency +/// transferring data between integer and floating point domains. ARM cores +/// have similar issues when they are configured with both VFP and NEON +/// pipelines. +/// +/// This pass changes the variant instructions to minimize domain crossings. 
+// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H +#define LLVM_CODEGEN_EXECUTIONDEPSFIX_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Support/Allocator.h" +#include + +namespace llvm { + +/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track +/// of execution domains. +/// +/// An open DomainValue represents a set of instructions that can still switch +/// execution domain. Multiple registers may refer to the same open +/// DomainValue - they will eventually be collapsed to the same execution +/// domain. +/// +/// A collapsed DomainValue represents a single register that has been forced +/// into one of more execution domains. There is a separate collapsed +/// DomainValue for each register, but it may contain multiple execution +/// domains. A register value is initially created in a single execution +/// domain, but if we were forced to pay the penalty of a domain crossing, we +/// keep track of the fact that the register is now available in multiple +/// domains. +struct DomainValue { + // Basic reference counting. + unsigned Refs; + + // Bitmask of available domains. For an open DomainValue, it is the still + // possible domains for collapsing. For a collapsed DomainValue it is the + // domains where the register is available for free. + unsigned AvailableDomains; + + // Pointer to the next DomainValue in a chain. When two DomainValues are + // merged, Victim.Next is set to point to Victor, so old DomainValue + // references can be updated by following the chain. + DomainValue *Next; + + // Twiddleable instructions using or defining these registers. + SmallVector Instrs; + + // A collapsed DomainValue has no instructions to twiddle - it simply keeps + // track of the domains where the registers are already available. 
+ bool isCollapsed() const { return Instrs.empty(); } + + // Is domain available? + bool hasDomain(unsigned domain) const { + assert(domain < + static_cast(std::numeric_limits::digits) && + "undefined behavior"); + return AvailableDomains & (1u << domain); + } + + // Mark domain as available. + void addDomain(unsigned domain) { + AvailableDomains |= 1u << domain; + } + + // Restrict to a single domain available. + void setSingleDomain(unsigned domain) { + AvailableDomains = 1u << domain; + } + + // Return bitmask of domains that are available and in mask. + unsigned getCommonDomains(unsigned mask) const { + return AvailableDomains & mask; + } + + // First domain available. + unsigned getFirstDomain() const { + return countTrailingZeros(AvailableDomains); + } + + DomainValue() : Refs(0) { clear(); } + + // Clear this DomainValue and point to next which has all its data. + void clear() { + AvailableDomains = 0; + Next = nullptr; + Instrs.clear(); + } +}; + +/// Information about a live register. +struct LiveReg { + /// Value currently in this register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + DomainValue *Value; + + /// Instruction that defined this register, relative to the beginning of the + /// current basic block. When a LiveReg is used to represent a live-out + /// register, this value is relative to the end of the basic block, so it + /// will be a negative number. + int Def; +}; + +class ExecutionDepsFix : public MachineFunctionPass { + SpecificBumpPtrAllocator Allocator; + SmallVector Avail; + + const TargetRegisterClass *const RC; + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RegClassInfo; + std::vector> AliasMap; + const unsigned NumRegs; + LiveReg *LiveRegs; + struct MBBInfo { + // Keeps clearance and domain information for all registers. Note that this + // is different from the usual definition notion of liveness. 
The CPU + // doesn't care whether or not we consider a register killed. + LiveReg *OutRegs; + + // Whether we have gotten to this block in primary processing yet. + bool PrimaryCompleted; + + // The number of predecessors for which primary processing has completed + unsigned IncomingProcessed; + + // The value of `IncomingProcessed` at the start of primary processing + unsigned PrimaryIncoming; + + // The number of predecessors for which all processing steps are done. + unsigned IncomingCompleted; + + MBBInfo() + : OutRegs(nullptr), PrimaryCompleted(false), IncomingProcessed(0), + PrimaryIncoming(0), IncomingCompleted(0) {} + }; + typedef DenseMap MBBInfoMap; + MBBInfoMap MBBInfos; + + /// List of undefined register reads in this block in forward order. + std::vector > UndefReads; + + /// Storage for register unit liveness. + LivePhysRegs LiveRegSet; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; +public: + ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) + : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + iterator_range::const_iterator> + regIndices(unsigned Reg) const; + // DomainValue allocation. + DomainValue *alloc(int domain = -1); + DomainValue *retain(DomainValue *DV) { + if (DV) ++DV->Refs; + return DV; + } + void release(DomainValue*); + DomainValue *resolve(DomainValue*&); + + // LiveRegs manipulations. 
+ void setLiveReg(int rx, DomainValue *DV); + void kill(int rx); + void force(int rx, unsigned domain); + void collapse(DomainValue *dv, unsigned domain); + bool merge(DomainValue *A, DomainValue *B); + + void enterBasicBlock(MachineBasicBlock*); + void leaveBasicBlock(MachineBasicBlock*); + bool isBlockDone(MachineBasicBlock *); + void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass); + void updateSuccessors(MachineBasicBlock *MBB, bool PrimaryPass); + bool visitInstr(MachineInstr *); + void processDefs(MachineInstr *, bool breakDependency, bool Kill); + void visitSoftInstr(MachineInstr*, unsigned mask); + void visitHardInstr(MachineInstr*, unsigned domain); + void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref); + bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); + void processUndefReads(MachineBasicBlock*); +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index dc50dbf1a90..7185653081c 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -323,12 +323,6 @@ namespace llvm { /// ExpandISelPseudos - This pass expands pseudo-instructions. extern char &ExpandISelPseudosID; - /// This pass fixes execution time problems with dependent instructions, such - /// as switching execution domains to match. - /// - /// The pass will examine instructions using and defining registers in RC. - FunctionPass *createExecutionDepsFixPass(const TargetRegisterClass *RC); - /// UnpackMachineBundles - This pass unpack machine instruction bundles. extern char &UnpackMachineBundlesID; diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index adf21814af6..6b305409256 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -6,21 +6,9 @@ // License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// -// -// This file contains the execution dependency fix pass. -// -// Some X86 SSE instructions like mov, and, or, xor are available in different -// variants for different operand types. These variant instructions are -// equivalent, but on Nehalem and newer cpus there is extra latency -// transferring data between integer and floating point domains. ARM cores -// have similar issues when they are configured with both VFP and NEON -// pipelines. -// -// This pass changes the variant instructions to minimize domain crossings. -// -//===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ExecutionDepsFix.h" + #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -37,200 +25,6 @@ using namespace llvm; #define DEBUG_TYPE "execution-deps-fix" -/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track -/// of execution domains. -/// -/// An open DomainValue represents a set of instructions that can still switch -/// execution domain. Multiple registers may refer to the same open -/// DomainValue - they will eventually be collapsed to the same execution -/// domain. -/// -/// A collapsed DomainValue represents a single register that has been forced -/// into one of more execution domains. There is a separate collapsed -/// DomainValue for each register, but it may contain multiple execution -/// domains. A register value is initially created in a single execution -/// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact that the register is now available in multiple -/// domains. -namespace { -struct DomainValue { - // Basic reference counting. - unsigned Refs; - - // Bitmask of available domains. For an open DomainValue, it is the still - // possible domains for collapsing. 
For a collapsed DomainValue it is the - // domains where the register is available for free. - unsigned AvailableDomains; - - // Pointer to the next DomainValue in a chain. When two DomainValues are - // merged, Victim.Next is set to point to Victor, so old DomainValue - // references can be updated by following the chain. - DomainValue *Next; - - // Twiddleable instructions using or defining these registers. - SmallVector Instrs; - - // A collapsed DomainValue has no instructions to twiddle - it simply keeps - // track of the domains where the registers are already available. - bool isCollapsed() const { return Instrs.empty(); } - - // Is domain available? - bool hasDomain(unsigned domain) const { - assert(domain < - static_cast(std::numeric_limits::digits) && - "undefined behavior"); - return AvailableDomains & (1u << domain); - } - - // Mark domain as available. - void addDomain(unsigned domain) { - AvailableDomains |= 1u << domain; - } - - // Restrict to a single domain available. - void setSingleDomain(unsigned domain) { - AvailableDomains = 1u << domain; - } - - // Return bitmask of domains that are available and in mask. - unsigned getCommonDomains(unsigned mask) const { - return AvailableDomains & mask; - } - - // First domain available. - unsigned getFirstDomain() const { - return countTrailingZeros(AvailableDomains); - } - - DomainValue() : Refs(0) { clear(); } - - // Clear this DomainValue and point to next which has all its data. - void clear() { - AvailableDomains = 0; - Next = nullptr; - Instrs.clear(); - } -}; -} - -namespace { -/// Information about a live register. -struct LiveReg { - /// Value currently in this register, or NULL when no value is being tracked. - /// This counts as a DomainValue reference. - DomainValue *Value; - - /// Instruction that defined this register, relative to the beginning of the - /// current basic block. 
When a LiveReg is used to represent a live-out - /// register, this value is relative to the end of the basic block, so it - /// will be a negative number. - int Def; -}; -} // anonymous namespace - -namespace { -class ExecutionDepsFix : public MachineFunctionPass { - static char ID; - SpecificBumpPtrAllocator Allocator; - SmallVector Avail; - - const TargetRegisterClass *const RC; - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RegClassInfo; - std::vector> AliasMap; - const unsigned NumRegs; - LiveReg *LiveRegs; - struct MBBInfo { - // Keeps clearance and domain information for all registers. Note that this - // is different from the usual definition notion of liveness. The CPU - // doesn't care whether or not we consider a register killed. - LiveReg *OutRegs; - - // Whether we have gotten to this block in primary processing yet. - bool PrimaryCompleted; - - // The number of predecessors for which primary processing has completed - unsigned IncomingProcessed; - - // The value of `IncomingProcessed` at the start of primary processing - unsigned PrimaryIncoming; - - // The number of predecessors for which all processing steps are done. - unsigned IncomingCompleted; - - MBBInfo() - : OutRegs(nullptr), PrimaryCompleted(false), IncomingProcessed(0), - PrimaryIncoming(0), IncomingCompleted(0) {} - }; - typedef DenseMap MBBInfoMap; - MBBInfoMap MBBInfos; - - /// List of undefined register reads in this block in forward order. - std::vector > UndefReads; - - /// Storage for register unit liveness. - LivePhysRegs LiveRegSet; - - /// Current instruction number. - /// The first instruction in each basic block is 0. 
- int CurInstr; -public: - ExecutionDepsFix(const TargetRegisterClass *rc) - : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - - StringRef getPassName() const override { return "Execution dependency fix"; } - -private: - iterator_range::const_iterator> - regIndices(unsigned Reg) const; - // DomainValue allocation. - DomainValue *alloc(int domain = -1); - DomainValue *retain(DomainValue *DV) { - if (DV) ++DV->Refs; - return DV; - } - void release(DomainValue*); - DomainValue *resolve(DomainValue*&); - - // LiveRegs manipulations. - void setLiveReg(int rx, DomainValue *DV); - void kill(int rx); - void force(int rx, unsigned domain); - void collapse(DomainValue *dv, unsigned domain); - bool merge(DomainValue *A, DomainValue *B); - - void enterBasicBlock(MachineBasicBlock*); - void leaveBasicBlock(MachineBasicBlock*); - bool isBlockDone(MachineBasicBlock *); - void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass); - void updateSuccessors(MachineBasicBlock *MBB, bool PrimaryPass); - bool visitInstr(MachineInstr *); - void processDefs(MachineInstr *, bool breakDependency, bool Kill); - void visitSoftInstr(MachineInstr*, unsigned mask); - void visitHardInstr(MachineInstr*, unsigned domain); - void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, - unsigned Pref); - bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); - void processUndefReads(MachineBasicBlock*); -}; -} - -char ExecutionDepsFix::ID = 0; - /// Translate TRI register number to a list of indices into our smaller tables /// of interesting registers. 
iterator_range::const_iterator> @@ -951,7 +745,3 @@ bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) { return false; } - -FunctionPass *llvm::createExecutionDepsFixPass(const TargetRegisterClass *RC) { - return new ExecutionDepsFix(RC); -} diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 131528090e4..b8dadb331ec 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/ExecutionDepsFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" @@ -76,6 +77,10 @@ static cl::opt EnableGlobalMerge("arm-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); +namespace llvm { + void initializeARMExecutionDepsFixPass(PassRegistry&); +} + extern "C" void LLVMInitializeARMTarget() { // Register the target. 
RegisterTargetMachine X(getTheARMLETarget()); @@ -88,6 +93,7 @@ extern "C" void LLVMInitializeARMTarget() { initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); initializeARMConstantIslandsPass(Registry); + initializeARMExecutionDepsFixPass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -441,8 +447,21 @@ public: void addPreEmitPass() override; }; +class ARMExecutionDepsFix : public ExecutionDepsFix { +public: + static char ID; + ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {} + StringRef getPassName() const override { + return "ARM Execution Dependency Fix"; + } +}; +char ARMExecutionDepsFix::ID; + } // end anonymous namespace +INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix", + "ARM Execution Dependency Fix", false, false) + TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARMPassConfig(this, PM); } @@ -536,7 +555,7 @@ void ARMPassConfig::addPreSched2() { if (EnableARMLoadStoreOpt) addPass(createARMLoadStoreOptimizationPass()); - addPass(createExecutionDepsFixPass(&ARM::DPRRegClass)); + addPass(new ARMExecutionDepsFix()); } // Expand some pseudo instructions into multiple instructions to allow diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 9e72da29f26..c4d6b451276 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,12 +30,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/ExecutionDepsFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include 
"llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -61,6 +62,7 @@ static cl::opt EnableMachineCombinerPass("x86-machine-combiner", namespace llvm { void initializeWinEHStatePassPass(PassRegistry &); +void initializeX86ExecutionDepsFixPass(PassRegistry &); } // end namespace llvm @@ -74,6 +76,7 @@ extern "C" void LLVMInitializeX86Target() { initializeWinEHStatePassPass(PR); initializeFixupBWInstPassPass(PR); initializeEvexToVexInstPassPass(PR); + initializeX86ExecutionDepsFixPass(PR); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -349,8 +352,21 @@ public: void addPreSched2() override; }; +class X86ExecutionDepsFix : public ExecutionDepsFix { +public: + static char ID; + X86ExecutionDepsFix() : ExecutionDepsFix(ID, X86::VR128XRegClass) {} + StringRef getPassName() const override { + return "X86 Execution Dependency Fix"; + } +}; +char X86ExecutionDepsFix::ID; + } // end anonymous namespace +INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix", + "X86 Execution Dependency Fix", false, false); + TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { return new X86PassConfig(this, PM); } @@ -432,7 +448,7 @@ void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); } void X86PassConfig::addPreEmitPass() { if (getOptLevel() != CodeGenOpt::None) - addPass(createExecutionDepsFixPass(&X86::VR128XRegClass)); + addPass(new X86ExecutionDepsFix()); if (UseVZeroUpper) addPass(createX86IssueVZeroUpperPass()); -- 2.50.1