[SPARC] Fixes for hardware errata on LEON processor.

author Chris Dewhurst <chris.dewhurst@lero.ie>

Sun, 19 Jun 2016 11:03:28 +0000 (11:03 +0000)

committer Chris Dewhurst <chris.dewhurst@lero.ie>

Sun, 19 Jun 2016 11:03:28 +0000 (11:03 +0000)
author Chris Dewhurst <chris.dewhurst@lero.ie>
Sun, 19 Jun 2016 11:03:28 +0000 (11:03 +0000)
committer Chris Dewhurst <chris.dewhurst@lero.ie>
Sun, 19 Jun 2016 11:03:28 +0000 (11:03 +0000)
diff --git a/lib/Target/Sparc/LeonFeatures.td b/lib/Target/Sparc/LeonFeatures.td

index 7de98407dac8ba85aed95bcf046cbab4834918bd..e2282abbbdba34bf808961604a580a6a996fcbb8 100755 (executable)
--- a/lib/Target/Sparc/LeonFeatures.td
+++ b/lib/Target/Sparc/LeonFeatures.td
@@ -43,3 +43,24 @@ def InsertNOPLoad: SubtargetFeature<
    "true",
    "LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction" 
  >;
+
+// Subtarget feature named "fixfsmuld". It is listed for the ut699 processor
+// and is queried in C++ through SparcSubtarget::fixFSMULD().
+def FixFSMULD : SubtargetFeature<
+  "fixfsmuld",
+  "FixFSMULD",
+  "true",
+  "LEON erratum fix: Do not use FSMULD" 
+>;
+
+// Subtarget feature named "replacefmuls". It is listed for the ut699 processor
+// and is queried in C++ through SparcSubtarget::replaceFMULS().
+def ReplaceFMULS : SubtargetFeature<
+  "replacefmuls",
+  "ReplaceFMULS",
+  "true",
+  "LEON erratum fix: Replace FMULS instruction with FMULD and relevant conversion instructions" 
+>;
+
+// Subtarget feature named "fixallfdivsqrt". It is listed for the ut699
+// processor and is queried in C++ through SparcSubtarget::fixAllFDIVSQRT().
+// NOTE(review): the description mentions a floating-point store, but the
+// FixAllFDIVSQRT pass added alongside this feature only inserts NOPs - confirm
+// which of the two is intended.
+def FixAllFDIVSQRT : SubtargetFeature<
+  "fixallfdivsqrt",
+  "FixAllFDIVSQRT",
+  "true",
+  "LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store" 
+>;
diff --git a/lib/Target/Sparc/LeonPasses.cpp b/lib/Target/Sparc/LeonPasses.cpp

index 31261c4a3e2eae7cf165f6c3cd4066d9999fb630..9d39fa2a8c72d2c3d37f01b878837db57c1423ad 100755 (executable)
--- a/lib/Target/Sparc/LeonPasses.cpp
+++ b/lib/Target/Sparc/LeonPasses.cpp
@@ -30,6 +30,51 @@ LEONMachineFunctionPass::LEONMachineFunctionPass(char& ID) :
  {
  }
  
+// Returns the register number held by operand OperandIndex of MI, where
+// LAST_OPERAND selects the final operand. If MI has no operands, the index is
+// out of range, or the selected operand is not a register, a fresh negative
+// value is returned instead.
+int LEONMachineFunctionPass::GetRegIndexForOperand(MachineInstr& MI, int OperandIndex)
+{
+  const unsigned OperandCount = MI.getNumOperands();
+
+  if (OperandCount > 0) {
+    if (OperandIndex == LAST_OPERAND)
+      OperandIndex = OperandCount - 1;
+
+    if ((unsigned) OperandIndex < OperandCount) {
+      const MachineOperand &MO = MI.getOperand(OperandIndex);
+      if (MO.isReg())
+        return (int) MO.getReg();
+    }
+  }
+
+  // Every miss yields a new value (-20, -30, ...) so that the results of two
+  // failed lookups never compare equal to each other.
+  static int NotFoundIndex = -10;
+  NotFoundIndex -= 10;
+  return NotFoundIndex;
+}
+
+// Empties the UsedRegisters list, so that no register is excluded by it the
+// next time getUnusedFPRegister() is called.
+void LEONMachineFunctionPass::clearUsedRegisterList()
+{
+  UsedRegisters.clear();
+}
+
+// Appends registerIndex to the UsedRegisters list; getUnusedFPRegister() will
+// not return a register that is on that list. No de-duplication is performed.
+void LEONMachineFunctionPass::markRegisterUsed(int registerIndex)
+{
+  UsedRegisters.push_back(registerIndex);
+}
+
+// Returns the first register in the range SP::F0..SP::F31 that is neither
+// reported as used by MRI nor recorded in the UsedRegisters list, or -1 when
+// every register in that range is taken.
+int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo& MRI)
+{
+  for (int Candidate = SP::F0; Candidate <= SP::F31; ++Candidate) {
+    if (MRI.isPhysRegUsed(Candidate))
+      continue;
+
+    const bool AlreadyReserved =
+        std::find(UsedRegisters.begin(), UsedRegisters.end(), Candidate) != UsedRegisters.end();
+    if (!AlreadyReserved)
+      return Candidate;
+  }
+
+  return -1;
+}
+
+
  //*****************************************************************************
  //**** InsertNOPLoad pass
  //*****************************************************************************
@@ -76,3 +121,293 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction& MF)
  
    return Modified;
  }
+
+//*****************************************************************************
+//**** FixFSMULD pass
+//*****************************************************************************
+//this pass should convert the FSMULD operands to double precision in scratch registers,
+//then calculate the result with the FMULD instruction. Therefore, the pass should replace operations of the form:
+//fsmuld %f20,%f21,%f8
+//with the sequence:
+//fstod %f20,%f0
+//fstod %f21,%f2
+//fmuld %f0,%f2,%f8
+//
+char FixFSMULD::ID = 0;
+
+// Constructs the pass by forwarding the target machine and this pass's static
+// ID to the LEONMachineFunctionPass base class; no other state is set up here.
+FixFSMULD::FixFSMULD(TargetMachine &tm) :
+                    LEONMachineFunctionPass(tm, ID)
+{
+}
+
+// Scans every instruction of MF for FSMULD - either the SP::FSMULD opcode or
+// an inline-asm string that starts with "fsmuld" - and replaces each one with
+//   fstod rs1, scratch1
+//   fstod rs2, scratch2
+//   fmuld scratch1, scratch2, rd
+// An occurrence is left untouched when two unused scratch registers cannot be
+// found. Returns true if at least one instruction was replaced.
+bool FixFSMULD::runOnMachineFunction(MachineFunction& MF)
+{
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  //errs() << "FixFSMULD on function " << MF.getName() << "\n";
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++ MBBI) {
+
+      MachineInstr &MI = *MBBI;
+      unsigned Opcode = MI.getOpcode();
+
+      // Reg1Index/Reg2Index are the two sources, Reg3Index is the destination.
+      const int UNASSIGNED_INDEX = -1;
+      int Reg1Index = UNASSIGNED_INDEX;
+      int Reg2Index = UNASSIGNED_INDEX;
+      int Reg3Index = UNASSIGNED_INDEX;
+
+      if (Opcode == SP::FSMULD && MI.getNumOperands() == 3) {
+        //errs() << "Detected FSMULD\n";
+        // FSMULD is declared as (outs $rd), (ins $rs1, $rs2), so the
+        // destination is operand 0 and the two sources follow it.
+        Reg1Index = MI.getOperand(1).getReg();
+        Reg2Index = MI.getOperand(2).getReg();
+        Reg3Index = MI.getOperand(0).getReg();
+      }
+      else if (MI.isInlineAsm()) {
+        std::string AsmString (MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
+        std::string FSMULDOpCode ("fsmuld");
+        std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(), ::tolower);
+        if (AsmString.find(FSMULDOpCode) == 0) { // this is an inline FSMULD instruction
+          //errs() << "Detected InlineAsm FSMULD\n";
+
+          unsigned StartOp = InlineAsm::MIOp_FirstOperand;
+
+          //extracts the first three register operands of the inline assembly
+          //instruction, in the order in which they appear on the MachineInstr
+          // NOTE(review): this assumes the register operands appear as
+          // source, source, destination - confirm against how inline-asm
+          // outputs are ordered on the MachineInstr.
+          for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
+            const MachineOperand &MO = MI.getOperand(i);
+            if (MO.isReg()) {
+              if (Reg1Index == UNASSIGNED_INDEX) Reg1Index = MO.getReg();
+              else if (Reg2Index == UNASSIGNED_INDEX) Reg2Index = MO.getReg();
+              else if (Reg3Index == UNASSIGNED_INDEX) Reg3Index = MO.getReg();
+            }
+            if (Reg3Index != UNASSIGNED_INDEX)
+              break;
+          }
+        }
+      }
+
+      if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX && Reg3Index != UNASSIGNED_INDEX) {
+        clearUsedRegisterList();
+        MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+        //Whatever Reg3Index is hasn't been used yet, so we need to reserve it.
+        markRegisterUsed(Reg3Index);
+        const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
+        markRegisterUsed(ScratchReg1Index);
+        const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
+        markRegisterUsed(ScratchReg2Index);
+
+        if (ScratchReg1Index == UNASSIGNED_INDEX || ScratchReg2Index == UNASSIGNED_INDEX) {
+          //errs() << "Cannot allocate free scratch registers for the FixFSMULD pass." << "\n";
+        }
+        else {
+          // NOTE(review): the scratch registers come from the SP::F0..SP::F31
+          // range while FSTOD/FMULD operate on double registers - confirm the
+          // register class is what these instructions expect.
+
+          //create fstod %f20,%f0
+          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
+          .addReg(ScratchReg1Index)
+          .addReg(Reg1Index);
+
+          //create fstod %f21,%f2
+          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
+          .addReg(ScratchReg2Index)
+          .addReg(Reg2Index);
+
+          //create fmuld %f0,%f2,%f8
+          BuildMI(MBB, MBBI, DL, TII.get(SP::FMULD))
+          .addReg(Reg3Index)
+          .addReg(ScratchReg1Index)
+          .addReg(ScratchReg2Index);
+
+          MI.eraseFromParent();
+          // Resume from the FMULD that now sits directly before NMBBI. The
+          // loop's own increment then lands on NMBBI, so the instruction that
+          // followed the erased one is still visited and the end iterator is
+          // never incremented.
+          MBBI = std::prev(NMBBI);
+
+          Modified = true;
+        }
+      }
+    }
+  }
+
+  return Modified;
+}
+
+
+//*****************************************************************************
+//**** ReplaceFMULS pass
+//*****************************************************************************
+//This pass converts the FMULS operands to double precision in scratch registers,
+//then calculates the result with the FMULD instruction.
+//The pass should replace operations of the form:
+//fmuls %f20,%f21,%f8
+//with the sequence:
+//fstod %f20,%f0
+//fstod %f21,%f2
+//fmuld %f0,%f2,%f8
+//
+char ReplaceFMULS::ID = 0;
+
+// Constructs the pass by forwarding the target machine and this pass's static
+// ID to the LEONMachineFunctionPass base class; no other state is set up here.
+ReplaceFMULS::ReplaceFMULS(TargetMachine &tm) :
+                    LEONMachineFunctionPass(tm, ID)
+{
+}
+
+// Scans every instruction of MF for FMULS - either the SP::FMULS opcode or an
+// inline-asm string that starts with "fmuls" - and replaces each one with
+//   fstod rs1, scratch1
+//   fstod rs2, scratch2
+//   fmuld scratch1, scratch2, rd
+// An occurrence is left untouched when two unused scratch registers cannot be
+// found. Returns true if at least one instruction was replaced.
+bool ReplaceFMULS::runOnMachineFunction(MachineFunction& MF)
+{
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  //errs() << "ReplaceFMULS on function " << MF.getName() << "\n";
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++ MBBI) {
+      MachineInstr &MI = *MBBI;
+      unsigned Opcode = MI.getOpcode();
+
+      // Reg1Index/Reg2Index are the two sources, Reg3Index is the destination.
+      const int UNASSIGNED_INDEX = -1;
+      int Reg1Index = UNASSIGNED_INDEX;
+      int Reg2Index = UNASSIGNED_INDEX;
+      int Reg3Index = UNASSIGNED_INDEX;
+
+      if (Opcode == SP::FMULS && MI.getNumOperands() == 3) {
+        //errs() << "Detected FMULS\n";
+        // FMULS is declared as (outs $rd), (ins $rs1, $rs2), so the
+        // destination is operand 0 and the two sources follow it.
+        Reg1Index = MI.getOperand(1).getReg();
+        Reg2Index = MI.getOperand(2).getReg();
+        Reg3Index = MI.getOperand(0).getReg();
+      }
+      else if (MI.isInlineAsm()) {
+        std::string AsmString (MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
+        std::string FMULSOpCode ("fmuls");
+        std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(), ::tolower);
+        if (AsmString.find(FMULSOpCode) == 0) { // this is an inline FMULS instruction
+          //errs() << "Detected InlineAsm FMULS\n";
+
+          unsigned StartOp = InlineAsm::MIOp_FirstOperand;
+
+          //extracts the first three register operands of the inline assembly
+          //instruction, in the order in which they appear on the MachineInstr
+          // NOTE(review): this assumes the register operands appear as
+          // source, source, destination - confirm against how inline-asm
+          // outputs are ordered on the MachineInstr.
+          for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
+            const MachineOperand &MO = MI.getOperand(i);
+            if (MO.isReg()) {
+              if (Reg1Index == UNASSIGNED_INDEX) Reg1Index = MO.getReg();
+              else if (Reg2Index == UNASSIGNED_INDEX) Reg2Index = MO.getReg();
+              else if (Reg3Index == UNASSIGNED_INDEX) Reg3Index = MO.getReg();
+            }
+            if (Reg3Index != UNASSIGNED_INDEX)
+              break;
+          }
+        }
+      }
+
+      if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX && Reg3Index != UNASSIGNED_INDEX) {
+        clearUsedRegisterList();
+        MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+        //Whatever Reg3Index is hasn't been used yet, so we need to reserve it.
+        markRegisterUsed(Reg3Index);
+        const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
+        markRegisterUsed(ScratchReg1Index);
+        const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
+        markRegisterUsed(ScratchReg2Index);
+
+        if (ScratchReg1Index == UNASSIGNED_INDEX || ScratchReg2Index == UNASSIGNED_INDEX) {
+          //errs() << "Cannot allocate free scratch registers for the ReplaceFMULS pass." << "\n";
+        }
+        else {
+          // NOTE(review): FMULD leaves a double-precision result in the
+          // destination, whereas the replaced FMULS produced single precision
+          // - confirm whether a conversion back is required here.
+
+          //create fstod %f20,%f0
+          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
+          .addReg(ScratchReg1Index)
+          .addReg(Reg1Index);
+
+          //create fstod %f21,%f2
+          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
+          .addReg(ScratchReg2Index)
+          .addReg(Reg2Index);
+
+          //create fmuld %f0,%f2,%f8
+          BuildMI(MBB, MBBI, DL, TII.get(SP::FMULD))
+          .addReg(Reg3Index)
+          .addReg(ScratchReg1Index)
+          .addReg(ScratchReg2Index);
+
+          MI.eraseFromParent();
+          // Resume from the FMULD that now sits directly before NMBBI. The
+          // loop's own increment then lands on NMBBI, so the instruction that
+          // followed the erased one is still visited and the end iterator is
+          // never incremented.
+          MBBI = std::prev(NMBBI);
+
+          Modified = true;
+        }
+      }
+    }
+  }
+
+  return Modified;
+}
+
+//*****************************************************************************
+//**** FixAllFDIVSQRT pass
+//*****************************************************************************
+//This pass implements two fixes:
+// 1) fixing the FSQRTS and FSQRTD instructions;
+// 2) fixing the FDIVS and FDIVD instructions.
+//
+char FixAllFDIVSQRT::ID = 0;
+
+// Constructs the pass by forwarding the target machine and this pass's static
+// ID to the LEONMachineFunctionPass base class; no other state is set up here.
+FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm) :
+                    LEONMachineFunctionPass(tm, ID)
+{
+}
+
+// Pads every FSQRTD and FDIVD in MF with NOPs: five immediately before the
+// instruction and twenty-eight immediately after it. Inline assembly whose
+// text starts with "fsqrtd" or "fdivd" is treated the same way. Returns true
+// if any NOP was inserted.
+bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction& MF)
+{
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  bool Changed = false;
+  for (auto BlockIt = MF.begin(), BlockEnd = MF.end(); BlockIt != BlockEnd; ++BlockIt) {
+    MachineBasicBlock &MBB = *BlockIt;
+    for (auto InstIt = MBB.begin(), InstEnd = MBB.end(); InstIt != InstEnd; ++InstIt) {
+      MachineInstr &MI = *InstIt;
+      unsigned Opcode = MI.getOpcode();
+
+      if (MI.isInlineAsm()) {
+        // Map inline assembly onto the opcode its text starts with, comparing
+        // case-insensitively.
+        std::string AsmText (MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
+        std::transform(AsmText.begin(), AsmText.end(), AsmText.begin(), ::tolower);
+        if (AsmText.find("fsqrtd") == 0)      // inline fsqrtd instruction
+          Opcode = SP::FSQRTD;
+        else if (AsmText.find("fdivd") == 0)  // inline fdivd instruction
+          Opcode = SP::FDIVD;
+      }
+
+      // FDIVS and FSQRTS are not checked here: with this erratum fix switched
+      // on they are expected to have been promoted to FDIVD/FSQRTD earlier in
+      // the pipeline.
+      if (Opcode != SP::FSQRTD && Opcode != SP::FDIVD)
+        continue;
+
+      // Five NOPs in front of the instruction ...
+      for (int Count = 0; Count < 5; ++Count)
+        BuildMI(MBB, InstIt, DL, TII.get(SP::NOP));
+
+      // ... and twenty-eight behind it.
+      MachineBasicBlock::iterator After = std::next(InstIt);
+      for (int Count = 0; Count < 28; ++Count)
+        BuildMI(MBB, After, DL, TII.get(SP::NOP));
+
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Target/Sparc/LeonPasses.h b/lib/Target/Sparc/LeonPasses.h

index ca4b0f90c82267f61e20f4eb8a8786e1078f33f0..798f63407e378c5fb59893db4364c640f351dde9 100755 (executable)
--- a/lib/Target/Sparc/LeonPasses.h
+++ b/lib/Target/Sparc/LeonPasses.h
@@ -25,10 +25,19 @@ class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass
      : public MachineFunctionPass {
  protected:
    const SparcSubtarget *Subtarget;
+  const int LAST_OPERAND = -1;
+
+  //this vector holds the registers that have been marked as used; getUnusedFPRegister() skips them
+  std::vector <int> UsedRegisters;
  
  protected:
    LEONMachineFunctionPass(TargetMachine &tm, char& ID);
    LEONMachineFunctionPass(char& ID);
+
+  int GetRegIndexForOperand(MachineInstr& MI, int OperandIndex);
+  void clearUsedRegisterList();
+  void markRegisterUsed(int registerIndex);
+  int getUnusedFPRegister(MachineRegisterInfo& MRI);
  };
  
  class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass {
@@ -42,6 +51,42 @@ public:
      return "InsertNOPLoad: Erratum Fix LBR35: insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction";
    }
  };
+
+// Machine-function pass for the LEON FSMULD erratum (named LBR31 in the pass
+// name below). runOnMachineFunction() is defined in LeonPasses.cpp.
+class LLVM_LIBRARY_VISIBILITY FixFSMULD : public LEONMachineFunctionPass {
+public:
+  // Pass identifier; its address is handed to the base-class constructor.
+  static char ID;
+
+  FixFSMULD(TargetMachine &tm);
+  bool runOnMachineFunction(MachineFunction& MF) override;
+
+  // Human-readable name of the pass.
+  const char *getPassName() const override {
+    return "FixFSMULD: Erratum Fix LBR31: do not select FSMULD";
+  }
+};
+
+// Machine-function pass for the LEON FMULS erratum (named LBR32 in the pass
+// name below). runOnMachineFunction() is defined in LeonPasses.cpp.
+class LLVM_LIBRARY_VISIBILITY ReplaceFMULS : public LEONMachineFunctionPass {
+public:
+  // Pass identifier; its address is handed to the base-class constructor.
+  static char ID;
+
+  ReplaceFMULS(TargetMachine &tm);
+  bool runOnMachineFunction(MachineFunction& MF) override;
+
+  // Human-readable name of the pass.
+  const char *getPassName() const override {
+    return "ReplaceFMULS: Erratum Fix LBR32: replace FMULS instruction with a routine using conversions/double precision operations to replace FMULS";
+  }
+};
+
+// Machine-function pass for the LEON FDIV/FSQRT erratum (named LBR34 in the
+// pass name below). runOnMachineFunction() is defined in LeonPasses.cpp.
+class LLVM_LIBRARY_VISIBILITY FixAllFDIVSQRT : public LEONMachineFunctionPass {
+public:
+  // Pass identifier; its address is handed to the base-class constructor.
+  static char ID;
+
+  FixAllFDIVSQRT(TargetMachine &tm);
+  bool runOnMachineFunction(MachineFunction& MF) override;
+
+  // Human-readable name of the pass.
+  const char *getPassName() const override {
+    return "FixAllFDIVSQRT: Erratum Fix LBR34: fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store";
+  }
+};
  } // namespace llvm
  
  #endif
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td

index 4fb82ff5c74bb428ecdb5f015d4144d51991f3bb..6ae73125c9e04c3f1c37b14de59f9032b4b4e2c0 100644 (file)
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -122,7 +122,7 @@ def : Processor<"leon3", LEON3Itineraries,
  // LEON 3 FT (UT699). Provides features for the UT699 processor
  // - covers all the erratum fixes for LEON3, but does not support the CASA instruction.
  def : Processor<"ut699", LEON3Itineraries, 
-                [FeatureLeon, InsertNOPLoad]>;
+                [FeatureLeon, InsertNOPLoad, FixFSMULD, ReplaceFMULS, FixAllFDIVSQRT]>;
  
  // LEON3 FT (GR712RC). Provides features for the GR712RC processor.
  // - covers all the erratum fixed for LEON3 and support for the CASA instruction. 
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp

index 605acd6df054b170db65210c19180510b20332eb..7837fc8a020e428dab8037bf4d0a61d8a0d8fc6a 100644 (file)
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1824,6 +1824,19 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
      }
    }
  
+  if (Subtarget->fixAllFDIVSQRT()) {
+    // Promote FDIVS and FSQRTS to FDIVD and FSQRTD instructions instead as
+    // the former instructions generate errata on LEON processors.
+    setOperationAction(ISD::FDIV, MVT::f32, Promote);
+    setOperationAction(ISD::FSQRT, MVT::f32, Promote);
+  }
+
+  if (Subtarget->replaceFMULS()) {
+    // Promote FMULS to FMULD instructions instead as
+    // the former instructions generate errata on LEON processors.
+    setOperationAction(ISD::FMUL, MVT::f32, Promote);
+  }
+
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  
    setMinFunctionAlignment(2);
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td

index 3f64ef0bac8d52068c209f64f89bd0177de8e4c4..cc55c9c8e032f75f91e1a487b434e40bf42c1504 100644 (file)
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -57,6 +57,10 @@ def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
  // UMAC and SMAC instructions
  def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">;
  
+// Predicates that hold only while the corresponding LEON erratum fix is
+// switched off on the subtarget (note the negation in each condition).
+def HasNoFdivSqrtFix : Predicate<"!Subtarget->fixAllFDIVSQRT()">;
+def HasNoFmulsFix : Predicate<"!Subtarget->replaceFMULS()">;
+def HasNoFsmuldFix : Predicate<"!Subtarget->fixFSMULD()">;
+
  // UseDeprecatedInsts - This predicate is true when the target processor is a
  // V8, or when it is V9 but the V8 deprecated instructions are efficient enough
  // to use when appropriate.  In either of these cases, the instruction selector
@@ -1172,6 +1176,9 @@ def FABSS : F3_3u<2, 0b110100, 0b000001001,
  
  
  // Floating-point Square Root Instructions, p.145
+// FSQRTS generates an erratum on LEON processors, so by disabling this instruction
+// this will be promoted to use FSQRTD with doubles instead.
+let Predicates = [HasNoFdivSqrtFix] in 
  def FSQRTS : F3_3u<2, 0b110100, 0b000101001,
                    (outs FPRegs:$rd), (ins FPRegs:$rs2),
                    "fsqrts $rs2, $rd",
@@ -1225,6 +1232,9 @@ def FSUBQ  : F3_3<2, 0b110100, 0b001000111,
  
  
  // Floating-point Multiply and Divide Instructions, p. 147
+// FMULS generates an erratum on LEON processors, so by disabling this instruction
+// this will be promoted to use FMULD with doubles instead.
+let Predicates = [HasNoFmulsFix] in 
  def FMULS  : F3_3<2, 0b110100, 0b001001001,
                    (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                    "fmuls $rs1, $rs2, $rd",
@@ -1241,6 +1251,7 @@ def FMULQ  : F3_3<2, 0b110100, 0b001001011,
                    [(set f128:$rd, (fmul f128:$rs1, f128:$rs2))]>,
                    Requires<[HasHardQuad]>;
  
+let Predicates = [HasNoFsmuldFix] in
  def FSMULD : F3_3<2, 0b110100, 0b001101001,
                    (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                    "fsmuld $rs1, $rs2, $rd",
@@ -1254,6 +1265,8 @@ def FDMULQ : F3_3<2, 0b110100, 0b001101110,
                                           (fextend f64:$rs2)))]>,
                    Requires<[HasHardQuad]>;
  
+// FDIVS generates an erratum on LEON processors, so by disabling this instruction
+// this will be promoted to use FDIVD with doubles instead.
  def FDIVS  : F3_3<2, 0b110100, 0b001001101,
                   (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                   "fdivs $rs1, $rs2, $rd",
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp

index 3dd8a84ba477accf47b67f1f9b37f55c9b5108d4..97d4aef3378cc30f9a0cd935cdfdfb51970c4b07 100644 (file)
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -40,6 +40,9 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
    HasLeonCasa = false;
    HasUmacSmac = false;
    InsertNOPLoad = false;
+  FixFSMULD = false;
+  ReplaceFMULS = false;
+  FixAllFDIVSQRT = false;
  
    // Determine default and user specified characteristics
    std::string CPUName = CPU;
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h

index a69c2efd913d33f545188035497e453969a82d5d..9122f54f7c85a6b1d23298748f20e31ccaf745bb 100644 (file)
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -45,6 +45,9 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
    bool HasUmacSmac;
    bool HasLeonCasa;
    bool InsertNOPLoad;
+  bool FixFSMULD;
+  bool ReplaceFMULS;
+  bool FixAllFDIVSQRT;
  
    SparcInstrInfo InstrInfo;
    SparcTargetLowering TLInfo;
@@ -85,6 +88,9 @@ public:
    bool hasUmacSmac() const { return HasUmacSmac; }
    bool hasLeonCasa() const { return HasLeonCasa; }
    bool insertNOPLoad() const { return InsertNOPLoad; }
+  bool fixFSMULD() const { return FixFSMULD; }
+  bool replaceFMULS() const { return ReplaceFMULS; }
+  bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
  
    /// ParseSubtargetFeatures - Parses features string setting specified
    /// subtarget options.  Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp

index cc2be3a9ef00ca35e24092e3892f28a088d25921..d8a047a72ba8d0db35ab7624412e6ab45d264458 100644 (file)
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -149,6 +149,18 @@ void SparcPassConfig::addPreEmitPass(){
    {
      addPass(new InsertNOPLoad(getSparcTargetMachine()));
    }
+  if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD())
+  {
+    addPass(new FixFSMULD(getSparcTargetMachine()));
+  }
+  if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS())
+  {
+    addPass(new ReplaceFMULS(getSparcTargetMachine()));
+  }
+  if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT())
+  {
+    addPass(new FixAllFDIVSQRT(getSparcTargetMachine()));
+  }
  }
  
  void SparcV8TargetMachine::anchor() { }
diff --git a/test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll b/test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll

new file mode 100755 (executable)

index 0000000..d15e790
--- /dev/null
+++ b/test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll
@@ -0,0 +1,26 @@
+; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s\r
+\r
+; CHECK-LABEL: test_fix_fsmuld_1\r
+; CHECK:       fsmuld %f20, %f21, %f8\r
+\r
+define double @test_fix_fsmuld_1() {\r
+entry:\r
+  %a = alloca float, align 4\r
+  %b = alloca float, align 4\r
+  store float 0x402ECCCCC0000000, float* %a, align 4\r
+  store float 0x4022333340000000, float* %b, align 4\r
+  %0 = load float, float* %b, align 4\r
+  %1 = load float, float* %a, align 4\r
+  %mul = tail call double asm sideeffect "fsmuld $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)\r
+\r
+  ret double %mul\r
+}\r
+\r
+; CHECK-LABEL: test_fix_fsmuld_2\r
+; CHECK:       fsmuld %f20, %f21, %f8\r
+define double @test_fix_fsmuld_2(float* %a, float* %b) {\r
+entry:\r
+  %mul = tail call double asm sideeffect "fsmuld $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)\r
+\r
+  ret double %mul\r
+}\r
diff --git a/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll b/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll

new file mode 100755 (executable)

index 0000000..76c2daa
--- /dev/null
+++ b/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s\r
+\r
+; CHECK-LABEL: fmuls_fix_test\r
+; CHECK:       fmuls %f20, %f21, %f8\r
+define double @fmuls_fix_test() {\r
+entry:\r
+  %a = alloca float, align 4\r
+  %b = alloca float, align 4\r
+  store float 0x402ECCCCC0000000, float* %a, align 4\r
+  store float 0x4022333340000000, float* %b, align 4\r
+  %0 = load float, float* %b, align 4\r
+  %1 = load float, float* %a, align 4\r
+  %mul = tail call double asm sideeffect "fmuls $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)\r
+\r
+  ret double %mul\r
+}\r
diff --git a/test/CodeGen/SPARC/LeonSMACUMACInstructionUT.ll b/test/CodeGen/SPARC/LeonSMACUMACInstructionUT.ll

new file mode 100755 (executable)

index 0000000..281113b
--- /dev/null
+++ b/test/CodeGen/SPARC/LeonSMACUMACInstructionUT.ll
@@ -0,0 +1,20 @@
+; RUN: llc %s -O0 -march=sparc -mcpu=leon2 -o - | FileCheck %s\r
+; RUN: llc %s -O0 -march=sparc -mcpu=leon3 -o - | FileCheck %s\r
+; RUN: llc %s -O0 -march=sparc -mcpu=leon4 -o - | FileCheck %s\r
+\r
+; CHECK-LABEL: smac_test:\r
+; CHECK:       smac %o1, %o0, %o0\r
+define i32 @smac_test(i16* %a, i16* %b) {\r
+entry:\r
+;  %0 = tail call i32 asm sideeffect "smac $2, $1, $0", "={r2},{r3},{r4}"(i16* %a, i16* %b)\r
+  %0 = tail call i32 asm sideeffect "smac $2, $1, $0", "=r,rI,r"(i16* %a, i16* %b)\r
+  ret i32 %0\r
+}\r
+\r
+; CHECK-LABEL: umac_test:\r
+; CHECK:       umac %o1, %o0, %o0\r
+define i32 @umac_test(i16* %a, i16* %b) {\r
+entry:\r
+  %0 = tail call i32 asm sideeffect "umac $2, $1, $0", "=r,rI,r"(i16* %a, i16* %b)\r
+  ret i32 %0\r
+}\r
author	Chris Dewhurst <chris.dewhurst@lero.ie>
	Sun, 19 Jun 2016 11:03:28 +0000 (11:03 +0000)
committer	Chris Dewhurst <chris.dewhurst@lero.ie>
	Sun, 19 Jun 2016 11:03:28 +0000 (11:03 +0000)
lib/Target/Sparc/LeonFeatures.td		patch \| blob \| history
lib/Target/Sparc/LeonPasses.cpp		patch \| blob \| history
lib/Target/Sparc/LeonPasses.h		patch \| blob \| history
lib/Target/Sparc/Sparc.td		patch \| blob \| history
lib/Target/Sparc/SparcISelLowering.cpp		patch \| blob \| history
lib/Target/Sparc/SparcInstrInfo.td		patch \| blob \| history
lib/Target/Sparc/SparcSubtarget.cpp		patch \| blob \| history
lib/Target/Sparc/SparcSubtarget.h		patch \| blob \| history
lib/Target/Sparc/SparcTargetMachine.cpp		patch \| blob \| history
test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll	[new file with mode: 0755]	patch \| blob
test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll	[new file with mode: 0755]	patch \| blob
test/CodeGen/SPARC/LeonSMACUMACInstructionUT.ll	[new file with mode: 0755]	patch \| blob