ret <8 x float> %6
}
+define <4 x float> @stack_nofold_insertps(<8 x float> %a0, <8 x float> %a1) {
+; Cannot fold this without changing the immediate.
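+; The immediate $179 (0b10110011) sets count_s (bits 7:6) to 2, selecting
+; element 2 of the second source register; a folded load would supply a single
+; f32 instead, so the immediate would have to be rewritten before folding.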
+; CHECK-LABEL: stack_nofold_insertps
+; CHECK: 32-byte Spill
+; CHECK: nop
+; CHECK: 32-byte Reload
+; CHECK: vinsertps $179, {{%xmm., %xmm., %xmm.}}
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %v0 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %v1 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v0, <4 x float> %v1, i8 179)
+ ret <4 x float> %res
+}
+
attributes #0 = { "unsafe-fp-math"="false" }
attributes #1 = { "unsafe-fp-math"="true" }
"BTS16rr", "BTS32rr", "BTS64rr",
"BTS16mr", "BTS32mr", "BTS64mr",
+ // insertps cannot be folded without adjusting the immediate. There is custom
+ // code in X86InstrInfo.cpp that handles it, so skip these entries here.
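+ // (The immediate's count_s field selects which element of the register
+ // source is inserted; the memory form reads a single f32 from memory, so
+ // count_s must be rewritten whenever a load is folded.)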
+ "INSERTPSrr", "INSERTPSrm",
+ "VINSERTPSrr", "VINSERTPSrm", "VINSERTPSZrr", "VINSERTPSZrm",
+
// Memory folding is enabled only when optimizing for size, and only via DAG
// patterns. (issue detailed in D28744 review)
"VCVTSS2SDrm", "VCVTSS2SDrr",