let ScalarMemoryVT = i16;
}
-// TODO: These need renamed to simple_store/simple_load and then split
-// into a volatile/atomic/ordered flavors so that respective transforms
-// can pick the right combination.
-def nonvolatile_load : PatFrag<(ops node:$ptr),
- (load node:$ptr), [{
+// TODO: Split these into volatile and unordered flavors to enable
+// selectively legal optimizations for each. (See D66309)
+def simple_load : PatFrag<(ops node:$ptr),
+ (load node:$ptr), [{
return cast<LoadSDNode>(N)->isSimple();
}]>;
-def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
+def simple_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->isSimple();
}]>;
let Predicates = [FeatureVectorEnhancements1] in
def SelectVR128 : SelectWrapper<f128, VR128>;
-defm CondStoreF32 : CondStores<FP32, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
-defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
+defm CondStoreF32 : CondStores<FP32, simple_store,
+ simple_load, bdxaddr20only>;
+defm CondStoreF64 : CondStores<FP64, simple_store,
+ simple_load, bdxaddr20only>;
//===----------------------------------------------------------------------===//
// Move instructions
defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>,
Requires<[FeatureHighWord]>;
-defm CondStore32Mux : CondStores<GRX32, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>,
+defm CondStore32Mux : CondStores<GRX32, simple_store,
+ simple_load, bdxaddr20only>,
Requires<[FeatureLoadStoreOnCond2]>;
defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>;
-defm CondStore32 : CondStores<GR32, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
+defm CondStore32 : CondStores<GR32, simple_store,
+ simple_load, bdxaddr20only>;
defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
nonvolatile_anyextloadi16, bdxaddr20only>;
defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32,
nonvolatile_anyextloadi32, bdxaddr20only>;
-defm CondStore64 : CondStores<GR64, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
+defm CondStore64 : CondStores<GR64, simple_store,
+ simple_load, bdxaddr20only>;
//===----------------------------------------------------------------------===//
// Move instructions
// Load on condition. Matched via DAG pattern.
// Expands to LOC or LOCFH, depending on the choice of register.
- def LOCMux : CondUnaryRSYPseudo<nonvolatile_load, GRX32, 4>;
- defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>;
+ def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>;
+ defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;
// Store on condition. Expanded from CondStore* pseudos.
// Expands to STOC or STOCFH, depending on the choice of register.
}
// Load on condition. Matched via DAG pattern.
- defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
- defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, nonvolatile_load, GR64, 8>;
+ defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>;
+ defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>;
// Store on condition. Expanded from CondStore* pseudos.
defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;
immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
// binary size compared to a regular MOV, but it introduces an unnecessary
// load, so is not suitable for regular or optsize functions.
let Predicates = [OptForMinSize] in {
-def : Pat<(nonvolatile_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
-def : Pat<(nonvolatile_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
-def : Pat<(nonvolatile_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
-def : Pat<(nonvolatile_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
-def : Pat<(nonvolatile_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
-def : Pat<(nonvolatile_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
+def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
+def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
+def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
+def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
+def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
+def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
}
// In kernel code model, we can get the address of a label
def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
(x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(x86mmx (MMX_X86movdq2q (v2i64 (nonvolatile_load addr:$src)))),
+def : Pat<(x86mmx (MMX_X86movdq2q (v2i64 (simple_load addr:$src)))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
def : Pat<(v2i64 (X86vzmovl (scalar_to_vector
// This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
// end up with a movsd or blend instead of shufp.
// No need for aligned load, we're only loading 64-bits.
- def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1,
+ def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
(i8 -28)),
(MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
// This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
// end up with a movsd or blend instead of shufp.
// No need for aligned load, we're only loading 64-bits.
- def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))),
+ def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
// Use MOVLPD to load into the low bits from a full vector unless we can use
// BLENDPD.
- def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))),
+ def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
}
let Predicates = [UseSSE2] in {
// Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
- (v2f64 (nonvolatile_load addr:$src2)))),
+ (v2f64 (simple_load addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
}
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
+ def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
let Predicates = [UseSSE3] in {
// No need for aligned memory as this only loads 64-bits.
- def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
+ def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
(MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(MOVDDUPrm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
(VMOVDDUPrr VR128:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
(VMOVDDUPrm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
(VMOVDDUPrm addr:$src)>;