#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/Local.h"
}
// TODO, Obvious Missing Transforms:
-// * Dereferenceable address -> speculative load/select
// * Narrow width by halfs excluding zero/undef lanes
static Value *simplifyMaskedLoad(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
+ Value *LoadPtr = II.getArgOperand(0);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
- if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
- Value *LoadPtr = II.getArgOperand(0);
- unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+ if (maskIsAllOneOrUndef(II.getArgOperand(2)))
return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
+
+ // If we can unconditionally load from this address, replace with a
+ // load/select idiom. TODO: use DT for context sensitive query
+ if (isDereferenceableAndAlignedPointer(LoadPtr, Alignment,
+ II.getModule()->getDataLayout(),
+ &II, nullptr)) {
+ Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+ return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
}
return nullptr;
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
double %pt, <2 x i1> %mask) {
%ptv1 = insertelement <2 x double> undef, double %pt, i64 0