AliasAnalysis &AA;
DominatorTree &DT;
ScalarEvolution &SE;
+ TargetTransformInfo &TTI;
const DataLayout &DL;
IRBuilder<> Builder;
ValueListMap StoreRefs;
ValueListMap LoadRefs;
- unsigned VecRegSize;
public:
Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
- ScalarEvolution &SE, unsigned VecRegSize)
- : F(F), AA(AA), DT(DT), SE(SE), DL(F.getParent()->getDataLayout()),
- Builder(SE.getContext()), VecRegSize(VecRegSize) {}
+ ScalarEvolution &SE, TargetTransformInfo &TTI)
+ : F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
+ DL(F.getParent()->getDataLayout()),
+ Builder(SE.getContext()) {}
bool run();
class LoadStoreVectorizer : public FunctionPass {
public:
static char ID;
- unsigned VecRegSize;
- LoadStoreVectorizer(unsigned VecRegSize = 128) : FunctionPass(ID),
- VecRegSize(VecRegSize) {
+ LoadStoreVectorizer() : FunctionPass(ID) {
initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
}
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
}
};
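// The vector width is no longer a pass parameter; it is queried per address
// space through TTI::getLoadStoreVecRegBitWidth. A minimal sketch of a
// target-side override (hypothetical "MyTTIImpl"; the address-space number
// and widths below are assumptions, not any real target's values):
//
//   unsigned MyTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
//     if (AddrSpace == 5) // e.g. a stack-like address space
//       return 32;        // only narrow scalar accesses are safe here
//     return 128;         // full vector register width elsewhere
//   }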
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
"Vectorize load and store instructions", false, false);
char LoadStoreVectorizer::ID = 0;
-Pass *llvm::createLoadStoreVectorizerPass(unsigned VecRegSize) {
- return new LoadStoreVectorizer(VecRegSize);
+Pass *llvm::createLoadStoreVectorizerPass() {
+ return new LoadStoreVectorizer();
}
bool LoadStoreVectorizer::runOnFunction(Function &F) {
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TargetTransformInfo &TTI =
+     getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- Vectorizer V(F, AA, DT, SE, VecRegSize);
+ Vectorizer V(F, AA, DT, SE, TTI);
return V.run();
}
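// Usage note: clients now create the pass with no width argument; the width
// is rediscovered per function through TTI. A minimal sketch, assuming a
// legacy PassManager instance named PM:
//
//   PM.add(createLoadStoreVectorizerPass());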
if (TySize < 8)
continue;
+ Value *Ptr = LI->getPointerOperand();
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
+
// No point in looking at these if they're too big to vectorize.
if (TySize > VecRegSize / 2)
continue;
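// For example, with a 128-bit register anything wider than 64 bits cannot
// pair with even one more element, so only types up to VecRegSize / 2 are
// kept as vectorization candidates.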
// TODO: Target hook to filter types.
// Save the load locations.
- Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
- LoadRefs[Ptr].push_back(LI);
+ Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
+ LoadRefs[ObjPtr].push_back(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isSimple())
continue;
if (TySize < 8)
continue;
+ Value *Ptr = SI->getPointerOperand();
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
if (TySize > VecRegSize / 2)
continue;
// Save store location.
- Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
- StoreRefs[Ptr].push_back(SI);
+ Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
+ StoreRefs[ObjPtr].push_back(SI);
}
}
}
}
unsigned Sz = DL.getTypeSizeInBits(StoreTy);
+ unsigned AS = S0->getPointerAddressSpace();
+ unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
unsigned VF = VecRegSize / Sz;
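// VF is the number of scalar elements that fit in one vector register,
// e.g. 32-bit stores into a 128-bit register give VF = 128 / 32 = 4.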
unsigned ChainSize = Chain.size();
// Set insert point.
Builder.SetInsertPoint(&*Last);
- unsigned AS = S0->getPointerAddressSpace();
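// Build the value to store: start from an undef vector and insert each
// scalar from the chain as one element.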
Value *Vec = UndefValue::get(VecTy);
}
unsigned Sz = DL.getTypeSizeInBits(LoadTy);
+ unsigned AS = L0->getPointerAddressSpace();
+ unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
// Set insert point.
Builder.SetInsertPoint(&*Last);
- unsigned AS = L0->getPointerAddressSpace();
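// Reinterpret the first pointer in the chain as a pointer to the vector
// type in the same address space, so a single wide load replaces the chain.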
Value *Bitcast =
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
--- /dev/null
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-4 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT4 -check-prefix=ALL %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-8 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT8 -check-prefix=ALL %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-16 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT16 -check-prefix=ALL %s
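+; The max-private-element-size-N attribute bounds the width the target
+; reports for private memory, so the same four i32 stores stay scalar at
+; 4 bytes, merge into two <2 x i32> stores at 8, and into one <4 x i32>
+; store at 16.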
+
+; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32(
+; ELT4: store i32
+; ELT4: store i32
+; ELT4: store i32
+; ELT4: store i32
+
+; ELT8: store <2 x i32>
+; ELT8: store <2 x i32>
+
+; ELT16: store <4 x i32>
+define void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32* %out, i32 3
+
+ store i32 9, i32* %out
+ store i32 1, i32* %out.gep.1
+ store i32 23, i32* %out.gep.2
+ store i32 19, i32* %out.gep.3
+ ret void
+}
+
+; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
+; ALL: store <4 x i8>
+define void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8* %out, i32 3
+
+ store i8 9, i8* %out
+ store i8 1, i8* %out.gep.1
+ store i8 23, i8* %out.gep.2
+ store i8 19, i8* %out.gep.3
+ ret void
+}
+
+; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16(
+; ALL: store <2 x i16>
+define void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16* %out, i32 1
+
+ store i16 9, i16* %out
+ store i16 12, i16* %out.gep.1
+ ret void
+}
+
+attributes #0 = { nounwind }