From bfc8711de97f7bcc2ac9b4a82400f158ec0aedb1 Mon Sep 17 00:00:00 2001 From: Ayal Zaks Date: Mon, 26 Jun 2017 22:26:54 +0000 Subject: [PATCH] reverting 306331. Causes TBAA metadata to be generated on reverse shuffles, investigating. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306338 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 579 ++++++++++----------- 1 file changed, 286 insertions(+), 293 deletions(-) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 496d0589c08..509f5a8f919 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -532,34 +532,21 @@ protected: /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; - /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a - /// vector or scalar value on-demand if one is not yet available. When - /// vectorizing a loop, we visit the definition of an instruction before its - /// uses. When visiting the definition, we either vectorize or scalarize the - /// instruction, creating an entry for it in the corresponding map. (In some - /// cases, such as induction variables, we will create both vector and scalar - /// entries.) Then, as we encounter uses of the definition, we derive values - /// for each scalar or vector use unless such a value is already available. - /// For example, if we scalarize a definition and one of its uses is vector, - /// we build the required vector on-demand with an insertelement sequence - /// when visiting the use. Otherwise, if the use is scalar, we can use the - /// existing scalar definition. - /// - /// Return a value in the new loop corresponding to \p V from the original - /// loop at unroll index \p Part. If the value has already been vectorized, - /// the corresponding vector entry in VectorLoopValueMap is returned.
If, + /// Return a constant reference to the VectorParts corresponding to \p V from + /// the original loop. If the value has already been vectorized, the + /// corresponding vector entry in VectorLoopValueMap is returned. If, /// however, the value has a scalar entry in VectorLoopValueMap, we construct - /// a new vector value on-demand by inserting the scalar values into a vector + /// new vector values on-demand by inserting the scalar values into vectors /// with an insertelement sequence. If the value has been neither vectorized /// nor scalarized, it must be loop invariant, so we simply broadcast the - /// value into a vector. - Value *getOrCreateVectorValue(Value *V, unsigned Part); + /// value into vectors. + const VectorParts &getVectorValue(Value *V); /// Return a value in the new loop corresponding to \p V from the original /// loop at unroll index \p Part and vector index \p Lane. If the value has /// been vectorized but not scalarized, the necessary extractelement /// instruction will be generated. - Value *getOrCreateScalarValue(Value *V, unsigned Part, unsigned Lane); + Value *getScalarValue(Value *V, unsigned Part, unsigned Lane); /// Try to vectorize the interleaved access group that \p Instr belongs to. void vectorizeInterleaveGroup(Instruction *Instr); @@ -614,103 +601,90 @@ protected: /// UF x VF scalar values in the new loop. UF and VF are the unroll and /// vectorization factors, respectively. /// - /// Entries can be added to either map with setVectorValue and setScalarValue, - /// which assert that an entry was not already added before. If an entry is to - /// replace an existing one, call resetVectorValue. This is currently needed - /// to modify the mapped values during "fix-up" operations that occur once the - /// first phase of widening is complete. These operations include type - /// truncation and the second phase of recurrence widening. 
+ /// Entries can be added to either map with initVector and initScalar, which + /// initialize and return a constant reference to the new entry. If a + /// non-constant reference to a vector entry is required, getVector can be + /// used to retrieve a mutable entry. We currently directly modify the mapped + /// values during "fix-up" operations that occur once the first phase of + /// widening is complete. These operations include type truncation and the + /// second phase of recurrence widening. /// - /// Entries from either map can be retrieved using the getVectorValue and - /// getScalarValue functions, which assert that the desired value exists. - + /// Otherwise, entries from either map should be accessed using the + /// getVectorValue or getScalarValue functions from InnerLoopVectorizer. + /// getVectorValue and getScalarValue coordinate to generate a vector or + /// scalar value on-demand if one is not yet available. When vectorizing a + /// loop, we visit the definition of an instruction before its uses. When + /// visiting the definition, we either vectorize or scalarize the + /// instruction, creating an entry for it in the corresponding map. (In some + /// cases, such as induction variables, we will create both vector and scalar + /// entries.) Then, as we encounter uses of the definition, we derive values + /// for each scalar or vector use unless such a value is already available. + /// For example, if we scalarize a definition and one of its uses is vector, + /// we build the required vector on-demand with an insertelement sequence + /// when visiting the use. Otherwise, if the use is scalar, we can use the + /// existing scalar definition. struct ValueMap { /// Construct an empty map with the given unroll and vectorization factors. - ValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {} - - /// \return True if the map has any vector entry for \p Key. 
- bool hasAnyVectorValue(Value *Key) const { - return VectorMapStorage.count(Key); - } - - /// \return True if the map has a vector entry for \p Key and \p Part. - bool hasVectorValue(Value *Key, unsigned Part) const { - assert(Part < UF && "Queried Vector Part is too large."); - if (!hasAnyVectorValue(Key)) - return false; - const VectorParts &Entry = VectorMapStorage.find(Key)->second; - assert(Entry.size() == UF && "VectorParts has wrong dimensions."); - return Entry[Part] != nullptr; - } - - /// \return True if the map has any scalar entry for \p Key. - bool hasAnyScalarValue(Value *Key) const { - return ScalarMapStorage.count(Key); - } - - /// \return True if the map has a scalar entry for \p Key, \p Part and - /// \p Part. - bool hasScalarValue(Value *Key, unsigned Part, unsigned Lane) const { - assert(Part < UF && "Queried Scalar Part is too large."); - assert(Lane < VF && "Queried Scalar Lane is too large."); - if (!hasAnyScalarValue(Key)) - return false; - const ScalarParts &Entry = ScalarMapStorage.find(Key)->second; - assert(Entry.size() == UF && "ScalarParts has wrong dimensions."); - assert(Entry[Part].size() == VF && "ScalarParts has wrong dimensions."); - return Entry[Part][Lane] != nullptr; + ValueMap(unsigned UnrollFactor, unsigned VecWidth) + : UF(UnrollFactor), VF(VecWidth) { + // The unroll and vectorization factors are only used in asserts builds + // to verify map entries are sized appropriately. + (void)UF; + (void)VF; } - /// Retrieve the existing vector value that corresponds to \p Key and - /// \p Part. - Value *getVectorValue(Value *Key, unsigned Part) { - assert(hasVectorValue(Key, Part) && "Getting non-existent value."); - return VectorMapStorage[Key][Part]; + /// \return True if the map has a vector entry for \p Key. + bool hasVector(Value *Key) const { return VectorMapStorage.count(Key); } + + /// \return True if the map has a scalar entry for \p Key. 
+ bool hasScalar(Value *Key) const { return ScalarMapStorage.count(Key); } + + /// \brief Map \p Key to the given VectorParts \p Entry, and return a + /// constant reference to the new vector map entry. The given key should + /// not already be in the map, and the given VectorParts should be + /// correctly sized for the current unroll factor. + const VectorParts &initVector(Value *Key, const VectorParts &Entry) { + assert(!hasVector(Key) && "Vector entry already initialized"); + assert(Entry.size() == UF && "VectorParts has wrong dimensions"); + VectorMapStorage[Key] = Entry; + return VectorMapStorage[Key]; } - /// Retrieve the existing scalar value that corresponds to \p Key, \p Part - /// and \p Lane. - Value *getScalarValue(Value *Key, unsigned Part, unsigned Lane) { - assert(hasScalarValue(Key, Part, Lane) && "Getting non-existent value."); - return ScalarMapStorage[Key][Part][Lane]; + /// \brief Map \p Key to the given ScalarParts \p Entry, and return a + /// constant reference to the new scalar map entry. The given key should + /// not already be in the map, and the given ScalarParts should be + /// correctly sized for the current unroll and vectorization factors. + const ScalarParts &initScalar(Value *Key, const ScalarParts &Entry) { + assert(!hasScalar(Key) && "Scalar entry already initialized"); + assert(Entry.size() == UF && + all_of(make_range(Entry.begin(), Entry.end()), + [&](const SmallVectorImpl &Values) -> bool { + return Values.size() == VF; + }) && + "ScalarParts has wrong dimensions"); + ScalarMapStorage[Key] = Entry; + return ScalarMapStorage[Key]; } - /// Set a vector value associated with \p Key and \p Part. Assumes such a - /// value is not already set. If it is, use resetVectorValue() instead. 
- void setVectorValue(Value *Key, unsigned Part, Value *Vector) { - assert(!hasVectorValue(Key, Part) && "Vector value already set for part"); - if (!VectorMapStorage.count(Key)) { - VectorParts Entry(UF); - VectorMapStorage[Key] = Entry; - } - VectorMapStorage[Key][Part] = Vector; + /// \return A reference to the vector map entry corresponding to \p Key. + /// The key should already be in the map. This function should only be used + /// when it's necessary to update values that have already been vectorized. + /// This is the case for "fix-up" operations including type truncation and + /// the second phase of recurrence vectorization. If a non-const reference + /// isn't required, getVectorValue should be used instead. + VectorParts &getVector(Value *Key) { + assert(hasVector(Key) && "Vector entry not initialized"); + return VectorMapStorage.find(Key)->second; } - /// Set a scalar value associated with \p Key for \p Part and \p Lane. - /// Assumes such a value is not already set. - void setScalarValue(Value *Key, unsigned Part, unsigned Lane, - Value *Scalar) { - assert(!hasScalarValue(Key, Part, Lane) && "Scalar value already set"); - if (!ScalarMapStorage.count(Key)) { - ScalarParts Entry(UF); - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part].resize(VF, nullptr); - // TODO: Consider storing uniform values only per-part, as they occupy - // lane 0 only, keeping the other VF-1 redundant entries null. - ScalarMapStorage[Key] = Entry; - } - ScalarMapStorage[Key][Part][Lane] = Scalar; - } - - /// Reset the vector value associated with \p Key for the given \p Part. - /// This function can be used to update values that have already been - /// vectorized. This is the case for "fix-up" operations including type - /// truncation and the second phase of recurrence vectorization. 
- void resetVectorValue(Value *Key, unsigned Part, Value *Vector) { - assert(hasVectorValue(Key, Part) && "Vector value not set for part"); - VectorMapStorage[Key][Part] = Vector; - } + /// Retrieve an entry from the vector or scalar maps. The preferred way to + /// access an existing mapped entry is with getVectorValue or + /// getScalarValue from InnerLoopVectorizer. Until those functions can be + /// moved inside ValueMap, we have to declare them as friends. + friend const VectorParts &InnerLoopVectorizer::getVectorValue(Value *V); + friend Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part, + unsigned Lane); private: /// The unroll factor. Each entry in the vector map contains UF vector @@ -2443,13 +2417,15 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", &*LoopVectorBody->getFirstInsertionPt()); Instruction *LastInduction = VecInd; + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction); - if (isa(EntryVal)) - addMetadata(LastInduction, EntryVal); + Entry[Part] = LastInduction; LastInduction = cast(addFastMathFlag( Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"))); } + VectorLoopValueMap.initVector(EntryVal, Entry); + if (isa(EntryVal)) + addMetadata(Entry, EntryVal); // Move the last step to the end of the latch block. This ensures consistent // placement of all induction updates. @@ -2555,13 +2531,13 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { // induction variable, and build the necessary step vectors. 
if (!VectorizedIV) { Value *Broadcasted = getBroadcastInstrs(ScalarIV); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *EntryPart = + VectorParts Entry(UF); + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part] = getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode()); - VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart); - if (Trunc) - addMetadata(EntryPart, Trunc); - } + VectorLoopValueMap.initVector(EntryVal, Entry); + if (Trunc) + addMetadata(Entry, Trunc); } // If an induction variable is only used for counting loop iterations or @@ -2661,14 +2637,17 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, Cost->isUniformAfterVectorization(cast(EntryVal), VF) ? 1 : VF; // Compute the scalar steps and save the results in VectorLoopValueMap. + ScalarParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { + Entry[Part].resize(VF); for (unsigned Lane = 0; Lane < Lanes; ++Lane) { auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane); auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step)); auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul)); - VectorLoopValueMap.setScalarValue(EntryVal, Part, Lane, Add); + Entry[Part][Lane] = Add; } } + VectorLoopValueMap.initScalar(EntryVal, Entry); } int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { @@ -2686,7 +2665,8 @@ bool LoopVectorizationLegality::isUniform(Value *V) { return LAI->isUniform(V); } -Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { +const InnerLoopVectorizer::VectorParts & +InnerLoopVectorizer::getVectorValue(Value *V) { assert(V != Induction && "The new induction variable should not be used."); assert(!V->getType()->isVectorTy() && "Can't widen a vector"); assert(!V->getType()->isVoidTy() && "Type does not produce a value"); @@ -2695,16 +2675,17 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { if (Legal->hasStride(V)) V = 
ConstantInt::get(V->getType(), 1); - // If we have a vector mapped to this value, return it. - if (VectorLoopValueMap.hasVectorValue(V, Part)) - return VectorLoopValueMap.getVectorValue(V, Part); + // If we have this scalar in the map, return it. + if (VectorLoopValueMap.hasVector(V)) + return VectorLoopValueMap.VectorMapStorage[V]; // If the value has not been vectorized, check if it has been scalarized // instead. If it has been scalarized, and we actually need the value in // vector form, we will construct the vector values on demand. - if (VectorLoopValueMap.hasAnyScalarValue(V)) { + if (VectorLoopValueMap.hasScalar(V)) { - Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, Part, 0); + // Initialize a new vector map entry. + VectorParts Entry(UF); // If we've scalarized a value, that value should be an instruction. auto *I = cast(V); @@ -2712,8 +2693,9 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { // If we aren't vectorizing, we can just copy the scalar map values over to // the vector map. if (VF == 1) { - VectorLoopValueMap.setVectorValue(V, Part, ScalarValue); - return ScalarValue; + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part] = getScalarValue(V, Part, 0); + return VectorLoopValueMap.initVector(V, Entry); } // Get the last scalar instruction we generated for V. If the value is @@ -2721,8 +2703,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { // of the last unroll iteration. Otherwise, the last instruction is the one // we created for the last vector lane of the last unroll iteration. unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1; - auto *LastInst = - cast(getOrCreateScalarValue(V, UF - 1, LastLane)); + auto *LastInst = cast(getScalarValue(V, UF - 1, LastLane)); // Set the insert point after the last scalarized instruction. This ensures // the insertelement sequence will directly follow the scalar definitions. 
@@ -2736,50 +2717,52 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { // iteration. Otherwise, we construct the vector values using insertelement // instructions. Since the resulting vectors are stored in // VectorLoopValueMap, we will only generate the insertelements once. - Value *VectorValue = nullptr; - if (Cost->isUniformAfterVectorization(I, VF)) { - VectorValue = getBroadcastInstrs(ScalarValue); - } else { - VectorValue = UndefValue::get(VectorType::get(V->getType(), VF)); - for (unsigned Lane = 0; Lane < VF; ++Lane) - VectorValue = Builder.CreateInsertElement( - VectorValue, getOrCreateScalarValue(V, Part, Lane), - Builder.getInt32(Lane)); + for (unsigned Part = 0; Part < UF; ++Part) { + Value *VectorValue = nullptr; + if (Cost->isUniformAfterVectorization(I, VF)) { + VectorValue = getBroadcastInstrs(getScalarValue(V, Part, 0)); + } else { + VectorValue = UndefValue::get(VectorType::get(V->getType(), VF)); + for (unsigned Lane = 0; Lane < VF; ++Lane) + VectorValue = Builder.CreateInsertElement( + VectorValue, getScalarValue(V, Part, Lane), + Builder.getInt32(Lane)); + } + Entry[Part] = VectorValue; } - VectorLoopValueMap.setVectorValue(V, Part, VectorValue); Builder.restoreIP(OldIP); - return VectorValue; + return VectorLoopValueMap.initVector(V, Entry); } // If this scalar is unknown, assume that it is a constant or that it is // loop invariant. Broadcast V and save the value for future uses. Value *B = getBroadcastInstrs(V); - VectorLoopValueMap.setVectorValue(V, Part, B); - return B; + return VectorLoopValueMap.initVector(V, VectorParts(UF, B)); } -Value *InnerLoopVectorizer::getOrCreateScalarValue(Value *V, unsigned Part, - unsigned Lane) { +Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part, + unsigned Lane) { // If the value is not an instruction contained in the loop, it should // already be scalar. if (OrigLoop->isLoopInvariant(V)) return V; - assert(Lane > 0 ? 
!Cost->isUniformAfterVectorization(cast(V), VF) - : true && "Uniform values only have lane zero"); + assert(Lane > 0 ? + !Cost->isUniformAfterVectorization(cast(V), VF) + : true && "Uniform values only have lane zero"); // If the value from the original loop has not been vectorized, it is // represented by UF x VF scalar values in the new loop. Return the requested // scalar value. - if (VectorLoopValueMap.hasScalarValue(V, Part, Lane)) - return VectorLoopValueMap.getScalarValue(V, Part, Lane); + if (VectorLoopValueMap.hasScalar(V)) + return VectorLoopValueMap.ScalarMapStorage[V][Part][Lane]; // If the value has not been scalarized, get its entry in VectorLoopValueMap // for the given unroll part. If this entry is not a vector type (i.e., the // vectorization factor is one), there is no need to generate an // extractelement instruction. - auto *U = getOrCreateVectorValue(V, Part); + auto *U = getVectorValue(V)[Part]; if (!U->getType()->isVectorTy()) { assert(VF == 1 && "Value not scalarized has non-vector type"); return U; @@ -2861,7 +2844,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { Index += (VF - 1) * Group->getFactor(); for (unsigned Part = 0; Part < UF; Part++) { - Value *NewPtr = getOrCreateScalarValue(Ptr, Part, 0); + Value *NewPtr = getScalarValue(Ptr, Part, 0); // Notice current instruction could be any index. Need to adjust the address // to the member of index 0. 
@@ -2904,6 +2887,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { if (!Member) continue; + VectorParts Entry(UF); Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF); for (unsigned Part = 0; Part < UF; Part++) { Value *StridedVec = Builder.CreateShuffleVector( @@ -2915,11 +2899,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy); } - if (Group->isReverse()) - StridedVec = reverseVector(StridedVec); - - VectorLoopValueMap.setVectorValue(Member, Part, StridedVec); + Entry[Part] = + Group->isReverse() ? reverseVector(StridedVec) : StridedVec; } + VectorLoopValueMap.initVector(Member, Entry); } return; } @@ -2936,8 +2919,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { Instruction *Member = Group->getMember(i); assert(Member && "Fail to get a member from an interleaved store group"); - Value *StoredVec = getOrCreateVectorValue( - cast(Member)->getValueOperand(), Part); + Value *StoredVec = + getVectorValue(cast(Member)->getValueOperand())[Part]; if (Group->isReverse()) StoredVec = reverseVector(StoredVec); @@ -2998,14 +2981,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { bool CreateGatherScatter = (Decision == LoopVectorizationCostModel::CM_GatherScatter); - // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector - // gather/scatter. Otherwise Decision should have been to Scalarize. - assert((ConsecutiveStride || CreateGatherScatter) && - "The instruction should be scalarized"); + VectorParts VectorGep; // Handle consecutive loads/stores. 
- if (ConsecutiveStride) - Ptr = getOrCreateScalarValue(Ptr, 0, 0); + if (ConsecutiveStride) { + Ptr = getScalarValue(Ptr, 0, 0); + } else { + // At this point we should vector version of GEP for Gather or Scatter + assert(CreateGatherScatter && "The instruction should be scalarized"); + VectorGep = getVectorValue(Ptr); + } VectorParts Mask = createBlockInMask(Instr->getParent()); // Handle Stores: @@ -3013,15 +2998,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { assert(!Legal->isUniform(SI->getPointerOperand()) && "We do not allow storing to uniform addresses"); setDebugLocFromInst(Builder, SI); + // We don't want to update the value in the map as it might be used in + // another expression. So don't use a reference type for "StoredVal". + VectorParts StoredVal = getVectorValue(SI->getValueOperand()); for (unsigned Part = 0; Part < UF; ++Part) { Instruction *NewSI = nullptr; - Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part); if (CreateGatherScatter) { Value *MaskPart = Legal->isMaskRequired(SI) ? Mask[Part] : nullptr; - Value *VectorGep = getOrCreateVectorValue(Ptr, Part); - NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, - MaskPart); + NewSI = Builder.CreateMaskedScatter(StoredVal[Part], VectorGep[Part], + Alignment, MaskPart); } else { // Calculate the pointer for the specific unroll-part. Value *PartPtr = @@ -3030,7 +3016,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { if (Reverse) { // If we store to reverse consecutive memory locations, then we need // to reverse the order of elements in the stored value. - StoredVal = reverseVector(StoredVal); + StoredVal[Part] = reverseVector(StoredVal[Part]); // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element. 
PartPtr = @@ -3044,10 +3030,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); if (Legal->isMaskRequired(SI)) - NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, + NewSI = Builder.CreateMaskedStore(StoredVal[Part], VecPtr, Alignment, Mask[Part]); else - NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); + NewSI = + Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment); } addMetadata(NewSI, SI); } @@ -3057,13 +3044,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // Handle loads. assert(LI && "Must have a load instruction"); setDebugLocFromInst(Builder, LI); + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Value *NewLI; + Instruction *NewLI; if (CreateGatherScatter) { Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr; - Value *VectorGep = getOrCreateVectorValue(Ptr, Part); - NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart, + NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart, nullptr, "wide.masked.gather"); + Entry[Part] = NewLI; } else { // Calculate the pointer for the specific unroll-part. Value *PartPtr = @@ -3085,12 +3073,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { "wide.masked.load"); else NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load"); - if (Reverse) - NewLI = reverseVector(NewLI); + Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI; } addMetadata(NewLI, LI); - VectorLoopValueMap.setVectorValue(Instr, Part, NewLI); } + VectorLoopValueMap.initVector(Instr, Entry); } void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, @@ -3107,6 +3094,9 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // Does this instruction return a value ? bool IsVoidRetTy = Instr->getType()->isVoidTy(); + // Initialize a new scalar map entry. 
+ ScalarParts Entry(UF); + VectorParts Cond; if (IfPredicateInstr) Cond = createBlockInMask(Instr->getParent()); @@ -3118,6 +3108,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // For each vector unroll 'part': for (unsigned Part = 0; Part < UF; ++Part) { + Entry[Part].resize(VF); // For each scalar that we create: for (unsigned Lane = 0; Lane < Lanes; ++Lane) { @@ -3138,7 +3129,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { - auto *NewOp = getOrCreateScalarValue(Instr->getOperand(op), Part, Lane); + auto *NewOp = getScalarValue(Instr->getOperand(op), Part, Lane); Cloned->setOperand(op, NewOp); } addNewMetadata(Cloned, Instr); @@ -3147,7 +3138,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, Builder.Insert(Cloned); // Add the cloned scalar to the scalar map entry. - VectorLoopValueMap.setScalarValue(Instr, Part, Lane, Cloned); + Entry[Part][Lane] = Cloned; // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast(Cloned)) @@ -3159,6 +3150,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp)); } } + VectorLoopValueMap.initScalar(Instr, Entry); } PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, @@ -3794,10 +3786,10 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { // If the value wasn't vectorized, we must maintain the original scalar // type. The absence of the value from VectorLoopValueMap indicates that it // wasn't vectorized. 
- if (!VectorLoopValueMap.hasAnyVectorValue(KV.first)) + if (!VectorLoopValueMap.hasVector(KV.first)) continue; - for (unsigned Part = 0; Part < UF; ++Part) { - Value *I = getOrCreateVectorValue(KV.first, Part); + VectorParts &Parts = VectorLoopValueMap.getVector(KV.first); + for (Value *&I : Parts) { if (Erased.count(I) || I->use_empty() || !isa(I)) continue; Type *OriginalTy = I->getType(); @@ -3886,7 +3878,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { I->replaceAllUsesWith(Res); cast(I)->eraseFromParent(); Erased.insert(I); - VectorLoopValueMap.resetVectorValue(KV.first, Part, Res); + I = Res; } } @@ -3895,15 +3887,15 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { // If the value wasn't vectorized, we must maintain the original scalar // type. The absence of the value from VectorLoopValueMap indicates that it // wasn't vectorized. - if (!VectorLoopValueMap.hasAnyVectorValue(KV.first)) + if (!VectorLoopValueMap.hasVector(KV.first)) continue; - for (unsigned Part = 0; Part < UF; ++Part) { - Value *I = getOrCreateVectorValue(KV.first, Part); + VectorParts &Parts = VectorLoopValueMap.getVector(KV.first); + for (Value *&I : Parts) { ZExtInst *Inst = dyn_cast(I); if (Inst && Inst->use_empty()) { Value *NewI = Inst->getOperand(0); Inst->eraseFromParent(); - VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI); + I = NewI; } } } @@ -4033,8 +4025,8 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // We constructed a temporary phi node in the first phase of vectorization. // This phi node will eventually be deleted. - Builder.SetInsertPoint( - cast(VectorLoopValueMap.getVectorValue(Phi, 0))); + VectorParts &PhiParts = VectorLoopValueMap.getVector(Phi); + Builder.SetInsertPoint(cast(PhiParts[0])); // Create a phi node for the new recurrence. The current value will either be // the initial value inserted into a vector or loop-varying vector value. 
@@ -4042,19 +4034,19 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { VecPhi->addIncoming(VectorInit, LoopVectorPreHeader); // Get the vectorized previous value. - Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1); + auto &PreviousParts = getVectorValue(Previous); // Set the insertion point after the previous value if it is an instruction. // Note that the previous value may have been constant-folded so it is not // guaranteed to be an instruction in the vector loop. Also, if the previous // value is a phi node, we should insert after all the phi nodes to avoid // breaking basic block verification. - if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart) || - isa(PreviousLastPart)) + if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousParts[UF - 1]) || + isa(PreviousParts[UF - 1])) Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); else Builder.SetInsertPoint( - &*++BasicBlock::iterator(cast(PreviousLastPart))); + &*++BasicBlock::iterator(cast(PreviousParts[UF - 1]))); // We will construct a vector for the recurrence by combining the values for // the current and previous iterations. This is the required shuffle mask. @@ -4069,16 +4061,15 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // Shuffle the current and previous vector and update the vector parts. for (unsigned Part = 0; Part < UF; ++Part) { - Value *PreviousPart = getOrCreateVectorValue(Previous, Part); - Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part); auto *Shuffle = - VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart, - ConstantVector::get(ShuffleMask)) - : Incoming; - PhiPart->replaceAllUsesWith(Shuffle); - cast(PhiPart)->eraseFromParent(); - VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle); - Incoming = PreviousPart; + VF > 1 + ? 
Builder.CreateShuffleVector(Incoming, PreviousParts[Part], + ConstantVector::get(ShuffleMask)) + : Incoming; + PhiParts[Part]->replaceAllUsesWith(Shuffle); + cast(PhiParts[Part])->eraseFromParent(); + PhiParts[Part] = Shuffle; + Incoming = PreviousParts[Part]; } // Fix the latch value of the new recurrence in the vector loop. @@ -4106,7 +4097,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // `Incoming`. This is analogous to the vectorized case above: extracting the // second last element when VF > 1. else if (UF > 1) - ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(Previous, UF - 2); + ExtractForPhiUsedOutsideLoop = PreviousParts[UF - 2]; // Fix the initial value of the original recurrence in the scalar loop. Builder.SetInsertPoint(&*LoopScalarPreHeader->begin()); @@ -4157,7 +4148,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator()); // This is the vector-clone of the value that leaves the loop. - Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType(); + const VectorParts &VectorExit = getVectorValue(LoopExitInst); + Type *VecTy = VectorExit[0]->getType(); // Find the reduction identity variable. Zero for addition, or, xor, // one for multiplication, -1 for And. @@ -4195,17 +4187,18 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // Reductions do not have to start at zero. They can start with // any loop invariant values. + const VectorParts &VecRdxPhi = getVectorValue(Phi); BasicBlock *Latch = OrigLoop->getLoopLatch(); Value *LoopVal = Phi->getIncomingValueForBlock(Latch); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part); - Value *Val = getOrCreateVectorValue(LoopVal, Part); + const VectorParts &Val = getVectorValue(LoopVal); + for (unsigned part = 0; part < UF; ++part) { // Make sure to add the reduction stat value only to the // first unroll part. - Value *StartVal = (Part == 0) ? 
VectorStart : Identity; - cast(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader); - cast(VecRdxPhi) - ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); + Value *StartVal = (part == 0) ? VectorStart : Identity; + cast(VecRdxPhi[part]) + ->addIncoming(StartVal, LoopVectorPreHeader); + cast(VecRdxPhi[part]) + ->addIncoming(Val[part], LI->getLoopFor(LoopVectorBody)->getLoopLatch()); } // Before each round, move the insertion point right between @@ -4214,6 +4207,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // instructions. Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); + VectorParts &RdxParts = VectorLoopValueMap.getVector(LoopExitInst); setDebugLocFromInst(Builder, LoopExitInst); // If the vector reduction can be performed in a smaller type, we truncate @@ -4222,42 +4216,37 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) { Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); Builder.SetInsertPoint(LoopVectorBody->getTerminator()); - VectorParts RdxParts(UF); - for (unsigned Part = 0; Part < UF; ++Part) { - RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); - Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); + for (unsigned part = 0; part < UF; ++part) { + Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy); Value *Extnd = RdxDesc.isSigned() ? 
Builder.CreateSExt(Trunc, VecTy) - : Builder.CreateZExt(Trunc, VecTy); - for (Value::user_iterator UI = RdxParts[Part]->user_begin(); - UI != RdxParts[Part]->user_end();) + : Builder.CreateZExt(Trunc, VecTy); + for (Value::user_iterator UI = RdxParts[part]->user_begin(); + UI != RdxParts[part]->user_end();) if (*UI != Trunc) { - (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd); - RdxParts[Part] = Extnd; + (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd); + RdxParts[part] = Extnd; } else { ++UI; } } Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); - for (unsigned Part = 0; Part < UF; ++Part) { - RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); - VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, RdxParts[Part]); - } + for (unsigned part = 0; part < UF; ++part) + RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy); } // Reduce all of the unrolled parts into a single vector. - Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0); + Value *ReducedPartRdx = RdxParts[0]; unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK); setDebugLocFromInst(Builder, ReducedPartRdx); - for (unsigned Part = 1; Part < UF; ++Part) { - Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); + for (unsigned part = 1; part < UF; ++part) { if (Op != Instruction::ICmp && Op != Instruction::FCmp) // Floating point operations had to be 'fast' to enable the reduction. 
ReducedPartRdx = addFastMathFlag( - Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart, + Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part], ReducedPartRdx, "bin.rdx")); else ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp( - Builder, MinMaxKind, ReducedPartRdx, RdxPart); + Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]); } if (VF > 1) { @@ -4529,16 +4518,14 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { assert(BI && "Unexpected terminator found"); if (BI->isConditional()) { + VectorParts EdgeMask = getVectorValue(BI->getCondition()); - VectorParts EdgeMask(UF); - for (unsigned Part = 0; Part < UF; ++Part) { - auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part); - if (BI->getSuccessor(0) != Dst) - EdgeMaskPart = Builder.CreateNot(EdgeMaskPart); + if (BI->getSuccessor(0) != Dst) + for (unsigned part = 0; part < UF; ++part) + EdgeMask[part] = Builder.CreateNot(EdgeMask[part]); - EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]); - EdgeMask[Part] = EdgeMaskPart; - } + for (unsigned part = 0; part < UF; ++part) + EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]); EdgeMaskCache[Edge] = EdgeMask; return EdgeMask; @@ -4557,27 +4544,23 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { if (BCEntryIt != BlockMaskCache.end()) return BCEntryIt->second; - VectorParts BlockMask(UF); - // Loop incoming mask is all-one. if (OrigLoop->getHeader() == BB) { Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1); - for (unsigned Part = 0; Part < UF; ++Part) - BlockMask[Part] = getOrCreateVectorValue(C, Part); + const VectorParts &BlockMask = getVectorValue(C); BlockMaskCache[BB] = BlockMask; return BlockMask; } // This is the block mask. We OR all incoming edges, and with zero. 
Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0); - for (unsigned Part = 0; Part < UF; ++Part) - BlockMask[Part] = getOrCreateVectorValue(Zero, Part); + VectorParts BlockMask = getVectorValue(Zero); // For each pred: - for (pred_iterator It = pred_begin(BB), E = pred_end(BB); It != E; ++It) { - VectorParts EM = createEdgeMask(*It, BB); - for (unsigned Part = 0; Part < UF; ++Part) - BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EM[Part]); + for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) { + VectorParts EM = createEdgeMask(*it, BB); + for (unsigned part = 0; part < UF; ++part) + BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]); } BlockMaskCache[BB] = BlockMask; @@ -4592,14 +4575,15 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) { - for (unsigned Part = 0; Part < UF; ++Part) { + VectorParts Entry(UF); + for (unsigned part = 0; part < UF; ++part) { // This is phase one of vectorizing PHIs. Type *VecTy = (VF == 1) ? 
PN->getType() : VectorType::get(PN->getType(), VF); - Value *EntryPart = PHINode::Create( + Entry[part] = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); - VectorLoopValueMap.setVectorValue(P, Part, EntryPart); } + VectorLoopValueMap.initVector(P, Entry); return; } @@ -4623,22 +4607,21 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, for (unsigned In = 0; In < NumIncoming; In++) { VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), P->getParent()); + const VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *In0 = getOrCreateVectorValue(P->getIncomingValue(In), Part); + for (unsigned part = 0; part < UF; ++part) { // We might have single edge PHIs (blocks) - use an identity // 'select' for the first PHI operand. if (In == 0) - Entry[Part] = Builder.CreateSelect(Cond[Part], In0, In0); + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In0[part]); else // Select between the current value and the previous incoming edge // based on the incoming mask. - Entry[Part] = Builder.CreateSelect(Cond[Part], In0, Entry[Part], + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], Entry[part], "predphi"); } } - for (unsigned Part = 0; Part < UF; ++Part) - VectorLoopValueMap.setVectorValue(P, Part, Entry[Part]); + VectorLoopValueMap.initVector(P, Entry); return; } @@ -4669,15 +4652,18 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF; // These are the scalar results. Notice that we don't generate vector GEPs // because scalar GEPs result in better code. 
+ ScalarParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { + Entry[Part].resize(VF); for (unsigned Lane = 0; Lane < Lanes; ++Lane) { Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL); SclrGep->setName("next.gep"); - VectorLoopValueMap.setScalarValue(P, Part, Lane, SclrGep); + Entry[Part][Lane] = SclrGep; } } + VectorLoopValueMap.initScalar(P, Entry); return; } } @@ -4727,6 +4713,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // is vector-typed. Thus, to keep the representation compact, we only use // vector-typed operands for loop-varying values. auto *GEP = cast(&I); + VectorParts Entry(UF); if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) { // If we are vectorizing, but the GEP has only loop-invariant operands, @@ -4742,11 +4729,8 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // collectLoopScalars() and teach getVectorValue() to broadcast // the lane-zero scalar value. auto *Clone = Builder.Insert(GEP->clone()); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *EntryPart = Builder.CreateVectorSplat(VF, Clone); - VectorLoopValueMap.setVectorValue(&I, Part, EntryPart); - addMetadata(EntryPart, GEP); - } + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part] = Builder.CreateVectorSplat(VF, Clone); } else { // If the GEP has at least one loop-varying operand, we are sure to // produce a vector of pointers. But if we are only unrolling, we want @@ -4759,10 +4743,9 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // The pointer operand of the new GEP. If it's loop-invariant, we // won't broadcast it. - auto *Ptr = - OrigLoop->isLoopInvariant(GEP->getPointerOperand()) - ? GEP->getPointerOperand() - : getOrCreateVectorValue(GEP->getPointerOperand(), Part); + auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand()) + ? 
GEP->getPointerOperand() + : getVectorValue(GEP->getPointerOperand())[Part]; // Collect all the indices for the new GEP. If any index is // loop-invariant, we won't broadcast it. @@ -4771,7 +4754,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { if (OrigLoop->isLoopInvariant(U.get())) Indices.push_back(U.get()); else - Indices.push_back(getOrCreateVectorValue(U.get(), Part)); + Indices.push_back(getVectorValue(U.get())[Part]); } // Create the new GEP. Note that this GEP may be a scalar if VF == 1, @@ -4781,11 +4764,12 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { : Builder.CreateGEP(Ptr, Indices); assert((VF == 1 || NewGEP->getType()->isVectorTy()) && "NewGEP is not a pointer vector"); - VectorLoopValueMap.setVectorValue(&I, Part, NewGEP); - addMetadata(NewGEP, GEP); + Entry[Part] = NewGEP; } } + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, GEP); break; } case Instruction::UDiv: @@ -4816,20 +4800,22 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // Just widen binops. auto *BinOp = cast(&I); setDebugLocFromInst(Builder, BinOp); + const VectorParts &A = getVectorValue(BinOp->getOperand(0)); + const VectorParts &B = getVectorValue(BinOp->getOperand(1)); + // Use this vector value for all users of the original instruction. + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part); - Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part); - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]); if (BinaryOperator *VecOp = dyn_cast(V)) VecOp->copyIRFlags(BinOp); - // Use this vector value for all users of the original instruction. 
- VectorLoopValueMap.setVectorValue(&I, Part, V); - addMetadata(V, BinOp); + Entry[Part] = V; } + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, BinOp); break; } case Instruction::Select: { @@ -4845,19 +4831,20 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { // loop. This means that we can't just use the original 'cond' value. // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. + const VectorParts &Cond = getVectorValue(I.getOperand(0)); + const VectorParts &Op0 = getVectorValue(I.getOperand(1)); + const VectorParts &Op1 = getVectorValue(I.getOperand(2)); - auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), 0, 0); + auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0); + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part); - Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part); - Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part); - Value *Sel = - Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1); - VectorLoopValueMap.setVectorValue(&I, Part, Sel); - addMetadata(Sel, &I); + Entry[Part] = Builder.CreateSelect( + InvariantCond ? 
ScalarCond : Cond[Part], Op0[Part], Op1[Part]); } + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, &I); break; } @@ -4867,20 +4854,22 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { bool FCmp = (I.getOpcode() == Instruction::FCmp); auto *Cmp = dyn_cast(&I); setDebugLocFromInst(Builder, Cmp); + const VectorParts &A = getVectorValue(Cmp->getOperand(0)); + const VectorParts &B = getVectorValue(Cmp->getOperand(1)); + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part); - Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part); Value *C = nullptr; if (FCmp) { - C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); + C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]); cast(C)->copyFastMathFlags(Cmp); } else { - C = Builder.CreateICmp(Cmp->getPredicate(), A, B); + C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]); } - VectorLoopValueMap.setVectorValue(&I, Part, C); - addMetadata(C, &I); + Entry[Part] = C; } + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, &I); break; } @@ -4917,12 +4906,12 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { Type *DestTy = (VF == 1) ? 
CI->getType() : VectorType::get(CI->getType(), VF); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = getOrCreateVectorValue(CI->getOperand(0), Part); - Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); - VectorLoopValueMap.setVectorValue(&I, Part, Cast); - addMetadata(Cast, &I); - } + const VectorParts &A = getVectorValue(CI->getOperand(0)); + VectorParts Entry(UF); + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy); + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, &I); break; } @@ -4960,14 +4949,17 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { break; } + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Args; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { Value *Arg = CI->getArgOperand(i); // Some intrinsics have a scalar argument - don't replace it with a // vector. - if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) - Arg = getOrCreateVectorValue(CI->getArgOperand(i), Part); + if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) { + const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i)); + Arg = VectorArg[Part]; + } Args.push_back(Arg); } @@ -5000,10 +4992,11 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { if (isa(V)) V->copyFastMathFlags(CI); - VectorLoopValueMap.setVectorValue(&I, Part, V); - addMetadata(V, &I); + Entry[Part] = V; } + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, &I); break; } -- 2.50.1