/// Return a uniquified Attribute object that has the specific
/// alignment set.
- static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align);
- static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align);
+ static Attribute getWithAlignment(LLVMContext &Context, Align Alignment);
+ static Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment);
static Attribute getWithDereferenceableBytes(LLVMContext &Context,
uint64_t Bytes);
static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context,
setArgOperand(ARG_DEST, Ptr);
}
- void setDestAlignment(unsigned Align) {
+ void setDestAlignment(unsigned Alignment) { // Renamed so the parameter no longer hides the Align type used below.
removeParamAttr(ARG_DEST, Attribute::Alignment);
- if (Align > 0)
- addParamAttr(ARG_DEST,
- Attribute::getWithAlignment(getContext(), Align));
+ if (Alignment > 0) // 0 leaves the attribute removed; only non-zero values are re-added.
+ addParamAttr(ARG_DEST, Attribute::getWithAlignment(getContext(),
+ Align(Alignment)));
}
void setLength(Value *L) {
BaseCL::setArgOperand(ARG_SOURCE, Ptr);
}
- void setSourceAlignment(unsigned Align) {
+ void setSourceAlignment(unsigned Alignment) { // Renamed so the parameter no longer hides the Align type used below.
BaseCL::removeParamAttr(ARG_SOURCE, Attribute::Alignment);
- if (Align > 0)
- BaseCL::addParamAttr(ARG_SOURCE, Attribute::getWithAlignment(
- BaseCL::getContext(), Align));
+ if (Alignment > 0) // 0 leaves the attribute removed; only non-zero values are re-added.
+ BaseCL::addParamAttr(ARG_SOURCE,
+ Attribute::getWithAlignment(BaseCL::getContext(),
+ Align(Alignment)));
}
};
return Attribute(PA);
}
-Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
- assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
- assert(Align <= 0x40000000 && "Alignment too large.");
- return get(Context, Alignment, Align);
+Attribute Attribute::getWithAlignment(LLVMContext &Context, Align A) { // NOTE(review): power-of-two assert dropped; presumably the Align type guarantees it -- confirm.
+ assert(A <= 0x40000000 && "Alignment too large.");
+ return get(Context, Alignment, A.value()); // Alignment here is the attribute kind; the payload is the integer value of A.
}
-Attribute Attribute::getWithStackAlignment(LLVMContext &Context,
- uint64_t Align) {
- assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
- assert(Align <= 0x100 && "Alignment too large.");
- return get(Context, StackAlignment, Align);
+Attribute Attribute::getWithStackAlignment(LLVMContext &Context, Align A) { // NOTE(review): power-of-two assert dropped; presumably the Align type guarantees it -- confirm.
+ assert(A <= 0x100 && "Alignment too large.");
+ return get(Context, StackAlignment, A.value()); // StackAlignment is the attribute kind; the payload is the integer value of A.
}
Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context,
Attr = Attribute::getWithByValType(C, B.getByValType());
break;
case Attribute::Alignment:
- Attr = Attribute::getWithAlignment(C, B.getAlignment());
+ Attr = Attribute::getWithAlignment(C, Align(B.getAlignment()));
break;
case Attribute::StackAlignment:
- Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment());
+ Attr = Attribute::getWithStackAlignment(C, Align(B.getStackAlignment()));
break;
case Attribute::Dereferenceable:
Attr = Attribute::getWithDereferenceableBytes(
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
Argument *A = unwrap<Argument>(Arg);
- A->addAttr(Attribute::getWithAlignment(A->getContext(), align));
+ A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(align))); // NOTE(review): align is wrapped unconditionally, with no zero check here -- confirm callers never pass 0.
}
/*--.. Operations on ifuncs ................................................--*/
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
auto *Call = unwrap<CallBase>(Instr);
- Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align);
+ Attribute AlignAttr =
+ Attribute::getWithAlignment(Call->getContext(), Align(align)); // NOTE(review): align is wrapped unconditionally, with no zero check here -- confirm callers never pass 0.
Call->addAttribute(index, AlignAttr);
}
CallInst *CI = createCallHelper(TheFn, Ops, this);
// Set the alignment of the pointer args.
- CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign));
- CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), SrcAlign));
+ CI->addParamAttr(
+ 0, Attribute::getWithAlignment(CI->getContext(), Align(DstAlign)));
+ CI->addParamAttr(
+ 1, Attribute::getWithAlignment(CI->getContext(), Align(SrcAlign)));
// Set the TBAA info if present.
if (TBAATag)
if (STM.isXNACKEnabled())
Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
- unsigned MaxKernArgAlign;
+ Align MaxKernArgAlign;
Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
- // These alignment values are specified in powers of two, so alignment =
- // 2^n. The minimum alignment is 2^4 = 16.
- Out.kernarg_segment_alignment = std::max<size_t>(4,
- countTrailingZeros(MaxKernArgAlign));
+ // kernarg_segment_alignment is specified as log2 of the alignment, so the
+ // minimum alignment of 16 bytes is encoded as 4.
+ Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL);
- unsigned MaxKernArgAlign;
+ Align MaxKernArgAlign;
HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
MaxKernArgAlign);
HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
- HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
+ HSACodeProps.mKernargSegmentAlign =
+ std::max(MaxKernArgAlign, Align(4)).value();
HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
auto Kern = HSAMetadataDoc->getMapNode();
- unsigned MaxKernArgAlign;
+ Align MaxKernArgAlign;
Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
STM.getKernArgSegmentSize(F, MaxKernArgAlign));
Kern[".group_segment_fixed_size"] =
Kern[".private_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
Kern[".kernarg_segment_align"] =
- Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign));
+ Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
Kern[".wavefront_size"] =
Kern.getDocument()->getNode(STM.getWavefrontSize());
Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
const AMDGPUSubtarget &ST =
AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction());
unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
- unsigned Alignment = ST.getAlignmentForImplicitArgPtr();
+ const Align Alignment = ST.getAlignmentForImplicitArgPtr();
uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
ExplicitArgOffset;
switch (Param) {
BasicBlock &EntryBlock = *F.begin();
IRBuilder<> Builder(&*EntryBlock.begin());
- const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary
+ const Align KernArgBaseAlign(16); // FIXME: Increase if necessary
const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);
- unsigned MaxAlign;
+ Align MaxAlign;
- // FIXME: Alignment is broken broken with explicit arg offset.;
+ // FIXME: Alignment is broken with explicit arg offset.
const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
if (TotalKernArgSize == 0)
for (Argument &Arg : F.args()) {
Type *ArgTy = Arg.getType();
- unsigned Align = DL.getABITypeAlignment(ArgTy);
+ unsigned ABITypeAlign = DL.getABITypeAlignment(ArgTy);
unsigned Size = DL.getTypeSizeInBits(ArgTy);
unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
- uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
- ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
+ uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
+ ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
if (Arg.use_empty())
continue;
int64_t AlignDownOffset = alignDown(EltOffset, 4);
int64_t OffsetDiff = EltOffset - AlignDownOffset;
- unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
- KernArgBaseAlign);
+ Align AdjustedAlign = commonAlignment(
+ KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset);
Value *ArgPtr;
Type *AdjustedArgTy;
ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
LoadInst *Load =
- Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
+ Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign.value());
Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
MDBuilder MDB(Ctx);
}
KernArgSegment->addAttribute(
- AttributeList::ReturnIndex,
- Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));
+ AttributeList::ReturnIndex,
+ Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));
return true;
}
MachineFunctionInfo(),
LocalMemoryObjects(),
ExplicitKernArgSize(0),
- MaxKernArgAlign(0),
LDSSize(0),
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
protected:
uint64_t ExplicitKernArgSize; // Cache for this.
- unsigned MaxKernArgAlign; // Cache for this.
+ Align MaxKernArgAlign; // Cache for this.
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
return ExplicitKernArgSize;
}
- unsigned getMaxKernArgAlign() const {
- return MaxKernArgAlign;
- }
+ unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); } // Return type stays unsigned; the integer is read out of the stored value via value().
unsigned getLDSSize() const {
return LDSSize;
}
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
- unsigned &MaxAlign) const {
+ Align &MaxAlign) const { // MaxAlign is an out-parameter: it receives the largest ABI type alignment among F's arguments.
assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL);
const DataLayout &DL = F.getParent()->getDataLayout();
uint64_t ExplicitArgBytes = 0;
- MaxAlign = 1;
+ MaxAlign = Align::None(); // Takes the place of the former starting value of 1.
for (const Argument &Arg : F.args()) {
Type *ArgTy = Arg.getType();
- unsigned Align = DL.getABITypeAlignment(ArgTy);
+ const Align Alignment(DL.getABITypeAlignment(ArgTy)); // Local renamed from Align so it does not hide the type.
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
- ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize;
- MaxAlign = std::max(MaxAlign, Align);
+ ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize; // Round the running size up to this argument's alignment, then add its alloc size.
+ MaxAlign = std::max(MaxAlign, Alignment);
}
return ExplicitArgBytes;
}
unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
- unsigned &MaxAlign) const {
+ Align &MaxAlign) const {
uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
unsigned ExplicitOffset = getExplicitKernelArgOffset(F);
uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
unsigned ImplicitBytes = getImplicitArgNumBytes(F);
if (ImplicitBytes != 0) {
- unsigned Alignment = getAlignmentForImplicitArgPtr();
+ const Align Alignment = getAlignmentForImplicitArgPtr();
TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}
return LocalMemorySize;
}
- unsigned getAlignmentForImplicitArgPtr() const {
- return isAmdHsaOS() ? 8 : 4;
+ Align getAlignmentForImplicitArgPtr() const { // Same 8-vs-4 choice as before, now returned as an Align.
+ return isAmdHsaOS() ? Align(8) : Align(4);
}
/// Returns the offset in bytes from the start of the input buffer
return 16;
return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
}
- uint64_t getExplicitKernArgSize(const Function &F,
- unsigned &MaxAlign) const;
- unsigned getKernArgSegmentSize(const Function &F,
- unsigned &MaxAlign) const;
+ uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
+ unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
virtual ~AMDGPUSubtarget() {}
};
bool SIMachineFunctionInfo::initializeBaseYamlFields(
const yaml::SIMachineFunctionInfo &YamlMFI) {
ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
- MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
+ MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
LDSSize = YamlMFI.LDSSize;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
IsEntryFunction = YamlMFI.IsEntryFunction;
getDeducedAttributes(LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
if (getAssumedAlign() > 1)
- Attrs.emplace_back(Attribute::getWithAlignment(Ctx, getAssumedAlign()));
+ Attrs.emplace_back(
+ Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
}
/// See AbstractAttribute::getAsStr().
# ALL-LABEL: name: no_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
-# FULL-NEXT: maxKernArgAlign: 0
+# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
+# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
# ALL-LABEL: name: empty_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
-# FULL-NEXT: maxKernArgAlign: 0
+# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
+# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
# ALL-LABEL: name: empty_mfi_entry_func
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
-# FULL-NEXT: maxKernArgAlign: 0
+# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
+# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-LABEL: {{^}}name: ps_shader
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
-; CHECK-NEXT: maxKernArgAlign: 0
+; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-LABEL: {{^}}name: function
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
-; CHECK-NEXT: maxKernArgAlign: 0
+; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-LABEL: {{^}}name: function_nsz
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
-; CHECK-NEXT: maxKernArgAlign: 0
+; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: noSignedZerosFPMath: true
TEST(Attributes, RemoveAlign) {
LLVMContext C;
- Attribute AlignAttr = Attribute::getWithAlignment(C, 8);
- Attribute StackAlignAttr = Attribute::getWithStackAlignment(C, 32);
+ Attribute AlignAttr = Attribute::getWithAlignment(C, Align(8));
+ Attribute StackAlignAttr = Attribute::getWithStackAlignment(C, Align(32));
AttrBuilder B_align_readonly;
B_align_readonly.addAttribute(AlignAttr);
B_align_readonly.addAttribute(Attribute::ReadOnly);
LLVMContext C;
AttributeList AL;
AL = AL.addAttribute(C, AttributeList::FirstArgIndex,
- Attribute::getWithAlignment(C, 8));
+ Attribute::getWithAlignment(C, Align(8)));
AL = AL.addAttribute(C, AttributeList::FirstArgIndex + 1,
- Attribute::getWithAlignment(C, 32));
+ Attribute::getWithAlignment(C, Align(32)));
EXPECT_EQ(8U, AL.getParamAlignment(0));
EXPECT_EQ(32U, AL.getParamAlignment(1));