The ``#pragma clang loop`` directive is used to specify hints for optimizing the
subsequent for, while, do-while, or c++11 range-based for loop. The directive
-provides options for vectorization, interleaving, unrolling and
+provides options for vectorization, interleaving, predication, unrolling and
distribution. Loop hints can be specified before any loop and will be ignored if
the optimization is not safe to apply.
-Vectorization and Interleaving
-------------------------------
+Vectorization, Interleaving, and Predication
+--------------------------------------------
A vectorized loop performs multiple iterations of the original loop
in parallel using vector instructions. The instruction set of the target
Specifying a width/count of 1 disables the optimization, and is equivalent to
``vectorize(disable)`` or ``interleave(disable)``.
+Vector predication is enabled by ``vectorize_predicate(enable)``, for example:
+
+.. code-block:: c++
+
+ #pragma clang loop vectorize(enable)
+ #pragma clang loop vectorize_predicate(enable)
+ for(...) {
+ ...
+ }
+
+This predicates (masks) all instructions in the loop, which allows the scalar
+remainder loop (the tail) to be folded into the main vectorized loop. This
+might be more efficient when vector predication is efficiently supported by the
+target platform.
+
Loop Unrolling
--------------
let Args = [EnumArgument<"Option", "OptionType",
["vectorize", "vectorize_width", "interleave", "interleave_count",
"unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count",
- "pipeline", "pipeline_initiation_interval", "distribute"],
+ "pipeline", "pipeline_initiation_interval", "distribute",
+ "vectorize_predicate"],
["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount",
"Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount",
- "PipelineDisabled", "PipelineInitiationInterval", "Distribute"]>,
+ "PipelineDisabled", "PipelineInitiationInterval", "Distribute",
+ "VectorizePredicate"]>,
EnumArgument<"State", "LoopHintState",
["enable", "disable", "numeric", "assume_safety", "full"],
["Enable", "Disable", "Numeric", "AssumeSafety", "Full"]>,
case PipelineDisabled: return "pipeline";
case PipelineInitiationInterval: return "pipeline_initiation_interval";
case Distribute: return "distribute";
+ case VectorizePredicate: return "vectorize_predicate";
}
llvm_unreachable("Unhandled LoopHint option.");
}
let Content = [{
The ``#pragma clang loop`` directive allows loop optimization hints to be
specified for the subsequent loop. The directive allows pipelining to be
-disabled, or vectorization, interleaving, and unrolling to be enabled or disabled.
-Vector width, interleave count, unrolling count, and the initiation interval
-for pipelining can be explicitly specified. See `language extensions
+disabled, or vectorization, vector predication, interleaving, and unrolling to
+be enabled or disabled. Vector width, interleave count, unrolling count, and
+the initiation interval for pipelining can be explicitly specified. See
+`language extensions
<http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_
for details.
}];
not initialized on device side. It has internal linkage and is initialized by
the initializer on host side.
}];
-}
\ No newline at end of file
+}
def err_pragma_loop_invalid_option : Error<
"%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, "
"vectorize_width, interleave, interleave_count, unroll, unroll_count, "
- "pipeline, pipeline_initiation_interval, or distribute">;
+ "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">;
def err_pragma_fp_invalid_option : Error<
"%select{invalid|missing}0 option%select{ %1|}0; expected contract">;
if (Attrs.VectorizeEnable == LoopAttributes::Disable)
Enabled = false;
else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified ||
+ Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified ||
Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0)
Enabled = true;
Args.push_back(TempNode.get());
Args.append(LoopProperties.begin(), LoopProperties.end());
+ // Setting vectorize.predicate
+ if (Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified) {
+ Metadata *Vals[] = {
+ MDString::get(Ctx, "llvm.loop.vectorize.predicate.enable"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ llvm::Type::getInt1Ty(Ctx),
+ (Attrs.VectorizePredicateEnable == LoopAttributes::Enable)))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
// Setting vectorize.width
if (Attrs.VectorizeWidth > 0) {
Metadata *Vals[] = {
LoopAttributes::LoopAttributes(bool IsParallel)
: IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified),
UnrollEnable(LoopAttributes::Unspecified),
- UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
+ UnrollAndJamEnable(LoopAttributes::Unspecified),
+ VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0),
DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
PipelineInitiationInterval(0) {}
VectorizeEnable = LoopAttributes::Unspecified;
UnrollEnable = LoopAttributes::Unspecified;
UnrollAndJamEnable = LoopAttributes::Unspecified;
+ VectorizePredicateEnable = LoopAttributes::Unspecified;
DistributeEnable = LoopAttributes::Unspecified;
PipelineDisabled = false;
PipelineInitiationInterval = 0;
Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
Attrs.PipelineInitiationInterval == 0 &&
+ Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified &&
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified &&
BeforeJam.InterleaveCount = Attrs.InterleaveCount;
BeforeJam.VectorizeEnable = Attrs.VectorizeEnable;
BeforeJam.DistributeEnable = Attrs.DistributeEnable;
+ BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable;
switch (Attrs.UnrollEnable) {
case LoopAttributes::Unspecified:
break;
}
+ AfterJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable;
AfterJam.UnrollCount = Attrs.UnrollCount;
AfterJam.PipelineDisabled = Attrs.PipelineDisabled;
AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval;
// add it manually.
SmallVector<Metadata *, 1> BeforeLoopProperties;
if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified ||
+ BeforeJam.VectorizePredicateEnable != LoopAttributes::Unspecified ||
BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0)
BeforeLoopProperties.push_back(
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized")));
case LoopHintAttr::UnrollAndJam:
setUnrollAndJamState(LoopAttributes::Disable);
break;
+ case LoopHintAttr::VectorizePredicate:
+ setVectorizePredicateState(LoopAttributes::Disable);
+ break;
case LoopHintAttr::Distribute:
setDistributeState(false);
break;
case LoopHintAttr::UnrollAndJam:
setUnrollAndJamState(LoopAttributes::Enable);
break;
+ case LoopHintAttr::VectorizePredicate:
+ setVectorizePredicateState(LoopAttributes::Enable);
+ break;
case LoopHintAttr::Distribute:
setDistributeState(true);
break;
break;
case LoopHintAttr::Unroll:
case LoopHintAttr::UnrollAndJam:
+ case LoopHintAttr::VectorizePredicate:
case LoopHintAttr::UnrollCount:
case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::Distribute:
case LoopHintAttr::PipelineDisabled:
case LoopHintAttr::PipelineInitiationInterval:
+ case LoopHintAttr::VectorizePredicate:
llvm_unreachable("Options cannot be used with 'full' hint.");
break;
}
break;
case LoopHintAttr::Unroll:
case LoopHintAttr::UnrollAndJam:
+ case LoopHintAttr::VectorizePredicate:
case LoopHintAttr::Vectorize:
case LoopHintAttr::Interleave:
case LoopHintAttr::Distribute:
/// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full).
LVEnableState UnrollAndJamEnable;
+ /// Value for llvm.loop.vectorize.predicate.enable metadata.
+ LVEnableState VectorizePredicateEnable;
+
/// Value for llvm.loop.vectorize.width metadata.
unsigned VectorizeWidth;
StagedAttrs.UnrollEnable = State;
}
+ /// Set the next pushed vectorize predicate state.
+ void setVectorizePredicateState(const LoopAttributes::LVEnableState &State) {
+ StagedAttrs.VectorizePredicateEnable = State;
+ }
+
/// Set the next pushed loop unroll_and_jam state.
void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) {
StagedAttrs.UnrollAndJamEnable = State;
StateOption = llvm::StringSwitch<bool>(OptionInfo->getName())
.Case("vectorize", true)
.Case("interleave", true)
+ .Case("vectorize_predicate", true)
.Default(false) ||
OptionUnroll || OptionUnrollAndJam || OptionDistribute ||
OptionPipelineDisabled;
/// 'vectorize' '(' loop-hint-keyword ')'
/// 'interleave' '(' loop-hint-keyword ')'
/// 'unroll' '(' unroll-hint-keyword ')'
+/// 'vectorize_predicate' '(' loop-hint-keyword ')'
/// 'vectorize_width' '(' loop-hint-value ')'
/// 'interleave_count' '(' loop-hint-value ')'
/// 'unroll_count' '(' loop-hint-value ')'
.Case("interleave", true)
.Case("unroll", true)
.Case("distribute", true)
+ .Case("vectorize_predicate", true)
.Case("vectorize_width", true)
.Case("interleave_count", true)
.Case("unroll_count", true)
.Case("vectorize", LoopHintAttr::Vectorize)
.Case("vectorize_width", LoopHintAttr::VectorizeWidth)
.Case("interleave", LoopHintAttr::Interleave)
+ .Case("vectorize_predicate", LoopHintAttr::VectorizePredicate)
.Case("interleave_count", LoopHintAttr::InterleaveCount)
.Case("unroll", LoopHintAttr::Unroll)
.Case("unroll_count", LoopHintAttr::UnrollCount)
State = LoopHintAttr::Numeric;
} else if (Option == LoopHintAttr::Vectorize ||
Option == LoopHintAttr::Interleave ||
+ Option == LoopHintAttr::VectorizePredicate ||
Option == LoopHintAttr::Unroll ||
Option == LoopHintAttr::Distribute ||
Option == LoopHintAttr::PipelineDisabled) {
const LoopHintAttr *StateAttr;
const LoopHintAttr *NumericAttr;
} HintAttrs[] = {{nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr},
- {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}};
+ {nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr},
+ {nullptr, nullptr}};
for (const auto *I : Attrs) {
const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(I);
Unroll,
UnrollAndJam,
Distribute,
- Pipeline
+ Pipeline,
+ VectorizePredicate
} Category;
switch (Option) {
case LoopHintAttr::Vectorize:
case LoopHintAttr::PipelineInitiationInterval:
Category = Pipeline;
break;
+ case LoopHintAttr::VectorizePredicate:
+ Category = VectorizePredicate;
+ break;
};
assert(Category < sizeof(HintAttrs) / sizeof(HintAttrs[0]));
if (Option == LoopHintAttr::Vectorize ||
Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll ||
Option == LoopHintAttr::UnrollAndJam ||
+ Option == LoopHintAttr::VectorizePredicate ||
Option == LoopHintAttr::PipelineDisabled ||
Option == LoopHintAttr::Distribute) {
// Enable|Disable|AssumeSafety hint. For example, vectorize(enable).
// CHECK: #pragma clang loop distribute(disable)
// CHECK-NEXT: #pragma clang loop vectorize(enable)
// CHECK-NEXT: #pragma clang loop interleave(disable)
+// CHECK-NEXT: #pragma clang loop vectorize_predicate(disable)
#pragma clang loop distribute(disable)
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(disable)
+#pragma clang loop vectorize_predicate(disable)
// CHECK-NEXT: while (i - 1 < Length)
while (i - 1 < Length) {
List[i] = i * 2;
// CHECK: #pragma clang loop distribute(enable)
// CHECK-NEXT: #pragma clang loop vectorize(disable)
// CHECK-NEXT: #pragma clang loop interleave(enable)
+// CHECK-NEXT: #pragma clang loop vectorize_predicate(enable)
#pragma clang loop distribute(enable)
#pragma clang loop vectorize(disable)
#pragma clang loop interleave(enable)
+#pragma clang loop vectorize_predicate(enable)
// CHECK-NEXT: while (i - 2 < Length)
while (i - 2 < Length) {
List[i] = i * 2;
--- /dev/null
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
+
+void test0(int *List, int Length) {
+// CHECK-LABEL: @{{.*}}test0{{.*}}(
+// CHECK: br label {{.*}}, !llvm.loop ![[LOOP0:.*]]
+
+ #pragma clang loop vectorize(enable)
+ for (int i = 0; i < Length; i++)
+ List[i] = i * 2;
+}
+
+void test1(int *List, int Length) {
+// CHECK-LABEL: @{{.*}}test1{{.*}}(
+// CHECK: br label {{.*}}, !llvm.loop ![[LOOP1:.*]]
+
+ #pragma clang loop vectorize(enable) vectorize_predicate(enable)
+ for (int i = 0; i < Length; i++)
+ List[i] = i * 2;
+}
+
+void test2(int *List, int Length) {
+// CHECK-LABEL: @{{.*}}test2{{.*}}(
+// CHECK: br label {{.*}}, !llvm.loop ![[LOOP2:.*]]
+
+ #pragma clang loop vectorize(enable) vectorize_predicate(disable)
+ for (int i = 0; i < Length; i++)
+ List[i] = i * 2;
+}
+
+// CHECK: ![[LOOP0]] = distinct !{![[LOOP0]], !3}
+// CHECK-NEXT: !3 = !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK-NEXT: ![[LOOP1]] = distinct !{![[LOOP1]], !5, !3}
+// CHECK-NEXT: !5 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
+// CHECK-NEXT: ![[LOOP2]] = distinct !{![[LOOP2]], !7, !3}
+// CHECK-NEXT: !7 = !{!"llvm.loop.vectorize.predicate.enable", i1 false}
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
+#pragma clang loop vectorize_predicate(enable)
#pragma clang loop unroll(full)
while (i + 1 < Length) {
List[i] = i;
#pragma clang loop vectorize(disable)
#pragma clang loop interleave(disable)
+#pragma clang loop vectorize_predicate(disable)
#pragma clang loop unroll(disable)
while (i - 1 < Length) {
List[i] = i;
}
int VList[Length];
-#pragma clang loop vectorize(disable) interleave(disable) unroll(disable)
+#pragma clang loop vectorize(disable) interleave(disable) unroll(disable) vectorize_predicate(disable)
for (int j : VList) {
VList[j] = List[j];
}
/* expected-error {{expected '('}} */ #pragma clang loop vectorize
/* expected-error {{expected '('}} */ #pragma clang loop interleave
+/* expected-error {{expected '('}} */ #pragma clang loop vectorize_predicate
/* expected-error {{expected '('}} */ #pragma clang loop unroll
/* expected-error {{expected '('}} */ #pragma clang loop distribute
/* expected-error {{expected ')'}} */ #pragma clang loop vectorize(enable
/* expected-error {{expected ')'}} */ #pragma clang loop interleave(enable
+/* expected-error {{expected ')'}} */ #pragma clang loop vectorize_predicate(enable
/* expected-error {{expected ')'}} */ #pragma clang loop unroll(full
/* expected-error {{expected ')'}} */ #pragma clang loop distribute(enable
/* expected-error {{missing argument; expected 'enable', 'full' or 'disable'}} */ #pragma clang loop unroll()
/* expected-error {{missing argument; expected 'enable' or 'disable'}} */ #pragma clang loop distribute()
-/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, or distribute}} */ #pragma clang loop
+/* expected-error {{missing option; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop
/* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword
/* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop badkeyword(enable)
/* expected-error {{invalid option 'badkeyword'}} */ #pragma clang loop vectorize(enable) badkeyword(4)
/* expected-error {{duplicate directives 'vectorize(enable)' and 'vectorize(disable)'}} */ #pragma clang loop vectorize(disable)
#pragma clang loop interleave(enable)
/* expected-error {{duplicate directives 'interleave(enable)' and 'interleave(disable)'}} */ #pragma clang loop interleave(disable)
+#pragma clang loop vectorize_predicate(enable)
+/* expected-error@+1 {{duplicate directives 'vectorize_predicate(enable)' and 'vectorize_predicate(disable)'}} */
+#pragma clang loop vectorize_predicate(disable)
#pragma clang loop unroll(full)
/* expected-error {{duplicate directives 'unroll(full)' and 'unroll(disable)'}} */ #pragma clang loop unroll(disable)
#pragma clang loop distribute(enable)
#pragma clang loop interleave(enable)
/* expected-error {{expected statement}} */ }
+
+void foo(void) {
+#pragma clang loop vectorize_predicate(enable)
+/* expected-error {{expected statement}} */ }
}
// pragma clang unroll_and_jam is disabled for the moment
-/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, or distribute}} */ #pragma clang loop unroll_and_jam(4)
+/* expected-error {{invalid option 'unroll_and_jam'; expected vectorize, vectorize_width, interleave, interleave_count, unroll, unroll_count, pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute}} */ #pragma clang loop unroll_and_jam(4)
for (int i = 0; i < Length; i++) {
for (int j = 0; j < Length; j++) {
List[i * Length + j] = Value;