Skip to content

Commit f155661

Browse files
rwestrel, mcimadamore, and chhagedorn
committed
8342692: C2: long counted loop/long range checks: don't create loop-nest for short running loops
Co-authored-by: Maurizio Cimadamore <[email protected]> Co-authored-by: Christian Hagedorn <[email protected]> Reviewed-by: chagedorn, thartmann
1 parent c68697e commit f155661

27 files changed

+1665
-79
lines changed

src/hotspot/share/jvmci/vmStructs_jvmci.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,7 @@
763763
declare_constant(Deoptimization::Reason_constraint) \
764764
declare_constant(Deoptimization::Reason_div0_check) \
765765
declare_constant(Deoptimization::Reason_loop_limit_check) \
766+
declare_constant(Deoptimization::Reason_short_running_long_loop) \
766767
declare_constant(Deoptimization::Reason_auto_vectorization_check) \
767768
declare_constant(Deoptimization::Reason_type_checked_inlining) \
768769
declare_constant(Deoptimization::Reason_optimized_type_check) \

src/hotspot/share/opto/c2_globals.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,15 @@
872872
"could corrupt the graph in rare cases and should be used with " \
873873
"care.") \
874874
\
875+
product(bool, ShortRunningLongLoop, true, DIAGNOSTIC, \
876+
"long counted loop/long range checks: don't create loop nest if " \
877+
"loop runs for small enough number of iterations. Long loop is " \
878+
"converted to a single int loop.") \
879+
\
880+
develop(bool, StressShortRunningLongLoop, false, \
881+
"Speculate all long counted loops are short running when bounds " \
882+
"are unknown even if profile data doesn't say so.") \
883+
\
875884
develop(bool, StressLoopPeeling, false, \
876885
"Randomize loop peeling decision") \
877886

src/hotspot/share/opto/castnode.cpp

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
#include "opto/addnode.hpp"
2727
#include "opto/callnode.hpp"
2828
#include "opto/castnode.hpp"
29+
#include "opto/cfgnode.hpp"
2930
#include "opto/connode.hpp"
31+
#include "opto/loopnode.hpp"
3032
#include "opto/matcher.hpp"
3133
#include "opto/phaseX.hpp"
3234
#include "opto/subnode.hpp"
@@ -323,6 +325,67 @@ const Type* CastLLNode::Value(PhaseGVN* phase) const {
323325
return widen_type(phase, res, T_LONG);
324326
}
325327

328+
// Returns true if `proj` is non-null, its unique control output (as reported by unique_ctrl_out_or_null()) is a node
// with opcode Op_Loop that has `proj` as its input 2, and that loop reports is_loop_nest_inner_loop().
// Returns false in every other case, including a null `proj` or a null unique control output.
// NOTE(review): input 2 is presumably the loop's backedge control input (hence the function name) -- confirm against
// the LoopNode declaration.
bool CastLLNode::is_inner_loop_backedge(ProjNode* proj) {
329+
if (proj != nullptr) {
330+
Node* ctrl_use = proj->unique_ctrl_out_or_null();
331+
// All three conditions must hold: the use has opcode Op_Loop, it is reached from `proj` through input 2, and it is
// flagged as the inner loop of a loop nest.
if (ctrl_use != nullptr && ctrl_use->Opcode() == Op_Loop &&
332+
ctrl_use->in(2) == proj &&
333+
ctrl_use->as_Loop()->is_loop_nest_inner_loop()) {
334+
return true;
335+
}
336+
}
337+
return false;
338+
}
339+
340+
// Returns true if `cmp` feeds the test of an If whose true or false projection satisfies is_inner_loop_backedge().
// The walk follows the def-use chain cmp -> Bool -> If; uses with any other opcode are ignored.
// Returns false when no such If is found.
bool CastLLNode::cmp_used_at_inner_loop_exit_test(CmpNode* cmp) {
341+
for (DUIterator_Fast imax, i = cmp->fast_outs(imax); i < imax; i++) {
342+
Node* bol = cmp->fast_out(i);
343+
if (bol->Opcode() == Op_Bool) {
344+
for (DUIterator_Fast jmax, j = bol->fast_outs(jmax); j < jmax; j++) {
345+
Node* iff = bol->fast_out(j);
346+
if (iff->Opcode() == Op_If) {
347+
// Either projection may be missing (the *_or_null accessor); is_inner_loop_backedge() tolerates nullptr.
ProjNode* true_proj = iff->as_If()->proj_out_or_null(true);
348+
ProjNode* false_proj = iff->as_If()->proj_out_or_null(false);
349+
if (is_inner_loop_backedge(true_proj) || is_inner_loop_backedge(false_proj)) {
350+
return true;
351+
}
352+
}
353+
}
354+
}
355+
}
356+
return false;
357+
}
358+
359+
// Find if this is a cast node added by PhaseIdealLoop::create_loop_nest() to narrow the number of iterations of the
360+
// inner loop.
361+
// Returns true if some ConvL2I use of this cast feeds a CmpI, either directly or through a (SubI 0 ...), for which
// cmp_used_at_inner_loop_exit_test() returns true. Returns false otherwise. The two accepted shapes are spelled out
// next to the corresponding `return true` statements below.
bool CastLLNode::used_at_inner_loop_exit_test() const {
362+
for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
363+
Node* convl2i = fast_out(i);
364+
if (convl2i->Opcode() == Op_ConvL2I) {
365+
for (DUIterator_Fast jmax, j = convl2i->fast_outs(jmax); j < jmax; j++) {
366+
Node* cmp_or_sub = convl2i->fast_out(j);
367+
if (cmp_or_sub->Opcode() == Op_CmpI) {
368+
if (cmp_used_at_inner_loop_exit_test(cmp_or_sub->as_Cmp())) {
369+
// (Loop .. .. (IfProj (If (Bool (CmpI (ConvL2I (CastLL )))))))
370+
return true;
371+
}
372+
// Only a subtraction whose first input is the int constant 0 is followed.
// NOTE(review): -1 is presumably the value find_int_con() returns when in(1) is not an int constant, so a
// non-constant input can never compare equal to 0 -- confirm against Node::find_int_con().
} else if (cmp_or_sub->Opcode() == Op_SubI && cmp_or_sub->in(1)->find_int_con(-1) == 0) {
373+
for (DUIterator_Fast kmax, k = cmp_or_sub->fast_outs(kmax); k < kmax; k++) {
374+
Node* cmp = cmp_or_sub->fast_out(k);
375+
if (cmp->Opcode() == Op_CmpI) {
376+
if (cmp_used_at_inner_loop_exit_test(cmp->as_Cmp())) {
377+
// (Loop .. .. (IfProj (If (Bool (CmpI (SubI 0 (ConvL2I (CastLL ))))))))
378+
return true;
379+
}
380+
}
381+
}
382+
}
383+
}
384+
}
385+
}
386+
return false;
387+
}
388+
326389
Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
327390
Node* progress = ConstraintCastNode::Ideal(phase, can_reshape);
328391
if (progress != nullptr) {
@@ -352,7 +415,12 @@ Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
352415
}
353416
}
354417
}
355-
return optimize_integer_cast(phase, T_LONG);
418+
// If it's a cast created by PhaseIdealLoop::short_running_loop(), don't transform it until the counted loop is created
419+
// in next loop opts pass
420+
if (!can_reshape || !used_at_inner_loop_exit_test()) {
421+
return optimize_integer_cast(phase, T_LONG);
422+
}
423+
return nullptr;
356424
}
357425

358426
//------------------------------Value------------------------------------------

src/hotspot/share/opto/castnode.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,12 @@ class CastLLNode: public ConstraintCastNode {
138138
}
139139

140140
virtual const Type* Value(PhaseGVN* phase) const;
141+
142+
static bool is_inner_loop_backedge(ProjNode* proj);
143+
144+
static bool cmp_used_at_inner_loop_exit_test(CmpNode* cmp);
145+
bool used_at_inner_loop_exit_test() const;
146+
141147
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
142148
virtual int Opcode() const;
143149
virtual uint ideal_reg() const { return Op_RegL; }

src/hotspot/share/opto/graphKit.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4050,6 +4050,11 @@ void GraphKit::add_parse_predicate(Deoptimization::DeoptReason reason, const int
40504050
// Add Parse Predicates which serve as placeholders to create new Runtime Predicates above them. All
40514051
// Runtime Predicates inside a Runtime Predicate block share the same uncommon trap as the Parse Predicate.
40524052
void GraphKit::add_parse_predicates(int nargs) {
4053+
if (ShortRunningLongLoop) {
4054+
// Will narrow the limit down with a cast node. Predicates added later may depend on the cast so should be last when
4055+
// walking up from the loop.
4056+
add_parse_predicate(Deoptimization::Reason_short_running_long_loop, nargs);
4057+
}
40534058
if (UseLoopPredicate) {
40544059
add_parse_predicate(Deoptimization::Reason_predicate, nargs);
40554060
if (UseProfiledLoopPredicate) {

src/hotspot/share/opto/ifnode.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,6 +2178,7 @@ ParsePredicateNode::ParsePredicateNode(Node* control, Deoptimization::DeoptReaso
21782178
case Deoptimization::Reason_profile_predicate:
21792179
case Deoptimization::Reason_auto_vectorization_check:
21802180
case Deoptimization::Reason_loop_limit_check:
2181+
case Deoptimization::Reason_short_running_long_loop:
21812182
break;
21822183
default:
21832184
assert(false, "unsupported deoptimization reason for Parse Predicate");
@@ -2226,6 +2227,9 @@ void ParsePredicateNode::dump_spec(outputStream* st) const {
22262227
case Deoptimization::DeoptReason::Reason_loop_limit_check:
22272228
st->print("Loop_Limit_Check ");
22282229
break;
2230+
case Deoptimization::DeoptReason::Reason_short_running_long_loop:
2231+
st->print("Short_Running_Long_Loop ");
2232+
break;
22292233
default:
22302234
fatal("unknown kind");
22312235
}

src/hotspot/share/opto/loopPredicate.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,7 +1054,7 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
10541054
#ifdef ASSERT
10551055
const bool exact_trip_count = cl->has_exact_trip_count();
10561056
const uint trip_count = cl->trip_count();
1057-
loop->compute_trip_count(this);
1057+
loop->compute_trip_count(this, T_INT);
10581058
assert(exact_trip_count == cl->has_exact_trip_count() && trip_count == cl->trip_count(),
10591059
"should have computed trip count on Loop Predication entry");
10601060
#endif
@@ -1171,7 +1171,7 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree* loop) {
11711171
// Do nothing for iteration-split loops
11721172
return false;
11731173
}
1174-
loop->compute_trip_count(this);
1174+
loop->compute_trip_count(this, T_INT);
11751175
if (cl->trip_count() == 1) {
11761176
// Not worth to hoist checks out of a loop that is only run for one iteration since the checks are only going to
11771177
// be executed once anyway.

src/hotspot/share/opto/loopTransform.cpp

Lines changed: 57 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,11 @@ void IdealLoopTree::record_for_igvn() {
9696
//------------------------------compute_trip_count-----------------------------
9797
// Compute loop trip count if possible. Do not recalculate trip count for
9898
// split loops (pre-main-post) which have their limits and inits behind Opaque node.
99-
void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
100-
if (!_head->as_Loop()->is_valid_counted_loop(T_INT)) {
99+
void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase, BasicType loop_bt) {
100+
if (!_head->as_Loop()->is_valid_counted_loop(loop_bt)) {
101101
return;
102102
}
103-
CountedLoopNode* cl = _head->as_CountedLoop();
103+
BaseCountedLoopNode* cl = _head->as_BaseCountedLoop();
104104
// Trip count may become nonexact for iteration split loops since
105105
// RCE modifies limits. Note, _trip_count value is not reset since
106106
// it is used to limit unrolling of main loop.
@@ -119,24 +119,62 @@ void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
119119
Node* init_n = cl->init_trip();
120120
Node* limit_n = cl->limit();
121121
if (init_n != nullptr && limit_n != nullptr) {
122-
// Use longs to avoid integer overflow.
123-
int stride_con = cl->stride_con();
124-
const TypeInt* init_type = phase->_igvn.type(init_n)->is_int();
125-
const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
126-
jlong init_con = (stride_con > 0) ? init_type->_lo : init_type->_hi;
127-
jlong limit_con = (stride_con > 0) ? limit_type->_hi : limit_type->_lo;
128-
int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
129-
jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
122+
jlong stride_con = cl->stride_con();
123+
const TypeInteger* init_type = phase->_igvn.type(init_n)->is_integer(loop_bt);
124+
const TypeInteger* limit_type = phase->_igvn.type(limit_n)->is_integer(loop_bt);
125+
126+
// compute trip count
127+
// It used to be computed as:
128+
// max(1, limit_con - init_con + stride_m) / stride_con
129+
// with stride_m = stride_con - (stride_con > 0 ? 1 : -1)
130+
// for int counted loops only and by promoting all values to long to avoid overflow
131+
// This implements the computation for int and long counted loops in a way that promotion to the next larger integer
132+
// type is not needed to protect against overflow.
133+
//
134+
// Use unsigned longs to avoid overflow: the number of iterations is a positive number but can be really large, for
135+
// instance if init_con = min_jint, limit_con = max_jint
136+
jlong init_con = (stride_con > 0) ? init_type->lo_as_long() : init_type->hi_as_long();
137+
julong uinit_con = init_con;
138+
jlong limit_con = (stride_con > 0) ? limit_type->hi_as_long() : limit_type->lo_as_long();
139+
julong ulimit_con = limit_con;
130140
// The loop body is always executed at least once even if init >= limit (for stride_con > 0) or
131141
// init <= limit (for stride_con < 0).
132-
trip_count = MAX2(trip_count, (jlong)1);
133-
if (trip_count < (jlong)max_juint) {
142+
julong udiff = 1;
143+
if (stride_con > 0 && limit_con > init_con) {
144+
udiff = ulimit_con - uinit_con;
145+
} else if (stride_con < 0 && limit_con < init_con) {
146+
udiff = uinit_con - ulimit_con;
147+
}
148+
// The loop runs for one more iteration if the limit is (stride > 0 in this example):
149+
// init + k * stride + small_value, 0 < small_value < stride
150+
julong utrip_count = udiff / ABS(stride_con);
151+
if (utrip_count * ABS(stride_con) != udiff) {
152+
// Guaranteed to not overflow because it can only happen for ABS(stride) > 1 in which case, utrip_count can't be
153+
// max_juint/max_julong
154+
utrip_count++;
155+
}
156+
157+
#ifdef ASSERT
158+
if (loop_bt == T_INT) {
159+
// Use longs to avoid integer overflow.
160+
jlong init_con = (stride_con > 0) ? init_type->is_int()->_lo : init_type->is_int()->_hi;
161+
jlong limit_con = (stride_con > 0) ? limit_type->is_int()->_hi : limit_type->is_int()->_lo;
162+
int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
163+
jlong trip_count = (limit_con - init_con + stride_m) / stride_con;
164+
// The loop body is always executed at least once even if init >= limit (for stride_con > 0) or
165+
// init <= limit (for stride_con < 0).
166+
trip_count = MAX2(trip_count, (jlong)1);
167+
assert(checked_cast<juint>(trip_count) == checked_cast<juint>(utrip_count), "incorrect trip count computation");
168+
}
169+
#endif
170+
171+
if (utrip_count < max_unsigned_integer(loop_bt)) {
134172
if (init_n->is_Con() && limit_n->is_Con()) {
135173
// Set exact trip count.
136-
cl->set_exact_trip_count((uint)trip_count);
137-
} else if (cl->unrolled_count() == 1) {
174+
cl->set_exact_trip_count(utrip_count);
175+
} else if (loop_bt == T_LONG || cl->as_CountedLoop()->unrolled_count() == 1) {
138176
// Set maximum trip count before unrolling.
139-
cl->set_trip_count((uint)trip_count);
177+
cl->set_trip_count(utrip_count);
140178
}
141179
}
142180
}
@@ -1851,7 +1889,7 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
18511889
#ifndef PRODUCT
18521890
if (TraceLoopOpts) {
18531891
if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
1854-
tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
1892+
tty->print("Unroll %d(" JULONG_FORMAT_W(2) ") ", loop_head->unrolled_count()*2, loop_head->trip_count());
18551893
} else {
18561894
tty->print("Unroll %d ", loop_head->unrolled_count()*2);
18571895
}
@@ -2104,7 +2142,7 @@ void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new
21042142
assert(cl->trip_count() > 0, "");
21052143
#ifndef PRODUCT
21062144
if (TraceLoopOpts) {
2107-
tty->print("MaxUnroll %d ", cl->trip_count());
2145+
tty->print("MaxUnroll " JULONG_FORMAT " ", cl->trip_count());
21082146
loop->dump_head();
21092147
}
21102148
#endif
@@ -3359,7 +3397,7 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
33593397
return false;
33603398
}
33613399
// Compute loop trip count if possible.
3362-
compute_trip_count(phase);
3400+
compute_trip_count(phase, T_INT);
33633401

33643402
// Convert one-iteration loop into normal code.
33653403
if (do_one_iteration_loop(phase)) {

0 commit comments

Comments
 (0)