Skip to content

Commit ac141c2

Browse files
author
Xiaohong Gong
committed
8359419: AArch64: Relax min vector length to 32-bit for short vectors
Reviewed-by: aph, fgao, bkilambi, dlunden
1 parent ed70910 commit ac141c2

File tree

10 files changed

+350
-128
lines changed

10 files changed

+350
-128
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2362,17 +2362,34 @@ int Matcher::max_vector_size(const BasicType bt) {
23622362
}
23632363

23642364
int Matcher::min_vector_size(const BasicType bt) {
// Returns the minimum vector length for element type bt, counted in
// elements (not bytes), and never larger than max_vector_size(bt).
2365-
int max_size = max_vector_size(bt);
2366-
// Limit the min vector size to 8 bytes.
2367-
int size = 8 / type2aelembytes(bt);
2368-
if (bt == T_BYTE) {
2369-
// To support vector api shuffle/rearrange.
2370-
size = 4;
2371-
} else if (bt == T_BOOLEAN) {
2372-
// To support vector api load/store mask.
2373-
size = 2;
2365+
// Usually, the shortest vector length supported by the AArch64 ISA and
2366+
// Vector API species is 64 bits. However, we allow 32-bit or 16-bit
2367+
// vectors in a few special cases.
2368+
int size;
2369+
switch(bt) {
2370+
case T_BOOLEAN:
2371+
// Load/store a vector mask with only 2 elements for vector types
2372+
// such as "2I/2F/2L/2D".
2373+
size = 2;
2374+
break;
2375+
case T_BYTE:
2376+
// Generate a "4B" vector, to support vector cast between "8B/16B"
2377+
// and "4S/4I/4L/4F/4D".
2378+
size = 4;
2379+
break;
2380+
case T_SHORT:
2381+
// Generate a "2S" vector, to support vector cast between "4S/8S"
2382+
// and "2I/2L/2F/2D".
2383+
size = 2;
2384+
break;
2385+
default:
2386+
// Limit the min vector length to 64 bits.
2387+
size = 8 / type2aelembytes(bt);
2388+
// The number of elements in a vector should be at least 2.
2389+
size = MAX2(size, 2);
23742390
}
2375-
if (size < 2) size = 2;
2391+
2392+
int max_size = max_vector_size(bt);
// The minimum must not exceed the maximum vector size for this type.
23762393
return MIN2(size, max_size);
23772394
}
23782395

src/hotspot/cpu/aarch64/aarch64_vector.ad

Lines changed: 63 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ source %{
131131
// These operations are not profitable to be vectorized on NEON, because no direct
132132
// NEON instructions support them. However, match rules for them are still profitable for
133133
// Vector API intrinsics.
134-
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
134+
if ((opcode == Op_VectorCastD2X && (bt == T_INT || bt == T_SHORT)) ||
135135
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
136136
(opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
137137
(opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
@@ -189,6 +189,18 @@ source %{
189189
return false;
190190
}
191191
break;
192+
case Op_AddReductionVI:
193+
case Op_AndReductionV:
194+
case Op_OrReductionV:
195+
case Op_XorReductionV:
196+
case Op_MinReductionV:
197+
case Op_MaxReductionV:
198+
// Reductions on vectors shorter than 8 bytes are
199+
// not supported.
200+
if (length_in_bytes < 8) {
201+
return false;
202+
}
203+
break;
192204
case Op_MulReductionVD:
193205
case Op_MulReductionVF:
194206
case Op_MulReductionVI:
@@ -4244,8 +4256,8 @@ instruct vzeroExtStoX(vReg dst, vReg src) %{
42444256
assert(bt == T_INT || bt == T_LONG, "must be");
42454257
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
42464258
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4247-
// 4S to 4I
4248-
__ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
4259+
// 2S to 2I/2L, 4S to 4I
4260+
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
42494261
$src$$FloatRegister, T_SHORT, /* is_unsigned */ true);
42504262
} else {
42514263
assert(UseSVE > 0, "must be sve");
@@ -4265,11 +4277,11 @@ instruct vzeroExtItoX(vReg dst, vReg src) %{
42654277
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
42664278
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
42674279
// 2I to 2L
4268-
__ neon_vector_extend($dst$$FloatRegister, T_LONG, length_in_bytes,
4280+
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
42694281
$src$$FloatRegister, T_INT, /* is_unsigned */ true);
42704282
} else {
42714283
assert(UseSVE > 0, "must be sve");
4272-
__ sve_vector_extend($dst$$FloatRegister, __ D,
4284+
__ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
42734285
$src$$FloatRegister, __ S, /* is_unsigned */ true);
42744286
}
42754287
%}
@@ -4343,11 +4355,15 @@ instruct vcvtStoX_extend(vReg dst, vReg src) %{
43434355
BasicType bt = Matcher::vector_element_basic_type(this);
43444356
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
43454357
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4346-
// 4S to 4I/4F
4347-
__ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
4348-
$src$$FloatRegister, T_SHORT);
4349-
if (bt == T_FLOAT) {
4350-
__ scvtfv(__ T4S, $dst$$FloatRegister, $dst$$FloatRegister);
4358+
if (is_floating_point_type(bt)) {
4359+
// 2S to 2F/2D, 4S to 4F
4360+
__ neon_vector_extend($dst$$FloatRegister, bt == T_FLOAT ? T_INT : T_LONG,
4361+
length_in_bytes, $src$$FloatRegister, T_SHORT);
4362+
__ scvtfv(get_arrangement(this), $dst$$FloatRegister, $dst$$FloatRegister);
4363+
} else {
4364+
// 2S to 2I/2L, 4S to 4I
4365+
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
4366+
$src$$FloatRegister, T_SHORT);
43514367
}
43524368
} else {
43534369
assert(UseSVE > 0, "must be sve");
@@ -4371,7 +4387,7 @@ instruct vcvtItoX_narrow_neon(vReg dst, vReg src) %{
43714387
effect(TEMP_DEF dst);
43724388
format %{ "vcvtItoX_narrow_neon $dst, $src" %}
43734389
ins_encode %{
4374-
// 4I to 4B/4S
4390+
// 2I to 2S, 4I to 4B/4S
43754391
BasicType bt = Matcher::vector_element_basic_type(this);
43764392
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
43774393
__ neon_vector_narrow($dst$$FloatRegister, bt,
@@ -4434,28 +4450,29 @@ instruct vcvtItoX(vReg dst, vReg src) %{
44344450

44354451
// VectorCastL2X
44364452

4437-
instruct vcvtLtoI_neon(vReg dst, vReg src) %{
4438-
predicate(Matcher::vector_element_basic_type(n) == T_INT &&
4453+
instruct vcvtLtoX_narrow_neon(vReg dst, vReg src) %{
4454+
predicate((Matcher::vector_element_basic_type(n) == T_INT ||
4455+
Matcher::vector_element_basic_type(n) == T_SHORT) &&
44394456
VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
44404457
match(Set dst (VectorCastL2X src));
4441-
format %{ "vcvtLtoI_neon $dst, $src" %}
4458+
format %{ "vcvtLtoX_narrow_neon $dst, $src" %}
44424459
ins_encode %{
4443-
// 2L to 2I
4460+
// 2L to 2S/2I
4461+
BasicType bt = Matcher::vector_element_basic_type(this);
44444462
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
4445-
__ neon_vector_narrow($dst$$FloatRegister, T_INT,
4463+
__ neon_vector_narrow($dst$$FloatRegister, bt,
44464464
$src$$FloatRegister, T_LONG, length_in_bytes);
44474465
%}
44484466
ins_pipe(pipe_slow);
44494467
%}
44504468

4451-
instruct vcvtLtoI_sve(vReg dst, vReg src, vReg tmp) %{
4452-
predicate((Matcher::vector_element_basic_type(n) == T_INT &&
4453-
!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))) ||
4454-
Matcher::vector_element_basic_type(n) == T_BYTE ||
4455-
Matcher::vector_element_basic_type(n) == T_SHORT);
4469+
instruct vcvtLtoX_narrow_sve(vReg dst, vReg src, vReg tmp) %{
4470+
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))) &&
4471+
!is_floating_point_type(Matcher::vector_element_basic_type(n)) &&
4472+
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
44564473
match(Set dst (VectorCastL2X src));
44574474
effect(TEMP_DEF dst, TEMP tmp);
4458-
format %{ "vcvtLtoI_sve $dst, $src\t# KILL $tmp" %}
4475+
format %{ "vcvtLtoX_narrow_sve $dst, $src\t# KILL $tmp" %}
44594476
ins_encode %{
44604477
assert(UseSVE > 0, "must be sve");
44614478
BasicType bt = Matcher::vector_element_basic_type(this);
@@ -4521,10 +4538,11 @@ instruct vcvtFtoX_narrow_neon(vReg dst, vReg src) %{
45214538
effect(TEMP_DEF dst);
45224539
format %{ "vcvtFtoX_narrow_neon $dst, $src" %}
45234540
ins_encode %{
4524-
// 4F to 4B/4S
4541+
// 2F to 2S, 4F to 4B/4S
45254542
BasicType bt = Matcher::vector_element_basic_type(this);
45264543
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
4527-
__ fcvtzs($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
4544+
__ fcvtzs($dst$$FloatRegister, length_in_bytes == 16 ? __ T4S : __ T2S,
4545+
$src$$FloatRegister);
45284546
__ neon_vector_narrow($dst$$FloatRegister, bt,
45294547
$dst$$FloatRegister, T_INT, length_in_bytes);
45304548
%}
@@ -4590,19 +4608,25 @@ instruct vcvtFtoX(vReg dst, vReg src) %{
45904608
// VectorCastD2X
45914609

45924610
instruct vcvtDtoI_neon(vReg dst, vReg src) %{
4593-
predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_INT);
4611+
predicate(UseSVE == 0 &&
4612+
(Matcher::vector_element_basic_type(n) == T_INT ||
4613+
Matcher::vector_element_basic_type(n) == T_SHORT));
45944614
match(Set dst (VectorCastD2X src));
45954615
effect(TEMP_DEF dst);
4596-
format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2I" %}
4616+
format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2S/2I" %}
45974617
ins_encode %{
4598-
// 2D to 2I
4618+
// 2D to 2S/2I
45994619
__ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, 1);
46004620
// We can't use fcvtzs(vector, integer) instruction here because we need
46014621
// saturation arithmetic. See JDK-8276151.
46024622
__ fcvtzdw(rscratch1, $src$$FloatRegister);
46034623
__ fcvtzdw(rscratch2, $dst$$FloatRegister);
46044624
__ fmovs($dst$$FloatRegister, rscratch1);
46054625
__ mov($dst$$FloatRegister, __ S, 1, rscratch2);
4626+
if (Matcher::vector_element_basic_type(this) == T_SHORT) {
4627+
__ neon_vector_narrow($dst$$FloatRegister, T_SHORT,
4628+
$dst$$FloatRegister, T_INT, 8);
4629+
}
46064630
%}
46074631
ins_pipe(pipe_slow);
46084632
%}
@@ -4676,7 +4700,7 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
46764700
ins_encode %{
46774701
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
46784702
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4679-
// 4HF to 4F
4703+
// 2HF to 2F, 4HF to 4F
46804704
__ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
46814705
} else {
46824706
assert(UseSVE > 0, "must be sve");
@@ -4692,9 +4716,9 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
46924716
instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
46934717
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
46944718
match(Set dst (VectorCastF2HF src));
4695-
format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
4719+
format %{ "vcvtFtoHF_neon $dst, $src\t# 2F/4F to 2HF/4HF" %}
46964720
ins_encode %{
4697-
// 4F to 4HF
4721+
// 2F to 2HF, 4F to 4HF
46984722
__ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
46994723
%}
47004724
ins_pipe(pipe_slow);
@@ -6396,14 +6420,12 @@ instruct vpopcountI(vReg dst, vReg src) %{
63966420
} else {
63976421
assert(bt == T_SHORT || bt == T_INT, "unsupported");
63986422
if (UseSVE == 0) {
6399-
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6400-
__ cnt($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
6401-
$src$$FloatRegister);
6402-
__ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
6403-
$dst$$FloatRegister);
6423+
assert(length_in_bytes <= 16, "unsupported");
6424+
bool isQ = length_in_bytes == 16;
6425+
__ cnt($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $src$$FloatRegister);
6426+
__ uaddlp($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $dst$$FloatRegister);
64046427
if (bt == T_INT) {
6405-
__ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T8H : __ T4H,
6406-
$dst$$FloatRegister);
6428+
__ uaddlp($dst$$FloatRegister, isQ ? __ T8H : __ T4H, $dst$$FloatRegister);
64076429
}
64086430
} else {
64096431
__ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
@@ -6465,7 +6487,7 @@ instruct vblend_neon(vReg dst, vReg src1, vReg src2) %{
64656487
format %{ "vblend_neon $dst, $src1, $src2" %}
64666488
ins_encode %{
64676489
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
6468-
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
6490+
assert(length_in_bytes <= 16, "must be");
64696491
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
64706492
$src2$$FloatRegister, $src1$$FloatRegister);
64716493
%}
@@ -6852,7 +6874,7 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
68526874
} else {
68536875
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
68546876
if (UseSVE == 0) {
6855-
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6877+
assert(length_in_bytes <= 16, "unsupported");
68566878
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
68576879
bt, /* isQ */ length_in_bytes == 16);
68586880
if (bt != T_LONG) {
@@ -6911,7 +6933,7 @@ instruct vreverse(vReg dst, vReg src) %{
69116933
} else {
69126934
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
69136935
if (UseSVE == 0) {
6914-
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6936+
assert(length_in_bytes <= 16, "unsupported");
69156937
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
69166938
bt, /* isQ */ length_in_bytes == 16);
69176939
} else {
@@ -6947,7 +6969,7 @@ instruct vreverseBytes(vReg dst, vReg src) %{
69476969
BasicType bt = Matcher::vector_element_basic_type(this);
69486970
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
69496971
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
6950-
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6972+
assert(length_in_bytes <= 16, "unsupported");
69516973
if (bt == T_BYTE) {
69526974
if ($dst$$FloatRegister != $src$$FloatRegister) {
69536975
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,

0 commit comments

Comments
 (0)