@@ -131,7 +131,7 @@ source %{
131
131
// These operations are not profitable to be vectorized on NEON, because no direct
132
132
// NEON instructions support them. But the match rule support for them is profitable for
133
133
// Vector API intrinsics.
134
- if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
134
+ if ((opcode == Op_VectorCastD2X && (bt == T_INT || bt == T_SHORT)) ||
135
135
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
136
136
(opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
137
137
(opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
@@ -189,6 +189,18 @@ source %{
189
189
return false;
190
190
}
191
191
break;
192
+ case Op_AddReductionVI:
193
+ case Op_AndReductionV:
194
+ case Op_OrReductionV:
195
+ case Op_XorReductionV:
196
+ case Op_MinReductionV:
197
+ case Op_MaxReductionV:
198
+ // Reductions with less than 8 bytes vector length are
199
+ // not supported.
200
+ if (length_in_bytes < 8) {
201
+ return false;
202
+ }
203
+ break;
192
204
case Op_MulReductionVD:
193
205
case Op_MulReductionVF:
194
206
case Op_MulReductionVI:
@@ -4244,8 +4256,8 @@ instruct vzeroExtStoX(vReg dst, vReg src) %{
4244
4256
assert(bt == T_INT || bt == T_LONG, "must be");
4245
4257
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
4246
4258
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4247
- // 4S to 4I
4248
- __ neon_vector_extend($dst$$FloatRegister, T_INT , length_in_bytes,
4259
+ // 2S to 2I/2L, 4S to 4I
4260
+ __ neon_vector_extend($dst$$FloatRegister, bt , length_in_bytes,
4249
4261
$src$$FloatRegister, T_SHORT, /* is_unsigned */ true);
4250
4262
} else {
4251
4263
assert(UseSVE > 0, "must be sve");
@@ -4265,11 +4277,11 @@ instruct vzeroExtItoX(vReg dst, vReg src) %{
4265
4277
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
4266
4278
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4267
4279
// 2I to 2L
4268
- __ neon_vector_extend($dst$$FloatRegister, T_LONG , length_in_bytes,
4280
+ __ neon_vector_extend($dst$$FloatRegister, bt , length_in_bytes,
4269
4281
$src$$FloatRegister, T_INT, /* is_unsigned */ true);
4270
4282
} else {
4271
4283
assert(UseSVE > 0, "must be sve");
4272
- __ sve_vector_extend($dst$$FloatRegister, __ D ,
4284
+ __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt) ,
4273
4285
$src$$FloatRegister, __ S, /* is_unsigned */ true);
4274
4286
}
4275
4287
%}
@@ -4343,11 +4355,15 @@ instruct vcvtStoX_extend(vReg dst, vReg src) %{
4343
4355
BasicType bt = Matcher::vector_element_basic_type(this);
4344
4356
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
4345
4357
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4346
- // 4S to 4I/4F
4347
- __ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
4348
- $src$$FloatRegister, T_SHORT);
4349
- if (bt == T_FLOAT) {
4350
- __ scvtfv(__ T4S, $dst$$FloatRegister, $dst$$FloatRegister);
4358
+ if (is_floating_point_type(bt)) {
4359
+ // 2S to 2F/2D, 4S to 4F
4360
+ __ neon_vector_extend($dst$$FloatRegister, bt == T_FLOAT ? T_INT : T_LONG,
4361
+ length_in_bytes, $src$$FloatRegister, T_SHORT);
4362
+ __ scvtfv(get_arrangement(this), $dst$$FloatRegister, $dst$$FloatRegister);
4363
+ } else {
4364
+ // 2S to 2I/2L, 4S to 4I
4365
+ __ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
4366
+ $src$$FloatRegister, T_SHORT);
4351
4367
}
4352
4368
} else {
4353
4369
assert(UseSVE > 0, "must be sve");
@@ -4371,7 +4387,7 @@ instruct vcvtItoX_narrow_neon(vReg dst, vReg src) %{
4371
4387
effect(TEMP_DEF dst);
4372
4388
format %{ "vcvtItoX_narrow_neon $dst, $src" %}
4373
4389
ins_encode %{
4374
- // 4I to 4B/4S
4390
+ // 2I to 2S, 4I to 4B/4S
4375
4391
BasicType bt = Matcher::vector_element_basic_type(this);
4376
4392
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
4377
4393
__ neon_vector_narrow($dst$$FloatRegister, bt,
@@ -4434,28 +4450,29 @@ instruct vcvtItoX(vReg dst, vReg src) %{
4434
4450
4435
4451
// VectorCastL2X
4436
4452
4437
- instruct vcvtLtoI_neon(vReg dst, vReg src) %{
4438
- predicate(Matcher::vector_element_basic_type(n) == T_INT &&
4453
+ instruct vcvtLtoX_narrow_neon(vReg dst, vReg src) %{
4454
+ predicate((Matcher::vector_element_basic_type(n) == T_INT ||
4455
+ Matcher::vector_element_basic_type(n) == T_SHORT) &&
4439
4456
VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
4440
4457
match(Set dst (VectorCastL2X src));
4441
- format %{ "vcvtLtoI_neon $dst, $src" %}
4458
+ format %{ "vcvtLtoX_narrow_neon $dst, $src" %}
4442
4459
ins_encode %{
4443
- // 2L to 2I
4460
+ // 2L to 2S/2I
4461
+ BasicType bt = Matcher::vector_element_basic_type(this);
4444
4462
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
4445
- __ neon_vector_narrow($dst$$FloatRegister, T_INT ,
4463
+ __ neon_vector_narrow($dst$$FloatRegister, bt ,
4446
4464
$src$$FloatRegister, T_LONG, length_in_bytes);
4447
4465
%}
4448
4466
ins_pipe(pipe_slow);
4449
4467
%}
4450
4468
4451
- instruct vcvtLtoI_sve(vReg dst, vReg src, vReg tmp) %{
4452
- predicate((Matcher::vector_element_basic_type(n) == T_INT &&
4453
- !VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))) ||
4454
- Matcher::vector_element_basic_type(n) == T_BYTE ||
4455
- Matcher::vector_element_basic_type(n) == T_SHORT);
4469
+ instruct vcvtLtoX_narrow_sve(vReg dst, vReg src, vReg tmp) %{
4470
+ predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))) &&
4471
+ !is_floating_point_type(Matcher::vector_element_basic_type(n)) &&
4472
+ type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
4456
4473
match(Set dst (VectorCastL2X src));
4457
4474
effect(TEMP_DEF dst, TEMP tmp);
4458
- format %{ "vcvtLtoI_sve $dst, $src\t# KILL $tmp" %}
4475
+ format %{ "vcvtLtoX_narrow_sve $dst, $src\t# KILL $tmp" %}
4459
4476
ins_encode %{
4460
4477
assert(UseSVE > 0, "must be sve");
4461
4478
BasicType bt = Matcher::vector_element_basic_type(this);
@@ -4521,10 +4538,11 @@ instruct vcvtFtoX_narrow_neon(vReg dst, vReg src) %{
4521
4538
effect(TEMP_DEF dst);
4522
4539
format %{ "vcvtFtoX_narrow_neon $dst, $src" %}
4523
4540
ins_encode %{
4524
- // 4F to 4B/4S
4541
+ // 2F to 2S, 4F to 4B/4S
4525
4542
BasicType bt = Matcher::vector_element_basic_type(this);
4526
4543
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
4527
- __ fcvtzs($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
4544
+ __ fcvtzs($dst$$FloatRegister, length_in_bytes == 16 ? __ T4S : __ T2S,
4545
+ $src$$FloatRegister);
4528
4546
__ neon_vector_narrow($dst$$FloatRegister, bt,
4529
4547
$dst$$FloatRegister, T_INT, length_in_bytes);
4530
4548
%}
@@ -4590,19 +4608,25 @@ instruct vcvtFtoX(vReg dst, vReg src) %{
4590
4608
// VectorCastD2X
4591
4609
4592
4610
instruct vcvtDtoI_neon(vReg dst, vReg src) %{
4593
- predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_INT);
4611
+ predicate(UseSVE == 0 &&
4612
+ (Matcher::vector_element_basic_type(n) == T_INT ||
4613
+ Matcher::vector_element_basic_type(n) == T_SHORT));
4594
4614
match(Set dst (VectorCastD2X src));
4595
4615
effect(TEMP_DEF dst);
4596
- format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2I" %}
4616
+ format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2S/ 2I" %}
4597
4617
ins_encode %{
4598
- // 2D to 2I
4618
+ // 2D to 2S/ 2I
4599
4619
__ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, 1);
4600
4620
// We can't use fcvtzs(vector, integer) instruction here because we need
4601
4621
// saturation arithmetic. See JDK-8276151.
4602
4622
__ fcvtzdw(rscratch1, $src$$FloatRegister);
4603
4623
__ fcvtzdw(rscratch2, $dst$$FloatRegister);
4604
4624
__ fmovs($dst$$FloatRegister, rscratch1);
4605
4625
__ mov($dst$$FloatRegister, __ S, 1, rscratch2);
4626
+ if (Matcher::vector_element_basic_type(this) == T_SHORT) {
4627
+ __ neon_vector_narrow($dst$$FloatRegister, T_SHORT,
4628
+ $dst$$FloatRegister, T_INT, 8);
4629
+ }
4606
4630
%}
4607
4631
ins_pipe(pipe_slow);
4608
4632
%}
@@ -4676,7 +4700,7 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
4676
4700
ins_encode %{
4677
4701
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
4678
4702
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4679
- // 4HF to 4F
4703
+ // 2HF to 2F, 4HF to 4F
4680
4704
__ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
4681
4705
} else {
4682
4706
assert(UseSVE > 0, "must be sve");
@@ -4692,9 +4716,9 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
4692
4716
instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
4693
4717
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
4694
4718
match(Set dst (VectorCastF2HF src));
4695
- format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
4719
+ format %{ "vcvtFtoHF_neon $dst, $src\t# 2F/ 4F to 2HF/ 4HF" %}
4696
4720
ins_encode %{
4697
- // 4F to 4HF
4721
+ // 2F to 2HF, 4F to 4HF
4698
4722
__ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
4699
4723
%}
4700
4724
ins_pipe(pipe_slow);
@@ -6396,14 +6420,12 @@ instruct vpopcountI(vReg dst, vReg src) %{
6396
6420
} else {
6397
6421
assert(bt == T_SHORT || bt == T_INT, "unsupported");
6398
6422
if (UseSVE == 0) {
6399
- assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6400
- __ cnt($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
6401
- $src$$FloatRegister);
6402
- __ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
6403
- $dst$$FloatRegister);
6423
+ assert(length_in_bytes <= 16, "unsupported");
6424
+ bool isQ = length_in_bytes == 16;
6425
+ __ cnt($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $src$$FloatRegister);
6426
+ __ uaddlp($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $dst$$FloatRegister);
6404
6427
if (bt == T_INT) {
6405
- __ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T8H : __ T4H,
6406
- $dst$$FloatRegister);
6428
+ __ uaddlp($dst$$FloatRegister, isQ ? __ T8H : __ T4H, $dst$$FloatRegister);
6407
6429
}
6408
6430
} else {
6409
6431
__ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
@@ -6465,7 +6487,7 @@ instruct vblend_neon(vReg dst, vReg src1, vReg src2) %{
6465
6487
format %{ "vblend_neon $dst, $src1, $src2" %}
6466
6488
ins_encode %{
6467
6489
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
6468
- assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
6490
+ assert(length_in_bytes <= 16, "must be");
6469
6491
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
6470
6492
$src2$$FloatRegister, $src1$$FloatRegister);
6471
6493
%}
@@ -6852,7 +6874,7 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
6852
6874
} else {
6853
6875
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
6854
6876
if (UseSVE == 0) {
6855
- assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6877
+ assert(length_in_bytes <= 16, "unsupported");
6856
6878
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
6857
6879
bt, /* isQ */ length_in_bytes == 16);
6858
6880
if (bt != T_LONG) {
@@ -6911,7 +6933,7 @@ instruct vreverse(vReg dst, vReg src) %{
6911
6933
} else {
6912
6934
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
6913
6935
if (UseSVE == 0) {
6914
- assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6936
+ assert(length_in_bytes <= 16, "unsupported");
6915
6937
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
6916
6938
bt, /* isQ */ length_in_bytes == 16);
6917
6939
} else {
@@ -6947,7 +6969,7 @@ instruct vreverseBytes(vReg dst, vReg src) %{
6947
6969
BasicType bt = Matcher::vector_element_basic_type(this);
6948
6970
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
6949
6971
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
6950
- assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6972
+ assert(length_in_bytes <= 16, "unsupported");
6951
6973
if (bt == T_BYTE) {
6952
6974
if ($dst$$FloatRegister != $src$$FloatRegister) {
6953
6975
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
0 commit comments