Skip to content

Commit 70df8f6

Browse files
committed
re-enable direct bitcasts for Int/Float vector transmutes (but not ones involving pointers)
1 parent af1b680 commit 70df8f6

File tree

2 files changed

+196
-0
lines changed

2 files changed

+196
-0
lines changed

compiler/rustc_codegen_ssa/src/mir/rvalue.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,19 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
253253
return OperandValue::poison(bx, cast);
254254
}
255255

256+
// To or from pointers takes different methods, so we use this to restrict
257+
// the SimdVector case to types which can be `bitcast` between each other.
258+
#[inline]
259+
fn vector_can_bitcast(x: abi::Scalar) -> bool {
260+
matches!(
261+
x,
262+
abi::Scalar::Initialized {
263+
value: abi::Primitive::Int(..) | abi::Primitive::Float(..),
264+
..
265+
}
266+
)
267+
}
268+
256269
let cx = bx.cx();
257270
match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
258271
_ if cast.is_zst() => OperandValue::ZeroSized,
@@ -269,6 +282,14 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
269282
) if from_scalar.size(cx) == to_scalar.size(cx) => {
270283
OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar))
271284
}
285+
(
286+
OperandValue::Immediate(imm),
287+
abi::BackendRepr::SimdVector { element: from_scalar, .. },
288+
abi::BackendRepr::SimdVector { element: to_scalar, .. },
289+
) if vector_can_bitcast(from_scalar) && vector_can_bitcast(to_scalar) => {
290+
let to_backend_ty = bx.cx().immediate_backend_type(cast);
291+
OperandValue::Immediate(bx.bitcast(imm, to_backend_ty))
292+
}
272293
(
273294
OperandValue::Pair(imm_a, imm_b),
274295
abi::BackendRepr::ScalarPair(in_a, in_b),
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
2+
//@ only-64bit (so I don't need to worry about usize)
3+
4+
#![crate_type = "lib"]
5+
#![feature(core_intrinsics)]
6+
#![feature(repr_simd)]
7+
8+
use std::intrinsics::transmute;
9+
10+
// SIMD vector types used as transmute sources and destinations below.
// Four 32-bit integer lanes.
#[repr(simd)]
11+
pub struct I32X4([i32; 4]);
12+
// Two 64-bit integer lanes.
#[repr(simd)]
13+
pub struct I64X2([i64; 2]);
14+
// Four 32-bit float lanes.
#[repr(simd)]
15+
pub struct F32X4([f32; 4]);
16+
// Two 64-bit float lanes.
#[repr(simd)]
17+
pub struct F64X2([f64; 2]);
18+
// Two raw-pointer lanes; the pointer cases below expect a 16-byte temporary for it.
#[repr(simd)]
19+
pub struct PtrX2([*const (); 2]);
20+
21+
// Integer-to-integer vector transmute that changes the lane count
// (4 x i32 -> 2 x i64). The patterns below expect a single `bitcast` and no
// stack temporary.
// NOTE(review): `extern "sysv64"` is an x86-64 calling convention, while the
// file header only restricts the test to 64-bit targets -- confirm this is
// fine on other 64-bit architectures.
// CHECK-LABEL: <2 x i64> @mixed_int(<4 x i32> %v)
22+
#[no_mangle]
23+
pub extern "sysv64" fn mixed_int(v: I32X4) -> I64X2 {
24+
// CHECK-NOT: alloca
25+
// CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x i64>
26+
// CHECK: ret <2 x i64> %[[RET]]
27+
unsafe { transmute(v) }
28+
}
29+
30+
// Float-to-float vector transmute that changes the lane count
// (4 x float -> 2 x double). The patterns below expect a single `bitcast`
// and no stack temporary.
// CHECK-LABEL: <2 x double> @mixed_float(<4 x float> %v)
31+
#[no_mangle]
32+
pub extern "sysv64" fn mixed_float(v: F32X4) -> F64X2 {
33+
// CHECK-NOT: alloca
34+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x double>
35+
// CHECK: ret <2 x double> %[[RET]]
36+
unsafe { transmute(v) }
37+
}
38+
39+
// Float-to-integer vector transmute with matching lane count
// (4 x float -> 4 x i32). The patterns below expect a single `bitcast` and
// no stack temporary.
// CHECK-LABEL: <4 x i32> @float_int_same_lanes(<4 x float> %v)
40+
#[no_mangle]
41+
pub extern "sysv64" fn float_int_same_lanes(v: F32X4) -> I32X4 {
42+
// CHECK-NOT: alloca
43+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <4 x i32>
44+
// CHECK: ret <4 x i32> %[[RET]]
45+
unsafe { transmute(v) }
46+
}
47+
48+
// Integer-to-float vector transmute with matching lane count
// (2 x i64 -> 2 x double). The patterns below expect a single `bitcast` and
// no stack temporary.
// CHECK-LABEL: <2 x double> @int_float_same_lanes(<2 x i64> %v)
49+
#[no_mangle]
50+
pub extern "sysv64" fn int_float_same_lanes(v: I64X2) -> F64X2 {
51+
// CHECK-NOT: alloca
52+
// CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <2 x double>
53+
// CHECK: ret <2 x double> %[[RET]]
54+
unsafe { transmute(v) }
55+
}
56+
57+
// Float-to-integer vector transmute into wider lanes
// (4 x float -> 2 x i64). The patterns below expect a single `bitcast` and
// no stack temporary.
// CHECK-LABEL: <2 x i64> @float_int_widen(<4 x float> %v)
58+
#[no_mangle]
59+
pub extern "sysv64" fn float_int_widen(v: F32X4) -> I64X2 {
60+
// CHECK-NOT: alloca
61+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x i64>
62+
// CHECK: ret <2 x i64> %[[RET]]
63+
unsafe { transmute(v) }
64+
}
65+
66+
// Integer-to-float vector transmute into wider lanes
// (4 x i32 -> 2 x double). The patterns below expect a single `bitcast` and
// no stack temporary.
// CHECK-LABEL: <2 x double> @int_float_widen(<4 x i32> %v)
67+
#[no_mangle]
68+
pub extern "sysv64" fn int_float_widen(v: I32X4) -> F64X2 {
69+
// CHECK-NOT: alloca
70+
// CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x double>
71+
// CHECK: ret <2 x double> %[[RET]]
72+
unsafe { transmute(v) }
73+
}
74+
75+
// Float-to-integer vector transmute into narrower lanes
// (2 x double -> 4 x i32). The patterns below expect a single `bitcast` and
// no stack temporary.
// CHECK-LABEL: <4 x i32> @float_int_narrow(<2 x double> %v)
76+
#[no_mangle]
77+
pub extern "sysv64" fn float_int_narrow(v: F64X2) -> I32X4 {
78+
// CHECK-NOT: alloca
79+
// CHECK: %[[RET:.+]] = bitcast <2 x double> %v to <4 x i32>
80+
// CHECK: ret <4 x i32> %[[RET]]
81+
unsafe { transmute(v) }
82+
}
83+
84+
// Integer-to-float vector transmute into narrower lanes
// (2 x i64 -> 4 x float). The patterns below expect a single `bitcast` and
// no stack temporary.
// CHECK-LABEL: <4 x float> @int_float_narrow(<2 x i64> %v)
85+
#[no_mangle]
86+
pub extern "sysv64" fn int_float_narrow(v: I64X2) -> F32X4 {
87+
// CHECK-NOT: alloca
88+
// CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <4 x float>
89+
// CHECK: ret <4 x float> %[[RET]]
90+
unsafe { transmute(v) }
91+
}
92+
93+
// Float-to-pointer vector transmute with matching lane count
// (2 x double -> 2 x ptr). Pointer lanes are excluded from the direct
// `bitcast` path, so the patterns below expect exactly one 16-byte stack
// temporary: store the input, load it back as the pointer vector, with
// lifetime markers around the temporary.
// CHECK-LABEL: <2 x ptr> @float_ptr_same_lanes(<2 x double> %v)
94+
#[no_mangle]
95+
pub extern "sysv64" fn float_ptr_same_lanes(v: F64X2) -> PtrX2 {
96+
// CHECK-NOT: alloca
97+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
98+
// CHECK-NOT: alloca
99+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
100+
// CHECK: store <2 x double> %v, ptr %[[TEMP]]
101+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
102+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
103+
// CHECK: ret <2 x ptr> %[[RET]]
104+
unsafe { transmute(v) }
105+
}
106+
107+
// Pointer-to-float vector transmute with matching lane count
// (2 x ptr -> 2 x double). Pointer lanes are excluded from the direct
// `bitcast` path, so the patterns below expect exactly one 16-byte stack
// temporary: store the input, load it back as the float vector, with
// lifetime markers around the temporary.
// CHECK-LABEL: <2 x double> @ptr_float_same_lanes(<2 x ptr> %v)
108+
#[no_mangle]
109+
pub extern "sysv64" fn ptr_float_same_lanes(v: PtrX2) -> F64X2 {
110+
// CHECK-NOT: alloca
111+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
112+
// CHECK-NOT: alloca
113+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
114+
// CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
115+
// CHECK: %[[RET:.+]] = load <2 x double>, ptr %[[TEMP]]
116+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
117+
// CHECK: ret <2 x double> %[[RET]]
118+
unsafe { transmute(v) }
119+
}
120+
121+
// Integer-to-pointer vector transmute with matching lane count
// (2 x i64 -> 2 x ptr). Pointer lanes are excluded from the direct
// `bitcast` path, so the patterns below expect exactly one 16-byte stack
// temporary: store the input, load it back as the pointer vector, with
// lifetime markers around the temporary.
// CHECK-LABEL: <2 x ptr> @int_ptr_same_lanes(<2 x i64> %v)
122+
#[no_mangle]
123+
pub extern "sysv64" fn int_ptr_same_lanes(v: I64X2) -> PtrX2 {
124+
// CHECK-NOT: alloca
125+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
126+
// CHECK-NOT: alloca
127+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
128+
// CHECK: store <2 x i64> %v, ptr %[[TEMP]]
129+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
130+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
131+
// CHECK: ret <2 x ptr> %[[RET]]
132+
unsafe { transmute(v) }
133+
}
134+
135+
// Pointer-to-integer vector transmute with matching lane count
// (2 x ptr -> 2 x i64). Pointer lanes are excluded from the direct
// `bitcast` path, so the patterns below expect exactly one 16-byte stack
// temporary: store the input, load it back as the integer vector, with
// lifetime markers around the temporary.
// CHECK-LABEL: <2 x i64> @ptr_int_same_lanes(<2 x ptr> %v)
136+
#[no_mangle]
137+
pub extern "sysv64" fn ptr_int_same_lanes(v: PtrX2) -> I64X2 {
138+
// CHECK-NOT: alloca
139+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
140+
// CHECK-NOT: alloca
141+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
142+
// CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
143+
// CHECK: %[[RET:.+]] = load <2 x i64>, ptr %[[TEMP]]
144+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
145+
// CHECK: ret <2 x i64> %[[RET]]
146+
unsafe { transmute(v) }
147+
}
148+
149+
// Float-to-pointer vector transmute that also changes the lane count
// (4 x float -> 2 x ptr). Pointer lanes are excluded from the direct
// `bitcast` path, so the patterns below expect exactly one 16-byte stack
// temporary: store the input, load it back as the pointer vector, with
// lifetime markers around the temporary.
// CHECK-LABEL: <2 x ptr> @float_ptr_widen(<4 x float> %v)
150+
#[no_mangle]
151+
pub extern "sysv64" fn float_ptr_widen(v: F32X4) -> PtrX2 {
152+
// CHECK-NOT: alloca
153+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
154+
// CHECK-NOT: alloca
155+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
156+
// CHECK: store <4 x float> %v, ptr %[[TEMP]]
157+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
158+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
159+
// CHECK: ret <2 x ptr> %[[RET]]
160+
unsafe { transmute(v) }
161+
}
162+
163+
// Integer-to-pointer vector transmute that also changes the lane count
// (4 x i32 -> 2 x ptr). Pointer lanes are excluded from the direct
// `bitcast` path, so the patterns below expect exactly one 16-byte stack
// temporary: store the input, load it back as the pointer vector, with
// lifetime markers around the temporary.
// CHECK-LABEL: <2 x ptr> @int_ptr_widen(<4 x i32> %v)
163+
#[no_mangle]
164+
pub extern "sysv64" fn int_ptr_widen(v: I32X4) -> PtrX2 {
165+
// CHECK-NOT: alloca
166+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
167+
// CHECK-NOT: alloca
168+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
169+
// CHECK: store <4 x i32> %v, ptr %[[TEMP]]
170+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
171+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
172+
// CHECK: ret <2 x ptr> %[[RET]]
173+
unsafe { transmute(v) }
174+
}

0 commit comments

Comments
 (0)