diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 72401a7a259c0..e0ce3d1dcb620 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -40,6 +40,8 @@ class WebAssemblyTargetLowering final : public TargetLowering { MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override; MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override; + bool softPromoteHalfType() const override { return true; } + private: /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. diff --git a/llvm/test/CodeGen/Generic/half.ll b/llvm/test/CodeGen/Generic/half.ll index f4ea5b5b30621..9cd10a719f285 100644 --- a/llvm/test/CodeGen/Generic/half.ll +++ b/llvm/test/CodeGen/Generic/half.ll @@ -40,7 +40,7 @@ ; RUN: %if spirv-registered-target %{ llc %s -o - -mtriple=spirv-unknown-unknown | FileCheck %s --check-prefixes=NOCRASH %} ; RUN: %if systemz-registered-target %{ llc %s -o - -mtriple=s390x-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if ve-registered-target %{ llc %s -o - -mtriple=ve-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD %} -; RUN: %if webassembly-registered-target %{ llc %s -o - -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD %} +; RUN: %if webassembly-registered-target %{ llc %s -o - -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} diff --git 
a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll similarity index 99% rename from llvm/test/CodeGen/WebAssembly/half-precision.ll rename to llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll index 4e8ff5955c63b..8033ec5d310fa 100644 --- a/llvm/test/CodeGen/WebAssembly/half-precision.ll +++ b/llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128 | FileCheck %s +; Tests for `llvm.wasm.*.*f16` intrinsics + declare float @llvm.wasm.loadf32.f16(ptr) declare void @llvm.wasm.storef16.f32(float, ptr) diff --git a/llvm/test/CodeGen/WebAssembly/f16.ll b/llvm/test/CodeGen/WebAssembly/f16.ll index b67c0c16d4651..3c31d55abfadc 100644 --- a/llvm/test/CodeGen/WebAssembly/f16.ll +++ b/llvm/test/CodeGen/WebAssembly/f16.ll @@ -1,69 +1,627 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; Test that f16 is expanded. +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s --check-prefixes=ALL,DEFISEL +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes=ALL,FASTISEL + +; Tests for various operations on half-precision floats. Much of the test is +; copied from test/CodeGen/X86/half.ll. 
target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: demote.f32: -; CHECK-NEXT: .functype demote.f32 (f32) -> (f32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: call $push[[L1:[0-9]+]]=, __truncsfhf2, $pop[[L0]]{{$}} -; CHECK-NEXT: call $push[[L2:[0-9]+]]=, __extendhfsf2, $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[L2]]{{$}} -define half @demote.f32(float %f) { - %t = fptrunc float %f to half - ret half %t -} - -; CHECK-LABEL: promote.f32: -; CHECK-NEXT: .functype promote.f32 (f32) -> (f32){{$}} -; CHECK-NEXT: local.get $push0=, 0{{$}} -; CHECK-NEXT: return $pop0{{$}} -define float @promote.f32(half %f) { - %t = fpext half %f to float - ret float %t -} - -; CHECK-LABEL: demote.f64: -; CHECK-NEXT: .functype demote.f64 (f64) -> (f32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: call $push[[L1:[0-9]+]]=, __truncdfhf2, $pop[[L0]]{{$}} -; CHECK-NEXT: call $push[[L2:[0-9]+]]=, __extendhfsf2, $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[L2]]{{$}} -define half @demote.f64(double %f) { - %t = fptrunc double %f to half - ret half %t -} - -; CHECK-LABEL: promote.f64: -; CHECK-NEXT: .functype promote.f64 (f32) -> (f64){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f64.promote_f32 $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} -; CHECK-NEXT: return $pop[[L1]]{{$}} -define double @promote.f64(half %f) { - %t = fpext half %f to double - ret double %t -} - -; CHECK-LABEL: demote.f128: -; CHECK-NEXT: .functype demote.f128 (i64, i64) -> (f32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: call $push[[L2:[0-9]+]]=, __trunctfhf2, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: call $push[[L3:[0-9]+]]=, __extendhfsf2, $pop[[L2]]{{$}} -; CHECK-NEXT: return $pop[[L3]]{{$}} -define half @demote.f128(fp128 %f) { - %t = fptrunc fp128 %f to half - ret half %t -} - -; CHECK-LABEL: promote.f128: -; CHECK-NEXT: .functype promote.f128 (i32, f32) -> 
(){{$}} -; CHECK: call __extendsftf2 -; CHECK: i64.store -; CHECK: i64.store -define fp128 @promote.f128(half %f) { - %t = fpext half %f to fp128 - ret fp128 %t +define void @store(half %x, ptr %p) nounwind { +; ALL-LABEL: store: +; ALL: .functype store (i32, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push1=, 1 +; ALL-NEXT: local.get $push0=, 0 +; ALL-NEXT: i32.store16 0($pop1), $pop0 +; ALL-NEXT: return + store half %x, ptr %p + ret void +} + +define half @return(ptr %p) nounwind { +; ALL-LABEL: return: +; ALL: .functype return (i32) -> (i32) +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push1=, 0 +; ALL-NEXT: i32.load16_u $push0=, 0($pop1) +; ALL-NEXT: return $pop0 + %r = load half, ptr %p + ret half %r +} + +define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind { +; DEFISEL-LABEL: loadd: +; DEFISEL: .functype loadd (i32) -> (f64) +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push3=, 0 +; DEFISEL-NEXT: i32.load16_u $push0=, 2($pop3) +; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0 +; DEFISEL-NEXT: f64.promote_f32 $push2=, $pop1 +; DEFISEL-NEXT: return $pop2 +; +; FASTISEL-LABEL: loadd: +; FASTISEL: .functype loadd (i32) -> (f64) +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push3=, 0 +; FASTISEL-NEXT: i32.load16_u $push2=, 2($pop3) +; FASTISEL-NEXT: call $push1=, __extendhfsf2, $pop2 +; FASTISEL-NEXT: f64.promote_f32 $push0=, $pop1 +; FASTISEL-NEXT: return $pop0 + %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 + %x = load i16, ptr %arrayidx, align 2 + %ret = tail call double @llvm.convert.from.fp16.f64(i16 %x) + ret double %ret +} + +define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind { +; DEFISEL-LABEL: loadf: +; DEFISEL: .functype loadf (i32) -> (f32) +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push2=, 0 +; DEFISEL-NEXT: i32.load16_u $push0=, 2($pop2) +; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0 +; DEFISEL-NEXT: return $pop1 +; +; 
FASTISEL-LABEL: loadf: +; FASTISEL: .functype loadf (i32) -> (f32) +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push2=, 0 +; FASTISEL-NEXT: i32.load16_u $push1=, 2($pop2) +; FASTISEL-NEXT: call $push0=, __extendhfsf2, $pop1 +; FASTISEL-NEXT: return $pop0 + %arrayidx = getelementptr inbounds i16, ptr %a, i64 1 + %x = load i16, ptr %arrayidx, align 2 + %ret = tail call float @llvm.convert.from.fp16.f32(i16 %x) + ret float %ret +} + +define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind { +; ALL-LABEL: stored: +; ALL: .functype stored (i32, f64) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: local.get $push1=, 1 +; ALL-NEXT: call $push0=, __truncdfhf2, $pop1 +; ALL-NEXT: i32.store16 0($pop2), $pop0 +; ALL-NEXT: return + %x = tail call i16 @llvm.convert.to.fp16.f64(double %b) + store i16 %x, ptr %a, align 2 + ret void +} + +define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind { +; ALL-LABEL: storef: +; ALL: .functype storef (i32, f32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: local.get $push1=, 1 +; ALL-NEXT: call $push0=, __truncsfhf2, $pop1 +; ALL-NEXT: i32.store16 0($pop2), $pop0 +; ALL-NEXT: return + %x = tail call i16 @llvm.convert.to.fp16.f32(float %b) + store i16 %x, ptr %a, align 2 + ret void +} + +define void @test_load_store(ptr %in, ptr %out) nounwind { +; ALL-LABEL: test_load_store: +; ALL: .functype test_load_store (i32, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push2=, 1 +; ALL-NEXT: local.get $push1=, 0 +; ALL-NEXT: i32.load16_u $push0=, 0($pop1) +; ALL-NEXT: i32.store16 0($pop2), $pop0 +; ALL-NEXT: return + %val = load half, ptr %in + store half %val, ptr %out + ret void +} + +define i16 @test_bitcast_from_half(ptr %addr) nounwind { +; ALL-LABEL: test_bitcast_from_half: +; ALL: .functype test_bitcast_from_half (i32) -> (i32) +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push1=, 0 +; ALL-NEXT: 
i32.load16_u $push0=, 0($pop1) +; ALL-NEXT: return $pop0 + %val = load half, ptr %addr + %val_int = bitcast half %val to i16 + ret i16 %val_int +} + +define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind { +; ALL-LABEL: test_bitcast_to_half: +; ALL: .functype test_bitcast_to_half (i32, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push1=, 0 +; ALL-NEXT: local.get $push0=, 1 +; ALL-NEXT: i32.store16 0($pop1), $pop0 +; ALL-NEXT: return + %val_fp = bitcast i16 %in to half + store half %val_fp, ptr %addr + ret void +} + +define half @from_bits(i16 %x) nounwind { +; ALL-LABEL: from_bits: +; ALL: .functype from_bits (i32) -> (i32) +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push0=, 0 +; ALL-NEXT: return $pop0 + %res = bitcast i16 %x to half + ret half %res +} + +define i16 @to_bits(half %x) nounwind { +; ALL-LABEL: to_bits: +; ALL: .functype to_bits (i32) -> (i32) +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push0=, 0 +; ALL-NEXT: return $pop0 + %res = bitcast half %x to i16 + ret i16 %res +} + +define float @test_extend32(ptr %addr) nounwind { +; DEFISEL-LABEL: test_extend32: +; DEFISEL: .functype test_extend32 (i32) -> (f32) +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push2=, 0 +; DEFISEL-NEXT: i32.load16_u $push0=, 0($pop2) +; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0 +; DEFISEL-NEXT: return $pop1 +; +; FASTISEL-LABEL: test_extend32: +; FASTISEL: .functype test_extend32 (i32) -> (f32) +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push2=, 0 +; FASTISEL-NEXT: i32.load16_u $push1=, 0($pop2) +; FASTISEL-NEXT: call $push0=, __extendhfsf2, $pop1 +; FASTISEL-NEXT: return $pop0 + %val16 = load half, ptr %addr + %val32 = fpext half %val16 to float + ret float %val32 +} + +define double @test_extend64(ptr %addr) nounwind { +; DEFISEL-LABEL: test_extend64: +; DEFISEL: .functype test_extend64 (i32) -> (f64) +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push3=, 0 +; DEFISEL-NEXT: i32.load16_u $push0=, 0($pop3) +; 
DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0 +; DEFISEL-NEXT: f64.promote_f32 $push2=, $pop1 +; DEFISEL-NEXT: return $pop2 +; +; FASTISEL-LABEL: test_extend64: +; FASTISEL: .functype test_extend64 (i32) -> (f64) +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push3=, 0 +; FASTISEL-NEXT: i32.load16_u $push1=, 0($pop3) +; FASTISEL-NEXT: call $push2=, __extendhfsf2, $pop1 +; FASTISEL-NEXT: f64.promote_f32 $push0=, $pop2 +; FASTISEL-NEXT: return $pop0 + %val16 = load half, ptr %addr + %val32 = fpext half %val16 to double + ret double %val32 +} + +define fp128 @test_extend128(ptr %addr) nounwind { +; ALL-LABEL: test_extend128: +; ALL: .functype test_extend128 (i32, i32) -> () +; ALL-NEXT: .local i32 +; ALL-NEXT: # %bb.0: +; ALL-NEXT: global.get $push4=, __stack_pointer +; ALL-NEXT: i32.const $push5=, 16 +; ALL-NEXT: i32.sub $push9=, $pop4, $pop5 +; ALL-NEXT: local.tee $push8=, 2, $pop9 +; ALL-NEXT: global.set __stack_pointer, $pop8 +; ALL-NEXT: local.get $push11=, 2 +; ALL-NEXT: local.get $push10=, 1 +; ALL-NEXT: i32.load16_u $push0=, 0($pop10) +; ALL-NEXT: call $push1=, __extendhfsf2, $pop0 +; ALL-NEXT: call __extendsftf2, $pop11, $pop1 +; ALL-NEXT: local.get $push13=, 0 +; ALL-NEXT: local.get $push12=, 2 +; ALL-NEXT: i64.load $push2=, 8($pop12) +; ALL-NEXT: i64.store 8($pop13), $pop2 +; ALL-NEXT: local.get $push15=, 0 +; ALL-NEXT: local.get $push14=, 2 +; ALL-NEXT: i64.load $push3=, 0($pop14) +; ALL-NEXT: i64.store 0($pop15), $pop3 +; ALL-NEXT: local.get $push16=, 2 +; ALL-NEXT: i32.const $push6=, 16 +; ALL-NEXT: i32.add $push7=, $pop16, $pop6 +; ALL-NEXT: global.set __stack_pointer, $pop7 +; ALL-NEXT: return + %val16 = load half, ptr %addr + %val32 = fpext half %val16 to fp128 + ret fp128 %val32 +} + +define void @test_trunc32(float %in, ptr %addr) nounwind { +; ALL-LABEL: test_trunc32: +; ALL: .functype test_trunc32 (f32, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push2=, 1 +; ALL-NEXT: local.get $push1=, 0 +; ALL-NEXT: call $push0=, 
__truncsfhf2, $pop1 +; ALL-NEXT: i32.store16 0($pop2), $pop0 +; ALL-NEXT: return + %val16 = fptrunc float %in to half + store half %val16, ptr %addr + ret void +} + +define void @test_trunc64(double %in, ptr %addr) nounwind { +; ALL-LABEL: test_trunc64: +; ALL: .functype test_trunc64 (f64, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push2=, 1 +; ALL-NEXT: local.get $push1=, 0 +; ALL-NEXT: call $push0=, __truncdfhf2, $pop1 +; ALL-NEXT: i32.store16 0($pop2), $pop0 +; ALL-NEXT: return + %val16 = fptrunc double %in to half + store half %val16, ptr %addr + ret void +} + +define void @test_trunc128(fp128 %in, ptr %addr) nounwind { +; ALL-LABEL: test_trunc128: +; ALL: .functype test_trunc128 (i64, i64, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push3=, 2 +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: local.get $push1=, 1 +; ALL-NEXT: call $push0=, __trunctfhf2, $pop2, $pop1 +; ALL-NEXT: i32.store16 0($pop3), $pop0 +; ALL-NEXT: return + %val16 = fptrunc fp128 %in to half + store half %val16, ptr %addr + ret void +} + +define i64 @test_fptosi_i64(ptr %p) nounwind { +; DEFISEL-LABEL: test_fptosi_i64: +; DEFISEL: .functype test_fptosi_i64 (i32) -> (i64) +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push3=, 0 +; DEFISEL-NEXT: i32.load16_u $push0=, 0($pop3) +; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0 +; DEFISEL-NEXT: i64.trunc_sat_f32_s $push2=, $pop1 +; DEFISEL-NEXT: return $pop2 +; +; FASTISEL-LABEL: test_fptosi_i64: +; FASTISEL: .functype test_fptosi_i64 (i32) -> (i64) +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push3=, 0 +; FASTISEL-NEXT: i32.load16_u $push1=, 0($pop3) +; FASTISEL-NEXT: call $push2=, __extendhfsf2, $pop1 +; FASTISEL-NEXT: i64.trunc_sat_f32_s $push0=, $pop2 +; FASTISEL-NEXT: return $pop0 + %a = load half, ptr %p, align 2 + %r = fptosi half %a to i64 + ret i64 %r +} + +define void @test_sitofp_i64(i64 %a, ptr %p) nounwind { +; ALL-LABEL: test_sitofp_i64: +; ALL: .functype test_sitofp_i64 (i64, i32) -> () 
+; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push3=, 1 +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: f32.convert_i64_s $push0=, $pop2 +; ALL-NEXT: call $push1=, __truncsfhf2, $pop0 +; ALL-NEXT: i32.store16 0($pop3), $pop1 +; ALL-NEXT: return + %r = sitofp i64 %a to half + store half %r, ptr %p + ret void +} + +define i64 @test_fptoui_i64(ptr %p) nounwind { +; DEFISEL-LABEL: test_fptoui_i64: +; DEFISEL: .functype test_fptoui_i64 (i32) -> (i64) +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push3=, 0 +; DEFISEL-NEXT: i32.load16_u $push0=, 0($pop3) +; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0 +; DEFISEL-NEXT: i64.trunc_sat_f32_u $push2=, $pop1 +; DEFISEL-NEXT: return $pop2 +; +; FASTISEL-LABEL: test_fptoui_i64: +; FASTISEL: .functype test_fptoui_i64 (i32) -> (i64) +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push3=, 0 +; FASTISEL-NEXT: i32.load16_u $push1=, 0($pop3) +; FASTISEL-NEXT: call $push2=, __extendhfsf2, $pop1 +; FASTISEL-NEXT: i64.trunc_sat_f32_u $push0=, $pop2 +; FASTISEL-NEXT: return $pop0 + %a = load half, ptr %p, align 2 + %r = fptoui half %a to i64 + ret i64 %r +} + +define void @test_uitofp_i64(i64 %a, ptr %p) nounwind { +; ALL-LABEL: test_uitofp_i64: +; ALL: .functype test_uitofp_i64 (i64, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push3=, 1 +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: f32.convert_i64_u $push0=, $pop2 +; ALL-NEXT: call $push1=, __truncsfhf2, $pop0 +; ALL-NEXT: i32.store16 0($pop3), $pop1 +; ALL-NEXT: return + %r = uitofp i64 %a to half + store half %r, ptr %p + ret void +} + +define <4 x float> @test_extend32_vec4(ptr %p) nounwind { +; ALL-LABEL: test_extend32_vec4: +; ALL: .functype test_extend32_vec4 (i32, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push9=, 0 +; ALL-NEXT: local.get $push8=, 1 +; ALL-NEXT: i32.load16_u $push0=, 6($pop8) +; ALL-NEXT: call $push1=, __extendhfsf2, $pop0 +; ALL-NEXT: f32.store 12($pop9), $pop1 +; ALL-NEXT: local.get $push11=, 0 +; ALL-NEXT: local.get 
$push10=, 1 +; ALL-NEXT: i32.load16_u $push2=, 4($pop10) +; ALL-NEXT: call $push3=, __extendhfsf2, $pop2 +; ALL-NEXT: f32.store 8($pop11), $pop3 +; ALL-NEXT: local.get $push13=, 0 +; ALL-NEXT: local.get $push12=, 1 +; ALL-NEXT: i32.load16_u $push4=, 2($pop12) +; ALL-NEXT: call $push5=, __extendhfsf2, $pop4 +; ALL-NEXT: f32.store 4($pop13), $pop5 +; ALL-NEXT: local.get $push15=, 0 +; ALL-NEXT: local.get $push14=, 1 +; ALL-NEXT: i32.load16_u $push6=, 0($pop14) +; ALL-NEXT: call $push7=, __extendhfsf2, $pop6 +; ALL-NEXT: f32.store 0($pop15), $pop7 +; ALL-NEXT: return + %a = load <4 x half>, ptr %p, align 8 + %b = fpext <4 x half> %a to <4 x float> + ret <4 x float> %b +} + +define <4 x double> @test_extend64_vec4(ptr %p) nounwind { +; ALL-LABEL: test_extend64_vec4: +; ALL: .functype test_extend64_vec4 (i32, i32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push13=, 0 +; ALL-NEXT: local.get $push12=, 1 +; ALL-NEXT: i64.load16_u $push0=, 6($pop12) +; ALL-NEXT: call $push1=, __extendhfsf2, $pop0 +; ALL-NEXT: f64.promote_f32 $push2=, $pop1 +; ALL-NEXT: f64.store 24($pop13), $pop2 +; ALL-NEXT: local.get $push15=, 0 +; ALL-NEXT: local.get $push14=, 1 +; ALL-NEXT: i64.load16_u $push3=, 4($pop14) +; ALL-NEXT: call $push4=, __extendhfsf2, $pop3 +; ALL-NEXT: f64.promote_f32 $push5=, $pop4 +; ALL-NEXT: f64.store 16($pop15), $pop5 +; ALL-NEXT: local.get $push17=, 0 +; ALL-NEXT: local.get $push16=, 1 +; ALL-NEXT: i64.load16_u $push6=, 2($pop16) +; ALL-NEXT: call $push7=, __extendhfsf2, $pop6 +; ALL-NEXT: f64.promote_f32 $push8=, $pop7 +; ALL-NEXT: f64.store 8($pop17), $pop8 +; ALL-NEXT: local.get $push19=, 0 +; ALL-NEXT: local.get $push18=, 1 +; ALL-NEXT: i64.load16_u $push9=, 0($pop18) +; ALL-NEXT: call $push10=, __extendhfsf2, $pop9 +; ALL-NEXT: f64.promote_f32 $push11=, $pop10 +; ALL-NEXT: f64.store 0($pop19), $pop11 +; ALL-NEXT: return + %a = load <4 x half>, ptr %p, align 8 + %b = fpext <4 x half> %a to <4 x double> + ret <4 x double> %b +} + +define void 
@test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind { +; DEFISEL-LABEL: test_trunc32_vec4: +; DEFISEL: .functype test_trunc32_vec4 (f32, f32, f32, f32, i32) -> () +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push5=, 4 +; DEFISEL-NEXT: local.get $push4=, 3 +; DEFISEL-NEXT: call $push0=, __truncsfhf2, $pop4 +; DEFISEL-NEXT: i32.store16 6($pop5), $pop0 +; DEFISEL-NEXT: local.get $push7=, 4 +; DEFISEL-NEXT: local.get $push6=, 2 +; DEFISEL-NEXT: call $push1=, __truncsfhf2, $pop6 +; DEFISEL-NEXT: i32.store16 4($pop7), $pop1 +; DEFISEL-NEXT: local.get $push9=, 4 +; DEFISEL-NEXT: local.get $push8=, 1 +; DEFISEL-NEXT: call $push2=, __truncsfhf2, $pop8 +; DEFISEL-NEXT: i32.store16 2($pop9), $pop2 +; DEFISEL-NEXT: local.get $push11=, 4 +; DEFISEL-NEXT: local.get $push10=, 0 +; DEFISEL-NEXT: call $push3=, __truncsfhf2, $pop10 +; DEFISEL-NEXT: i32.store16 0($pop11), $pop3 +; DEFISEL-NEXT: return +; +; FASTISEL-LABEL: test_trunc32_vec4: +; FASTISEL: .functype test_trunc32_vec4 (f32, f32, f32, f32, i32) -> () +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push5=, 4 +; FASTISEL-NEXT: local.get $push4=, 0 +; FASTISEL-NEXT: call $push0=, __truncsfhf2, $pop4 +; FASTISEL-NEXT: i32.store16 0($pop5), $pop0 +; FASTISEL-NEXT: local.get $push7=, 4 +; FASTISEL-NEXT: local.get $push6=, 1 +; FASTISEL-NEXT: call $push1=, __truncsfhf2, $pop6 +; FASTISEL-NEXT: i32.store16 2($pop7), $pop1 +; FASTISEL-NEXT: local.get $push9=, 4 +; FASTISEL-NEXT: local.get $push8=, 2 +; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop8 +; FASTISEL-NEXT: i32.store16 4($pop9), $pop2 +; FASTISEL-NEXT: local.get $push11=, 4 +; FASTISEL-NEXT: local.get $push10=, 3 +; FASTISEL-NEXT: call $push3=, __truncsfhf2, $pop10 +; FASTISEL-NEXT: i32.store16 6($pop11), $pop3 +; FASTISEL-NEXT: return + %v = fptrunc <4 x float> %a to <4 x half> + store <4 x half> %v, ptr %p + ret void +} + +define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind { +; DEFISEL-LABEL: test_trunc64_vec4: +; DEFISEL: .functype 
test_trunc64_vec4 (f64, f64, f64, f64, i32) -> () +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push5=, 4 +; DEFISEL-NEXT: local.get $push4=, 3 +; DEFISEL-NEXT: call $push0=, __truncdfhf2, $pop4 +; DEFISEL-NEXT: i32.store16 6($pop5), $pop0 +; DEFISEL-NEXT: local.get $push7=, 4 +; DEFISEL-NEXT: local.get $push6=, 2 +; DEFISEL-NEXT: call $push1=, __truncdfhf2, $pop6 +; DEFISEL-NEXT: i32.store16 4($pop7), $pop1 +; DEFISEL-NEXT: local.get $push9=, 4 +; DEFISEL-NEXT: local.get $push8=, 1 +; DEFISEL-NEXT: call $push2=, __truncdfhf2, $pop8 +; DEFISEL-NEXT: i32.store16 2($pop9), $pop2 +; DEFISEL-NEXT: local.get $push11=, 4 +; DEFISEL-NEXT: local.get $push10=, 0 +; DEFISEL-NEXT: call $push3=, __truncdfhf2, $pop10 +; DEFISEL-NEXT: i32.store16 0($pop11), $pop3 +; DEFISEL-NEXT: return +; +; FASTISEL-LABEL: test_trunc64_vec4: +; FASTISEL: .functype test_trunc64_vec4 (f64, f64, f64, f64, i32) -> () +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push5=, 4 +; FASTISEL-NEXT: local.get $push4=, 0 +; FASTISEL-NEXT: call $push0=, __truncdfhf2, $pop4 +; FASTISEL-NEXT: i32.store16 0($pop5), $pop0 +; FASTISEL-NEXT: local.get $push7=, 4 +; FASTISEL-NEXT: local.get $push6=, 1 +; FASTISEL-NEXT: call $push1=, __truncdfhf2, $pop6 +; FASTISEL-NEXT: i32.store16 2($pop7), $pop1 +; FASTISEL-NEXT: local.get $push9=, 4 +; FASTISEL-NEXT: local.get $push8=, 2 +; FASTISEL-NEXT: call $push2=, __truncdfhf2, $pop8 +; FASTISEL-NEXT: i32.store16 4($pop9), $pop2 +; FASTISEL-NEXT: local.get $push11=, 4 +; FASTISEL-NEXT: local.get $push10=, 3 +; FASTISEL-NEXT: call $push3=, __truncdfhf2, $pop10 +; FASTISEL-NEXT: i32.store16 6($pop11), $pop3 +; FASTISEL-NEXT: return + %v = fptrunc <4 x double> %a to <4 x half> + store <4 x half> %v, ptr %p + ret void +} + +define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind { +; DEFISEL-LABEL: test_sitofp_fadd_i32: +; DEFISEL: .functype test_sitofp_fadd_i32 (i32, i32) -> (f32) +; DEFISEL-NEXT: # %bb.0: +; DEFISEL-NEXT: local.get $push8=, 1 +; 
DEFISEL-NEXT: i32.load16_u $push7=, 0($pop8) +; DEFISEL-NEXT: local.set 1, $pop7 +; DEFISEL-NEXT: local.get $push9=, 0 +; DEFISEL-NEXT: f32.convert_i32_s $push0=, $pop9 +; DEFISEL-NEXT: call $push1=, __truncsfhf2, $pop0 +; DEFISEL-NEXT: call $push2=, __extendhfsf2, $pop1 +; DEFISEL-NEXT: local.get $push10=, 1 +; DEFISEL-NEXT: call $push3=, __extendhfsf2, $pop10 +; DEFISEL-NEXT: f32.add $push4=, $pop2, $pop3 +; DEFISEL-NEXT: call $push5=, __truncsfhf2, $pop4 +; DEFISEL-NEXT: call $push6=, __extendhfsf2, $pop5 +; DEFISEL-NEXT: return $pop6 +; +; FASTISEL-LABEL: test_sitofp_fadd_i32: +; FASTISEL: .functype test_sitofp_fadd_i32 (i32, i32) -> (f32) +; FASTISEL-NEXT: # %bb.0: +; FASTISEL-NEXT: local.get $push8=, 1 +; FASTISEL-NEXT: i32.load16_u $push7=, 0($pop8) +; FASTISEL-NEXT: local.set 1, $pop7 +; FASTISEL-NEXT: local.get $push9=, 0 +; FASTISEL-NEXT: f32.convert_i32_s $push1=, $pop9 +; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop1 +; FASTISEL-NEXT: call $push3=, __extendhfsf2, $pop2 +; FASTISEL-NEXT: local.get $push10=, 1 +; FASTISEL-NEXT: call $push4=, __extendhfsf2, $pop10 +; FASTISEL-NEXT: f32.add $push5=, $pop3, $pop4 +; FASTISEL-NEXT: call $push6=, __truncsfhf2, $pop5 +; FASTISEL-NEXT: call $push0=, __extendhfsf2, $pop6 +; FASTISEL-NEXT: return $pop0 + %tmp0 = load half, ptr %b + %tmp1 = sitofp i32 %a to half + %tmp2 = fadd half %tmp0, %tmp1 + %tmp3 = fpext half %tmp2 to float + ret float %tmp3 +} + +define half @chained_fp_ops(half %x) { +; ALL-LABEL: chained_fp_ops: +; ALL: .functype chained_fp_ops (i32) -> (i32) +; ALL-NEXT: .local f32 +; ALL-NEXT: # %bb.0: # %start +; ALL-NEXT: local.get $push8=, 0 +; ALL-NEXT: call $push7=, __extendhfsf2, $pop8 +; ALL-NEXT: local.tee $push6=, 1, $pop7 +; ALL-NEXT: local.get $push9=, 1 +; ALL-NEXT: f32.add $push0=, $pop6, $pop9 +; ALL-NEXT: call $push2=, __truncsfhf2, $pop0 +; ALL-NEXT: call $push3=, __extendhfsf2, $pop2 +; ALL-NEXT: f32.const $push1=, 0x1p-1 +; ALL-NEXT: f32.mul $push4=, $pop3, $pop1 +; ALL-NEXT: call 
$push5=, __truncsfhf2, $pop4 +; ALL-NEXT: return $pop5 +start: + %y = fmul half %x, 0xH4000 + %z = fdiv half %y, 0xH4000 + ret half %z +} + +define half @test_select_cc(half) nounwind { +; ALL-LABEL: test_select_cc: +; ALL: .functype test_select_cc (i32) -> (i32) +; ALL-NEXT: # %bb.0: +; ALL-NEXT: i32.const $push4=, 15360 +; ALL-NEXT: i32.const $push3=, 0 +; ALL-NEXT: local.get $push6=, 0 +; ALL-NEXT: call $push1=, __extendhfsf2, $pop6 +; ALL-NEXT: f32.const $push0=, 0x0p0 +; ALL-NEXT: f32.ne $push2=, $pop1, $pop0 +; ALL-NEXT: i32.select $push5=, $pop4, $pop3, $pop2 +; ALL-NEXT: return $pop5 + %2 = fcmp une half %0, 0xH0000 + %3 = uitofp i1 %2 to half + ret half %3 +} + +define half @fabs(half %x) nounwind { +; ALL-LABEL: fabs: +; ALL: .functype fabs (i32) -> (i32) +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: i32.const $push0=, 32767 +; ALL-NEXT: i32.and $push1=, $pop2, $pop0 +; ALL-NEXT: return $pop1 + %a = call half @llvm.fabs.f16(half %x) + ret half %a +} + +define half @fcopysign(half %x, half %y) nounwind { +; ALL-LABEL: fcopysign: +; ALL: .functype fcopysign (i32, i32) -> (i32) +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push5=, 0 +; ALL-NEXT: i32.const $push2=, 32767 +; ALL-NEXT: i32.and $push3=, $pop5, $pop2 +; ALL-NEXT: local.get $push6=, 1 +; ALL-NEXT: i32.const $push0=, -32768 +; ALL-NEXT: i32.and $push1=, $pop6, $pop0 +; ALL-NEXT: i32.or $push4=, $pop3, $pop1 +; ALL-NEXT: return $pop4 + %a = call half @llvm.copysign.f16(half %x, half %y) + ret half %a } diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index 137994ceac132..0fbaf815b9eba 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -103,10 +103,9 @@ entry: define i32 @stest_f16i32(half %x) { ; CHECK-LABEL: stest_f16i32: -; CHECK: .functype stest_f16i32 (f32) -> (i32) +; CHECK: .functype stest_f16i32 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; 
CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return @@ -122,10 +121,9 @@ entry: define i32 @stest_f16i32_cse(half %x) { ; CHECK-LABEL: stest_f16i32_cse: -; CHECK: .functype stest_f16i32_cse (f32) -> (i32) +; CHECK: .functype stest_f16i32_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: i32.wrap_i64 @@ -138,10 +136,9 @@ entry: define i32 @utesth_f16i32(half %x) { ; CHECK-LABEL: utesth_f16i32: -; CHECK: .functype utesth_f16i32 (f32) -> (i32) +; CHECK: .functype utesth_f16i32 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -155,10 +152,9 @@ entry: define i32 @utesth_f16i32_cse(half %x) { ; CHECK-LABEL: utesth_f16i32_cse: -; CHECK: .functype utesth_f16i32_cse (f32) -> (i32) +; CHECK: .functype utesth_f16i32_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i64.trunc_sat_f32_u ; CHECK-NEXT: i32.wrap_i64 @@ -171,10 +167,9 @@ entry: define i32 @ustest_f16i32(half %x) { ; CHECK-LABEL: ustest_f16i32: -; CHECK: .functype ustest_f16i32 (f32) -> (i32) +; CHECK: .functype ustest_f16i32 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -190,10 +185,9 @@ entry: define i32 @ustest_f16i32_cse(half %x) { ; CHECK-LABEL: ustest_f16i32_cse: -; CHECK: .functype ustest_f16i32_cse (f32) -> (i32) +; CHECK: .functype ustest_f16i32_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: 
call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -373,22 +367,20 @@ entry: define i16 @stest_f16i16(half %x) { ; CHECK-LABEL: stest_f16i16: -; CHECK: .functype stest_f16i16 (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype stest_f16i16 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 32767 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 32767 ; CHECK-NEXT: i32.lt_s ; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const -32768 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -32768 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select @@ -405,16 +397,14 @@ entry: define i16 @utesth_f16i16(half %x) { ; CHECK-LABEL: utesth_f16i16: -; CHECK: .functype utesth_f16i16 (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype utesth_f16i16 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 65535 ; CHECK-NEXT: i32.lt_u ; CHECK-NEXT: i32.select @@ -429,10 +419,9 @@ entry: define i16 @utesth_f16i16_cse(half %x) { ; CHECK-LABEL: utesth_f16i16_cse: -; CHECK: .functype utesth_f16i16_cse (f32) -> (i32) +; CHECK: .functype utesth_f16i16_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -444,22 +433,20 @@ entry: define i16 @ustest_f16i16(half %x) { ; 
CHECK-LABEL: ustest_f16i16: -; CHECK: .functype ustest_f16i16 (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype ustest_f16i16 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 65535 ; CHECK-NEXT: i32.lt_s ; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select @@ -476,10 +463,9 @@ entry: define i16 @ustest_f16i16_cse(half %x) { ; CHECK-LABEL: ustest_f16i16_cse: -; CHECK: .functype ustest_f16i16_cse (f32) -> (i32) +; CHECK: .functype ustest_f16i16_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -863,10 +849,9 @@ entry: define i64 @stest_f16i64(half %x) { ; CHECK-LABEL: stest_f16i64: -; CHECK: .functype stest_f16i64 (f32) -> (i64) +; CHECK: .functype stest_f16i64 (i32) -> (i64) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return @@ -882,7 +867,7 @@ entry: define i64 @utesth_f16i64(half %x) { ; CHECK-LABEL: utesth_f16i64: -; CHECK: .functype utesth_f16i64 (f32) -> (i64) +; CHECK: .functype utesth_f16i64 (i32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -892,7 +877,6 @@ define i64 @utesth_f16i64(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call 
__truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 @@ -921,7 +905,7 @@ entry: define i64 @utesth_f16i64_cse(half %x) { ; CHECK-LABEL: utesth_f16i64_cse: -; CHECK: .functype utesth_f16i64_cse (f32) -> (i64) +; CHECK: .functype utesth_f16i64_cse (i32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -931,7 +915,6 @@ define i64 @utesth_f16i64_cse(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 @@ -951,7 +934,7 @@ entry: define i64 @ustest_f16i64(half %x) { ; CHECK-LABEL: ustest_f16i64: -; CHECK: .functype ustest_f16i64 (f32) -> (i64) +; CHECK: .functype ustest_f16i64 (i32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -961,7 +944,6 @@ define i64 @ustest_f16i64(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 1 @@ -1103,10 +1085,9 @@ entry: define i32 @stest_f16i32_mm(half %x) { ; CHECK-LABEL: stest_f16i32_mm: -; CHECK: .functype stest_f16i32_mm (f32) -> (i32) +; CHECK: .functype stest_f16i32_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return @@ -1120,10 +1101,9 @@ entry: define i32 @utesth_f16i32_mm(half %x) { ; CHECK-LABEL: utesth_f16i32_mm: -; CHECK: .functype utesth_f16i32_mm (f32) -> (i32) +; CHECK: .functype utesth_f16i32_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; 
CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -1136,10 +1116,9 @@ entry: define i32 @ustest_f16i32_mm(half %x) { ; CHECK-LABEL: ustest_f16i32_mm: -; CHECK: .functype ustest_f16i32_mm (f32) -> (i32) +; CHECK: .functype ustest_f16i32_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -1153,10 +1132,9 @@ entry: define i32 @ustest_f16i32_mm_cse(half %x) { ; CHECK-LABEL: ustest_f16i32_mm_cse: -; CHECK: .functype ustest_f16i32_mm_cse (f32) -> (i32) +; CHECK: .functype ustest_f16i32_mm_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -1325,22 +1303,20 @@ entry: define i16 @stest_f16i16_mm(half %x) { ; CHECK-LABEL: stest_f16i16_mm: -; CHECK: .functype stest_f16i16_mm (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype stest_f16i16_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 32767 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 32767 ; CHECK-NEXT: i32.lt_s ; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const -32768 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -32768 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select @@ -1355,16 +1331,14 @@ entry: define i16 @utesth_f16i16_mm(half %x) { ; CHECK-LABEL: utesth_f16i16_mm: -; CHECK: .functype utesth_f16i16_mm (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype utesth_f16i16_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; 
CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 65535 ; CHECK-NEXT: i32.lt_u ; CHECK-NEXT: i32.select @@ -1378,22 +1352,20 @@ entry: define i16 @ustest_f16i16_mm(half %x) { ; CHECK-LABEL: ustest_f16i16_mm: -; CHECK: .functype ustest_f16i16_mm (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype ustest_f16i16_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 65535 ; CHECK-NEXT: i32.lt_s ; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select @@ -1408,10 +1380,9 @@ entry: define i16 @ustest_f16i16_mm_cse(half %x) { ; CHECK-LABEL: ustest_f16i16_mm_cse: -; CHECK: .functype ustest_f16i16_mm_cse (f32) -> (i32) +; CHECK: .functype ustest_f16i16_mm_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -1622,10 +1593,9 @@ entry: define i64 @stest_f16i64_mm(half %x) { ; CHECK-LABEL: stest_f16i64_mm: -; CHECK: .functype stest_f16i64_mm (f32) -> (i64) +; CHECK: .functype stest_f16i64_mm (i32) -> (i64) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return @@ -1639,7 +1609,7 @@ entry: define i64 
@utesth_f16i64_mm(half %x) { ; CHECK-LABEL: utesth_f16i64_mm: -; CHECK: .functype utesth_f16i64_mm (f32) -> (i64) +; CHECK: .functype utesth_f16i64_mm (i32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1649,7 +1619,6 @@ define i64 @utesth_f16i64_mm(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 @@ -1677,7 +1646,7 @@ entry: define i64 @ustest_f16i64_mm(half %x) { ; CHECK-LABEL: ustest_f16i64_mm: -; CHECK: .functype ustest_f16i64_mm (f32) -> (i64) +; CHECK: .functype ustest_f16i64_mm (i32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1687,7 +1656,6 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 1 @@ -1726,7 +1694,7 @@ entry: define i64 @utesth_f16i64_mm_cse(half %x) { ; CHECK-LABEL: utesth_f16i64_mm_cse: -; CHECK: .functype utesth_f16i64_mm_cse (f32) -> (i64) +; CHECK: .functype utesth_f16i64_mm_cse (i32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1736,7 +1704,6 @@ define i64 @utesth_f16i64_mm_cse(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 7190e162eb010..fabc5c174fd0b 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ 
b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -174,27 +174,24 @@ entry: define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-LABEL: stest_f16i32: -; CHECK: .functype stest_f16i32 (f32, f32, f32, f32) -> (v128) +; CHECK: .functype stest_f16i32 (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 @@ -211,27 +208,24 @@ entry: define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-LABEL: utesth_f16i32: -; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128) +; CHECK: .functype utesth_f16i32 (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; 
CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 @@ -246,27 +240,24 @@ entry: define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32: -; CHECK: .functype ustest_f16i32 (f32, f32, f32, f32) -> (v128) +; CHECK: .functype ustest_f16i32 (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 @@ -433,73 +424,65 @@ entry: define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16: -; CHECK: .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 +; CHECK: .functype stest_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32, v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; 
CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 -; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: local.tee 13 ; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 -; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: local.tee 14 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 10 +; CHECK-NEXT: local.tee 15 ; CHECK-NEXT: v128.and -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 13 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 14 ; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: local.get 10 +; 
CHECK-NEXT: local.get 15 ; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return @@ -515,63 +498,55 @@ entry: define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-LABEL: utesth_f16i16: -; CHECK: .functype utesth_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128 +; CHECK: .functype utesth_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: local.tee 13 ; CHECK-NEXT: i32x4.min_u -; CHECK-NEXT: local.get 4 
+; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 13 ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return @@ -585,68 +560,60 @@ entry: define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-LABEL: ustest_f16i16: -; CHECK: .functype ustest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128 +; CHECK: .functype ustest_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: 
i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: local.tee 13 ; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const 0, 0, 0, 0 -; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: local.tee 14 ; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 13 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 14 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return @@ -1186,7 +1153,7 @@ entry: define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-LABEL: stest_f16i64: -; CHECK: .functype stest_f16i64 (f32, f32) -> (v128) +; CHECK: .functype stest_f16i64 (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1198,12 +1165,10 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call 
__extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 @@ -1297,7 +1262,7 @@ entry: define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-LABEL: utesth_f16i64: -; CHECK: .functype utesth_f16i64 (f32, f32) -> (v128) +; CHECK: .functype utesth_f16i64 (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1309,12 +1274,10 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 @@ -1356,7 +1319,7 @@ entry: define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64: -; CHECK: .functype ustest_f16i64 (f32, f32) -> (v128) +; CHECK: .functype ustest_f16i64 (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1368,12 +1331,10 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 @@ -1616,27 +1577,24 @@ entry: define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: stest_f16i32_mm: -; CHECK: .functype stest_f16i32_mm (f32, f32, f32, f32) -> (v128) +; CHECK: .functype stest_f16i32_mm (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: 
local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 @@ -1651,27 +1609,24 @@ entry: define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: utesth_f16i32_mm: -; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128) +; CHECK: .functype utesth_f16i32_mm (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 @@ -1685,27 +1640,24 @@ entry: define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32_mm: -; CHECK: .functype ustest_f16i32_mm (f32, f32, f32, f32) -> (v128) +; CHECK: .functype ustest_f16i32_mm (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry 
; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 @@ -1860,73 +1812,65 @@ entry: define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16_mm: -; CHECK: .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 +; CHECK: .functype stest_f16i16_mm (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32, v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; 
CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 -; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: local.tee 13 ; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 -; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: local.tee 14 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 10 +; CHECK-NEXT: local.tee 15 ; CHECK-NEXT: v128.and -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 13 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 14 ; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: local.get 10 +; CHECK-NEXT: local.get 15 ; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return @@ -1940,63 +1884,55 @@ entry: define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: utesth_f16i16_mm: -; CHECK: .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128 +; CHECK: .functype utesth_f16i16_mm (i32, i32, i32, i32, i32, i32, 
i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: local.tee 13 ; CHECK-NEXT: i32x4.min_u -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: 
i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 13 ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return @@ -2009,68 +1945,60 @@ entry: define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: ustest_f16i16_mm: -; CHECK: .functype ustest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128 +; CHECK: .functype ustest_f16i16_mm (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 
65535 -; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: local.tee 13 ; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const 0, 0, 0, 0 -; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: local.tee 14 ; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 13 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 14 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return @@ -2566,7 +2494,7 @@ entry: define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: stest_f16i64_mm: -; CHECK: .functype stest_f16i64_mm (f32, f32) -> (v128) +; CHECK: .functype stest_f16i64_mm (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -2578,12 +2506,10 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 @@ -2675,7 +2601,7 @@ entry: define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: utesth_f16i64_mm: -; CHECK: .functype utesth_f16i64_mm (f32, f32) -> (v128) +; CHECK: .functype utesth_f16i64_mm (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; 
CHECK-NEXT: global.get __stack_pointer @@ -2687,12 +2613,10 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 @@ -2733,7 +2657,7 @@ entry: define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64_mm: -; CHECK: .functype ustest_f16i64_mm (f32, f32) -> (v128) +; CHECK: .functype ustest_f16i64_mm (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -2745,12 +2669,10 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 diff --git a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll index 3c10b09525573..0608a60b739f8 100644 --- a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll +++ b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll @@ -4,36 +4,36 @@ define { half, half } @test_sincos_f16(half %a) #0 { ; WASM32-LABEL: test_sincos_f16: -; WASM32: .functype test_sincos_f16 (i32, f32) -> () +; WASM32: .functype test_sincos_f16 (i32, i32) -> () +; WASM32-NEXT: .local f32 ; WASM32-NEXT: # %bb.0: ; WASM32-NEXT: local.get 0 ; WASM32-NEXT: local.get 1 -; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: call __extendhfsf2 -; WASM32-NEXT: local.tee 1 +; WASM32-NEXT: local.tee 2 ; WASM32-NEXT: call cosf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: 
i32.store16 2 ; WASM32-NEXT: local.get 0 -; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 ; WASM32-NEXT: call sinf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 0 ; WASM32-NEXT: # fallthrough-return ; ; WASM64-LABEL: test_sincos_f16: -; WASM64: .functype test_sincos_f16 (i64, f32) -> () +; WASM64: .functype test_sincos_f16 (i64, i32) -> () +; WASM64-NEXT: .local f32 ; WASM64-NEXT: # %bb.0: ; WASM64-NEXT: local.get 0 ; WASM64-NEXT: local.get 1 -; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: call __extendhfsf2 -; WASM64-NEXT: local.tee 1 +; WASM64-NEXT: local.tee 2 ; WASM64-NEXT: call cosf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 2 ; WASM64-NEXT: local.get 0 -; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 ; WASM64-NEXT: call sinf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 0 @@ -44,12 +44,12 @@ define { half, half } @test_sincos_f16(half %a) #0 { define half @test_sincos_f16_only_use_sin(half %a) #0 { ; CHECK-LABEL: test_sincos_f16_only_use_sin: -; CHECK: .functype test_sincos_f16_only_use_sin (f32) -> (f32) +; CHECK: .functype test_sincos_f16_only_use_sin (i32) -> (i32) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call sinf +; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: # fallthrough-return %result = call { half, half } @llvm.sincos.f16(half %a) %result.0 = extractvalue { half, half } %result, 0 @@ -58,12 +58,12 @@ define half @test_sincos_f16_only_use_sin(half %a) #0 { define half @test_sincos_f16_only_use_cos(half %a) #0 { ; CHECK-LABEL: test_sincos_f16_only_use_cos: -; CHECK: .functype test_sincos_f16_only_use_cos (f32) -> (f32) +; CHECK: .functype test_sincos_f16_only_use_cos (i32) -> (i32) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call cosf +; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: # fallthrough-return %result = call { half, 
half } @llvm.sincos.f16(half %a) %result.1 = extractvalue { half, half } %result, 1 @@ -72,62 +72,60 @@ define half @test_sincos_f16_only_use_cos(half %a) #0 { define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) #0 { ; WASM32-LABEL: test_sincos_v2f16: -; WASM32: .functype test_sincos_v2f16 (i32, f32, f32) -> () +; WASM32: .functype test_sincos_v2f16 (i32, i32, i32) -> () +; WASM32-NEXT: .local f32, f32 ; WASM32-NEXT: # %bb.0: ; WASM32-NEXT: local.get 0 ; WASM32-NEXT: local.get 2 -; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: call __extendhfsf2 -; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: local.tee 3 ; WASM32-NEXT: call cosf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 6 ; WASM32-NEXT: local.get 0 ; WASM32-NEXT: local.get 1 -; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: call __extendhfsf2 -; WASM32-NEXT: local.tee 1 +; WASM32-NEXT: local.tee 4 ; WASM32-NEXT: call cosf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 4 ; WASM32-NEXT: local.get 0 -; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 ; WASM32-NEXT: call sinf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 2 ; WASM32-NEXT: local.get 0 -; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 4 ; WASM32-NEXT: call sinf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 0 ; WASM32-NEXT: # fallthrough-return ; ; WASM64-LABEL: test_sincos_v2f16: -; WASM64: .functype test_sincos_v2f16 (i64, f32, f32) -> () +; WASM64: .functype test_sincos_v2f16 (i64, i32, i32) -> () +; WASM64-NEXT: .local f32, f32 ; WASM64-NEXT: # %bb.0: ; WASM64-NEXT: local.get 0 ; WASM64-NEXT: local.get 2 -; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: call __extendhfsf2 -; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: local.tee 3 ; WASM64-NEXT: call cosf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 6 ; WASM64-NEXT: local.get 0 ; WASM64-NEXT: local.get 1 -; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: call __extendhfsf2 -; WASM64-NEXT: local.tee 1 +; WASM64-NEXT: 
local.tee 4 ; WASM64-NEXT: call cosf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 4 ; WASM64-NEXT: local.get 0 -; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 ; WASM64-NEXT: call sinf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 2 ; WASM64-NEXT: local.get 0 -; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 4 ; WASM64-NEXT: call sinf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 0