From 03b786e6a5c0df11ff6398a97abe085b384a3c4b Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Thu, 8 Aug 2024 15:26:38 +0900 Subject: [PATCH 1/4] [AMDGPU] Disable inline constants for pseudo scalar transcendentals Prevent operand folding from using inline constants as operands of pseudo scalar transcendental f16 instructions. However, literal constants are still allowed. --- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 + llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 12 ++ .../AMDGPU/pseudo-scalar-transcendental.mir | 120 ++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index def89c785b855..902f51ae358d5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1289,6 +1289,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively. bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; } + /// \returns true if inline constants are not supported for F16 pseudo + /// scalar transcendentals. + bool hasNoF16PseudoScalarTransInlineConstants() const { + return getGeneration() == GFX12; + } + /// \returns The maximum number of instructions that can be enclosed in an /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that /// instruction. 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9147242046ced..9ee65e390317a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5776,6 +5776,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, return false; } } + } else if (isVOP3(MI) && ST.hasNoF16PseudoScalarTransInlineConstants() && + !MO->isReg() && isInlineConstant(*MO, OpInfo)) { + switch (MI.getOpcode()) { + case AMDGPU::V_S_EXP_F16_e64: + case AMDGPU::V_S_LOG_F16_e64: + case AMDGPU::V_S_RCP_F16_e64: + case AMDGPU::V_S_RSQ_F16_e64: + case AMDGPU::V_S_SQRT_F16_e64: + return false; + default: + break; + } } if (MO->isReg()) { diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir new file mode 100644 index 0000000000000..17bed38bd046d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir @@ -0,0 +1,120 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s + +# Do not use inline constants for f16 pseudo scalar transcendentals. +# But allow literal constants. + +--- +name: exp_f16_imm +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: exp_f16_imm + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360 + ; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 15360 + %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: exp_f16_literal +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: exp_f16_literal + ; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 16960 + %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... + +--- +name: log_f16_imm +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: log_f16_imm + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360 + ; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 15360 + %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... + +--- +name: log_f16_literal +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: log_f16_literal + ; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 16960 + %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... + +--- +name: rcp_f16_imm +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: rcp_f16_imm + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360 + ; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 15360 + %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... + +--- +name: rcp_f16_literal +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: rcp_f16_literal + ; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 16960 + %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: rsq_f16_imm +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: rsq_f16_imm + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360 + ; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 15360 + %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... + +--- +name: rsq_f16_literal +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: rsq_f16_literal + ; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 16960 + %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... + +--- +name: sqrt_f16_imm +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: sqrt_f16_imm + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360 + ; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 15360 + %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... + +--- +name: sqrt_f16_literal +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: sqrt_f16_literal + ; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec + %0:sgpr_32 = S_MOV_B32 16960 + %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec +... 
From 75d7065555c6df2faddd852bbc600035c9f75d95 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Thu, 15 Aug 2024 14:03:30 +0900 Subject: [PATCH 2/4] - Address reviewer comments: check the subtarget predicate before isVOP3 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9ee65e390317a..3a82e13efa0de 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5776,7 +5776,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, return false; } } - } else if (isVOP3(MI) && ST.hasNoF16PseudoScalarTransInlineConstants() && + } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && isVOP3(MI) && !MO->isReg() && isInlineConstant(*MO, OpInfo)) { switch (MI.getOpcode()) { case AMDGPU::V_S_EXP_F16_e64: From 7d00498487dbbf7c0de65229a5aa25a94b939a53 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Fri, 16 Aug 2024 11:41:24 +0900 Subject: [PATCH 3/4] - Address reviewer comments: replace the opcode switch with an isF16PseudoScalarTrans helper --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 16 ++++------------ llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 ++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 3a82e13efa0de..0f7cfb3496920 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5776,18 +5776,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, return false; } } - } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && isVOP3(MI) && - !MO->isReg() && isInlineConstant(*MO, OpInfo)) { - switch (MI.getOpcode()) { - case AMDGPU::V_S_EXP_F16_e64: - case AMDGPU::V_S_LOG_F16_e64: - case AMDGPU::V_S_RCP_F16_e64: - case AMDGPU::V_S_RSQ_F16_e64: - case AMDGPU::V_S_SQRT_F16_e64: - return false; - default: - break; - } + } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && + 
isF16PseudoScalarTrans(MI.getOpcode()) && !MO->isReg() && + isInlineConstant(*MO, OpInfo)) { + return false; } if (MO->isReg()) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 1712dfe8d406c..c2bc479865860 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -946,6 +946,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { Opcode == AMDGPU::DS_GWS_BARRIER; } + bool isF16PseudoScalarTrans(unsigned Opcode) const { + return Opcode == AMDGPU::V_S_EXP_F16_e64 || + Opcode == AMDGPU::V_S_LOG_F16_e64 || + Opcode == AMDGPU::V_S_RCP_F16_e64 || + Opcode == AMDGPU::V_S_RSQ_F16_e64 || + Opcode == AMDGPU::V_S_SQRT_F16_e64; + } + static bool doesNotReadTiedSource(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead; } From ead00b82a1ae946be677ed966e147e333e63a2bc Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Fri, 16 Aug 2024 19:00:09 +0900 Subject: [PATCH 4/4] - Address reviewer comments: test !MO->isReg() before the opcode check and make isF16PseudoScalarTrans static --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 ++-- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0f7cfb3496920..6dce41d1605fa 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5776,8 +5776,8 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, return false; } } - } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && - isF16PseudoScalarTrans(MI.getOpcode()) && !MO->isReg() && + } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() && + isF16PseudoScalarTrans(MI.getOpcode()) && isInlineConstant(*MO, OpInfo)) { return false; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index c2bc479865860..91855fb14f6f3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -946,7 +946,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { Opcode == AMDGPU::DS_GWS_BARRIER; } - bool isF16PseudoScalarTrans(unsigned Opcode) const { + static bool isF16PseudoScalarTrans(unsigned Opcode) { return Opcode == AMDGPU::V_S_EXP_F16_e64 || Opcode == AMDGPU::V_S_LOG_F16_e64 || Opcode == AMDGPU::V_S_RCP_F16_e64 ||