From e8c44383ee9d9170f2b90bfc0f7a60ccb75a93f6 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Mon, 15 Jul 2024 15:00:05 +0800 Subject: [PATCH 1/5] [RISCV] Precommit test for tail duplication --- .../test/CodeGen/RISCV/riscv-tail-dup-size.ll | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll new file mode 100644 index 0000000000000..ae52773d71463 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3 + +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3 + +@a = external dso_local local_unnamed_addr global i32 +@b = external dso_local local_unnamed_addr global i32 +@c = external dso_local local_unnamed_addr global i32 + +declare i32 @foo(i32) + +define dso_local i32 @test(i32 %n) { +; CHECK-O2-LABEL: test: +; CHECK-O2: # %bb.0: # %entry +; CHECK-O2-NEXT: sext.w a1, a0 +; CHECK-O2-NEXT: blez a1, .LBB0_2 +; CHECK-O2-NEXT: # %bb.1: # %if.then +; CHECK-O2-NEXT: lui a1, %hi(a) +; CHECK-O2-NEXT: lw a1, %lo(a)(a1) +; CHECK-O2-NEXT: mul a0, a1, a0 +; CHECK-O2-NEXT: j .LBB0_3 +; CHECK-O2-NEXT: .LBB0_2: # %if.else +; CHECK-O2-NEXT: lui a1, %hi(b) +; CHECK-O2-NEXT: lw a1, %lo(b)(a1) +; CHECK-O2-NEXT: divw a0, a1, a0 +; CHECK-O2-NEXT: .LBB0_3: # %if.end +; CHECK-O2-NEXT: lui a1, %hi(c) +; CHECK-O2-NEXT: lw a1, %lo(c)(a1) +; CHECK-O2-NEXT: addi a0, 
a0, -1 +; CHECK-O2-NEXT: mulw a0, a0, a1 +; CHECK-O2-NEXT: tail foo +; +; CHECK-O3-LABEL: test: +; CHECK-O3: # %bb.0: # %entry +; CHECK-O3-NEXT: sext.w a1, a0 +; CHECK-O3-NEXT: blez a1, .LBB0_2 +; CHECK-O3-NEXT: # %bb.1: # %if.then +; CHECK-O3-NEXT: lui a1, %hi(a) +; CHECK-O3-NEXT: lw a1, %lo(a)(a1) +; CHECK-O3-NEXT: mul a0, a1, a0 +; CHECK-O3-NEXT: j .LBB0_3 +; CHECK-O3-NEXT: .LBB0_2: # %if.else +; CHECK-O3-NEXT: lui a1, %hi(b) +; CHECK-O3-NEXT: lw a1, %lo(b)(a1) +; CHECK-O3-NEXT: divw a0, a1, a0 +; CHECK-O3-NEXT: .LBB0_3: # %if.end +; CHECK-O3-NEXT: lui a1, %hi(c) +; CHECK-O3-NEXT: lw a1, %lo(c)(a1) +; CHECK-O3-NEXT: addi a0, a0, -1 +; CHECK-O3-NEXT: mulw a0, a0, a1 +; CHECK-O3-NEXT: tail foo +entry: + %cmp = icmp sgt i32 %n, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %va = load i32, ptr @a + %mul = mul nsw i32 %va, %n + br label %if.end + +if.else: + %vb = load i32, ptr @b + %div = sdiv i32 %vb, %n + br label %if.end + +if.end: + %phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ] + %vc = load i32, ptr @c + %add = add nsw i32 %phi, -1 + %arg = mul i32 %add, %vc + %ret = tail call i32 @foo(i32 %arg) + ret i32 %ret +} From 34cdb61fb35f4b4c6a54fb929b6eff5a64aab39f Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Mon, 15 Jul 2024 15:10:46 +0800 Subject: [PATCH 2/5] [RISCV] Increase default tail duplication threshold to 6 at -O3 This mirrors what AArch64 does. Raising the threshold to 6 increases code size but reduces the number of direct branches, which benefits CPUs with wide fetch/issue units. The value 6 is debatable; an alternative is to set it to `SchedModel.IssueWidth`. 
--- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 5 +++++ llvm/lib/Target/RISCV/RISCVInstrInfo.h | 2 ++ llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 9 ++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 0620c3fc12adc..b59401576420c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3763,6 +3763,11 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { return ArrayRef(TargetFlags); } +unsigned int +RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const { + return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2; +} + // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. bool RISCV::isSEXT_W(const MachineInstr &MI) { return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 025e12d81e60d..06ec0cff95912 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -288,6 +288,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { ArrayRef> getSerializableMachineMemOperandTargetFlags() const override; + unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override; + unsigned getUndefInitOpcode(unsigned RegClassID) const override { switch (RegClassID) { case RISCV::VRRegClassID: diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll index ae52773d71463..84373ce80843f 100644 --- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll +++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll @@ -4,7 +4,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2 -; RUN: llc -mtriple=riscv64 -mattr=+m 
-tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3 +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3 @a = external dso_local local_unnamed_addr global i32 @b = external dso_local local_unnamed_addr global i32 @@ -41,12 +41,15 @@ define dso_local i32 @test(i32 %n) { ; CHECK-O3-NEXT: lui a1, %hi(a) ; CHECK-O3-NEXT: lw a1, %lo(a)(a1) ; CHECK-O3-NEXT: mul a0, a1, a0 -; CHECK-O3-NEXT: j .LBB0_3 +; CHECK-O3-NEXT: lui a1, %hi(c) +; CHECK-O3-NEXT: lw a1, %lo(c)(a1) +; CHECK-O3-NEXT: addi a0, a0, -1 +; CHECK-O3-NEXT: mulw a0, a0, a1 +; CHECK-O3-NEXT: tail foo ; CHECK-O3-NEXT: .LBB0_2: # %if.else ; CHECK-O3-NEXT: lui a1, %hi(b) ; CHECK-O3-NEXT: lw a1, %lo(b)(a1) ; CHECK-O3-NEXT: divw a0, a1, a0 -; CHECK-O3-NEXT: .LBB0_3: # %if.end ; CHECK-O3-NEXT: lui a1, %hi(c) ; CHECK-O3-NEXT: lw a1, %lo(c)(a1) ; CHECK-O3-NEXT: addi a0, a0, -1 From 13796fcb5ae2fe65ad6f290ef4d41c801415559b Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 18 Jul 2024 12:06:54 +0800 Subject: [PATCH 3/5] Remove dso_local/local_unnamed_addr --- llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll index 84373ce80843f..ff0ff6e0dd3bd 100644 --- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll +++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll @@ -6,13 +6,13 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3 -@a = external dso_local local_unnamed_addr global i32 -@b = external dso_local local_unnamed_addr global i32 -@c = external dso_local local_unnamed_addr global i32 +@a = external global i32 +@b = external global i32 +@c = external global i32 declare i32 @foo(i32) -define 
dso_local i32 @test(i32 %n) { +define i32 @test(i32 %n) { ; CHECK-O2-LABEL: test: ; CHECK-O2: # %bb.0: # %entry ; CHECK-O2-NEXT: sext.w a1, a0 From 8c63d85347ff45be150d02feda1aa4029cdd60b5 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 18 Jul 2024 12:11:24 +0800 Subject: [PATCH 4/5] The default threshold at -O2 should be 2 --- llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll index ff0ff6e0dd3bd..0508016736004 100644 --- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll +++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll @@ -2,8 +2,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2 ; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3 -; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2 -; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=2 < %s | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=2 < %s | FileCheck %s --check-prefix=CHECK-O2 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3 @a = external global i32 From f7e99ad8b6eb54e9bc59c3de390cdf0326cf4ea2 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 18 Jul 2024 12:26:16 +0800 Subject: [PATCH 5/5] Add TailDupAggressiveThreshold to RISCVTuneInfo --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 7 ++++--- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 2 +- llvm/lib/Target/RISCV/RISCVProcessors.td | 5 ++++- llvm/lib/Target/RISCV/RISCVSubtarget.h | 7 +++++++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp 
b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index b59401576420c..9dd79027d7a16 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3763,9 +3763,10 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { return ArrayRef(TargetFlags); } -unsigned int -RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const { - return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2; +unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const { + return OptLevel >= CodeGenOptLevel::Aggressive + ? STI.getTailDupAggressiveThreshold() + : 2; } // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 06ec0cff95912..1612f56a8b506 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -288,7 +288,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { ArrayRef> getSerializableMachineMemOperandTargetFlags() const override; - unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override; + unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override; unsigned getUndefInitOpcode(unsigned RegClassID) const override { switch (RegClassID) { diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 1729bc0282f51..25b24980e0bf6 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -21,6 +21,9 @@ class RISCVTuneInfo { bits<32> MaxPrefetchIterationsAhead = -1; bits<32> MinimumJumpTableEntries = 5; + + // Tail duplication threshold at -O3. 
+ bits<32> TailDupAggressiveThreshold = 6; } def RISCVTuneInfoTable : GenericTable { @@ -29,7 +32,7 @@ def RISCVTuneInfoTable : GenericTable { let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment", "CacheLineSize", "PrefetchDistance", "MinPrefetchStride", "MaxPrefetchIterationsAhead", - "MinimumJumpTableEntries"]; + "MinimumJumpTableEntries", "TailDupAggressiveThreshold"]; } def getRISCVTuneInfo : SearchIndex { diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index d38952e5196f0..ea54ff1df0b7c 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -50,6 +50,9 @@ struct RISCVTuneInfo { unsigned MaxPrefetchIterationsAhead; unsigned MinimumJumpTableEntries; + + // Tail duplication threshold at -O3. + unsigned TailDupAggressiveThreshold; }; #define GET_RISCVTuneInfoTable_DECL @@ -300,6 +303,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { unsigned getMinimumJumpTableEntries() const; + unsigned getTailDupAggressiveThreshold() const { + return TuneInfo->TailDupAggressiveThreshold; + } + bool supportsInitUndef() const override { return hasVInstructions(); } }; } // End llvm namespace