Skip to content

ELF: CFI jump table relaxation. #147424

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: users/pcc/spr/main.elf-cfi-jump-table-relaxation
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions lld/ELF/Arch/X86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class X86_64 : public TargetInfo {
bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
InputSection *nextIS) const override;
bool relaxOnce(int pass) const override;
void relaxCFIJumpTables() const override;
void applyBranchToBranchOpt() const override;

private:
Expand Down Expand Up @@ -317,6 +318,170 @@ bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
return true;
}

// Relax CFI jump tables (input sections of type SHT_LLVM_CFI_JUMP_TABLE that
// live in executable output sections).
//
// - Split the jump table into pieces and place target functions inside the
//   jump table if they are small enough to fit into one entry.
// - Move the jump table in front of the function called by its last entry and
//   delete that last branch instruction, so that the function body itself acts
//   as the last entry.
//
// The pass runs in two phases: the first builds a map from each affected
// section to the list of sections that replaces it, the second rewrites the
// input section lists of the executable output sections using that map.
void X86_64::relaxCFIJumpTables() const {
  // Maps a section to the sections that take its place in the input section
  // list. An empty list removes the section from its original position.
  std::map<InputSection *, std::vector<InputSection *>> sectionReplacements;
  SmallVector<InputSection *, 0> storage;
  for (OutputSection *osec : ctx.outputSections) {
    if (!(osec->flags & SHF_EXECINSTR))
      continue;
    for (InputSection *sec : getInputSections(*osec, storage)) {
      // Only well-formed jump tables are relaxed. An empty table is skipped
      // too: there is nothing to relax, and `sec->size - sec->entsize` below
      // would wrap around for unsigned sizes and keep the entry walk from
      // terminating.
      if (sec->type != SHT_LLVM_CFI_JUMP_TABLE || sec->entsize == 0 ||
          sec->size == 0 || sec->size % sec->entsize != 0)
        continue;

      // We're going to replace the jump table with this list of sections. This
      // list will be made up of slices of the original section and function
      // bodies that were moved into the jump table.
      std::vector<InputSection *> replacements;

      // First, push the original jump table section. This is only so that it
      // can act as a relocation target. Later on, we will set the size of the
      // jump table section to 0 so that the slices and moved function bodies
      // become the actual relocation targets.
      replacements.push_back(sec);

      // Add the slice [begin, end) of the original section to the replacement
      // list. [rbegin, rend) is the slice of the relocation list that covers
      // [begin, end). Relocation offsets are rebased to the start of the slice.
      // The slice uses the entry size as its alignment.
      auto addSectionSlice = [&](size_t begin, size_t end, Relocation *rbegin,
                                 Relocation *rend) {
        auto *slice = make<InputSection>(
            sec->file, sec->name, sec->type, sec->flags, sec->entsize,
            sec->entsize,
            sec->contentMaybeDecompress().slice(begin, end - begin));
        for (const Relocation &r : ArrayRef<Relocation>(rbegin, rend)) {
          slice->relocations.push_back(
              Relocation{r.expr, r.type, r.offset - begin, r.addend, r.sym});
        }
        replacements.push_back(slice);
      };

      // r is the only relocation in a jump table entry. Figure out whether it
      // is a branch pointing to the start of a statically known section that
      // hasn't already been moved while processing a different jump table
      // section, and if so return it. Returns nullptr otherwise.
      auto getMovableSection = [&](Relocation &r) -> InputSection * {
        if (r.type != R_X86_64_PC32 && r.type != R_X86_64_PLT32)
          return nullptr;
        auto *sym = dyn_cast_or_null<Defined>(r.sym);
        // The branch must resolve to offset 0 of the target section: the
        // symbol value plus the addend has to equal -4.
        if (!sym || sym->isPreemptible || sym->isGnuIFunc() ||
            sym->value + r.addend != -4ull)
          return nullptr;
        auto *target = dyn_cast_or_null<InputSection>(sym->section);
        if (!target || target->addralign > sec->entsize ||
            sectionReplacements.count(target))
          return nullptr;
        // Jump tables (including the one being processed) are sliced up and
        // emptied by this pass, so they must never be treated as a function
        // body that can be moved.
        if (target->type == SHT_LLVM_CFI_JUMP_TABLE)
          return nullptr;
        // The replacement pass below only rewrites executable output sections.
        // A target without an output section, or in a non-executable one,
        // would make the sections placed next to it disappear from the output.
        OutputSection *parentOsec = target->getParent();
        if (!parentOsec || !(parentOsec->flags & SHF_EXECINSTR))
          return nullptr;
        return target;
      };

      // Figure out the movable section for the last entry. We do this first
      // because the last entry controls which output section the jump table is
      // placed into, which affects move eligibility for other sections.
      auto *lastSec = [&]() -> InputSection * {
        Relocation *lastReloc = sec->relocs().end();
        while (lastReloc != sec->relocs().begin() &&
               (lastReloc - 1)->offset >= sec->size - sec->entsize)
          --lastReloc;
        // The last entry must contain exactly one relocation. Compare
        // distances instead of forming `lastReloc + 1`, which would point past
        // one-past-the-end when the entry has no relocation at all.
        if (sec->relocs().end() - lastReloc != 1)
          return nullptr;
        return getMovableSection(*lastReloc);
      }();
      OutputSection *targetOutputSec;
      if (lastSec) {
        // We've already decided to move the jump table next to lastSec, so
        // register lastSec now to make sure that getMovableSection() doesn't
        // hand it out again for an earlier entry. The final replacement list
        // is stored further below.
        sectionReplacements[lastSec] = replacements;
        targetOutputSec = lastSec->getParent();
      } else {
        targetOutputSec = sec->getParent();
      }

      // Walk the jump table entries other than the last one looking for
      // sections that are small enough to be moved into the jump table and in
      // the same output section as the jump table's destination.
      // [begin, cur) is the pending run of unmodified entries and
      // [rbegin, rcur) the relocations that belong to it.
      size_t begin = 0;
      Relocation *rbegin = sec->relocs().begin();
      size_t cur = begin;
      Relocation *rcur = rbegin;
      while (cur != sec->size - sec->entsize) {
        size_t next = cur + sec->entsize;
        Relocation *rnext = rcur;
        while (rnext != sec->relocs().end() && rnext->offset < next)
          ++rnext;
        // Only entries with exactly one relocation are candidates.
        if (rnext - rcur == 1) {
          InputSection *target = getMovableSection(*rcur);
          if (target && target->size <= sec->entsize &&
              target->getParent() == targetOutputSec) {
            // Okay, we found a small enough section. Move it into the jump
            // table. First add a slice for the unmodified jump table entries
            // before this one.
            addSectionSlice(begin, cur, rbegin, rcur);
            // Add the target to our replacement list, and set the target's
            // replacement list to the empty list. This removes it from its
            // original position and adds it here, as well as causing
            // future getMovableSection() queries to return nullptr.
            replacements.push_back(target);
            sectionReplacements[target] = {};
            begin = next;
            rbegin = rnext;
          }
        }
        cur = next;
        rcur = rnext;
      }

      // Finally, process the last entry. If it is movable, move the entire
      // jump table in front of it and delete the last entry (so that the last
      // function's body acts as the last jump table entry), otherwise leave
      // the jump table where it is and keep the last entry.
      if (lastSec) {
        addSectionSlice(begin, cur, rbegin, rcur);
        replacements.push_back(lastSec);
        sectionReplacements[sec] = {};
        sectionReplacements[lastSec] = replacements;
        for (auto *s : replacements)
          s->parent = lastSec->parent;
      } else {
        addSectionSlice(begin, sec->size, rbegin, sec->relocs().end());
        sectionReplacements[sec] = replacements;
        for (auto *s : replacements)
          s->parent = sec->parent;
      }

      // Everything from the original section has been recreated, so delete the
      // original contents.
      sec->relocations.clear();
      sec->size = 0;
    }
  }

  // Now that we have the complete mapping of replacements, go through the
  // input section lists and apply the replacements.
  for (OutputSection *osec : ctx.outputSections) {
    if (!(osec->flags & SHF_EXECINSTR))
      continue;
    for (SectionCommand *cmd : osec->commands) {
      auto *isd = dyn_cast<InputSectionDescription>(cmd);
      if (!isd)
        continue;
      SmallVector<InputSection *> newSections;
      for (auto *sec : isd->sections) {
        auto i = sectionReplacements.find(sec);
        if (i == sectionReplacements.end())
          newSections.push_back(sec);
        else
          newSections.append(i->second.begin(), i->second.end());
      }
      isd->sections = std::move(newSections);
    }
  }
}

bool X86_64::relaxOnce(int pass) const {
uint64_t minVA = UINT64_MAX, maxVA = 0;
for (OutputSection *osec : ctx.outputSections) {
Expand Down
3 changes: 2 additions & 1 deletion lld/ELF/OutputSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ static bool canMergeToProgbits(Ctx &ctx, unsigned type) {
return type == SHT_NOBITS || type == SHT_PROGBITS || type == SHT_INIT_ARRAY ||
type == SHT_PREINIT_ARRAY || type == SHT_FINI_ARRAY ||
type == SHT_NOTE ||
(type == SHT_X86_64_UNWIND && ctx.arg.emachine == EM_X86_64);
(type == SHT_X86_64_UNWIND && ctx.arg.emachine == EM_X86_64) ||
type == SHT_LLVM_CFI_JUMP_TABLE;
}

// Record that isec will be placed in the OutputSection. isec does not become
Expand Down
2 changes: 1 addition & 1 deletion lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,7 @@ void RelocationScanner::scan(Relocs<RelTy> rels) {
// branch-to-branch optimization.
if (is_contained({EM_RISCV, EM_LOONGARCH}, ctx.arg.emachine) ||
(ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") ||
ctx.arg.branchToBranch)
ctx.arg.branchToBranch || sec->type == SHT_LLVM_CFI_JUMP_TABLE)
llvm::stable_sort(sec->relocs(),
[](const Relocation &lhs, const Relocation &rhs) {
return lhs.offset < rhs.offset;
Expand Down
2 changes: 2 additions & 0 deletions lld/ELF/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ class TargetInfo {

// Do a linker relaxation pass and return true if we changed something.
virtual bool relaxOnce(int pass) const { return false; }
// Relax CFI jump tables if implemented by target.
virtual void relaxCFIJumpTables() const {}
// Do finalize relaxation after collecting relaxation infos.
virtual void finalizeRelax(int passes) const {}

Expand Down
2 changes: 2 additions & 0 deletions lld/ELF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1528,6 +1528,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
if (ctx.arg.randomizeSectionPadding)
randomizeSectionPadding(ctx);

ctx.target->relaxCFIJumpTables();

uint32_t pass = 0, assignPasses = 0;
for (;;) {
bool changed = ctx.target->needsThunks
Expand Down
181 changes: 181 additions & 0 deletions lld/test/ELF/x86_64-relax-jump-tables.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// REQUIRES: x86
// RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
// RUN: ld.lld %t.o -shared -o %t
// RUN: llvm-objdump -d --show-all-symbols %t | FileCheck %s

// Test for relaxation of CFI jump tables on x86-64. The file is assembled,
// linked as a shared object and disassembled; the FileCheck assertions then
// verify, per jump table entry fN, whether the branch to fN.cfi was kept or
// whether the body of fN.cfi was placed directly at the entry's position.
// The jump table sections come first; the target functions fN.cfi follow
// further below, each in its own section.

// Mostly positive cases, except for f2.
.section .text.jt1,"ax",@llvm_cfi_jump_table,8
// Function fits.
f1:
jmp f1.cfi
.balign 8, 0xcc

// Function too large.
f2:
jmp f2.cfi
.balign 8, 0xcc

// Function too large, but may be placed at the end.
// Because this causes the jump table to move, it is tested below.
f3:
jmp f3.cfi
.balign 8, 0xcc

// Mostly negative cases, except for f4.
.section .text.jt2,"ax",@llvm_cfi_jump_table,16
// Function already moved into jt1.
// CHECK: <f1a>:
// CHECK-NEXT: jmp {{.*}} <f1.cfi>
f1a:
jmp f1.cfi
.balign 16, 0xcc

// Function already moved into jt1.
// CHECK: <f3a>:
// CHECK-NEXT: jmp {{.*}} <f3.cfi>
f3a:
jmp f3.cfi
.balign 16, 0xcc

// Function too large for jt1 but small enough for jt2.
// CHECK: <f4>:
// CHECK-NEXT: <f4.cfi>:
// CHECK-NEXT: retq $0x4
f4:
jmp f4.cfi
.balign 16, 0xcc

// Function too large for jt2.
// CHECK: <f5>:
// CHECK-NEXT: jmp {{.*}} <f5.cfi>
f5:
jmp f5.cfi
.balign 16, 0xcc

// Branch target not at start of section.
// CHECK: <f6>:
// CHECK-NEXT: jmp {{.*}} <f6.cfi>
f6:
jmp f6.cfi
.balign 16, 0xcc

// Overaligned section.
// CHECK: <f7>:
// CHECK-NEXT: jmp {{.*}} <f7.cfi>
f7:
jmp f7.cfi
.balign 16, 0xcc

// Branch to IFUNC.
// The branch target address is captured as IPLT here and matched against the
// .iplt disassembly at the end of the file.
// CHECK: <f8>:
// CHECK-NEXT: jmp 0x[[IPLT:[0-9a-f]*]]
f8:
jmp f8.cfi
.balign 16, 0xcc

// Unexpected number of relocations in entry.
// CHECK: <f9>:
// CHECK-NEXT: jmp {{.*}} <f9.cfi>
// CHECK-NEXT: jmp {{.*}} <f9.cfi>
f9:
jmp f9.cfi
jmp f9.cfi
.balign 16, 0xcc

// Branch to different output section.
// NOTE(review): this case has no FileCheck assertions attached; consider
// adding one so that the negative case is actually verified.
f10:
jmp f10.cfi
.balign 16, 0xcc

// Branch via PLT to STB_GLOBAL symbol.
// CHECK: <f11>:
// CHECK-NEXT: jmp {{.*}} <f11.cfi@plt>
f11:
jmp f11.cfi
.balign 16, 0xcc

// Invalid jumptable: entsize unset.
// NOTE(review): 0x6fff4c0e is presumably the numeric value of the
// llvm_cfi_jump_table section type, spelled as a number so that the entsize
// operand can be left out -- confirm against the section type definition.
// CHECK: <f12>:
// CHECK-NEXT: jmp {{.*}} <f12.cfi>
.section .text.jt3,"ax",@0x6fff4c0e
f12:
jmp f12.cfi
.balign 8, 0xcc

// Invalid jumptable: size not a multiple of entsize.
// There is deliberately no .balign after the jmp, so the section is not padded
// up to the entry size of 8.
// CHECK: <f13>:
// CHECK-NEXT: jmp {{.*}} <f13.cfi>
.section .text.jt4,"ax",@llvm_cfi_jump_table,8
f13:
jmp f13.cfi

// Target functions. The .zero directives pad the function bodies to the sizes
// needed by the "fits" / "too large" cases above.

// CHECK: <f1>:
// CHECK-NEXT: <f1.cfi>:
// CHECK-NEXT: retq $0x1
.section .text.f1,"ax",@progbits
f1.cfi:
ret $1

// CHECK: <f2>:
// CHECK-NEXT: jmp {{.*}} <f2.cfi>
.section .text.f2,"ax",@progbits
f2.cfi:
ret $2
.zero 16

// The last entry of jt1: the f3 label is expected directly in front of the
// body of f3.cfi, with no branch in between.
// CHECK: <f3>:
// CHECK-NEXT: <f3.cfi>:
// CHECK-NEXT: retq $0x3
.section .text.f3,"ax",@progbits
f3.cfi:
ret $3
.zero 16

.section .text.f4,"ax",@progbits
f4.cfi:
ret $4
.zero 13

// One byte more padding than f4.cfi, which makes it too large for jt2.
.section .text.f5,"ax",@progbits
f5.cfi:
ret $5
.zero 14

// The nop in front of the label puts f6.cfi at a non-zero section offset.
.section .text.f6,"ax",@progbits
nop
f6.cfi:
ret $6

// Section alignment of 32 exceeds the jt2 entry size of 16.
.section .text.f7,"ax",@progbits
.balign 32
f7.cfi:
ret $7

.section .text.f8,"ax",@progbits
.type f8.cfi,@gnu_indirect_function
f8.cfi:
ret $8

.section .text.f9,"ax",@progbits
f9.cfi:
ret $9

// Not named .text.*; per the f10 case above this is expected to end up in a
// different output section than the jump tables.
.section foo,"ax",@progbits
f10.cfi:
ret $10

.section .text.f11,"ax",@progbits
.globl f11.cfi
f11.cfi:
ret $11

.section .text.f12,"ax",@progbits
f12.cfi:
ret $12

.section .text.f13,"ax",@progbits
f13.cfi:
ret $13

// CHECK: <.iplt>:
// CHECK-NEXT: [[IPLT]]:
Loading