Skip to content

Support dynamic linking #244

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ STAGE0 := shecc
STAGE1 := shecc-stage1.elf
STAGE2 := shecc-stage2.elf

BUILTIN_LIBC ?= c.c
STAGE0_FLAGS ?= --dump-ir
STAGE1_FLAGS ?=
ifeq ($(DYNLINK),1)
BUILTIN_LIBC := dyn-c.c
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you avoid dyn-c.c?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will rename it to c.h to reflect that it is a header file.

STAGE0_FLAGS += --dynlink
STAGE1_FLAGS += --dynlink
endif

OUT ?= out
ARCHS = arm riscv
ARCH ?= $(firstword $(ARCHS))
Expand Down Expand Up @@ -111,9 +120,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
$(VECHO) " CC+LD\t$@\n"
$(Q)$(CC) $(CFLAGS) -o $@ $^

$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
$(VECHO) " GEN\t$@\n"
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
$(Q)$(RM) $(OUT)/c.normalized.c

Expand All @@ -128,12 +137,12 @@ $(OUT)/$(STAGE0): $(OUT)/libc.inc $(OBJS)
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
$(Q)$(STAGE1_CHECK_CMD)
$(VECHO) " SHECC\t$@\n"
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
$(Q)chmod a+x $@

$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
$(VECHO) " SHECC\t$@\n"
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c

bootstrap: $(OUT)/$(STAGE2)
$(Q)chmod 775 $(OUT)/$(STAGE2)
Expand Down
45 changes: 45 additions & 0 deletions lib/dyn-c.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* shecc - Self-Hosting and Educational C Compiler.
*
* shecc is freely redistributable under the BSD 2 clause license. See the
* file "LICENSE" for information on usage and redistribution of this file.
*/

/* Declarations of C Standard library functions */

/* This part of the file only declares macros, one typedef and function
 * prototypes; none of the functions below is defined here.
 * NOTE(review): presumably the definitions are expected to come from the
 * shared C library when the compiler is run with --dynlink -- confirm.
 */

/* Null pointer constant, spelled as the integer 0. */
#define NULL 0

/* Boolean support: 'bool' expands to the _Bool keyword, and true/false are
 * plain integer constants.
 */
#define bool _Bool
#define true 1
#define false 0

/* File I/O */
/* FILE is declared as an int and is only used through 'FILE *' in the
 * prototypes below.
 */
typedef int FILE;
FILE *fopen(char *filename, char *mode);
int fclose(FILE *stream);
int fgetc(FILE *stream);
char *fgets(char *str, int n, FILE *stream);
int fputc(int c, FILE *stream);

/* string-related functions */
/* Lengths and counts are declared as 'int' and pointer parameters carry no
 * 'const' qualifier in these prototypes.
 */
int strlen(char *str);
int strcmp(char *s1, char *s2);
int strncmp(char *s1, char *s2, int len);
char *strcpy(char *dest, char *src);
char *strncpy(char *dest, char *src, int len);
char *memcpy(char *dest, char *src, int count);
void *memset(void *s, int c, int n);

/* formatted output string */
/* All three are variadic; the format string is the last named parameter. */
int printf(char *str, ...);
int sprintf(char *buffer, char *str, ...);
int snprintf(char *buffer, int n, char *str, ...);

/* Terminating program */
void exit(int exit_code);
void abort(void);

/* Dynamic memory allocation/deallocation functions */
/* Sizes are passed as 'int' in these prototypes. */
void *malloc(int size);
void *calloc(int n, int size);
void free(void *ptr);
6 changes: 6 additions & 0 deletions mk/arm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ ARCH_DEFS = \
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
\#define ELF_FLAGS 0x5000200\n$\
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
\#define LIBC_SO \"libc.so.6\"\n$\
\#define PLT_FIXUP_SIZE 20\n$\
\#define PLT_ENT_SIZE 12\n$\
\#define R_ARCH_JUMP_SLOT 0x16\n$\
"
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabi/
3 changes: 3 additions & 0 deletions mk/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))

# Generate the path to the architecture-specific qemu
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
ifeq ($(DYNLINK),1)
TARGET_EXEC += $(RUNNER_LD_PREFIX)
endif
endif
export TARGET_EXEC

Expand Down
8 changes: 8 additions & 0 deletions mk/riscv.mk
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,12 @@ ARCH_DEFS = \
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
\#define ELF_MACHINE 0xf3\n$\
\#define ELF_FLAGS 0\n$\
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
\#define LIBC_SO \"libc.so.6\"\n$\
\#define PLT_FIXUP_SIZE 20\n$\
\#define PLT_ENT_SIZE 12\n$\
\#define R_ARCH_JUMP_SLOT 0x5\n$\
"

# TODO: Set this variable for RISC-V architecture
RUNNER_LD_PREFIX=
119 changes: 96 additions & 23 deletions src/arm-codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,16 @@ void update_elf_offset(ph2_ir_t *ph2_ir)

void cfg_flatten(void)
{
func_t *func = find_func("__syscall");
func->bbs->elf_offset = 44; /* offset of start + exit in codegen */
func_t *func;

if (dynlink)
elf_offset = 108; /* offset of start + exit in codegen */
else {
func = find_func("__syscall");
func->bbs->elf_offset = 44; /* offset of start + exit in codegen */
elf_offset = 80; /* offset of start + exit + syscall in codegen */
}

elf_offset = 80; /* offset of start + exit + syscall in codegen */
GLOBAL_FUNC->bbs->elf_offset = elf_offset;

for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
Expand All @@ -147,9 +153,15 @@ void cfg_flatten(void)
}

/* prepare 'argc' and 'argv', then proceed to 'main' function */
elf_offset += 24;
if (dynlink)
elf_offset += 12;
else
elf_offset += 24;

for (func = FUNC_LIST.head; func; func = func->next) {
if (!func->bbs)
continue;

/* reserve stack */
ph2_ir_t *flatten_ir = add_ph2_ir(OP_define);
flatten_ir->src0 = func->stack_size;
Expand Down Expand Up @@ -286,15 +298,23 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
return;
case OP_call:
func = find_func(ph2_ir->func_name);
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
if (func->bbs)
ofs = func->bbs->elf_offset - elf_code->size;
else
ofs = (elf_plt_start + func->plt_offset) -
(elf_code_start + elf_code->size);
emit(__bl(__AL, ofs));
return;
case OP_load_data_address:
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
return;
case OP_address_of_func:
func = find_func(ph2_ir->func_name);
ofs = elf_code_start + func->bbs->elf_offset;
if (func->bbs)
ofs = elf_code_start + func->bbs->elf_offset;
else
ofs = elf_plt_start + func->plt_offset;
emit(__movw(__AL, __r8, ofs));
emit(__movt(__AL, __r8, ofs));
emit(__sw(__AL, __r8, rn, 0));
Expand Down Expand Up @@ -451,11 +471,40 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
}
}

void plt_generate(void);
void code_generate(void)
{
elf_data_start = elf_code_start + elf_offset;
if (dynlink) {
plt_generate();
/* Call __libc_start_main() */
emit(__mov_i(__AL, __r11, 0));
emit(__mov_i(__AL, __lr, 0));
emit(__pop_word(__AL, __r1));
emit(__mov_r(__AL, __r2, __sp));
emit(__push_reg(__AL, __r2));
emit(__push_reg(__AL, __r0));
emit(__mov_i(__AL, __r12, 0));
emit(__push_reg(__AL, __r12));
emit(__movw(__AL, __r0, elf_code_start + 56));
emit(__movt(__AL, __r0, elf_code_start + 56));
emit(__mov_i(__AL, __r3, 0));
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
(elf_code_start + elf_code->size)));
/* Goto the 'exit' code snippet if __libc_start_main returns */
emit(__mov_i(__AL, __r0, 127));
emit(__bl(__AL, 28));

/* start */
/* If the compiled program is dynamic linking, the starting
* point of 'start' is located here.
*
* Preserve the 'argc' and 'argv' for the 'main' function.
* */
emit(__mov_r(__AL, __r9, __r0));
emit(__mov_r(__AL, __r10, __r1));
}
/* If the compiled program is static linking, the starting point
* of 'start' is here.
* */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__sub_r(__AL, __sp, __sp, __r8));
Expand All @@ -470,32 +519,56 @@ void code_generate(void)
emit(__mov_i(__AL, __r7, 1));
emit(__svc());

/* syscall */
emit(__mov_r(__AL, __r7, __r0));
emit(__mov_r(__AL, __r0, __r1));
emit(__mov_r(__AL, __r1, __r2));
emit(__mov_r(__AL, __r2, __r3));
emit(__mov_r(__AL, __r3, __r4));
emit(__mov_r(__AL, __r4, __r5));
emit(__mov_r(__AL, __r5, __r6));
emit(__svc());
emit(__mov_r(__AL, __pc, __lr));
if (!dynlink) {
/* syscall */
emit(__mov_r(__AL, __r7, __r0));
emit(__mov_r(__AL, __r0, __r1));
emit(__mov_r(__AL, __r1, __r2));
emit(__mov_r(__AL, __r2, __r3));
emit(__mov_r(__AL, __r3, __r4));
emit(__mov_r(__AL, __r4, __r5));
emit(__mov_r(__AL, __r5, __r6));
emit(__svc());
emit(__mov_r(__AL, __pc, __lr));
}

ph2_ir_t *ph2_ir;
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
ph2_ir = ph2_ir->next)
emit_ph2_ir(ph2_ir);

/* prepare 'argc' and 'argv', then proceed to 'main' function */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __r8, __r12, __r8));
emit(__lw(__AL, __r0, __r8, 0));
emit(__add_i(__AL, __r1, __r8, 4));
if (dynlink) {
emit(__mov_r(__AL, __r0, __r9));
emit(__mov_r(__AL, __r1, __r10));
} else {
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __r8, __r12, __r8));
emit(__lw(__AL, __r0, __r8, 0));
emit(__add_i(__AL, __r1, __r8, 4));
}
emit(__b(__AL, MAIN_BB->elf_offset - elf_code->size));

for (int i = 0; i < ph2_ir_idx; i++) {
ph2_ir = PH2_IR_FLATTEN[i];
emit_ph2_ir(ph2_ir);
}
}

/* Fill 'elf_plt' with the procedure linkage table stubs.
 *
 * Layout written, in order:
 *   1. A leading five-word stub that pushes lr, materializes the address
 *      elf_got_start + PTR_SIZE * 2 in r10, copies it to lr and loads pc
 *      from that address.
 *   2. One three-word stub per remaining PLT_ENT_SIZE bytes of
 *      (plt_sz - PLT_FIXUP_SIZE). Stub number k (counting from 0) loads pc
 *      from the word at elf_got_start + PTR_SIZE * (k + 3), leaving that
 *      address in r12.
 *
 * NOTE(review): the word counts above match PLT_FIXUP_SIZE / PLT_ENT_SIZE
 * only if elf_write_int() appends exactly 4 bytes -- confirm.
 * NOTE(review): the leading stub overwrites r10; confirm that no code
 * relies on r10 surviving a call routed through the PLT.
 */
void plt_generate(void)
{
    int got_slot = elf_got_start + PTR_SIZE * 2;
    int stubs_size = plt_sz - PLT_FIXUP_SIZE;
    int emitted = 0;

    /* Leading stub: save lr, then jump through the word at 'got_slot'. */
    elf_write_int(elf_plt, __push_reg(__AL, __lr));
    elf_write_int(elf_plt, __movw(__AL, __r10, got_slot));
    elf_write_int(elf_plt, __movt(__AL, __r10, got_slot));
    elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
    elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));

    /* Per-function stubs: each one jumps through the next GOT word. */
    while (emitted < stubs_size) {
        got_slot += PTR_SIZE;
        elf_write_int(elf_plt, __movw(__AL, __r12, got_slot));
        elf_write_int(elf_plt, __movt(__AL, __r12, got_slot));
        elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
        emitted += PLT_ENT_SIZE;
    }
}
10 changes: 10 additions & 0 deletions src/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
}

/* Build the instruction word that pushes a single register.
 *
 * cond: condition code passed through to arm_encode().
 * rt:   register to be stored.
 *
 * Returns the word produced by arm_encode() for opcode field 0x52, base
 * register number 0xd, 'rt' and operand 0x4.
 * NOTE(review): this looks like 'str rt, [sp, #-4]!' (pre-indexed store
 * with write-back) -- confirm against arm_encode()'s field layout.
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
{
    int opcode = (0x5 << 4) | 0x2;
    int base = 0xd;
    int step = 0x4;

    return arm_encode(cond, opcode, base, rt, step);
}

/* Build the instruction word that pops a single word into a register.
 *
 * cond: condition code passed through to arm_encode().
 * rt:   register that receives the loaded word.
 *
 * Returns the word produced by arm_encode() for opcode field 0x49, base
 * register number 0xd, 'rt' and operand 0x4.
 * NOTE(review): this looks like 'ldr rt, [sp], #4' (post-indexed load) --
 * confirm against arm_encode()'s field layout.
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
{
    int opcode = (0x4 << 4) | 0x9;
    int base = 0xd;
    int step = 0x4;

    return arm_encode(cond, opcode, base, rt, step);
}

int __b(arm_cond_t cond, int ofs)
{
int o = (ofs - 8) >> 2;
Expand Down
38 changes: 38 additions & 0 deletions src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,18 @@
#define MAX_SYMTAB 65536
#define MAX_STRTAB 65536
#define MAX_HEADER 1024
#define MAX_PROGRAM_HEADER 1024
#define MAX_SECTION 1024
#define MAX_ALIASES 128
#define MAX_SECTION_HEADER 1024
#define MAX_SHSTR 1024
#define MAX_INTERP 1024
#define MAX_DYNAMIC 1024
#define MAX_DYNSYM 1024
#define MAX_DYNSTR 1024
#define MAX_RELPLT 1024
#define MAX_PLT 1024
#define MAX_GOTPLT 1024
#define MAX_CONSTANTS 1024
#define MAX_CASES 128
#define MAX_NESTING 128
Expand Down Expand Up @@ -515,6 +525,11 @@ struct func {
int bb_cnt;
int visited;

/* Information used for dynamic linking */
bool is_used;
int plt_offset;
int got_offset;

struct func *next;
};

Expand Down Expand Up @@ -577,3 +592,26 @@ typedef struct {
int sh_addralign;
int sh_entsize;
} elf32_shdr_t;

/* Structures for dynamic linked program */
/* For .dynsym section. */
/* One symbol record: three ints followed by four chars.
 * Field names follow the Elf32_Sym entry of the ELF specification.
 * NOTE(review): the record is 16 bytes only if int is 4 bytes wide and no
 * padding is inserted -- confirm for the target.
 * NOTE(review): st_shndx is declared as two chars rather than one 16-bit
 * field; verify the byte order used by the code that fills it.
 */
typedef struct {
int st_name;
int st_value;
int st_size;
char st_info;
char st_other;
char st_shndx[2];
} elf32_sym_t;

/* For .rel.plt section */
/* One relocation record made of two ints.
 * Field names follow the Elf32_Rel entry of the ELF specification.
 * NOTE(review): presumably r_info carries the relocation type (such as
 * R_ARCH_JUMP_SLOT) together with a symbol index -- confirm against the
 * code that writes .rel.plt.
 */
typedef struct {
int r_offset;
int r_info;
} elf32_rel_t;

/* For .dynamic section */
/* One dynamic-section record made of two ints.
 * Field names follow the Elf32_Dyn entry of the ELF specification, where
 * d_un is a union of a value and an address; here it is a single int.
 */
typedef struct {
int d_tag;
int d_un;
} elf32_dyn_t;
Loading