* [PATCH v4 1/4] LoongArch: Add section of GOT for kernel module
2022-07-29 8:38 [PATCH v4 0/4] LoongArch: Support new relocation types Xi Ruoyao
@ 2022-07-29 8:39 ` Xi Ruoyao
2022-07-29 8:40 ` [PATCH v4 2/4] LoongArch: Support R_LARCH_SOP_PUSH_GPREL relocation type in " Xi Ruoyao
` (4 subsequent siblings)
5 siblings, 0 replies; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-29 8:39 UTC (permalink / raw)
To: loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Youling Tang, Jinyang He
The address of an external symbol may be more than a 32-bit offset away. We
were using the `-Wa,-mla-global-with-abs` and `-Wa,-mla-local-with-abs`
to prevent the compiler and assembler from generating GOT relocations,
but these options are undocumented hacks and do not work anymore with
GAS 2.40 and GCC 13.
Let the module loader emit GOT entries for data symbols so we would be
able to handle GOT relocations. The GOT entry is just the data symbol
address.
In module.lds, emit a stub .got section for a section header entry.
The actual content of the entry will be filled at runtime by
module_frob_arch_sections.
A special symbol named "_GLOBAL_OFFSET_TABLE_" is used by stack-based
relocations for the PC-relative offset of a GOT entry, like:
R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_
R_LARCH_SOP_PUSH_GPREL foo
R_LARCH_SOP_ADD
Each kernel module has its own GOT (like a shared object), so we need
to generate _GLOBAL_OFFSET_TABLE_ as a local symbol for each module.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
---
arch/loongarch/include/asm/module.h | 23 +++++++++++++
arch/loongarch/include/asm/module.lds.h | 1 +
arch/loongarch/kernel/module-sections.c | 43 ++++++++++++++++++++++---
3 files changed, 63 insertions(+), 4 deletions(-)
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
index 9f6718df1854..76a98a0ab8a0 100644
--- a/arch/loongarch/include/asm/module.h
+++ b/arch/loongarch/include/asm/module.h
@@ -19,6 +19,7 @@ struct mod_section {
struct mod_arch_specific {
struct mod_section plt;
struct mod_section plt_idx;
+ struct mod_section got;
};
struct plt_entry {
@@ -28,11 +29,16 @@ struct plt_entry {
u32 inst_jirl;
};
+struct got_entry {
+ Elf_Addr symbol_addr;
+};
+
struct plt_idx_entry {
unsigned long symbol_addr;
};
Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val);
+Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val);
static inline struct plt_entry emit_plt_entry(unsigned long val)
{
@@ -51,6 +57,11 @@ static inline struct plt_idx_entry emit_plt_idx_entry(unsigned long val)
return (struct plt_idx_entry) { val };
}
+static inline struct got_entry emit_got_entry(Elf_Addr val)
+{
+ return (struct got_entry) { val };
+}
+
static inline int get_plt_idx(unsigned long val, const struct mod_section *sec)
{
int i;
@@ -77,4 +88,16 @@ static inline struct plt_entry *get_plt_entry(unsigned long val,
return plt + plt_idx;
}
+static inline struct got_entry *get_got_entry(Elf_Addr val,
+ const struct mod_section *sec)
+{
+ struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr;
+ int i;
+
+ for (i = 0; i < sec->num_entries; i++)
+ if (got[i].symbol_addr == val)
+ return &got[i];
+ return NULL;
+}
+
#endif /* _ASM_MODULE_H */
diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h
index 31c1c0db11a3..42b7cca0b947 100644
--- a/arch/loongarch/include/asm/module.lds.h
+++ b/arch/loongarch/include/asm/module.lds.h
@@ -4,4 +4,5 @@ SECTIONS {
. = ALIGN(4);
.plt : { BYTE(0) }
.plt.idx : { BYTE(0) }
+ .got : { HIDDEN(_GLOBAL_OFFSET_TABLE_ = .); BYTE(0) }
}
diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c
index 6d498288977d..36a77771d18c 100644
--- a/arch/loongarch/kernel/module-sections.c
+++ b/arch/loongarch/kernel/module-sections.c
@@ -33,6 +33,25 @@ Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val)
return (Elf_Addr)&plt[nr];
}
+Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val)
+{
+ struct mod_section *got_sec = &mod->arch.got;
+ int i = got_sec->num_entries;
+ struct got_entry *got = get_got_entry(val, got_sec);
+
+ if (got)
+ return (Elf_Addr)got;
+
+ /* There is no GOT entry existing for val yet. Create a new one. */
+ got = (struct got_entry *)got_sec->shdr->sh_addr;
+ got[i] = emit_got_entry(val);
+
+ got_sec->num_entries++;
+ BUG_ON(got_sec->num_entries > got_sec->max_entries);
+
+ return (Elf_Addr)&got[i];
+}
+
static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y)
{
return x->r_info == y->r_info && x->r_addend == y->r_addend;
@@ -50,7 +69,8 @@ static bool duplicate_rela(const Elf_Rela *rela, int idx)
return false;
}
-static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts)
+static void count_max_entries(Elf_Rela *relas, int num,
+ unsigned int *plts, unsigned int *gots)
{
unsigned int i, type;
@@ -59,14 +79,16 @@ static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts)
if (type == R_LARCH_SOP_PUSH_PLT_PCREL) {
if (!duplicate_rela(relas, i))
(*plts)++;
- }
+ } else if (type == R_LARCH_SOP_PUSH_GPREL)
+ if (!duplicate_rela(relas, i))
+ (*gots)++;
}
}
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
- unsigned int i, num_plts = 0;
+ unsigned int i, num_plts = 0, num_gots = 0;
/*
* Find the empty .plt sections.
@@ -76,6 +98,8 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
mod->arch.plt.shdr = sechdrs + i;
else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx"))
mod->arch.plt_idx.shdr = sechdrs + i;
+ else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got"))
+ mod->arch.got.shdr = sechdrs + i;
}
if (!mod->arch.plt.shdr) {
@@ -86,6 +110,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
pr_err("%s: module PLT.IDX section(s) missing\n", mod->name);
return -ENOEXEC;
}
+ if (!mod->arch.got.shdr) {
+ pr_err("%s: module GOT section(s) missing\n", mod->name);
+ return -ENOEXEC;
+ }
/* Calculate the maxinum number of entries */
for (i = 0; i < ehdr->e_shnum; i++) {
@@ -100,7 +128,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
if (!(dst_sec->sh_flags & SHF_EXECINSTR))
continue;
- count_max_entries(relas, num_rela, &num_plts);
+ count_max_entries(relas, num_rela, &num_plts, &num_gots);
}
mod->arch.plt.shdr->sh_type = SHT_NOBITS;
@@ -117,5 +145,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
mod->arch.plt_idx.num_entries = 0;
mod->arch.plt_idx.max_entries = num_plts;
+ mod->arch.got.shdr->sh_type = SHT_NOBITS;
+ mod->arch.got.shdr->sh_flags = SHF_ALLOC;
+ mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry);
+ mod->arch.got.num_entries = 0;
+ mod->arch.got.max_entries = num_gots;
+
return 0;
}
--
2.37.0
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [PATCH v4 2/4] LoongArch: Support R_LARCH_SOP_PUSH_GPREL relocation type in kernel module
2022-07-29 8:38 [PATCH v4 0/4] LoongArch: Support new relocation types Xi Ruoyao
2022-07-29 8:39 ` [PATCH v4 1/4] LoongArch: Add section of GOT for kernel module Xi Ruoyao
@ 2022-07-29 8:40 ` Xi Ruoyao
2022-07-29 8:41 ` [PATCH v4 3/4] LoongArch: Remove -fplt and -Wa,-mla-* from CFLAGS Xi Ruoyao
` (3 subsequent siblings)
5 siblings, 0 replies; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-29 8:40 UTC (permalink / raw)
To: loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Youling Tang, Jinyang He
This relocation type pushes the offset of the GOT entry for a symbol
from the beginning of GOT into the relocation stack. Our linker script
has initialized an empty GOT, so we need to create a new GOT entry if
there is no existing one for a symbol.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
---
arch/loongarch/kernel/module.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 638427ff0d51..3ac4fbb5f109 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -122,6 +122,16 @@ static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, u32 *location, E
return apply_r_larch_sop_push_pcrel(mod, location, v, rela_stack, rela_stack_top, type);
}
+static int apply_r_larch_sop_push_gprel(struct module *mod, u32 *location,
+ Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
+ unsigned int type)
+{
+ Elf_Addr got = module_emit_got_entry(mod, v);
+ ptrdiff_t offset = (void *)got - (void *)mod->arch.got.shdr->sh_addr;
+
+ return rela_stack_push(offset, rela_stack, rela_stack_top);
+}
+
static int apply_r_larch_sop(struct module *mod, u32 *location, Elf_Addr v,
s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
{
@@ -306,6 +316,7 @@ static reloc_rela_handler reloc_rela_handlers[] = {
[R_LARCH_SOP_PUSH_PCREL] = apply_r_larch_sop_push_pcrel,
[R_LARCH_SOP_PUSH_ABSOLUTE] = apply_r_larch_sop_push_absolute,
[R_LARCH_SOP_PUSH_DUP] = apply_r_larch_sop_push_dup,
+ [R_LARCH_SOP_PUSH_GPREL] = apply_r_larch_sop_push_gprel,
[R_LARCH_SOP_PUSH_PLT_PCREL] = apply_r_larch_sop_push_plt_pcrel,
[R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop,
[R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
--
2.37.0
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [PATCH v4 3/4] LoongArch: Remove -fplt and -Wa,-mla-* from CFLAGS
2022-07-29 8:38 [PATCH v4 0/4] LoongArch: Support new relocation types Xi Ruoyao
2022-07-29 8:39 ` [PATCH v4 1/4] LoongArch: Add section of GOT for kernel module Xi Ruoyao
2022-07-29 8:40 ` [PATCH v4 2/4] LoongArch: Support R_LARCH_SOP_PUSH_GPREL relocation type in " Xi Ruoyao
@ 2022-07-29 8:41 ` Xi Ruoyao
2022-07-29 8:42 ` [PATCH v4 4/4] LoongArch: Support modules with new relocation types Xi Ruoyao
` (2 subsequent siblings)
5 siblings, 0 replies; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-29 8:41 UTC (permalink / raw)
To: loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Youling Tang, Jinyang He
-fplt is the default of all supported compilers (GCC, and maybe Clang in
the future), so it need not be explicitly specified.
-Wa,-mla-* options were used to prevent the assembler from generating
GOT accesses for "la.local" and "la.global" macros. But GCC >= 13 will
generate GOT access explicitly without use of these macros by default if
the assembler supports explicit specification for relocations. And,
it's not recommended to override the default as the use of assembler
macros may limit optimization.
Now we can handle GOT and GOT-based relocations properly, so we can
remove these options and use GOT access for both GCC 12 and 13
(or newer).
GAS <= 2.39 does not support "la.got [reg], [sym] + [offset]" with a
non-zero offset. So in the assembly code we explicitly use "la.pcrel"
instead of "la" (now defaulted to "la.got") where a PC-relative
addressing is suitable, in order to work around this limitation and keep
the compatibility with old toolchains.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
---
arch/loongarch/Makefile | 4 ----
arch/loongarch/kernel/head.S | 10 +++++-----
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 039dcc4fe1f3..800349ea9310 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -40,10 +40,6 @@ endif
cflags-y += -G0 -pipe -msoft-float
LDFLAGS_vmlinux += -G0 -static -n -nostdlib
-KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel
-KBUILD_CFLAGS_KERNEL += -Wa,-mla-global-with-pcrel
-KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs
-KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
cflags-y += -ffreestanding
cflags-y += $(call cc-option, -mno-check-zero-division)
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 74ea7bf6c8d6..193329ed6e8c 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -60,17 +60,17 @@ SYM_CODE_START(kernel_entry) # kernel entry point
la.abs t0, 0f
jirl zero, t0, 0
0:
- la t0, __bss_start # clear .bss
+ la.pcrel t0, __bss_start # clear .bss
st.d zero, t0, 0
- la t1, __bss_stop - LONGSIZE
+ la.pcrel t1, __bss_stop - LONGSIZE
1:
addi.d t0, t0, LONGSIZE
st.d zero, t0, 0
bne t0, t1, 1b
- la t0, fw_arg0
+ la.pcrel t0, fw_arg0
st.d a0, t0, 0 # firmware arguments
- la t0, fw_arg1
+ la.pcrel t0, fw_arg1
st.d a1, t0, 0
/* KSave3 used for percpu base, initialized as 0 */
@@ -78,7 +78,7 @@ SYM_CODE_START(kernel_entry) # kernel entry point
/* GPR21 used for percpu base (runtime), initialized as 0 */
or u0, zero, zero
- la tp, init_thread_union
+ la.pcrel tp, init_thread_union
/* Set the SP after an empty pt_regs. */
PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE)
PTR_ADD sp, sp, tp
--
2.37.0
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [PATCH v4 4/4] LoongArch: Support modules with new relocation types
2022-07-29 8:38 [PATCH v4 0/4] LoongArch: Support new relocation types Xi Ruoyao
` (2 preceding siblings ...)
2022-07-29 8:41 ` [PATCH v4 3/4] LoongArch: Remove -fplt and -Wa,-mla-* from CFLAGS Xi Ruoyao
@ 2022-07-29 8:42 ` Xi Ruoyao
2022-08-01 9:45 ` Youling Tang
2022-08-09 11:31 ` Youling Tang
2022-07-29 9:47 ` [PATCH v4 0/4] LoongArch: Support " WANG Xuerui
2022-07-29 9:49 ` Youling Tang
5 siblings, 2 replies; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-29 8:42 UTC (permalink / raw)
To: loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Youling Tang, Jinyang He
If GAS 2.40 and/or GCC 13 is used to build the kernel, the modules will
contain R_LARCH_B26, R_LARCH_PCALA_HI20, R_LARCH_PCALA_LO12,
R_LARCH_GOT_PC_HI20, and R_LARCH_GOT_PC_LO12 relocations. Support them
in the module loader so that a kernel built with the latest toolchain
is able to load the modules.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
---
arch/loongarch/include/asm/elf.h | 37 +++++++++++
arch/loongarch/kernel/module-sections.c | 12 +++-
arch/loongarch/kernel/module.c | 85 +++++++++++++++++++++++++
3 files changed, 132 insertions(+), 2 deletions(-)
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index 5f3ff4781fda..7af0cebf28d7 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -74,6 +74,43 @@
#define R_LARCH_SUB64 56
#define R_LARCH_GNU_VTINHERIT 57
#define R_LARCH_GNU_VTENTRY 58
+#define R_LARCH_B16 64
+#define R_LARCH_B21 65
+#define R_LARCH_B26 66
+#define R_LARCH_ABS_HI20 67
+#define R_LARCH_ABS_LO12 68
+#define R_LARCH_ABS64_LO20 69
+#define R_LARCH_ABS64_HI12 70
+#define R_LARCH_PCALA_HI20 71
+#define R_LARCH_PCALA_LO12 72
+#define R_LARCH_PCALA64_LO20 73
+#define R_LARCH_PCALA64_HI12 74
+#define R_LARCH_GOT_PC_HI20 75
+#define R_LARCH_GOT_PC_LO12 76
+#define R_LARCH_GOT64_PC_LO20 77
+#define R_LARCH_GOT64_PC_HI12 78
+#define R_LARCH_GOT_HI20 79
+#define R_LARCH_GOT_LO12 80
+#define R_LARCH_GOT64_LO20 81
+#define R_LARCH_GOT64_HI12 82
+#define R_LARCH_TLS_LE_HI20 83
+#define R_LARCH_TLS_LE_LO12 84
+#define R_LARCH_TLS_LE64_LO20 85
+#define R_LARCH_TLS_LE64_HI12 86
+#define R_LARCH_TLS_IE_PC_HI20 87
+#define R_LARCH_TLS_IE_PC_LO12 88
+#define R_LARCH_TLS_IE64_PC_LO20 89
+#define R_LARCH_TLS_IE64_PC_HI12 90
+#define R_LARCH_TLS_IE_HI20 91
+#define R_LARCH_TLS_IE_LO12 92
+#define R_LARCH_TLS_IE64_LO20 93
+#define R_LARCH_TLS_IE64_HI12 94
+#define R_LARCH_TLS_LD_PC_HI20 95
+#define R_LARCH_TLS_LD_HI20 96
+#define R_LARCH_TLS_GD_PC_HI20 97
+#define R_LARCH_TLS_GD_HI20 98
+#define R_LARCH_32_PCREL 99
+#define R_LARCH_RELAX 100
#ifndef ELF_ARCH
diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c
index 36a77771d18c..8c0e4ad048cc 100644
--- a/arch/loongarch/kernel/module-sections.c
+++ b/arch/loongarch/kernel/module-sections.c
@@ -76,12 +76,20 @@ static void count_max_entries(Elf_Rela *relas, int num,
for (i = 0; i < num; i++) {
type = ELF_R_TYPE(relas[i].r_info);
- if (type == R_LARCH_SOP_PUSH_PLT_PCREL) {
+ switch (type) {
+ case R_LARCH_SOP_PUSH_PLT_PCREL:
+ case R_LARCH_B26:
if (!duplicate_rela(relas, i))
(*plts)++;
- } else if (type == R_LARCH_SOP_PUSH_GPREL)
+ break;
+ case R_LARCH_SOP_PUSH_GPREL:
+ case R_LARCH_GOT_PC_HI20:
if (!duplicate_rela(relas, i))
(*gots)++;
+ break;
+ default:
+ /* Do nothing. */
+ }
}
}
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 3ac4fbb5f109..c7b40150e1f0 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -291,6 +291,86 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v,
}
}
+static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ ptrdiff_t offset = (void *)v - (void *)location;
+ union loongarch_instruction *insn = (union loongarch_instruction *)location;
+
+ if (offset >= SZ_128M)
+ v = module_emit_plt_entry(mod, v);
+
+ if (offset < -SZ_128M)
+ v = module_emit_plt_entry(mod, v);
+
+ offset = (void *)v - (void *)location;
+
+ if (offset & 3) {
+ pr_err("module %s: jump offset = 0x%llx unaligned! dangerous R_LARCH_B26 (%u) relocation\n",
+ mod->name, (long long)offset, type);
+ return -ENOEXEC;
+ }
+
+ if (!signed_imm_check(offset, 28)) {
+ pr_err("module %s: jump offset = 0x%llx overflow! dangerous R_LARCH_B26 (%u) relocation\n",
+ mod->name, (long long)offset, type);
+ return -ENOEXEC;
+ }
+
+ offset >>= 2;
+ insn->reg0i26_format.immediate_l = offset & 0xffff;
+ insn->reg0i26_format.immediate_h = (offset >> 16) & 0x3ff;
+ return 0;
+}
+
+static int apply_r_larch_pcala_hi20(struct module *mod, u32 *location,
+ Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
+ unsigned int type)
+{
+ ptrdiff_t offset = (void *)((v + 0x800) & ~0xfff) -
+ (void *)((Elf_Addr)location & ~0xfff);
+ union loongarch_instruction *insn = (union loongarch_instruction *)location;
+
+ if (!signed_imm_check(offset, 32)) {
+ pr_err("module %s: PCALA offset = 0x%llx does not fit in 32-bit signed and is unsupported by kernel! dangerous %s (%u) relocation\n",
+ mod->name, (long long)offset, __func__, type);
+ return -ENOEXEC;
+ }
+
+ insn->reg1i20_format.immediate = (offset >> 12) & 0xfffff;
+ return 0;
+}
+
+static int apply_r_larch_got_pc_hi20(struct module *mod, u32 *location,
+ Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
+ unsigned int type)
+{
+ Elf_Addr got = module_emit_got_entry(mod, v);
+
+ return apply_r_larch_pcala_hi20(mod, location, got, rela_stack,
+ rela_stack_top, type);
+}
+
+static int apply_r_larch_pcala_lo12(struct module *mod, u32 *location,
+ Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
+ unsigned int type)
+{
+ union loongarch_instruction *insn = (union loongarch_instruction *)location;
+
+ insn->reg2i12_format.immediate = v & 0xfff;
+ return 0;
+}
+
+static int apply_r_larch_got_pc_lo12(struct module *mod, u32 *location,
+ Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
+ unsigned int type)
+{
+ Elf_Addr got = module_emit_got_entry(mod, v);
+
+ return apply_r_larch_pcala_lo12(mod, location, got, rela_stack,
+ rela_stack_top, type);
+}
+
/*
* reloc_handlers_rela() - Apply a particular relocation to a module
* @mod: the module to apply the reloc to
@@ -321,6 +401,11 @@ static reloc_rela_handler reloc_rela_handlers[] = {
[R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop,
[R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
[R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub,
+ [R_LARCH_B26] = apply_r_larch_b26,
+ [R_LARCH_PCALA_HI20] = apply_r_larch_pcala_hi20,
+ [R_LARCH_PCALA_LO12] = apply_r_larch_pcala_lo12,
+ [R_LARCH_GOT_PC_HI20] = apply_r_larch_got_pc_hi20,
+ [R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc_lo12,
};
int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
--
2.37.0
^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: [PATCH v4 4/4] LoongArch: Support modules with new relocation types
2022-07-29 8:42 ` [PATCH v4 4/4] LoongArch: Support modules with new relocation types Xi Ruoyao
@ 2022-08-01 9:45 ` Youling Tang
2022-08-09 11:31 ` Youling Tang
1 sibling, 0 replies; 33+ messages in thread
From: Youling Tang @ 2022-08-01 9:45 UTC (permalink / raw)
To: Xi Ruoyao; +Cc: loongarch, linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He
Hi, Ruoyao
On 07/29/2022 04:42 PM, Xi Ruoyao wrote:
> If GAS 2.40 and/or GCC 13 is used to build the kernel, the modules will
> contain R_LARCH_B26, R_LARCH_PCALA_HI20, R_LARCH_PCALA_LO12,
> R_LARCH_GOT_PC_HI20, and R_LARCH_GOT_PC_LO12 relocations. Support them
> in the module loader to allow a kernel built with latest toolchain
> capable to load the modules.
>
> Signed-off-by: Xi Ruoyao <xry111@xry111.site>
> ---
> arch/loongarch/include/asm/elf.h | 37 +++++++++++
> arch/loongarch/kernel/module-sections.c | 12 +++-
> arch/loongarch/kernel/module.c | 85 +++++++++++++++++++++++++
> 3 files changed, 132 insertions(+), 2 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
> index 5f3ff4781fda..7af0cebf28d7 100644
> --- a/arch/loongarch/include/asm/elf.h
> +++ b/arch/loongarch/include/asm/elf.h
> @@ -74,6 +74,43 @@
> #define R_LARCH_SUB64 56
> #define R_LARCH_GNU_VTINHERIT 57
> #define R_LARCH_GNU_VTENTRY 58
> +#define R_LARCH_B16 64
> +#define R_LARCH_B21 65
> +#define R_LARCH_B26 66
> +#define R_LARCH_ABS_HI20 67
> +#define R_LARCH_ABS_LO12 68
> +#define R_LARCH_ABS64_LO20 69
> +#define R_LARCH_ABS64_HI12 70
> +#define R_LARCH_PCALA_HI20 71
> +#define R_LARCH_PCALA_LO12 72
> +#define R_LARCH_PCALA64_LO20 73
> +#define R_LARCH_PCALA64_HI12 74
> +#define R_LARCH_GOT_PC_HI20 75
> +#define R_LARCH_GOT_PC_LO12 76
> +#define R_LARCH_GOT64_PC_LO20 77
> +#define R_LARCH_GOT64_PC_HI12 78
> +#define R_LARCH_GOT_HI20 79
> +#define R_LARCH_GOT_LO12 80
> +#define R_LARCH_GOT64_LO20 81
> +#define R_LARCH_GOT64_HI12 82
> +#define R_LARCH_TLS_LE_HI20 83
> +#define R_LARCH_TLS_LE_LO12 84
> +#define R_LARCH_TLS_LE64_LO20 85
> +#define R_LARCH_TLS_LE64_HI12 86
> +#define R_LARCH_TLS_IE_PC_HI20 87
> +#define R_LARCH_TLS_IE_PC_LO12 88
> +#define R_LARCH_TLS_IE64_PC_LO20 89
> +#define R_LARCH_TLS_IE64_PC_HI12 90
> +#define R_LARCH_TLS_IE_HI20 91
> +#define R_LARCH_TLS_IE_LO12 92
> +#define R_LARCH_TLS_IE64_LO20 93
> +#define R_LARCH_TLS_IE64_HI12 94
> +#define R_LARCH_TLS_LD_PC_HI20 95
> +#define R_LARCH_TLS_LD_HI20 96
> +#define R_LARCH_TLS_GD_PC_HI20 97
> +#define R_LARCH_TLS_GD_HI20 98
> +#define R_LARCH_32_PCREL 99
> +#define R_LARCH_RELAX 100
>
> #ifndef ELF_ARCH
>
> diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c
> index 36a77771d18c..8c0e4ad048cc 100644
> --- a/arch/loongarch/kernel/module-sections.c
> +++ b/arch/loongarch/kernel/module-sections.c
> @@ -76,12 +76,20 @@ static void count_max_entries(Elf_Rela *relas, int num,
>
> for (i = 0; i < num; i++) {
> type = ELF_R_TYPE(relas[i].r_info);
> - if (type == R_LARCH_SOP_PUSH_PLT_PCREL) {
> + switch (type) {
> + case R_LARCH_SOP_PUSH_PLT_PCREL:
> + case R_LARCH_B26:
> if (!duplicate_rela(relas, i))
> (*plts)++;
> - } else if (type == R_LARCH_SOP_PUSH_GPREL)
> + break;
> + case R_LARCH_SOP_PUSH_GPREL:
> + case R_LARCH_GOT_PC_HI20:
> if (!duplicate_rela(relas, i))
> (*gots)++;
> + break;
> + default:
> + /* Do nothing. */
> + }
> }
> }
>
> diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
> index 3ac4fbb5f109..c7b40150e1f0 100644
> --- a/arch/loongarch/kernel/module.c
> +++ b/arch/loongarch/kernel/module.c
> @@ -291,6 +291,86 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v,
> }
> }
>
> +static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v,
> + s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
> +{
> + ptrdiff_t offset = (void *)v - (void *)location;
> + union loongarch_instruction *insn = (union loongarch_instruction *)location;
> +
> + if (offset >= SZ_128M)
> + v = module_emit_plt_entry(mod, v);
> +
> + if (offset < -SZ_128M)
> + v = module_emit_plt_entry(mod, v);
> +
> + offset = (void *)v - (void *)location;
> +
> + if (offset & 3) {
> + pr_err("module %s: jump offset = 0x%llx unaligned! dangerous R_LARCH_B26 (%u) relocation\n",
> + mod->name, (long long)offset, type);
> + return -ENOEXEC;
> + }
> +
> + if (!signed_imm_check(offset, 28)) {
> + pr_err("module %s: jump offset = 0x%llx overflow! dangerous R_LARCH_B26 (%u) relocation\n",
> + mod->name, (long long)offset, type);
> + return -ENOEXEC;
> + }
> +
> + offset >>= 2;
> + insn->reg0i26_format.immediate_l = offset & 0xffff;
> + insn->reg0i26_format.immediate_h = (offset >> 16) & 0x3ff;
> + return 0;
> +}
> +
> +static int apply_r_larch_pcala_hi20(struct module *mod, u32 *location,
> + Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
> + unsigned int type)
> +{
> + ptrdiff_t offset = (void *)((v + 0x800) & ~0xfff) -
> + (void *)((Elf_Addr)location & ~0xfff);
> + union loongarch_instruction *insn = (union loongarch_instruction *)location;
> +
> + if (!signed_imm_check(offset, 32)) {
> + pr_err("module %s: PCALA offset = 0x%llx does not fit in 32-bit signed and is unsupported by kernel! dangerous %s (%u) relocation\n",
> + mod->name, (long long)offset, __func__, type);
> + return -ENOEXEC;
> + }
Do we need to add a "location" message when the relocation fails? To
get the PC for better debugging.
> +
> + insn->reg1i20_format.immediate = (offset >> 12) & 0xfffff;
> + return 0;
> +}
> +
> +static int apply_r_larch_got_pc_hi20(struct module *mod, u32 *location,
> + Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
> + unsigned int type)
> +{
> + Elf_Addr got = module_emit_got_entry(mod, v);
> +
> + return apply_r_larch_pcala_hi20(mod, location, got, rela_stack,
> + rela_stack_top, type);
> +}
> +
> +static int apply_r_larch_pcala_lo12(struct module *mod, u32 *location,
> + Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
> + unsigned int type)
> +{
> + union loongarch_instruction *insn = (union loongarch_instruction *)location;
> +
> + insn->reg2i12_format.immediate = v & 0xfff;
> + return 0;
> +}
> +
> +static int apply_r_larch_got_pc_lo12(struct module *mod, u32 *location,
> + Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
> + unsigned int type)
> +{
> + Elf_Addr got = module_emit_got_entry(mod, v);
> +
> + return apply_r_larch_pcala_lo12(mod, location, got, rela_stack,
> + rela_stack_top, type);
> +}
Maybe it might look better to keep the apply_r_larch_* functions
ordered by relocation type ID?
Order:
apply_r_larch_pcala_hi20
apply_r_larch_pcala_lo12
apply_r_larch_got_pc_hi20
apply_r_larch_got_pc_lo12
Thanks,
Youling
> +
> /*
> * reloc_handlers_rela() - Apply a particular relocation to a module
> * @mod: the module to apply the reloc to
> @@ -321,6 +401,11 @@ static reloc_rela_handler reloc_rela_handlers[] = {
> [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop,
> [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
> [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub,
> + [R_LARCH_B26] = apply_r_larch_b26,
> + [R_LARCH_PCALA_HI20] = apply_r_larch_pcala_hi20,
> + [R_LARCH_PCALA_LO12] = apply_r_larch_pcala_lo12,
> + [R_LARCH_GOT_PC_HI20] = apply_r_larch_got_pc_hi20,
> + [R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc_lo12,
> };
>
> int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 4/4] LoongArch: Support modules with new relocation types
2022-07-29 8:42 ` [PATCH v4 4/4] LoongArch: Support modules with new relocation types Xi Ruoyao
2022-08-01 9:45 ` Youling Tang
@ 2022-08-09 11:31 ` Youling Tang
1 sibling, 0 replies; 33+ messages in thread
From: Youling Tang @ 2022-08-09 11:31 UTC (permalink / raw)
To: Xi Ruoyao, loongarch; +Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He
Hi, Ruoyao
On 07/29/2022 04:42 PM, Xi Ruoyao wrote:
> If GAS 2.40 and/or GCC 13 is used to build the kernel, the modules will
> contain R_LARCH_B26, R_LARCH_PCALA_HI20, R_LARCH_PCALA_LO12,
> R_LARCH_GOT_PC_HI20, and R_LARCH_GOT_PC_LO12 relocations. Support them
> in the module loader to allow a kernel built with latest toolchain
> capable to load the modules.
>
> Signed-off-by: Xi Ruoyao <xry111@xry111.site>
> ---
> arch/loongarch/include/asm/elf.h | 37 +++++++++++
> arch/loongarch/kernel/module-sections.c | 12 +++-
> arch/loongarch/kernel/module.c | 85 +++++++++++++++++++++++++
> 3 files changed, 132 insertions(+), 2 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
> index 5f3ff4781fda..7af0cebf28d7 100644
> --- a/arch/loongarch/include/asm/elf.h
> +++ b/arch/loongarch/include/asm/elf.h
> @@ -74,6 +74,43 @@
> #define R_LARCH_SUB64 56
> #define R_LARCH_GNU_VTINHERIT 57
> #define R_LARCH_GNU_VTENTRY 58
> +#define R_LARCH_B16 64
> +#define R_LARCH_B21 65
> +#define R_LARCH_B26 66
> +#define R_LARCH_ABS_HI20 67
> +#define R_LARCH_ABS_LO12 68
> +#define R_LARCH_ABS64_LO20 69
> +#define R_LARCH_ABS64_HI12 70
ARCH_REL_TYPE_ABS should include the corresponding absolute relocation types
now that the new relocation types are added. Maybe we should add R_LARCH_ABS* in
arch/loongarch/vdso/Makefile?
Thanks,
Youling
> +#define R_LARCH_PCALA_HI20 71
> +#define R_LARCH_PCALA_LO12 72
> +#define R_LARCH_PCALA64_LO20 73
> +#define R_LARCH_PCALA64_HI12 74
> +#define R_LARCH_GOT_PC_HI20 75
> +#define R_LARCH_GOT_PC_LO12 76
> +#define R_LARCH_GOT64_PC_LO20 77
> +#define R_LARCH_GOT64_PC_HI12 78
> +#define R_LARCH_GOT_HI20 79
> +#define R_LARCH_GOT_LO12 80
> +#define R_LARCH_GOT64_LO20 81
> +#define R_LARCH_GOT64_HI12 82
> +#define R_LARCH_TLS_LE_HI20 83
> +#define R_LARCH_TLS_LE_LO12 84
> +#define R_LARCH_TLS_LE64_LO20 85
> +#define R_LARCH_TLS_LE64_HI12 86
> +#define R_LARCH_TLS_IE_PC_HI20 87
> +#define R_LARCH_TLS_IE_PC_LO12 88
> +#define R_LARCH_TLS_IE64_PC_LO20 89
> +#define R_LARCH_TLS_IE64_PC_HI12 90
> +#define R_LARCH_TLS_IE_HI20 91
> +#define R_LARCH_TLS_IE_LO12 92
> +#define R_LARCH_TLS_IE64_LO20 93
> +#define R_LARCH_TLS_IE64_HI12 94
> +#define R_LARCH_TLS_LD_PC_HI20 95
> +#define R_LARCH_TLS_LD_HI20 96
> +#define R_LARCH_TLS_GD_PC_HI20 97
> +#define R_LARCH_TLS_GD_HI20 98
> +#define R_LARCH_32_PCREL 99
> +#define R_LARCH_RELAX 100
>
> #ifndef ELF_ARCH
>
> diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c
> index 36a77771d18c..8c0e4ad048cc 100644
> --- a/arch/loongarch/kernel/module-sections.c
> +++ b/arch/loongarch/kernel/module-sections.c
> @@ -76,12 +76,20 @@ static void count_max_entries(Elf_Rela *relas, int num,
>
> for (i = 0; i < num; i++) {
> type = ELF_R_TYPE(relas[i].r_info);
> - if (type == R_LARCH_SOP_PUSH_PLT_PCREL) {
> + switch (type) {
> + case R_LARCH_SOP_PUSH_PLT_PCREL:
> + case R_LARCH_B26:
> if (!duplicate_rela(relas, i))
> (*plts)++;
> - } else if (type == R_LARCH_SOP_PUSH_GPREL)
> + break;
> + case R_LARCH_SOP_PUSH_GPREL:
> + case R_LARCH_GOT_PC_HI20:
> if (!duplicate_rela(relas, i))
> (*gots)++;
> + break;
> + default:
> + /* Do nothing. */
> + }
> }
> }
>
> diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
> index 3ac4fbb5f109..c7b40150e1f0 100644
> --- a/arch/loongarch/kernel/module.c
> +++ b/arch/loongarch/kernel/module.c
> @@ -291,6 +291,86 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v,
> }
> }
>
> +static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v,
> + s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
> +{
> + ptrdiff_t offset = (void *)v - (void *)location;
> + union loongarch_instruction *insn = (union loongarch_instruction *)location;
> +
> + if (offset >= SZ_128M)
> + v = module_emit_plt_entry(mod, v);
> +
> + if (offset < -SZ_128M)
> + v = module_emit_plt_entry(mod, v);
> +
> + offset = (void *)v - (void *)location;
> +
> + if (offset & 3) {
> + pr_err("module %s: jump offset = 0x%llx unaligned! dangerous R_LARCH_B26 (%u) relocation\n",
> + mod->name, (long long)offset, type);
> + return -ENOEXEC;
> + }
> +
> + if (!signed_imm_check(offset, 28)) {
> + pr_err("module %s: jump offset = 0x%llx overflow! dangerous R_LARCH_B26 (%u) relocation\n",
> + mod->name, (long long)offset, type);
> + return -ENOEXEC;
> + }
> +
> + offset >>= 2;
> + insn->reg0i26_format.immediate_l = offset & 0xffff;
> + insn->reg0i26_format.immediate_h = (offset >> 16) & 0x3ff;
> + return 0;
> +}
> +
> +static int apply_r_larch_pcala_hi20(struct module *mod, u32 *location,
> + Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
> + unsigned int type)
> +{
> + ptrdiff_t offset = (void *)((v + 0x800) & ~0xfff) -
> + (void *)((Elf_Addr)location & ~0xfff);
> + union loongarch_instruction *insn = (union loongarch_instruction *)location;
> +
> + if (!signed_imm_check(offset, 32)) {
> + pr_err("module %s: PCALA offset = 0x%llx does not fit in 32-bit signed and is unsupported by kernel! dangerous %s (%u) relocation\n",
> + mod->name, (long long)offset, __func__, type);
> + return -ENOEXEC;
> + }
> +
> + insn->reg1i20_format.immediate = (offset >> 12) & 0xfffff;
> + return 0;
> +}
> +
> +static int apply_r_larch_got_pc_hi20(struct module *mod, u32 *location,
> + Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
> + unsigned int type)
> +{
> + Elf_Addr got = module_emit_got_entry(mod, v);
> +
> + return apply_r_larch_pcala_hi20(mod, location, got, rela_stack,
> + rela_stack_top, type);
> +}
> +
> +static int apply_r_larch_pcala_lo12(struct module *mod, u32 *location,
> + Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
> + unsigned int type)
> +{
> + union loongarch_instruction *insn = (union loongarch_instruction *)location;
> +
> + insn->reg2i12_format.immediate = v & 0xfff;
> + return 0;
> +}
> +
> +static int apply_r_larch_got_pc_lo12(struct module *mod, u32 *location,
> + Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top,
> + unsigned int type)
> +{
> + Elf_Addr got = module_emit_got_entry(mod, v);
> +
> + return apply_r_larch_pcala_lo12(mod, location, got, rela_stack,
> + rela_stack_top, type);
> +}
> +
> /*
> * reloc_handlers_rela() - Apply a particular relocation to a module
> * @mod: the module to apply the reloc to
> @@ -321,6 +401,11 @@ static reloc_rela_handler reloc_rela_handlers[] = {
> [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop,
> [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
> [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub,
> + [R_LARCH_B26] = apply_r_larch_b26,
> + [R_LARCH_PCALA_HI20] = apply_r_larch_pcala_hi20,
> + [R_LARCH_PCALA_LO12] = apply_r_larch_pcala_lo12,
> + [R_LARCH_GOT_PC_HI20] = apply_r_larch_got_pc_hi20,
> + [R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc_lo12,
> };
>
> int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-29 8:38 [PATCH v4 0/4] LoongArch: Support new relocation types Xi Ruoyao
` (3 preceding siblings ...)
2022-07-29 8:42 ` [PATCH v4 4/4] LoongArch: Support modules with new relocation types Xi Ruoyao
@ 2022-07-29 9:47 ` WANG Xuerui
2022-07-29 9:49 ` Youling Tang
5 siblings, 0 replies; 33+ messages in thread
From: WANG Xuerui @ 2022-07-29 9:47 UTC (permalink / raw)
To: Xi Ruoyao, loongarch; +Cc: linux-kernel, Huacai Chen, Youling Tang, Jinyang He
On 2022/7/29 16:38, Xi Ruoyao wrote:
> The version 2.00 of LoongArch ELF ABI specification introduced new
> relocation types, and the development tree of Binutils and GCC has
> started to use them. If the kernel is built with the latest snapshot of
> Binutils or GCC, it will fail to load the modules because of unrecognized
> relocation types in modules.
>
> Add support for GOT and new relocation types for the module loader, so
> the kernel (with modules) can be built with the "normal" code model and
> function properly.
>
> This series does not break the compatibility with old toolchain using
> stack-based relocation types, so with the patches applied the kernel can
> be built with both old and new toolchains.
>
> Tested by building the kernel with both Binutils & GCC master branch and
> my system Binutils & GCC (without new relocation type support), running
> both the builds with 35 in-tree modules loaded, and loading one module
> with 20 GOT loads (loaded addresses verified by comparing with
> /proc/kallsyms).
>
> Changes from v3 to v4:
>
> - No code change. Reword the commit message of the 3rd patch again
> based on suggestion from Huacai.
>
> Changes from v2 to v3:
>
> - Use `union loongarch_instruction` instead of explicit bit shifts
> applying the relocation. Suggested by Youling.
> - For R_LARCH_B26, move the alignment check before the range check to be
> consistent with stack pop relocations. Suggested by Youling.
> - Reword the commit message of the 3rd patch. Suggested by Huacai.
>
> Changes from v1 to v2:
>
> - Fix a stupid programming error (confusion between the number of PLT
> entries and the number of GOT entries). (Bug spotted by Youling).
> - Synthesize the _GLOBAL_OFFSET_TABLE_ symbol with module.lds, instead
> of faking it at runtime. The 3rd patch from V1 is now merged into
> the 1st patch because it would be a one-line change. (Suggested by
> Jinyang).
> - Keep reloc_rela_handlers[] ordered by the relocation type ID.
> (Suggested by Youling).
> - Remove -fplt along with -Wa,-mla-* options because it's the default.
> (Suggested by Youling).
>
> Xi Ruoyao (4):
> LoongArch: Add section of GOT for kernel module
> LoongArch: Support R_LARCH_SOP_PUSH_GPREL relocation type in kernel
> module
> LoongArch: Remove -fplt and -Wa,-mla-* from CFLAGS
> LoongArch: Support modules with new relocation types
>
> arch/loongarch/Makefile | 4 --
> arch/loongarch/include/asm/elf.h | 37 ++++++++++
> arch/loongarch/include/asm/module.h | 23 ++++++
> arch/loongarch/include/asm/module.lds.h | 1 +
> arch/loongarch/kernel/head.S | 10 +--
> arch/loongarch/kernel/module-sections.c | 51 +++++++++++--
> arch/loongarch/kernel/module.c | 96 +++++++++++++++++++++++++
> 7 files changed, 209 insertions(+), 13 deletions(-)
>
Thanks very much for the timely adaptation. I'm rebuilding my Gentoo
toolchain from upstream HEAD, will test this weekend.
--
WANG "xen0n" Xuerui
Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-29 8:38 [PATCH v4 0/4] LoongArch: Support new relocation types Xi Ruoyao
` (4 preceding siblings ...)
2022-07-29 9:47 ` [PATCH v4 0/4] LoongArch: Support " WANG Xuerui
@ 2022-07-29 9:49 ` Youling Tang
2022-07-29 10:18 ` Xi Ruoyao
5 siblings, 1 reply; 33+ messages in thread
From: Youling Tang @ 2022-07-29 9:49 UTC (permalink / raw)
To: Xi Ruoyao, loongarch; +Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He
Hi, Ruoyao
Tested this series of patches v3 on a CLFS 5.5 system, using the new
cross toolchain,
$ dmesg | head
[ 0.000000] Linux version 5.19.0-rc7new-toolchain+ (loongson@linux)
(loongarch64-unknown-linux-gnu-gcc (GCC) 13.0.0 20220726 (experimental)
[master revision
cf7eac5805e:1e0611b64d8:3fb68f2e666d9de7e0326af9f43b12c9e98f19a6], GNU
ld (GNU Binutils) 2.39.50.20220726) #1 SMP PREEMPT Fri Jul 29 05:24:15
EDT 2022
Relocation error when manually loading nf_tables.ko module,
$ sudo modprobe nf_tables
modprobe: ERROR: could not insert 'nf_tables': Exec format error
$ dmesg
[ 61.506737] kmod: module nf_tables: PCALA offset = 0x90007ffffed8c000
does not fit in 32-bit signed and is unsupported by kernel! dangerous
apply_r_larch_pcala_hi20 (71) relocation
Do you have the same problem over there?
Thanks,
Youling
On 07/29/2022 04:38 PM, Xi Ruoyao wrote:
> The version 2.00 of LoongArch ELF ABI specification introduced new
> relocation types, and the development tree of Binutils and GCC has
> started to use them. If the kernel is built with the latest snapshot of
> Binutils or GCC, it will fail to load the modules because of unrecognized
> relocation types in modules.
>
> Add support for GOT and new relocation types for the module loader, so
> the kernel (with modules) can be built with the "normal" code model and
> function properly.
>
> This series does not break the compatibility with old toolchain using
> stack-based relocation types, so with the patches applied the kernel can
> be built with both old and new toolchains.
>
> Tested by building the kernel with both Binutils & GCC master branch and
> my system Binutils & GCC (without new relocation type support), running
> both the builds with 35 in-tree modules loaded, and loading one module
> with 20 GOT loads (loaded addresses verified by comparing with
> /proc/kallsyms).
>
> Changes from v3 to v4:
>
> - No code change. Reword the commit message of the 3rd patch again
> based on suggestion from Huacai.
>
> Changes from v2 to v3:
>
> - Use `union loongarch_instruction` instead of explicit bit shifts
> applying the relocation. Suggested by Youling.
> - For R_LARCH_B26, move the alignment check before the range check to be
> consistent with stack pop relocations. Suggested by Youling.
> - Reword the commit message of the 3rd patch. Suggested by Huacai.
>
> Changes from v1 to v2:
>
> - Fix a stupid programming error (confusion between the number of PLT
> entries and the number of GOT entries). (Bug spotted by Youling).
> - Synthesize the _GLOBAL_OFFSET_TABLE_ symbol with module.lds, instead
> of faking it at runtime. The 3rd patch from V1 is now merged into
> the 1st patch because it would be a one-line change. (Suggested by
> Jinyang).
> - Keep reloc_rela_handlers[] ordered by the relocation type ID.
> (Suggested by Youling).
> - Remove -fplt along with -Wa,-mla-* options because it's the default.
> (Suggested by Youling).
>
> Xi Ruoyao (4):
> LoongArch: Add section of GOT for kernel module
> LoongArch: Support R_LARCH_SOP_PUSH_GPREL relocation type in kernel
> module
> LoongArch: Remove -fplt and -Wa,-mla-* from CFLAGS
> LoongArch: Support modules with new relocation types
>
> arch/loongarch/Makefile | 4 --
> arch/loongarch/include/asm/elf.h | 37 ++++++++++
> arch/loongarch/include/asm/module.h | 23 ++++++
> arch/loongarch/include/asm/module.lds.h | 1 +
> arch/loongarch/kernel/head.S | 10 +--
> arch/loongarch/kernel/module-sections.c | 51 +++++++++++--
> arch/loongarch/kernel/module.c | 96 +++++++++++++++++++++++++
> 7 files changed, 209 insertions(+), 13 deletions(-)
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-29 9:49 ` Youling Tang
@ 2022-07-29 10:18 ` Xi Ruoyao
2022-07-29 10:36 ` Xi Ruoyao
0 siblings, 1 reply; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-29 10:18 UTC (permalink / raw)
To: Youling Tang, loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He
On Fri, 2022-07-29 at 17:49 +0800, Youling Tang wrote:
> Hi, Ruoyao
>
> Tested this series of patches v3 on a CLFS 5.5 system, using the new
> cross toolchain,
> $ dmesg | head
> [ 0.000000] Linux version 5.19.0-rc7new-toolchain+ (loongson@linux)
> (loongarch64-unknown-linux-gnu-gcc (GCC) 13.0.0 20220726 (experimental)
> [master revision
> cf7eac5805e:1e0611b64d8:3fb68f2e666d9de7e0326af9f43b12c9e98f19a6], GNU
> ld (GNU Binutils) 2.39.50.20220726) #1 SMP PREEMPT Fri Jul 29 05:24:15
> EDT 2022
>
> Relocation error when manually loading nf_tables.ko module,
> $ sudo modprobe nf_tables
> odprobe: ERROR: could not insert 'nf_tables': Exec format error
>
> $ dmesg
> [ 61.506737] kmod: module nf_tables: PCALA offset = 0x90007ffffed8c000
> does not fit in 32-bit signed and is unsupported by kernel! dangerous
> apply_r_larch_pcala_hi20 (71) relocation
>
> Do you have the same problem over there?
I can reproduce it with "modprobe x_tables". Will try to debug...
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-29 10:18 ` Xi Ruoyao
@ 2022-07-29 10:36 ` Xi Ruoyao
2022-07-29 11:45 ` Xi Ruoyao
0 siblings, 1 reply; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-29 10:36 UTC (permalink / raw)
To: Youling Tang, loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He
On Fri, 2022-07-29 at 18:18 +0800, Xi Ruoyao wrote:
> On Fri, 2022-07-29 at 17:49 +0800, Youling Tang wrote:
> > Hi, Ruoyao
> >
> > Tested this series of patches v3 on a CLFS 5.5 system, using the new
> > cross toolchain,
> > $ dmesg | head
> > [ 0.000000] Linux version 5.19.0-rc7new-toolchain+ (loongson@linux)
> > (loongarch64-unknown-linux-gnu-gcc (GCC) 13.0.0 20220726 (experimental)
> > [master revision
> > cf7eac5805e:1e0611b64d8:3fb68f2e666d9de7e0326af9f43b12c9e98f19a6], GNU
> > ld (GNU Binutils) 2.39.50.20220726) #1 SMP PREEMPT Fri Jul 29 05:24:15
> > EDT 2022
> >
> > Relocation error when manually loading nf_tables.ko module,
> > $ sudo modprobe nf_tables
> > odprobe: ERROR: could not insert 'nf_tables': Exec format error
> >
> > $ dmesg
> > [ 61.506737] kmod: module nf_tables: PCALA offset = 0x90007ffffed8c000
> > does not fit in 32-bit signed and is unsupported by kernel! dangerous
> > apply_r_larch_pcala_hi20 (71) relocation
> >
> > Do you have the same problem over there?
>
> I can reproduce it with "modprobe x_tables". Will try to debug...
Relocations against local per-CPU variables are broken. I'll try to
fix it.
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-29 10:36 ` Xi Ruoyao
@ 2022-07-29 11:45 ` Xi Ruoyao
2022-07-29 12:19 ` Youling Tang
0 siblings, 1 reply; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-29 11:45 UTC (permalink / raw)
To: Youling Tang, loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He, Lulu Cheng
On Fri, 2022-07-29 at 18:36 +0800, Xi Ruoyao wrote:
> The relocation against local percpu variable is broken up. I'll try
> to fix it.
Hmm... The problem is that the "addresses" of per-CPU symbols are faked: they
are actually offsets from $r21. So we can't just load such an offset
with PCALA addressing.
It looks like we'll need to introduce a GCC attribute to mark a
variable as "must be addressed via GOT", and add that attribute to
PER_CPU_ATTRIBUTES.
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-29 11:45 ` Xi Ruoyao
@ 2022-07-29 12:19 ` Youling Tang
2022-07-29 17:55 ` Xi Ruoyao
0 siblings, 1 reply; 33+ messages in thread
From: Youling Tang @ 2022-07-29 12:19 UTC (permalink / raw)
To: Xi Ruoyao, loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He, Lulu Cheng
On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
> On Fri, 2022-07-29 at 18:36 +0800, Xi Ruoyao wrote:
>
>> The relocation against local percpu variable is broken up. I'll try
>> to fix it.
>
> Hmm... The problem is the "addresses" of per-cpu symbols are faked: they
> are actually offsets from $r21. So we can't just load such an offset
> with PCALA addressing.
>
> It looks like we'll need to introduce an attribute for GCC to make an
> variable "must be addressed via GOT", and add the attribute into
> PER_CPU_ATTRIBUTES.
Yes, we need a GCC attribute to specify the per-cpu variable.
Thanks,
Youling
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-29 12:19 ` Youling Tang
@ 2022-07-29 17:55 ` Xi Ruoyao
2022-07-30 2:24 ` Xi Ruoyao
0 siblings, 1 reply; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-29 17:55 UTC (permalink / raw)
To: Youling Tang, loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He, Lulu Cheng
On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
> On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
> > Hmm... The problem is the "addresses" of per-cpu symbols are faked: they
> > are actually offsets from $r21. So we can't just load such an offset
> > with PCALA addressing.
> >
> > It looks like we'll need to introduce an attribute for GCC to make an
> > variable "must be addressed via GOT", and add the attribute into
> > PER_CPU_ATTRIBUTES.
> Yes, we need a GCC attribute to specify the per-cpu variable.
GCC patch adding "addr_global" attribute for LoongArch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
An experiment to use it:
https://github.com/xry111/linux/commit/c1d5d70
This fixes "modprobe x_tables" for me.
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-29 17:55 ` Xi Ruoyao
@ 2022-07-30 2:24 ` Xi Ruoyao
2022-07-30 2:52 ` Xi Ruoyao
0 siblings, 1 reply; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-30 2:24 UTC (permalink / raw)
To: Youling Tang, loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He, Lulu Cheng
On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
> On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
>
> > On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
> > > Hmm... The problem is the "addresses" of per-cpu symbols are
> > > faked: they
> > > are actually offsets from $r21. So we can't just load such an
> > > offset
> > > with PCALA addressing.
> > >
> > > It looks like we'll need to introduce an attribute for GCC to make
> > > an
> > > variable "must be addressed via GOT", and add the attribute into
> > > PER_CPU_ATTRIBUTES.
>
> > Yes, we need a GCC attribute to specify the per-cpu variable.
>
> GCC patch adding "addr_global" attribute for LoongArch:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
>
> An experiment to use it:
> https://github.com/xry111/linux/commit/c1d5d70
Correction: https://github.com/xry111/linux/commit/c1d5d708
It seems a 7-character SHA prefix is not enough for the kernel repo.
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-30 2:24 ` Xi Ruoyao
@ 2022-07-30 2:52 ` Xi Ruoyao
2022-07-30 6:14 ` Huacai Chen
2022-08-01 2:16 ` Youling Tang
0 siblings, 2 replies; 33+ messages in thread
From: Xi Ruoyao @ 2022-07-30 2:52 UTC (permalink / raw)
To: Youling Tang, loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He, Lulu Cheng
On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
> On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
> > On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
> >
> > > On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
> > > > Hmm... The problem is the "addresses" of per-cpu symbols are
> > > > faked: they
> > > > are actually offsets from $r21. So we can't just load such an
> > > > offset
> > > > with PCALA addressing.
> > > >
> > > > It looks like we'll need to introduce an attribute for GCC to
> > > > make
> > > > an
> > > > variable "must be addressed via GOT", and add the attribute into
> > > > PER_CPU_ATTRIBUTES.
> >
> > > Yes, we need a GCC attribute to specify the per-cpu variable.
> >
> > GCC patch adding "addr_global" attribute for LoongArch:
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
> >
> > An experiment to use it:
> > https://github.com/xry111/linux/commit/c1d5d70
>
> Correction: https://github.com/xry111/linux/commit/c1d5d708
>
> It seems 7-bit SHA is not enough for kernel repo.
If addr_global is rejected or not implemented (for example, when building the
kernel with GCC 12), *I expect* the following hack to work (I've not
tested it because I'm AFK now). Using visibility in the kernel seems
strange, but I think it may make some sense because modules are
somewhat similar to an ELF shared object being dlopen()'ed, and our way
of injecting per-CPU symbols is analogous to ELF interposition.
arch/loongarch/include/asm/percpu.h:
#if !__has_attribute(__addr_global__) && defined(MODULE)
/* Magically remove "static" for per-CPU variables. */
# define ARCH_NEEDS_WEAK_PER_CPU
/* Force GOT-relocation for per-CPU variables. */
# define PER_CPU_ATTRIBUTES __attribute__((__visibility__("default")))
#endif
arch/loongarch/Makefile:
# Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
# include/asm/percpu.h
if (call gcc-does-not-support-addr-global)
KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
endif
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-30 2:52 ` Xi Ruoyao
@ 2022-07-30 6:14 ` Huacai Chen
[not found] ` <674cb3e9-d820-016b-a210-afd37ed6e25e@loongson.cn>
2022-08-01 2:16 ` Youling Tang
1 sibling, 1 reply; 33+ messages in thread
From: Huacai Chen @ 2022-07-30 6:14 UTC (permalink / raw)
To: Xi Ruoyao
Cc: Youling Tang, loongarch, LKML, WANG Xuerui, Jinyang He, Lulu Cheng
Hi, Ruoyao,
On Sat, Jul 30, 2022 at 10:53 AM Xi Ruoyao <xry111@xry111.site> wrote:
>
> On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
> > On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
> > > On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
> > >
> > > > On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
> > > > > Hmm... The problem is the "addresses" of per-cpu symbols are
> > > > > faked: they
> > > > > are actually offsets from $r21. So we can't just load such an
> > > > > offset
> > > > > with PCALA addressing.
> > > > >
> > > > > It looks like we'll need to introduce an attribute for GCC to
> > > > > make
> > > > > an
> > > > > variable "must be addressed via GOT", and add the attribute into
> > > > > PER_CPU_ATTRIBUTES.
> > >
> > > > Yes, we need a GCC attribute to specify the per-cpu variable.
> > >
> > > GCC patch adding "addr_global" attribute for LoongArch:
> > > https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
> > >
> > > An experiment to use it:
> > > https://github.com/xry111/linux/commit/c1d5d70
> >
> > Correction: https://github.com/xry111/linux/commit/c1d5d708
> >
> > It seems 7-bit SHA is not enough for kernel repo.
>
> If addr_global is rejected or not implemented (for example, building the
> kernel with GCC 12), *I expect* the following hack to work (I've not
> tested it because I'm AFK now). Using visibility in kernel seems
> strange, but I think it may make some sense because the modules are some
> sort of similar to an ELF shared object being dlopen()'ed, and our way
> to inject per-CPU symbols is analog to ELF interposition.
Sadly, I don't know what visibility is. Does it have something to do
with __visible in include/linux/compiler_attributes.h?
Huacai
>
> arch/loongarch/include/asm/percpu.h:
>
> #if !__has_attribute(__addr_global__) && defined(MODULE)
> /* Magically remove "static" for per-CPU variables. */
> # define ARCH_NEEDS_WEAK_PER_CPU
> /* Force GOT-relocation for per-CPU variables. */
> # define PER_CPU_ATTRIBUTES __attribute__((__visibility__("default")))
> #endif
>
> arch/loongarch/Makefile:
>
> # Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
> # include/asm/percpu.h
> if (call gcc-does-not-support-addr-global)
> KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
> endif
>
> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-07-30 2:52 ` Xi Ruoyao
2022-07-30 6:14 ` Huacai Chen
@ 2022-08-01 2:16 ` Youling Tang
2022-08-01 2:34 ` Huacai Chen
1 sibling, 1 reply; 33+ messages in thread
From: Youling Tang @ 2022-08-01 2:16 UTC (permalink / raw)
To: Xi Ruoyao, loongarch
Cc: linux-kernel, WANG Xuerui, Huacai Chen, Jinyang He, Lulu Cheng
Hi, Ruoyao
On 07/30/2022 10:52 AM, Xi Ruoyao wrote:
> On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
>> On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
>>> On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
>>>
>>>> On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
>>>>> Hmm... The problem is the "addresses" of per-cpu symbols are
>>>>> faked: they
>>>>> are actually offsets from $r21. So we can't just load such an
>>>>> offset
>>>>> with PCALA addressing.
>>>>>
>>>>> It looks like we'll need to introduce an attribute for GCC to
>>>>> make
>>>>> an
>>>>> variable "must be addressed via GOT", and add the attribute into
>>>>> PER_CPU_ATTRIBUTES.
>>>
>>>> Yes, we need a GCC attribute to specify the per-cpu variable.
>>>
>>> GCC patch adding "addr_global" attribute for LoongArch:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
>>>
>>> An experiment to use it:
>>> https://github.com/xry111/linux/commit/c1d5d70
>>
>> Correction: https://github.com/xry111/linux/commit/c1d5d708
>>
>> It seems 7-bit SHA is not enough for kernel repo.
>
> If addr_global is rejected or not implemented (for example, building the
> kernel with GCC 12), *I expect* the following hack to work (I've not
> tested it because I'm AFK now). Using visibility in kernel seems
> strange, but I think it may make some sense because the modules are some
> sort of similar to an ELF shared object being dlopen()'ed, and our way
> to inject per-CPU symbols is analog to ELF interposition.
>
> arch/loongarch/include/asm/percpu.h:
>
> #if !__has_attribute(__addr_global__) && defined(MODULE)
> /* Magically remove "static" for per-CPU variables. */
> # define ARCH_NEEDS_WEAK_PER_CPU
> /* Force GOT-relocation for per-CPU variables. */
> # define PER_CPU_ATTRIBUTES __attribute__((__visibility__("default")))
> #endif
>
> arch/loongarch/Makefile:
>
> # Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
> # include/asm/percpu.h
> if (call gcc-does-not-support-addr-global)
> KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
> endif
>
With the old toolchain (GCC 12), the nf_tables.ko module loads
successfully after applying the above patch.
Thanks,
Youling
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 2:16 ` Youling Tang
@ 2022-08-01 2:34 ` Huacai Chen
2022-08-01 4:31 ` Youling Tang
2022-08-01 9:55 ` Xi Ruoyao
0 siblings, 2 replies; 33+ messages in thread
From: Huacai Chen @ 2022-08-01 2:34 UTC (permalink / raw)
To: Youling Tang
Cc: Xi Ruoyao, loongarch, LKML, WANG Xuerui, Jinyang He, Lulu Cheng
Hi, all,
On Mon, Aug 1, 2022 at 10:16 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Ruoyao
>
> On 07/30/2022 10:52 AM, Xi Ruoyao wrote:
> > On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
> >> On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
> >>> On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
> >>>
> >>>> On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
> >>>>> Hmm... The problem is the "addresses" of per-cpu symbols are
> >>>>> faked: they
> >>>>> are actually offsets from $r21. So we can't just load such an
> >>>>> offset
> >>>>> with PCALA addressing.
> >>>>>
> >>>>> It looks like we'll need to introduce an attribute for GCC to
> >>>>> make
> >>>>> an
> >>>>> variable "must be addressed via GOT", and add the attribute into
> >>>>> PER_CPU_ATTRIBUTES.
> >>>
> >>>> Yes, we need a GCC attribute to specify the per-cpu variable.
> >>>
> >>> GCC patch adding "addr_global" attribute for LoongArch:
> >>> https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
> >>>
> >>> An experiment to use it:
> >>> https://github.com/xry111/linux/commit/c1d5d70
> >>
> >> Correction: https://github.com/xry111/linux/commit/c1d5d708
> >>
> >> It seems 7-bit SHA is not enough for kernel repo.
> >
> > If addr_global is rejected or not implemented (for example, building the
> > kernel with GCC 12), *I expect* the following hack to work (I've not
> > tested it because I'm AFK now). Using visibility in kernel seems
> > strange, but I think it may make some sense because the modules are some
> > sort of similar to an ELF shared object being dlopen()'ed, and our way
> > to inject per-CPU symbols is analog to ELF interposition.
> >
> > arch/loongarch/include/asm/percpu.h:
> >
> > #if !__has_attribute(__addr_global__) && defined(MODULE)
> > /* Magically remove "static" for per-CPU variables. */
> > # define ARCH_NEEDS_WEAK_PER_CPU
> > /* Force GOT-relocation for per-CPU variables. */
> > # define PER_CPU_ATTRIBUTES __attribute__((__visibility__("default")))
> > #endif
> >
> > arch/loongarch/Makefile:
> >
> > # Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
> > # include/asm/percpu.h
> > if (call gcc-does-not-support-addr-global)
> > KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
> > endif
> >
> Using the old toolchain (GCC 12) can successfully load the nf_tables.ko
> module after applying the above patch.
I don't like such a hack... Can we consider using the old relocation
types when building with old toolchains?
Huacai
>
> Thanks,
> Youling
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 2:34 ` Huacai Chen
@ 2022-08-01 4:31 ` Youling Tang
2022-08-01 9:55 ` Xi Ruoyao
1 sibling, 0 replies; 33+ messages in thread
From: Youling Tang @ 2022-08-01 4:31 UTC (permalink / raw)
To: Huacai Chen, Xi Ruoyao
Cc: loongarch, LKML, WANG Xuerui, Jinyang He, Lulu Cheng
Hi, all
On 08/01/2022 10:34 AM, Huacai Chen wrote:
> Hi, all,
>
> On Mon, Aug 1, 2022 at 10:16 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Ruoyao
>>
>> On 07/30/2022 10:52 AM, Xi Ruoyao wrote:
>>> On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
>>>> On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
>>>>> On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
>>>>>
>>>>>> On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
>>>>>>> Hmm... The problem is the "addresses" of per-cpu symbols are
>>>>>>> faked: they
>>>>>>> are actually offsets from $r21. So we can't just load such an
>>>>>>> offset
>>>>>>> with PCALA addressing.
>>>>>>>
>>>>>>> It looks like we'll need to introduce an attribute for GCC to
>>>>>>> make
>>>>>>> an
>>>>>>> variable "must be addressed via GOT", and add the attribute into
>>>>>>> PER_CPU_ATTRIBUTES.
>>>>>
>>>>>> Yes, we need a GCC attribute to specify the per-cpu variable.
>>>>>
>>>>> GCC patch adding "addr_global" attribute for LoongArch:
>>>>> https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
>>>>>
>>>>> An experiment to use it:
>>>>> https://github.com/xry111/linux/commit/c1d5d70
>>>>
>>>> Correction: https://github.com/xry111/linux/commit/c1d5d708
With a kernel built by the new toolchain (with the "addr_global"
attribute), the nf_tables.ko module loads successfully after applying
the "c1d5d708" commit.
Thanks,
Youling
>>>>
>>>> It seems 7-bit SHA is not enough for kernel repo.
>>>
>>> If addr_global is rejected or not implemented (for example, building the
>>> kernel with GCC 12), *I expect* the following hack to work (I've not
>>> tested it because I'm AFK now). Using visibility in kernel seems
>>> strange, but I think it may make some sense because the modules are some
>>> sort of similar to an ELF shared object being dlopen()'ed, and our way
>>> to inject per-CPU symbols is analog to ELF interposition.
>>>
>>> arch/loongarch/include/asm/percpu.h:
>>>
>>> #if !__has_attribute(__addr_global__) && defined(MODULE)
>>> /* Magically remove "static" for per-CPU variables. */
>>> # define ARCH_NEEDS_WEAK_PER_CPU
>>> /* Force GOT-relocation for per-CPU variables. */
>>> # define PER_CPU_ATTRIBUTES __attribute__((__visibility__("default")))
>>> #endif
>>>
>>> arch/loongarch/Makefile:
>>>
>>> # Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
>>> # include/asm/percpu.h
>>> if (call gcc-does-not-support-addr-global)
>>> KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
>>> endif
>>>
>> Using the old toolchain (GCC 12) can successfully load the nf_tables.ko
>> module after applying the above patch.
> I don't like such a hack..., can we consider using old relocation
> types when building by old toolchains?
>
> Huacai
>>
>> Thanks,
>> Youling
>>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 2:34 ` Huacai Chen
2022-08-01 4:31 ` Youling Tang
@ 2022-08-01 9:55 ` Xi Ruoyao
2022-08-01 10:08 ` Jinyang He
2022-08-01 10:41 ` Huacai Chen
1 sibling, 2 replies; 33+ messages in thread
From: Xi Ruoyao @ 2022-08-01 9:55 UTC (permalink / raw)
To: Huacai Chen, Youling Tang
Cc: loongarch, LKML, WANG Xuerui, Jinyang He, Lulu Cheng
On Mon, 2022-08-01 at 10:34 +0800, Huacai Chen wrote:
> Hi, all,
>
> On Mon, Aug 1, 2022 at 10:16 AM Youling Tang <tangyouling@loongson.cn>
> wrote:
> >
> > Hi, Ruoyao
> >
> > On 07/30/2022 10:52 AM, Xi Ruoyao wrote:
> > > On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
> > > > On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
> > > > > On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
> > > > >
> > > > > > On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
> > > > > > > Hmm... The problem is the "addresses" of per-cpu symbols
> > > > > > > are
> > > > > > > faked: they
> > > > > > > are actually offsets from $r21. So we can't just load
> > > > > > > such an
> > > > > > > offset
> > > > > > > with PCALA addressing.
> > > > > > >
> > > > > > > It looks like we'll need to introduce an attribute for GCC
> > > > > > > to
> > > > > > > make
> > > > > > > an
> > > > > > > variable "must be addressed via GOT", and add the
> > > > > > > attribute into
> > > > > > > PER_CPU_ATTRIBUTES.
> > > > >
> > > > > > Yes, we need a GCC attribute to specify the per-cpu
> > > > > > variable.
> > > > >
> > > > > GCC patch adding "addr_global" attribute for LoongArch:
> > > > > https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
> > > > >
> > > > > An experiment to use it:
> > > > > https://github.com/xry111/linux/commit/c1d5d70
> > > >
> > > > Correction: https://github.com/xry111/linux/commit/c1d5d708
> > > >
> > > > It seems 7-bit SHA is not enough for kernel repo.
> > >
> > > If addr_global is rejected or not implemented (for example,
> > > building the
> > > kernel with GCC 12), *I expect* the following hack to work (I've
> > > not
> > > tested it because I'm AFK now). Using visibility in kernel seems
> > > strange, but I think it may make some sense because the modules
> > > are some
> > > sort of similar to an ELF shared object being dlopen()'ed, and our
> > > way
> > > to inject per-CPU symbols is analog to ELF interposition.
> > >
> > > arch/loongarch/include/asm/percpu.h:
> > >
> > > #if !__has_attribute(__addr_global__) && defined(MODULE)
> > > /* Magically remove "static" for per-CPU variables. */
> > > # define ARCH_NEEDS_WEAK_PER_CPU
> > > /* Force GOT-relocation for per-CPU variables. */
> > > # define PER_CPU_ATTRIBUTES
> > > __attribute__((__visibility__("default")))
> > > #endif
> > >
> > > arch/loongarch/Makefile:
> > >
> > > # Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
> > > # include/asm/percpu.h
> > > if (call gcc-does-not-support-addr-global)
> > > KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
> > > endif
> > >
> > Using the old toolchain (GCC 12) can successfully load the
> > nf_tables.ko
> > module after applying the above patch.
> I don't like such a hack..., can we consider using old relocation
> types when building by old toolchains?
I don't like the hack either. I only developed it as an intellectual game.
We need to consider multiple combinations:
(1) Old GCC + old Binutils. We need -mla-local-with-abs for
KBUILD_CFLAGS_MODULE.
(2) Old GCC + new Binutils. We need -mla-local-with-abs for
KBUILD_CFLAGS_MODULE, *and* adding the support for
R_LARCH_ABS{_HI20,_LO12,64_LO20,64_HI12} in the kernel module loader.
(3) New GCC + old Binutils. As new GCC should support our new attribute
(I now intend to send V2 patch to gcc-patches using "movable" as the
attribute name), no special action is needed.
Basically, we need:
(1) Handle R_LARCH_ABS{_HI20,_LO12,64_LO20,64_HI12} in the kernel module
loader.
(2) Add -Wa,-mla-local-with-abs into KBUILD_CFLAGS_MODULE if GCC version
is <= 12.
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 9:55 ` Xi Ruoyao
@ 2022-08-01 10:08 ` Jinyang He
2022-08-01 10:44 ` WANG Xuerui
2022-08-01 11:28 ` Youling Tang
2022-08-01 10:41 ` Huacai Chen
1 sibling, 2 replies; 33+ messages in thread
From: Jinyang He @ 2022-08-01 10:08 UTC (permalink / raw)
To: Xi Ruoyao, Huacai Chen, Youling Tang
Cc: loongarch, LKML, WANG Xuerui, Lulu Cheng
On 08/01/2022 05:55 PM, Xi Ruoyao wrote:
> On Mon, 2022-08-01 at 10:34 +0800, Huacai Chen wrote:
>> Hi, all,
>>
>> On Mon, Aug 1, 2022 at 10:16 AM Youling Tang <tangyouling@loongson.cn>
>> wrote:
>>> Hi, Ruoyao
>>>
>>> On 07/30/2022 10:52 AM, Xi Ruoyao wrote:
>>>> On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
>>>>> On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
>>>>>> On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
>>>>>>
>>>>>>> On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
>>>>>>>> Hmm... The problem is the "addresses" of per-cpu symbols
>>>>>>>> are
>>>>>>>> faked: they
>>>>>>>> are actually offsets from $r21. So we can't just load
>>>>>>>> such an
>>>>>>>> offset
>>>>>>>> with PCALA addressing.
>>>>>>>>
>>>>>>>> It looks like we'll need to introduce an attribute for GCC
>>>>>>>> to
>>>>>>>> make
>>>>>>>> an
>>>>>>>> variable "must be addressed via GOT", and add the
>>>>>>>> attribute into
>>>>>>>> PER_CPU_ATTRIBUTES.
>>>>>>> Yes, we need a GCC attribute to specify the per-cpu
>>>>>>> variable.
>>>>>> GCC patch adding "addr_global" attribute for LoongArch:
>>>>>> https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
>>>>>>
>>>>>> An experiment to use it:
>>>>>> https://github.com/xry111/linux/commit/c1d5d70
>>>>> Correction: https://github.com/xry111/linux/commit/c1d5d708
>>>>>
>>>>> It seems 7-bit SHA is not enough for kernel repo.
>>>> If addr_global is rejected or not implemented (for example,
>>>> building the
>>>> kernel with GCC 12), *I expect* the following hack to work (I've
>>>> not
>>>> tested it because I'm AFK now). Using visibility in kernel seems
>>>> strange, but I think it may make some sense because the modules
>>>> are some
>>>> sort of similar to an ELF shared object being dlopen()'ed, and our
>>>> way
>>>> to inject per-CPU symbols is analog to ELF interposition.
>>>>
>>>> arch/loongarch/include/asm/percpu.h:
>>>>
>>>> #if !__has_attribute(__addr_global__) && defined(MODULE)
>>>> /* Magically remove "static" for per-CPU variables. */
>>>> # define ARCH_NEEDS_WEAK_PER_CPU
>>>> /* Force GOT-relocation for per-CPU variables. */
>>>> # define PER_CPU_ATTRIBUTES
>>>> __attribute__((__visibility__("default")))
>>>> #endif
>>>>
>>>> arch/loongarch/Makefile:
>>>>
>>>> # Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
>>>> # include/asm/percpu.h
>>>> if (call gcc-does-not-support-addr-global)
>>>> KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
>>>> endif
>>>>
>>> Using the old toolchain (GCC 12) can successfully load the
>>> nf_tables.ko
>>> module after applying the above patch.
>> I don't like such a hack..., can we consider using old relocation
>> types when building by old toolchains?
>
> I don't like the hack too. I only developed it as an intellectual game.
>
> We need to consider multiple combinations:
>
> (1) Old GCC + old Binutils. We need -mla-local-with-abs for
> KBUILD_CFLAGS_MODULE.
>
> (2) Old GCC + new Binutils. We need -mla-local-with-abs for
> KBUILD_CFLAGS_MODULE, *and* adding the support for
> R_LARCH_ABS{_HI20,_LO12,64_LO20,64_HI12} in the kernel module loader.
>
> (3) New GCC + old Binutils. As new GCC should support our new attribute
> (I now intend to send V2 patch to gcc-patches using "movable" as the
> attribute name), no special action is needed.
>
> Basically, we need:
>
> (1) Handle R_LARCH_ABS{_HI20,_LO12,64_LO20,64_HI12} in the kernel module
> loader.
> (2) Add -Wa,-mla-local-with-abs into KBUILD_CFLAGS_MODULE if GCC version
> is <= 12.
Actually, I really hope the kernel image is in XKVRANGE rather
than in XKPRANGE, so that we can limit the kernel and modules
to a 4GB range. I think that would make everything work normally. :-(
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 10:08 ` Jinyang He
@ 2022-08-01 10:44 ` WANG Xuerui
2022-08-01 11:28 ` Youling Tang
1 sibling, 0 replies; 33+ messages in thread
From: WANG Xuerui @ 2022-08-01 10:44 UTC (permalink / raw)
To: Jinyang He, Xi Ruoyao, Huacai Chen, Youling Tang
Cc: loongarch, LKML, Lulu Cheng
On 2022/8/1 18:08, Jinyang He wrote:
> [snip]
>
> Actually, I really hope kernel image is in the XKVRANGE, rather
> than being in XKPRANGE. So that we can limit kernel and modules
> be in 4GB range. I think it will make all work normally. :-(
Just my 2c. I'd prefer any scheme in which memfd_secret is possible. The
current design makes it impossible to carve out memory regions from
kernel's view, IIUC, which is of course something to improve...
--
WANG "xen0n" Xuerui
Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 10:08 ` Jinyang He
2022-08-01 10:44 ` WANG Xuerui
@ 2022-08-01 11:28 ` Youling Tang
2022-08-01 11:39 ` Xi Ruoyao
1 sibling, 1 reply; 33+ messages in thread
From: Youling Tang @ 2022-08-01 11:28 UTC (permalink / raw)
To: Jinyang He
Cc: Xi Ruoyao, Huacai Chen, loongarch, LKML, WANG Xuerui, Lulu Cheng
Hi, Jinyang
On 08/01/2022 06:08 PM, Jinyang He wrote:
> On 08/01/2022 05:55 PM, Xi Ruoyao wrote:
>
>> On Mon, 2022-08-01 at 10:34 +0800, Huacai Chen wrote:
>>> Hi, all,
>>>
>>> On Mon, Aug 1, 2022 at 10:16 AM Youling Tang <tangyouling@loongson.cn>
>>> wrote:
>>>> Hi, Ruoyao
>>>>
>>>> On 07/30/2022 10:52 AM, Xi Ruoyao wrote:
>>>>> On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
>>>>>> On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
>>>>>>> On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
>>>>>>>
>>>>>>>> On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
>>>>>>>>> Hmm... The problem is the "addresses" of per-cpu symbols
>>>>>>>>> are
>>>>>>>>> faked: they
>>>>>>>>> are actually offsets from $r21. So we can't just load
>>>>>>>>> such an
>>>>>>>>> offset
>>>>>>>>> with PCALA addressing.
>>>>>>>>>
>>>>>>>>> It looks like we'll need to introduce an attribute for GCC
>>>>>>>>> to
>>>>>>>>> make
>>>>>>>>> an
>>>>>>>>> variable "must be addressed via GOT", and add the
>>>>>>>>> attribute into
>>>>>>>>> PER_CPU_ATTRIBUTES.
>>>>>>>> Yes, we need a GCC attribute to specify the per-cpu
>>>>>>>> variable.
>>>>>>> GCC patch adding "addr_global" attribute for LoongArch:
>>>>>>> https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
>>>>>>>
>>>>>>> An experiment to use it:
>>>>>>> https://github.com/xry111/linux/commit/c1d5d70
>>>>>> Correction: https://github.com/xry111/linux/commit/c1d5d708
>>>>>>
>>>>>> It seems 7-bit SHA is not enough for kernel repo.
>>>>> If addr_global is rejected or not implemented (for example,
>>>>> building the
>>>>> kernel with GCC 12), *I expect* the following hack to work (I've
>>>>> not
>>>>> tested it because I'm AFK now). Using visibility in kernel seems
>>>>> strange, but I think it may make some sense because the modules
>>>>> are some
>>>>> sort of similar to an ELF shared object being dlopen()'ed, and our
>>>>> way
>>>>> to inject per-CPU symbols is analog to ELF interposition.
>>>>>
>>>>> arch/loongarch/include/asm/percpu.h:
>>>>>
>>>>> #if !__has_attribute(__addr_global__) && defined(MODULE)
>>>>> /* Magically remove "static" for per-CPU variables. */
>>>>> # define ARCH_NEEDS_WEAK_PER_CPU
>>>>> /* Force GOT-relocation for per-CPU variables. */
>>>>> # define PER_CPU_ATTRIBUTES
>>>>> __attribute__((__visibility__("default")))
>>>>> #endif
>>>>>
>>>>> arch/loongarch/Makefile:
>>>>>
>>>>> # Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
>>>>> # include/asm/percpu.h
>>>>> if (call gcc-does-not-support-addr-global)
>>>>> KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
>>>>> endif
>>>>>
>>>> Using the old toolchain (GCC 12) can successfully load the
>>>> nf_tables.ko
>>>> module after applying the above patch.
>>> I don't like such a hack..., can we consider using old relocation
>>> types when building by old toolchains?
>>
>> I don't like the hack too. I only developed it as an intellectual game.
>>
>> We need to consider multiple combinations:
>>
>> (1) Old GCC + old Binutils. We need -mla-local-with-abs for
>> KBUILD_CFLAGS_MODULE.
>>
>> (2) Old GCC + new Binutils. We need -mla-local-with-abs for
>> KBUILD_CFLAGS_MODULE, *and* adding the support for
>> R_LARCH_ABS{_HI20,_LO12,64_LO20,64_HI12} in the kernel module loader.
>>
>> (3) New GCC + old Binutils. As new GCC should support our new attribute
>> (I now intend to send V2 patch to gcc-patches using "movable" as the
>> attribute name), no special action is needed.
>>
>> Basically, we need:
>>
>> (1) Handle R_LARCH_ABS{_HI20,_LO12,64_LO20,64_HI12} in the kernel module
>> loader.
>> (2) Add -Wa,-mla-local-with-abs into KBUILD_CFLAGS_MODULE if GCC version
>> is <= 12.
>
> Actually, I really hope kernel image is in the XKVRANGE, rather
> than being in XKPRANGE. So that we can limit kernel and modules
> be in 4GB range. I think it will make all work normally. :-(
>
Assuming that the kernel and modules are limited to 4G, the external
symbols will be accessed through pcrel32, which means that there is no
need to go through a GOT entry, and there is no need for GOT support,
so there will be no percpu problem, and everything will work normally?
Youling.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 11:28 ` Youling Tang
@ 2022-08-01 11:39 ` Xi Ruoyao
2022-08-01 12:09 ` Huacai Chen
2022-08-01 12:13 ` Youling Tang
0 siblings, 2 replies; 33+ messages in thread
From: Xi Ruoyao @ 2022-08-01 11:39 UTC (permalink / raw)
To: Youling Tang, Jinyang He
Cc: Huacai Chen, loongarch, LKML, WANG Xuerui, Lulu Cheng
On Mon, 2022-08-01 at 19:28 +0800, Youling Tang wrote:
> > Actually, I really hope kernel image is in the XKVRANGE, rather
> > than being in XKPRANGE. So that we can limit kernel and modules
> > be in 4GB range. I think it will make all work normally. :-(
> >
>
> Assuming that the kernel and modules are limited to 4G, the external
> symbols will be accessed through pcrel32, which means that there is no
> need to pass the GOT table entry and there is no need for got support
We'll still need to modify GCC to use PC-rel for accessing an object in
another TU (by default, or an option to control), instead of GOT. Or
just add support to GOT relocations here. But anyway it will be much
easier as we won't need to handle per-CPU variables specially.
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 11:39 ` Xi Ruoyao
@ 2022-08-01 12:09 ` Huacai Chen
2022-08-01 12:13 ` Youling Tang
1 sibling, 0 replies; 33+ messages in thread
From: Huacai Chen @ 2022-08-01 12:09 UTC (permalink / raw)
To: Xi Ruoyao
Cc: Youling Tang, Jinyang He, loongarch, LKML, WANG Xuerui, Lulu Cheng
Hi, Ruoyao,
On Mon, Aug 1, 2022 at 7:40 PM Xi Ruoyao <xry111@xry111.site> wrote:
>
> On Mon, 2022-08-01 at 19:28 +0800, Youling Tang wrote:
>
> > > Actually, I really hope kernel image is in the XKVRANGE, rather
> > > than being in XKPRANGE. So that we can limit kernel and modules
> > > be in 4GB range. I think it will make all work normally. :-(
> > >
> >
> > Assuming that the kernel and modules are limited to 4G, the external
> > symbols will be accessed through pcrel32, which means that there is no
> > need to pass the GOT table entry and there is no need for got support
>
> We'll still need to modify GCC to use PC-rel for accessing an object in
> another TU (by default, or an option to control), instead of GOT. Or
> just add support to GOT relocations here. But anyway it will be much
> easier as we won't need to handle per-CPU variables specially.
An all-TLB-mapped kernel may be supported in the future, but not now, because
there are a ton of problems. :)
Huacai
> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 11:39 ` Xi Ruoyao
2022-08-01 12:09 ` Huacai Chen
@ 2022-08-01 12:13 ` Youling Tang
[not found] ` <98efbf76-fbf3-f90b-82d4-bd2874088d05@loongson.cn>
1 sibling, 1 reply; 33+ messages in thread
From: Youling Tang @ 2022-08-01 12:13 UTC (permalink / raw)
To: Xi Ruoyao, Huacai Chen
Cc: Jinyang He, loongarch, LKML, WANG Xuerui, Lulu Cheng
On 08/01/2022 07:39 PM, Xi Ruoyao wrote:
> On Mon, 2022-08-01 at 19:28 +0800, Youling Tang wrote:
>
>>> Actually, I really hope kernel image is in the XKVRANGE, rather
>>> than being in XKPRANGE. So that we can limit kernel and modules
>>> be in 4GB range. I think it will make all work normally. :-(
>>>
>>
>> Assuming that the kernel and modules are limited to 4G, the external
>> symbols will be accessed through pcrel32, which means that there is no
>> need to pass the GOT table entry and there is no need for got support
>
> We'll still need to modify GCC to use PC-rel for accessing an object in
> another TU (by default, or an option to control), instead of GOT. Or
> just add support to GOT relocations here. But anyway it will be much
> easier as we won't need to handle per-CPU variables specially.
>
Oh, old toolchains require extra handling no matter how things are modified.
Maybe rejecting old toolchain builds is a good option as Huacai said.
Youling.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4 0/4] LoongArch: Support new relocation types
2022-08-01 9:55 ` Xi Ruoyao
2022-08-01 10:08 ` Jinyang He
@ 2022-08-01 10:41 ` Huacai Chen
1 sibling, 0 replies; 33+ messages in thread
From: Huacai Chen @ 2022-08-01 10:41 UTC (permalink / raw)
To: Xi Ruoyao
Cc: Youling Tang, loongarch, LKML, WANG Xuerui, Jinyang He, Lulu Cheng
Hi, Ruoyao,
On Mon, Aug 1, 2022 at 5:55 PM Xi Ruoyao <xry111@xry111.site> wrote:
>
> On Mon, 2022-08-01 at 10:34 +0800, Huacai Chen wrote:
> > Hi, all,
> >
> > On Mon, Aug 1, 2022 at 10:16 AM Youling Tang <tangyouling@loongson.cn>
> > wrote:
> > >
> > > Hi, Ruoyao
> > >
> > > On 07/30/2022 10:52 AM, Xi Ruoyao wrote:
> > > > On Sat, 2022-07-30 at 10:24 +0800, Xi Ruoyao wrote:
> > > > > On Sat, 2022-07-30 at 01:55 +0800, Xi Ruoyao wrote:
> > > > > > On Fri, 2022-07-29 at 20:19 +0800, Youling Tang wrote:
> > > > > >
> > > > > > > On 07/29/2022 07:45 PM, Xi Ruoyao wrote:
> > > > > > > > Hmm... The problem is the "addresses" of per-cpu symbols
> > > > > > > > are
> > > > > > > > faked: they
> > > > > > > > are actually offsets from $r21. So we can't just load
> > > > > > > > such an
> > > > > > > > offset
> > > > > > > > with PCALA addressing.
> > > > > > > >
> > > > > > > > It looks like we'll need to introduce an attribute for GCC
> > > > > > > > to
> > > > > > > > make
> > > > > > > > an
> > > > > > > > variable "must be addressed via GOT", and add the
> > > > > > > > attribute into
> > > > > > > > PER_CPU_ATTRIBUTES.
> > > > > >
> > > > > > > Yes, we need a GCC attribute to specify the per-cpu
> > > > > > > variable.
> > > > > >
> > > > > > GCC patch adding "addr_global" attribute for LoongArch:
> > > > > > https://gcc.gnu.org/pipermail/gcc-patches/2022-July/599064.html
> > > > > >
> > > > > > An experiment to use it:
> > > > > > https://github.com/xry111/linux/commit/c1d5d70
> > > > >
> > > > > Correction: https://github.com/xry111/linux/commit/c1d5d708
> > > > >
> > > > > It seems 7-bit SHA is not enough for kernel repo.
> > > >
> > > > If addr_global is rejected or not implemented (for example,
> > > > building the
> > > > kernel with GCC 12), *I expect* the following hack to work (I've
> > > > not
> > > > tested it because I'm AFK now). Using visibility in kernel seems
> > > > strange, but I think it may make some sense because the modules
> > > > are some
> > > > sort of similar to an ELF shared object being dlopen()'ed, and our
> > > > way
> > > > to inject per-CPU symbols is analog to ELF interposition.
> > > >
> > > > arch/loongarch/include/asm/percpu.h:
> > > >
> > > > #if !__has_attribute(__addr_global__) && defined(MODULE)
> > > > /* Magically remove "static" for per-CPU variables. */
> > > > # define ARCH_NEEDS_WEAK_PER_CPU
> > > > /* Force GOT-relocation for per-CPU variables. */
> > > > # define PER_CPU_ATTRIBUTES
> > > > __attribute__((__visibility__("default")))
> > > > #endif
> > > >
> > > > arch/loongarch/Makefile:
> > > >
> > > > # Hack for per-CPU variables, see PER_CPU_ATTRIBUTES in
> > > > # include/asm/percpu.h
> > > > if (call gcc-does-not-support-addr-global)
> > > > KBUILD_CFLAGS_MODULE += -fPIC -fvisibility=hidden
> > > > endif
> > > >
> > > Using the old toolchain (GCC 12) can successfully load the
> > > nf_tables.ko
> > > module after applying the above patch.
> > I don't like such a hack..., can we consider using old relocation
> > types when building by old toolchains?
>
>
> I don't like the hack too. I only developed it as an intellectual game.
>
> We need to consider multiple combinations:
>
> (1) Old GCC + old Binutils. We need -mla-local-with-abs for
> KBUILD_CFLAGS_MODULE.
>
> (2) Old GCC + new Binutils. We need -mla-local-with-abs for
> KBUILD_CFLAGS_MODULE, *and* adding the support for
> R_LARCH_ABS{_HI20,_LO12,64_LO20,64_HI12} in the kernel module loader.
>
> (3) New GCC + old Binutils. As new GCC should support our new attribute
> (I now intend to send V2 patch to gcc-patches using "movable" as the
> attribute name), no special action is needed.
>
> Basically, we need:
>
> (1) Handle R_LARCH_ABS{_HI20,_LO12,64_LO20,64_HI12} in the kernel module
> loader.
> (2) Add -Wa,-mla-local-with-abs into KBUILD_CFLAGS_MODULE if GCC version
> is <= 12.
There is another simple solution: just refuse to build if the
toolchain is too old.
Huacai
> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
>
^ permalink raw reply [flat|nested] 33+ messages in thread