* [PATCH v2 1/7] ARM: mm: Refactor the __do_page_fault()
2021-06-02 7:02 [PATCH v2 0/7] ARM: mm: cleanup page fault and fix pxn process issue Kefeng Wang
@ 2021-06-02 7:02 ` Kefeng Wang
2021-06-02 10:29 ` Russell King (Oracle)
2021-06-02 7:02 ` [PATCH v2 2/7] ARM: mm: Kill task_struct argument for __do_page_fault() Kefeng Wang
` (5 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 7:02 UTC (permalink / raw)
To: Russell King, linux-arm-kernel
Cc: Catalin Marinas, linux-kernel, Andrew Morton, Jungseung Lee, Kefeng Wang
Clean up the multiple goto statements and drop the local variable
vm_fault_t fault, which will make __do_page_fault() much
more readable.
No functional change.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm/mm/fault.c | 34 +++++++++++++---------------------
1 file changed, 13 insertions(+), 21 deletions(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index efa402025031..662ac3ca3c8a 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -205,35 +205,27 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
unsigned int flags, struct task_struct *tsk,
struct pt_regs *regs)
{
- struct vm_area_struct *vma;
- vm_fault_t fault;
-
- vma = find_vma(mm, addr);
- fault = VM_FAULT_BADMAP;
+ struct vm_area_struct *vma = find_vma(mm, addr);
if (unlikely(!vma))
- goto out;
- if (unlikely(vma->vm_start > addr))
- goto check_stack;
+ return VM_FAULT_BADMAP;
+
+ if (unlikely(vma->vm_start > addr)) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return VM_FAULT_BADMAP;
+ if (addr < FIRST_USER_ADDRESS)
+ return VM_FAULT_BADMAP;
+ if (expand_stack(vma, addr))
+ return VM_FAULT_BADMAP;
+ }
/*
* Ok, we have a good vm_area for this
* memory access, so we can handle it.
*/
-good_area:
- if (access_error(fsr, vma)) {
- fault = VM_FAULT_BADACCESS;
- goto out;
- }
+ if (access_error(fsr, vma))
+ return VM_FAULT_BADACCESS;
return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
-
-check_stack:
- /* Don't allow expansion below FIRST_USER_ADDRESS */
- if (vma->vm_flags & VM_GROWSDOWN &&
- addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
- goto good_area;
-out:
- return fault;
}
static int __kprobes
--
2.26.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 1/7] ARM: mm: Refactor the __do_page_fault()
2021-06-02 7:02 ` [PATCH v2 1/7] ARM: mm: Refactor the __do_page_fault() Kefeng Wang
@ 2021-06-02 10:29 ` Russell King (Oracle)
0 siblings, 0 replies; 21+ messages in thread
From: Russell King (Oracle) @ 2021-06-02 10:29 UTC (permalink / raw)
To: Kefeng Wang
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
Hi,
On Wed, Jun 02, 2021 at 03:02:40PM +0800, Kefeng Wang wrote:
> Clean up the multiple goto statements and drop the local variable
> vm_fault_t fault, which will make __do_page_fault() much
> more readable.
>
> No functional change.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
This looks fine to me. Please send it to the patch system (details in my
email signature.) Thanks.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2 2/7] ARM: mm: Kill task_struct argument for __do_page_fault()
2021-06-02 7:02 [PATCH v2 0/7] ARM: mm: cleanup page fault and fix pxn process issue Kefeng Wang
2021-06-02 7:02 ` [PATCH v2 1/7] ARM: mm: Refactor the __do_page_fault() Kefeng Wang
@ 2021-06-02 7:02 ` Kefeng Wang
2021-06-02 10:31 ` Russell King (Oracle)
2021-06-02 7:02 ` [PATCH v2 3/7] ARM: mm: Cleanup access_error() Kefeng Wang
` (4 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 7:02 UTC (permalink / raw)
To: Russell King, linux-arm-kernel
Cc: Catalin Marinas, linux-kernel, Andrew Morton, Jungseung Lee, Kefeng Wang
The __do_page_fault() doesn't use the task_struct argument, so kill it
and also use current->mm directly in do_page_fault().
No functional change.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm/mm/fault.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 662ac3ca3c8a..249db395bdf0 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -202,8 +202,7 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
static vm_fault_t __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
- unsigned int flags, struct task_struct *tsk,
- struct pt_regs *regs)
+ unsigned int flags, struct pt_regs *regs)
{
struct vm_area_struct *vma = find_vma(mm, addr);
if (unlikely(!vma))
@@ -231,8 +230,7 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
- struct task_struct *tsk;
- struct mm_struct *mm;
+ struct mm_struct *mm = current->mm;
int sig, code;
vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
@@ -240,8 +238,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (kprobe_page_fault(regs, fsr))
return 0;
- tsk = current;
- mm = tsk->mm;
/* Enable interrupts if they were enabled in the parent context. */
if (interrupts_enabled(regs))
@@ -285,7 +281,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
#endif
}
- fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs);
+ fault = __do_page_fault(mm, addr, fsr, flags, regs);
/* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because
--
2.26.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 2/7] ARM: mm: Kill task_struct argument for __do_page_fault()
2021-06-02 7:02 ` [PATCH v2 2/7] ARM: mm: Kill task_struct argument for __do_page_fault() Kefeng Wang
@ 2021-06-02 10:31 ` Russell King (Oracle)
0 siblings, 0 replies; 21+ messages in thread
From: Russell King (Oracle) @ 2021-06-02 10:31 UTC (permalink / raw)
To: Kefeng Wang
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
Hi,
On Wed, Jun 02, 2021 at 03:02:41PM +0800, Kefeng Wang wrote:
> The __do_page_fault() doesn't use the task_struct argument, so kill it
> and also use current->mm directly in do_page_fault().
>
> No functional change.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
This looks fine, thanks. Please send it to the patch system, thanks.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2 3/7] ARM: mm: Cleanup access_error()
2021-06-02 7:02 [PATCH v2 0/7] ARM: mm: cleanup page fault and fix pxn process issue Kefeng Wang
2021-06-02 7:02 ` [PATCH v2 1/7] ARM: mm: Refactor the __do_page_fault() Kefeng Wang
2021-06-02 7:02 ` [PATCH v2 2/7] ARM: mm: Kill task_struct argument for __do_page_fault() Kefeng Wang
@ 2021-06-02 7:02 ` Kefeng Wang
2021-06-02 10:39 ` Russell King (Oracle)
2021-06-02 7:02 ` [PATCH v2 4/7] ARM: mm: print out correct page table entries Kefeng Wang
` (3 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 7:02 UTC (permalink / raw)
To: Russell King, linux-arm-kernel
Cc: Catalin Marinas, linux-kernel, Andrew Morton, Jungseung Lee, Kefeng Wang
Now the write fault is checked twice, in both do_page_fault() and
access_error(); we can clean up access_error(), move the fault check and
vma flags setup into do_page_fault() directly, then pass the vma flags
to __do_page_fault().
No functional change.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm/mm/fault.c | 38 ++++++++++++++------------------------
1 file changed, 14 insertions(+), 24 deletions(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 249db395bdf0..9a6d74f6ea1d 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -183,26 +183,9 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
#define VM_FAULT_BADMAP 0x010000
#define VM_FAULT_BADACCESS 0x020000
-/*
- * Check that the permissions on the VMA allow for the fault which occurred.
- * If we encountered a write fault, we must have write permission, otherwise
- * we allow any permission.
- */
-static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
-{
- unsigned int mask = VM_ACCESS_FLAGS;
-
- if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
- mask = VM_WRITE;
- if (fsr & FSR_LNX_PF)
- mask = VM_EXEC;
-
- return vma->vm_flags & mask ? false : true;
-}
-
static vm_fault_t __kprobes
-__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
- unsigned int flags, struct pt_regs *regs)
+__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int flags,
+ unsigned long vma_flags, struct pt_regs *regs)
{
struct vm_area_struct *vma = find_vma(mm, addr);
if (unlikely(!vma))
@@ -218,10 +201,10 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
}
/*
- * Ok, we have a good vm_area for this
- * memory access, so we can handle it.
+ * ok, we have a good vm_area for this memory access, check the
+ * permissions on the VMA allow for the fault which occurred.
*/
- if (access_error(fsr, vma))
+ if (!(vma->vm_flags & vma_flags))
return VM_FAULT_BADACCESS;
return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
@@ -234,6 +217,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
int sig, code;
vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
+ unsigned long vm_flags = VM_ACCESS_FLAGS;
if (kprobe_page_fault(regs, fsr))
return 0;
@@ -252,8 +236,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
- if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
+
+ if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) {
flags |= FAULT_FLAG_WRITE;
+ vm_flags = VM_WRITE;
+ }
+
+ if (fsr & FSR_LNX_PF)
+ vm_flags = VM_EXEC;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
@@ -281,7 +271,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
#endif
}
- fault = __do_page_fault(mm, addr, fsr, flags, regs);
+ fault = __do_page_fault(mm, addr, flags, vm_flags, regs);
/* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because
--
2.26.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 3/7] ARM: mm: Cleanup access_error()
2021-06-02 7:02 ` [PATCH v2 3/7] ARM: mm: Cleanup access_error() Kefeng Wang
@ 2021-06-02 10:39 ` Russell King (Oracle)
0 siblings, 0 replies; 21+ messages in thread
From: Russell King (Oracle) @ 2021-06-02 10:39 UTC (permalink / raw)
To: Kefeng Wang
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
Hi,
On Wed, Jun 02, 2021 at 03:02:42PM +0800, Kefeng Wang wrote:
> Now the write fault is checked twice, in both do_page_fault() and
> access_error(); we can clean up access_error(), move the fault check and
> vma flags setup into do_page_fault() directly, then pass the vma flags
> to __do_page_fault().
>
> No functional change.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Looks fine to me. Thanks.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2 4/7] ARM: mm: print out correct page table entries
2021-06-02 7:02 [PATCH v2 0/7] ARM: mm: cleanup page fault and fix pxn process issue Kefeng Wang
` (2 preceding siblings ...)
2021-06-02 7:02 ` [PATCH v2 3/7] ARM: mm: Cleanup access_error() Kefeng Wang
@ 2021-06-02 7:02 ` Kefeng Wang
2021-06-02 10:44 ` Russell King (Oracle)
2021-06-02 7:02 ` [PATCH v2 5/7] ARM: mm: Print physical address of page table base in show_pte() Kefeng Wang
` (2 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 7:02 UTC (permalink / raw)
To: Russell King, linux-arm-kernel
Cc: Catalin Marinas, linux-kernel, Andrew Morton, Jungseung Lee, Kefeng Wang
Like commit 67ce16ec15ce ("arm64: mm: print out correct page table entries")
does, drop the struct mm_struct argument of show_pte(), print the tables
based on the faulting address.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm/include/asm/bug.h | 2 +-
arch/arm/kernel/traps.c | 2 +-
arch/arm/mm/fault.c | 36 ++++++++++++++++++++----------------
3 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index ba8d9d7d242b..d618640e34aa 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -86,7 +86,7 @@ extern asmlinkage void c_backtrace(unsigned long fp, int pmode,
const char *loglvl);
struct mm_struct;
-void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr);
+void show_pte(const char *lvl, unsigned long addr);
extern void __show_regs(struct pt_regs *);
extern void __show_regs_alloc_free(struct pt_regs *regs);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 64308e3a5d0c..98c904aeee78 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -736,7 +736,7 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n",
task_pid_nr(current), current->comm, code, instr);
dump_instr(KERN_ERR, regs);
- show_pte(KERN_ERR, current->mm, addr);
+ show_pte(KERN_ERR, addr);
}
#endif
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 9a6d74f6ea1d..71a5c29488c2 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -27,15 +27,23 @@
#ifdef CONFIG_MMU
/*
- * This is useful to dump out the page tables associated with
- * 'addr' in mm 'mm'.
+ * Dump out the page tables associated with 'addr' in the currently active mm
*/
-void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
+void show_pte(const char *lvl, unsigned long addr)
{
pgd_t *pgd;
-
- if (!mm)
+ struct mm_struct *mm;
+
+ if (addr < TASK_SIZE) {
+ mm = current->active_mm;
+ if (mm == &init_mm) {
+ printk("%s[%08lx] user address but active_mm is swapper\n",
+ lvl, addr);
+ return;
+ }
+ } else {
mm = &init_mm;
+ }
printk("%spgd = %p\n", lvl, mm->pgd);
pgd = pgd_offset(mm, addr);
@@ -96,7 +104,7 @@ void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
pr_cont("\n");
}
#else /* CONFIG_MMU */
-void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
+void show_pte(const char *lvl, unsigned long addr)
{ }
#endif /* CONFIG_MMU */
@@ -104,8 +112,7 @@ void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
* Oops. The kernel tried to access some page that wasn't present.
*/
static void
-__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
- struct pt_regs *regs)
+__do_kernel_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
/*
* Are we prepared to handle this kernel fault?
@@ -122,7 +129,7 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
- show_pte(KERN_ALERT, mm, addr);
+ show_pte(KERN_ALERT, addr);
die("Oops", regs, fsr);
bust_spinlocks(0);
do_exit(SIGKILL);
@@ -147,7 +154,7 @@ __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
pr_err("8<--- cut here ---\n");
pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
tsk->comm, sig, addr, fsr);
- show_pte(KERN_ERR, tsk->mm, addr);
+ show_pte(KERN_ERR, addr);
show_regs(regs);
}
#endif
@@ -166,9 +173,6 @@ __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->active_mm;
-
/*
* If we are in kernel mode at this point, we
* have no context to handle this fault with.
@@ -176,7 +180,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (user_mode(regs))
__do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
else
- __do_kernel_fault(mm, addr, fsr, regs);
+ __do_kernel_fault(addr, fsr, regs);
}
#ifdef CONFIG_MMU
@@ -336,7 +340,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
return 0;
no_context:
- __do_kernel_fault(mm, addr, fsr, regs);
+ __do_kernel_fault(addr, fsr, regs);
return 0;
}
#else /* CONFIG_MMU */
@@ -503,7 +507,7 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
pr_alert("8<--- cut here ---\n");
pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
- show_pte(KERN_ALERT, current->mm, addr);
+ show_pte(KERN_ALERT, addr);
arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
fsr, 0);
--
2.26.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 4/7] ARM: mm: print out correct page table entries
2021-06-02 7:02 ` [PATCH v2 4/7] ARM: mm: print out correct page table entries Kefeng Wang
@ 2021-06-02 10:44 ` Russell King (Oracle)
2021-06-02 11:24 ` Kefeng Wang
0 siblings, 1 reply; 21+ messages in thread
From: Russell King (Oracle) @ 2021-06-02 10:44 UTC (permalink / raw)
To: Kefeng Wang
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
On Wed, Jun 02, 2021 at 03:02:43PM +0800, Kefeng Wang wrote:
> Like commit 67ce16ec15ce ("arm64: mm: print out correct page table entries")
> does, drop the struct mm_struct argument of show_pte(), print the tables
> based on the faulting address.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
This can be misleading on 32-bit ARM.
The effective page tables for each thread are the threads *own* page
tables. There is no hardware magic for addresses above PAGE_OFFSET being
directed to the init_mm page tables.
So, when we hit a fault in kernel space, we need to be printing the
currently in-use page tables associated with the running thread.
Hence:
> /*
> - * This is useful to dump out the page tables associated with
> - * 'addr' in mm 'mm'.
> + * Dump out the page tables associated with 'addr' in the currently active mm
> */
> -void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
> +void show_pte(const char *lvl, unsigned long addr)
> {
> pgd_t *pgd;
> -
> - if (!mm)
> + struct mm_struct *mm;
> +
> + if (addr < TASK_SIZE) {
> + mm = current->active_mm;
> + if (mm == &init_mm) {
> + printk("%s[%08lx] user address but active_mm is swapper\n",
> + lvl, addr);
> + return;
> + }
> + } else {
> mm = &init_mm;
> + }
is incorrect here.
It's completely fine for architectures where kernel accesses always go
to the init_mm page tables, but for 32-bit ARM that is not the case.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2 4/7] ARM: mm: print out correct page table entries
2021-06-02 10:44 ` Russell King (Oracle)
@ 2021-06-02 11:24 ` Kefeng Wang
0 siblings, 0 replies; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 11:24 UTC (permalink / raw)
To: Russell King (Oracle)
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
On 2021/6/2 18:44, Russell King (Oracle) wrote:
> On Wed, Jun 02, 2021 at 03:02:43PM +0800, Kefeng Wang wrote:
>> Like commit 67ce16ec15ce ("arm64: mm: print out correct page table entries")
>> does, drop the struct mm_struct argument of show_pte(), print the tables
>> based on the faulting address.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> This can be misleading on 32-bit ARM.
>
> The effective page tables for each thread are the threads *own* page
> tables. There is no hardware magic for addresses above PAGE_OFFSET being
> directed to the init_mm page tables.
>
> So, when we hit a fault in kernel space, we need to be printing the
> currently in-use page tables associated with the running thread.
>
> Hence:
>
>> /*
>> - * This is useful to dump out the page tables associated with
>> - * 'addr' in mm 'mm'.
>> + * Dump out the page tables associated with 'addr' in the currently active mm
>> */
>> -void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
>> +void show_pte(const char *lvl, unsigned long addr)
>> {
>> pgd_t *pgd;
>> -
>> - if (!mm)
>> + struct mm_struct *mm;
>> +
>> + if (addr < TASK_SIZE) {
>> + mm = current->active_mm;
>> + if (mm == &init_mm) {
>> + printk("%s[%08lx] user address but active_mm is swapper\n",
>> + lvl, addr);
>> + return;
>> + }
>> + } else {
>> mm = &init_mm;
>> + }
> is incorrect here.
>
> It's completely fine for architectures where kernel accesses always go
> to the init_mm page tables, but for 32-bit ARM that is not the case.
OK, I will drop this one, thanks
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2 5/7] ARM: mm: Print physical address of page table base in show_pte()
2021-06-02 7:02 [PATCH v2 0/7] ARM: mm: cleanup page fault and fix pxn process issue Kefeng Wang
` (3 preceding siblings ...)
2021-06-02 7:02 ` [PATCH v2 4/7] ARM: mm: print out correct page table entries Kefeng Wang
@ 2021-06-02 7:02 ` Kefeng Wang
2021-06-02 10:47 ` Russell King (Oracle)
2021-06-02 7:02 ` [PATCH v2 6/7] ARM: mm: Provide die_kernel_fault() helper Kefeng Wang
2021-06-02 7:02 ` [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature Kefeng Wang
6 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 7:02 UTC (permalink / raw)
To: Russell King, linux-arm-kernel
Cc: Catalin Marinas, linux-kernel, Andrew Morton, Jungseung Lee, Kefeng Wang
Now show_pte() will dump the virtual (hashed) address of the page
table base, which is useless; let's print the page table base pointer
as a physical address for debugging.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm/mm/fault.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 71a5c29488c2..1383d465399b 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -45,7 +45,7 @@ void show_pte(const char *lvl, unsigned long addr)
mm = &init_mm;
}
- printk("%spgd = %p\n", lvl, mm->pgd);
+ printk("%spgd = %08lx\n", lvl, (unsigned long)virt_to_phys(mm->pgd));
pgd = pgd_offset(mm, addr);
printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
--
2.26.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 5/7] ARM: mm: Print physical address of page table base in show_pte()
2021-06-02 7:02 ` [PATCH v2 5/7] ARM: mm: Print physical address of page table base in show_pte() Kefeng Wang
@ 2021-06-02 10:47 ` Russell King (Oracle)
2021-06-02 11:25 ` Kefeng Wang
0 siblings, 1 reply; 21+ messages in thread
From: Russell King (Oracle) @ 2021-06-02 10:47 UTC (permalink / raw)
To: Kefeng Wang
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
Hi,
On Wed, Jun 02, 2021 at 03:02:44PM +0800, Kefeng Wang wrote:
> Now show_pte() will dump the virtual (hashed) address of the page
> table base, which is useless; let's print the page table base pointer
> as a physical address for debugging.
I think we could probably get rid of this line - I think the last time
the PGD address was of use was a very long time ago.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2 5/7] ARM: mm: Print physical address of page table base in show_pte()
2021-06-02 10:47 ` Russell King (Oracle)
@ 2021-06-02 11:25 ` Kefeng Wang
0 siblings, 0 replies; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 11:25 UTC (permalink / raw)
To: Russell King (Oracle)
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
On 2021/6/2 18:47, Russell King (Oracle) wrote:
> Hi,
>
> On Wed, Jun 02, 2021 at 03:02:44PM +0800, Kefeng Wang wrote:
>> Now show_pte() will dump the virtual (hashed) address of the page
>> table base, which is useless; let's print the page table base pointer
>> as a physical address for debugging.
> I think we could probably get rid of this line - I think the last time
> the PGD address was of use was a very long time ago.
Will delete this print.
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2 6/7] ARM: mm: Provide die_kernel_fault() helper
2021-06-02 7:02 [PATCH v2 0/7] ARM: mm: cleanup page fault and fix pxn process issue Kefeng Wang
` (4 preceding siblings ...)
2021-06-02 7:02 ` [PATCH v2 5/7] ARM: mm: Print physical address of page table base in show_pte() Kefeng Wang
@ 2021-06-02 7:02 ` Kefeng Wang
2021-06-02 10:49 ` Russell King (Oracle)
2021-06-02 7:02 ` [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature Kefeng Wang
6 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 7:02 UTC (permalink / raw)
To: Russell King, linux-arm-kernel
Cc: Catalin Marinas, linux-kernel, Andrew Morton, Jungseung Lee, Kefeng Wang
Provide die_kernel_fault() helper to do the kernel fault reporting,
which with msg argument, it could report different message in different
scenes, and the later patch "ARM: mm: Fix PXN process with LPAE feature"
will use it.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm/mm/fault.c | 29 ++++++++++++++++++++---------
1 file changed, 20 insertions(+), 9 deletions(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 1383d465399b..7cfa9a59d3ec 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -108,12 +108,27 @@ void show_pte(const char *lvl, unsigned long addr)
{ }
#endif /* CONFIG_MMU */
+static void die_kernel_fault(const char *msg, unsigned long addr,
+ unsigned int fsr, struct pt_regs *regs)
+{
+ bust_spinlocks(1);
+ pr_alert("8<--- cut here ---\n");
+ pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
+ msg, addr);
+
+ show_pte(KERN_ALERT, addr);
+ die("Oops", regs, fsr);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+}
+
/*
* Oops. The kernel tried to access some page that wasn't present.
*/
static void
__do_kernel_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ const char *msg;
/*
* Are we prepared to handle this kernel fault?
*/
@@ -123,16 +138,12 @@ __do_kernel_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
/*
* No handler, we'll have to terminate things with extreme prejudice.
*/
- bust_spinlocks(1);
- pr_alert("8<--- cut here ---\n");
- pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
- (addr < PAGE_SIZE) ? "NULL pointer dereference" :
- "paging request", addr);
+ if (addr < PAGE_SIZE)
+ msg = "NULL pointer dereference";
+ else
+ msg = "paging request";
- show_pte(KERN_ALERT, addr);
- die("Oops", regs, fsr);
- bust_spinlocks(0);
- do_exit(SIGKILL);
+ die_kernel_fault(msg, addr, fsr, regs);
}
/*
--
2.26.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 6/7] ARM: mm: Provide die_kernel_fault() helper
2021-06-02 7:02 ` [PATCH v2 6/7] ARM: mm: Provide die_kernel_fault() helper Kefeng Wang
@ 2021-06-02 10:49 ` Russell King (Oracle)
0 siblings, 0 replies; 21+ messages in thread
From: Russell King (Oracle) @ 2021-06-02 10:49 UTC (permalink / raw)
To: Kefeng Wang
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
Hi,
On Wed, Jun 02, 2021 at 03:02:45PM +0800, Kefeng Wang wrote:
> Provide die_kernel_fault() helper to do the kernel fault reporting,
> which with msg argument, it could report different message in different
> scenes, and the later patch "ARM: mm: Fix PXN process with LPAE feature"
> will use it.
This looks fine to me, thanks.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature
2021-06-02 7:02 [PATCH v2 0/7] ARM: mm: cleanup page fault and fix pxn process issue Kefeng Wang
` (5 preceding siblings ...)
2021-06-02 7:02 ` [PATCH v2 6/7] ARM: mm: Provide die_kernel_fault() helper Kefeng Wang
@ 2021-06-02 7:02 ` Kefeng Wang
2021-06-02 10:52 ` Russell King (Oracle)
6 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 7:02 UTC (permalink / raw)
To: Russell King, linux-arm-kernel
Cc: Catalin Marinas, linux-kernel, Andrew Morton, Jungseung Lee, Kefeng Wang
When user code is executed in privileged mode, it will lead to an
infinite loop in the page fault handler if ARM_LPAE is enabled.
The issue could be reproduced with
"echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT"
Let's fix it by adding a check in do_page_fault() and panicking
when ARM_LPAE is enabled.
Fixes: 1d4d37159d01 ("ARM: 8235/1: Support for the PXN CPU feature on ARMv7")
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm/mm/fault.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 7cfa9a59d3ec..279bbeb33b48 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -257,8 +257,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
vm_flags = VM_WRITE;
}
- if (fsr & FSR_LNX_PF)
+ if (fsr & FSR_LNX_PF) {
vm_flags = VM_EXEC;
+#ifdef CONFIG_ARM_LPAE
+ if (addr && addr < TASK_SIZE && !user_mode(regs))
+ die_kernel_fault("execution of user memory",
+ addr, fsr, regs);
+#endif
+ }
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
--
2.26.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature
2021-06-02 7:02 ` [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature Kefeng Wang
@ 2021-06-02 10:52 ` Russell King (Oracle)
2021-06-02 15:13 ` Kefeng Wang
0 siblings, 1 reply; 21+ messages in thread
From: Russell King (Oracle) @ 2021-06-02 10:52 UTC (permalink / raw)
To: Kefeng Wang
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
Hi,
On Wed, Jun 02, 2021 at 03:02:46PM +0800, Kefeng Wang wrote:
> When user code is executed in privileged mode, it will lead to an
> infinite loop in the page fault handler if ARM_LPAE is enabled.
>
> The issue could be reproduced with
> "echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT"
>
> Let's fix it by adding a check in do_page_fault() and panicking
> when ARM_LPAE is enabled.
>
> Fixes: 1d4d37159d01 ("ARM: 8235/1: Support for the PXN CPU feature on ARMv7")
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
> arch/arm/mm/fault.c | 8 +++++++-
> 1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
> index 7cfa9a59d3ec..279bbeb33b48 100644
> --- a/arch/arm/mm/fault.c
> +++ b/arch/arm/mm/fault.c
> @@ -257,8 +257,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
> vm_flags = VM_WRITE;
> }
>
> - if (fsr & FSR_LNX_PF)
> + if (fsr & FSR_LNX_PF) {
> vm_flags = VM_EXEC;
> +#ifdef CONFIG_ARM_LPAE
> + if (addr && addr < TASK_SIZE && !user_mode(regs))
> + die_kernel_fault("execution of user memory",
> + addr, fsr, regs);
> +#endif
> + }
Do we need to do this test here?
Also, is this really LPAE specific? We have similar protection on 32-bit
ARM using domains to disable access to userspace except when the user
accessors are being used, so I would expect kernel-mode execution to
also cause a fault there.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature
2021-06-02 10:52 ` Russell King (Oracle)
@ 2021-06-02 15:13 ` Kefeng Wang
2021-06-02 15:58 ` Russell King (Oracle)
0 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-02 15:13 UTC (permalink / raw)
To: Russell King (Oracle)
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
On 2021/6/2 18:52, Russell King (Oracle) wrote:
> Hi,
>
> On Wed, Jun 02, 2021 at 03:02:46PM +0800, Kefeng Wang wrote:
>> When user code execution with privilege mode, it will lead to
>> infinite loop in the page fault handler if ARM_LPAE enabled,
>>
>> The issue could be reproduced with
>> "echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT"
>>
>> Lets' fix it by adding the check in do_page_fault() and panic
>> when ARM_LPAE enabled.
>>
>> Fixes: 1d4d37159d01 ("ARM: 8235/1: Support for the PXN CPU feature on ARMv7")
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>> arch/arm/mm/fault.c | 8 +++++++-
>> 1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
>> index 7cfa9a59d3ec..279bbeb33b48 100644
>> --- a/arch/arm/mm/fault.c
>> +++ b/arch/arm/mm/fault.c
>> @@ -257,8 +257,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>> vm_flags = VM_WRITE;
>> }
>>
>> - if (fsr & FSR_LNX_PF)
>> + if (fsr & FSR_LNX_PF) {
>> vm_flags = VM_EXEC;
>> +#ifdef CONFIG_ARM_LPAE
>> + if (addr && addr < TASK_SIZE && !user_mode(regs))
>> + die_kernel_fault("execution of user memory",
>> + addr, fsr, regs);
>> +#endif
>> + }
> Do we need to do this test here?
>
> Also, is this really LPAE specific? We have similar protection on 32-bit
> ARM using domains to disable access to userspace except when the user
> accessors are being used, so I would expect kernel-mode execution to
> also cause a fault there.
IFSR format when using the Short-descriptor translation table format
Domain fault 01001 First level 01011 Second level
Permission fault 01101 First level 01111 Second level
IFSR format when using the Long-descriptor translation table format
0011LL Permission fault. LL bits indicate the level.
After checking the ARM spec, I think for a permission fault we should
panic with or without LPAE; I will change it to
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 7cfa9a59d3ec..dd97d9b19dec 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -257,8 +257,11 @@ do_page_fault(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
vm_flags = VM_WRITE;
}
- if (fsr & FSR_LNX_PF)
+ if (fsr & FSR_LNX_PF) {
vm_flags = VM_EXEC;
+ if (!user_mode(regs))
+ die_kernel_fault("execution of memory", addr,
fsr, regs);
+ }
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
If there is no objection, I will send all patches with updates to the
patch system, thanks.
>
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature
2021-06-02 15:13 ` Kefeng Wang
@ 2021-06-02 15:58 ` Russell King (Oracle)
2021-06-03 9:38 ` Kefeng Wang
0 siblings, 1 reply; 21+ messages in thread
From: Russell King (Oracle) @ 2021-06-02 15:58 UTC (permalink / raw)
To: Kefeng Wang
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
On Wed, Jun 02, 2021 at 11:13:14PM +0800, Kefeng Wang wrote:
> IFSR format when using the Short-descriptor translation table format
>
> Domain fault 01001 First level 01011 Second level
>
> Permission fault 01101 First level 01111 Second level
>
> IFSR format when using the Long-descriptor translation table format
>
> 0011LL Permission fault. LL bits indicate levelb.
>
> After check the ARM spec, I think for the permission fault, we should panic
> with or without LPAE, will change to
As I explained in one of the previous patches, the page tables that get
used for mapping kernel space are the _tasks_ own page tables. Any new
kernel mappings are lazily copied to the task page tables - such as
when a module is loaded.
The first time we touch a page, we could end up with a page translation
fault. This will call do_page_fault(), and so with your proposal,
loading a module will potentially cause a kernel panic in this case,
probably leading to systems that panic early during userspace boot.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature
2021-06-02 15:58 ` Russell King (Oracle)
@ 2021-06-03 9:38 ` Kefeng Wang
2021-06-07 8:32 ` Kefeng Wang
0 siblings, 1 reply; 21+ messages in thread
From: Kefeng Wang @ 2021-06-03 9:38 UTC (permalink / raw)
To: Russell King (Oracle)
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
On 2021/6/2 23:58, Russell King (Oracle) wrote:
> On Wed, Jun 02, 2021 at 11:13:14PM +0800, Kefeng Wang wrote:
>> IFSR format when using the Short-descriptor translation table format
>>
>> Domain fault 01001 First level 01011 Second level
>>
>> Permission fault 01101 First level 01111 Second level
>>
>> IFSR format when using the Long-descriptor translation table format
>>
>> 0011LL Permission fault. LL bits indicate levelb.
>>
>> After check the ARM spec, I think for the permission fault, we should panic
>> with or without LPAE, will change to
> As I explained in one of the previous patches, the page tables that get
> used for mapping kernel space are the _tasks_ own page tables. Any new
> kernel mappings are lazily copied to the task page tables - such as
> when a module is loaded.
>
> The first time we touch a page, we could end up with a page translation
> fault. This will call do_page_fault(), and so with your proposal,
> loading a module will potentially cause a kernel panic in this case,
> probably leading to systems that panic early during userspace boot.
Could we add an FSR_FS check and only panic on a permission fault, e.g.,
+static inline bool is_permission_fault(unsigned int fsr)
+{
+ int fs = fsr_fs(fsr);
+#ifdef CONFIG_ARM_LPAE
+ if ((fs & FS_PERM_NOLL_MASK) == FS_PERM_NOLL)
+ return true;
+#else
+ if (fs == FS_L1_PERM || fs == FS_L2_PERM)
+ return true;
+#endif
+ return false;
+}
+
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
@@ -255,8 +268,7 @@ do_page_fault(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
if (fsr & FSR_LNX_PF) {
vm_flags = VM_EXEC;
-
- if (!user_mode(regs))
+ if (is_permission_fault(fsr) && !user_mode(regs))
die_kernel_fault("execution of memory",
mm, addr, fsr, regs);
}
diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
index 9ecc2097a87a..187954b4acca 100644
--- a/arch/arm/mm/fault.h
+++ b/arch/arm/mm/fault.h
@@ -14,6 +14,8 @@
#ifdef CONFIG_ARM_LPAE
#define FSR_FS_AEA 17
+#define FS_PERM_NOLL 0xC
+#define FS_PERM_NOLL_MASK 0x3C
static inline int fsr_fs(unsigned int fsr)
{
@@ -21,6 +23,8 @@ static inline int fsr_fs(unsigned int fsr)
}
#else
#define FSR_FS_AEA 22
+#define FS_L1_PERM 0xD
+#define FS_L2_PERM 0xF
Any suggestion or proper solution to solve the issue?
>
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 7/7] ARM: mm: Fix PXN process with LPAE feature
2021-06-03 9:38 ` Kefeng Wang
@ 2021-06-07 8:32 ` Kefeng Wang
0 siblings, 0 replies; 21+ messages in thread
From: Kefeng Wang @ 2021-06-07 8:32 UTC (permalink / raw)
To: Russell King (Oracle)
Cc: linux-arm-kernel, Catalin Marinas, linux-kernel, Andrew Morton,
Jungseung Lee
Hi Russell, any comments, thanks.
On 2021/6/3 17:38, Kefeng Wang wrote:
>
> On 2021/6/2 23:58, Russell King (Oracle) wrote:
>> On Wed, Jun 02, 2021 at 11:13:14PM +0800, Kefeng Wang wrote:
>>> IFSR format when using the Short-descriptor translation table format
>>>
>>> Domain fault 01001 First level 01011
>>> Second level
>>>
>>> Permission fault 01101 First level 01111 Second level
>>>
>>> IFSR format when using the Long-descriptor translation table format
>>>
>>> 0011LL Permission fault. LL bits indicate levelb.
>>>
>>> After check the ARM spec, I think for the permission fault, we
>>> should panic
>>> with or without LPAE, will change to
>> As I explained in one of the previous patches, the page tables that get
>> used for mapping kernel space are the _tasks_ own page tables. Any new
>> kernel mappings are lazily copied to the task page tables - such as
>> when a module is loaded.
>>
>> The first time we touch a page, we could end up with a page translation
>> fault. This will call do_page_fault(), and so with your proposal,
>> loading a module will potentially cause a kernel panic in this case,
>> probably leading to systems that panic early during userspace boot.
>
> Could we add some FSR_FS check, only panic when the permission fault,
> eg,
>
> +static inline bool is_permission_fault(unsigned int fsr)
> +{
> + int fs = fsr_fs(fsr);
> +#ifdef CONFIG_ARM_LPAE
> + if ((fs & FS_PERM_NOLL_MASK) == FS_PERM_NOLL)
> + return true;
> +#else
> + if (fs == FS_L1_PERM || fs == FS_L2_PERM )
> + return true;
> +#endif
> + return false;
> +}
> +
> static int __kprobes
> do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs
> *regs)
> {
> @@ -255,8 +268,7 @@ do_page_fault(unsigned long addr, unsigned int
> fsr, struct pt_regs *regs)
>
> if (fsr & FSR_LNX_PF) {
> vm_flags = VM_EXEC;
> -
> - if (!user_mode(regs))
> + if (is_permission_fault && !user_mode(regs))
> die_kernel_fault("execution of memory",
> mm, addr, fsr, regs);
> }
>
> diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
> index 9ecc2097a87a..187954b4acca 100644
> --- a/arch/arm/mm/fault.h
> +++ b/arch/arm/mm/fault.h
> @@ -14,6 +14,8 @@
>
> #ifdef CONFIG_ARM_LPAE
> #define FSR_FS_AEA 17
> +#define FS_PERM_NOLL 0xC
> +#define FS_PERM_NOLL_MASK 0x3C
>
> static inline int fsr_fs(unsigned int fsr)
> {
> @@ -21,6 +23,8 @@ static inline int fsr_fs(unsigned int fsr)
> }
> #else
> #define FSR_FS_AEA 22
> +#define FS_L1_PERM 0xD
> +#define FS_L2_PERM 0xF
>
> and suggestion or proper solution to solve the issue?
>
>>
^ permalink raw reply [flat|nested] 21+ messages in thread