All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
@ 2023-07-31 21:50 Lorenzo Stoakes
  2023-07-31 22:11 ` Jiri Olsa
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2023-07-31 21:50 UTC (permalink / raw)
  To: linux-mm, linux-kernel, Andrew Morton
  Cc: Baoquan He, Uladzislau Rezki, linux-fsdevel, Jiri Olsa,
	Will Deacon, Mike Galbraith, Mark Rutland, wangkefeng.wang,
	catalin.marinas, ardb, David Hildenbrand,
	Linux regression tracking, regressions, Matthew Wilcox,
	Liu Shixin, Jens Axboe, Alexander Viro, Lorenzo Stoakes, stable

Some architectures do not populate the entire range categorised by
KCORE_TEXT, so we must ensure that the kernel address we read from is
valid.

Unfortunately there is no solution currently available to do so with a
purely iterator solution so reinstate the bounce buffer in this instance so
we can use copy_from_kernel_nofault() in order to avoid page faults when
regions are unmapped.

This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
the code to continue to use an iterator.

Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
Reported-by: Jiri Olsa <olsajiri@gmail.com>
Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
Cc: stable@vger.kernel.org
Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
---
 fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 9cb32e1a78a0..3bc689038232 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
 
 static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
+	struct file *file = iocb->ki_filp;
+	char *buf = file->private_data;
 	loff_t *fpos = &iocb->ki_pos;
 	size_t phdrs_offset, notes_offset, data_offset;
 	size_t page_offline_frozen = 1;
@@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 			fallthrough;
 		case KCORE_VMEMMAP:
 		case KCORE_TEXT:
+			/*
+			 * Sadly we must use a bounce buffer here to be able to
+			 * make use of copy_from_kernel_nofault(), as these
+			 * memory regions might not always be mapped on all
+			 * architectures.
+			 */
+			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+				if (iov_iter_zero(tsz, iter) != tsz) {
+					ret = -EFAULT;
+					goto out;
+				}
 			/*
 			 * We use _copy_to_iter() to bypass usermode hardening
 			 * which would otherwise prevent this operation.
 			 */
-			if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
+			} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
 				ret = -EFAULT;
 				goto out;
 			}
@@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
 	if (ret)
 		return ret;
 
+	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!filp->private_data)
+		return -ENOMEM;
+
 	if (kcore_need_update)
 		kcore_update_ram();
 	if (i_size_read(inode) != proc_root_kcore->size) {
@@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int release_kcore(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
 static const struct proc_ops kcore_proc_ops = {
 	.proc_read_iter	= read_kcore_iter,
 	.proc_open	= open_kcore,
+	.proc_release	= release_kcore,
 	.proc_lseek	= default_llseek,
 };
 
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-07-31 21:50 [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions Lorenzo Stoakes
@ 2023-07-31 22:11 ` Jiri Olsa
  2023-08-01  8:27 ` Will Deacon
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Jiri Olsa @ 2023-07-31 22:11 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: linux-mm, linux-kernel, Andrew Morton, Baoquan He,
	Uladzislau Rezki, linux-fsdevel, Jiri Olsa, Will Deacon,
	Mike Galbraith, Mark Rutland, wangkefeng.wang, catalin.marinas,
	ardb, David Hildenbrand, Linux regression tracking, regressions,
	Matthew Wilcox, Liu Shixin, Jens Axboe, Alexander Viro, stable

On Mon, Jul 31, 2023 at 10:50:21PM +0100, Lorenzo Stoakes wrote:
> Some architectures do not populate the entire range categorised by
> KCORE_TEXT, so we must ensure that the kernel address we read from is
> valid.
> 
> Unfortunately there is no solution currently available to do so with a
> purely iterator solution so reinstate the bounce buffer in this instance so
> we can use copy_from_kernel_nofault() in order to avoid page faults when
> regions are unmapped.
> 
> This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
> bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
> the code to continue to use an iterator.
> 
> Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
> Reported-by: Jiri Olsa <olsajiri@gmail.com>

it fixed my issue, thanks

Tested-by: Jiri Olsa <jolsa@kernel.org>

jirka

> Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
> Cc: stable@vger.kernel.org
> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> ---
>  fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index 9cb32e1a78a0..3bc689038232 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
>  
>  static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>  {
> +	struct file *file = iocb->ki_filp;
> +	char *buf = file->private_data;
>  	loff_t *fpos = &iocb->ki_pos;
>  	size_t phdrs_offset, notes_offset, data_offset;
>  	size_t page_offline_frozen = 1;
> @@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>  			fallthrough;
>  		case KCORE_VMEMMAP:
>  		case KCORE_TEXT:
> +			/*
> +			 * Sadly we must use a bounce buffer here to be able to
> +			 * make use of copy_from_kernel_nofault(), as these
> +			 * memory regions might not always be mapped on all
> +			 * architectures.
> +			 */
> +			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
> +				if (iov_iter_zero(tsz, iter) != tsz) {
> +					ret = -EFAULT;
> +					goto out;
> +				}
>  			/*
>  			 * We use _copy_to_iter() to bypass usermode hardening
>  			 * which would otherwise prevent this operation.
>  			 */
> -			if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
> +			} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
>  				ret = -EFAULT;
>  				goto out;
>  			}
> @@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
>  	if (ret)
>  		return ret;
>  
> +	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +	if (!filp->private_data)
> +		return -ENOMEM;
> +
>  	if (kcore_need_update)
>  		kcore_update_ram();
>  	if (i_size_read(inode) != proc_root_kcore->size) {
> @@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
>  	return 0;
>  }
>  
> +static int release_kcore(struct inode *inode, struct file *file)
> +{
> +	kfree(file->private_data);
> +	return 0;
> +}
> +
>  static const struct proc_ops kcore_proc_ops = {
>  	.proc_read_iter	= read_kcore_iter,
>  	.proc_open	= open_kcore,
> +	.proc_release	= release_kcore,
>  	.proc_lseek	= default_llseek,
>  };
>  
> -- 
> 2.41.0
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-07-31 21:50 [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions Lorenzo Stoakes
  2023-07-31 22:11 ` Jiri Olsa
@ 2023-08-01  8:27 ` Will Deacon
  2023-08-01  9:05 ` David Hildenbrand
  2023-08-01 15:57 ` Baoquan He
  3 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2023-08-01  8:27 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: linux-mm, linux-kernel, Andrew Morton, Baoquan He,
	Uladzislau Rezki, linux-fsdevel, Jiri Olsa, Mike Galbraith,
	Mark Rutland, wangkefeng.wang, catalin.marinas, ardb,
	David Hildenbrand, Linux regression tracking, regressions,
	Matthew Wilcox, Liu Shixin, Jens Axboe, Alexander Viro, stable

On Mon, Jul 31, 2023 at 10:50:21PM +0100, Lorenzo Stoakes wrote:
> Some architectures do not populate the entire range categorised by
> KCORE_TEXT, so we must ensure that the kernel address we read from is
> valid.
> 
> Unfortunately there is no solution currently available to do so with a
> purely iterator solution so reinstate the bounce buffer in this instance so
> we can use copy_from_kernel_nofault() in order to avoid page faults when
> regions are unmapped.
> 
> This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
> bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
> the code to continue to use an iterator.
> 
> Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
> Reported-by: Jiri Olsa <olsajiri@gmail.com>
> Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
> Cc: stable@vger.kernel.org
> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> ---
>  fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)

Tested-by: Will Deacon <will@kernel.org>

I can confirm this fixes the arm64 issue reported by Mike over at [1].

Cheers,

Will

[1] https://lore.kernel.org/r/b39c62d29a431b023e98959578ba87e96af0e030.camel@gmx.de

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-07-31 21:50 [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions Lorenzo Stoakes
  2023-07-31 22:11 ` Jiri Olsa
  2023-08-01  8:27 ` Will Deacon
@ 2023-08-01  9:05 ` David Hildenbrand
  2023-08-01 16:33   ` Lorenzo Stoakes
  2023-08-01 15:57 ` Baoquan He
  3 siblings, 1 reply; 11+ messages in thread
From: David Hildenbrand @ 2023-08-01  9:05 UTC (permalink / raw)
  To: Lorenzo Stoakes, linux-mm, linux-kernel, Andrew Morton
  Cc: Baoquan He, Uladzislau Rezki, linux-fsdevel, Jiri Olsa,
	Will Deacon, Mike Galbraith, Mark Rutland, wangkefeng.wang,
	catalin.marinas, ardb, Linux regression tracking, regressions,
	Matthew Wilcox, Liu Shixin, Jens Axboe, Alexander Viro, stable

On 31.07.23 23:50, Lorenzo Stoakes wrote:
> Some architectures do not populate the entire range categorised by
> KCORE_TEXT, so we must ensure that the kernel address we read from is
> valid.
> 
> Unfortunately there is no solution currently available to do so with a
> purely iterator solution so reinstate the bounce buffer in this instance so
> we can use copy_from_kernel_nofault() in order to avoid page faults when
> regions are unmapped.
> 
> This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
> bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
> the code to continue to use an iterator.
> 
> Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
> Reported-by: Jiri Olsa <olsajiri@gmail.com>
> Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
> Cc: stable@vger.kernel.org
> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> ---
>   fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
>   1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index 9cb32e1a78a0..3bc689038232 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
>   
>   static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>   {
> +	struct file *file = iocb->ki_filp;
> +	char *buf = file->private_data;
>   	loff_t *fpos = &iocb->ki_pos;
>   	size_t phdrs_offset, notes_offset, data_offset;
>   	size_t page_offline_frozen = 1;
> @@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>   			fallthrough;
>   		case KCORE_VMEMMAP:
>   		case KCORE_TEXT:
> +			/*
> +			 * Sadly we must use a bounce buffer here to be able to
> +			 * make use of copy_from_kernel_nofault(), as these
> +			 * memory regions might not always be mapped on all
> +			 * architectures.
> +			 */
> +			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
> +				if (iov_iter_zero(tsz, iter) != tsz) {
> +					ret = -EFAULT;
> +					goto out;
> +				}
>   			/*
>   			 * We use _copy_to_iter() to bypass usermode hardening
>   			 * which would otherwise prevent this operation.
>   			 */

Having a comment at this indentation level looks for the else case looks 
kind of weird.

(does that comment still apply?)


-- 
Cheers,

David / dhildenb


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-07-31 21:50 [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions Lorenzo Stoakes
                   ` (2 preceding siblings ...)
  2023-08-01  9:05 ` David Hildenbrand
@ 2023-08-01 15:57 ` Baoquan He
  2023-08-01 16:01   ` Baoquan He
  3 siblings, 1 reply; 11+ messages in thread
From: Baoquan He @ 2023-08-01 15:57 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: linux-mm, linux-kernel, Andrew Morton, Uladzislau Rezki,
	linux-fsdevel, Jiri Olsa, Will Deacon, Mike Galbraith,
	Mark Rutland, wangkefeng.wang, catalin.marinas, ardb,
	David Hildenbrand, Linux regression tracking, regressions,
	Matthew Wilcox, Liu Shixin, Jens Axboe, Alexander Viro, stable

On 07/31/23 at 10:50pm, Lorenzo Stoakes wrote:
> Some architectures do not populate the entire range categorised by
> KCORE_TEXT, so we must ensure that the kernel address we read from is
> valid.
> 
> Unfortunately there is no solution currently available to do so with a
> purely iterator solution so reinstate the bounce buffer in this instance so
> we can use copy_from_kernel_nofault() in order to avoid page faults when
> regions are unmapped.
> 
> This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
> bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
> the code to continue to use an iterator.
> 
> Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
> Reported-by: Jiri Olsa <olsajiri@gmail.com>
> Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
> Cc: stable@vger.kernel.org
> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> ---
>  fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index 9cb32e1a78a0..3bc689038232 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
>  
>  static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>  {
> +	struct file *file = iocb->ki_filp;
> +	char *buf = file->private_data;
>  	loff_t *fpos = &iocb->ki_pos;
>  	size_t phdrs_offset, notes_offset, data_offset;
>  	size_t page_offline_frozen = 1;
> @@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>  			fallthrough;
>  		case KCORE_VMEMMAP:
>  		case KCORE_TEXT:
> +			/*
> +			 * Sadly we must use a bounce buffer here to be able to
> +			 * make use of copy_from_kernel_nofault(), as these
> +			 * memory regions might not always be mapped on all
> +			 * architectures.
> +			 */
> +			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
> +				if (iov_iter_zero(tsz, iter) != tsz) {
> +					ret = -EFAULT;
> +					goto out;
> +				}
>  			/*
>  			 * We use _copy_to_iter() to bypass usermode hardening
>  			 * which would otherwise prevent this operation.
>  			 */
> -			if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
> +			} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
>  				ret = -EFAULT;
>  				goto out;
>  			}
> @@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
>  	if (ret)
>  		return ret;
>  
> +	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +	if (!filp->private_data)
> +		return -ENOMEM;
> +
>  	if (kcore_need_update)
>  		kcore_update_ram();
>  	if (i_size_read(inode) != proc_root_kcore->size) {
> @@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
>  	return 0;
>  }
>  
> +static int release_kcore(struct inode *inode, struct file *file)
> +{
> +	kfree(file->private_data);
> +	return 0;
> +}
> +
>  static const struct proc_ops kcore_proc_ops = {
>  	.proc_read_iter	= read_kcore_iter,
>  	.proc_open	= open_kcore,
> +	.proc_release	= release_kcore,
>  	.proc_lseek	= default_llseek,
>  };

On 6.5-rc4, the failures can be reproduced stably on a arm64 machine.
With patch applied, both makedumpfile and objdump test cases passed.

And the code change looks good to me, thanks.

Tested-by: Baoquan He <bhe@redhat.com>
Reviewed-by: Baoquan He <bhe@redhat.com>


===============================================
[root@ ~]# makedumpfile --mem-usage /proc/kcore 
The kernel version is not supported.
The makedumpfile operation may be incomplete.

TYPE		PAGES			EXCLUDABLE	DESCRIPTION
----------------------------------------------------------------------
ZERO		76234           	yes		Pages filled with zero
NON_PRI_CACHE	147613          	yes		Cache pages without private flag
PRI_CACHE	3847            	yes		Cache pages with private flag
USER		15276           	yes		User process pages
FREE		15809884        	yes		Free pages
KERN_DATA	459950          	no		Dumpable kernel data 

page size:		4096            
Total pages on system:	16512804        
Total size on system:	67636445184      Byte

[root@ ~]# objdump -d  --start-address=0x^C
[root@ ~]# cat /proc/kallsyms | grep ksys_read
ffffab3be77229d8 T ksys_readahead
ffffab3be782a700 T ksys_read
[root@ ~]# objdump -d  --start-address=0xffffab3be782a700 --stop-address=0xffffab3be782a710 /proc/kcore 

/proc/kcore:     file format elf64-littleaarch64


Disassembly of section load1:

ffffab3be782a700 <load1+0x41a700>:
ffffab3be782a700:	aa1e03e9 	mov	x9, x30
ffffab3be782a704:	d503201f 	nop
ffffab3be782a708:	d503233f 	paciasp
ffffab3be782a70c:	a9bc7bfd 	stp	x29, x30, [sp, #-64]!
objdump: error: /proc/kcore(load2) is too large (0x7bff70000000 bytes)
objdump: Reading section load2 failed because: memory exhausted



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-08-01 15:57 ` Baoquan He
@ 2023-08-01 16:01   ` Baoquan He
  2023-08-01 16:22     ` Lorenzo Stoakes
  0 siblings, 1 reply; 11+ messages in thread
From: Baoquan He @ 2023-08-01 16:01 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: linux-mm, linux-kernel, Andrew Morton, Uladzislau Rezki,
	linux-fsdevel, Jiri Olsa, Will Deacon, Mike Galbraith,
	Mark Rutland, wangkefeng.wang, catalin.marinas, ardb,
	David Hildenbrand, Linux regression tracking, regressions,
	Matthew Wilcox, Liu Shixin, Jens Axboe, Alexander Viro, stable

On 08/01/23 at 11:57pm, Baoquan He wrote:
> On 07/31/23 at 10:50pm, Lorenzo Stoakes wrote:
> > Some architectures do not populate the entire range categorised by
> > KCORE_TEXT, so we must ensure that the kernel address we read from is
> > valid.
> > 
> > Unfortunately there is no solution currently available to do so with a
> > purely iterator solution so reinstate the bounce buffer in this instance so
> > we can use copy_from_kernel_nofault() in order to avoid page faults when
> > regions are unmapped.
> > 
> > This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
> > bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
> > the code to continue to use an iterator.
> > 
> > Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
> > Reported-by: Jiri Olsa <olsajiri@gmail.com>
> > Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> > ---
> >  fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
> >  1 file changed, 25 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> > index 9cb32e1a78a0..3bc689038232 100644
> > --- a/fs/proc/kcore.c
> > +++ b/fs/proc/kcore.c
> > @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
> >  
> >  static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
> >  {
> > +	struct file *file = iocb->ki_filp;
> > +	char *buf = file->private_data;
> >  	loff_t *fpos = &iocb->ki_pos;
> >  	size_t phdrs_offset, notes_offset, data_offset;
> >  	size_t page_offline_frozen = 1;
> > @@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
> >  			fallthrough;
> >  		case KCORE_VMEMMAP:
> >  		case KCORE_TEXT:
> > +			/*
> > +			 * Sadly we must use a bounce buffer here to be able to
> > +			 * make use of copy_from_kernel_nofault(), as these
> > +			 * memory regions might not always be mapped on all
> > +			 * architectures.
> > +			 */
> > +			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
> > +				if (iov_iter_zero(tsz, iter) != tsz) {
> > +					ret = -EFAULT;
> > +					goto out;
> > +				}
> >  			/*
> >  			 * We use _copy_to_iter() to bypass usermode hardening
> >  			 * which would otherwise prevent this operation.
> >  			 */
> > -			if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
> > +			} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
> >  				ret = -EFAULT;
> >  				goto out;
> >  			}
> > @@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
> >  	if (ret)
> >  		return ret;
> >  
> > +	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
> > +	if (!filp->private_data)
> > +		return -ENOMEM;
> > +
> >  	if (kcore_need_update)
> >  		kcore_update_ram();
> >  	if (i_size_read(inode) != proc_root_kcore->size) {
> > @@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
> >  	return 0;
> >  }
> >  
> > +static int release_kcore(struct inode *inode, struct file *file)
> > +{
> > +	kfree(file->private_data);
> > +	return 0;
> > +}
> > +
> >  static const struct proc_ops kcore_proc_ops = {
> >  	.proc_read_iter	= read_kcore_iter,
> >  	.proc_open	= open_kcore,
> > +	.proc_release	= release_kcore,
> >  	.proc_lseek	= default_llseek,
> >  };
> 
> On 6.5-rc4, the failures can be reproduced stably on a arm64 machine.
> With patch applied, both makedumpfile and objdump test cases passed.
> 
> And the code change looks good to me, thanks.
> 
> Tested-by: Baoquan He <bhe@redhat.com>
> Reviewed-by: Baoquan He <bhe@redhat.com>
> 
> 
> ===============================================
> [root@ ~]# makedumpfile --mem-usage /proc/kcore 
> The kernel version is not supported.
> The makedumpfile operation may be incomplete.
> 
> TYPE		PAGES			EXCLUDABLE	DESCRIPTION
> ----------------------------------------------------------------------
> ZERO		76234           	yes		Pages filled with zero
> NON_PRI_CACHE	147613          	yes		Cache pages without private flag
> PRI_CACHE	3847            	yes		Cache pages with private flag
> USER		15276           	yes		User process pages
> FREE		15809884        	yes		Free pages
> KERN_DATA	459950          	no		Dumpable kernel data 
> 
> page size:		4096            
> Total pages on system:	16512804        
> Total size on system:	67636445184      Byte
> 
> [root@ ~]# objdump -d  --start-address=0x^C
> [root@ ~]# cat /proc/kallsyms | grep ksys_read
> ffffab3be77229d8 T ksys_readahead
> ffffab3be782a700 T ksys_read
> [root@ ~]# objdump -d  --start-address=0xffffab3be782a700 --stop-address=0xffffab3be782a710 /proc/kcore 
> 
> /proc/kcore:     file format elf64-littleaarch64
> 
> 
> Disassembly of section load1:
> 
> ffffab3be782a700 <load1+0x41a700>:
> ffffab3be782a700:	aa1e03e9 	mov	x9, x30
> ffffab3be782a704:	d503201f 	nop
> ffffab3be782a708:	d503233f 	paciasp
> ffffab3be782a70c:	a9bc7bfd 	stp	x29, x30, [sp, #-64]!
> objdump: error: /proc/kcore(load2) is too large (0x7bff70000000 bytes)
> objdump: Reading section load2 failed because: memory exhausted

By the way, I can still see the objdump error saying kcore is too large
as above, at the same time there's console printing as below. Haven't
checked it's objdump's issue or kernel's.

[ 6631.575800] __vm_enough_memory: pid: 5321, comm: objdump, not enough memory for the allocation
[ 6631.584469] __vm_enough_memory: pid: 5321, comm: objdump, not enough memory for the allocation


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-08-01 16:01   ` Baoquan He
@ 2023-08-01 16:22     ` Lorenzo Stoakes
  0 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2023-08-01 16:22 UTC (permalink / raw)
  To: Baoquan He
  Cc: linux-mm, linux-kernel, Andrew Morton, Uladzislau Rezki,
	linux-fsdevel, Jiri Olsa, Will Deacon, Mike Galbraith,
	Mark Rutland, wangkefeng.wang, catalin.marinas, ardb,
	David Hildenbrand, Linux regression tracking, regressions,
	Matthew Wilcox, Liu Shixin, Jens Axboe, Alexander Viro, stable

On Wed, Aug 02, 2023 at 12:01:16AM +0800, Baoquan He wrote:
> On 08/01/23 at 11:57pm, Baoquan He wrote:
> > On 07/31/23 at 10:50pm, Lorenzo Stoakes wrote:
> > > Some architectures do not populate the entire range categorised by
> > > KCORE_TEXT, so we must ensure that the kernel address we read from is
> > > valid.
> > >
> > > Unfortunately there is no solution currently available to do so with a
> > > purely iterator solution so reinstate the bounce buffer in this instance so
> > > we can use copy_from_kernel_nofault() in order to avoid page faults when
> > > regions are unmapped.
> > >
> > > This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
> > > bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
> > > the code to continue to use an iterator.
> > >
> > > Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
> > > Reported-by: Jiri Olsa <olsajiri@gmail.com>
> > > Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
> > > Cc: stable@vger.kernel.org
> > > Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> > > ---
> > >  fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
> > >  1 file changed, 25 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> > > index 9cb32e1a78a0..3bc689038232 100644
> > > --- a/fs/proc/kcore.c
> > > +++ b/fs/proc/kcore.c
> > > @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
> > >
> > >  static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
> > >  {
> > > +	struct file *file = iocb->ki_filp;
> > > +	char *buf = file->private_data;
> > >  	loff_t *fpos = &iocb->ki_pos;
> > >  	size_t phdrs_offset, notes_offset, data_offset;
> > >  	size_t page_offline_frozen = 1;
> > > @@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
> > >  			fallthrough;
> > >  		case KCORE_VMEMMAP:
> > >  		case KCORE_TEXT:
> > > +			/*
> > > +			 * Sadly we must use a bounce buffer here to be able to
> > > +			 * make use of copy_from_kernel_nofault(), as these
> > > +			 * memory regions might not always be mapped on all
> > > +			 * architectures.
> > > +			 */
> > > +			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
> > > +				if (iov_iter_zero(tsz, iter) != tsz) {
> > > +					ret = -EFAULT;
> > > +					goto out;
> > > +				}
> > >  			/*
> > >  			 * We use _copy_to_iter() to bypass usermode hardening
> > >  			 * which would otherwise prevent this operation.
> > >  			 */
> > > -			if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
> > > +			} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
> > >  				ret = -EFAULT;
> > >  				goto out;
> > >  			}
> > > @@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
> > >  	if (ret)
> > >  		return ret;
> > >
> > > +	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
> > > +	if (!filp->private_data)
> > > +		return -ENOMEM;
> > > +
> > >  	if (kcore_need_update)
> > >  		kcore_update_ram();
> > >  	if (i_size_read(inode) != proc_root_kcore->size) {
> > > @@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
> > >  	return 0;
> > >  }
> > >
> > > +static int release_kcore(struct inode *inode, struct file *file)
> > > +{
> > > +	kfree(file->private_data);
> > > +	return 0;
> > > +}
> > > +
> > >  static const struct proc_ops kcore_proc_ops = {
> > >  	.proc_read_iter	= read_kcore_iter,
> > >  	.proc_open	= open_kcore,
> > > +	.proc_release	= release_kcore,
> > >  	.proc_lseek	= default_llseek,
> > >  };
> >
> > On 6.5-rc4, the failures can be reproduced stably on a arm64 machine.
> > With patch applied, both makedumpfile and objdump test cases passed.
> >
> > And the code change looks good to me, thanks.
> >
> > Tested-by: Baoquan He <bhe@redhat.com>
> > Reviewed-by: Baoquan He <bhe@redhat.com>

Thanks!

> >
> >
> > ===============================================
> > [root@ ~]# makedumpfile --mem-usage /proc/kcore
> > The kernel version is not supported.
> > The makedumpfile operation may be incomplete.
> >
> > TYPE		PAGES			EXCLUDABLE	DESCRIPTION
> > ----------------------------------------------------------------------
> > ZERO		76234           	yes		Pages filled with zero
> > NON_PRI_CACHE	147613          	yes		Cache pages without private flag
> > PRI_CACHE	3847            	yes		Cache pages with private flag
> > USER		15276           	yes		User process pages
> > FREE		15809884        	yes		Free pages
> > KERN_DATA	459950          	no		Dumpable kernel data
> >
> > page size:		4096
> > Total pages on system:	16512804
> > Total size on system:	67636445184      Byte
> >
> > [root@ ~]# objdump -d  --start-address=0x^C
> > [root@ ~]# cat /proc/kallsyms | grep ksys_read
> > ffffab3be77229d8 T ksys_readahead
> > ffffab3be782a700 T ksys_read
> > [root@ ~]# objdump -d  --start-address=0xffffab3be782a700 --stop-address=0xffffab3be782a710 /proc/kcore
> >
> > /proc/kcore:     file format elf64-littleaarch64
> >
> >
> > Disassembly of section load1:
> >
> > ffffab3be782a700 <load1+0x41a700>:
> > ffffab3be782a700:	aa1e03e9 	mov	x9, x30
> > ffffab3be782a704:	d503201f 	nop
> > ffffab3be782a708:	d503233f 	paciasp
> > ffffab3be782a70c:	a9bc7bfd 	stp	x29, x30, [sp, #-64]!
> > objdump: error: /proc/kcore(load2) is too large (0x7bff70000000 bytes)
> > objdump: Reading section load2 failed because: memory exhausted
>
> By the way, I can still see the objdump error saying kcore is too large
> as above, at the same time there's console printing as below. Haven't
> checked it's objdump's issue or kernel's.
>
> [ 6631.575800] __vm_enough_memory: pid: 5321, comm: objdump, not enough memory for the allocation
> [ 6631.584469] __vm_enough_memory: pid: 5321, comm: objdump, not enough memory for the allocation
>

Yeah this issue existed before this patch was applied on arm64, apparently
an ancient objdump bug according to the other thread [0]. I confirmed it
exists on v6.0 kernel for instance.

[0]:https://lore.kernel.org/all/7b94619ad89c9e308c7aedef2cacfa10b8666e69.camel@gmx.de/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-08-01  9:05 ` David Hildenbrand
@ 2023-08-01 16:33   ` Lorenzo Stoakes
  2023-08-01 16:34     ` David Hildenbrand
  0 siblings, 1 reply; 11+ messages in thread
From: Lorenzo Stoakes @ 2023-08-01 16:33 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-mm, linux-kernel, Andrew Morton, Baoquan He,
	Uladzislau Rezki, linux-fsdevel, Jiri Olsa, Will Deacon,
	Mike Galbraith, Mark Rutland, wangkefeng.wang, catalin.marinas,
	ardb, Linux regression tracking, regressions, Matthew Wilcox,
	Liu Shixin, Jens Axboe, Alexander Viro, stable

On Tue, Aug 01, 2023 at 11:05:40AM +0200, David Hildenbrand wrote:
> On 31.07.23 23:50, Lorenzo Stoakes wrote:
> > Some architectures do not populate the entire range categorised by
> > KCORE_TEXT, so we must ensure that the kernel address we read from is
> > valid.
> >
> > Unfortunately there is no solution currently available to do so with a
> > purely iterator solution so reinstate the bounce buffer in this instance so
> > we can use copy_from_kernel_nofault() in order to avoid page faults when
> > regions are unmapped.
> >
> > This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
> > bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
> > the code to continue to use an iterator.
> >
> > Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
> > Reported-by: Jiri Olsa <olsajiri@gmail.com>
> > Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> > ---
> >   fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
> >   1 file changed, 25 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> > index 9cb32e1a78a0..3bc689038232 100644
> > --- a/fs/proc/kcore.c
> > +++ b/fs/proc/kcore.c
> > @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
> >   static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
> >   {
> > +	struct file *file = iocb->ki_filp;
> > +	char *buf = file->private_data;
> >   	loff_t *fpos = &iocb->ki_pos;
> >   	size_t phdrs_offset, notes_offset, data_offset;
> >   	size_t page_offline_frozen = 1;
> > @@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
> >   			fallthrough;
> >   		case KCORE_VMEMMAP:
> >   		case KCORE_TEXT:
> > +			/*
> > +			 * Sadly we must use a bounce buffer here to be able to
> > +			 * make use of copy_from_kernel_nofault(), as these
> > +			 * memory regions might not always be mapped on all
> > +			 * architectures.
> > +			 */
> > +			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
> > +				if (iov_iter_zero(tsz, iter) != tsz) {
> > +					ret = -EFAULT;
> > +					goto out;
> > +				}
> >   			/*
> >   			 * We use _copy_to_iter() to bypass usermode hardening
> >   			 * which would otherwise prevent this operation.
> >   			 */
>
> Having a comment at this indentation level looks for the else case looks
> kind of weird.

Yeah, but having it indented again would be weird and seem like it doesn't
apply to the block below, there's really no good spot for it and
checkpatch.pl doesn't mind so I think this is ok :)

>
> (does that comment still apply?)

Hm good point, actually, now we're using the bounce buffer we don't need to
avoid usermode hardening any more.

However since we've established a bounce buffer ourselves its still
appropriate to use _copy_to_iter() as we know the source region is good to
copy from.

To make life easy I'll just respin with an updated comment :)

>
>
> --
> Cheers,
>
> David / dhildenb
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-08-01 16:33   ` Lorenzo Stoakes
@ 2023-08-01 16:34     ` David Hildenbrand
  2023-08-01 16:39       ` Lorenzo Stoakes
  0 siblings, 1 reply; 11+ messages in thread
From: David Hildenbrand @ 2023-08-01 16:34 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: linux-mm, linux-kernel, Andrew Morton, Baoquan He,
	Uladzislau Rezki, linux-fsdevel, Jiri Olsa, Will Deacon,
	Mike Galbraith, Mark Rutland, wangkefeng.wang, catalin.marinas,
	ardb, Linux regression tracking, regressions, Matthew Wilcox,
	Liu Shixin, Jens Axboe, Alexander Viro, stable

On 01.08.23 18:33, Lorenzo Stoakes wrote:
> On Tue, Aug 01, 2023 at 11:05:40AM +0200, David Hildenbrand wrote:
>> On 31.07.23 23:50, Lorenzo Stoakes wrote:
>>> Some architectures do not populate the entire range categorised by
>>> KCORE_TEXT, so we must ensure that the kernel address we read from is
>>> valid.
>>>
>>> Unfortunately there is no solution currently available to do so with a
>>> purely iterator solution so reinstate the bounce buffer in this instance so
>>> we can use copy_from_kernel_nofault() in order to avoid page faults when
>>> regions are unmapped.
>>>
>>> This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
>>> bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
>>> the code to continue to use an iterator.
>>>
>>> Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
>>> Reported-by: Jiri Olsa <olsajiri@gmail.com>
>>> Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
>>> ---
>>>    fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
>>>    1 file changed, 25 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
>>> index 9cb32e1a78a0..3bc689038232 100644
>>> --- a/fs/proc/kcore.c
>>> +++ b/fs/proc/kcore.c
>>> @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
>>>    static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>>>    {
>>> +	struct file *file = iocb->ki_filp;
>>> +	char *buf = file->private_data;
>>>    	loff_t *fpos = &iocb->ki_pos;
>>>    	size_t phdrs_offset, notes_offset, data_offset;
>>>    	size_t page_offline_frozen = 1;
>>> @@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>>>    			fallthrough;
>>>    		case KCORE_VMEMMAP:
>>>    		case KCORE_TEXT:
>>> +			/*
>>> +			 * Sadly we must use a bounce buffer here to be able to
>>> +			 * make use of copy_from_kernel_nofault(), as these
>>> +			 * memory regions might not always be mapped on all
>>> +			 * architectures.
>>> +			 */
>>> +			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
>>> +				if (iov_iter_zero(tsz, iter) != tsz) {
>>> +					ret = -EFAULT;
>>> +					goto out;
>>> +				}
>>>    			/*
>>>    			 * We use _copy_to_iter() to bypass usermode hardening
>>>    			 * which would otherwise prevent this operation.
>>>    			 */
>>
>> Having a comment at this indentation level looks for the else case looks
>> kind of weird.
> 
> Yeah, but having it indented again would be weird and seem like it doesn't
> apply to the block below, there's really no good spot for it and
> checkpatch.pl doesn't mind so I think this is ok :)
> 
>>
>> (does that comment still apply?)
> 
> Hm good point, actually, now we're using the bounce buffer we don't need to
> avoid usermode hardening any more.
> 
> However since we've established a bounce buffer ourselves its still
> appropriate to use _copy_to_iter() as we know the source region is good to
> copy from.
> 
> To make life easy I'll just respin with an updated comment :)

I'm not too picky this time, no need to resend if everybody else is fine :P

-- 
Cheers,

David / dhildenb


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-08-01 16:34     ` David Hildenbrand
@ 2023-08-01 16:39       ` Lorenzo Stoakes
  2023-08-01 18:14         ` David Hildenbrand
  0 siblings, 1 reply; 11+ messages in thread
From: Lorenzo Stoakes @ 2023-08-01 16:39 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-mm, linux-kernel, Andrew Morton, Baoquan He,
	Uladzislau Rezki, linux-fsdevel, Jiri Olsa, Will Deacon,
	Mike Galbraith, Mark Rutland, wangkefeng.wang, catalin.marinas,
	ardb, Linux regression tracking, regressions, Matthew Wilcox,
	Liu Shixin, Jens Axboe, Alexander Viro, stable

On Tue, Aug 01, 2023 at 06:34:26PM +0200, David Hildenbrand wrote:
> On 01.08.23 18:33, Lorenzo Stoakes wrote:
> > On Tue, Aug 01, 2023 at 11:05:40AM +0200, David Hildenbrand wrote:
> > > On 31.07.23 23:50, Lorenzo Stoakes wrote:
> > > > Some architectures do not populate the entire range categorised by
> > > > KCORE_TEXT, so we must ensure that the kernel address we read from is
> > > > valid.
> > > >
> > > > Unfortunately there is no solution currently available to do so with a
> > > > purely iterator solution so reinstate the bounce buffer in this instance so
> > > > we can use copy_from_kernel_nofault() in order to avoid page faults when
> > > > regions are unmapped.
> > > >
> > > > This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
> > > > bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
> > > > the code to continue to use an iterator.
> > > >
> > > > Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
> > > > Reported-by: Jiri Olsa <olsajiri@gmail.com>
> > > > Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
> > > > Cc: stable@vger.kernel.org
> > > > Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> > > > ---
> > > >    fs/proc/kcore.c | 26 +++++++++++++++++++++++++-
> > > >    1 file changed, 25 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> > > > index 9cb32e1a78a0..3bc689038232 100644
> > > > --- a/fs/proc/kcore.c
> > > > +++ b/fs/proc/kcore.c
> > > > @@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
> > > >    static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
> > > >    {
> > > > +	struct file *file = iocb->ki_filp;
> > > > +	char *buf = file->private_data;
> > > >    	loff_t *fpos = &iocb->ki_pos;
> > > >    	size_t phdrs_offset, notes_offset, data_offset;
> > > >    	size_t page_offline_frozen = 1;
> > > > @@ -554,11 +556,22 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
> > > >    			fallthrough;
> > > >    		case KCORE_VMEMMAP:
> > > >    		case KCORE_TEXT:
> > > > +			/*
> > > > +			 * Sadly we must use a bounce buffer here to be able to
> > > > +			 * make use of copy_from_kernel_nofault(), as these
> > > > +			 * memory regions might not always be mapped on all
> > > > +			 * architectures.
> > > > +			 */
> > > > +			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
> > > > +				if (iov_iter_zero(tsz, iter) != tsz) {
> > > > +					ret = -EFAULT;
> > > > +					goto out;
> > > > +				}
> > > >    			/*
> > > >    			 * We use _copy_to_iter() to bypass usermode hardening
> > > >    			 * which would otherwise prevent this operation.
> > > >    			 */
> > >
> > > Having a comment at this indentation level looks for the else case looks
> > > kind of weird.
> >
> > Yeah, but having it indented again would be weird and seem like it doesn't
> > apply to the block below, there's really no good spot for it and
> > checkpatch.pl doesn't mind so I think this is ok :)
> >
> > >
> > > (does that comment still apply?)
> >
> > Hm good point, actually, now we're using the bounce buffer we don't need to
> > avoid usermode hardening any more.
> >
> > However since we've established a bounce buffer ourselves its still
> > appropriate to use _copy_to_iter() as we know the source region is good to
> > copy from.
> >
> > To make life easy I'll just respin with an updated comment :)
>
> I'm not too picky this time, no need to resend if everybody else is fine :P
>

Haha you know the classic Lorenzo respin spiral and want to avoid it I see ;)

The comment is actually inaccurate now, so to avoid noise + make life easy
(maybe) for Andrew here's a fix patch that just corrects the comment:-

----8<----

From d2b8fb271f21b79048e5630699133f77a93d0481 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lstoakes@gmail.com>
Date: Tue, 1 Aug 2023 17:36:08 +0100
Subject: [PATCH] fs/proc/kcore: correct comment

Correct comment to be strictly correct about reasoning.

Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
---
 fs/proc/kcore.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 3bc689038232..23fc24d16b31 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -568,8 +568,8 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 					goto out;
 				}
 			/*
-			 * We use _copy_to_iter() to bypass usermode hardening
-			 * which would otherwise prevent this operation.
+			 * We know the bounce buffer is safe to copy from, so
+			 * use _copy_to_iter() directly.
 			 */
 			} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
 				ret = -EFAULT;
--
2.41.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  2023-08-01 16:39       ` Lorenzo Stoakes
@ 2023-08-01 18:14         ` David Hildenbrand
  0 siblings, 0 replies; 11+ messages in thread
From: David Hildenbrand @ 2023-08-01 18:14 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: linux-mm, linux-kernel, Andrew Morton, Baoquan He,
	Uladzislau Rezki, linux-fsdevel, Jiri Olsa, Will Deacon,
	Mike Galbraith, Mark Rutland, wangkefeng.wang, catalin.marinas,
	ardb, Linux regression tracking, regressions, Matthew Wilcox,
	Liu Shixin, Jens Axboe, Alexander Viro, stable

>>> Hm good point, actually, now we're using the bounce buffer we don't need to
>>> avoid usermode hardening any more.
>>>
>>> However since we've established a bounce buffer ourselves its still
>>> appropriate to use _copy_to_iter() as we know the source region is good to
>>> copy from.
>>>
>>> To make life easy I'll just respin with an updated comment :)
>>
>> I'm not too picky this time, no need to resend if everybody else is fine :P
>>
> 
> Haha you know the classic Lorenzo respin spiral and want to avoid it I see ;)

Don't want to make your apparently stressful week more stressful. Not 
this time ;)

> 
> The comment is actually inaccurate now, so to avoid noise + make life easy
> (maybe) for Andrew here's a fix patch that just corrects the comment:-
> 
> ----8<----
> 
>  From d2b8fb271f21b79048e5630699133f77a93d0481 Mon Sep 17 00:00:00 2001
> From: Lorenzo Stoakes <lstoakes@gmail.com>
> Date: Tue, 1 Aug 2023 17:36:08 +0100
> Subject: [PATCH] fs/proc/kcore: correct comment
> 
> Correct comment to be strictly correct about reasoning.
> 
> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> ---
>   fs/proc/kcore.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index 3bc689038232..23fc24d16b31 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -568,8 +568,8 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
>   					goto out;
>   				}
>   			/*
> -			 * We use _copy_to_iter() to bypass usermode hardening
> -			 * which would otherwise prevent this operation.
> +			 * We know the bounce buffer is safe to copy from, so
> +			 * use _copy_to_iter() directly.
>   			 */
>   			} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
>   				ret = -EFAULT;
> --
> 2.41.0
> 

Thanks!

Acked-by: David Hildenbrand <david@redhat.com>

-- 
Cheers,

David / dhildenb


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-08-01 18:14 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-31 21:50 [PATCH] fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions Lorenzo Stoakes
2023-07-31 22:11 ` Jiri Olsa
2023-08-01  8:27 ` Will Deacon
2023-08-01  9:05 ` David Hildenbrand
2023-08-01 16:33   ` Lorenzo Stoakes
2023-08-01 16:34     ` David Hildenbrand
2023-08-01 16:39       ` Lorenzo Stoakes
2023-08-01 18:14         ` David Hildenbrand
2023-08-01 15:57 ` Baoquan He
2023-08-01 16:01   ` Baoquan He
2023-08-01 16:22     ` Lorenzo Stoakes

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.