From: Baoquan He <bhe@redhat.com>
To: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	Matthew Wilcox <willy@infradead.org>,
	David Hildenbrand <david@redhat.com>,
	Liu Shixin <liushixin2@huawei.com>, Jiri Olsa <jolsa@kernel.org>,
	Jens Axboe <axboe@kernel.dk>,
	Alexander Viro <viro@zeniv.linux.org.uk>
Subject: Re: [PATCH v4 4/4] mm: vmalloc: convert vread() to vread_iter()
Date: Wed, 22 Mar 2023 20:55:23 +0800	[thread overview]
Message-ID: <ZBr6u+rivMztIvn9@MiWiFi-R3L-srv> (raw)
In-Reply-To: <6b3899bbbf1f4bd6b7133c8b6f27b3a8791607b0.1679431886.git.lstoakes@gmail.com>

On 03/21/23 at 08:54pm, Lorenzo Stoakes wrote:
......
> Additionally, we must account for the fact that at any point a copy may
> fail; if this happens, we exit indicating fewer bytes retrieved than
> expected.
......
> -static int aligned_vread(char *buf, char *addr, unsigned long count)
> +/*
> + * small helper routine, copy contents to iter from addr.
> + * If the page is not present, fill zero.
> + *
> + * Returns the number of copied bytes.
> + */
> +static size_t aligned_vread_iter(struct iov_iter *iter,
> +				 const char *addr, size_t count)
>  {
> -	struct page *p;
> -	int copied = 0;
> +	size_t remains = count;
> +	struct page *page;
>  
> -	while (count) {
> +	while (remains > 0) {
>  		unsigned long offset, length;
> +		size_t copied = 0;
>  
>  		offset = offset_in_page(addr);
>  		length = PAGE_SIZE - offset;
> -		if (length > count)
> -			length = count;
> -		p = vmalloc_to_page(addr);
> +		if (length > remains)
> +			length = remains;
> +		page = vmalloc_to_page(addr);
>  		/*
> -		 * To do safe access to this _mapped_ area, we need
> -		 * lock. But adding lock here means that we need to add
> -		 * overhead of vmalloc()/vfree() calls for this _debug_
> -		 * interface, rarely used. Instead of that, we'll use
> -		 * kmap() and get small overhead in this access function.
> +		 * To do safe access to this _mapped_ area, we need lock. But
> +		 * adding lock here means that we need to add overhead of
> +		 * vmalloc()/vfree() calls for this _debug_ interface, rarely
> +		 * used. Instead of that, we'll use an local mapping via
> +		 * copy_page_to_iter_atomic() and accept a small overhead in
> +		 * this access function.
>  		 */
> -		if (p) {
> -			/* We can expect USER0 is not used -- see vread() */
> -			void *map = kmap_atomic(p);
> -			memcpy(buf, map + offset, length);
> -			kunmap_atomic(map);
> -		} else
> -			memset(buf, 0, length);
> +		if (page)
> +			copied = copy_page_to_iter_atomic(page, offset, length,
> +							  iter);
> +

If we have decided to quit at any failing copy point, indicating fewer
bytes retrieved than expected, I am wondering why we don't quit right
here when copy_page_to_iter_atomic() fails.

The original zero-filling is meant for unmapped vmalloc areas, not for
an area where the copy itself failed. Not sure if I got your point.
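
Just to make the question concrete, below is a rough sketch of the
behaviour I was expecting (untested, only reusing the helpers this
patch series introduces): zero_iter() is used only for unmapped pages,
and a short copy ends the read immediately:

	static size_t aligned_vread_iter(struct iov_iter *iter,
					 const char *addr, size_t count)
	{
		size_t remains = count;
		struct page *page;

		while (remains > 0) {
			unsigned long offset, length;
			size_t copied;

			offset = offset_in_page(addr);
			length = PAGE_SIZE - offset;
			if (length > remains)
				length = remains;
			page = vmalloc_to_page(addr);

			if (page) {
				/* mapped page: a short copy ends the read */
				copied = copy_page_to_iter_atomic(page, offset,
								  length, iter);
			} else {
				/* unmapped hole: zero-fill, as aligned_vread() did */
				copied = zero_iter(iter, length);
			}

			addr += copied;
			remains -= copied;

			/* quit at the first partial copy or zeroing */
			if (copied != length)
				break;
		}

		return count - remains;
	}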

> +		/* Zero anything we were unable to copy. */
> +		copied += zero_iter(iter, length - copied);
> +
> +		addr += copied;
> +		remains -= copied;
>  
> -		addr += length;
> -		buf += length;
> -		copied += length;
> -		count -= length;
> +		if (copied != length)
> +			break;
>  	}
> -	return copied;
> +
> +	return count - remains;
>  }
>  
> -static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags)
> +/*
> + * Read from a vm_map_ram region of memory.
> + *
> + * Returns the number of copied bytes.
> + */
> +static size_t vmap_ram_vread_iter(struct iov_iter *iter, const char *addr,
> +				  size_t count, unsigned long flags)
>  {
>  	char *start;
>  	struct vmap_block *vb;
>  	unsigned long offset;
> -	unsigned int rs, re, n;
> +	unsigned int rs, re;
> +	size_t remains, n;
>  
>  	/*
>  	 * If it's area created by vm_map_ram() interface directly, but
>  	 * not further subdividing and delegating management to vmap_block,
>  	 * handle it here.
>  	 */
> -	if (!(flags & VMAP_BLOCK)) {
> -		aligned_vread(buf, addr, count);
> -		return;
> -	}
> +	if (!(flags & VMAP_BLOCK))
> +		return aligned_vread_iter(iter, addr, count);
>  
>  	/*
>  	 * Area is split into regions and tracked with vmap_block, read out
> @@ -3505,50 +3537,65 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags
>  	 */
>  	vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
>  	if (!vb)
> -		goto finished;
> +		goto finished_zero;
>  
>  	spin_lock(&vb->lock);
>  	if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
>  		spin_unlock(&vb->lock);
> -		goto finished;
> +		goto finished_zero;
>  	}
> +
> +	remains = count;
>  	for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
> -		if (!count)
> -			break;
> +		size_t copied;
> +
> +		if (remains == 0)
> +			goto finished;
> +
>  		start = vmap_block_vaddr(vb->va->va_start, rs);
> -		while (addr < start) {
> -			if (count == 0)
> -				goto unlock;
> -			*buf = '\0';
> -			buf++;
> -			addr++;
> -			count--;
> +
> +		if (addr < start) {
> +			size_t to_zero = min_t(size_t, start - addr, remains);
> +			size_t zeroed = zero_iter(iter, to_zero);
> +
> +			addr += zeroed;
> +			remains -= zeroed;
> +
> +			if (remains == 0 || zeroed != to_zero)
> +				goto finished;
>  		}
> +
>  		/*it could start reading from the middle of used region*/
>  		offset = offset_in_page(addr);
>  		n = ((re - rs + 1) << PAGE_SHIFT) - offset;
> -		if (n > count)
> -			n = count;
> -		aligned_vread(buf, start+offset, n);
> +		if (n > remains)
> +			n = remains;
> +
> +		copied = aligned_vread_iter(iter, start + offset, n);
>  
> -		buf += n;
> -		addr += n;
> -		count -= n;
> +		addr += copied;
> +		remains -= copied;
> +
> +		if (copied != n)
> +			goto finished;
>  	}
> -unlock:
> +
>  	spin_unlock(&vb->lock);
>  
> -finished:
> +finished_zero:
>  	/* zero-fill the left dirty or free regions */
> -	if (count)
> -		memset(buf, 0, count);
> +	return count - remains + zero_iter(iter, remains);

When it jumps to finished_zero, the local variable 'remains' has not
been assigned from 'count' yet, so we do nothing useful here and the
returned value is wrong. You may need to move the 'remains = count'
assignment up, before the first jump to finished_zero.
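
Something like the below is what I mean (illustrative only, untested,
on top of this patch; the region loop and the labels at the end of the
function stay as they are), so that the early jumps to finished_zero
zero-fill the whole requested range:

	static size_t vmap_ram_vread_iter(struct iov_iter *iter, const char *addr,
					  size_t count, unsigned long flags)
	{
		struct vmap_block *vb;
		size_t remains = count;	/* assigned before any goto finished_zero */
		/* ... other locals as in this patch ... */

		if (!(flags & VMAP_BLOCK))
			return aligned_vread_iter(iter, addr, count);

		vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
		if (!vb)
			goto finished_zero;	/* now zero-fills 'count' bytes */

		spin_lock(&vb->lock);
		if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
			spin_unlock(&vb->lock);
			goto finished_zero;	/* ditto */
		}

		/* ... region loop, finished_zero and finished labels unchanged ... */
	}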

> +finished:
> +	/* We couldn't copy/zero everything */
> +	spin_unlock(&vb->lock);
> +	return count - remains;
>  }
>  
>  /**
> - * vread() - read vmalloc area in a safe way.
> - * @buf:     buffer for reading data
> - * @addr:    vm address.
> - * @count:   number of bytes to be read.
> + * vread_iter() - read vmalloc area in a safe way to an iterator.
> + * @iter:         the iterator to which data should be written.
> + * @addr:         vm address.
> + * @count:        number of bytes to be read.
>   *
>   * This function checks that addr is a valid vmalloc'ed area, and
>   * copy data from that area to a given buffer. If the given memory range
> @@ -3568,13 +3615,12 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags
>   * (same number as @count) or %0 if [addr...addr+count) doesn't
>   * include any intersection with valid vmalloc area
>   */
> -long vread(char *buf, char *addr, unsigned long count)
> +long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
>  {
>  	struct vmap_area *va;
>  	struct vm_struct *vm;
> -	char *vaddr, *buf_start = buf;
> -	unsigned long buflen = count;
> -	unsigned long n, size, flags;
> +	char *vaddr;
> +	size_t n, size, flags, remains;
>  
>  	addr = kasan_reset_tag(addr);
>  
> @@ -3582,18 +3628,22 @@ long vread(char *buf, char *addr, unsigned long count)
>  	if ((unsigned long) addr + count < count)
>  		count = -(unsigned long) addr;
>  
> +	remains = count;
> +
>  	spin_lock(&vmap_area_lock);
>  	va = find_vmap_area_exceed_addr((unsigned long)addr);
>  	if (!va)
> -		goto finished;
> +		goto finished_zero;
>  
>  	/* no intersects with alive vmap_area */
> -	if ((unsigned long)addr + count <= va->va_start)
> -		goto finished;
> +	if ((unsigned long)addr + remains <= va->va_start)
> +		goto finished_zero;
>  
>  	list_for_each_entry_from(va, &vmap_area_list, list) {
> -		if (!count)
> -			break;
> +		size_t copied;
> +
> +		if (remains == 0)
> +			goto finished;
>  
>  		vm = va->vm;
>  		flags = va->flags & VMAP_FLAGS_MASK;
> @@ -3608,6 +3658,7 @@ long vread(char *buf, char *addr, unsigned long count)
>  
>  		if (vm && (vm->flags & VM_UNINITIALIZED))
>  			continue;
> +
>  		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
>  		smp_rmb();
>  
> @@ -3616,38 +3667,45 @@ long vread(char *buf, char *addr, unsigned long count)
>  
>  		if (addr >= vaddr + size)
>  			continue;
> -		while (addr < vaddr) {
> -			if (count == 0)
> +
> +		if (addr < vaddr) {
> +			size_t to_zero = min_t(size_t, vaddr - addr, remains);
> +			size_t zeroed = zero_iter(iter, to_zero);
> +
> +			addr += zeroed;
> +			remains -= zeroed;
> +
> +			if (remains == 0 || zeroed != to_zero)
>  				goto finished;
> -			*buf = '\0';
> -			buf++;
> -			addr++;
> -			count--;
>  		}
> +
>  		n = vaddr + size - addr;
> -		if (n > count)
> -			n = count;
> +		if (n > remains)
> +			n = remains;
>  
>  		if (flags & VMAP_RAM)
> -			vmap_ram_vread(buf, addr, n, flags);
> +			copied = vmap_ram_vread_iter(iter, addr, n, flags);
>  		else if (!(vm->flags & VM_IOREMAP))
> -			aligned_vread(buf, addr, n);
> +			copied = aligned_vread_iter(iter, addr, n);
>  		else /* IOREMAP area is treated as memory hole */
> -			memset(buf, 0, n);
> -		buf += n;
> -		addr += n;
> -		count -= n;
> +			copied = zero_iter(iter, n);
> +
> +		addr += copied;
> +		remains -= copied;
> +
> +		if (copied != n)
> +			goto finished;
>  	}
> -finished:
> -	spin_unlock(&vmap_area_lock);
>  
> -	if (buf == buf_start)
> -		return 0;
> +finished_zero:
> +	spin_unlock(&vmap_area_lock);
>  	/* zero-fill memory holes */
> -	if (buf != buf_start + buflen)
> -		memset(buf, 0, buflen - (buf - buf_start));
> +	return count - remains + zero_iter(iter, remains);
> +finished:
> +	/* Nothing remains, or We couldn't copy/zero everything. */
                              ~~~ typo, s/We/we/
> +	spin_unlock(&vmap_area_lock);
>  
> -	return buflen;
> +	return count - remains;
>  }
>  
>  /**
> -- 
> 2.39.2
> 



Thread overview: 17+ messages
2023-03-21 20:54 [PATCH v4 0/4] convert read_kcore(), vread() to use iterators Lorenzo Stoakes
2023-03-21 20:54 ` [PATCH v4 1/4] fs/proc/kcore: avoid bounce buffer for ktext data Lorenzo Stoakes
2023-03-21 20:54 ` [PATCH v4 2/4] fs/proc/kcore: convert read_kcore() to read_kcore_iter() Lorenzo Stoakes
2023-03-22  1:15   ` Baoquan He
2023-03-22  6:17     ` Lorenzo Stoakes
2023-03-22 11:12   ` David Hildenbrand
2023-03-21 20:54 ` [PATCH v4 3/4] iov_iter: add copy_page_to_iter_atomic() Lorenzo Stoakes
2023-03-22 10:17   ` Baoquan He
2023-03-22 10:32     ` Lorenzo Stoakes
2023-03-22 11:06       ` Lorenzo Stoakes
2023-03-22 13:21         ` Baoquan He
2023-03-22 13:08       ` Baoquan He
2023-03-21 20:54 ` [PATCH v4 4/4] mm: vmalloc: convert vread() to vread_iter() Lorenzo Stoakes
2023-03-22 11:18   ` David Hildenbrand
2023-03-22 11:31     ` Lorenzo Stoakes
2023-03-22 12:55   ` Baoquan He [this message]
2023-03-22 13:43     ` Lorenzo Stoakes
