All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yunsheng Lin <linyunsheng@huawei.com>
To: <davem@davemloft.net>, <kuba@kernel.org>, <pabeni@redhat.com>
Cc: <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	Yunsheng Lin <linyunsheng@huawei.com>,
	Andrew Morton <akpm@linux-foundation.org>, <linux-mm@kvack.org>
Subject: [PATCH RFC 07/10] mm: page_frag: reuse existing bit field of 'va' for pagecnt_bias
Date: Thu, 28 Mar 2024 21:38:36 +0800	[thread overview]
Message-ID: <20240328133839.13620-8-linyunsheng@huawei.com> (raw)
In-Reply-To: <20240328133839.13620-1-linyunsheng@huawei.com>

As alignment of 'va' is always aligned with the order of the
page allocated, we can reuse the LSB bits for the pagecount
bias, and remove the orginal space needed by 'pagecnt_bias'.
Also limit the 'fragsz' to be at least the size of
'usigned int' to match the limited pagecnt_bias.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/page_frag_cache.h | 20 +++++++----
 mm/page_frag_alloc.c            | 63 +++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 40a7d6da9ef0..a97a1ac017d6 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -9,7 +9,18 @@
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 
 struct page_frag_cache {
-	void *va;
+	union {
+		void *va;
+		/* we maintain a pagecount bias, so that we dont dirty cache
+		 * line containing page->_refcount every time we allocate a
+		 * fragment. As 'va' is always aligned with the order of the
+		 * page allocated, we can reuse the LSB bits for the pagecount
+		 * bias, and its bit width happens to be indicated by the
+		 * 'size_mask' below.
+		 */
+		unsigned long pagecnt_bias;
+
+	};
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
 	__u16 offset;
 	__u16 size_mask:15;
@@ -18,10 +29,6 @@ struct page_frag_cache {
 	__u32 offset:31;
 	__u32 pfmemalloc:1;
 #endif
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_refcount every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
 };
 
 static inline void page_frag_cache_init(struct page_frag_cache *nc)
@@ -56,7 +63,8 @@ static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
 					     gfp_t gfp_mask,
 					     unsigned int align)
 {
-	WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE);
+	WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE ||
+		     fragsz < sizeof(unsigned int));
 
 	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, align);
 }
diff --git a/mm/page_frag_alloc.c b/mm/page_frag_alloc.c
index a02e57a439f0..ae1393d0619a 100644
--- a/mm/page_frag_alloc.c
+++ b/mm/page_frag_alloc.c
@@ -18,8 +18,8 @@
 #include <linux/page_frag_cache.h>
 #include "internal.h"
 
-static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
-					     gfp_t gfp_mask)
+static bool __page_frag_cache_refill(struct page_frag_cache *nc,
+				     gfp_t gfp_mask)
 {
 	struct page *page = NULL;
 	gfp_t gfp = gfp_mask;
@@ -35,9 +35,26 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 	if (unlikely(!page))
 		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
 
-	nc->va = page ? page_address(page) : NULL;
+	if (unlikely(!page)) {
+		nc->va = NULL;
+		return false;
+	}
+
+	nc->va = page_address(page);
 
-	return page;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	VM_BUG_ON(nc->pagecnt_bias & nc->size_mask);
+	page_ref_add(page, nc->size_mask - 1);
+	nc->pagecnt_bias |= nc->size_mask;
+#else
+	VM_BUG_ON(nc->pagecnt_bias & (PAGE_SIZE - 1));
+	page_ref_add(page, PAGE_SIZE - 2);
+	nc->pagecnt_bias |= (PAGE_SIZE - 1);
+#endif
+
+	nc->pfmemalloc = page_is_pfmemalloc(page);
+	nc->offset = 0;
+	return true;
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc)
@@ -67,38 +84,31 @@ EXPORT_SYMBOL(__page_frag_cache_drain);
 void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
 			 gfp_t gfp_mask)
 {
-	unsigned int size, offset;
+	unsigned long size_mask;
+	unsigned int offset;
 	struct page *page;
+	void *va;
 
 	if (unlikely(!nc->va)) {
 refill:
-		page = __page_frag_cache_refill(nc, gfp_mask);
-		if (!page)
+		if (!__page_frag_cache_refill(nc, gfp_mask))
 			return NULL;
-
-		/* Even if we own the page, we do not use atomic_set().
-		 * This would break get_page_unless_zero() users.
-		 */
-		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
-
-		/* reset page count bias and offset to start of new frag */
-		nc->pfmemalloc = page_is_pfmemalloc(page);
-		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
-		nc->offset = 0;
 	}
 
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
 	/* if size can vary use size else just use PAGE_SIZE */
-	size = nc->size_mask + 1;
+	size_mask = nc->size_mask;
 #else
-	size = PAGE_SIZE;
+	size_mask = PAGE_SIZE - 1;
 #endif
 
+	va = (void *)((unsigned long)nc->va & ~size_mask);
 	offset = nc->offset;
-	if (unlikely(offset + fragsz > size)) {
-		page = virt_to_page(nc->va);
 
-		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
+	if (unlikely(offset + fragsz > (size_mask + 1))) {
+		page = virt_to_page(va);
+
+		if (!page_ref_sub_and_test(page, nc->pagecnt_bias & size_mask))
 			goto refill;
 
 		if (unlikely(nc->pfmemalloc)) {
@@ -107,12 +117,11 @@ void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
 		}
 
 		/* OK, page count is 0, we can safely set it */
-		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
+		set_page_count(page, size_mask);
+		nc->pagecnt_bias |= size_mask;
 
-		/* reset page count bias and offset to start of new frag */
-		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
 		offset = 0;
-		if (unlikely(fragsz > size)) {
+		if (unlikely(fragsz > (size_mask + 1))) {
 			/*
 			 * The caller is trying to allocate a fragment
 			 * with fragsz > PAGE_SIZE but the cache isn't big
@@ -129,7 +138,7 @@ void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
 	nc->pagecnt_bias--;
 	nc->offset = offset + fragsz;
 
-	return nc->va + offset;
+	return va + offset;
 }
 EXPORT_SYMBOL(page_frag_alloc_va);
 
-- 
2.33.0


  parent reply	other threads:[~2024-03-28 13:41 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-28 13:38 [PATCH RFC 00/10] First try to replace page_frag with page_frag_cache Yunsheng Lin
2024-03-28 13:38 ` Yunsheng Lin
2024-03-28 13:38 ` [PATCH RFC 01/10] mm: Move the page fragment allocator from page_alloc into its own file Yunsheng Lin
2024-03-29 17:19   ` Christophe Leroy
2024-03-30 12:01     ` Yunsheng Lin
2024-03-30 12:01       ` Yunsheng Lin
2024-03-30 12:01       ` Yunsheng Lin
2024-03-28 13:38 ` [PATCH RFC 02/10] mm: page_frag: use initial zero offset for page_frag_alloc_align() Yunsheng Lin
2024-03-28 13:38 ` [PATCH RFC 03/10] mm: page_frag: change page_frag_alloc_* API to accept align param Yunsheng Lin
2024-03-28 13:38 ` [PATCH RFC 04/10] mm: page_frag: add '_va' suffix to page_frag API Yunsheng Lin
2024-03-28 13:38   ` [Intel-wired-lan] " Yunsheng Lin
2024-03-28 13:38 ` [PATCH RFC 05/10] mm: page_frag: add two inline helper for " Yunsheng Lin
2024-03-28 13:38 ` [PATCH RFC 06/10] mm: page_frag: reuse MSB of 'size' field for pfmemalloc Yunsheng Lin
2024-03-28 13:38 ` Yunsheng Lin [this message]
2024-03-28 13:38 ` [PATCH RFC 08/10] net: introduce the skb_copy_to_va_nocache() helper Yunsheng Lin
2024-03-28 13:38 ` [PATCH RFC 09/10] mm: page_frag: introduce prepare/commit API for page_frag Yunsheng Lin
2024-03-28 13:38 ` [PATCH RFC 10/10] net: replace page_frag with page_frag_cache Yunsheng Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240328133839.13620-8-linyunsheng@huawei.com \
    --to=linyunsheng@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=davem@davemloft.net \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.