linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] Change struct page layout for page_pool
@ 2021-04-16 23:07 Matthew Wilcox (Oracle)
  2021-04-16 23:07 ` [PATCH 1/2] mm: Fix struct page layout on 32-bit systems Matthew Wilcox (Oracle)
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-04-16 23:07 UTC (permalink / raw)
  To: brouer
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-kernel,
	Matthew Wilcox (Oracle),
	linux-mips, mhocko, linux-mm, mgorman, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

The first patch here fixes two bugs on ppc32, and mips32.  It fixes one
bug on arc and arm32 (in certain configurations).  It probably makes
sense to get it in ASAP through the networking tree.  I'd like to see
testing on those four architectures if possible?

The second patch enables new functionality.  It is much less urgent.
I'd really like to see Mel & Michal's thoughts on it.

I have only compile-tested these patches.

Matthew Wilcox (Oracle) (2):
  mm: Fix struct page layout on 32-bit systems
  mm: Indicate pfmemalloc pages in compound_head

 include/linux/mm.h       | 12 +++++++-----
 include/linux/mm_types.h |  9 ++++-----
 include/net/page_pool.h  | 12 +++++++++++-
 net/core/page_pool.c     | 12 +++++++-----
 4 files changed, 29 insertions(+), 16 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-16 23:07 [PATCH 0/2] Change struct page layout for page_pool Matthew Wilcox (Oracle)
@ 2021-04-16 23:07 ` Matthew Wilcox (Oracle)
  2021-04-17  1:08   ` kernel test robot
                     ` (3 more replies)
  2021-04-16 23:07 ` [PATCH 2/2] mm: Indicate pfmemalloc pages in compound_head Matthew Wilcox (Oracle)
  2021-04-20  4:03 ` [PATCH 0/2] Change struct page layout for page_pool Michael Ellerman
  2 siblings, 4 replies; 26+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-04-16 23:07 UTC (permalink / raw)
  To: brouer
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-kernel,
	Matthew Wilcox (Oracle),
	linux-mips, mhocko, linux-mm, mgorman, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

32-bit architectures which expect 8-byte alignment for 8-byte integers
and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
page inadvertently expanded in 2019.  When the dma_addr_t was added,
it forced the alignment of the union to 8 bytes, which inserted a 4 byte
gap between 'flags' and the union.

Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
This restores the alignment to that of an unsigned long, and also fixes a
potential problem where (on a big endian platform), the bit used to denote
PageTail could inadvertently get set, and a racing get_user_pages_fast()
could dereference a bogus compound_head().

Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/mm_types.h |  4 ++--
 include/net/page_pool.h  | 12 +++++++++++-
 net/core/page_pool.c     | 12 +++++++-----
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6613b26a8894..5aacc1c10a45 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,10 +97,10 @@ struct page {
 		};
 		struct {	/* page_pool used by netstack */
 			/**
-			 * @dma_addr: might require a 64-bit value even on
+			 * @dma_addr: might require a 64-bit value on
 			 * 32-bit architectures.
 			 */
-			dma_addr_t dma_addr;
+			unsigned long dma_addr[2];
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index b5b195305346..db7c7020746a 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	return page->dma_addr;
+	dma_addr_t ret = page->dma_addr[0];
+	if (sizeof(dma_addr_t) > sizeof(unsigned long))
+		ret |= (dma_addr_t)page->dma_addr[1] << 32;
+	return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+	page->dma_addr[0] = addr;
+	if (sizeof(dma_addr_t) > sizeof(unsigned long))
+		page->dma_addr[1] = addr >> 32;
 }
 
 static inline bool is_page_pool_compiled_in(void)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index ad8b0707af04..f014fd8c19a6 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
 					  struct page *page,
 					  unsigned int dma_sync_size)
 {
+	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
 	dma_sync_size = min(dma_sync_size, pool->p.max_len);
-	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
 					 pool->p.offset, dma_sync_size,
 					 pool->p.dma_dir);
 }
@@ -226,7 +228,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 		put_page(page);
 		return NULL;
 	}
-	page->dma_addr = dma;
+	page_pool_set_dma_addr(page, dma);
 
 	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
 		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
@@ -294,13 +296,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
 		 */
 		goto skip_dma_unmap;
 
-	dma = page->dma_addr;
+	dma = page_pool_get_dma_addr(page);
 
-	/* When page is unmapped, it cannot be returned our pool */
+	/* When page is unmapped, it cannot be returned to our pool */
 	dma_unmap_page_attrs(pool->p.dev, dma,
 			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
 			     DMA_ATTR_SKIP_CPU_SYNC);
-	page->dma_addr = 0;
+	page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
 	/* This may be the last page returned, releasing the pool, so
 	 * it is not safe to reference pool afterwards.
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/2] mm: Indicate pfmemalloc pages in compound_head
  2021-04-16 23:07 [PATCH 0/2] Change struct page layout for page_pool Matthew Wilcox (Oracle)
  2021-04-16 23:07 ` [PATCH 1/2] mm: Fix struct page layout on 32-bit systems Matthew Wilcox (Oracle)
@ 2021-04-16 23:07 ` Matthew Wilcox (Oracle)
  2021-04-17 21:13   ` David Laight
  2021-04-20  4:03 ` [PATCH 0/2] Change struct page layout for page_pool Michael Ellerman
  2 siblings, 1 reply; 26+ messages in thread
From: Matthew Wilcox (Oracle) @ 2021-04-16 23:07 UTC (permalink / raw)
  To: brouer
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-kernel,
	Matthew Wilcox (Oracle),
	linux-mips, mhocko, linux-mm, mgorman, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

The net page_pool wants to use a magic value to identify page pool pages.
The best place to put it is in the first word where it can be clearly a
non-pointer value.  That means shifting dma_addr up to alias with ->index,
which means we need to find another way to indicate page_is_pfmemalloc().
Since page_pool doesn't want to set its magic value on pages which are
pfmemalloc, we can use bit 1 of compound_head to indicate that the page
came from the memory reserves.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/mm.h       | 12 +++++++-----
 include/linux/mm_types.h |  7 +++----
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8ba434287387..44eab3f6d5ae 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1629,10 +1629,12 @@ struct address_space *page_mapping_file(struct page *page);
 static inline bool page_is_pfmemalloc(const struct page *page)
 {
 	/*
-	 * Page index cannot be this large so this must be
-	 * a pfmemalloc page.
+	 * This is not a tail page; compound_head of a head page is unused
+	 * at return from the page allocator, and will be overwritten
+	 * by callers who do not care whether the page came from the
+	 * reserves.
 	 */
-	return page->index == -1UL;
+	return page->compound_head & 2;
 }
 
 /*
@@ -1641,12 +1643,12 @@ static inline bool page_is_pfmemalloc(const struct page *page)
  */
 static inline void set_page_pfmemalloc(struct page *page)
 {
-	page->index = -1UL;
+	page->compound_head = 2;
 }
 
 static inline void clear_page_pfmemalloc(struct page *page)
 {
-	page->index = 0;
+	page->compound_head = 0;
 }
 
 /*
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5aacc1c10a45..39f7163dcace 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -96,10 +96,9 @@ struct page {
 			unsigned long private;
 		};
 		struct {	/* page_pool used by netstack */
-			/**
-			 * @dma_addr: might require a 64-bit value on
-			 * 32-bit architectures.
-			 */
+			unsigned long pp_magic;
+			unsigned long xmi;
+			unsigned long _pp_mapping_pad;
 			unsigned long dma_addr[2];
 		};
 		struct {	/* slab, slob and slub */
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-16 23:07 ` [PATCH 1/2] mm: Fix struct page layout on 32-bit systems Matthew Wilcox (Oracle)
@ 2021-04-17  1:08   ` kernel test robot
  2021-04-17  2:45   ` Matthew Wilcox
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2021-04-17  1:08 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), brouer
  Cc: kbuild-all, netdev, ilias.apalodimas, linux-kernel,
	Matthew Wilcox (Oracle),
	linux-mips, linux-mm, mcroce, linuxppc-dev, linux-arm-kernel

[-- Attachment #1: Type: text/plain, Size: 2920 bytes --]

Hi "Matthew,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linux/master]
[also build test WARNING on linus/master v5.12-rc7]
[cannot apply to hnaz-linux-mm/master next-20210416]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Matthew-Wilcox-Oracle/Change-struct-page-layout-for-page_pool/20210417-070951
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: parisc-randconfig-s031-20210416 (attached as .config)
compiler: hppa-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-280-g2cd6d34e-dirty
        # https://github.com/0day-ci/linux/commit/898e155048088be20b2606575a24108eacc4c91b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Matthew-Wilcox-Oracle/Change-struct-page-layout-for-page_pool/20210417-070951
        git checkout 898e155048088be20b2606575a24108eacc4c91b
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=parisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from net/core/xdp.c:15:
   include/net/page_pool.h: In function 'page_pool_get_dma_addr':
>> include/net/page_pool.h:203:40: warning: left shift count >= width of type [-Wshift-count-overflow]
     203 |   ret |= (dma_addr_t)page->dma_addr[1] << 32;
         |                                        ^~
   include/net/page_pool.h: In function 'page_pool_set_dma_addr':
>> include/net/page_pool.h:211:28: warning: right shift count >= width of type [-Wshift-count-overflow]
     211 |   page->dma_addr[1] = addr >> 32;
         |                            ^~


vim +203 include/net/page_pool.h

   198	
   199	static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
   200	{
   201		dma_addr_t ret = page->dma_addr[0];
   202		if (sizeof(dma_addr_t) > sizeof(unsigned long))
 > 203			ret |= (dma_addr_t)page->dma_addr[1] << 32;
   204		return ret;
   205	}
   206	
   207	static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
   208	{
   209		page->dma_addr[0] = addr;
   210		if (sizeof(dma_addr_t) > sizeof(unsigned long))
 > 211			page->dma_addr[1] = addr >> 32;
   212	}
   213	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 23601 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-16 23:07 ` [PATCH 1/2] mm: Fix struct page layout on 32-bit systems Matthew Wilcox (Oracle)
  2021-04-17  1:08   ` kernel test robot
@ 2021-04-17  2:45   ` Matthew Wilcox
  2021-04-17 18:32     ` Ilias Apalodimas
                       ` (3 more replies)
  2021-04-17  7:34   ` Jesper Dangaard Brouer
  2021-04-20  7:21   ` Rasmus Villemoes
  3 siblings, 4 replies; 26+ messages in thread
From: Matthew Wilcox @ 2021-04-17  2:45 UTC (permalink / raw)
  To: brouer
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-mips,
	linux-kernel, mhocko, linux-mm, mgorman, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel


Replacement patch to fix compiler warning.

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 16 Apr 2021 16:34:55 -0400
Subject: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
To: brouer@redhat.com
Cc: linux-kernel@vger.kernel.org,
    linux-mm@kvack.org,
    netdev@vger.kernel.org,
    linuxppc-dev@lists.ozlabs.org,
    linux-arm-kernel@lists.infradead.org,
    linux-mips@vger.kernel.org,
    ilias.apalodimas@linaro.org,
    mcroce@linux.microsoft.com,
    grygorii.strashko@ti.com,
    arnd@kernel.org,
    hch@lst.de,
    linux-snps-arc@lists.infradead.org,
    mhocko@kernel.org,
    mgorman@suse.de

32-bit architectures which expect 8-byte alignment for 8-byte integers
and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
page inadvertently expanded in 2019.  When the dma_addr_t was added,
it forced the alignment of the union to 8 bytes, which inserted a 4 byte
gap between 'flags' and the union.

Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
This restores the alignment to that of an unsigned long, and also fixes a
potential problem where (on a big endian platform), the bit used to denote
PageTail could inadvertently get set, and a racing get_user_pages_fast()
could dereference a bogus compound_head().

Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/mm_types.h |  4 ++--
 include/net/page_pool.h  | 12 +++++++++++-
 net/core/page_pool.c     | 12 +++++++-----
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6613b26a8894..5aacc1c10a45 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,10 +97,10 @@ struct page {
 		};
 		struct {	/* page_pool used by netstack */
 			/**
-			 * @dma_addr: might require a 64-bit value even on
+			 * @dma_addr: might require a 64-bit value on
 			 * 32-bit architectures.
 			 */
-			dma_addr_t dma_addr;
+			unsigned long dma_addr[2];
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index b5b195305346..ad6154dc206c 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	return page->dma_addr;
+	dma_addr_t ret = page->dma_addr[0];
+	if (sizeof(dma_addr_t) > sizeof(unsigned long))
+		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+	return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+	page->dma_addr[0] = addr;
+	if (sizeof(dma_addr_t) > sizeof(unsigned long))
+		page->dma_addr[1] = addr >> 16 >> 16;
 }
 
 static inline bool is_page_pool_compiled_in(void)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index ad8b0707af04..f014fd8c19a6 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
 					  struct page *page,
 					  unsigned int dma_sync_size)
 {
+	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
 	dma_sync_size = min(dma_sync_size, pool->p.max_len);
-	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
 					 pool->p.offset, dma_sync_size,
 					 pool->p.dma_dir);
 }
@@ -226,7 +228,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 		put_page(page);
 		return NULL;
 	}
-	page->dma_addr = dma;
+	page_pool_set_dma_addr(page, dma);
 
 	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
 		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
@@ -294,13 +296,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
 		 */
 		goto skip_dma_unmap;
 
-	dma = page->dma_addr;
+	dma = page_pool_get_dma_addr(page);
 
-	/* When page is unmapped, it cannot be returned our pool */
+	/* When page is unmapped, it cannot be returned to our pool */
 	dma_unmap_page_attrs(pool->p.dev, dma,
 			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
 			     DMA_ATTR_SKIP_CPU_SYNC);
-	page->dma_addr = 0;
+	page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
 	/* This may be the last page returned, releasing the pool, so
 	 * it is not safe to reference pool afterwards.
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-16 23:07 ` [PATCH 1/2] mm: Fix struct page layout on 32-bit systems Matthew Wilcox (Oracle)
  2021-04-17  1:08   ` kernel test robot
  2021-04-17  2:45   ` Matthew Wilcox
@ 2021-04-17  7:34   ` Jesper Dangaard Brouer
  2021-04-20  7:21   ` Rasmus Villemoes
  3 siblings, 0 replies; 26+ messages in thread
From: Jesper Dangaard Brouer @ 2021-04-17  7:34 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-mips,
	linux-kernel, mhocko, linux-mm, mgorman, brouer, mcroce,
	linux-snps-arc, linuxppc-dev, hch, linux-arm-kernel

On Sat, 17 Apr 2021 00:07:23 +0100
"Matthew Wilcox (Oracle)" <willy@infradead.org> wrote:

> 32-bit architectures which expect 8-byte alignment for 8-byte integers
> and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
> page inadvertently expanded in 2019.  When the dma_addr_t was added,
> it forced the alignment of the union to 8 bytes, which inserted a 4 byte
> gap between 'flags' and the union.
> 
> Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
> This restores the alignment to that of an unsigned long, and also fixes a
> potential problem where (on a big endian platform), the bit used to denote
> PageTail could inadvertently get set, and a racing get_user_pages_fast()
> could dereference a bogus compound_head().
> 
> Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

Thanks you Matthew for working on a fix for this.  It's been a pleasure
working with you and exchanging crazy ideas with you for solving this.
Most of them didn't work out, especially those that came to me during
restless nights ;-).

Having worked through the other solutions, some very intrusive and some
could even be consider ugly.  I think we have a good and non-intrusive
solution/workaround in this patch.  Thanks!


>  include/linux/mm_types.h |  4 ++--
>  include/net/page_pool.h  | 12 +++++++++++-
>  net/core/page_pool.c     | 12 +++++++-----
>  3 files changed, 20 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6613b26a8894..5aacc1c10a45 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -97,10 +97,10 @@ struct page {
>  		};
>  		struct {	/* page_pool used by netstack */
>  			/**
> -			 * @dma_addr: might require a 64-bit value even on
> +			 * @dma_addr: might require a 64-bit value on
>  			 * 32-bit architectures.
>  			 */
> -			dma_addr_t dma_addr;
> +			unsigned long dma_addr[2];
>  		};
>  		struct {	/* slab, slob and slub */
>  			union {
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index b5b195305346..db7c7020746a 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>  
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -	return page->dma_addr;
> +	dma_addr_t ret = page->dma_addr[0];
> +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +		ret |= (dma_addr_t)page->dma_addr[1] << 32;
> +	return ret;
> +}
> +
> +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> +{
> +	page->dma_addr[0] = addr;
> +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +		page->dma_addr[1] = addr >> 32;
>  }
>  
>  static inline bool is_page_pool_compiled_in(void)
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index ad8b0707af04..f014fd8c19a6 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
>  					  struct page *page,
>  					  unsigned int dma_sync_size)
>  {
> +	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
> +
>  	dma_sync_size = min(dma_sync_size, pool->p.max_len);
> -	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
> +	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
>  					 pool->p.offset, dma_sync_size,
>  					 pool->p.dma_dir);
>  }
> @@ -226,7 +228,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
>  		put_page(page);
>  		return NULL;
>  	}
> -	page->dma_addr = dma;
> +	page_pool_set_dma_addr(page, dma);
>  
>  	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
>  		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
> @@ -294,13 +296,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
>  		 */
>  		goto skip_dma_unmap;
>  
> -	dma = page->dma_addr;
> +	dma = page_pool_get_dma_addr(page);
>  
> -	/* When page is unmapped, it cannot be returned our pool */
> +	/* When page is unmapped, it cannot be returned to our pool */
>  	dma_unmap_page_attrs(pool->p.dev, dma,
>  			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
>  			     DMA_ATTR_SKIP_CPU_SYNC);
> -	page->dma_addr = 0;
> +	page_pool_set_dma_addr(page, 0);
>  skip_dma_unmap:
>  	/* This may be the last page returned, releasing the pool, so
>  	 * it is not safe to reference pool afterwards.



-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-17  2:45   ` Matthew Wilcox
@ 2021-04-17 18:32     ` Ilias Apalodimas
  2021-04-17 20:22       ` Matthew Wilcox
  2021-04-17 21:18     ` David Laight
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 26+ messages in thread
From: Ilias Apalodimas @ 2021-04-17 18:32 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: arnd, grygorii.strashko, netdev, linux-kernel, linux-mips,
	mhocko, linux-mm, mgorman, brouer, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

Hi Matthew,

On Sat, Apr 17, 2021 at 03:45:22AM +0100, Matthew Wilcox wrote:
> 
> Replacement patch to fix compiler warning.
> 
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Date: Fri, 16 Apr 2021 16:34:55 -0400
> Subject: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
> To: brouer@redhat.com
> Cc: linux-kernel@vger.kernel.org,
>     linux-mm@kvack.org,
>     netdev@vger.kernel.org,
>     linuxppc-dev@lists.ozlabs.org,
>     linux-arm-kernel@lists.infradead.org,
>     linux-mips@vger.kernel.org,
>     ilias.apalodimas@linaro.org,
>     mcroce@linux.microsoft.com,
>     grygorii.strashko@ti.com,
>     arnd@kernel.org,
>     hch@lst.de,
>     linux-snps-arc@lists.infradead.org,
>     mhocko@kernel.org,
>     mgorman@suse.de
> 
> 32-bit architectures which expect 8-byte alignment for 8-byte integers
> and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
> page inadvertently expanded in 2019.  When the dma_addr_t was added,
> it forced the alignment of the union to 8 bytes, which inserted a 4 byte
> gap between 'flags' and the union.
> 
> Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
> This restores the alignment to that of an unsigned long, and also fixes a
> potential problem where (on a big endian platform), the bit used to denote
> PageTail could inadvertently get set, and a racing get_user_pages_fast()
> could dereference a bogus compound_head().
> 
> Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/mm_types.h |  4 ++--
>  include/net/page_pool.h  | 12 +++++++++++-
>  net/core/page_pool.c     | 12 +++++++-----
>  3 files changed, 20 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6613b26a8894..5aacc1c10a45 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -97,10 +97,10 @@ struct page {
>  		};
>  		struct {	/* page_pool used by netstack */
>  			/**
> -			 * @dma_addr: might require a 64-bit value even on
> +			 * @dma_addr: might require a 64-bit value on
>  			 * 32-bit architectures.
>  			 */
> -			dma_addr_t dma_addr;
> +			unsigned long dma_addr[2];
>  		};
>  		struct {	/* slab, slob and slub */
>  			union {
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index b5b195305346..ad6154dc206c 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>  
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -	return page->dma_addr;
> +	dma_addr_t ret = page->dma_addr[0];
> +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
> +	return ret;
> +}
> +
> +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> +{
> +	page->dma_addr[0] = addr;
> +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +		page->dma_addr[1] = addr >> 16 >> 16;


The 'error' that was reported will never trigger right?
I assume this was compiled with dma_addr_t as 32bits (so it triggered the
compilation error), but the if check will never allow this codepath to run.
If so can we add a comment explaining this, since none of us will remember why
in 6 months from now?

>  }
>  
>  static inline bool is_page_pool_compiled_in(void)
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index ad8b0707af04..f014fd8c19a6 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
>  					  struct page *page,
>  					  unsigned int dma_sync_size)
>  {
> +	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
> +
>  	dma_sync_size = min(dma_sync_size, pool->p.max_len);
> -	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
> +	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
>  					 pool->p.offset, dma_sync_size,
>  					 pool->p.dma_dir);
>  }
> @@ -226,7 +228,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
>  		put_page(page);
>  		return NULL;
>  	}
> -	page->dma_addr = dma;
> +	page_pool_set_dma_addr(page, dma);
>  
>  	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
>  		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
> @@ -294,13 +296,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
>  		 */
>  		goto skip_dma_unmap;
>  
> -	dma = page->dma_addr;
> +	dma = page_pool_get_dma_addr(page);
>  
> -	/* When page is unmapped, it cannot be returned our pool */
> +	/* When page is unmapped, it cannot be returned to our pool */
>  	dma_unmap_page_attrs(pool->p.dev, dma,
>  			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
>  			     DMA_ATTR_SKIP_CPU_SYNC);
> -	page->dma_addr = 0;
> +	page_pool_set_dma_addr(page, 0);
>  skip_dma_unmap:
>  	/* This may be the last page returned, releasing the pool, so
>  	 * it is not safe to reference pool afterwards.
> -- 
> 2.30.2
> 

Thanks
/Ilias

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-17 18:32     ` Ilias Apalodimas
@ 2021-04-17 20:22       ` Matthew Wilcox
  2021-04-18 11:21         ` Ilias Apalodimas
  0 siblings, 1 reply; 26+ messages in thread
From: Matthew Wilcox @ 2021-04-17 20:22 UTC (permalink / raw)
  To: Ilias Apalodimas
  Cc: arnd, grygorii.strashko, netdev, linux-kernel, linux-mips,
	mhocko, linux-mm, mgorman, brouer, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

On Sat, Apr 17, 2021 at 09:32:06PM +0300, Ilias Apalodimas wrote:
> > +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> > +{
> > +	page->dma_addr[0] = addr;
> > +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> > +		page->dma_addr[1] = addr >> 16 >> 16;
> 
> The 'error' that was reported will never trigger right?
> I assume this was compiled with dma_addr_t as 32bits (so it triggered the
> compilation error), but the if check will never allow this codepath to run.
> If so can we add a comment explaining this, since none of us will remember why
> in 6 months from now?

That's right.  I compiled it all three ways -- 32-bit, 64-bit dma, 32-bit long
and 64-bit.  The 32/64 bit case turn into:

	if (0)
		page->dma_addr[1] = addr >> 16 >> 16;

which gets elided.  So the only case that has to work is 64-bit dma and
32-bit long.

I can replace this with upper_32_bits().


^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 2/2] mm: Indicate pfmemalloc pages in compound_head
  2021-04-16 23:07 ` [PATCH 2/2] mm: Indicate pfmemalloc pages in compound_head Matthew Wilcox (Oracle)
@ 2021-04-17 21:13   ` David Laight
  2021-04-17 21:28     ` Matthew Wilcox
  0 siblings, 1 reply; 26+ messages in thread
From: David Laight @ 2021-04-17 21:13 UTC (permalink / raw)
  To: 'Matthew Wilcox (Oracle)', brouer
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-mips,
	linux-kernel, mhocko, linux-mm, mgorman, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

From: Matthew Wilcox (Oracle) <willy@infradead.org>
> Sent: 17 April 2021 00:07
> 
> The net page_pool wants to use a magic value to identify page pool pages.
> The best place to put it is in the first word where it can be clearly a
> non-pointer value.  That means shifting dma_addr up to alias with ->index,
> which means we need to find another way to indicate page_is_pfmemalloc().
> Since page_pool doesn't want to set its magic value on pages which are
> pfmemalloc, we can use bit 1 of compound_head to indicate that the page
> came from the memory reserves.
> 
...
>  		struct {	/* page_pool used by netstack */
> -			/**
> -			 * @dma_addr: might require a 64-bit value on
> -			 * 32-bit architectures.
> -			 */
> +			unsigned long pp_magic;
> +			unsigned long xmi;
> +			unsigned long _pp_mapping_pad;
>  			unsigned long dma_addr[2];
>  		};

You've deleted the comment.

I also think there should be a comment that dma_addr[0]
must be aliased to ->index.
(Or whatever all the exact requirements are.)

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-17  2:45   ` Matthew Wilcox
  2021-04-17 18:32     ` Ilias Apalodimas
@ 2021-04-17 21:18     ` David Laight
  2021-04-17 22:45       ` Matthew Wilcox
  2021-04-20  2:48     ` Vineet Gupta
  2021-04-20  7:39     ` Geert Uytterhoeven
  3 siblings, 1 reply; 26+ messages in thread
From: David Laight @ 2021-04-17 21:18 UTC (permalink / raw)
  To: 'Matthew Wilcox', brouer
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-mips,
	linux-kernel, mhocko, linux-mm, mgorman, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

From: Matthew Wilcox <willy@infradead.org>
> Sent: 17 April 2021 03:45
> 
> Replacement patch to fix compiler warning.
...
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -	return page->dma_addr;
> +	dma_addr_t ret = page->dma_addr[0];
> +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;

Ugly as well.

Why not just replace the (dma_addr_t) cast with a (u64) one?
Looks better than the double shift.

Same could be done for the '>> 32'.
Is there an upper_32_bits() that could be used??

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] mm: Indicate pfmemalloc pages in compound_head
  2021-04-17 21:13   ` David Laight
@ 2021-04-17 21:28     ` Matthew Wilcox
  0 siblings, 0 replies; 26+ messages in thread
From: Matthew Wilcox @ 2021-04-17 21:28 UTC (permalink / raw)
  To: David Laight
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-kernel,
	linux-mips, mhocko, linux-mm, mgorman, brouer, mcroce,
	linux-snps-arc, linuxppc-dev, hch, linux-arm-kernel

On Sat, Apr 17, 2021 at 09:13:45PM +0000, David Laight wrote:
> >  		struct {	/* page_pool used by netstack */
> > -			/**
> > -			 * @dma_addr: might require a 64-bit value on
> > -			 * 32-bit architectures.
> > -			 */
> > +			unsigned long pp_magic;
> > +			unsigned long xmi;
> > +			unsigned long _pp_mapping_pad;
> >  			unsigned long dma_addr[2];
> >  		};
> 
> You've deleted the comment.

Yes.  It no longer added any value.  You can see dma_addr now occupies
two words.

> I also think there should be a comment that dma_addr[0]
> must be aliased to ->index.

That's not a requirement.  Moving the pfmemalloc indicator is a
requirement so that we _can_ use index, but there's no requirement about
how index is used.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-17 21:18     ` David Laight
@ 2021-04-17 22:45       ` Matthew Wilcox
  0 siblings, 0 replies; 26+ messages in thread
From: Matthew Wilcox @ 2021-04-17 22:45 UTC (permalink / raw)
  To: David Laight
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-kernel,
	linux-mips, mhocko, linux-mm, mgorman, brouer, mcroce,
	linux-snps-arc, linuxppc-dev, hch, linux-arm-kernel

On Sat, Apr 17, 2021 at 09:18:57PM +0000, David Laight wrote:
> Ugly as well.

Thank you for expressing your opinion.  Again.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-17 20:22       ` Matthew Wilcox
@ 2021-04-18 11:21         ` Ilias Apalodimas
  0 siblings, 0 replies; 26+ messages in thread
From: Ilias Apalodimas @ 2021-04-18 11:21 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: arnd, grygorii.strashko, netdev, linux-kernel, linux-mips,
	mhocko, linux-mm, mgorman, brouer, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

On Sat, Apr 17, 2021 at 09:22:40PM +0100, Matthew Wilcox wrote:
> On Sat, Apr 17, 2021 at 09:32:06PM +0300, Ilias Apalodimas wrote:
> > > +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> > > +{
> > > +	page->dma_addr[0] = addr;
> > > +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> > > +		page->dma_addr[1] = addr >> 16 >> 16;
> > 
> > The 'error' that was reported will never trigger right?
> > I assume this was compiled with dma_addr_t as 32bits (so it triggered the
> > compilation error), but the if check will never allow this codepath to run.
> > If so can we add a comment explaining this, since none of us will remember why
> > in 6 months from now?
> 
> That's right.  I compiled it all three ways -- 32-bit, 64-bit dma, 32-bit long
> and 64-bit.  The 32/64 bit case turn into:
> 
> 	if (0)
> 		page->dma_addr[1] = addr >> 16 >> 16;
> 
> which gets elided.  So the only case that has to work is 64-bit dma and
> 32-bit long.
> 
> I can replace this with upper_32_bits().
> 

Ok up to you, I don't mind either way and thanks for solving this!

Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-17  2:45   ` Matthew Wilcox
  2021-04-17 18:32     ` Ilias Apalodimas
  2021-04-17 21:18     ` David Laight
@ 2021-04-20  2:48     ` Vineet Gupta
  2021-04-20  3:10       ` Matthew Wilcox
  2021-04-20  7:39     ` Geert Uytterhoeven
  3 siblings, 1 reply; 26+ messages in thread
From: Vineet Gupta @ 2021-04-20  2:48 UTC (permalink / raw)
  To: Matthew Wilcox, brouer
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-mips,
	linux-kernel, mhocko, linux-mm, mgorman, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

Hi Matthew,

On 4/16/21 7:45 PM, Matthew Wilcox wrote:
> Replacement patch to fix compiler warning.
>
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Date: Fri, 16 Apr 2021 16:34:55 -0400
> Subject: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
> To: brouer@redhat.com
> Cc: linux-kernel@vger.kernel.org,
>      linux-mm@kvack.org,
>      netdev@vger.kernel.org,
>      linuxppc-dev@lists.ozlabs.org,
>      linux-arm-kernel@lists.infradead.org,
>      linux-mips@vger.kernel.org,
>      ilias.apalodimas@linaro.org,
>      mcroce@linux.microsoft.com,
>      grygorii.strashko@ti.com,
>      arnd@kernel.org,
>      hch@lst.de,
>      linux-snps-arc@lists.infradead.org,
>      mhocko@kernel.org,
>      mgorman@suse.de
>
> 32-bit architectures which expect 8-byte alignment for 8-byte integers
> and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
> page inadvertently expanded in 2019.

FWIW, ARC doesn't require 8 byte alignment for 8 byte integers. This is 
only needed for 8-byte atomics due to the requirements of LLOCKD/SCOND 
instructions.

> When the dma_addr_t was added,
> it forced the alignment of the union to 8 bytes, which inserted a 4 byte
> gap between 'flags' and the union.
>
> Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
> This restores the alignment to that of an unsigned long, and also fixes a
> potential problem where (on a big endian platform), the bit used to denote
> PageTail could inadvertently get set, and a racing get_user_pages_fast()
> could dereference a bogus compound_head().
>
> Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   include/linux/mm_types.h |  4 ++--
>   include/net/page_pool.h  | 12 +++++++++++-
>   net/core/page_pool.c     | 12 +++++++-----
>   3 files changed, 20 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6613b26a8894..5aacc1c10a45 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -97,10 +97,10 @@ struct page {
>   		};
>   		struct {	/* page_pool used by netstack */
>   			/**
> -			 * @dma_addr: might require a 64-bit value even on
> +			 * @dma_addr: might require a 64-bit value on
>   			 * 32-bit architectures.
>   			 */
> -			dma_addr_t dma_addr;
> +			unsigned long dma_addr[2];
>   		};
>   		struct {	/* slab, slob and slub */
>   			union {
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index b5b195305346..ad6154dc206c 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>   
>   static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>   {
> -	return page->dma_addr;
> +	dma_addr_t ret = page->dma_addr[0];
> +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
> +	return ret;
> +}
> +
> +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> +{
> +	page->dma_addr[0] = addr;
> +	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +		page->dma_addr[1] = addr >> 16 >> 16;
>   }
>   
>   static inline bool is_page_pool_compiled_in(void)
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index ad8b0707af04..f014fd8c19a6 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
>   					  struct page *page,
>   					  unsigned int dma_sync_size)
>   {
> +	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
> +
>   	dma_sync_size = min(dma_sync_size, pool->p.max_len);
> -	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
> +	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
>   					 pool->p.offset, dma_sync_size,
>   					 pool->p.dma_dir);
>   }
> @@ -226,7 +228,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
>   		put_page(page);
>   		return NULL;
>   	}
> -	page->dma_addr = dma;
> +	page_pool_set_dma_addr(page, dma);
>   
>   	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
>   		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
> @@ -294,13 +296,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
>   		 */
>   		goto skip_dma_unmap;
>   
> -	dma = page->dma_addr;
> +	dma = page_pool_get_dma_addr(page);
>   
> -	/* When page is unmapped, it cannot be returned our pool */
> +	/* When page is unmapped, it cannot be returned to our pool */
>   	dma_unmap_page_attrs(pool->p.dev, dma,
>   			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
>   			     DMA_ATTR_SKIP_CPU_SYNC);
> -	page->dma_addr = 0;
> +	page_pool_set_dma_addr(page, 0);
>   skip_dma_unmap:
>   	/* This may be the last page returned, releasing the pool, so
>   	 * it is not safe to reference pool afterwards.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-20  2:48     ` Vineet Gupta
@ 2021-04-20  3:10       ` Matthew Wilcox
  2021-04-20  7:07         ` Arnd Bergmann
  0 siblings, 1 reply; 26+ messages in thread
From: Matthew Wilcox @ 2021-04-20  3:10 UTC (permalink / raw)
  To: Vineet Gupta
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-kernel,
	linux-mips, mhocko, linux-mm, mgorman, brouer, mcroce,
	linux-snps-arc, linuxppc-dev, hch, linux-arm-kernel

On Tue, Apr 20, 2021 at 02:48:17AM +0000, Vineet Gupta wrote:
> > 32-bit architectures which expect 8-byte alignment for 8-byte integers
> > and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
> > page inadvertently expanded in 2019.
> 
> FWIW, ARC doesn't require 8 byte alignment for 8 byte integers. This is 
> only needed for 8-byte atomics due to the requirements of LLOCKD/SCOND 
> instructions.

Ah, like x86?  OK, great, I'll drop your arch from the list of
affected.  Thanks!

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/2] Change struct page layout for page_pool
  2021-04-16 23:07 [PATCH 0/2] Change struct page layout for page_pool Matthew Wilcox (Oracle)
  2021-04-16 23:07 ` [PATCH 1/2] mm: Fix struct page layout on 32-bit systems Matthew Wilcox (Oracle)
  2021-04-16 23:07 ` [PATCH 2/2] mm: Indicate pfmemalloc pages in compound_head Matthew Wilcox (Oracle)
@ 2021-04-20  4:03 ` Michael Ellerman
  2 siblings, 0 replies; 26+ messages in thread
From: Michael Ellerman @ 2021-04-20  4:03 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), brouer
  Cc: arnd, grygorii.strashko, linux-snps-arc, netdev,
	ilias.apalodimas, linux-kernel, Matthew Wilcox (Oracle),
	linux-mips, linux-mm, mgorman, mcroce, mhocko, linuxppc-dev, hch,
	linux-arm-kernel

"Matthew Wilcox (Oracle)" <willy@infradead.org> writes:
> The first patch here fixes two bugs on ppc32, and mips32.  It fixes one
> bug on arc and arm32 (in certain configurations).  It probably makes
> sense to get it in ASAP through the networking tree.  I'd like to see
> testing on those four architectures if possible?

Sorry I don't have easy access to any hardware that fits the bill. At
some point I'll be able to go to the office and setup a machine that (I
think) can test these paths, but I don't have an ETA on that.

You and others seem to have done lots of analysis on this though, so I
think you should merge the fixes without waiting on ppc32 testing.

cheers


>
> The second patch enables new functionality.  It is much less urgent.
> I'd really like to see Mel & Michal's thoughts on it.
>
> I have only compile-tested these patches.
>
> Matthew Wilcox (Oracle) (2):
>   mm: Fix struct page layout on 32-bit systems
>   mm: Indicate pfmemalloc pages in compound_head
>
>  include/linux/mm.h       | 12 +++++++-----
>  include/linux/mm_types.h |  9 ++++-----
>  include/net/page_pool.h  | 12 +++++++++++-
>  net/core/page_pool.c     | 12 +++++++-----
>  4 files changed, 29 insertions(+), 16 deletions(-)
>
> -- 
> 2.30.2

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-20  3:10       ` Matthew Wilcox
@ 2021-04-20  7:07         ` Arnd Bergmann
  2021-04-20 21:14           ` Vineet Gupta
  0 siblings, 1 reply; 26+ messages in thread
From: Arnd Bergmann @ 2021-04-20  7:07 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: grygorii.strashko, linux-snps-arc, netdev, Vineet Gupta,
	ilias.apalodimas, linux-kernel, linux-mips, linux-mm, mgorman,
	brouer, mcroce, mhocko, linuxppc-dev, hch, linux-arm-kernel

On Tue, Apr 20, 2021 at 5:10 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Tue, Apr 20, 2021 at 02:48:17AM +0000, Vineet Gupta wrote:
> > > 32-bit architectures which expect 8-byte alignment for 8-byte integers
> > > and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
> > > page inadvertently expanded in 2019.
> >
> > FWIW, ARC doesn't require 8 byte alignment for 8 byte integers. This is
> > only needed for 8-byte atomics due to the requirements of LLOCKD/SCOND
> > instructions.
>
> Ah, like x86?  OK, great, I'll drop your arch from the list of
> affected.  Thanks!

I mistakenly assumed that i386 and m68k were the only supported
architectures with 32-bit alignment on u64. I checked it now and found

$ for i in /home/arnd/cross/x86_64/gcc-10.1.0-nolibc/*/bin/*-gcc ; do
echo `echo 'int a = __alignof__(long long);' | $i -xc - -Wall -S -o- |
grep -A1 a: | tail -n 1 | cut -f 3 -d\   `
${i#/home/arnd/cross/x86_64/gcc-10.1.0-nolibc/*/bin/} ; done
8 aarch64-linux-gcc
8 alpha-linux-gcc
4 arc-linux-gcc
8 arm-linux-gnueabi-gcc
8 c6x-elf-gcc
4 csky-linux-gcc
4 h8300-linux-gcc
8 hppa-linux-gcc
8 hppa64-linux-gcc
8 i386-linux-gcc
8 ia64-linux-gcc
2 m68k-linux-gcc
4 microblaze-linux-gcc
8 mips-linux-gcc
8 mips64-linux-gcc
8 nds32le-linux-gcc
4 nios2-linux-gcc
4 or1k-linux-gcc
8 powerpc-linux-gcc
8 powerpc64-linux-gcc
8 riscv32-linux-gcc
8 riscv64-linux-gcc
8 s390-linux-gcc
4 sh2-linux-gcc
4 sh4-linux-gcc
8 sparc-linux-gcc
8 sparc64-linux-gcc
8 x86_64-linux-gcc
8 xtensa-linux-gcc

which means that half the 32-bit architectures do this. This may
cause more problems when arc and/or microblaze want to support
64-bit kernels and compat mode in the future on their latest hardware,
as that means duplicating the x86 specific hacks we have for compat.

What is alignof(u64) on 64-bit arc?

      Arnd

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-16 23:07 ` [PATCH 1/2] mm: Fix struct page layout on 32-bit systems Matthew Wilcox (Oracle)
                     ` (2 preceding siblings ...)
  2021-04-17  7:34   ` Jesper Dangaard Brouer
@ 2021-04-20  7:21   ` Rasmus Villemoes
  3 siblings, 0 replies; 26+ messages in thread
From: Rasmus Villemoes @ 2021-04-20  7:21 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), brouer
  Cc: arnd, grygorii.strashko, netdev, ilias.apalodimas, linux-mips,
	linux-kernel, mhocko, linux-mm, mgorman, mcroce, linux-snps-arc,
	linuxppc-dev, hch, linux-arm-kernel

On 17/04/2021 01.07, Matthew Wilcox (Oracle) wrote:
> 32-bit architectures which expect 8-byte alignment for 8-byte integers
> and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
> page inadvertently expanded in 2019.  When the dma_addr_t was added,
> it forced the alignment of the union to 8 bytes, which inserted a 4 byte
> gap between 'flags' and the union.
> 
> Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
> This restores the alignment to that of an unsigned long, and also fixes a
> potential problem where (on a big endian platform), the bit used to denote
> PageTail could inadvertently get set, and a racing get_user_pages_fast()
> could dereference a bogus compound_head().
> 
> Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/mm_types.h |  4 ++--
>  include/net/page_pool.h  | 12 +++++++++++-
>  net/core/page_pool.c     | 12 +++++++-----
>  3 files changed, 20 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6613b26a8894..5aacc1c10a45 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -97,10 +97,10 @@ struct page {
>  		};
>  		struct {	/* page_pool used by netstack */
>  			/**
> -			 * @dma_addr: might require a 64-bit value even on
> +			 * @dma_addr: might require a 64-bit value on
>  			 * 32-bit architectures.
>  			 */
> -			dma_addr_t dma_addr;
> +			unsigned long dma_addr[2];

Shouldn't that member get another name (_dma_addr?) to be sure the
buildbots catch every possible (ab)user and get them turned into the new
accessors? Sure, page->dma_addr is now "pointer to unsigned long"
instead of "dma_addr_t", but who knows if there's a
"(long)page->dma_addr" somewhere?

Rasmus

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-17  2:45   ` Matthew Wilcox
                       ` (2 preceding siblings ...)
  2021-04-20  2:48     ` Vineet Gupta
@ 2021-04-20  7:39     ` Geert Uytterhoeven
  2021-04-20  8:39       ` David Laight
  2021-04-20 11:32       ` Matthew Wilcox
  3 siblings, 2 replies; 26+ messages in thread
From: Geert Uytterhoeven @ 2021-04-20  7:39 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Arnd Bergmann, Grygorii Strashko, netdev, Ilias Apalodimas,
	Linux Kernel Mailing List, open list:BROADCOM NVRAM DRIVER,
	Michal Hocko, Linux MM, Mel Gorman, Jesper Dangaard Brouer,
	mcroce, arcml, linuxppc-dev, Christoph Hellwig, Linux ARM

Hi Willy,

On Sat, Apr 17, 2021 at 4:49 AM Matthew Wilcox <willy@infradead.org> wrote:
> Replacement patch to fix compiler warning.
>
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Date: Fri, 16 Apr 2021 16:34:55 -0400
> Subject: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
> To: brouer@redhat.com
> Cc: linux-kernel@vger.kernel.org,
>     linux-mm@kvack.org,
>     netdev@vger.kernel.org,
>     linuxppc-dev@lists.ozlabs.org,
>     linux-arm-kernel@lists.infradead.org,
>     linux-mips@vger.kernel.org,
>     ilias.apalodimas@linaro.org,
>     mcroce@linux.microsoft.com,
>     grygorii.strashko@ti.com,
>     arnd@kernel.org,
>     hch@lst.de,
>     linux-snps-arc@lists.infradead.org,
>     mhocko@kernel.org,
>     mgorman@suse.de
>
> 32-bit architectures which expect 8-byte alignment for 8-byte integers
> and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
> page inadvertently expanded in 2019.  When the dma_addr_t was added,
> it forced the alignment of the union to 8 bytes, which inserted a 4 byte
> gap between 'flags' and the union.
>
> Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
> This restores the alignment to that of an unsigned long, and also fixes a
> potential problem where (on a big endian platform), the bit used to denote
> PageTail could inadvertently get set, and a racing get_user_pages_fast()
> could dereference a bogus compound_head().
>
> Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Thanks for your patch!

> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -97,10 +97,10 @@ struct page {
>                 };
>                 struct {        /* page_pool used by netstack */
>                         /**
> -                        * @dma_addr: might require a 64-bit value even on
> +                        * @dma_addr: might require a 64-bit value on
>                          * 32-bit architectures.
>                          */
> -                       dma_addr_t dma_addr;
> +                       unsigned long dma_addr[2];

So we get two 64-bit words on 64-bit platforms, while only one is
needed?

Would

    unsigned long _dma_addr[sizeof(dma_addr_t) / sizeof(unsigned long)];

work?

Or will the compiler become too overzealous, and warn about the use
of ...[1] below, even when unreachable?
I wouldn't mind an #ifdef instead of an if () in the code below, though.

>                 };
>                 struct {        /* slab, slob and slub */
>                         union {
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index b5b195305346..ad6154dc206c 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -       return page->dma_addr;
> +       dma_addr_t ret = page->dma_addr[0];
> +       if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +               ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;

We don't seem to have a handy macro for a 32-bit left shift yet...

But you can also avoid the warning using

    ret |= (u64)page->dma_addr[1] << 32;

> +       return ret;
> +}
> +
> +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> +{
> +       page->dma_addr[0] = addr;
> +       if (sizeof(dma_addr_t) > sizeof(unsigned long))
> +               page->dma_addr[1] = addr >> 16 >> 16;

... but we do have upper_32_bits() for a 32-bit right shift.

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-20  7:39     ` Geert Uytterhoeven
@ 2021-04-20  8:39       ` David Laight
  2021-04-20 11:32       ` Matthew Wilcox
  1 sibling, 0 replies; 26+ messages in thread
From: David Laight @ 2021-04-20  8:39 UTC (permalink / raw)
  To: 'Geert Uytterhoeven', Matthew Wilcox
  Cc: Arnd Bergmann, Grygorii Strashko, netdev, Ilias Apalodimas,
	Linux Kernel Mailing List, open list:BROADCOM NVRAM DRIVER,
	Michal Hocko, Linux MM, Mel Gorman, Jesper Dangaard Brouer,
	mcroce, arcml, linuxppc-dev, Christoph Hellwig, Linux ARM

From: Geert Uytterhoeven
> Sent: 20 April 2021 08:40
> 
> Hi Willy,
> 
> On Sat, Apr 17, 2021 at 4:49 AM Matthew Wilcox <willy@infradead.org> wrote:
> > Replacement patch to fix compiler warning.
> >
> > 32-bit architectures which expect 8-byte alignment for 8-byte integers
> > and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
> > page inadvertently expanded in 2019.  When the dma_addr_t was added,
> > it forced the alignment of the union to 8 bytes, which inserted a 4 byte
> > gap between 'flags' and the union.
> >
> > Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
> > This restores the alignment to that of an unsigned long, and also fixes a
> > potential problem where (on a big endian platform), the bit used to denote
> > PageTail could inadvertently get set, and a racing get_user_pages_fast()
> > could dereference a bogus compound_head().
> >
> > Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> 
> Thanks for your patch!
> 
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -97,10 +97,10 @@ struct page {
> >                 };
> >                 struct {        /* page_pool used by netstack */
> >                         /**
> > -                        * @dma_addr: might require a 64-bit value even on
> > +                        * @dma_addr: might require a 64-bit value on
> >                          * 32-bit architectures.
> >                          */
> > -                       dma_addr_t dma_addr;
> > +                       unsigned long dma_addr[2];
> 
> So we get two 64-bit words on 64-bit platforms, while only one is
> needed?
> 
> Would
> 
>     unsigned long _dma_addr[sizeof(dma_addr_t) / sizeof(unsigned long)];
> 
> work?
> 
> Or will the compiler become too overzealous, and warn about the use
> of ...[1] below, even when unreachable?
> I wouldn't mind an #ifdef instead of an if () in the code below, though.

You could use [ARRAY_SIZE()-1] instead of [1].
Or, since IIRC it is the last member of that specific struct, define as:
		unsigned long dma_addr[];

...
> > -       return page->dma_addr;
> > +       dma_addr_t ret = page->dma_addr[0];
> > +       if (sizeof(dma_addr_t) > sizeof(unsigned long))
> > +               ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
> 
> We don't seem to have a handy macro for a 32-bit left shift yet...
> 
> But you can also avoid the warning using
> 
>     ret |= (u64)page->dma_addr[1] << 32;

Or:
	ret |= page->dma_addr[1] + 0ull << 32;

Which relies in integer promotion rather than a cast.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-20  7:39     ` Geert Uytterhoeven
  2021-04-20  8:39       ` David Laight
@ 2021-04-20 11:32       ` Matthew Wilcox
  1 sibling, 0 replies; 26+ messages in thread
From: Matthew Wilcox @ 2021-04-20 11:32 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Arnd Bergmann, Grygorii Strashko, netdev, Ilias Apalodimas,
	Linux Kernel Mailing List, open list:BROADCOM NVRAM DRIVER,
	Michal Hocko, Linux MM, Mel Gorman, Jesper Dangaard Brouer,
	mcroce, arcml, linuxppc-dev, Christoph Hellwig, Linux ARM

On Tue, Apr 20, 2021 at 09:39:54AM +0200, Geert Uytterhoeven wrote:
> > +++ b/include/linux/mm_types.h
> > @@ -97,10 +97,10 @@ struct page {
> >                 };
> >                 struct {        /* page_pool used by netstack */
> >                         /**
> > -                        * @dma_addr: might require a 64-bit value even on
> > +                        * @dma_addr: might require a 64-bit value on
> >                          * 32-bit architectures.
> >                          */
> > -                       dma_addr_t dma_addr;
> > +                       unsigned long dma_addr[2];
> 
> So we get two 64-bit words on 64-bit platforms, while only one is
> needed?

Not really.  This is part of the 5-word union in struct page, so the space
ends up being reserved anyway, event if it's not "assigned" to dma_addr.

> > +       dma_addr_t ret = page->dma_addr[0];
> > +       if (sizeof(dma_addr_t) > sizeof(unsigned long))
> > +               ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
> 
> We don't seem to have a handy macro for a 32-bit left shift yet...
> 
> But you can also avoid the warning using
> 
>     ret |= (u64)page->dma_addr[1] << 32;

Sure.  It doesn't really matter which way we eliminate the warning;
the code is unreachable.

> > +{
> > +       page->dma_addr[0] = addr;
> > +       if (sizeof(dma_addr_t) > sizeof(unsigned long))
> > +               page->dma_addr[1] = addr >> 16 >> 16;
> 
> ... but we do have upper_32_bits() for a 32-bit right shift.

Yep, that's what my current tree looks like.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-20  7:07         ` Arnd Bergmann
@ 2021-04-20 21:14           ` Vineet Gupta
  2021-04-20 21:20             ` Arnd Bergmann
  0 siblings, 1 reply; 26+ messages in thread
From: Vineet Gupta @ 2021-04-20 21:14 UTC (permalink / raw)
  To: Arnd Bergmann, Matthew Wilcox
  Cc: grygorii.strashko, linux-snps-arc, netdev, Vineet Gupta,
	ilias.apalodimas, linux-kernel, linux-mips, linux-mm, mgorman,
	brouer, mcroce, mhocko, linuxppc-dev, hch, linux-arm-kernel

On 4/20/21 12:07 AM, Arnd Bergmann wrote:
> On Tue, Apr 20, 2021 at 5:10 AM Matthew Wilcox <willy@infradead.org> wrote:
>> On Tue, Apr 20, 2021 at 02:48:17AM +0000, Vineet Gupta wrote:
>>>> 32-bit architectures which expect 8-byte alignment for 8-byte integers
>>>> and need 64-bit DMA addresses (arc, arm, mips, ppc) had their struct
>>>> page inadvertently expanded in 2019.
>>> FWIW, ARC doesn't require 8 byte alignment for 8 byte integers. This is
>>> only needed for 8-byte atomics due to the requirements of LLOCKD/SCOND
>>> instructions.
>> Ah, like x86?  OK, great, I'll drop your arch from the list of
>> affected.  Thanks!
> I mistakenly assumed that i386 and m68k were the only supported
> architectures with 32-bit alignment on u64. I checked it now and found
>
> $ for i in /home/arnd/cross/x86_64/gcc-10.1.0-nolibc/*/bin/*-gcc ; do
> echo `echo 'int a = __alignof__(long long);' | $i -xc - -Wall -S -o- |
> grep -A1 a: | tail -n 1 | cut -f 3 -d\   `
> ${i#/home/arnd/cross/x86_64/gcc-10.1.0-nolibc/*/bin/} ; done
> 8 aarch64-linux-gcc
> 8 alpha-linux-gcc
> 4 arc-linux-gcc
> 8 arm-linux-gnueabi-gcc
> 8 c6x-elf-gcc
> 4 csky-linux-gcc
> 4 h8300-linux-gcc
> 8 hppa-linux-gcc
> 8 hppa64-linux-gcc
> 8 i386-linux-gcc
> 8 ia64-linux-gcc
> 2 m68k-linux-gcc
> 4 microblaze-linux-gcc
> 8 mips-linux-gcc
> 8 mips64-linux-gcc
> 8 nds32le-linux-gcc
> 4 nios2-linux-gcc
> 4 or1k-linux-gcc
> 8 powerpc-linux-gcc
> 8 powerpc64-linux-gcc
> 8 riscv32-linux-gcc
> 8 riscv64-linux-gcc
> 8 s390-linux-gcc
> 4 sh2-linux-gcc
> 4 sh4-linux-gcc
> 8 sparc-linux-gcc
> 8 sparc64-linux-gcc
> 8 x86_64-linux-gcc
> 8 xtensa-linux-gcc
>
> which means that half the 32-bit architectures do this. This may
> cause more problems when arc and/or microblaze want to support
> 64-bit kernels and compat mode in the future on their latest hardware,
> as that means duplicating the x86 specific hacks we have for compat.
>
> What is alignof(u64) on 64-bit arc?

$ echo 'int a = __alignof__(long long);' | arc64-linux-gnu-gcc -xc - 
-Wall -S -o - | grep -A1 a: | tail -n 1 | cut -f 3
8

Yeah ARCv2 alignment of 4 for 64-bit data was a bit of surprise finding 
for me as well. When 64-bit load/stores were initially targeted by the 
internal Metaware compiler (llvm based) they decided to keep alignment 
to 4 still (granted hardware allowed this) and then gcc guys decided to 
follow the same ABI. I only found this by accident :-)

Can you point me to some specifics on the compat issue. For better of 
worse, arc64 does''t have a compat 32-bit mode, so everything is 
64-on-64 or 32-on-32 (ARC32 flavor of ARCv3)

Thx,
-Vineet

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-20 21:14           ` Vineet Gupta
@ 2021-04-20 21:20             ` Arnd Bergmann
  2021-04-21  5:50               ` hch
  2021-04-21  8:43               ` David Laight
  0 siblings, 2 replies; 26+ messages in thread
From: Arnd Bergmann @ 2021-04-20 21:20 UTC (permalink / raw)
  To: Vineet Gupta
  Cc: grygorii.strashko, linux-snps-arc, netdev, ilias.apalodimas,
	linux-kernel, Matthew Wilcox, linux-mips, linux-mm, mgorman,
	brouer, mcroce, mhocko, linuxppc-dev, hch, linux-arm-kernel

On Tue, Apr 20, 2021 at 11:14 PM Vineet Gupta
<Vineet.Gupta1@synopsys.com> wrote:
> On 4/20/21 12:07 AM, Arnd Bergmann wrote:

> >
> > which means that half the 32-bit architectures do this. This may
> > cause more problems when arc and/or microblaze want to support
> > 64-bit kernels and compat mode in the future on their latest hardware,
> > as that means duplicating the x86 specific hacks we have for compat.
> >
> > What is alignof(u64) on 64-bit arc?
>
> $ echo 'int a = __alignof__(long long);' | arc64-linux-gnu-gcc -xc -
> -Wall -S -o - | grep -A1 a: | tail -n 1 | cut -f 3
> 8

Ok, good.

> Yeah ARCv2 alignment of 4 for 64-bit data was a bit of surprise finding
> for me as well. When 64-bit load/stores were initially targeted by the
> internal Metaware compiler (llvm based) they decided to keep alignment
> to 4 still (granted hardware allowed this) and then gcc guys decided to
> follow the same ABI. I only found this by accident :-)
>
> Can you point me to some specifics on the compat issue. For better of
> worse, arc64 does''t have a compat 32-bit mode, so everything is
> 64-on-64 or 32-on-32 (ARC32 flavor of ARCv3)

In that case, there should be no problem for you.

The main issue is with system calls and ioctls that contain a misaligned
struct member like

struct s {
       u32 a;
       u64 b;
};

Passing this structure by reference from a 32-bit user space application
to a 64-bit kernel with different alignment constraints means that the
kernel has to convert the structure layout. See
compat_ioctl_preallocate() in fs/ioctl.c for one such example.

       Arnd

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-20 21:20             ` Arnd Bergmann
@ 2021-04-21  5:50               ` hch
  2021-04-21  8:43               ` David Laight
  1 sibling, 0 replies; 26+ messages in thread
From: hch @ 2021-04-21  5:50 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: grygorii.strashko, linux-snps-arc, netdev, Vineet Gupta,
	ilias.apalodimas, linux-kernel, Matthew Wilcox, linux-mips,
	linux-mm, mgorman, brouer, mcroce, mhocko, linuxppc-dev, hch,
	linux-arm-kernel

On Tue, Apr 20, 2021 at 11:20:19PM +0200, Arnd Bergmann wrote:
> In that case, there should be no problem for you.
> 
> The main issue is with system calls and ioctls that contain a misaligned
> struct member like
> 
> struct s {
>        u32 a;
>        u64 b;
> };
> 
> Passing this structure by reference from a 32-bit user space application
> to a 64-bit kernel with different alignment constraints means that the
> kernel has to convert the structure layout. See
> compat_ioctl_preallocate() in fs/ioctl.c for one such example.

We've also had this problem with some on-disk structures in the past,
but hopefully people desining those have learnt the lesson by now.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-20 21:20             ` Arnd Bergmann
  2021-04-21  5:50               ` hch
@ 2021-04-21  8:43               ` David Laight
  2021-04-21  8:58                 ` Arnd Bergmann
  1 sibling, 1 reply; 26+ messages in thread
From: David Laight @ 2021-04-21  8:43 UTC (permalink / raw)
  To: 'Arnd Bergmann', Vineet Gupta
  Cc: grygorii.strashko, linux-snps-arc, netdev, ilias.apalodimas,
	linux-kernel, Matthew Wilcox, linux-mips, linux-mm, mgorman,
	brouer, mcroce, mhocko, linuxppc-dev, hch, linux-arm-kernel

From: Arnd Bergmann
> Sent: 20 April 2021 22:20
> 
> On Tue, Apr 20, 2021 at 11:14 PM Vineet Gupta
> <Vineet.Gupta1@synopsys.com> wrote:
> > On 4/20/21 12:07 AM, Arnd Bergmann wrote:
> 
> > >
> > > which means that half the 32-bit architectures do this. This may
> > > cause more problems when arc and/or microblaze want to support
> > > 64-bit kernels and compat mode in the future on their latest hardware,
> > > as that means duplicating the x86 specific hacks we have for compat.
> > >
> > > What is alignof(u64) on 64-bit arc?
> >
> > $ echo 'int a = __alignof__(long long);' | arc64-linux-gnu-gcc -xc -
> > -Wall -S -o - | grep -A1 a: | tail -n 1 | cut -f 3
> > 8
> 
> Ok, good.

That test doesn't prove anything.
Try running on x86:
$ echo 'int a = __alignof__(long long);' | gcc -xc - -Wall -S -o - -m32
        .file   ""
        .globl  a
        .data
        .align 4
        .type   a, @object
        .size   a, 4
a:
        .long   8
        .ident  "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.10) 5.4.0 20160609"
        .section        .note.GNU-stack,"",@progbits

Using '__alignof__(struct {long long x;})' does give the expected 4.

__alignof__() returns the preferred alignment, not the enforced
alignmnet - go figure.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] mm: Fix struct page layout on 32-bit systems
  2021-04-21  8:43               ` David Laight
@ 2021-04-21  8:58                 ` Arnd Bergmann
  0 siblings, 0 replies; 26+ messages in thread
From: Arnd Bergmann @ 2021-04-21  8:58 UTC (permalink / raw)
  To: David Laight
  Cc: grygorii.strashko, linux-snps-arc, netdev, Vineet Gupta,
	ilias.apalodimas, linux-kernel, Matthew Wilcox, linux-mips,
	linux-mm, mgorman, brouer, mcroce, mhocko, linuxppc-dev, hch,
	linux-arm-kernel

On Wed, Apr 21, 2021 at 10:43 AM David Laight <David.Laight@aculab.com> wrote:
> From: Arnd Bergmann Sent: 20 April 2021 22:20
> > On Tue, Apr 20, 2021 at 11:14 PM Vineet Gupta <Vineet.Gupta1@synopsys.com> wrote:
> > > On 4/20/21 12:07 AM, Arnd Bergmann wrote:
> >
> > > >
> > > > which means that half the 32-bit architectures do this. This may
> > > > cause more problems when arc and/or microblaze want to support
> > > > 64-bit kernels and compat mode in the future on their latest hardware,
> > > > as that means duplicating the x86 specific hacks we have for compat.
> > > >
> > > > What is alignof(u64) on 64-bit arc?
> > >
> > > $ echo 'int a = __alignof__(long long);' | arc64-linux-gnu-gcc -xc -
> > > -Wall -S -o - | grep -A1 a: | tail -n 1 | cut -f 3
> > > 8
> >
> > Ok, good.
>
> That test doesn't prove anything.
> Try running on x86:
> $ echo 'int a = __alignof__(long long);' | gcc -xc - -Wall -S -o - -m32
> a:
>         .long   8

Right, I had wondered about that one after I sent the email.

> Using '__alignof__(struct {long long x;})' does give the expected 4.
>
> __alignof__() returns the preferred alignment, not the enforced
> alignmnet - go figure.

I checked the others as well now, and i386 is the only one that
changed here: m68k still has '2', while arc/csky/h8300/microblaze/
nios2/or1k/sh/i386 all have '4' and the rest have '8'.

     Arnd

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2021-04-21  8:59 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-16 23:07 [PATCH 0/2] Change struct page layout for page_pool Matthew Wilcox (Oracle)
2021-04-16 23:07 ` [PATCH 1/2] mm: Fix struct page layout on 32-bit systems Matthew Wilcox (Oracle)
2021-04-17  1:08   ` kernel test robot
2021-04-17  2:45   ` Matthew Wilcox
2021-04-17 18:32     ` Ilias Apalodimas
2021-04-17 20:22       ` Matthew Wilcox
2021-04-18 11:21         ` Ilias Apalodimas
2021-04-17 21:18     ` David Laight
2021-04-17 22:45       ` Matthew Wilcox
2021-04-20  2:48     ` Vineet Gupta
2021-04-20  3:10       ` Matthew Wilcox
2021-04-20  7:07         ` Arnd Bergmann
2021-04-20 21:14           ` Vineet Gupta
2021-04-20 21:20             ` Arnd Bergmann
2021-04-21  5:50               ` hch
2021-04-21  8:43               ` David Laight
2021-04-21  8:58                 ` Arnd Bergmann
2021-04-20  7:39     ` Geert Uytterhoeven
2021-04-20  8:39       ` David Laight
2021-04-20 11:32       ` Matthew Wilcox
2021-04-17  7:34   ` Jesper Dangaard Brouer
2021-04-20  7:21   ` Rasmus Villemoes
2021-04-16 23:07 ` [PATCH 2/2] mm: Indicate pfmemalloc pages in compound_head Matthew Wilcox (Oracle)
2021-04-17 21:13   ` David Laight
2021-04-17 21:28     ` Matthew Wilcox
2021-04-20  4:03 ` [PATCH 0/2] Change struct page layout for page_pool Michael Ellerman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).