From: Pavel Begunkov <asml.silence@gmail.com>
To: Mina Almasry <almasrymina@google.com>,
	Shailend Chand <shailend@google.com>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-kselftest@vger.kernel.org, bpf@vger.kernel.org,
	linux-media@vger.kernel.org, dri-devel@lists.freedesktop.org
Cc: "David S. Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Jeroen de Borst" <jeroendb@google.com>,
	"Praveen Kaligineedi" <pkaligineedi@google.com>,
	"Jesper Dangaard Brouer" <hawk@kernel.org>,
	"Ilias Apalodimas" <ilias.apalodimas@linaro.org>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"David Ahern" <dsahern@kernel.org>,
	"Willem de Bruijn" <willemdebruijn.kernel@gmail.com>,
	"Shuah Khan" <shuah@kernel.org>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Christian König" <christian.koenig@amd.com>,
	"Yunsheng Lin" <linyunsheng@huawei.com>,
	"Harshitha Ramamurthy" <hramamurthy@google.com>,
	"Shakeel Butt" <shakeelb@google.com>,
	"Willem de Bruijn" <willemb@google.com>,
	"Kaiyuan Zhang" <kaiyuanz@google.com>
Subject: Re: [net-next v1 08/16] memory-provider: dmabuf devmem memory provider
Date: Fri, 8 Dec 2023 22:48:39 +0000	[thread overview]
Message-ID: <b07a4eca-0c3d-4620-9f97-b1d2c76642c2@gmail.com> (raw)
In-Reply-To: <20231208005250.2910004-9-almasrymina@google.com>

On 12/8/23 00:52, Mina Almasry wrote:
> Implement a memory provider that allocates dmabuf devmem page_pool_iovs.
> 
> The provider receives a reference to the struct netdev_dmabuf_binding
> via the pool->mp_priv pointer. The driver needs to set this pointer for
> the provider in the page_pool_params.
> 
> The provider obtains a reference on the netdev_dmabuf_binding, which
> guarantees that the binding and the underlying mapping remain alive
> until the provider is destroyed.
> 
> Usage of PP_FLAG_DMA_MAP is required for this memory provider so that
> the page_pool can provide the driver with the dma-addrs of the devmem.
> 
> Support for PP_FLAG_DMA_SYNC_DEV is omitted for simplicity.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com>
> Signed-off-by: Mina Almasry <almasrymina@google.com>
> 
> ---
> 
> v1:
> - static_branch check in page_is_page_pool_iov() (Willem & Paolo).
> - PP_DEVMEM -> PP_IOV (David).
> - Require PP_FLAG_DMA_MAP (Jakub).
> 
> ---
>   include/net/page_pool/helpers.h | 47 +++++++++++++++++
>   include/net/page_pool/types.h   |  9 ++++
>   net/core/page_pool.c            | 89 ++++++++++++++++++++++++++++++++-
>   3 files changed, 144 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
> index 8bfc2d43efd4..00197f14aa87 100644
> --- a/include/net/page_pool/helpers.h
> +++ b/include/net/page_pool/helpers.h
> @@ -53,6 +53,8 @@
>   #define _NET_PAGE_POOL_HELPERS_H
>   
>   #include <net/page_pool/types.h>
> +#include <net/net_debug.h>
> +#include <net/devmem.h>
>   
>   #ifdef CONFIG_PAGE_POOL_STATS
>   /* Deprecated driver-facing API, use netlink instead */
> @@ -92,6 +94,11 @@ static inline unsigned int page_pool_iov_idx(const struct page_pool_iov *ppiov)
>   	return ppiov - page_pool_iov_owner(ppiov)->ppiovs;
>   }
>   
> +static inline u32 page_pool_iov_binding_id(const struct page_pool_iov *ppiov)
> +{
> +	return page_pool_iov_owner(ppiov)->binding->id;
> +}
> +
>   static inline dma_addr_t
>   page_pool_iov_dma_addr(const struct page_pool_iov *ppiov)
>   {
> @@ -107,6 +114,46 @@ page_pool_iov_binding(const struct page_pool_iov *ppiov)
>   	return page_pool_iov_owner(ppiov)->binding;
>   }
>   
> +static inline int page_pool_iov_refcount(const struct page_pool_iov *ppiov)
> +{
> +	return refcount_read(&ppiov->refcount);
> +}
> +
> +static inline void page_pool_iov_get_many(struct page_pool_iov *ppiov,
> +					  unsigned int count)
> +{
> +	refcount_add(count, &ppiov->refcount);
> +}
> +
> +void __page_pool_iov_free(struct page_pool_iov *ppiov);
> +
> +static inline void page_pool_iov_put_many(struct page_pool_iov *ppiov,
> +					  unsigned int count)
> +{
> +	if (!refcount_sub_and_test(count, &ppiov->refcount))
> +		return;
> +
> +	__page_pool_iov_free(ppiov);
> +}
> +
> +/* page pool mm helpers */
> +
> +DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
> +static inline bool page_is_page_pool_iov(const struct page *page)
> +{
> +	return static_branch_unlikely(&page_pool_mem_providers) &&
> +	       (unsigned long)page & PP_IOV;
> +}
> +
> +static inline struct page_pool_iov *page_to_page_pool_iov(struct page *page)
> +{
> +	if (page_is_page_pool_iov(page))
> +		return (struct page_pool_iov *)((unsigned long)page & ~PP_IOV);
> +
> +	DEBUG_NET_WARN_ON_ONCE(true);
> +	return NULL;
> +}
> +
>   /**
>    * page_pool_dev_alloc_pages() - allocate a page.
>    * @pool:	pool from which to allocate
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 44faee7a7b02..136930a238de 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -134,8 +134,15 @@ struct memory_provider_ops {
>   	bool (*release_page)(struct page_pool *pool, struct page *page);
>   };
>   
> +extern const struct memory_provider_ops dmabuf_devmem_ops;
> +
>   /* page_pool_iov support */
>   
> +/*  We overload the LSB of the struct page pointer to indicate whether it's
> + *  a page or page_pool_iov.
> + */
> +#define PP_IOV 0x01UL
> +
>   /* Owner of the dma-buf chunks inserted into the gen pool. Each scatterlist
>    * entry from the dmabuf is inserted into the genpool as a chunk, and needs
>    * this owner struct to keep track of some metadata necessary to create
> @@ -159,6 +166,8 @@ struct page_pool_iov {
>   	struct dmabuf_genpool_chunk_owner *owner;
>   
>   	refcount_t refcount;
> +
> +	struct page_pool *pp;
>   };
>   
>   struct page_pool {
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index f5c84d2a4510..423c88564a00 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -12,6 +12,7 @@
>   
>   #include <net/page_pool/helpers.h>
>   #include <net/xdp.h>
> +#include <net/netdev_rx_queue.h>
>   
>   #include <linux/dma-direction.h>
>   #include <linux/dma-mapping.h>
> @@ -20,12 +21,15 @@
>   #include <linux/poison.h>
>   #include <linux/ethtool.h>
>   #include <linux/netdevice.h>
> +#include <linux/genalloc.h>
> +#include <net/devmem.h>
>   
>   #include <trace/events/page_pool.h>
>   
>   #include "page_pool_priv.h"
>   
> -static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
> +DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
> +EXPORT_SYMBOL(page_pool_mem_providers);
>   
>   #define DEFER_TIME (msecs_to_jiffies(1000))
>   #define DEFER_WARN_INTERVAL (60 * HZ)
> @@ -175,6 +179,7 @@ static void page_pool_producer_unlock(struct page_pool *pool,
>   static int page_pool_init(struct page_pool *pool,
>   			  const struct page_pool_params *params)
>   {
> +	struct netdev_dmabuf_binding *binding = NULL;
>   	unsigned int ring_qsize = 1024; /* Default */
>   	int err;
>   
> @@ -237,6 +242,14 @@ static int page_pool_init(struct page_pool *pool,
>   	/* Driver calling page_pool_create() also call page_pool_destroy() */
>   	refcount_set(&pool->user_cnt, 1);
>   
> +	if (pool->p.queue)
> +		binding = READ_ONCE(pool->p.queue->binding);
> +
> +	if (binding) {
> +		pool->mp_ops = &dmabuf_devmem_ops;
> +		pool->mp_priv = binding;
> +	}

Hmm, I don't understand why we would replace a nice transparent API
with the page pool relying on a queue carrying a devmem-specific
pointer. It seemed more flexible and cleaner in the last RFC.
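
For reference, here is a rough sketch of the driver-facing setup I
mean, along the lines of the previous RFC (the mp_ops/mp_priv fields
in page_pool_params are assumed from that RFC, not something this
series exposes):

	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP,
		.pool_size	= ring_size,
		.nid		= NUMA_NO_NODE,
		.dev		= &pdev->dev,
		.dma_dir	= DMA_FROM_DEVICE,
		/* the driver picks the provider explicitly */
		.mp_ops		= &dmabuf_devmem_ops,
		.mp_priv	= binding,
	};

	pool = page_pool_create(&pp_params);

That keeps the page pool core agnostic of where mp_priv comes from
instead of having it peek at pool->p.queue->binding.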

> +
>   	if (pool->mp_ops) {
>   		err = pool->mp_ops->init(pool);
>   		if (err) {
> @@ -1020,3 +1033,77 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
>   	}
>   }
>   EXPORT_SYMBOL(page_pool_update_nid);
> +
> +void __page_pool_iov_free(struct page_pool_iov *ppiov)
> +{
> +	if (WARN_ON(ppiov->pp->mp_ops != &dmabuf_devmem_ops))
> +		return;
> +
> +	netdev_free_dmabuf(ppiov);
> +}
> +EXPORT_SYMBOL_GPL(__page_pool_iov_free);

I didn't look too deep, but I don't think I immediately follow the
pp refcounting. It increments pages_state_hold_cnt on allocation, but
IIUC doesn't mark skbs for recycle? Then they will all be put down via
page_pool_iov_put_many(), bypassing page_pool_return_page() and
friends. That will call netdev_free_dmabuf(), which doesn't bump
pages_state_release_cnt.
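
To spell out the imbalance as I read it (paraphrased call chains, not
actual code):

	/* alloc side: hold is accounted */
	ppiov = netdev_alloc_dmabuf(binding);
	pool->pages_state_hold_cnt++;

	/* put side, skb not marked for recycle:
	 * release is never accounted
	 */
	page_pool_iov_put_many(ppiov, 1);
	  -> __page_pool_iov_free(ppiov)
	    -> netdev_free_dmabuf(ppiov);

So hold_cnt keeps growing while release_cnt stays put, and the pool
can never tell that all of its buffers came back.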

At least I couldn't make it work with io_uring, so for my purposes I
forced all puts to go through page_pool_return_page(), which calls the
->release_page callback. The callback puts the reference and asks its
page pool to account for release_cnt. That also gets rid of
__page_pool_iov_free(), as we'd otherwise need to add a hook there for
customization.
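
Roughly what that ->release_page ends up doing (untested sketch;
page_pool_inc_release_cnt() is a made-up name standing in for whatever
does the pages_state_release_cnt accounting):

	static bool mp_dmabuf_devmem_release_page(struct page_pool *pool,
						  struct page *page)
	{
		struct page_pool_iov *ppiov = page_to_page_pool_iov(page);

		/* drop the provider reference */
		page_pool_iov_put_many(ppiov, 1);
		/* have the pool account the release so the hold/release
		 * counters stay balanced
		 */
		page_pool_inc_release_cnt(pool);
		/* never let the pool put_page() a page_pool_iov */
		return false;
	}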

I didn't care about the overhead because the hot path for me is
getting buffers from a ring, which is somewhat analogous to
sock_devmem_dontneed() but done on pp allocations under napi, and it
happens separately.

Completely untested with TCP devmem:

https://github.com/isilence/linux/commit/14bd56605183dc80b540999e8058c79ac92ae2d8

> +
> +/*** "Dmabuf devmem memory provider" ***/
> +
> +static int mp_dmabuf_devmem_init(struct page_pool *pool)
> +{
> +	struct netdev_dmabuf_binding *binding = pool->mp_priv;
> +
> +	if (!binding)
> +		return -EINVAL;
> +
> +	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
> +		return -EOPNOTSUPP;
> +
> +	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
> +		return -EOPNOTSUPP;
> +
> +	netdev_dmabuf_binding_get(binding);
> +	return 0;
> +}
> +
> +static struct page *mp_dmabuf_devmem_alloc_pages(struct page_pool *pool,
> +						 gfp_t gfp)
> +{
> +	struct netdev_dmabuf_binding *binding = pool->mp_priv;
> +	struct page_pool_iov *ppiov;
> +
> +	ppiov = netdev_alloc_dmabuf(binding);
> +	if (!ppiov)
> +		return NULL;
> +
> +	ppiov->pp = pool;
> +	pool->pages_state_hold_cnt++;
> +	trace_page_pool_state_hold(pool, (struct page *)ppiov,
> +				   pool->pages_state_hold_cnt);
> +	return (struct page *)((unsigned long)ppiov | PP_IOV);
> +}
> +
> +static void mp_dmabuf_devmem_destroy(struct page_pool *pool)
> +{
> +	struct netdev_dmabuf_binding *binding = pool->mp_priv;
> +
> +	netdev_dmabuf_binding_put(binding);
> +}
> +
> +static bool mp_dmabuf_devmem_release_page(struct page_pool *pool,
> +					  struct page *page)
> +{
> +	struct page_pool_iov *ppiov;
> +
> +	if (WARN_ON_ONCE(!page_is_page_pool_iov(page)))
> +		return false;
> +
> +	ppiov = page_to_page_pool_iov(page);
> +	page_pool_iov_put_many(ppiov, 1);
> +	/* We don't want the page pool put_page()ing our page_pool_iovs. */
> +	return false;
> +}
> +
> +const struct memory_provider_ops dmabuf_devmem_ops = {
> +	.init			= mp_dmabuf_devmem_init,
> +	.destroy		= mp_dmabuf_devmem_destroy,
> +	.alloc_pages		= mp_dmabuf_devmem_alloc_pages,
> +	.release_page		= mp_dmabuf_devmem_release_page,
> +};
> +EXPORT_SYMBOL(dmabuf_devmem_ops);

-- 
Pavel Begunkov
