* [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill
@ 2018-11-15 9:43 Jason Wang
2018-11-15 9:43 ` [PATCH net-next 2/2] tuntap: free XDP dropped packets in a batch Jason Wang
2018-11-17 20:01 ` [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill David Miller
0 siblings, 2 replies; 4+ messages in thread
From: Jason Wang @ 2018-11-15 9:43 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev, linux-kernel
We do a get_page() which involves a atomic operation. This patch tries
to mitigate a per packet atomic operation by maintaining a reference
bias which is initially USHRT_MAX. Each time a page is got, instead of
calling get_page() we decrease the bias and when we find it's time to
use a new page we will decrease the bias at one time through
__page_cache_drain_cache().
Testpmd(virtio_user + vhost_net) + XDP_DROP on TAP shows about 1.6%
improvement.
Before: 4.63Mpps
After: 4.71Mpps
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/net.c | 54 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 51 insertions(+), 3 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ab11b2bee273..d919284f103b 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -141,6 +141,10 @@ struct vhost_net {
unsigned tx_zcopy_err;
/* Flush in progress. Protected by tx vq lock. */
bool tx_flush;
+ /* Private page frag */
+ struct page_frag page_frag;
+ /* Refcount bias of page frag */
+ int refcnt_bias;
};
static unsigned vhost_net_zcopy_mask __read_mostly;
@@ -637,14 +641,53 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
!vhost_vq_avail_empty(vq->dev, vq);
}
+#define SKB_FRAG_PAGE_ORDER get_order(32768)
+
+static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz,
+ struct page_frag *pfrag, gfp_t gfp)
+{
+ if (pfrag->page) {
+ if (pfrag->offset + sz <= pfrag->size)
+ return true;
+ __page_frag_cache_drain(pfrag->page, net->refcnt_bias);
+ }
+
+ pfrag->offset = 0;
+ net->refcnt_bias = 0;
+ if (SKB_FRAG_PAGE_ORDER) {
+ /* Avoid direct reclaim but allow kswapd to wake */
+ pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_COMP | __GFP_NOWARN |
+ __GFP_NORETRY,
+ SKB_FRAG_PAGE_ORDER);
+ if (likely(pfrag->page)) {
+ pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
+ goto done;
+ }
+ }
+ pfrag->page = alloc_page(gfp);
+ if (likely(pfrag->page)) {
+ pfrag->size = PAGE_SIZE;
+ goto done;
+ }
+ return false;
+
+done:
+ net->refcnt_bias = USHRT_MAX;
+ page_ref_add(pfrag->page, USHRT_MAX - 1);
+ return true;
+}
+
#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
struct iov_iter *from)
{
struct vhost_virtqueue *vq = &nvq->vq;
+ struct vhost_net *net = container_of(vq->dev, struct vhost_net,
+ dev);
struct socket *sock = vq->private_data;
- struct page_frag *alloc_frag = ¤t->task_frag;
+ struct page_frag *alloc_frag = &net->page_frag;
struct virtio_net_hdr *gso;
struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
struct tun_xdp_hdr *hdr;
@@ -665,7 +708,8 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
buflen += SKB_DATA_ALIGN(len + pad);
alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
- if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
+ if (unlikely(!vhost_net_page_frag_refill(net, buflen,
+ alloc_frag, GFP_KERNEL)))
return -ENOMEM;
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
@@ -703,7 +747,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
xdp->data_end = xdp->data + len;
hdr->buflen = buflen;
- get_page(alloc_frag->page);
+ --net->refcnt_bias;
alloc_frag->offset += buflen;
++nvq->batched_xdp;
@@ -1292,6 +1336,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
f->private_data = n;
+ n->page_frag.page = NULL;
+ n->refcnt_bias = 0;
return 0;
}
@@ -1366,6 +1412,8 @@ static int vhost_net_release(struct inode *inode, struct file *f)
kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue);
kfree(n->vqs[VHOST_NET_VQ_TX].xdp);
kfree(n->dev.vqs);
+ if (n->page_frag.page)
+ __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias);
kvfree(n);
return 0;
}
--
2.17.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH net-next 2/2] tuntap: free XDP dropped packets in a batch
2018-11-15 9:43 [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill Jason Wang
@ 2018-11-15 9:43 ` Jason Wang
2018-11-17 20:01 ` David Miller
2018-11-17 20:01 ` [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill David Miller
1 sibling, 1 reply; 4+ messages in thread
From: Jason Wang @ 2018-11-15 9:43 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev, linux-kernel
Thanks to the batched XDP buffs through msg_control. Instead of
calling put_page() for each page which involves a atomic operation,
let's batch them by record the last page that needs to be freed and
its refcnt count and free them in a batch.
Testpmd(virtio-user + vhost_net) + XDP_DROP shows 3.8% improvement.
Before: 4.71Mpps
After : 4.89Mpps
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/tun.c | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a65779c6d72f..e90a7923a5f6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -188,6 +188,11 @@ struct tun_file {
struct xdp_rxq_info xdp_rxq;
};
+struct tun_page {
+ struct page *page;
+ int count;
+};
+
struct tun_flow_entry {
struct hlist_node hash_link;
struct rcu_head rcu;
@@ -2377,9 +2382,16 @@ static void tun_sock_write_space(struct sock *sk)
kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
}
+static void tun_put_page(struct tun_page *tpage)
+{
+ if (tpage->page)
+ __page_frag_cache_drain(tpage->page, tpage->count);
+}
+
static int tun_xdp_one(struct tun_struct *tun,
struct tun_file *tfile,
- struct xdp_buff *xdp, int *flush)
+ struct xdp_buff *xdp, int *flush,
+ struct tun_page *tpage)
{
struct tun_xdp_hdr *hdr = xdp->data_hard_start;
struct virtio_net_hdr *gso = &hdr->gso;
@@ -2390,6 +2402,7 @@ static int tun_xdp_one(struct tun_struct *tun,
int buflen = hdr->buflen;
int err = 0;
bool skb_xdp = false;
+ struct page *page;
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
@@ -2416,7 +2429,14 @@ static int tun_xdp_one(struct tun_struct *tun,
case XDP_PASS:
break;
default:
- put_page(virt_to_head_page(xdp->data));
+ page = virt_to_head_page(xdp->data);
+ if (tpage->page == page) {
+ ++tpage->count;
+ } else {
+ tun_put_page(tpage);
+ tpage->page = page;
+ tpage->count = 1;
+ }
return 0;
}
}
@@ -2480,6 +2500,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
return -EBADFD;
if (ctl && (ctl->type == TUN_MSG_PTR)) {
+ struct tun_page tpage = {0};
int n = ctl->num;
int flush = 0;
@@ -2488,7 +2509,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
for (i = 0; i < n; i++) {
xdp = &((struct xdp_buff *)ctl->ptr)[i];
- tun_xdp_one(tun, tfile, xdp, &flush);
+ tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
}
if (flush)
@@ -2497,6 +2518,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
rcu_read_unlock();
local_bh_enable();
+ tun_put_page(&tpage);
+
ret = total_len;
goto out;
}
--
2.17.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill
2018-11-15 9:43 [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill Jason Wang
2018-11-15 9:43 ` [PATCH net-next 2/2] tuntap: free XDP dropped packets in a batch Jason Wang
@ 2018-11-17 20:01 ` David Miller
1 sibling, 0 replies; 4+ messages in thread
From: David Miller @ 2018-11-17 20:01 UTC (permalink / raw)
To: jasowang; +Cc: mst, kvm, virtualization, netdev, linux-kernel
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 15 Nov 2018 17:43:09 +0800
> We do a get_page() which involves a atomic operation. This patch tries
> to mitigate a per packet atomic operation by maintaining a reference
> bias which is initially USHRT_MAX. Each time a page is got, instead of
> calling get_page() we decrease the bias and when we find it's time to
> use a new page we will decrease the bias at one time through
> __page_cache_drain_cache().
>
> Testpmd(virtio_user + vhost_net) + XDP_DROP on TAP shows about 1.6%
> improvement.
>
> Before: 4.63Mpps
> After: 4.71Mpps
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
Applied.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH net-next 2/2] tuntap: free XDP dropped packets in a batch
2018-11-15 9:43 ` [PATCH net-next 2/2] tuntap: free XDP dropped packets in a batch Jason Wang
@ 2018-11-17 20:01 ` David Miller
0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2018-11-17 20:01 UTC (permalink / raw)
To: jasowang; +Cc: mst, kvm, virtualization, netdev, linux-kernel
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 15 Nov 2018 17:43:10 +0800
> Thanks to the batched XDP buffs through msg_control. Instead of
> calling put_page() for each page which involves a atomic operation,
> let's batch them by record the last page that needs to be freed and
> its refcnt count and free them in a batch.
>
> Testpmd(virtio-user + vhost_net) + XDP_DROP shows 3.8% improvement.
>
> Before: 4.71Mpps
> After : 4.89Mpps
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
Applied.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2018-11-17 20:01 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-15 9:43 [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill Jason Wang
2018-11-15 9:43 ` [PATCH net-next 2/2] tuntap: free XDP dropped packets in a batch Jason Wang
2018-11-17 20:01 ` David Miller
2018-11-17 20:01 ` [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).