From patchwork Wed May 10 02:37:53 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786369 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752193AbdEJCkk (ORCPT ); Tue, 9 May 2017 22:40:40 -0400 Received: from mx1.redhat.com ([209.132.183.28]:55684 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751569AbdEJCiO (ORCPT ); Tue, 9 May 2017 22:38:14 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 720EB3B71B Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 720EB3B71B From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 1/9] ptr_ring: add ptr_ring_unconsume Date: Wed, 10 May 2017 10:37:53 +0800 Message-Id: <1494383881-6811-2-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Wed, 10 May 2017 02:38:14 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 2519 Lines: 84 From: "Michael S. Tsirkin" Applications that consume a batch of entries in one go can benefit from ability to return some of them back into the ring. Add an API for that - assuming there's space. If there's no space naturally can't do this and have to drop entries, but this implies ring is full so we'd likely drop some anyway. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- include/linux/ptr_ring.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6c70444..5476f68 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -359,6 +359,61 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } +/* + * Return entries into ring. Destroy entries that don't fit. + * + * Note: this is expected to be a rare slow path operation. + * + * Note: producer lock is nested within consumer lock, so if you + * resize you must make sure all uses nest correctly. + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. + */ +static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, + void (*destroy)(void *)) +{ + unsigned long flags; + int head; + + spin_lock_irqsave(&r->consumer_lock, flags); + spin_lock(&r->producer_lock); + + if (!r->size) + goto done; + + /* + * Clean out buffered entries (for simplicity). This way following code + * can test entries for NULL and if not assume they are valid. + */ + head = r->consumer_head - 1; + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + r->consumer_tail = r->consumer_head; + + /* + * Go over entries in batch, start moving head back and copy entries. + * Stop when we run into previously unconsumed entries. + */ + while (n) { + head = r->consumer_head - 1; + if (head < 0) + head = r->size - 1; + if (r->queue[head]) { + /* This batch entry will have to be destroyed. */ + goto done; + } + r->queue[head] = batch[--n]; + r->consumer_tail = r->consumer_head = head; + } + +done: + /* Destroy all entries left in the batch. 
*/ + while (n) + destroy(batch[--n]); + spin_unlock(&r->producer_lock); + spin_unlock_irqrestore(&r->consumer_lock, flags); +} + static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, int size, gfp_t gfp, void (*destroy)(void *)) From patchwork Wed May 10 02:37:54 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786361 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751714AbdEJCiV (ORCPT ); Tue, 9 May 2017 22:38:21 -0400 Received: from mx1.redhat.com ([209.132.183.28]:44394 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751638AbdEJCiT (ORCPT ); Tue, 9 May 2017 22:38:19 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 884A24E4C6 Authentication-Results: ext-mx09.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx09.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 884A24E4C6 From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 2/9] skb_array: introduce skb_array_unconsume Date: Wed, 10 May 2017 10:37:54 +0800 Message-Id: <1494383881-6811-3-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.38]); Wed, 10 May 2017 02:38:18 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 725 Lines: 24 Signed-off-by: Jason Wang --- include/linux/skb_array.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 
f4dfade..79850b6 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -156,6 +156,12 @@ static void __skb_array_destroy_skb(void *ptr) kfree_skb(ptr); } +static inline void skb_array_unconsume(struct skb_array *a, + struct sk_buff **skbs, int n) +{ + ptr_ring_unconsume(&a->ring, (void **)skbs, n, __skb_array_destroy_skb); +} + static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); From patchwork Wed May 10 02:37:55 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786366 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751787AbdEJCi0 (ORCPT ); Tue, 9 May 2017 22:38:26 -0400 Received: from mx1.redhat.com ([209.132.183.28]:60444 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751728AbdEJCiY (ORCPT ); Tue, 9 May 2017 22:38:24 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com A6FE97F4BB Authentication-Results: ext-mx02.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx02.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com A6FE97F4BB From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 3/9] ptr_ring: introduce batch dequeuing Date: Wed, 10 May 2017 10:37:55 +0800 Message-Id: <1494383881-6811-4-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.26]); Wed, 10 May 2017 02:38:23 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 2371 Lines: 94 This patch introduces a batched version of consuming: a consumer can dequeue more than one pointer from the ring at a time. We don't care about the reordering of reads here, so there is no need for a compiler barrier. Signed-off-by: Jason Wang --- include/linux/ptr_ring.h | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 5476f68..9db39e6 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -247,6 +247,22 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) return ptr; } +static inline int __ptr_ring_consume_batched(struct ptr_ring *r, + void **array, int n) +{ + void *ptr; + int i; + + for (i = 0; i < n; i++) { + ptr = __ptr_ring_consume(r); + if (!ptr) + break; + array[i] = ptr; + } + + return i; +} + /* * Note: resize (below) nests producer lock within consumer lock, so if you * call this in interrupt or BH context, you must disable interrupts/BH when @@ -297,6 +313,55 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r) return ptr; } +static inline int ptr_ring_consume_batched(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock(&r->consumer_lock); + + return ret; +} + +static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock_irq(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_irq(&r->consumer_lock); + + return ret; +} + +static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, + void **array, int n) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&r->consumer_lock, flags); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ret; +} + +static inline int 
ptr_ring_consume_batched_bh(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock_bh(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_bh(&r->consumer_lock); + + return ret; +} + /* Cast to structure type and call a function without discarding from FIFO. * Function must return a value. * Callers must take consumer_lock. From patchwork Wed May 10 02:37:56 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786362 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751868AbdEJCid (ORCPT ); Tue, 9 May 2017 22:38:33 -0400 Received: from mx1.redhat.com ([209.132.183.28]:33384 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751795AbdEJCia (ORCPT ); Tue, 9 May 2017 22:38:30 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com C6E2080470 Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com C6E2080470 From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 4/9] skb_array: introduce batch dequeuing Date: Wed, 10 May 2017 10:37:56 +0800 Message-Id: <1494383881-6811-5-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.28]); Wed, 10 May 2017 02:38:29 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 1640 Lines: 58 Signed-off-by: Jason Wang --- include/linux/skb_array.h | 
25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 79850b6..35226cd 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -97,21 +97,46 @@ static inline struct sk_buff *skb_array_consume(struct skb_array *a) return ptr_ring_consume(&a->ring); } +static inline int skb_array_consume_batched(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched(&a->ring, (void **)array, n); +} + static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a) { return ptr_ring_consume_irq(&a->ring); } +static inline int skb_array_consume_batched_irq(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_irq(&a->ring, (void **)array, n); +} + static inline struct sk_buff *skb_array_consume_any(struct skb_array *a) { return ptr_ring_consume_any(&a->ring); } +static inline int skb_array_consume_batched_any(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_any(&a->ring, (void **)array, n); +} + + static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a) { return ptr_ring_consume_bh(&a->ring); } +static inline int skb_array_consume_batched_bh(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_bh(&a->ring, (void **)array, n); +} + static inline int __skb_array_len_with_tag(struct sk_buff *skb) { if (likely(skb)) { From patchwork Wed May 10 02:37:57 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786364 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751929AbdEJCii (ORCPT ); Tue, 9 May 2017 22:38:38 -0400 Received: from mx1.redhat.com ([209.132.183.28]:58306 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751795AbdEJCif (ORCPT ); Tue, 9 May 2017 
22:38:35 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 715373DE42 Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 715373DE42 From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 5/9] tun: export skb_array Date: Wed, 10 May 2017 10:37:57 +0800 Message-Id: <1494383881-6811-6-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.29]); Wed, 10 May 2017 02:38:35 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 1548 Lines: 57 This patch exports skb_array through tun_get_skb_array(). Caller can then manipulate skb array directly. 
Signed-off-by: Jason Wang --- drivers/net/tun.c | 13 +++++++++++++ include/linux/if_tun.h | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bbd707b..3cbfc5c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2626,6 +2626,19 @@ struct socket *tun_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tun_get_socket); +struct skb_array *tun_get_skb_array(struct file *file) +{ + struct tun_file *tfile; + + if (file->f_op != &tun_fops) + return ERR_PTR(-EINVAL); + tfile = file->private_data; + if (!tfile) + return ERR_PTR(-EBADFD); + return &tfile->tx_array; +} +EXPORT_SYMBOL_GPL(tun_get_skb_array); + module_init(tun_init); module_exit(tun_cleanup); MODULE_DESCRIPTION(DRV_DESCRIPTION); diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index ed6da2e..bf9bdf4 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -19,6 +19,7 @@ #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); +struct skb_array *tun_get_skb_array(struct file *file); #else #include #include @@ -28,5 +29,9 @@ static inline struct socket *tun_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } +static inline struct skb_array *tun_get_skb_array(struct file *f) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TUN */ #endif /* __IF_TUN_H */ From patchwork Wed May 10 02:37:58 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786363 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751980AbdEJCip (ORCPT ); Tue, 9 May 2017 22:38:45 -0400 Received: from mx1.redhat.com ([209.132.183.28]:55894 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751935AbdEJCik (ORCPT ); Tue, 9 May 2017 22:38:40 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 8E4C42D9FC8 Authentication-Results: 
ext-mx06.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 8E4C42D9FC8 From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 6/9] tap: export skb_array Date: Wed, 10 May 2017 10:37:58 +0800 Message-Id: <1494383881-6811-7-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Wed, 10 May 2017 02:38:40 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 1500 Lines: 58 This patch exports skb_array through tap_get_skb_array(). Caller can then manipulate skb array directly. 
Signed-off-by: Jason Wang --- drivers/net/tap.c | 13 +++++++++++++ include/linux/if_tap.h | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 4d4173d..abdaf86 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1193,6 +1193,19 @@ struct socket *tap_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tap_get_socket); +struct skb_array *tap_get_skb_array(struct file *file) +{ + struct tap_queue *q; + + if (file->f_op != &tap_fops) + return ERR_PTR(-EINVAL); + q = file->private_data; + if (!q) + return ERR_PTR(-EBADFD); + return &q->skb_array; +} +EXPORT_SYMBOL_GPL(tap_get_skb_array); + int tap_queue_resize(struct tap_dev *tap) { struct net_device *dev = tap->dev; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 3482c3c..4837157 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -3,6 +3,7 @@ #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); +struct skb_array *tap_get_skb_array(struct file *file); #else #include #include @@ -12,6 +13,10 @@ static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } +static inline struct skb_array *tap_get_skb_array(struct file *f) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TAP */ #include From patchwork Wed May 10 02:37:59 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786365 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1750946AbdEJCiw (ORCPT ); Tue, 9 May 2017 22:38:52 -0400 Received: from mx1.redhat.com ([209.132.183.28]:34596 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751935AbdEJCiu (ORCPT ); Tue, 9 May 2017 22:38:50 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com C8F0880F8E Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com 
Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com C8F0880F8E From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 7/9] tun: support receiving skb through msg_control Date: Wed, 10 May 2017 10:37:59 +0800 Message-Id: <1494383881-6811-8-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.27]); Wed, 10 May 2017 02:38:49 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 1928 Lines: 61 This patch makes tun_recvmsg() able to receive an skb from its caller through msg_control. Vhost_net will be the first user. Signed-off-by: Jason Wang --- drivers/net/tun.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3cbfc5c..f8041f9c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1510,9 +1510,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, struct iov_iter *to, - int noblock) + int noblock, struct sk_buff *skb) { - struct sk_buff *skb; ssize_t ret; int err; @@ -1521,10 +1520,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, if (!iov_iter_count(to)) return 0; - /* Read frames from ring */ - skb = tun_ring_recv(tfile, noblock, &err); - if (!skb) - return err; + if (!skb) { + /* Read frames from ring */ + skb = tun_ring_recv(tfile, noblock, &err); + if (!skb) + return err; + } ret = tun_put_user(tun, tfile, skb, to); if (unlikely(ret < 0)) @@ -1544,7 +1545,7 @@ static ssize_t 
tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!tun) return -EBADFD; - ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK); + ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; @@ -1646,7 +1647,8 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } - ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT); + ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, + m->msg_control); if (ret > (ssize_t)total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; From patchwork Wed May 10 02:38:00 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786367 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752094AbdEJCi7 (ORCPT ); Tue, 9 May 2017 22:38:59 -0400 Received: from mx1.redhat.com ([209.132.183.28]:34628 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751935AbdEJCiy (ORCPT ); Tue, 9 May 2017 22:38:54 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com DC16F80F8E Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com DC16F80F8E From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 8/9] tap: support receiving skb from msg_control Date: Wed, 10 May 2017 10:38:00 +0800 Message-Id: <1494383881-6811-9-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> 
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.27]); Wed, 10 May 2017 02:38:54 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 1797 Lines: 61 This patch makes tap_recvmsg() able to receive an skb from its caller through msg_control. Vhost_net will be the first user. Signed-off-by: Jason Wang --- drivers/net/tap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index abdaf86..9af3239 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -824,15 +824,17 @@ static ssize_t tap_put_user(struct tap_queue *q, static ssize_t tap_do_read(struct tap_queue *q, struct iov_iter *to, - int noblock) + int noblock, struct sk_buff *skb) { DEFINE_WAIT(wait); - struct sk_buff *skb; ssize_t ret = 0; if (!iov_iter_count(to)) return 0; + if (skb) + goto put; + while (1) { if (!noblock) prepare_to_wait(sk_sleep(&q->sk), &wait, @@ -856,6 +858,7 @@ static ssize_t tap_do_read(struct tap_queue *q, if (!noblock) finish_wait(sk_sleep(&q->sk), &wait); +put: if (skb) { ret = tap_put_user(q, skb, to); if (unlikely(ret < 0)) @@ -872,7 +875,7 @@ static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to) struct tap_queue *q = file->private_data; ssize_t len = iov_iter_count(to), ret; - ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK); + ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; @@ -1155,7 +1158,8 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m, int ret; if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) return -EINVAL; - ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT); + ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, + m->msg_control); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? 
ret : total_len; From patchwork Wed May 10 02:38:01 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jason Wang X-Patchwork-Id: 786368 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752151AbdEJCjG (ORCPT ); Tue, 9 May 2017 22:39:06 -0400 Received: from mx1.redhat.com ([209.132.183.28]:56012 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751935AbdEJCjD (ORCPT ); Tue, 9 May 2017 22:39:03 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 46CB83B707 Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=jasowang@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 46CB83B707 From: Jason Wang To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com Cc: Jason Wang Subject: [PATCH net-next V3 9/9] vhost_net: try batch dequing from skb array Date: Wed, 10 May 2017 10:38:01 +0800 Message-Id: <1494383881-6811-10-git-send-email-jasowang@redhat.com> In-Reply-To: <1494383881-6811-1-git-send-email-jasowang@redhat.com> References: <1494383881-6811-1-git-send-email-jasowang@redhat.com> X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Wed, 10 May 2017 02:38:58 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6891 Lines: 255 We used to dequeue one skb during recvmsg() from skb_array; this could be inefficient because of the bad cache utilization and the spinlock being taken for each packet. This patch tries to batch them by calling the batch dequeuing helpers explicitly on the exported skb array and passing the skb back through msg_control for the underlying socket to finish the userspace copying. 
Batch dequeuing is also the requirement for more batching improvement on rx. Tests were done by pktgen on tap with XDP1 in guest on top of batch zeroing: rx batch | pps 256 2.41Mpps (+6.16%) 128 2.48Mpps (+8.80%) 64 2.38Mpps (+3.96%) <- Default 16 2.31Mpps (+1.76%) 4 2.31Mpps (+1.76%) 1 2.30Mpps (+1.32%) 0 2.27Mpps (+7.48%) Signed-off-by: Jason Wang --- drivers/vhost/net.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 111 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9b51989..fbaecf3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include @@ -85,6 +87,13 @@ struct vhost_net_ubuf_ref { struct vhost_virtqueue *vq; }; +#define VHOST_RX_BATCH 64 +struct vhost_net_buf { + struct sk_buff *queue[VHOST_RX_BATCH]; + int tail; + int head; +}; + struct vhost_net_virtqueue { struct vhost_virtqueue vq; size_t vhost_hlen; @@ -99,6 +108,8 @@ struct vhost_net_virtqueue { /* Reference counting for outstanding ubufs. * Protected by vq mutex. Writers must also take device mutex. 
*/ struct vhost_net_ubuf_ref *ubufs; + struct skb_array *rx_array; + struct vhost_net_buf rxq; }; struct vhost_net { @@ -117,6 +128,71 @@ struct vhost_net { static unsigned vhost_net_zcopy_mask __read_mostly; +static void *vhost_net_buf_get_ptr(struct vhost_net_buf *rxq) +{ + if (rxq->tail != rxq->head) + return rxq->queue[rxq->head]; + else + return NULL; +} + +static int vhost_net_buf_get_size(struct vhost_net_buf *rxq) +{ + return rxq->tail - rxq->head; +} + +static int vhost_net_buf_is_empty(struct vhost_net_buf *rxq) +{ + return rxq->tail == rxq->head; +} + +static void *vhost_net_buf_consume(struct vhost_net_buf *rxq) +{ + void *ret = vhost_net_buf_get_ptr(rxq); + ++rxq->head; + return ret; +} + +static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq) +{ + struct vhost_net_buf *rxq = &nvq->rxq; + + rxq->head = 0; + rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue, + VHOST_RX_BATCH); + return rxq->tail; +} + +static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) +{ + struct vhost_net_buf *rxq = &nvq->rxq; + + if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) { + skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head, + vhost_net_buf_get_size(rxq)); + rxq->head = rxq->tail = 0; + } +} + +static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) +{ + struct vhost_net_buf *rxq = &nvq->rxq; + + if (!vhost_net_buf_is_empty(rxq)) + goto out; + + if (!vhost_net_buf_produce(nvq)) + return 0; + +out: + return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq)); +} + +static void vhost_net_buf_init(struct vhost_net_buf *rxq) +{ + rxq->head = rxq->tail = 0; +} + static void vhost_net_enable_zcopy(int vq) { vhost_net_zcopy_mask |= 0x1 << vq; @@ -201,6 +277,7 @@ static void vhost_net_vq_reset(struct vhost_net *n) n->vqs[i].ubufs = NULL; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; + vhost_net_buf_init(&n->vqs[i].rxq); } } @@ -503,15 +580,14 @@ static void handle_tx(struct vhost_net *net) mutex_unlock(&vq->mutex); } 
-static int peek_head_len(struct sock *sk) +static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) { - struct socket *sock = sk->sk_socket; struct sk_buff *head; int len = 0; unsigned long flags; - if (sock->ops->peek_len) - return sock->ops->peek_len(sock); + if (rvq->rx_array) + return vhost_net_buf_peek(rvq); spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); head = skb_peek(&sk->sk_receive_queue); @@ -537,10 +613,11 @@ static int sk_has_rx_data(struct sock *sk) static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) { + struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned long uninitialized_var(endtime); - int len = peek_head_len(sk); + int len = peek_head_len(rvq, sk); if (!len && vq->busyloop_timeout) { /* Both tx vq and rx socket were polled here */ @@ -561,7 +638,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) vhost_poll_queue(&vq->poll); mutex_unlock(&vq->mutex); - len = peek_head_len(sk); + len = peek_head_len(rvq, sk); } return len; @@ -699,6 +776,8 @@ static void handle_rx(struct vhost_net *net) /* On error, stop handling until the next kick. 
*/ if (unlikely(headcount < 0)) goto out; + if (nvq->rx_array) + msg.msg_control = vhost_net_buf_consume(&nvq->rxq); /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); @@ -841,6 +920,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].done_idx = 0; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; + vhost_net_buf_init(&n->vqs[i].rxq); } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); @@ -856,11 +936,14 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct socket *sock; + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); mutex_lock(&vq->mutex); sock = vq->private_data; vhost_net_disable_vq(n, vq); vq->private_data = NULL; + vhost_net_buf_unproduce(nvq); mutex_unlock(&vq->mutex); return sock; } @@ -953,6 +1036,25 @@ static struct socket *get_raw_socket(int fd) return ERR_PTR(r); } +static struct skb_array *get_tap_skb_array(int fd) +{ + struct skb_array *array; + struct file *file = fget(fd); + + if (!file) + return NULL; + array = tun_get_skb_array(file); + if (!IS_ERR(array)) + goto out; + array = tap_get_skb_array(file); + if (!IS_ERR(array)) + goto out; + array = NULL; +out: + fput(file); + return array; +} + static struct socket *get_tap_socket(int fd) { struct file *file = fget(fd); @@ -1029,6 +1131,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); vq->private_data = sock; + vhost_net_buf_unproduce(nvq); + if (index == VHOST_NET_VQ_RX) + nvq->rx_array = get_tap_skb_array(fd); r = vhost_vq_init_access(vq); if (r) goto err_used;