netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zhu Yanjun <yanjunz@nvidia.com>
To: yanjunz@nvidia.com, dledford@redhat.com, jgg@ziepe.ca,
	linux-rdma@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH 1/1] RDMA/rxe: Fetch skb packets from ethernet layer
Date: Thu,  5 Nov 2020 19:12:01 +0800	[thread overview]
Message-ID: <1604574721-2505-1-git-send-email-yanjunz@nvidia.com> (raw)

In the original design, on the receive path an skb passes through
the ethernet layer and the IP layer before reaching the UDP tunnel.

Now rxe fetches the skb packets from the ethernet layer directly.
So this bypasses the IP and UDP layers. As such, the skb packets
are sent to the upper protocols directly from the ethernet layer.

This increases bandwidth and decreases latency.

Signed-off-by: Zhu Yanjun <yanjunz@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_net.c |   45 ++++++++++++++++++++++++++++++++++-
 1 files changed, 44 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 2e490e5..8ea68b6 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -18,6 +18,7 @@
 #include "rxe_loc.h"
 
 static struct rxe_recv_sockets recv_sockets;
+static struct net_device *g_ndev;
 
 struct device *rxe_dma_device(struct rxe_dev *rxe)
 {
@@ -113,7 +114,7 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	tnl_cfg.encap_type = 1;
-	tnl_cfg.encap_rcv = rxe_udp_encap_recv;
+	tnl_cfg.encap_rcv = NULL;
 
 	/* Setup UDP tunnel */
 	setup_udp_tunnel_sock(net, sock, &tnl_cfg);
@@ -357,6 +358,38 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 	return rxe->ndev->name;
 }
 
+/* rx_handler: give IPv4/UDP frames for ROCE_V2_UDP_DPORT to rxe, pass the rest */
+static rx_handler_result_t rxe_handle_frame(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct iphdr *iph;
+	struct udphdr *udph;
+
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+		return RX_HANDLER_PASS;
+
+	if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) {
+		/* An skb must be released with kfree_skb(), not plain kfree() */
+		kfree_skb(skb);
+		return RX_HANDLER_CONSUMED;
+	}
+
+	if (eth_hdr(skb)->h_proto != cpu_to_be16(ETH_P_IP))
+		return RX_HANDLER_PASS;
+
+	/* NOTE(review): no length/offset validation before ip_hdr()/udp_hdr() */
+	iph = ip_hdr(skb);
+	if (iph->protocol != IPPROTO_UDP)
+		return RX_HANDLER_PASS;
+
+	udph = udp_hdr(skb);
+	if (udph->dest != cpu_to_be16(ROCE_V2_UDP_DPORT))
+		return RX_HANDLER_PASS;
+
+	rxe_udp_encap_recv(NULL, skb);
+	return RX_HANDLER_CONSUMED;
+}
+
 int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
 {
 	int err;
@@ -367,6 +400,7 @@ int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
 		return -ENOMEM;
 
 	rxe->ndev = ndev;
+	g_ndev = ndev;
 
 	err = rxe_add(rxe, ndev->mtu, ibdev_name);
 	if (err) {
@@ -374,6 +408,12 @@ int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
 		return err;
 	}
 
+	rtnl_lock();
+	err = netdev_rx_handler_register(ndev, rxe_handle_frame, rxe);
+	rtnl_unlock();
+	if (err)
+		return err;
+
 	return 0;
 }
 
@@ -498,6 +538,9 @@ static int rxe_net_ipv6_init(void)
 
 void rxe_net_exit(void)
 {
+	rtnl_lock();
+	netdev_rx_handler_unregister(g_ndev);
+	rtnl_unlock();
 	rxe_release_udp_tunnel(recv_sockets.sk6);
 	rxe_release_udp_tunnel(recv_sockets.sk4);
 	unregister_netdevice_notifier(&rxe_net_notifier);
-- 
1.7.1


             reply	other threads:[~2020-11-05 11:12 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-05 11:12 Zhu Yanjun [this message]
2020-11-07 20:26 ` [PATCH 1/1] RDMA/rxe: Fetch skb packets from ethernet layer Jakub Kicinski
     [not found]   ` <222b9c1b-9d60-22f3-6097-8abd651cc192@gmail.com>
2020-11-08  5:27     ` Zhu Yanjun
2020-11-09 18:25       ` Jakub Kicinski
2020-11-10  1:58         ` Zhu Yanjun
2020-11-11 11:15           ` Zhu Yanjun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1604574721-2505-1-git-send-email-yanjunz@nvidia.com \
    --to=yanjunz@nvidia.com \
    --cc=dledford@redhat.com \
    --cc=jgg@ziepe.ca \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).