All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Ahern <dsahern@gmail.com>
To: Taehee Yoo <ap420073@gmail.com>,
	davem@davemloft.net, kuba@kernel.org, dsahern@kernel.org,
	netdev@vger.kernel.org
Cc: dkirjanov@suse.de
Subject: Re: [PATCH net-next 2/4 v4] amt: add data plane of amt interface
Date: Wed, 27 Oct 2021 09:00:57 -0600	[thread overview]
Message-ID: <ac69dddd-0346-d663-bacf-bb03f01b0b96@gmail.com> (raw)
In-Reply-To: <20211026151016.25997-3-ap420073@gmail.com>

On 10/26/21 9:10 AM, Taehee Yoo wrote:
> diff --git a/drivers/net/amt.c b/drivers/net/amt.c
> index 8d4782c66cde..c3ac94b6d3e8 100644
> --- a/drivers/net/amt.c
> +++ b/drivers/net/amt.c
> @@ -31,6 +31,1220 @@
>  
>  static struct workqueue_struct *amt_wq;
>  
> +static char *status_str[] = {
> +	"AMT_STATUS_INIT",
> +	"AMT_STATUS_SENT_DISCOVERY",
> +	"AMT_STATUS_RECEIVED_DISCOVERY",
> +	"AMT_STATUS_SENT_ADVERTISEMENT",
> +	"AMT_STATUS_RECEIVED_ADVERTISEMENT",
> +	"AMT_STATUS_SENT_REQUEST",
> +	"AMT_STATUS_RECEIVED_REQUEST",
> +	"AMT_STATUS_SENT_QUERY",
> +	"AMT_STATUS_RECEIVED_QUERY",
> +	"AMT_STATUS_SENT_UPDATE",
> +	"AMT_STATUS_RECEIVED_UPDATE",
> +};
> +
> +static char *type_str[] = {
> +	"AMT_MSG_DISCOVERY",
> +	"AMT_MSG_ADVERTISEMENT",
> +	"AMT_MSG_REQUEST",
> +	"AMT_MSG_MEMBERSHIP_QUERY",
> +	"AMT_MSG_MEMBERSHIP_UPDATE",
> +	"AMT_MSG_MULTICAST_DATA",
> +	"AMT_MSG_TEATDOWM",

TEARDOWN is misspelled.

> +};
> +
> +static struct amt_skb_cb *amt_skb_cb(struct sk_buff *skb)
> +{
> +	return (struct amt_skb_cb *)((void *)skb->cb +
> +		sizeof(struct qdisc_skb_cb));

how will you know of sizeof(struct qdisc_skb_cb) + sizeof(struct
amt_skb_cb) exceeds the cb space? You should add a BUILD_BUG_ON.


> +}
> +
> +static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
> +{
> +	int hlen = LL_RESERVED_SPACE(amt->dev);
> +	int tlen = amt->dev->needed_tailroom;
> +	struct igmpv3_query *ihv3;
> +	void *csum_start = NULL;
> +	__sum16 *csum = NULL;
> +	struct sk_buff *skb;
> +	struct ethhdr *eth;
> +	struct iphdr *iph;
> +	unsigned int len;
> +	int offset;
> +
> +	len = hlen + tlen + sizeof(*iph) + 4 + sizeof(*ihv3);
> +	skb = netdev_alloc_skb_ip_align(amt->dev, len);
> +	if (!skb)
> +		return NULL;
> +
> +	skb_reserve(skb, hlen);
> +	skb_push(skb, sizeof(*eth));
> +	skb->protocol = htons(ETH_P_IP);
> +	skb_reset_mac_header(skb);
> +	skb->priority = TC_PRIO_CONTROL;
> +	skb_put(skb, sizeof(*iph) + 4 + sizeof(*ihv3));
> +	skb_pull(skb, sizeof(*eth));
> +	skb_reset_network_header(skb);
> +
> +	iph		= ip_hdr(skb);
> +	iph->version	= 4;
> +	iph->ihl	= (sizeof(struct iphdr) + 4) >> 2;
> +	iph->tos	= 0xc0;
> +	iph->tot_len	= htons(sizeof(*iph) + 4 + sizeof(*ihv3));
> +	iph->frag_off	= htons(IP_DF);
> +	iph->ttl	= 1;
> +	iph->id		= 0;
> +	iph->protocol	= IPPROTO_IGMP;
> +	iph->daddr	= htonl(INADDR_ALLHOSTS_GROUP);
> +	iph->saddr	= htonl(INADDR_ANY);
> +	((u8 *)&iph[1])[0] = IPOPT_RA;
> +	((u8 *)&iph[1])[1] = 4;
> +	((u8 *)&iph[1])[2] = 0;
> +	((u8 *)&iph[1])[3] = 0;

This reads weird to me. Why not use a local variable like:
#define AMT_IPHDR_OPTS 4
	u8 buf[AMT_IPHDR_OPTS] = (u8 *)(iph + 1);

and then use AMT_IPHDR_OPTS in place of the magic '4' in the skb
manipulations.

> +	ip_send_check(iph);
> +
> +	eth = eth_hdr(skb);
> +	ether_addr_copy(eth->h_source, amt->dev->dev_addr);
> +	ip_eth_mc_map(htonl(INADDR_ALLHOSTS_GROUP), eth->h_dest);
> +	eth->h_proto = htons(ETH_P_IP);
> +
> +	ihv3		= skb_pull(skb, sizeof(*iph) + 4);
> +	skb_reset_transport_header(skb);
> +	ihv3->type	= IGMP_HOST_MEMBERSHIP_QUERY;
> +	ihv3->code	= 1;
> +	ihv3->group	= 0;
> +	ihv3->qqic	= amt->qi;
> +	ihv3->nsrcs	= 0;
> +	ihv3->resv	= 0;
> +	ihv3->suppress	= false;
> +	ihv3->qrv	= amt->net->ipv4.sysctl_igmp_qrv;
> +	ihv3->csum	= 0;
> +	csum		= &ihv3->csum;
> +	csum_start	= (void *)ihv3;
> +	*csum		= ip_compute_csum(csum_start, sizeof(*ihv3));
> +	offset		= skb_transport_offset(skb);
> +	skb->csum	= skb_checksum(skb, offset, skb->len - offset, 0);
> +	skb->ip_summed	= CHECKSUM_NONE;
> +
> +	skb_push(skb, sizeof(*eth) + sizeof(*iph) + 4);
> +
> +	return skb;
> +}
> +



> +static void amt_send_request(struct amt_dev *amt, bool v6)
> +{
> +	struct amt_header_request *amtrh;
> +	int hlen, tlen, offset;
> +	struct socket *sock;
> +	struct udphdr *udph;
> +	struct sk_buff *skb;
> +	struct iphdr *iph;
> +	struct rtable *rt;
> +	struct flowi4 fl4;
> +	u32 len;
> +	int err;
> +
> +	rcu_read_lock();
> +	sock = rcu_dereference(amt->sock);
> +	if (!sock)
> +		goto out;
> +
> +	if (!netif_running(amt->stream_dev) || !netif_running(amt->dev))
> +		goto out;
> +
> +	rt = ip_route_output_ports(amt->net, &fl4, sock->sk,
> +				   amt->remote_ip, amt->local_ip,
> +				   amt->gw_port, amt->relay_port,
> +				   IPPROTO_UDP, 0,
> +				   amt->stream_dev->ifindex);
> +	if (IS_ERR(rt)) {
> +		amt->dev->stats.tx_errors++;
> +		goto out;
> +	}
> +
> +	hlen = LL_RESERVED_SPACE(amt->dev);
> +	tlen = amt->dev->needed_tailroom;
> +	len = hlen + tlen + sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh);
> +	skb = netdev_alloc_skb_ip_align(amt->dev, len);
> +	if (!skb) {
> +		ip_rt_put(rt);
> +		amt->dev->stats.tx_errors++;
> +		goto out;
> +	}
> +
> +	skb->priority = TC_PRIO_CONTROL;
> +	skb_dst_set(skb, &rt->dst);
> +
> +	len = sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh);
> +	skb_reset_network_header(skb);
> +	skb_put(skb, len);
> +	amtrh = skb_pull(skb, sizeof(*iph) + sizeof(*udph));
> +	amtrh->version	 = 0;
> +	amtrh->type	 = AMT_MSG_REQUEST;
> +	amtrh->reserved1 = 0;
> +	amtrh->p	 = v6;
> +	amtrh->reserved2 = 0;
> +	amtrh->nonce	 = amt->nonce;
> +	skb_push(skb, sizeof(*udph));
> +	skb_reset_transport_header(skb);
> +	udph		= udp_hdr(skb);
> +	udph->source	= amt->gw_port;
> +	udph->dest	= amt->relay_port;
> +	udph->len	= htons(sizeof(*amtrh) + sizeof(*udph));
> +	udph->check	= 0;
> +	offset = skb_transport_offset(skb);
> +	skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
> +	udph->check = csum_tcpudp_magic(amt->local_ip, amt->remote_ip,
> +					sizeof(*udph) + sizeof(*amtrh),
> +					IPPROTO_UDP, skb->csum);
> +
> +	skb_push(skb, sizeof(*iph));
> +	iph		= ip_hdr(skb);
> +	iph->version	= 4;
> +	iph->ihl	= (sizeof(struct iphdr)) >> 2;
> +	iph->tos	= 0xc0;

why c0? Use a macro with a proper name.

> +	iph->frag_off	= 0;
> +	iph->ttl	= ip4_dst_hoplimit(&rt->dst);
> +	iph->daddr	= amt->remote_ip;
> +	iph->saddr	= amt->local_ip;
> +	iph->protocol	= IPPROTO_UDP;
> +	iph->tot_len	= htons(len);
> +
> +	skb->ip_summed = CHECKSUM_NONE;
> +	ip_select_ident(amt->net, skb, NULL);
> +	ip_send_check(iph);
> +	err = ip_local_out(amt->net, sock->sk, skb);
> +	if (unlikely(net_xmit_eval(err)))
> +		amt->dev->stats.tx_errors++;
> +
> +out:
> +	rcu_read_unlock();
> +}
> +
> +static void amt_send_igmp_gq(struct amt_dev *amt,
> +			     struct amt_tunnel_list *tunnel)
> +{
> +	struct sk_buff *skb;
> +
> +	skb = amt_build_igmp_gq(amt);
> +	if (!skb)
> +		return;
> +
> +	amt_skb_cb(skb)->tunnel = tunnel;
> +	dev_queue_xmit(skb);
> +}
> +

> +
> +static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> +{
> +	struct amt_dev *amt = netdev_priv(dev);
> +	struct amt_tunnel_list *tunnel;
> +	bool report = false;
> +	struct igmphdr *ih;
> +	bool query = false;
> +	struct iphdr *iph;
> +	bool data = false;
> +	bool v6 = false;
> +
> +	rcu_read_lock();

As I recall ndo_start_xmit functions are called inside of
rcu_read_lock_bh().


> +	iph = ip_hdr(skb);> +	if (iph->version == 4) {
> +		if (!ipv4_is_multicast(iph->daddr))
> +			goto free;
> +
> +		if (!ip_mc_check_igmp(skb)) {
> +			ih = igmp_hdr(skb);
> +			switch (ih->type) {
> +			case IGMPV3_HOST_MEMBERSHIP_REPORT:
> +			case IGMP_HOST_MEMBERSHIP_REPORT:
> +				report = true;
> +				break;
> +			case IGMP_HOST_MEMBERSHIP_QUERY:
> +				query = true;
> +				break;
> +			default:
> +				goto free;
> +			}
> +		} else {
> +			data = true;
> +		}
> +		v6 = false;
> +	} else {
> +		dev->stats.tx_errors++;
> +		goto free;
> +	}
> +
> +	if (!pskb_may_pull(skb, sizeof(struct ethhdr)))
> +		goto free;
> +
> +	skb_pull(skb, sizeof(struct ethhdr));
> +
> +	if (amt->mode == AMT_MODE_GATEWAY) {
> +		/* Gateway only passes IGMP/MLD packets */
> +		if (!report)
> +			goto free;
> +		if ((!v6 && !amt->ready4) || (v6 && !amt->ready6))
> +			goto free;
> +		if (amt_send_membership_update(amt, skb,  v6))
> +			goto free;
> +		goto unlock;
> +	} else if (amt->mode == AMT_MODE_RELAY) {
> +		if (query) {
> +			tunnel = amt_skb_cb(skb)->tunnel;
> +			if (!tunnel) {
> +				WARN_ON(1);
> +				goto free;
> +			}
> +
> +			/* Do not forward unexpected query */
> +			if (amt_send_membership_query(amt, skb, tunnel, v6))
> +				goto free;
> +			goto unlock;
> +		}
> +
> +		if (!data)
> +			goto free;
> +		list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list)
> +			amt_send_multicast_data(amt, skb, tunnel, v6);
> +	}
> +
> +	dev_kfree_skb(skb);
> +	rcu_read_unlock();
> +	return NETDEV_TX_OK;
> +free:
> +	dev_kfree_skb(skb);
> +unlock:
> +	rcu_read_unlock();
> +	dev->stats.tx_dropped++;
> +	return NETDEV_TX_OK;
> +}
> +




> +static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
> +{
> +	struct amt_header_request *amtrh;
> +	struct amt_tunnel_list *tunnel;
> +	unsigned long long key;
> +	struct udphdr *udph;
> +	struct iphdr *iph;
> +	u64 mac;
> +	int i;
> +
> +	if (!pskb_may_pull(skb, sizeof(*udph) + sizeof(*amtrh)))
> +		return true;
> +
> +	iph = ip_hdr(skb);
> +	udph = udp_hdr(skb);
> +	amtrh = (struct amt_header_request *)(udp_hdr(skb) + 1);
> +
> +	if (amtrh->reserved1 || amtrh->reserved2 || amtrh->version)
> +		return true;
> +
> +	list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list)
> +		if (tunnel->ip4 == iph->saddr)
> +			goto send;
> +
> +	if (amt->nr_tunnels >= amt->max_tunnels) {
> +		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
> +		return true;
> +	}
> +
> +	tunnel = kmalloc(sizeof(*tunnel) +

kzalloc and then you do not need the '= 0' below.

> +			 (sizeof(struct hlist_head) * amt->hash_buckets),
> +			 GFP_ATOMIC);
> +	if (!tunnel)
> +		return true;
> +
> +	tunnel->source_port = udph->source;
> +	tunnel->ip4 = iph->saddr;
> +	tunnel->nr_groups = 0;
> +	tunnel->nr_sources = 0;
> +
> +	memcpy(&key, &tunnel->key, sizeof(unsigned long long));
> +	tunnel->reserved = 0;
> +	tunnel->amt = amt;
> +	spin_lock_init(&tunnel->lock);
> +	for (i = 0; i < amt->hash_buckets; i++)
> +		INIT_HLIST_HEAD(&tunnel->groups[i]);
> +
> +	INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire);
> +
> +	spin_lock_bh(&amt->lock);
> +	list_add_tail_rcu(&tunnel->list, &amt->tunnel_list);
> +	tunnel->key = amt->key;
> +	amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
> +	amt->nr_tunnels++;
> +	mod_delayed_work(amt_wq, &tunnel->gc_wq,
> +			 msecs_to_jiffies(amt_gmi(amt)));
> +	spin_unlock_bh(&amt->lock);
> +
> +send:
> +	tunnel->nonce = amtrh->nonce;
> +	mac = siphash_3u32((__force u32)tunnel->ip4,
> +			   (__force u32)tunnel->source_port,
> +			   (__force u32)tunnel->nonce,
> +			   &tunnel->key);
> +	tunnel->mac = mac >> 16;
> +
> +	if (!netif_running(amt->dev) || !netif_running(amt->stream_dev))
> +		return true;
> +
> +	if (!amtrh->p)
> +		amt_send_igmp_gq(amt, tunnel);
> +
> +	return false;
> +}
> +

you create some selftests and include some torture cases around cleanup
of all the work activity.

  reply	other threads:[~2021-10-27 15:01 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-26 15:10 [PATCH net-next 0/4 v4] amt: add initial driver for Automatic Multicast Tunneling (AMT) Taehee Yoo
2021-10-26 15:10 ` [PATCH net-next 1/4 v4] amt: add control plane of amt interface Taehee Yoo
2021-10-27 14:37   ` David Ahern
2021-10-27 15:38     ` Taehee Yoo
2021-10-28 13:57   ` Denis Kirjanov
2021-10-28 15:03     ` Taehee Yoo
2021-10-26 15:10 ` [PATCH net-next 2/4 v4] amt: add data " Taehee Yoo
2021-10-27 15:00   ` David Ahern [this message]
2021-10-27 16:12     ` Taehee Yoo
2021-10-26 15:10 ` [PATCH net-next 3/4 v4] amt: add multicast(IGMP) report message handler Taehee Yoo
2021-10-27 15:08   ` David Ahern
2021-10-27 16:17     ` Taehee Yoo
2021-10-26 15:10 ` [PATCH net-next 4/4 v4] amt: add mld " Taehee Yoo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ac69dddd-0346-d663-bacf-bb03f01b0b96@gmail.com \
    --to=dsahern@gmail.com \
    --cc=ap420073@gmail.com \
    --cc=davem@davemloft.net \
    --cc=dkirjanov@suse.de \
    --cc=dsahern@kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.