* [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
@ 2012-06-27 12:00 Tom Parkin
  2012-06-27 19:03 ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Tom Parkin @ 2012-06-27 12:00 UTC (permalink / raw)
  To: netdev; +Cc: David.Laight, Tom Parkin, James Chapman

This patch fixes a race condition in l2tp when updating tunnel and
session statistics.  Previously it was possible for multiple threads
to concurrently call u64_stats_update*(), which led to statistics
readers blocking forever.

This race was discovered on an AMD64 SMP machine running a 32bit
kernel.  Running "ip l2tp" while sending data over an Ethernet
pseudowire resulted in an occasional soft lockup in
u64_stats_fetch_begin() called from l2tp_nl_session_send().

For safe lockless update of l2tp stats, data is now stored in per-cpu
variables.  These per-cpu datasets are then summed at read time via
an extra helper function, l2tp_stats_copy(), which has been added to
l2tp_core.c.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
---
 net/l2tp/l2tp_core.c    |  286 ++++++++++++++++++++++++++++-------------------
 net/l2tp/l2tp_core.h    |   44 ++++++--
 net/l2tp/l2tp_debugfs.c |   42 ++++---
 net/l2tp/l2tp_netlink.c |   64 ++++-------
 net/l2tp/l2tp_ppp.c     |   75 ++++++++-----
 5 files changed, 301 insertions(+), 210 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 32b2155..ab2ffc0 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -320,6 +320,43 @@ struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth);
 
+/*
+ * Sum tunnel/session statistics across all CPUs
+ */
+int l2tp_stats_copy(struct l2tp_stats *cpustats, struct l2tp_stats *dest)
+{
+	int i;
+	unsigned int start;
+
+	if (!cpustats || !dest)
+		return 1;
+
+	memset(dest, 0, sizeof(struct l2tp_stats));
+
+	for_each_possible_cpu(i) {
+		struct l2tp_stats *stats = per_cpu_ptr(cpustats, i);
+
+		do {
+			start = u64_stats_fetch_begin(&stats->tx.syncp);
+			dest->tx.packets += stats->tx.packets;
+			dest->tx.bytes += stats->tx.bytes;
+			dest->tx.errors += stats->tx.errors;
+		} while (u64_stats_fetch_retry(&stats->tx.syncp, start));
+
+		do {
+			start = u64_stats_fetch_begin(&stats->rx.syncp);
+			dest->rx.packets += stats->rx.packets;
+			dest->rx.bytes += stats->rx.bytes;
+			dest->rx.errors += stats->rx.errors;
+			dest->rx.seq_discards += stats->rx.seq_discards;
+			dest->rx.oos_packets += stats->rx.oos_packets;
+		} while (u64_stats_fetch_retry(&stats->rx.syncp, start));
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_stats_copy);
+
 /*****************************************************************************
  * Receive data handling
  *****************************************************************************/
@@ -335,7 +372,6 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk
 	struct l2tp_stats *sstats;
 
 	spin_lock_bh(&session->reorder_q.lock);
-	sstats = &session->stats;
 	skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
 		if (L2TP_SKB_CB(skbp)->ns > ns) {
 			__skb_queue_before(&session->reorder_q, skbp, skb);
@@ -343,9 +379,10 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk
 				 "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
 				 session->name, ns, L2TP_SKB_CB(skbp)->ns,
 				 skb_queue_len(&session->reorder_q));
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_oos_packets++;
-			u64_stats_update_end(&sstats->syncp);
+			sstats = this_cpu_ptr(session->cpustats);
+			u64_stats_update_begin(&sstats->rx.syncp);
+			sstats->rx.oos_packets++;
+			u64_stats_update_end(&sstats->rx.syncp);
 			goto out;
 		}
 	}
@@ -369,16 +406,17 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
 	 */
 	skb_orphan(skb);
 
-	tstats = &tunnel->stats;
-	u64_stats_update_begin(&tstats->syncp);
-	sstats = &session->stats;
-	u64_stats_update_begin(&sstats->syncp);
-	tstats->rx_packets++;
-	tstats->rx_bytes += length;
-	sstats->rx_packets++;
-	sstats->rx_bytes += length;
-	u64_stats_update_end(&tstats->syncp);
-	u64_stats_update_end(&sstats->syncp);
+	tstats = this_cpu_ptr(tunnel->cpustats);
+	u64_stats_update_begin(&tstats->rx.syncp);
+	tstats->rx.packets++;
+	tstats->rx.bytes += length;
+	u64_stats_update_end(&tstats->rx.syncp);
+
+	sstats = this_cpu_ptr(session->cpustats);
+	u64_stats_update_begin(&sstats->rx.syncp);
+	sstats->rx.packets++;
+	sstats->rx.bytes += length;
+	u64_stats_update_end(&sstats->rx.syncp);
 
 	if (L2TP_SKB_CB(skb)->has_seq) {
 		/* Bump our Nr */
@@ -417,13 +455,13 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
 	 */
 start:
 	spin_lock_bh(&session->reorder_q.lock);
-	sstats = &session->stats;
 	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
 		if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) {
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_seq_discards++;
-			sstats->rx_errors++;
-			u64_stats_update_end(&sstats->syncp);
+			sstats = this_cpu_ptr(session->cpustats);
+			u64_stats_update_begin(&sstats->rx.syncp);
+			sstats->rx.seq_discards++;
+			sstats->rx.errors++;
+			u64_stats_update_end(&sstats->rx.syncp);
 			l2tp_dbg(session, L2TP_MSG_SEQ,
 				 "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n",
 				 session->name, L2TP_SKB_CB(skb)->ns,
@@ -582,7 +620,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	int offset;
 	u32 ns, nr;
-	struct l2tp_stats *sstats = &session->stats;
+	struct l2tp_stats *sstats;
 
 	/* The ref count is increased since we now hold a pointer to
 	 * the session. Take care to decrement the refcnt when exiting
@@ -599,9 +637,10 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 				  "%s: cookie mismatch (%u/%u). Discarding.\n",
 				  tunnel->name, tunnel->tunnel_id,
 				  session->session_id);
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_cookie_discards++;
-			u64_stats_update_end(&sstats->syncp);
+			sstats = this_cpu_ptr(session->cpustats);
+			u64_stats_update_begin(&sstats->rx.syncp);
+			sstats->rx.cookie_discards++;
+			u64_stats_update_end(&sstats->rx.syncp);
 			goto discard;
 		}
 		ptr += session->peer_cookie_len;
@@ -670,9 +709,10 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			l2tp_warn(session, L2TP_MSG_SEQ,
 				  "%s: recv data has no seq numbers when required. Discarding.\n",
 				  session->name);
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_seq_discards++;
-			u64_stats_update_end(&sstats->syncp);
+			sstats = this_cpu_ptr(session->cpustats);
+			u64_stats_update_begin(&sstats->rx.syncp);
+			sstats->rx.seq_discards++;
+			u64_stats_update_end(&sstats->rx.syncp);
 			goto discard;
 		}
 
@@ -691,9 +731,10 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			l2tp_warn(session, L2TP_MSG_SEQ,
 				  "%s: recv data has no seq numbers when required. Discarding.\n",
 				  session->name);
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_seq_discards++;
-			u64_stats_update_end(&sstats->syncp);
+			sstats = this_cpu_ptr(session->cpustats);
+			u64_stats_update_begin(&sstats->rx.syncp);
+			sstats->rx.seq_discards++;
+			u64_stats_update_end(&sstats->rx.syncp);
 			goto discard;
 		}
 	}
@@ -747,9 +788,10 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			 * packets
 			 */
 			if (L2TP_SKB_CB(skb)->ns != session->nr) {
-				u64_stats_update_begin(&sstats->syncp);
-				sstats->rx_seq_discards++;
-				u64_stats_update_end(&sstats->syncp);
+				sstats = this_cpu_ptr(session->cpustats);
+				u64_stats_update_begin(&sstats->rx.syncp);
+				sstats->rx.seq_discards++;
+				u64_stats_update_end(&sstats->rx.syncp);
 				l2tp_dbg(session, L2TP_MSG_SEQ,
 					 "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n",
 					 session->name, L2TP_SKB_CB(skb)->ns,
@@ -775,9 +817,10 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 	return;
 
 discard:
-	u64_stats_update_begin(&sstats->syncp);
-	sstats->rx_errors++;
-	u64_stats_update_end(&sstats->syncp);
+	sstats = this_cpu_ptr(session->cpustats);
+	u64_stats_update_begin(&sstats->rx.syncp);
+	sstats->rx.errors++;
+	u64_stats_update_end(&sstats->rx.syncp);
 	kfree_skb(skb);
 
 	if (session->deref)
@@ -891,10 +934,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 discard_bad_csum:
 	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
 	UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0);
-	tstats = &tunnel->stats;
-	u64_stats_update_begin(&tstats->syncp);
-	tstats->rx_errors++;
-	u64_stats_update_end(&tstats->syncp);
+	tstats = this_cpu_ptr(tunnel->cpustats);
+	u64_stats_update_begin(&tstats->rx.syncp);
+	tstats->rx.errors++;
+	u64_stats_update_end(&tstats->rx.syncp);
 	kfree_skb(skb);
 
 	return 0;
@@ -1050,21 +1093,21 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 		error = ip_queue_xmit(skb, fl);
 
 	/* Update stats */
-	tstats = &tunnel->stats;
-	u64_stats_update_begin(&tstats->syncp);
-	sstats = &session->stats;
-	u64_stats_update_begin(&sstats->syncp);
+	tstats = this_cpu_ptr(tunnel->cpustats);
+	sstats = this_cpu_ptr(session->cpustats);
+	u64_stats_update_begin(&tstats->tx.syncp);
+	u64_stats_update_begin(&sstats->tx.syncp);
 	if (error >= 0) {
-		tstats->tx_packets++;
-		tstats->tx_bytes += len;
-		sstats->tx_packets++;
-		sstats->tx_bytes += len;
+		tstats->tx.packets++;
+		tstats->tx.bytes += len;
+		sstats->tx.packets++;
+		sstats->tx.bytes += len;
 	} else {
-		tstats->tx_errors++;
-		sstats->tx_errors++;
+		tstats->tx.errors++;
+		sstats->tx.errors++;
 	}
-	u64_stats_update_end(&tstats->syncp);
-	u64_stats_update_end(&sstats->syncp);
+	u64_stats_update_end(&tstats->tx.syncp);
+	u64_stats_update_end(&sstats->tx.syncp);
 
 	return 0;
 }
@@ -1256,6 +1299,8 @@ static void l2tp_tunnel_destruct(struct sock *sk)
 	sk->sk_destruct = tunnel->old_sk_destruct;
 	sk->sk_user_data = NULL;
 
+	free_percpu(tunnel->cpustats);
+
 	/* Call the original destructor */
 	if (sk->sk_destruct)
 		(*sk->sk_destruct)(sk);
@@ -1567,6 +1612,13 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		goto err;
 	}
 
+	tunnel->cpustats = alloc_percpu(struct l2tp_stats);
+	if (tunnel->cpustats == NULL) {
+		kfree(tunnel);
+		err = -ENOMEM;
+		goto err;
+	}
+
 	tunnel->version = version;
 	tunnel->tunnel_id = tunnel_id;
 	tunnel->peer_tunnel_id = peer_tunnel_id;
@@ -1692,6 +1744,8 @@ void l2tp_session_free(struct l2tp_session *session)
 
 		sock_put(tunnel->sock);
 
+		free_percpu(session->cpustats);
+
 		/* This will delete the tunnel context if this
 		 * is the last session on the tunnel.
 		 */
@@ -1742,80 +1796,88 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 	struct l2tp_session *session;
 
 	session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
-	if (session != NULL) {
-		session->magic = L2TP_SESSION_MAGIC;
-		session->tunnel = tunnel;
+	if (session == NULL)
+		goto err;
 
-		session->session_id = session_id;
-		session->peer_session_id = peer_session_id;
-		session->nr = 0;
+	session->cpustats = alloc_percpu(struct l2tp_stats);
+	if (session->cpustats == NULL) {
+		kfree(session);
+		goto err;
+	}
 
-		sprintf(&session->name[0], "sess %u/%u",
-			tunnel->tunnel_id, session->session_id);
+	session->magic = L2TP_SESSION_MAGIC;
+	session->tunnel = tunnel;
 
-		skb_queue_head_init(&session->reorder_q);
-
-		INIT_HLIST_NODE(&session->hlist);
-		INIT_HLIST_NODE(&session->global_hlist);
-
-		/* Inherit debug options from tunnel */
-		session->debug = tunnel->debug;
-
-		if (cfg) {
-			session->pwtype = cfg->pw_type;
-			session->debug = cfg->debug;
-			session->mtu = cfg->mtu;
-			session->mru = cfg->mru;
-			session->send_seq = cfg->send_seq;
-			session->recv_seq = cfg->recv_seq;
-			session->lns_mode = cfg->lns_mode;
-			session->reorder_timeout = cfg->reorder_timeout;
-			session->offset = cfg->offset;
-			session->l2specific_type = cfg->l2specific_type;
-			session->l2specific_len = cfg->l2specific_len;
-			session->cookie_len = cfg->cookie_len;
-			memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);
-			session->peer_cookie_len = cfg->peer_cookie_len;
-			memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
-		}
+	session->session_id = session_id;
+	session->peer_session_id = peer_session_id;
+	session->nr = 0;
 
-		if (tunnel->version == L2TP_HDR_VER_2)
-			session->build_header = l2tp_build_l2tpv2_header;
-		else
-			session->build_header = l2tp_build_l2tpv3_header;
+	sprintf(&session->name[0], "sess %u/%u",
+			tunnel->tunnel_id, session->session_id);
 
-		l2tp_session_set_header_len(session, tunnel->version);
+	skb_queue_head_init(&session->reorder_q);
+
+	INIT_HLIST_NODE(&session->hlist);
+	INIT_HLIST_NODE(&session->global_hlist);
+
+	/* Inherit debug options from tunnel */
+	session->debug = tunnel->debug;
+
+	if (cfg) {
+		session->pwtype = cfg->pw_type;
+		session->debug = cfg->debug;
+		session->mtu = cfg->mtu;
+		session->mru = cfg->mru;
+		session->send_seq = cfg->send_seq;
+		session->recv_seq = cfg->recv_seq;
+		session->lns_mode = cfg->lns_mode;
+		session->reorder_timeout = cfg->reorder_timeout;
+		session->offset = cfg->offset;
+		session->l2specific_type = cfg->l2specific_type;
+		session->l2specific_len = cfg->l2specific_len;
+		session->cookie_len = cfg->cookie_len;
+		memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);
+		session->peer_cookie_len = cfg->peer_cookie_len;
+		memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
+	}
 
-		/* Bump the reference count. The session context is deleted
-		 * only when this drops to zero.
-		 */
-		l2tp_session_inc_refcount(session);
-		l2tp_tunnel_inc_refcount(tunnel);
+	if (tunnel->version == L2TP_HDR_VER_2)
+		session->build_header = l2tp_build_l2tpv2_header;
+	else
+		session->build_header = l2tp_build_l2tpv3_header;
 
-		/* Ensure tunnel socket isn't deleted */
-		sock_hold(tunnel->sock);
+	l2tp_session_set_header_len(session, tunnel->version);
 
-		/* Add session to the tunnel's hash list */
-		write_lock_bh(&tunnel->hlist_lock);
-		hlist_add_head(&session->hlist,
-			       l2tp_session_id_hash(tunnel, session_id));
-		write_unlock_bh(&tunnel->hlist_lock);
+	/* Bump the reference count. The session context is deleted
+	 * only when this drops to zero.
+	 */
+	l2tp_session_inc_refcount(session);
+	l2tp_tunnel_inc_refcount(tunnel);
 
-		/* And to the global session list if L2TPv3 */
-		if (tunnel->version != L2TP_HDR_VER_2) {
-			struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+	/* Ensure tunnel socket isn't deleted */
+	sock_hold(tunnel->sock);
 
-			spin_lock_bh(&pn->l2tp_session_hlist_lock);
-			hlist_add_head_rcu(&session->global_hlist,
-					   l2tp_session_id_hash_2(pn, session_id));
-			spin_unlock_bh(&pn->l2tp_session_hlist_lock);
-		}
+	/* Add session to the tunnel's hash list */
+	write_lock_bh(&tunnel->hlist_lock);
+	hlist_add_head(&session->hlist,
+			l2tp_session_id_hash(tunnel, session_id));
+	write_unlock_bh(&tunnel->hlist_lock);
 
-		/* Ignore management session in session count value */
-		if (session->session_id != 0)
-			atomic_inc(&l2tp_session_count);
+	/* And to the global session list if L2TPv3 */
+	if (tunnel->version != L2TP_HDR_VER_2) {
+		struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+		spin_lock_bh(&pn->l2tp_session_hlist_lock);
+		hlist_add_head_rcu(&session->global_hlist,
+				l2tp_session_id_hash_2(pn, session_id));
+		spin_unlock_bh(&pn->l2tp_session_hlist_lock);
 	}
 
+	/* Ignore management session in session count value */
+	if (session->session_id != 0)
+		atomic_inc(&l2tp_session_count);
+
+err:
 	return session;
 }
 EXPORT_SYMBOL_GPL(l2tp_session_create);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a38ec6c..29bb34c 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -35,19 +35,34 @@ enum {
 
 struct sk_buff;
 
-struct l2tp_stats {
-	u64			tx_packets;
-	u64			tx_bytes;
-	u64			tx_errors;
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			rx_seq_discards;
-	u64			rx_oos_packets;
-	u64			rx_errors;
-	u64			rx_cookie_discards;
+/* Stats are synchronised via a synchronisation point for safe
+ * reader/writer access on 64 and 32 bit kernels.  Stats are
+ * stored on a per-cpu basis for safe lockless updating, and
+ * summed at read time.  This allows the statistics update in
+ * the data path to be fast at some small cost when reading.
+ */
+struct l2tp_rx_stats {
+	u64			packets;
+	u64			bytes;
+	u64			errors;
+	u64			seq_discards;
+	u64			cookie_discards;
+	u64			oos_packets;
+	struct u64_stats_sync	syncp;
+};
+
+struct l2tp_tx_stats {
+	u64			packets;
+	u64			bytes;
+	u64			errors;
 	struct u64_stats_sync	syncp;
 };
 
+struct l2tp_stats {
+	struct l2tp_rx_stats	rx;
+	struct l2tp_tx_stats	tx;
+};
+
 struct l2tp_tunnel;
 
 /* Describes a session. Contains information to determine incoming
@@ -127,7 +142,9 @@ struct l2tp_session {
 	int			mtu;
 	int			mru;
 	enum l2tp_pwtype	pwtype;
-	struct l2tp_stats	stats;
+
+	struct l2tp_stats	*cpustats;	/* per-cpu counters */
+
 	struct hlist_node	global_hlist;	/* Global hash list node */
 
 	int (*build_header)(struct l2tp_session *session, void *buf);
@@ -175,7 +192,8 @@ struct l2tp_tunnel {
 	int			debug;		/* bitmask of debug message
 						 * categories */
 	enum l2tp_encap_type	encap;
-	struct l2tp_stats	stats;
+
+	struct l2tp_stats	*cpustats;	/* per-cpu counters */
 
 	struct list_head	list;		/* Keep a list of all tunnels */
 	struct net		*l2tp_net;	/* the net we belong to */
@@ -246,6 +264,8 @@ extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int
 extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
 extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
 
+extern int l2tp_stats_copy(struct l2tp_stats *cpustats, struct l2tp_stats *dest);
+
 /* Session reference counts. Incremented when code obtains a reference
  * to a session.
  */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index c3813bc..5ec4732 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -106,6 +106,7 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 	int hash;
 	struct hlist_node *walk;
 	struct hlist_node *tmp;
+	struct l2tp_stats stats;
 
 	read_lock_bh(&tunnel->hlist_lock);
 	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
@@ -146,14 +147,18 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 		   tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
 		   atomic_read(&tunnel->ref_count));
 
-	seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n",
-		   tunnel->debug,
-		   (unsigned long long)tunnel->stats.tx_packets,
-		   (unsigned long long)tunnel->stats.tx_bytes,
-		   (unsigned long long)tunnel->stats.tx_errors,
-		   (unsigned long long)tunnel->stats.rx_packets,
-		   (unsigned long long)tunnel->stats.rx_bytes,
-		   (unsigned long long)tunnel->stats.rx_errors);
+	if (!l2tp_stats_copy(tunnel->cpustats, &stats)) {
+		seq_printf(m, " %08x tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
+				tunnel->debug,
+				(unsigned long long)stats.tx.packets,
+				(unsigned long long)stats.tx.bytes,
+				(unsigned long long)stats.tx.errors,
+				(unsigned long long)stats.rx.packets,
+				(unsigned long long)stats.rx.bytes,
+				(unsigned long long)stats.rx.errors);
+	} else {
+		seq_printf(m, " stats lookup failure\n");
+	}
 
 	if (tunnel->show != NULL)
 		tunnel->show(m, tunnel);
@@ -162,6 +167,7 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 {
 	struct l2tp_session *session = v;
+	struct l2tp_stats stats;
 
 	seq_printf(m, "  SESSION %u, peer %u, %s\n", session->session_id,
 		   session->peer_session_id,
@@ -203,14 +209,18 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 		seq_printf(m, "\n");
 	}
 
-	seq_printf(m, "   %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
-		   session->nr, session->ns,
-		   (unsigned long long)session->stats.tx_packets,
-		   (unsigned long long)session->stats.tx_bytes,
-		   (unsigned long long)session->stats.tx_errors,
-		   (unsigned long long)session->stats.rx_packets,
-		   (unsigned long long)session->stats.rx_bytes,
-		   (unsigned long long)session->stats.rx_errors);
+	if (!l2tp_stats_copy(session->cpustats, &stats)) {
+		seq_printf(m, "   %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
+				session->nr, session->ns,
+				(unsigned long long)stats.tx.packets,
+				(unsigned long long)stats.tx.bytes,
+				(unsigned long long)stats.tx.errors,
+				(unsigned long long)stats.rx.packets,
+				(unsigned long long)stats.rx.bytes,
+				(unsigned long long)stats.rx.errors);
+	} else {
+		seq_printf(m, " stats lookup failure\n");
+	}
 
 	if (session->show != NULL)
 		session->show(m, session);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index ddc553e..09fd33d 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -246,7 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
 	struct ipv6_pinfo *np = NULL;
 #endif
 	struct l2tp_stats stats;
-	unsigned int start;
 
 	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
 			  L2TP_CMD_TUNNEL_GET);
@@ -264,28 +263,19 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	do {
-		start = u64_stats_fetch_begin(&tunnel->stats.syncp);
-		stats.tx_packets = tunnel->stats.tx_packets;
-		stats.tx_bytes = tunnel->stats.tx_bytes;
-		stats.tx_errors = tunnel->stats.tx_errors;
-		stats.rx_packets = tunnel->stats.rx_packets;
-		stats.rx_bytes = tunnel->stats.rx_bytes;
-		stats.rx_errors = tunnel->stats.rx_errors;
-		stats.rx_seq_discards = tunnel->stats.rx_seq_discards;
-		stats.rx_oos_packets = tunnel->stats.rx_oos_packets;
-	} while (u64_stats_fetch_retry(&tunnel->stats.syncp, start));
-
-	if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) ||
-	    nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) ||
+	if (0 != l2tp_stats_copy(tunnel->cpustats, &stats))
+		goto nla_put_failure;
+
+	if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx.packets) ||
+	    nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx.bytes) ||
+	    nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx.errors) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx.packets) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx.bytes) ||
 	    nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS,
-			stats.rx_seq_discards) ||
+			stats.rx.seq_discards) ||
 	    nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS,
-			stats.rx_oos_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors))
+			stats.rx.oos_packets) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx.errors))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 
@@ -612,7 +602,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	struct sock *sk = NULL;
 	struct l2tp_stats stats;
-	unsigned int start;
 
 	sk = tunnel->sock;
 
@@ -655,28 +644,19 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	do {
-		start = u64_stats_fetch_begin(&session->stats.syncp);
-		stats.tx_packets = session->stats.tx_packets;
-		stats.tx_bytes = session->stats.tx_bytes;
-		stats.tx_errors = session->stats.tx_errors;
-		stats.rx_packets = session->stats.rx_packets;
-		stats.rx_bytes = session->stats.rx_bytes;
-		stats.rx_errors = session->stats.rx_errors;
-		stats.rx_seq_discards = session->stats.rx_seq_discards;
-		stats.rx_oos_packets = session->stats.rx_oos_packets;
-	} while (u64_stats_fetch_retry(&session->stats.syncp, start));
-
-	if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) ||
-	    nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) ||
+	if (0 != l2tp_stats_copy(session->cpustats, &stats))
+		goto nla_put_failure;
+
+	if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx.packets) ||
+	    nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx.bytes) ||
+	    nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx.errors) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx.packets) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx.bytes) ||
 	    nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS,
-			stats.rx_seq_discards) ||
+			stats.rx.seq_discards) ||
 	    nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS,
-			stats.rx_oos_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors))
+			stats.rx.oos_packets) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx.errors))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 286366e..054c069 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -222,6 +222,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
 {
 	struct pppol2tp_session *ps = l2tp_session_priv(session);
 	struct sock *sk = NULL;
+	struct l2tp_stats *sstats = NULL;
 
 	/* If the socket is bound, send it in to PPP's input queue. Otherwise
 	 * queue it on the session socket.
@@ -259,7 +260,10 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
 			  session->name);
 
 		/* Not bound. Nothing we can do, so discard. */
-		session->stats.rx_errors++;
+		sstats = this_cpu_ptr(session->cpustats);
+		u64_stats_update_begin(&sstats->rx.syncp);
+		sstats->rx.errors++;
+		u64_stats_update_end(&sstats->rx.syncp);
 		kfree_skb(skb);
 	}
 
@@ -1028,16 +1032,21 @@ end:
  ****************************************************************************/
 
 static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest,
-				struct l2tp_stats *stats)
+				struct l2tp_stats *cpustats)
 {
-	dest->tx_packets = stats->tx_packets;
-	dest->tx_bytes = stats->tx_bytes;
-	dest->tx_errors = stats->tx_errors;
-	dest->rx_packets = stats->rx_packets;
-	dest->rx_bytes = stats->rx_bytes;
-	dest->rx_seq_discards = stats->rx_seq_discards;
-	dest->rx_oos_packets = stats->rx_oos_packets;
-	dest->rx_errors = stats->rx_errors;
+	struct l2tp_stats tmp;
+
+	if (0 != l2tp_stats_copy(cpustats, &tmp))
+		return;
+
+	dest->tx_packets = tmp.tx.packets;
+	dest->tx_bytes = tmp.tx.bytes;
+	dest->tx_errors = tmp.tx.errors;
+	dest->rx_packets = tmp.rx.packets;
+	dest->rx_bytes = tmp.rx.bytes;
+	dest->rx_seq_discards = tmp.rx.seq_discards;
+	dest->rx_oos_packets = tmp.rx.oos_packets;
+	dest->rx_errors = tmp.rx.errors;
 }
 
 /* Session ioctl helper.
@@ -1151,7 +1160,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
 		memset(&stats, 0, sizeof(stats));
 		stats.tunnel_id = tunnel->tunnel_id;
 		stats.session_id = session->session_id;
-		pppol2tp_copy_stats(&stats, &session->stats);
+		pppol2tp_copy_stats(&stats, session->cpustats);
 		if (copy_to_user((void __user *) arg, &stats,
 				 sizeof(stats)))
 			break;
@@ -1214,7 +1223,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
 #ifdef CONFIG_XFRM
 		stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
 #endif
-		pppol2tp_copy_stats(&stats, &tunnel->stats);
+		pppol2tp_copy_stats(&stats, tunnel->cpustats);
 		if (copy_to_user((void __user *) arg, &stats, sizeof(stats))) {
 			err = -EFAULT;
 			break;
@@ -1666,19 +1675,24 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v)
 static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
 {
 	struct l2tp_tunnel *tunnel = v;
+	struct l2tp_stats tmp;
 
 	seq_printf(m, "\nTUNNEL '%s', %c %d\n",
 		   tunnel->name,
 		   (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
 		   atomic_read(&tunnel->ref_count) - 1);
-	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
-		   tunnel->debug,
-		   (unsigned long long)tunnel->stats.tx_packets,
-		   (unsigned long long)tunnel->stats.tx_bytes,
-		   (unsigned long long)tunnel->stats.tx_errors,
-		   (unsigned long long)tunnel->stats.rx_packets,
-		   (unsigned long long)tunnel->stats.rx_bytes,
-		   (unsigned long long)tunnel->stats.rx_errors);
+	if (!l2tp_stats_copy(tunnel->cpustats, &tmp)) {
+		seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
+				tunnel->debug,
+				(unsigned long long)tmp.tx.packets,
+				(unsigned long long)tmp.tx.bytes,
+				(unsigned long long)tmp.tx.errors,
+				(unsigned long long)tmp.rx.packets,
+				(unsigned long long)tmp.rx.bytes,
+				(unsigned long long)tmp.rx.errors);
+	} else {
+		seq_printf(m, " stats lookup failure\n");
+	}
 }
 
 static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
@@ -1687,6 +1701,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	struct pppol2tp_session *ps = l2tp_session_priv(session);
 	struct pppox_sock *po = pppox_sk(ps->sock);
+	struct l2tp_stats tmp;
 	u32 ip = 0;
 	u16 port = 0;
 
@@ -1713,14 +1728,18 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 		   session->lns_mode ? "LNS" : "LAC",
 		   session->debug,
 		   jiffies_to_msecs(session->reorder_timeout));
-	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
-		   session->nr, session->ns,
-		   (unsigned long long)session->stats.tx_packets,
-		   (unsigned long long)session->stats.tx_bytes,
-		   (unsigned long long)session->stats.tx_errors,
-		   (unsigned long long)session->stats.rx_packets,
-		   (unsigned long long)session->stats.rx_bytes,
-		   (unsigned long long)session->stats.rx_errors);
+	if (!l2tp_stats_copy(session->cpustats, &tmp)) {
+		seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
+			   session->nr, session->ns,
+			   (unsigned long long)tmp.tx.packets,
+			   (unsigned long long)tmp.tx.bytes,
+			   (unsigned long long)tmp.tx.errors,
+			   (unsigned long long)tmp.rx.packets,
+			   (unsigned long long)tmp.rx.bytes,
+			   (unsigned long long)tmp.rx.errors);
+	} else {
+		seq_printf(m, " stats lookup failure\n");
+	}
 
 	if (po)
 		seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
-- 
1.7.9.5


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 12:00 [PATCH v2] l2tp: use per-cpu variables for u64_stats updates Tom Parkin
@ 2012-06-27 19:03 ` Eric Dumazet
  2012-06-27 20:21   ` Rick Jones
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-06-27 19:03 UTC (permalink / raw)
  To: Tom Parkin; +Cc: netdev, David.Laight, James Chapman

On Wed, 2012-06-27 at 13:00 +0100, Tom Parkin wrote:
> This patch fixes a race condition in l2tp when updating tunnel and
> session statistics.  Previously it was possible for multiple threads
> to concurrently call u64_stats_update*(), which led to statistics
> readers blocking forever.
> 
> This race was discovered on an AMD64 SMP machine running a 32bit
> kernel.  Running "ip l2tp" while sending data over an Ethernet
> pseudowire resulted in an occasional soft lockup in
> u64_stats_fetch_begin() called from l2tp_nl_session_send().
> 
> For safe lockless update of l2tp stats, data is now stored in per-cpu
> variables.  These per-cpu datasets are then summed at read time via
> an extra helper function, l2tp_stats_copy(), which has been added to
> l2tp_core.c.
> 

Do we really need 64bit stats on 32bit arches for l2tp?

> Signed-off-by: Tom Parkin <tparkin@katalix.com>
> Signed-off-by: James Chapman <jchapman@katalix.com>
> ---
>  net/l2tp/l2tp_core.c    |  286 ++++++++++++++++++++++++++++-------------------
>  net/l2tp/l2tp_core.h    |   44 ++++++--
>  net/l2tp/l2tp_debugfs.c |   42 ++++---
>  net/l2tp/l2tp_netlink.c |   64 ++++-------
>  net/l2tp/l2tp_ppp.c     |   75 ++++++++-----
>  5 files changed, 301 insertions(+), 210 deletions(-)
> 
> diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
> index 32b2155..ab2ffc0 100644
> --- a/net/l2tp/l2tp_core.c
> +++ b/net/l2tp/l2tp_core.c
> @@ -320,6 +320,43 @@ struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
>  }
>  EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth);
>  
> +/*
> + * Sum tunnel/session statistics across all CPUs
> + */
> +int l2tp_stats_copy(struct l2tp_stats *cpustats, struct l2tp_stats *dest)
> +{
> +	int i;
> +	unsigned int start;
> +
> +	if (!cpustats || !dest)
> +		return 1;
> +
> +	memset(dest, 0, sizeof(struct l2tp_stats));
> +
> +	for_each_possible_cpu(i) {
> +		struct l2tp_stats *stats = per_cpu_ptr(cpustats, i);
> +
> +		do {
> +			start = u64_stats_fetch_begin(&stats->tx.syncp);
> +			dest->tx.packets += stats->tx.packets;
> +			dest->tx.bytes += stats->tx.bytes;
> +			dest->tx.errors += stats->tx.errors;

You can't do the sum in 'dest', since if the loop is restarted, you'll
accumulate the values more than once (see the sketch after the quoted
function below).

> +		} while (u64_stats_fetch_retry(&stats->tx.syncp, start));
> +
> +		do {
> +			start = u64_stats_fetch_begin(&stats->rx.syncp);
> +			dest->rx.packets += stats->rx.packets;
> +			dest->rx.bytes += stats->rx.bytes;
> +			dest->rx.errors += stats->rx.errors;
> +			dest->rx.seq_discards += stats->rx.seq_discards;
> +			dest->rx.oos_packets += stats->rx.oos_packets;

same problem

> +		} while (u64_stats_fetch_retry(&stats->rx.syncp, start));
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(l2tp_stats_copy);
> +
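
A minimal sketch of the fix being asked for here (illustrative only, not
part of the posted patch): snapshot each counter into locals inside the
begin/retry window, then fold the consistent snapshot into 'dest' exactly
once, so a restarted loop cannot double-count:

	for_each_possible_cpu(i) {
		struct l2tp_stats *stats = per_cpu_ptr(cpustats, i);
		u64 tpackets, tbytes, terrors;

		do {
			start = u64_stats_fetch_begin(&stats->tx.syncp);
			/* copy into locals; a retry simply re-reads them */
			tpackets = stats->tx.packets;
			tbytes = stats->tx.bytes;
			terrors = stats->tx.errors;
		} while (u64_stats_fetch_retry(&stats->tx.syncp, start));

		/* accumulate the consistent snapshot exactly once */
		dest->tx.packets += tpackets;
		dest->tx.bytes += tbytes;
		dest->tx.errors += terrors;

		/* ... and likewise for the rx counters ... */
	}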


>  
>  static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest,
> -				struct l2tp_stats *stats)
> +				struct l2tp_stats *cpustats)
>  {
> -	dest->tx_packets = stats->tx_packets;
> -	dest->tx_bytes = stats->tx_bytes;
> -	dest->tx_errors = stats->tx_errors;
> -	dest->rx_packets = stats->rx_packets;
> -	dest->rx_bytes = stats->rx_bytes;
> -	dest->rx_seq_discards = stats->rx_seq_discards;
> -	dest->rx_oos_packets = stats->rx_oos_packets;
> -	dest->rx_errors = stats->rx_errors;
> +	struct l2tp_stats tmp;
> +
> +	if (0 != l2tp_stats_copy(cpustats, &tmp))
> +		return;

	if (l2tp_stats_copy(cpustats, &tmp) != 0)
		return;

> +
> +	dest->tx_packets = tmp.tx.packets;
> +	dest->tx_bytes = tmp.tx.bytes;
> +	dest->tx_errors = tmp.tx.errors;
> +	dest->rx_packets = tmp.rx.packets;
> +	dest->rx_bytes = tmp.rx.bytes;
> +	dest->rx_seq_discards = tmp.rx.seq_discards;
> +	dest->rx_oos_packets = tmp.rx.oos_packets;
> +	dest->rx_errors = tmp.rx.errors;
>  


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 19:03 ` Eric Dumazet
@ 2012-06-27 20:21   ` Rick Jones
  2012-06-27 20:39     ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Rick Jones @ 2012-06-27 20:21 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Parkin, netdev, David.Laight, James Chapman

On 06/27/2012 12:03 PM, Eric Dumazet wrote:
> On Wed, 2012-06-27 at 13:00 +0100, Tom Parkin wrote:
>> This patch fixes a race condition in l2tp when updating tunnel and
>> session statistics.  Previously it was possible for multiple threads
>> to concurrently call u64_stats_update*(), which led to statistics
>> readers blocking forever.
>>
>> This race was discovered on an AMD64 SMP machine running a 32bit
>> kernel.  Running "ip l2tp" while sending data over an Ethernet
>> pseudowire resulted in an occasional soft lockup in
>> u64_stats_fetch_begin() called from l2tp_nl_session_send().
>>
>> For safe lockless update of l2tp stats, data is now stored in per-cpu
>> variables.  These per-cpu datasets are then summed at read time via
>> an extra helper function, l2tp_stats_copy(), which has been added to
>> l2tp_core.c.
>>
>
> Do we really need 64bit stats on 32bit arches for l2tp?

It is a question of the speed of the communications more than the 
bitness of the processor, no?

rick jones


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 20:21   ` Rick Jones
@ 2012-06-27 20:39     ` Eric Dumazet
  2012-06-27 20:50       ` Stephen Hemminger
                         ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-06-27 20:39 UTC (permalink / raw)
  To: Rick Jones; +Cc: Tom Parkin, netdev, David.Laight, James Chapman

On Wed, 2012-06-27 at 13:21 -0700, Rick Jones wrote:

> It is a question of the speed of the communications more than the 
> bitness of the processor, no?

Why?

In 2012 or 2013, 64bit kernels are the norm, and 32bit the exception.

Should we add complex code to l2tp just to be able to run it on
32bit kernels with 64bit stats?

Considering this code is buggy in v1 & v2, I am really wondering.

All sane SNMP applications are ready to cope with 32bit counters
wrapping.

Machines that could wrap the 32bit counter several times per second are
probably running on 64bit kernels.

Also percpu stats are overkill unless a device is really meant to be
used in // by many cpus.


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 20:39     ` Eric Dumazet
@ 2012-06-27 20:50       ` Stephen Hemminger
  2012-06-27 20:58         ` Ben Greear
  2012-06-27 21:00       ` Rick Jones
  2012-06-27 22:21       ` David Miller
  2 siblings, 1 reply; 21+ messages in thread
From: Stephen Hemminger @ 2012-06-27 20:50 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Rick Jones, Tom Parkin, netdev, David.Laight, James Chapman

On Wed, 27 Jun 2012 22:39:01 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> All sane SNMP applications are ready to cope with 32bit counters
> wrapping.

Actually that statement depends on the data rate. SNMP daemons work
by polling at periodic intervals. The limit for detecting rollover depends
on the rate and the interval. I believe the ubiquitous net-snmp code uses
something like a 30 second polling interval for lots of its caches. This means
a 32bit counter rolls over too fast at 10G. Polling faster can help, but
net-snmp is a pig about updates.
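
(To put numbers on that: a 32bit octet counter at 10G wraps after 2^32
bytes, which at 1.25e9 bytes/sec is roughly every 3.4 seconds, well
inside a 30 second polling interval.)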

I just realized the whole x32 case (running 32 bit apps on a 64 bit kernel)
is broken for things like /proc/net/dev, where the 64 bit kernel will give
64 bit values and the 32 bit app (like net-snmp) is expecting unsigned
long (32 bits).


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 20:50       ` Stephen Hemminger
@ 2012-06-27 20:58         ` Ben Greear
  2012-06-27 21:20           ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Ben Greear @ 2012-06-27 20:58 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Eric Dumazet, Rick Jones, Tom Parkin, netdev, David.Laight,
	James Chapman

On 06/27/2012 01:50 PM, Stephen Hemminger wrote:
> On Wed, 27 Jun 2012 22:39:01 +0200
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>> All sane SNMP applications are ready to cope with 32bit counters
>> wrapping.
>
> Actually that statement depends on the data rate. SNMP daemons work
> by polling at periodic intervals. The limit for detecting rollover depends
> on the rate and the interval. I believe the ubiquitous net-snmp code uses
> something like a 30 second polling interval for lots of its caches. This means
> a 32bit counter rolls over too fast at 10G. Polling faster can help, but
> net-snmp is a pig about updates.
>
> I just realized the whole x32 case (running 32 bit apps on a 64 bit kernel)
> is broken for things like /proc/net/dev, where the 64 bit kernel will give
> 64 bit values and the 32 bit app (like net-snmp) is expecting unsigned
> long (32 bits).

It's worse than that:  Even on 64-bit kernels, counters that are returned by
netlink and /proc/net/dev as 64-bit may still wrap themselves at 32-bit
intervals.

I found that I just have to be very paranoid, and assume that if a '64-bit'
number wraps its high 32-bits between polls then it is really just a 32-bit
number and do that wrap properly (and poll more often).
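
To make that concrete, here is a sketch of the kind of wrap-aware delta
computation this implies (hypothetical helper, assuming at most one wrap
per polling interval):

	#include <stdint.h>

	/* Delta between two polls of a counter reported as 64-bit but
	 * possibly only 32-bit underneath.  If the value appears to have
	 * gone backwards, assume a 32-bit counter wrapped; unsigned
	 * arithmetic then yields the correct modulo-2^32 delta.
	 */
	static uint64_t counter_delta(uint64_t prev, uint64_t cur)
	{
		if (cur >= prev)
			return cur - prev;
		return (uint64_t)((uint32_t)cur - (uint32_t)prev);
	}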

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 20:39     ` Eric Dumazet
  2012-06-27 20:50       ` Stephen Hemminger
@ 2012-06-27 21:00       ` Rick Jones
  2012-06-27 22:21       ` David Miller
  2 siblings, 0 replies; 21+ messages in thread
From: Rick Jones @ 2012-06-27 21:00 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Parkin, netdev, David.Laight, James Chapman

On 06/27/2012 01:39 PM, Eric Dumazet wrote:
> On Wed, 2012-06-27 at 13:21 -0700, Rick Jones wrote:
>
>> It is a question of the speed of the communications more than the
>> bitness of the processor, no?
>
> Why?

The desire to have a greater than 32 bit counter stems from how quickly 
it will wrap, and how quickly it will wrap depends on the speed of 
communication.

> In 2012 or 2013, 64bit kernels are the norm, and 32bit the exception.

In servers and laptops I would be inclined to agree.  Elsewhere I am not 
so sure.   And while I don't know of its status, there is at least one 
hit on an RFC where, back in 2001, when a great many things were 32 bit, 
various 64-bit counters were added for L2TP Domain statistics:

http://tools.ietf.org/html/draft-ietf-l2tpext-l2tp-mib-03

(I think that became http://tools.ietf.org/html/rfc3371 in 2002)

So, it is possible people were simply painting a bikeshed, but 10 years 
ago, when the prevalence of 64 bit processors was not nearly so great, 
folks involved in L2TP MIB definitions found it worthwhile to make 
various counters 64-bit.

> Should we add complex code to l2tp just to be able to run it on
> 32bit kernels with 64bit stats?
>
> Considering this code is buggy in v1 & v2, I am really wondering.
>
> All sane SNMP applications are ready to cope with 32bit counters
> wrapping.

Once (maybe twice?) but no more than that.

> Machines that could wrap the 32bit counter several times per second are
> probably running on 64bit kernels.

I don't think it requires wrapping a 32 bit counter several times a 
second to warrant a > 32 bit counter.

> Also percpu stats are overkill unless a device is really meant to be
> used in // by many cpus.

I take it "//" is used as a shorthand for parallel?

rick


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 20:58         ` Ben Greear
@ 2012-06-27 21:20           ` Eric Dumazet
  2012-06-27 21:31             ` Ben Greear
  2012-06-27 21:32             ` Eric Dumazet
  0 siblings, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-06-27 21:20 UTC (permalink / raw)
  To: Ben Greear
  Cc: Stephen Hemminger, Rick Jones, Tom Parkin, netdev, David.Laight,
	James Chapman

On Wed, 2012-06-27 at 13:58 -0700, Ben Greear wrote:

> It's worse than that:  Even on 64-bit kernels, counters that are returned by
> netlink and /proc/net/dev as 64-bit may still wrap themselves at 32-bit
> intervals.

Really?

That's incredible you don't send a bug report then.


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 21:20           ` Eric Dumazet
@ 2012-06-27 21:31             ` Ben Greear
  2012-06-27 21:35               ` Eric Dumazet
  2012-06-27 21:32             ` Eric Dumazet
  1 sibling, 1 reply; 21+ messages in thread
From: Ben Greear @ 2012-06-27 21:31 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Stephen Hemminger, Rick Jones, Tom Parkin, netdev, David.Laight,
	James Chapman

On 06/27/2012 02:20 PM, Eric Dumazet wrote:
> On Wed, 2012-06-27 at 13:58 -0700, Ben Greear wrote:
>
>> It's worse than that:  Even on 64-bit kernels, counters that are returned by
>> netlink and /proc/net/dev as 64-bit may still wrap themselves at 32-bit
>> intervals.
>
> Really?
>
> That's incredible you don't send a bug report then.

I bitched and moaned a while back...didn't get a lot
of positive feedback :)

For an example, see the VLAN code. rx-errors and tx-dropped are only 32-bit
counters.  Now, in the real world, we wouldn't expect those counters to
increase at high rates, but they are still 32-bit counters masquerading
as 64, and they could wrap after a while, so any code that expected a wrap
to mean a 64-bit wrap would be wrong.

At the time I was last complaining, there were lots more cases
of this that I was fighting with, but I don't recall exactly what they
were.  Once my user-space code got paranoid enough, it was able to
at least mostly deal with 32 and 64 bit wraps.

static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{

	if (vlan_dev_priv(dev)->vlan_pcpu_stats) {
		struct vlan_pcpu_stats *p;
		u32 rx_errors = 0, tx_dropped = 0, collisions = 0;
		int i;

		for_each_possible_cpu(i) {
			u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
			unsigned int start;

			p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
			do {
				start = u64_stats_fetch_begin_bh(&p->syncp);
				rxpackets	= p->rx_packets;
				rxbytes		= p->rx_bytes;
				rxmulticast	= p->rx_multicast;
				txpackets	= p->tx_packets;
				txbytes		= p->tx_bytes;
			} while (u64_stats_fetch_retry_bh(&p->syncp, start));

			stats->rx_packets	+= rxpackets;
			stats->rx_bytes		+= rxbytes;
			stats->multicast	+= rxmulticast;
			stats->tx_packets	+= txpackets;
			stats->tx_bytes		+= txbytes;
			/* rx_errors & tx_dropped are u32 */
			rx_errors	+= p->rx_errors;
			tx_dropped	+= p->tx_dropped;
			collisions	+= p->collisions;
		}
		stats->rx_errors  = rx_errors;
		stats->tx_dropped = tx_dropped;
		stats->collisions = collisions;
	}
	return stats;
}

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 21:20           ` Eric Dumazet
  2012-06-27 21:31             ` Ben Greear
@ 2012-06-27 21:32             ` Eric Dumazet
  2012-06-27 21:40               ` Ben Greear
  1 sibling, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-06-27 21:32 UTC (permalink / raw)
  To: Ben Greear
  Cc: Stephen Hemminger, Rick Jones, Tom Parkin, netdev, David.Laight,
	James Chapman

On Wed, 2012-06-27 at 23:20 +0200, Eric Dumazet wrote:
> On Wed, 2012-06-27 at 13:58 -0700, Ben Greear wrote:
> 
> > It's worse than that:  Even on 64-bit kernels, counters that are returned by
> > netlink and /proc/net/dev as 64-bit may still wrap themselves at 32-bit
> > intervals.
> 
> Really?
> 
> That's incredible you don't send a bug report then.

A bug report to the application author, not the kernel.

/proc/net/dev is an ASCII file, and nothing gives the width of a field.

Therefore, an application should cope with all cases (counter being a
32bit or 64bit integer), wrapping included.

Note that this has little to do with the application or kernel being
32/64 bit code.


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 21:31             ` Ben Greear
@ 2012-06-27 21:35               ` Eric Dumazet
  2012-06-27 23:01                 ` Rick Jones
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-06-27 21:35 UTC (permalink / raw)
  To: Ben Greear
  Cc: Stephen Hemminger, Rick Jones, Tom Parkin, netdev, David.Laight,
	James Chapman

On Wed, 2012-06-27 at 14:31 -0700, Ben Greear wrote:

> For an example, see the VLAN code. rx-errors and tx-dropped are only 32-bit
> counters.  Now, in the real world, we wouldn't expect those counters to
> increase at high rates, but they are still 32-bit counters masquerading
> as 64, and they could wrap after a while, so any code that expected a wrap
> to mean a 64-bit wrap would be wrong.
> 
> At the time I was last complaining, there were lots more cases
> of this that I was fighting with, but I don't recall exactly what they
> were.  Once my user-space code got paranoid enough, it was able to
> at least mostly deal with 32 and 64 wraps.

Good, you now know how to deal correctly with these things.

Using 64bit fields for tx_dropped is what I call kernel bloat.

If only all drops were correctly accounted...

I believe 50% of drivers are buggy in this respect.


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 21:32             ` Eric Dumazet
@ 2012-06-27 21:40               ` Ben Greear
  2012-06-27 21:50                 ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Ben Greear @ 2012-06-27 21:40 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Stephen Hemminger, Rick Jones, Tom Parkin, netdev, David.Laight,
	James Chapman

On 06/27/2012 02:32 PM, Eric Dumazet wrote:
> On Wed, 2012-06-27 at 23:20 +0200, Eric Dumazet wrote:
>> On Wed, 2012-06-27 at 13:58 -0700, Ben Greear wrote:
>>
>>> It's worse than that:  Even on 64-bit kernels, counters that are returned by
>>> netlink and /proc/net/dev as 64-bit may still wrap themselves at 32-bit
>>> intervals.
>>
>> Really?
>>
>> That's incredible you don't send a bug report then.
>
> A bug report to the application author, not the kernel.

Yeah, application author was me.

> /proc/net/dev is an ASCII file, and nothing gives the width of a field.
>
> Therefore, an application should cope with all cases (counter being a
> 32bit or 64bit integer), wrapping included.
>
> Note that this has little to do with the application or kernel being
> 32/64 bit code.

Notice that the netlink stats are claiming 64-bit and they are not
(always) 64-bit.

That is a nice binary API that is still wrapping before its time
in many cases.  There may be good performance reasons for keeping
some counters at 32-bit, but it plays merry hell with anyone wanting
to optimize an application to poll less often for stats that are
supposedly 64-bit.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 21:40               ` Ben Greear
@ 2012-06-27 21:50                 ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-06-27 21:50 UTC (permalink / raw)
  To: Ben Greear
  Cc: Stephen Hemminger, Rick Jones, Tom Parkin, netdev, David.Laight,
	James Chapman

On Wed, 2012-06-27 at 14:40 -0700, Ben Greear wrote:

> Notice that the netlink stats are claiming 64-bit and they are not
> (always) 64-bit.


There is no such claim.

netlink provides a 64bit generic binary holder, even for legacy drivers
still using 'unsigned long' stats, so obviously 32bit values on 32bit
kernels.

> That is a nice binary API that is still wrapping before its time
> in many cases.  There may be good performance reasons for keeping
> some counters at 32-bit, but it plays merry hell with anyone wanting
> to optimize an application to poll less often for stats that are
> supposedly 64-bit.

We don't want hundreds of patches to bring 64bit stats to legacy drivers.

Nobody cares, because it's way too late to try to 'fix' this.

If you want your application to run on linux-2.6.x or linux-3.0, you
are forced to correctly detect whether each counter is 32 or 64 bit,
not because the netlink API is 64bit, but because a driver may provide
a 32 or 64 bit value.


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 20:39     ` Eric Dumazet
  2012-06-27 20:50       ` Stephen Hemminger
  2012-06-27 21:00       ` Rick Jones
@ 2012-06-27 22:21       ` David Miller
  2 siblings, 0 replies; 21+ messages in thread
From: David Miller @ 2012-06-27 22:21 UTC (permalink / raw)
  To: eric.dumazet; +Cc: rick.jones2, tparkin, netdev, David.Laight, jchapman

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 27 Jun 2012 22:39:01 +0200

> In 2012 or 2013, 64bit kernels are the norm, and 32bit the exception.
> 
> Should we add complex code to l2tp just to be able to run it on
> 32bit kernels with 64bit stats?
> 
> Considering this code is buggy in v1 & v2, I am really wondering.
> 
> All sane SNMP applications are ready to cope with 32bit counters
> wrapping.
> 
> Machines that could wrap the 32bit counter several times per second are
> probably running on 64bit kernels.
> 
> Also percpu stats are overkill unless a device is really meant to be
> used in // by many cpus.

I agree with Eric on all counts.


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 21:35               ` Eric Dumazet
@ 2012-06-27 23:01                 ` Rick Jones
  2012-06-27 23:09                   ` David Miller
  2012-06-28  5:00                   ` Eric Dumazet
  0 siblings, 2 replies; 21+ messages in thread
From: Rick Jones @ 2012-06-27 23:01 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Ben Greear, Stephen Hemminger, Tom Parkin, netdev, David.Laight,
	James Chapman

On 06/27/2012 02:35 PM, Eric Dumazet wrote:
> On Wed, 2012-06-27 at 14:31 -0700, Ben Greear wrote:
>
>> For an example, see the VLAN code. rx-errors and tx-dropped are only 32-bit
>> counters.  Now, in the real world, we wouldn't expect those counters to
>> increase at high rates, but they are still 32-bit counters masquerading
>> as 64, and they could wrap after a while, so any code that expected a wrap
>> to mean a 64-bit wrap would be wrong.
>>
>> At the time I was last complaining, there were lots more cases
>> of this that I was fighting with, but I don't recall exactly what they
>> were.  Once my user-space code got paranoid enough, it was able to
>> at least mostly deal with 32 and 64 wraps.
>
> Good, you now know how to deal correctly with these things.
>
> Using 64bit fields for tx_dropped is what I call kernel bloat.

Today, sure, generalizing to packet counters in general, that bloat is 
likely on its way.  At 100 Gbit/s Ethernet, that is upwards of 147 
million packets per second each way.  At 1 GbE it is 125 million octets 
per second.  So, if 32 bit octet counters were insufficient for 1 GbE, 
32 bit packet counters likely will be insufficient for 100GbE.

Or, I suppose, 3 or more bonded 40 GbEs or 10 or more bonded 10 GbEs 
(unlikely though that last one may be) assuming there is stats 
aggregation in the bond interface.

rick jones


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 23:01                 ` Rick Jones
@ 2012-06-27 23:09                   ` David Miller
  2012-06-27 23:39                     ` Rick Jones
  2012-06-28  5:00                   ` Eric Dumazet
  1 sibling, 1 reply; 21+ messages in thread
From: David Miller @ 2012-06-27 23:09 UTC (permalink / raw)
  To: rick.jones2
  Cc: eric.dumazet, greearb, shemminger, tparkin, netdev, David.Laight,
	jchapman

From: Rick Jones <rick.jones2@hp.com>
Date: Wed, 27 Jun 2012 16:01:23 -0700

> At 100 Gbit/s Ethernet, that is upwards of 147

Listing upcoming technologies shows that you miss Eric's point.

Nobody with a brain is going to drive those kinds of cards on boxes
running 32-bit kernels.


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 23:09                   ` David Miller
@ 2012-06-27 23:39                     ` Rick Jones
  0 siblings, 0 replies; 21+ messages in thread
From: Rick Jones @ 2012-06-27 23:39 UTC (permalink / raw)
  To: David Miller
  Cc: eric.dumazet, greearb, shemminger, tparkin, netdev, David.Laight,
	jchapman

On 06/27/2012 04:09 PM, David Miller wrote:
> From: Rick Jones <rick.jones2@hp.com>
> Date: Wed, 27 Jun 2012 16:01:23 -0700
>
>> At 100 Gbit/s Ethernet, that is upwards of 147
>
> Listing upcoming technologies shows that you miss Eric's point.
>
> Nobody with a brain is going to drive those kinds of cards on boxes
> running 32-bit kernels.

Yes, I strayed from the context of 32-bit kernels.  I will go run iperf 
a couple times as penance :)

rick


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-27 23:01                 ` Rick Jones
  2012-06-27 23:09                   ` David Miller
@ 2012-06-28  5:00                   ` Eric Dumazet
  2012-06-28  8:24                     ` Tom Parkin
  2012-06-28  8:46                     ` David Laight
  1 sibling, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-06-28  5:00 UTC (permalink / raw)
  To: Rick Jones
  Cc: Ben Greear, Stephen Hemminger, Tom Parkin, netdev, David.Laight,
	James Chapman

On Wed, 2012-06-27 at 16:01 -0700, Rick Jones wrote:

> Today, sure.  Generalizing to packet counters, though, that bloat is
> likely on its way.  At 100 Gbit/s Ethernet, that is upwards of 147
> million packets per second each way.  At 1 GbE it is 125 million octets
> per second.  So, if 32-bit octet counters were insufficient for 1 GbE,
> 32-bit packet counters will likely be insufficient for 100 GbE.
> 
> Or, I suppose, three or more bonded 40 GbEs, or ten or more bonded
> 10 GbEs (unlikely though that last one may be), assuming there is stats
> aggregation in the bond interface.

Note that I am all for 64bit counters on 64bit kernels because they are
almost[1] free, since they fit in a machine word (unsigned long).

tx_dropped is the count of dropped _packets_.

If more than 32 bits are needed, and someone must run 100GbE on a
32bit machine from the last century, he really has a big problem.


[1] : the LLTX drivers case
   Since ndo_start_xmit() can be run concurrently by many cpus, safely
updating an "unsigned long" requires additional hassle:

   1) Use a spinlock to protect the update.
   2) Use atomic_long_t instead of "unsigned long".
   3) Use percpu data.

3) is overkill for devices with light traffic, because it consumes a lot
of RAM on machines with 2048 possible cpus, _and_ the reader must fold
the data from all possible cpus.
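
As a minimal sketch of those three options (the counter and function
names here are hypothetical, not taken from any driver):

#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

/* 1) spinlock-protected update */
struct stats_locked {
        spinlock_t lock;
        unsigned long tx_dropped;
};

static void inc_locked(struct stats_locked *s)
{
        spin_lock(&s->lock);
        s->tx_dropped++;
        spin_unlock(&s->lock);
}

/* 2) atomic update */
static atomic_long_t tx_dropped_atomic;

static void inc_atomic(void)
{
        atomic_long_inc(&tx_dropped_atomic);
}

/* 3) percpu data: lockless writers, but the reader must fold the
 * counters of every possible cpu */
static DEFINE_PER_CPU(unsigned long, tx_dropped_pcpu);

static void inc_percpu(void)
{
        this_cpu_inc(tx_dropped_pcpu);
}

static unsigned long read_percpu(void)
{
        unsigned long sum = 0;
        int cpu;

        for_each_possible_cpu(cpu)
                sum += per_cpu(tx_dropped_pcpu, cpu);
        return sum;
}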


* Re: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-28  5:00                   ` Eric Dumazet
@ 2012-06-28  8:24                     ` Tom Parkin
  2012-06-28  8:46                     ` David Laight
  1 sibling, 0 replies; 21+ messages in thread
From: Tom Parkin @ 2012-06-28  8:24 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Rick Jones, Ben Greear, Stephen Hemminger, netdev, David.Laight,
	James Chapman


On Thu, Jun 28, 2012 at 07:00:54AM +0200, Eric Dumazet wrote:
> [1] : the LLTX drivers case
>    Since ndo_start_xmit() can be run concurrently by many cpus, safely
> updating an "unsigned long" requires additional hassle:
> 
>    1) Use a spinlock to protect the update.
>    2) Use atomic_long_t instead of "unsigned long".
>    3) Use percpu data.
> 
> 3) is overkill for devices with light traffic, because it consumes a lot
> of RAM on machines with 2048 possible cpus, _and_ the reader must fold
> the data from all possible cpus.

Thanks Eric.

So am I right in thinking that a v3 patch which uses atomic_long_t for
the statistics would be the correct way forward?
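For example, such a conversion might look roughly like this (the struct
and helper names are hypothetical, just to illustrate the idea):

#include <linux/atomic.h>

struct l2tp_atomic_stats {
        atomic_long_t tx_packets;
        atomic_long_t tx_bytes;
        atomic_long_t rx_packets;
        atomic_long_t rx_bytes;
};

/* writer side: safe against concurrent updates, no syncp needed */
static void l2tp_count_tx(struct l2tp_atomic_stats *stats,
                          unsigned int len)
{
        atomic_long_inc(&stats->tx_packets);
        atomic_long_add(len, &stats->tx_bytes);
}

/* reader side: a plain atomic_long_read(), no retry loop; on a 32bit
 * kernel the counters are then 32 bits wide, which is the trade-off
 * discussed above */
static unsigned long l2tp_read_tx_packets(struct l2tp_atomic_stats *stats)
{
        return atomic_long_read(&stats->tx_packets);
}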

-- 
Tom Parkin
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development



* RE: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-28  5:00                   ` Eric Dumazet
  2012-06-28  8:24                     ` Tom Parkin
@ 2012-06-28  8:46                     ` David Laight
  2012-06-28 18:17                       ` Ben Hutchings
  1 sibling, 1 reply; 21+ messages in thread
From: David Laight @ 2012-06-28  8:46 UTC (permalink / raw)
  To: Eric Dumazet, Rick Jones
  Cc: Ben Greear, Stephen Hemminger, Tom Parkin, netdev, James Chapman

 
> [1] : the LLTX drivers case
>    Since ndo_start_xmit() can be run concurrently by many cpus, safely
> updating an "unsigned long" requires additional hassle:
> 
>    1) Use a spinlock to protect the update.
>    2) Use atomic_long_t instead of "unsigned long".
>    3) Use percpu data.

4) These are statistics so it shouldn't really matter if
   they are out by a small number.  So the errors
   introduced by unlocked updates can probably be ignored.
   So just use 'unsigned long'.

   Might be worth putting in gcc barriers so that the
   load and store instructions aren't separated too far.
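
A minimal sketch of that approach, using the then-current ACCESS_ONCE()
idiom as the gcc barrier (illustrative only; a racing cpu can still
lose an increment, which is the trade-off accepted here):

#include <linux/compiler.h>

static void stats_inc_unlocked(unsigned long *ctr)
{
        unsigned long v = ACCESS_ONCE(*ctr);    /* one load, not re-read */

        ACCESS_ONCE(*ctr) = v + 1;              /* one store, not deferred */
}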

	David


* RE: [PATCH v2] l2tp: use per-cpu variables for u64_stats updates
  2012-06-28  8:46                     ` David Laight
@ 2012-06-28 18:17                       ` Ben Hutchings
  0 siblings, 0 replies; 21+ messages in thread
From: Ben Hutchings @ 2012-06-28 18:17 UTC (permalink / raw)
  To: David Laight
  Cc: Eric Dumazet, Rick Jones, Ben Greear, Stephen Hemminger,
	Tom Parkin, netdev, James Chapman

On Thu, 2012-06-28 at 09:46 +0100, David Laight wrote:
> > [1] : LLTX drivers case 
> >    since ndo_start_xmit() can be run concurrently by many cpus, safely
> > updating an "unsigned long" requires additional hassle :
> > 
> >    1) Use of a spinlock to protect the update.
> >    2) Use atomic_long_t instead of "unsigned long"
> >    3) Use percpu data
> 
> 4) These are statistics so it shouldn't really matter if
>    they are out by a small number.

You might be surprised just how much end users care about statistics.

>    So the errors
>    introduced by unlocked updates can probably be ignored.
>    So just use 'unsigned long'.

This can result in statistics being decremented in case of a race.  That
should never be allowed to happen.
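
For instance, a hypothetical interleaving of plain read-modify-write
updates on a shared counter:

        /* ctr == 100 initially */
        cpu0:   v = ctr;        /* reads 100 */
        cpu1:   ctr++;          /* ten times; ctr reaches 110 */
        cpu0:   ctr = v + 1;    /* stores 101: a reader sees 110 -> 101 */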

Ben.

>    Might be worth putting in gcc barriers so that the
>    load and store instructions aren't separated too far.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


end of thread

Thread overview: 21+ messages
2012-06-27 12:00 [PATCH v2] l2tp: use per-cpu variables for u64_stats updates Tom Parkin
2012-06-27 19:03 ` Eric Dumazet
2012-06-27 20:21   ` Rick Jones
2012-06-27 20:39     ` Eric Dumazet
2012-06-27 20:50       ` Stephen Hemminger
2012-06-27 20:58         ` Ben Greear
2012-06-27 21:20           ` Eric Dumazet
2012-06-27 21:31             ` Ben Greear
2012-06-27 21:35               ` Eric Dumazet
2012-06-27 23:01                 ` Rick Jones
2012-06-27 23:09                   ` David Miller
2012-06-27 23:39                     ` Rick Jones
2012-06-28  5:00                   ` Eric Dumazet
2012-06-28  8:24                     ` Tom Parkin
2012-06-28  8:46                     ` David Laight
2012-06-28 18:17                       ` Ben Hutchings
2012-06-27 21:32             ` Eric Dumazet
2012-06-27 21:40               ` Ben Greear
2012-06-27 21:50                 ` Eric Dumazet
2012-06-27 21:00       ` Rick Jones
2012-06-27 22:21       ` David Miller
