All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH ipsec-next 0/3] Add support for per cpu xfrm states.
@ 2024-04-12  6:05 Steffen Klassert
  2024-04-12  6:05 ` [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling Steffen Klassert
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Steffen Klassert @ 2024-04-12  6:05 UTC (permalink / raw)
  To: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu
  Cc: Steffen Klassert

Add support for per cpu xfrm states.

This patchset implements the xfrm part of per cpu SAs as specified in:

https://datatracker.ietf.org/doc/draft-ietf-ipsecme-multi-sa-performance/

Patch 1 adds the cpu as a lookup key and config option to to generate
acquire messages for each cpu.

Patch 2 caches outbound states at the policy.

Patch 3 caches inbound states on a new percpu state cache.

Please review and test.

Thanks!

----------------------------------------------------------------
Steffen Klassert (3):
      xfrm: Add support for per cpu xfrm state handling.
      xfrm: Cache used outbound xfrm states at the policy.
      xfrm: Add an inbound percpu state cache.

 include/net/netns/xfrm.h  |   1 +
 include/net/xfrm.h        |  13 +++-
 include/uapi/linux/xfrm.h |   2 +
 net/ipv4/esp4_offload.c   |   6 +-
 net/ipv6/esp6_offload.c   |   6 +-
 net/key/af_key.c          |   7 +-
 net/xfrm/xfrm_input.c     |   2 +-
 net/xfrm/xfrm_policy.c    |  12 ++++
 net/xfrm/xfrm_state.c     | 160 ++++++++++++++++++++++++++++++++++++++++++----
 net/xfrm/xfrm_user.c      |  43 ++++++++++++-
 10 files changed, 227 insertions(+), 25 deletions(-)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling.
  2024-04-12  6:05 [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Steffen Klassert
@ 2024-04-12  6:05 ` Steffen Klassert
  2024-04-12 10:37   ` Tobias Brunner
                     ` (2 more replies)
  2024-04-12  6:05 ` [PATCH ipsec-next 2/3] xfrm: Cache used outbound xfrm states at the policy Steffen Klassert
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 16+ messages in thread
From: Steffen Klassert @ 2024-04-12  6:05 UTC (permalink / raw)
  To: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu
  Cc: Steffen Klassert

Currently all flows for a certain SA must be processed by the same
cpu to avoid packet reordering and lock contention of the xfrm
state lock.

To get rid of this limitation, the IETF is about to standardize
per cpu SAs. This patch implements the xfrm part of it:

https://datatracker.ietf.org/doc/draft-ietf-ipsecme-multi-sa-performance/

This adds the cpu as a lookup key for xfrm states and a config option
to generate acquire messages for each cpu.

With that, we can have on each cpu a SA with identical traffic selector
so that flows can be processed in parallel on all cpu.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h        |  5 ++--
 include/uapi/linux/xfrm.h |  2 ++
 net/key/af_key.c          |  7 +++---
 net/xfrm/xfrm_state.c     | 49 ++++++++++++++++++++++++++++++---------
 net/xfrm/xfrm_user.c      | 43 ++++++++++++++++++++++++++++++++--
 5 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 57c743b7e4fe..2ba4c077ccf9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -185,6 +185,7 @@ struct xfrm_state {
 	refcount_t		refcnt;
 	spinlock_t		lock;
 
+	u32			pcpu_num;
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 	struct xfrm_mark	mark;
@@ -1639,7 +1640,7 @@ struct xfrmk_spdinfo {
 	u32 spdhmcnt;
 };
 
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
 int xfrm_state_delete(struct xfrm_state *x);
 int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
@@ -1754,7 +1755,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
 		   struct netlink_ext_ack *extack);
 struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
-				 u8 mode, u32 reqid, u32 if_id, u8 proto,
+				 u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
 				 const xfrm_address_t *daddr,
 				 const xfrm_address_t *saddr, int create,
 				 unsigned short family);
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 6a77328be114..1703b6fc868e 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -315,6 +315,7 @@ enum xfrm_attr_type_t {
 	XFRMA_SET_MARK_MASK,	/* __u32 */
 	XFRMA_IF_ID,		/* __u32 */
 	XFRMA_MTIMER_THRESH,	/* __u32 in seconds for input SA */
+	XFRMA_SA_PCPU,		/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
@@ -430,6 +431,7 @@ struct xfrm_userpolicy_info {
 #define XFRM_POLICY_LOCALOK	1	/* Allow user to override global policy */
 	/* Automatically expand selector to include matching ICMP payloads. */
 #define XFRM_POLICY_ICMP	2
+#define XFRM_POLICY_CPU_ACQUIRE	4
 	__u8				share;
 };
 
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f79fb99271ed..b9a1eb3ff461 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1354,7 +1354,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	}
 
 	if (hdr->sadb_msg_seq) {
-		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
+		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, 0);
 		if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
 			xfrm_state_put(x);
 			x = NULL;
@@ -1362,7 +1362,8 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	}
 
 	if (!x)
-		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
+		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, 0, proto,
+				  xdaddr, xsaddr, 1, family);
 
 	if (x == NULL)
 		return -ENOENT;
@@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb
 	if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
 		return 0;
 
-	x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
+	x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, 0);
 	if (x == NULL)
 		return 0;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0c306473a79d..b41b5dd72d8e 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -677,6 +677,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 		x->lft.hard_packet_limit = XFRM_INF;
 		x->replay_maxage = 0;
 		x->replay_maxdiff = 0;
+		x->pcpu_num = UINT_MAX;
 		spin_lock_init(&x->lock);
 	}
 	return x;
@@ -972,6 +973,7 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = tmpl->encap_family;
+	x->type_offload = NULL;
 }
 
 static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
@@ -1096,6 +1098,9 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 			       struct xfrm_state **best, int *acq_in_progress,
 			       int *error)
 {
+	unsigned int pcpu_id = get_cpu();
+	put_cpu();
+
 	/* Resolution logic:
 	 * 1. There is a valid state with matching selector. Done.
 	 * 2. Valid state with inappropriate selector. Skip.
@@ -1115,13 +1120,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 							&fl->u.__fl_common))
 			return;
 
+		if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
+			return;
+
 		if (!*best ||
+		    ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
 		    (*best)->km.dying > x->km.dying ||
 		    ((*best)->km.dying == x->km.dying &&
 		     (*best)->curlft.add_time < x->curlft.add_time))
 			*best = x;
 	} else if (x->km.state == XFRM_STATE_ACQ) {
-		*acq_in_progress = 1;
+		if (!*best || (*best && x->pcpu_num == pcpu_id))
+			*acq_in_progress = 1;
 	} else if (x->km.state == XFRM_STATE_ERROR ||
 		   x->km.state == XFRM_STATE_EXPIRED) {
 		if ((!x->sel.family ||
@@ -1150,6 +1160,8 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	unsigned short encap_family = tmpl->encap_family;
 	unsigned int sequence;
 	struct km_event c;
+	unsigned int pcpu_id = get_cpu();
+	put_cpu();
 
 	to_put = NULL;
 
@@ -1223,7 +1235,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	}
 
 found:
-	x = best;
+	if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
+	    (best && (best->pcpu_num == pcpu_id)))
+		x = best;
+
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
 		    (x0 = __xfrm_state_lookup_all(net, mark, daddr,
@@ -1255,6 +1270,8 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
 		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
 		x->if_id = if_id;
+		if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
+			x->pcpu_num = pcpu_id;
 
 		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
 		if (error) {
@@ -1333,6 +1350,9 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			x = NULL;
 			error = -ESRCH;
 		}
+
+		if (best)
+			x = best;
 	}
 out:
 	if (x) {
@@ -1464,12 +1484,14 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 	unsigned int h;
 	u32 mark = xnew->mark.v & xnew->mark.m;
 	u32 if_id = xnew->if_id;
+	u32 cpu_id = xnew->pcpu_num;
 
 	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
 	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family	== family &&
 		    x->props.reqid	== reqid &&
 		    x->if_id		== if_id &&
+		    x->pcpu_num		== cpu_id &&
 		    (mark & x->mark.m) == x->mark.v &&
 		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
 		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1492,7 +1514,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
 static struct xfrm_state *__find_acq_core(struct net *net,
 					  const struct xfrm_mark *m,
 					  unsigned short family, u8 mode,
-					  u32 reqid, u32 if_id, u8 proto,
+					  u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
 					  const xfrm_address_t *daddr,
 					  const xfrm_address_t *saddr,
 					  int create)
@@ -1509,6 +1531,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 		    x->id.spi       != 0 ||
 		    x->id.proto	    != proto ||
 		    (mark & x->mark.m) != x->mark.v ||
+		    x->pcpu_num != pcpu_num ||
 		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
 		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
 			continue;
@@ -1542,6 +1565,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 			break;
 		}
 
+		x->pcpu_num = pcpu_num;
 		x->km.state = XFRM_STATE_ACQ;
 		x->id.proto = proto;
 		x->props.family = family;
@@ -1570,7 +1594,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 	return x;
 }
 
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
 
 int xfrm_state_add(struct xfrm_state *x)
 {
@@ -1596,7 +1620,7 @@ int xfrm_state_add(struct xfrm_state *x)
 	}
 
 	if (use_spi && x->km.seq) {
-		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
+		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
 		if (x1 && ((x1->id.proto != x->id.proto) ||
 		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
 			to_put = x1;
@@ -1606,7 +1630,7 @@ int xfrm_state_add(struct xfrm_state *x)
 
 	if (use_spi && !x1)
 		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
-				     x->props.reqid, x->if_id, x->id.proto,
+				     x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
 				     &x->id.daddr, &x->props.saddr, 0);
 
 	__xfrm_state_bump_genids(x);
@@ -1731,6 +1755,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 	x->props.flags = orig->props.flags;
 	x->props.extra_flags = orig->props.extra_flags;
 
+	x->pcpu_num = orig->pcpu_num;
 	x->if_id = orig->if_id;
 	x->tfcpad = orig->tfcpad;
 	x->replay_maxdiff = orig->replay_maxdiff;
@@ -1999,13 +2024,14 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
-	      u32 if_id, u8 proto, const xfrm_address_t *daddr,
+	      u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
 	      const xfrm_address_t *saddr, int create, unsigned short family)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
+	x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num,
+			    proto, daddr, saddr, create);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 	return x;
@@ -2140,7 +2166,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
 
 /* Silly enough, but I'm lazy to build resolution list */
 
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
 {
 	unsigned int h = xfrm_seq_hash(net, seq);
 	struct xfrm_state *x;
@@ -2148,6 +2174,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
 	hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
 		if (x->km.seq == seq &&
 		    (mark & x->mark.m) == x->mark.v &&
+		    x->pcpu_num == pcpu_num &&
 		    x->km.state == XFRM_STATE_ACQ) {
 			xfrm_state_hold(x);
 			return x;
@@ -2157,12 +2184,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
 	return NULL;
 }
 
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	x = __xfrm_find_acq_byseq(net, mark, seq);
+	x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	return x;
 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 810b520493f3..97e659f0812e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -734,6 +734,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 	if (attrs[XFRMA_IF_ID])
 		x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
 
+	if (attrs[XFRMA_SA_PCPU]) {
+		x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+		if (x->pcpu_num >= num_possible_cpus())
+			goto error;
+	}
+
 	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
 	if (err)
 		goto error;
@@ -1182,6 +1188,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 		if (ret)
 			goto out;
 	}
+	if (x->pcpu_num != UINT_MAX) {
+		ret = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+		if (ret)
+			goto out;
+	}
 	if (x->mapping_maxage)
 		ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
 out:
@@ -1579,6 +1590,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	u32 mark;
 	struct xfrm_mark m;
 	u32 if_id = 0;
+	u32 pcpu_num = UINT_MAX;
 
 	p = nlmsg_data(nlh);
 	err = verify_spi_info(p->info.id.proto, p->min, p->max, extack);
@@ -1595,8 +1607,16 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (attrs[XFRMA_IF_ID])
 		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
 
+	if (attrs[XFRMA_SA_PCPU]) {
+		pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+		if (pcpu_num >= num_possible_cpus()) {
+			err = -EINVAL;
+			goto out_noput;
+		}
+	}
+
 	if (p->info.seq) {
-		x = xfrm_find_acq_byseq(net, mark, p->info.seq);
+		x = xfrm_find_acq_byseq(net, mark, p->info.seq, pcpu_num);
 		if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
 			xfrm_state_put(x);
 			x = NULL;
@@ -1605,7 +1625,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (!x)
 		x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
-				  if_id, p->info.id.proto, daddr,
+				  if_id, pcpu_num, p->info.id.proto, daddr,
 				  &p->info.saddr, 1,
 				  family);
 	err = -ENOENT;
@@ -2458,6 +2478,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
 	err = xfrm_if_id_put(skb, x->if_id);
 	if (err)
 		goto out_cancel;
+	if (x->pcpu_num != UINT_MAX)
+		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
 
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -2722,6 +2744,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	xfrm_mark_get(attrs, &mark);
 
+	if (attrs[XFRMA_SA_PCPU]) {
+		x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+		err = -EINVAL;
+		if (x->pcpu_num >= num_possible_cpus())
+			goto free_state;
+	}
+
 	err = verify_newpolicy_info(&ua->policy, extack);
 	if (err)
 		goto free_state;
@@ -3049,6 +3078,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_SET_MARK_MASK]	= { .type = NLA_U32 },
 	[XFRMA_IF_ID]		= { .type = NLA_U32 },
 	[XFRMA_MTIMER_THRESH]   = { .type = NLA_U32 },
+	[XFRMA_SA_PCPU]		= { .type = NLA_U32 },
 };
 EXPORT_SYMBOL_GPL(xfrma_policy);
 
@@ -3216,6 +3246,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
 	err = xfrm_if_id_put(skb, x->if_id);
 	if (err)
 		return err;
+	if (x->pcpu_num != UINT_MAX) {
+		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+		if (err)
+			return err;
+	}
 
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -3317,6 +3352,8 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
 	}
 	if (x->if_id)
 		l += nla_total_size(sizeof(x->if_id));
+	if (x->pcpu_num)
+		l += nla_total_size(sizeof(x->pcpu_num));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size_64bit(sizeof(u64));
@@ -3453,6 +3490,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 		err = xfrm_if_id_put(skb, xp->if_id);
 	if (!err && xp->xdo.dev)
 		err = copy_user_offload(&xp->xdo, skb);
+	if (!err && x->pcpu_num != UINT_MAX)
+		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH ipsec-next 2/3] xfrm: Cache used outbound xfrm states at the policy.
  2024-04-12  6:05 [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Steffen Klassert
  2024-04-12  6:05 ` [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling Steffen Klassert
@ 2024-04-12  6:05 ` Steffen Klassert
  2024-04-16 21:51   ` Daniel Xu
  2024-04-12  6:05 ` [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache Steffen Klassert
  2024-05-02 12:20 ` [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Antony Antony
  3 siblings, 1 reply; 16+ messages in thread
From: Steffen Klassert @ 2024-04-12  6:05 UTC (permalink / raw)
  To: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu
  Cc: Steffen Klassert

Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we cache the used xfrm states at the policy for outbound
IPsec traffic.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     |  3 +++
 net/xfrm/xfrm_policy.c | 12 ++++++++++
 net/xfrm/xfrm_state.c  | 54 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2ba4c077ccf9..49c85bcd9fd9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -181,6 +181,7 @@ struct xfrm_state {
 	struct hlist_node	bysrc;
 	struct hlist_node	byspi;
 	struct hlist_node	byseq;
+	struct hlist_node	state_cache;
 
 	refcount_t		refcnt;
 	spinlock_t		lock;
@@ -524,6 +525,8 @@ struct xfrm_policy {
 	struct hlist_node	bydst;
 	struct hlist_node	byidx;
 
+	struct hlist_head	state_cache_list;
+
 	/* This lock only affects elements except for entry. */
 	rwlock_t		lock;
 	refcount_t		refcnt;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6affe5cd85d8..6a7f1d40d5f6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -410,6 +410,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 	if (policy) {
 		write_pnet(&policy->xp_net, net);
 		INIT_LIST_HEAD(&policy->walk.all);
+		INIT_HLIST_HEAD(&policy->state_cache_list);
 		INIT_HLIST_NODE(&policy->bydst_inexact_list);
 		INIT_HLIST_NODE(&policy->bydst);
 		INIT_HLIST_NODE(&policy->byidx);
@@ -452,6 +453,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
 
 static void xfrm_policy_kill(struct xfrm_policy *policy)
 {
+	struct net *net = xp_net(policy);
+	struct xfrm_state *x;
+
 	write_lock_bh(&policy->lock);
 	policy->walk.dead = 1;
 	write_unlock_bh(&policy->lock);
@@ -465,6 +469,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 	if (del_timer(&policy->timer))
 		xfrm_pol_put(policy);
 
+	/* XXX: Flush state cache */
+	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+		hlist_del_init_rcu(&x->state_cache);
+	}
+	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
 	xfrm_pol_put(policy);
 }
 
@@ -3253,6 +3264,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
 		dst_release(dst);
 		dst = dst_orig;
 	}
+
 ok:
 	xfrm_pols_put(pols, drop_pols);
 	if (dst && dst->xfrm &&
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b41b5dd72d8e..ff2b0fc0b206 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -663,6 +663,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 		refcount_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
 		INIT_LIST_HEAD(&x->km.all);
+		INIT_HLIST_NODE(&x->state_cache);
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
@@ -707,12 +708,15 @@ int __xfrm_state_delete(struct xfrm_state *x)
 
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
+
 		spin_lock(&net->xfrm.xfrm_state_lock);
 		list_del(&x->km.all);
 		hlist_del_rcu(&x->bydst);
 		hlist_del_rcu(&x->bysrc);
 		if (x->km.seq)
 			hlist_del_rcu(&x->byseq);
+		if (!hlist_unhashed(&x->state_cache))
+			hlist_del_rcu(&x->state_cache);
 		if (x->id.spi)
 			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
@@ -1160,6 +1164,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	unsigned short encap_family = tmpl->encap_family;
 	unsigned int sequence;
 	struct km_event c;
+	bool cached = false;
 	unsigned int pcpu_id = get_cpu();
 	put_cpu();
 
@@ -1168,6 +1173,45 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
 
 	rcu_read_lock();
+	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+		if (x->props.family == encap_family &&
+		    x->props.reqid == tmpl->reqid &&
+		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
+		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto &&
+		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+			xfrm_state_look_at(pol, x, fl, encap_family,
+					   &best, &acquire_in_progress, &error);
+	}
+
+	if (best)
+		goto cached;
+
+	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+		if (x->props.family == encap_family &&
+		    x->props.reqid == tmpl->reqid &&
+		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
+		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto &&
+		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+			xfrm_state_look_at(pol, x, fl, family,
+					   &best, &acquire_in_progress, &error);
+	}
+
+cached:
+	if (best)
+		goto found;
+	else if (error)
+		best = NULL;
+	else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
+		WARN_ON(1);
+
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 #ifdef CONFIG_XFRM_OFFLOAD
@@ -1317,6 +1361,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			XFRM_STATE_INSERT(bysrc, &x->bysrc,
 					  net->xfrm.state_bysrc + h,
 					  x->xso.type);
+			INIT_HLIST_NODE(&x->state_cache);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
 				XFRM_STATE_INSERT(byspi, &x->byspi,
@@ -1363,6 +1408,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	} else {
 		*err = acquire_in_progress ? -EAGAIN : error;
 	}
+
+	if (x && x->km.state == XFRM_STATE_VALID && !cached &&
+	    (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
+		spin_lock_bh(&net->xfrm.xfrm_state_lock);
+		if (hlist_unhashed(&x->state_cache))
+			hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
+		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	}
+
 	rcu_read_unlock();
 	if (to_put)
 		xfrm_state_put(to_put);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache.
  2024-04-12  6:05 [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Steffen Klassert
  2024-04-12  6:05 ` [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling Steffen Klassert
  2024-04-12  6:05 ` [PATCH ipsec-next 2/3] xfrm: Cache used outbound xfrm states at the policy Steffen Klassert
@ 2024-04-12  6:05 ` Steffen Klassert
  2024-05-02 12:20 ` [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Antony Antony
  3 siblings, 0 replies; 16+ messages in thread
From: Steffen Klassert @ 2024-04-12  6:05 UTC (permalink / raw)
  To: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu
  Cc: Steffen Klassert

Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we add a percpu cache to cache the used inbound xfrm states.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/netns/xfrm.h |  1 +
 include/net/xfrm.h       |  5 ++++
 net/ipv4/esp4_offload.c  |  6 ++---
 net/ipv6/esp6_offload.c  |  6 ++---
 net/xfrm/xfrm_input.c    |  2 +-
 net/xfrm/xfrm_state.c    | 57 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 423b52eca908..177516be776d 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -43,6 +43,7 @@ struct netns_xfrm {
 	struct hlist_head	__rcu *state_bysrc;
 	struct hlist_head	__rcu *state_byspi;
 	struct hlist_head	__rcu *state_byseq;
+	struct hlist_head	__rcu __percpu *state_cache_input;
 	unsigned int		state_hmask;
 	unsigned int		state_num;
 	struct work_struct	state_hash_work;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 49c85bcd9fd9..60c2b129e9e5 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -182,6 +182,7 @@ struct xfrm_state {
 	struct hlist_node	byspi;
 	struct hlist_node	byseq;
 	struct hlist_node	state_cache;
+	struct hlist_node	state_cache_input;
 
 	refcount_t		refcnt;
 	spinlock_t		lock;
@@ -1604,6 +1605,10 @@ int xfrm_state_update(struct xfrm_state *x);
 struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
 				     const xfrm_address_t *daddr, __be32 spi,
 				     u8 proto, unsigned short family);
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+					   const xfrm_address_t *daddr,
+					   __be32 spi, u8 proto,
+					   unsigned short family);
 struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 					    const xfrm_address_t *daddr,
 					    const xfrm_address_t *saddr,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index b3271957ad9a..6ccb8d56ad2a 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -53,9 +53,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 		if (sp->len == XFRM_MAX_DEPTH)
 			goto out_reset;
 
-		x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
-				      (xfrm_address_t *)&ip_hdr(skb)->daddr,
-				      spi, IPPROTO_ESP, AF_INET);
+		x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+					    (xfrm_address_t *)&ip_hdr(skb)->daddr,
+					    spi, IPPROTO_ESP, AF_INET);
 		if (!x)
 			goto out_reset;
 
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 527b7caddbc6..c82ed369e888 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
 		if (sp->len == XFRM_MAX_DEPTH)
 			goto out_reset;
 
-		x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
-				      (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
-				      spi, IPPROTO_ESP, AF_INET6);
+		x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+					    (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+					    spi, IPPROTO_ESP, AF_INET6);
 		if (!x)
 			goto out_reset;
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 161f535c8b94..82dba4673296 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -563,7 +563,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop;
 		}
 
-		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
+		x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
 		if (x == NULL) {
 			secpath_reset(skb);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ff2b0fc0b206..86f8dde23ff1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -717,6 +717,9 @@ int __xfrm_state_delete(struct xfrm_state *x)
 			hlist_del_rcu(&x->byseq);
 		if (!hlist_unhashed(&x->state_cache))
 			hlist_del_rcu(&x->state_cache);
+		if (!hlist_unhashed(&x->state_cache_input))
+			hlist_del_rcu(&x->state_cache_input);
+
 		if (x->id.spi)
 			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
@@ -1048,6 +1051,52 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 	return NULL;
 }
 
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+					   const xfrm_address_t *daddr,
+					   __be32 spi, u8 proto,
+					   unsigned short family)
+{
+	struct hlist_head *state_cache_input;
+	struct xfrm_state *x = NULL;
+	int cpu = get_cpu();
+
+	state_cache_input =  per_cpu_ptr(net->xfrm.state_cache_input, cpu);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
+		if (x->props.family != family ||
+		    x->id.spi       != spi ||
+		    x->id.proto     != proto ||
+		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
+			continue;
+
+		if ((mark & x->mark.m) != x->mark.v)
+			continue;
+		if (!xfrm_state_hold_rcu(x))
+			continue;
+		goto out;
+	}
+
+	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+
+	if (x && x->km.state == XFRM_STATE_VALID) {
+		spin_lock_bh(&net->xfrm.xfrm_state_lock);
+		if (hlist_unhashed(&x->state_cache_input)) {
+			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+		} else {
+			hlist_del_rcu(&x->state_cache_input);
+			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+		}
+		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	}
+
+out:
+	rcu_read_unlock();
+	put_cpu();
+	return x;
+}
+EXPORT_SYMBOL(xfrm_input_state_lookup);
+
 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 						     const xfrm_address_t *daddr,
 						     const xfrm_address_t *saddr,
@@ -2987,6 +3036,11 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_byseq = xfrm_hash_alloc(sz);
 	if (!net->xfrm.state_byseq)
 		goto out_byseq;
+
+	net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
+	if (!net->xfrm.state_cache_input)
+		goto out_state_cache_input;
+
 	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	net->xfrm.state_num = 0;
@@ -2996,6 +3050,8 @@ int __net_init xfrm_state_init(struct net *net)
 			       &net->xfrm.xfrm_state_lock);
 	return 0;
 
+out_state_cache_input:
+	xfrm_hash_free(net->xfrm.state_byseq, sz);
 out_byseq:
 	xfrm_hash_free(net->xfrm.state_byspi, sz);
 out_byspi:
@@ -3025,6 +3081,7 @@ void xfrm_state_fini(struct net *net)
 	xfrm_hash_free(net->xfrm.state_bysrc, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
 	xfrm_hash_free(net->xfrm.state_bydst, sz);
+	free_percpu(net->xfrm.state_cache_input);
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling.
  2024-04-12  6:05 ` [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling Steffen Klassert
@ 2024-04-12 10:37   ` Tobias Brunner
  2024-04-18  9:45     ` Steffen Klassert
  2024-04-14 11:05   ` Simon Horman
  2024-04-15 14:12   ` Sabrina Dubroca
  2 siblings, 1 reply; 16+ messages in thread
From: Tobias Brunner @ 2024-04-12 10:37 UTC (permalink / raw)
  To: Steffen Klassert, netdev, devel, Paul Wouters, Antony Antony, Daniel Xu

On 12.04.24 08:05, Steffen Klassert wrote:
> Currently all flows for a certain SA must be processed by the same
> cpu to avoid packet reordering and lock contention of the xfrm
> state lock.
> 
> To get rid of this limitation, the IETF is about to standardize
> per cpu SAs. This patch implements the xfrm part of it:
> 
> https://datatracker.ietf.org/doc/draft-ietf-ipsecme-multi-sa-performance/
> 
> This adds the cpu as a lookup key for xfrm states and a config option
> to generate acquire messages for each cpu.
> 
> With that, we can have on each cpu a SA with identical traffic selector
> so that flows can be processed in parallel on all cpu.
> 
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> ---
>  include/net/xfrm.h        |  5 ++--
>  include/uapi/linux/xfrm.h |  2 ++
>  net/key/af_key.c          |  7 +++---
>  net/xfrm/xfrm_state.c     | 49 ++++++++++++++++++++++++++++++---------
>  net/xfrm/xfrm_user.c      | 43 ++++++++++++++++++++++++++++++++--
>  5 files changed, 88 insertions(+), 18 deletions(-)
> 
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 57c743b7e4fe..2ba4c077ccf9 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -185,6 +185,7 @@ struct xfrm_state {
>  	refcount_t		refcnt;
>  	spinlock_t		lock;
>  
> +	u32			pcpu_num;
>  	struct xfrm_id		id;
>  	struct xfrm_selector	sel;
>  	struct xfrm_mark	mark;
> @@ -1639,7 +1640,7 @@ struct xfrmk_spdinfo {
>  	u32 spdhmcnt;
>  };
>  
> -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
> +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
>  int xfrm_state_delete(struct xfrm_state *x);
>  int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
>  int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
> @@ -1754,7 +1755,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
>  int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
>  		   struct netlink_ext_ack *extack);
>  struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
> -				 u8 mode, u32 reqid, u32 if_id, u8 proto,
> +				 u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
>  				 const xfrm_address_t *daddr,
>  				 const xfrm_address_t *saddr, int create,
>  				 unsigned short family);
> diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
> index 6a77328be114..1703b6fc868e 100644
> --- a/include/uapi/linux/xfrm.h
> +++ b/include/uapi/linux/xfrm.h
> @@ -315,6 +315,7 @@ enum xfrm_attr_type_t {
>  	XFRMA_SET_MARK_MASK,	/* __u32 */
>  	XFRMA_IF_ID,		/* __u32 */
>  	XFRMA_MTIMER_THRESH,	/* __u32 in seconds for input SA */
> +	XFRMA_SA_PCPU,		/* __u32 */
>  	__XFRMA_MAX
>  
>  #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
> @@ -430,6 +431,7 @@ struct xfrm_userpolicy_info {
>  #define XFRM_POLICY_LOCALOK	1	/* Allow user to override global policy */
>  	/* Automatically expand selector to include matching ICMP payloads. */
>  #define XFRM_POLICY_ICMP	2
> +#define XFRM_POLICY_CPU_ACQUIRE	4
>  	__u8				share;
>  };
>  
> diff --git a/net/key/af_key.c b/net/key/af_key.c
> index f79fb99271ed..b9a1eb3ff461 100644
> --- a/net/key/af_key.c
> +++ b/net/key/af_key.c
> @@ -1354,7 +1354,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
>  	}
>  
>  	if (hdr->sadb_msg_seq) {
> -		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
> +		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, 0);

Shouldn't this be UINT_MAX instead of 0?  (Not sure if it makes a
difference in practice, but it would be consistent with XFRM.)

>  		if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
>  			xfrm_state_put(x);
>  			x = NULL;
> @@ -1362,7 +1362,8 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
>  	}
>  
>  	if (!x)
> -		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
> +		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, 0, proto,
> +				  xdaddr, xsaddr, 1, family);

Same as above.

>  
>  	if (x == NULL)
>  		return -ENOENT;
> @@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb
>  	if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
>  		return 0;
>  
> -	x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
> +	x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, 0);

Same as above.

>  	if (x == NULL)
>  		return 0;
>  
> diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
> index 0c306473a79d..b41b5dd72d8e 100644
> --- a/net/xfrm/xfrm_state.c
> +++ b/net/xfrm/xfrm_state.c
> @@ -677,6 +677,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
>  		x->lft.hard_packet_limit = XFRM_INF;
>  		x->replay_maxage = 0;
>  		x->replay_maxdiff = 0;
> +		x->pcpu_num = UINT_MAX;
>  		spin_lock_init(&x->lock);
>  	}
>  	return x;
> @@ -972,6 +973,7 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
>  	x->props.mode = tmpl->mode;
>  	x->props.reqid = tmpl->reqid;
>  	x->props.family = tmpl->encap_family;
> +	x->type_offload = NULL;

This seems unrelated.  And is it actually necessary?  xfrm_state_alloc()
uses kmem_cache_zalloc(), so this should be NULL anyway.

>  }
>  
>  static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
> @@ -1096,6 +1098,9 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
>  			       struct xfrm_state **best, int *acq_in_progress,
>  			       int *error)
>  {
> +	unsigned int pcpu_id = get_cpu();
> +	put_cpu();
> +
>  	/* Resolution logic:
>  	 * 1. There is a valid state with matching selector. Done.
>  	 * 2. Valid state with inappropriate selector. Skip.
> @@ -1115,13 +1120,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
>  							&fl->u.__fl_common))
>  			return;
>  
> +		if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
> +			return;
> +
>  		if (!*best ||
> +		    ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
>  		    (*best)->km.dying > x->km.dying ||
>  		    ((*best)->km.dying == x->km.dying &&
>  		     (*best)->curlft.add_time < x->curlft.add_time))
>  			*best = x;
>  	} else if (x->km.state == XFRM_STATE_ACQ) {
> -		*acq_in_progress = 1;
> +		if (!*best || (*best && x->pcpu_num == pcpu_id))
> +			*acq_in_progress = 1;
>  	} else if (x->km.state == XFRM_STATE_ERROR ||
>  		   x->km.state == XFRM_STATE_EXPIRED) {
>  		if ((!x->sel.family ||
> @@ -1150,6 +1160,8 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  	unsigned short encap_family = tmpl->encap_family;
>  	unsigned int sequence;
>  	struct km_event c;
> +	unsigned int pcpu_id = get_cpu();
> +	put_cpu();
>  
>  	to_put = NULL;
>  
> @@ -1223,7 +1235,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  	}
>  
>  found:
> -	x = best;
> +	if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
> +	    (best && (best->pcpu_num == pcpu_id)))
> +		x = best;
> +
>  	if (!x && !error && !acquire_in_progress) {
>  		if (tmpl->id.spi &&
>  		    (x0 = __xfrm_state_lookup_all(net, mark, daddr,
> @@ -1255,6 +1270,8 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
>  		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
>  		x->if_id = if_id;
> +		if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
> +			x->pcpu_num = pcpu_id;
>  
>  		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
>  		if (error) {
> @@ -1333,6 +1350,9 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  			x = NULL;
>  			error = -ESRCH;
>  		}
> +
> +		if (best)
> +			x = best;

Since `x` could be assigned to a (more specific in terms of CPU ID)
state in state XFRM_STATE_ACQ at this point, it might warrant a comment
that clarifies why it is unconditionally overridden with `best`.  That
is, so this other matching "fallback" SA is used for this packet while
the CPU-specific acquire is handled, which might not be immedialy
obvious when reading the code.

>  	}
>  out:
>  	if (x) {
> @@ -1464,12 +1484,14 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
>  	unsigned int h;
>  	u32 mark = xnew->mark.v & xnew->mark.m;
>  	u32 if_id = xnew->if_id;
> +	u32 cpu_id = xnew->pcpu_num;
>  
>  	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
>  	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
>  		if (x->props.family	== family &&
>  		    x->props.reqid	== reqid &&
>  		    x->if_id		== if_id &&
> +		    x->pcpu_num		== cpu_id &&
>  		    (mark & x->mark.m) == x->mark.v &&
>  		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
>  		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
> @@ -1492,7 +1514,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
>  static struct xfrm_state *__find_acq_core(struct net *net,
>  					  const struct xfrm_mark *m,
>  					  unsigned short family, u8 mode,
> -					  u32 reqid, u32 if_id, u8 proto,
> +					  u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
>  					  const xfrm_address_t *daddr,
>  					  const xfrm_address_t *saddr,
>  					  int create)
> @@ -1509,6 +1531,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
>  		    x->id.spi       != 0 ||
>  		    x->id.proto	    != proto ||
>  		    (mark & x->mark.m) != x->mark.v ||
> +		    x->pcpu_num != pcpu_num ||
>  		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
>  		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
>  			continue;
> @@ -1542,6 +1565,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
>  			break;
>  		}
>  
> +		x->pcpu_num = pcpu_num;
>  		x->km.state = XFRM_STATE_ACQ;
>  		x->id.proto = proto;
>  		x->props.family = family;
> @@ -1570,7 +1594,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
>  	return x;
>  }
>  
> -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
> +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
>  
>  int xfrm_state_add(struct xfrm_state *x)
>  {
> @@ -1596,7 +1620,7 @@ int xfrm_state_add(struct xfrm_state *x)
>  	}
>  
>  	if (use_spi && x->km.seq) {
> -		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
> +		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
>  		if (x1 && ((x1->id.proto != x->id.proto) ||
>  		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
>  			to_put = x1;
> @@ -1606,7 +1630,7 @@ int xfrm_state_add(struct xfrm_state *x)
>  
>  	if (use_spi && !x1)
>  		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
> -				     x->props.reqid, x->if_id, x->id.proto,
> +				     x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
>  				     &x->id.daddr, &x->props.saddr, 0);
>  
>  	__xfrm_state_bump_genids(x);
> @@ -1731,6 +1755,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
>  	x->props.flags = orig->props.flags;
>  	x->props.extra_flags = orig->props.extra_flags;
>  
> +	x->pcpu_num = orig->pcpu_num;
>  	x->if_id = orig->if_id;
>  	x->tfcpad = orig->tfcpad;
>  	x->replay_maxdiff = orig->replay_maxdiff;
> @@ -1999,13 +2024,14 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
>  
>  struct xfrm_state *
>  xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
> -	      u32 if_id, u8 proto, const xfrm_address_t *daddr,
> +	      u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
>  	      const xfrm_address_t *saddr, int create, unsigned short family)
>  {
>  	struct xfrm_state *x;
>  
>  	spin_lock_bh(&net->xfrm.xfrm_state_lock);
> -	x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
> +	x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num,
> +			    proto, daddr, saddr, create);
>  	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
>  
>  	return x;
> @@ -2140,7 +2166,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
>  
>  /* Silly enough, but I'm lazy to build resolution list */
>  
> -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
> +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
>  {
>  	unsigned int h = xfrm_seq_hash(net, seq);
>  	struct xfrm_state *x;
> @@ -2148,6 +2174,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
>  	hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
>  		if (x->km.seq == seq &&
>  		    (mark & x->mark.m) == x->mark.v &&
> +		    x->pcpu_num == pcpu_num &&
>  		    x->km.state == XFRM_STATE_ACQ) {
>  			xfrm_state_hold(x);
>  			return x;
> @@ -2157,12 +2184,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
>  	return NULL;
>  }
>  
> -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
> +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
>  {
>  	struct xfrm_state *x;
>  
>  	spin_lock_bh(&net->xfrm.xfrm_state_lock);
> -	x = __xfrm_find_acq_byseq(net, mark, seq);
> +	x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num);
>  	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
>  	return x;
>  }
> diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
> index 810b520493f3..97e659f0812e 100644
> --- a/net/xfrm/xfrm_user.c
> +++ b/net/xfrm/xfrm_user.c
> @@ -734,6 +734,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
>  	if (attrs[XFRMA_IF_ID])
>  		x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
>  
> +	if (attrs[XFRMA_SA_PCPU]) {
> +		x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
> +		if (x->pcpu_num >= num_possible_cpus())
> +			goto error;
> +	}
> +
>  	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
>  	if (err)
>  		goto error;
> @@ -1182,6 +1188,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
>  		if (ret)
>  			goto out;
>  	}
> +	if (x->pcpu_num != UINT_MAX) {
> +		ret = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
> +		if (ret)
> +			goto out;
> +	}
>  	if (x->mapping_maxage)
>  		ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
>  out:
> @@ -1579,6 +1590,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
>  	u32 mark;
>  	struct xfrm_mark m;
>  	u32 if_id = 0;
> +	u32 pcpu_num = UINT_MAX;
>  
>  	p = nlmsg_data(nlh);
>  	err = verify_spi_info(p->info.id.proto, p->min, p->max, extack);
> @@ -1595,8 +1607,16 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
>  	if (attrs[XFRMA_IF_ID])
>  		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
>  
> +	if (attrs[XFRMA_SA_PCPU]) {
> +		pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
> +		if (pcpu_num >= num_possible_cpus()) {
> +			err = -EINVAL;
> +			goto out_noput;
> +		}
> +	}
> +
>  	if (p->info.seq) {
> -		x = xfrm_find_acq_byseq(net, mark, p->info.seq);
> +		x = xfrm_find_acq_byseq(net, mark, p->info.seq, pcpu_num);
>  		if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
>  			xfrm_state_put(x);
>  			x = NULL;
> @@ -1605,7 +1625,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
>  
>  	if (!x)
>  		x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
> -				  if_id, p->info.id.proto, daddr,
> +				  if_id, pcpu_num, p->info.id.proto, daddr,
>  				  &p->info.saddr, 1,
>  				  family);
>  	err = -ENOENT;
> @@ -2458,6 +2478,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
>  	err = xfrm_if_id_put(skb, x->if_id);
>  	if (err)
>  		goto out_cancel;
> +	if (x->pcpu_num != UINT_MAX)
> +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
>  
>  	nlmsg_end(skb, nlh);
>  	return 0;
> @@ -2722,6 +2744,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
>  
>  	xfrm_mark_get(attrs, &mark);
>  
> +	if (attrs[XFRMA_SA_PCPU]) {
> +		x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
> +		err = -EINVAL;
> +		if (x->pcpu_num >= num_possible_cpus())
> +			goto free_state;
> +	}
> +
>  	err = verify_newpolicy_info(&ua->policy, extack);
>  	if (err)
>  		goto free_state;
> @@ -3049,6 +3078,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
>  	[XFRMA_SET_MARK_MASK]	= { .type = NLA_U32 },
>  	[XFRMA_IF_ID]		= { .type = NLA_U32 },
>  	[XFRMA_MTIMER_THRESH]   = { .type = NLA_U32 },
> +	[XFRMA_SA_PCPU]		= { .type = NLA_U32 },
>  };
>  EXPORT_SYMBOL_GPL(xfrma_policy);
>  
> @@ -3216,6 +3246,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
>  	err = xfrm_if_id_put(skb, x->if_id);
>  	if (err)
>  		return err;
> +	if (x->pcpu_num != UINT_MAX) {
> +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
> +		if (err)
> +			return err;
> +	}
>  
>  	nlmsg_end(skb, nlh);
>  	return 0;
> @@ -3317,6 +3352,8 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
>  	}
>  	if (x->if_id)
>  		l += nla_total_size(sizeof(x->if_id));
> +	if (x->pcpu_num)
> +		l += nla_total_size(sizeof(x->pcpu_num));
>  
>  	/* Must count x->lastused as it may become non-zero behind our back. */
>  	l += nla_total_size_64bit(sizeof(u64));
> @@ -3453,6 +3490,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
>  		err = xfrm_if_id_put(skb, xp->if_id);
>  	if (!err && xp->xdo.dev)
>  		err = copy_user_offload(&xp->xdo, skb);
> +	if (!err && x->pcpu_num != UINT_MAX)
> +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
>  	if (err) {
>  		nlmsg_cancel(skb, nlh);
>  		return err;


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache.
  2024-04-12  6:05 ` [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache Steffen Klassert
@ 2024-04-16  2:33 ` kernel test robot
  -1 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2024-04-13  0:36 UTC (permalink / raw)
  To: oe-kbuild; +Cc: lkp

:::::: 
:::::: Manual check reason: "low confidence static check warning: include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]"
:::::: 

BCC: lkp@intel.com
CC: oe-kbuild-all@lists.linux.dev
In-Reply-To: <20240412060553.3483630-4-steffen.klassert@secunet.com>
References: <20240412060553.3483630-4-steffen.klassert@secunet.com>
TO: Steffen Klassert <steffen.klassert@secunet.com>
TO: netdev@vger.kernel.org
TO: devel@linux-ipsec.org
TO: Paul Wouters <paul@nohats.ca>
TO: Antony Antony <antony.antony@secunet.com>
TO: Tobias Brunner <tobias@strongswan.org>
TO: Daniel Xu <dxu@dxuuu.xyz>
CC: Steffen Klassert <steffen.klassert@secunet.com>

Hi Steffen,

kernel test robot noticed the following build warnings:

[auto build test WARNING on klassert-ipsec-next/master]
[also build test WARNING on klassert-ipsec/master net/main net-next/main linus/master v6.9-rc3 next-20240412]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Steffen-Klassert/xfrm-Add-support-for-per-cpu-xfrm-state-handling/20240412-140746
base:   https://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git master
patch link:    https://lore.kernel.org/r/20240412060553.3483630-4-steffen.klassert%40secunet.com
patch subject: [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache.
:::::: branch date: 18 hours ago
:::::: commit date: 18 hours ago
config: i386-randconfig-061-20240413 (https://download.01.org/0day-ci/archive/20240413/202404130802.rDxN3ijD-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240413/202404130802.rDxN3ijD-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/r/202404130802.rDxN3ijD-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
   drivers/net/ethernet/altera/altera_tse_ethtool.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_msgdma.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_utils.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/if_vlan.h, ...):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_sgdma.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/if_vlan.h, ...):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_tse_main.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/etherdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu

vim +46 include/net/netns/xfrm.h

880a6fab8f6ba5b Christophe Gouault 2014-08-29  31  
d62ddc21b674b5a Alexey Dobriyan    2008-11-25  32  struct netns_xfrm {
9d4139c76905833 Alexey Dobriyan    2008-11-25  33  	struct list_head	state_all;
73d189dce486cd6 Alexey Dobriyan    2008-11-25  34  	/*
73d189dce486cd6 Alexey Dobriyan    2008-11-25  35  	 * Hash table to find appropriate SA towards given target (endpoint of
73d189dce486cd6 Alexey Dobriyan    2008-11-25  36  	 * tunnel or destination of transport mode) allowed by selector.
73d189dce486cd6 Alexey Dobriyan    2008-11-25  37  	 *
73d189dce486cd6 Alexey Dobriyan    2008-11-25  38  	 * Main use is finding SA after policy selected tunnel or transport
73d189dce486cd6 Alexey Dobriyan    2008-11-25  39  	 * mode. Also, it can be used by ah/esp icmp error handler to find
73d189dce486cd6 Alexey Dobriyan    2008-11-25  40  	 * offending SA.
73d189dce486cd6 Alexey Dobriyan    2008-11-25  41  	 */
d737a5805581c6f Florian Westphal   2016-08-09  42  	struct hlist_head	__rcu *state_bydst;
d737a5805581c6f Florian Westphal   2016-08-09  43  	struct hlist_head	__rcu *state_bysrc;
d737a5805581c6f Florian Westphal   2016-08-09  44  	struct hlist_head	__rcu *state_byspi;
fe9f1d8779cb470 Sabrina Dubroca    2021-04-25  45  	struct hlist_head	__rcu *state_byseq;
042bf7320e286f6 Steffen Klassert   2024-04-12 @46  	struct hlist_head	__rcu __percpu *state_cache_input;
529983ecabeae3d Alexey Dobriyan    2008-11-25  47  	unsigned int		state_hmask;
0bf7c5b019518d3 Alexey Dobriyan    2008-11-25  48  	unsigned int		state_num;
630827338585022 Alexey Dobriyan    2008-11-25  49  	struct work_struct	state_hash_work;
50a30657fd7ee77 Alexey Dobriyan    2008-11-25  50  
adfcf0b27e87d16 Alexey Dobriyan    2008-11-25  51  	struct list_head	policy_all;
93b851c1c93c7d5 Alexey Dobriyan    2008-11-25  52  	struct hlist_head	*policy_byidx;
8100bea7d619e84 Alexey Dobriyan    2008-11-25  53  	unsigned int		policy_idx_hmask;
3e4bc23926b83c3 Eric Dumazet       2023-09-08  54  	unsigned int		idx_generator;
53c2e285f970300 Herbert Xu         2014-11-13  55  	struct hlist_head	policy_inexact[XFRM_POLICY_MAX];
53c2e285f970300 Herbert Xu         2014-11-13  56  	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX];
dc2caba7b321289 Alexey Dobriyan    2008-11-25  57  	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
66caf628c3b634c Alexey Dobriyan    2008-11-25  58  	struct work_struct	policy_hash_work;
880a6fab8f6ba5b Christophe Gouault 2014-08-29  59  	struct xfrm_policy_hthresh policy_hthresh;
24969facd704a5f Florian Westphal   2018-11-07  60  	struct list_head	inexact_bins;
a6483b790f8efcd Alexey Dobriyan    2008-11-25  61  
d7c7544c3d5f590 Alexey Dobriyan    2010-01-24  62  
a6483b790f8efcd Alexey Dobriyan    2008-11-25  63  	struct sock		*nlsk;
d79d792ef9f99cc Eric W. Biederman  2009-12-03  64  	struct sock		*nlsk_stash;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  65  
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  66  	u32			sysctl_aevent_etime;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  67  	u32			sysctl_aevent_rseqth;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  68  	int			sysctl_larval_drop;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  69  	u32			sysctl_acq_expires;
2d151d39073aff4 Steffen Klassert   2021-07-18  70  
b58b1f563ab7895 Nicolas Dichtel    2022-03-14  71  	u8			policy_default[XFRM_POLICY_MAX];
2d151d39073aff4 Steffen Klassert   2021-07-18  72  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling.
  2024-04-12  6:05 ` [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling Steffen Klassert
  2024-04-12 10:37   ` Tobias Brunner
@ 2024-04-14 11:05   ` Simon Horman
  2024-04-18  9:46     ` Steffen Klassert
  2024-04-15 14:12   ` Sabrina Dubroca
  2 siblings, 1 reply; 16+ messages in thread
From: Simon Horman @ 2024-04-14 11:05 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu

On Fri, Apr 12, 2024 at 08:05:51AM +0200, Steffen Klassert wrote:

...

> diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c

...

> @@ -1115,13 +1120,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
>  							&fl->u.__fl_common))
>  			return;
>  
> +		if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
> +			return;
> +
>  		if (!*best ||
> +		    ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
>  		    (*best)->km.dying > x->km.dying ||
>  		    ((*best)->km.dying == x->km.dying &&
>  		     (*best)->curlft.add_time < x->curlft.add_time))
>  			*best = x;
>  	} else if (x->km.state == XFRM_STATE_ACQ) {
> -		*acq_in_progress = 1;
> +		if (!*best || (*best && x->pcpu_num == pcpu_id))

Hi Steffen,

a minor nit from my side: I think this can be expressed as follows.

		if (!*best || x->pcpu_num == pcpu_id)

Flagged by Coccinelle

> +			*acq_in_progress = 1;
>  	} else if (x->km.state == XFRM_STATE_ERROR ||
>  		   x->km.state == XFRM_STATE_EXPIRED) {
>  		if ((!x->sel.family ||

...

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling.
  2024-04-12  6:05 ` [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling Steffen Klassert
  2024-04-12 10:37   ` Tobias Brunner
  2024-04-14 11:05   ` Simon Horman
@ 2024-04-15 14:12   ` Sabrina Dubroca
  2024-04-18  9:50     ` Steffen Klassert
  2 siblings, 1 reply; 16+ messages in thread
From: Sabrina Dubroca @ 2024-04-15 14:12 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu

2024-04-12, 08:05:51 +0200, Steffen Klassert wrote:
> diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
> index 0c306473a79d..b41b5dd72d8e 100644
> --- a/net/xfrm/xfrm_state.c
> +++ b/net/xfrm/xfrm_state.c
[...]
> @@ -1096,6 +1098,9 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
>  			       struct xfrm_state **best, int *acq_in_progress,
>  			       int *error)
>  {
> +	unsigned int pcpu_id = get_cpu();
> +	put_cpu();

That looks really strange to me. Is it safe? If it is, I guess you
could just use smp_processor_id(), since you don't get anything out of
the extra preempt_disable/enable pair.

(same in xfrm_state_find)


[...]
> @@ -2458,6 +2478,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
>  	err = xfrm_if_id_put(skb, x->if_id);
>  	if (err)
>  		goto out_cancel;
> +	if (x->pcpu_num != UINT_MAX)
> +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);

Missing the corresponding change to xfrm_aevent_msgsize?


[...]
> @@ -3049,6 +3078,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
>  	[XFRMA_SET_MARK_MASK]	= { .type = NLA_U32 },
>  	[XFRMA_IF_ID]		= { .type = NLA_U32 },
>  	[XFRMA_MTIMER_THRESH]   = { .type = NLA_U32 },
> +	[XFRMA_SA_PCPU]		= { .type = NLA_U32 },

What about xfrm_compat? Don't we need to add XFRMA_SA_PCPU to
compat_policy, and then some changes to the translators?


[...]
> @@ -3216,6 +3246,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
>  	err = xfrm_if_id_put(skb, x->if_id);
>  	if (err)
>  		return err;
> +	if (x->pcpu_num != UINT_MAX) {
> +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);

Missing the corresponding change to xfrm_expire_msgsize?


[...]
> @@ -3453,6 +3490,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
>  		err = xfrm_if_id_put(skb, xp->if_id);
>  	if (!err && xp->xdo.dev)
>  		err = copy_user_offload(&xp->xdo, skb);
> +	if (!err && x->pcpu_num != UINT_MAX)
> +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);

Missing the corresponding change to xfrm_acquire_msgsize?

-- 
Sabrina


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache.
@ 2024-04-16  2:33 ` kernel test robot
  0 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2024-04-16  2:33 UTC (permalink / raw)
  To: Steffen Klassert, netdev, devel, Paul Wouters, Antony Antony,
	Tobias Brunner, Daniel Xu
  Cc: oe-kbuild-all, Steffen Klassert

Hi Steffen,

kernel test robot noticed the following build warnings:

[auto build test WARNING on klassert-ipsec-next/master]
[also build test WARNING on klassert-ipsec/master net/main net-next/main linus/master v6.9-rc3 next-20240412]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Steffen-Klassert/xfrm-Add-support-for-per-cpu-xfrm-state-handling/20240412-140746
base:   https://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git master
patch link:    https://lore.kernel.org/r/20240412060553.3483630-4-steffen.klassert%40secunet.com
patch subject: [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache.
config: i386-randconfig-061-20240413 (https://download.01.org/0day-ci/archive/20240413/202404130802.rDxN3ijD-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240413/202404130802.rDxN3ijD-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <yujie.liu@intel.com>
| Closes: https://lore.kernel.org/r/202404130802.rDxN3ijD-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
   drivers/net/ethernet/altera/altera_tse_ethtool.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_msgdma.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_utils.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/if_vlan.h, ...):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_sgdma.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/if_vlan.h, ...):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_tse_main.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/etherdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu

vim +46 include/net/netns/xfrm.h

880a6fab8f6ba5b Christophe Gouault 2014-08-29  31  
d62ddc21b674b5a Alexey Dobriyan    2008-11-25  32  struct netns_xfrm {
9d4139c76905833 Alexey Dobriyan    2008-11-25  33  	struct list_head	state_all;
73d189dce486cd6 Alexey Dobriyan    2008-11-25  34  	/*
73d189dce486cd6 Alexey Dobriyan    2008-11-25  35  	 * Hash table to find appropriate SA towards given target (endpoint of
73d189dce486cd6 Alexey Dobriyan    2008-11-25  36  	 * tunnel or destination of transport mode) allowed by selector.
73d189dce486cd6 Alexey Dobriyan    2008-11-25  37  	 *
73d189dce486cd6 Alexey Dobriyan    2008-11-25  38  	 * Main use is finding SA after policy selected tunnel or transport
73d189dce486cd6 Alexey Dobriyan    2008-11-25  39  	 * mode. Also, it can be used by ah/esp icmp error handler to find
73d189dce486cd6 Alexey Dobriyan    2008-11-25  40  	 * offending SA.
73d189dce486cd6 Alexey Dobriyan    2008-11-25  41  	 */
d737a5805581c6f Florian Westphal   2016-08-09  42  	struct hlist_head	__rcu *state_bydst;
d737a5805581c6f Florian Westphal   2016-08-09  43  	struct hlist_head	__rcu *state_bysrc;
d737a5805581c6f Florian Westphal   2016-08-09  44  	struct hlist_head	__rcu *state_byspi;
fe9f1d8779cb470 Sabrina Dubroca    2021-04-25  45  	struct hlist_head	__rcu *state_byseq;
042bf7320e286f6 Steffen Klassert   2024-04-12 @46  	struct hlist_head	__rcu __percpu *state_cache_input;
529983ecabeae3d Alexey Dobriyan    2008-11-25  47  	unsigned int		state_hmask;
0bf7c5b019518d3 Alexey Dobriyan    2008-11-25  48  	unsigned int		state_num;
630827338585022 Alexey Dobriyan    2008-11-25  49  	struct work_struct	state_hash_work;
50a30657fd7ee77 Alexey Dobriyan    2008-11-25  50  
adfcf0b27e87d16 Alexey Dobriyan    2008-11-25  51  	struct list_head	policy_all;
93b851c1c93c7d5 Alexey Dobriyan    2008-11-25  52  	struct hlist_head	*policy_byidx;
8100bea7d619e84 Alexey Dobriyan    2008-11-25  53  	unsigned int		policy_idx_hmask;
3e4bc23926b83c3 Eric Dumazet       2023-09-08  54  	unsigned int		idx_generator;
53c2e285f970300 Herbert Xu         2014-11-13  55  	struct hlist_head	policy_inexact[XFRM_POLICY_MAX];
53c2e285f970300 Herbert Xu         2014-11-13  56  	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX];
dc2caba7b321289 Alexey Dobriyan    2008-11-25  57  	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
66caf628c3b634c Alexey Dobriyan    2008-11-25  58  	struct work_struct	policy_hash_work;
880a6fab8f6ba5b Christophe Gouault 2014-08-29  59  	struct xfrm_policy_hthresh policy_hthresh;
24969facd704a5f Florian Westphal   2018-11-07  60  	struct list_head	inexact_bins;
a6483b790f8efcd Alexey Dobriyan    2008-11-25  61  
d7c7544c3d5f590 Alexey Dobriyan    2010-01-24  62  
a6483b790f8efcd Alexey Dobriyan    2008-11-25  63  	struct sock		*nlsk;
d79d792ef9f99cc Eric W. Biederman  2009-12-03  64  	struct sock		*nlsk_stash;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  65  
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  66  	u32			sysctl_aevent_etime;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  67  	u32			sysctl_aevent_rseqth;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  68  	int			sysctl_larval_drop;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  69  	u32			sysctl_acq_expires;
2d151d39073aff4 Steffen Klassert   2021-07-18  70  
b58b1f563ab7895 Nicolas Dichtel    2022-03-14  71  	u8			policy_default[XFRM_POLICY_MAX];
2d151d39073aff4 Steffen Klassert   2021-07-18  72  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 2/3] xfrm: Cache used outbound xfrm states at the policy.
  2024-04-12  6:05 ` [PATCH ipsec-next 2/3] xfrm: Cache used outbound xfrm states at the policy Steffen Klassert
@ 2024-04-16 21:51   ` Daniel Xu
  2024-04-18 10:01     ` Steffen Klassert
  0 siblings, 1 reply; 16+ messages in thread
From: Daniel Xu @ 2024-04-16 21:51 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner

Hi Steffen,

On Fri, Apr 12, 2024 at 08:05:52AM +0200, Steffen Klassert wrote:
> Now that we can have percpu xfrm states, the number of active
> states might increase. To get a better lookup performance,
> we cache the used xfrm states at the policy for outbound
> IPsec traffic.
> 
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> ---
>  include/net/xfrm.h     |  3 +++
>  net/xfrm/xfrm_policy.c | 12 ++++++++++
>  net/xfrm/xfrm_state.c  | 54 ++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 69 insertions(+)
> 
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 2ba4c077ccf9..49c85bcd9fd9 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -181,6 +181,7 @@ struct xfrm_state {
>  	struct hlist_node	bysrc;
>  	struct hlist_node	byspi;
>  	struct hlist_node	byseq;
> +	struct hlist_node	state_cache;
>  
>  	refcount_t		refcnt;
>  	spinlock_t		lock;
> @@ -524,6 +525,8 @@ struct xfrm_policy {
>  	struct hlist_node	bydst;
>  	struct hlist_node	byidx;
>  
> +	struct hlist_head	state_cache_list;
> +
>  	/* This lock only affects elements except for entry. */
>  	rwlock_t		lock;
>  	refcount_t		refcnt;
> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
> index 6affe5cd85d8..6a7f1d40d5f6 100644
> --- a/net/xfrm/xfrm_policy.c
> +++ b/net/xfrm/xfrm_policy.c
> @@ -410,6 +410,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
>  	if (policy) {
>  		write_pnet(&policy->xp_net, net);
>  		INIT_LIST_HEAD(&policy->walk.all);
> +		INIT_HLIST_HEAD(&policy->state_cache_list);
>  		INIT_HLIST_NODE(&policy->bydst_inexact_list);
>  		INIT_HLIST_NODE(&policy->bydst);
>  		INIT_HLIST_NODE(&policy->byidx);
> @@ -452,6 +453,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
>  
>  static void xfrm_policy_kill(struct xfrm_policy *policy)
>  {
> +	struct net *net = xp_net(policy);
> +	struct xfrm_state *x;
> +
>  	write_lock_bh(&policy->lock);
>  	policy->walk.dead = 1;
>  	write_unlock_bh(&policy->lock);
> @@ -465,6 +469,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
>  	if (del_timer(&policy->timer))
>  		xfrm_pol_put(policy);
>  
> +	/* XXX: Flush state cache */
> +	spin_lock_bh(&net->xfrm.xfrm_state_lock);
> +	hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
> +		hlist_del_init_rcu(&x->state_cache);
> +	}
> +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
> +
>  	xfrm_pol_put(policy);
>  }
>  
> @@ -3253,6 +3264,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
>  		dst_release(dst);
>  		dst = dst_orig;
>  	}
> +
>  ok:
>  	xfrm_pols_put(pols, drop_pols);
>  	if (dst && dst->xfrm &&
> diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
> index b41b5dd72d8e..ff2b0fc0b206 100644
> --- a/net/xfrm/xfrm_state.c
> +++ b/net/xfrm/xfrm_state.c
> @@ -663,6 +663,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
>  		refcount_set(&x->refcnt, 1);
>  		atomic_set(&x->tunnel_users, 0);
>  		INIT_LIST_HEAD(&x->km.all);
> +		INIT_HLIST_NODE(&x->state_cache);
>  		INIT_HLIST_NODE(&x->bydst);
>  		INIT_HLIST_NODE(&x->bysrc);
>  		INIT_HLIST_NODE(&x->byspi);
> @@ -707,12 +708,15 @@ int __xfrm_state_delete(struct xfrm_state *x)
>  
>  	if (x->km.state != XFRM_STATE_DEAD) {
>  		x->km.state = XFRM_STATE_DEAD;
> +
>  		spin_lock(&net->xfrm.xfrm_state_lock);
>  		list_del(&x->km.all);
>  		hlist_del_rcu(&x->bydst);
>  		hlist_del_rcu(&x->bysrc);
>  		if (x->km.seq)
>  			hlist_del_rcu(&x->byseq);
> +		if (!hlist_unhashed(&x->state_cache))
> +			hlist_del_rcu(&x->state_cache);
>  		if (x->id.spi)
>  			hlist_del_rcu(&x->byspi);
>  		net->xfrm.state_num--;
> @@ -1160,6 +1164,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  	unsigned short encap_family = tmpl->encap_family;
>  	unsigned int sequence;
>  	struct km_event c;
> +	bool cached = false;
>  	unsigned int pcpu_id = get_cpu();
>  	put_cpu();
>  
> @@ -1168,6 +1173,45 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
>  
>  	rcu_read_lock();
> +	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
> +		if (x->props.family == encap_family &&
> +		    x->props.reqid == tmpl->reqid &&
> +		    (mark & x->mark.m) == x->mark.v &&
> +		    x->if_id == if_id &&
> +		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
> +		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
> +		    tmpl->mode == x->props.mode &&
> +		    tmpl->id.proto == x->id.proto &&
> +		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
> +			xfrm_state_look_at(pol, x, fl, encap_family,
> +					   &best, &acquire_in_progress, &error);
> +	}
> +
> +	if (best)
> +		goto cached;
> +
> +	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
> +		if (x->props.family == encap_family &&
> +		    x->props.reqid == tmpl->reqid &&
> +		    (mark & x->mark.m) == x->mark.v &&
> +		    x->if_id == if_id &&
> +		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
> +		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
> +		    tmpl->mode == x->props.mode &&
> +		    tmpl->id.proto == x->id.proto &&
> +		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
> +			xfrm_state_look_at(pol, x, fl, family,
> +					   &best, &acquire_in_progress, &error);
> +	}
> +
> +cached:
> +	if (best)

Need to set `cached = true` here otherwise slowpath will always be
taken.

[...]

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling.
  2024-04-12 10:37   ` Tobias Brunner
@ 2024-04-18  9:45     ` Steffen Klassert
  0 siblings, 0 replies; 16+ messages in thread
From: Steffen Klassert @ 2024-04-18  9:45 UTC (permalink / raw)
  To: Tobias Brunner; +Cc: netdev, devel, Paul Wouters, Antony Antony, Daniel Xu

On Fri, Apr 12, 2024 at 12:37:24PM +0200, Tobias Brunner wrote:
> On 12.04.24 08:05, Steffen Klassert wrote:
> >  
> > diff --git a/net/key/af_key.c b/net/key/af_key.c
> > index f79fb99271ed..b9a1eb3ff461 100644
> > --- a/net/key/af_key.c
> > +++ b/net/key/af_key.c
> > @@ -1354,7 +1354,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
> >  	}
> >  
> >  	if (hdr->sadb_msg_seq) {
> > -		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
> > +		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, 0);
> 
> Shouldn't this be UINT_MAX instead of 0?  (Not sure if it makes a
> difference in practice, but it would be consistent with XFRM.)

Yes, right fixed

> 
> >  		if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
> >  			xfrm_state_put(x);
> >  			x = NULL;
> > @@ -1362,7 +1362,8 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
> >  	}
> >  
> >  	if (!x)
> > -		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
> > +		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, 0, proto,
> > +				  xdaddr, xsaddr, 1, family);
> 
> Same as above.

Fixed.

> 
> >  
> >  	if (x == NULL)
> >  		return -ENOENT;
> > @@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb
> >  	if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
> >  		return 0;
> >  
> > -	x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
> > +	x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, 0);
> 
> Same as above.

Fixed too.

> >  	return x;
> > @@ -972,6 +973,7 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
> >  	x->props.mode = tmpl->mode;
> >  	x->props.reqid = tmpl->reqid;
> >  	x->props.family = tmpl->encap_family;
> > +	x->type_offload = NULL;
> 
> This seems unrelated.  And is it actually necessary?  xfrm_state_alloc()
> uses kmem_cache_zalloc(), so this should be NULL anyway.

Removed.

> > @@ -1333,6 +1350,9 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
> >  			x = NULL;
> >  			error = -ESRCH;
> >  		}
> > +
> > +		if (best)
> > +			x = best;
> 
> Since `x` could be assigned to a (more specific in terms of CPU ID)
> state in state XFRM_STATE_ACQ at this point, it might warrant a comment
> that clarifies why it is unconditionally overridden with `best`.  That
> is, so this other matching "fallback" SA is used for this packet while
> the CPU-specific acquire is handled, which might not be immedialy
> obvious when reading the code.

Good point. I've added a comment.

Thanks for the review!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling.
  2024-04-14 11:05   ` Simon Horman
@ 2024-04-18  9:46     ` Steffen Klassert
  0 siblings, 0 replies; 16+ messages in thread
From: Steffen Klassert @ 2024-04-18  9:46 UTC (permalink / raw)
  To: Simon Horman
  Cc: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu

On Sun, Apr 14, 2024 at 12:05:34PM +0100, Simon Horman wrote:
> On Fri, Apr 12, 2024 at 08:05:51AM +0200, Steffen Klassert wrote:
> 
> ...
> 
> > diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
> 
> ...
> 
> > @@ -1115,13 +1120,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
> >  							&fl->u.__fl_common))
> >  			return;
> >  
> > +		if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
> > +			return;
> > +
> >  		if (!*best ||
> > +		    ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
> >  		    (*best)->km.dying > x->km.dying ||
> >  		    ((*best)->km.dying == x->km.dying &&
> >  		     (*best)->curlft.add_time < x->curlft.add_time))
> >  			*best = x;
> >  	} else if (x->km.state == XFRM_STATE_ACQ) {
> > -		*acq_in_progress = 1;
> > +		if (!*best || (*best && x->pcpu_num == pcpu_id))
> 
> Hi Steffen,
> 
> a minor nit from my side: I think this can be expressed as follows.
> 
> 		if (!*best || x->pcpu_num == pcpu_id)

Indeed, thanks Simon!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling.
  2024-04-15 14:12   ` Sabrina Dubroca
@ 2024-04-18  9:50     ` Steffen Klassert
  0 siblings, 0 replies; 16+ messages in thread
From: Steffen Klassert @ 2024-04-18  9:50 UTC (permalink / raw)
  To: Sabrina Dubroca
  Cc: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu

On Mon, Apr 15, 2024 at 04:12:30PM +0200, Sabrina Dubroca wrote:
> 2024-04-12, 08:05:51 +0200, Steffen Klassert wrote:
> > diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
> > index 0c306473a79d..b41b5dd72d8e 100644
> > --- a/net/xfrm/xfrm_state.c
> > +++ b/net/xfrm/xfrm_state.c
> [...]
> > @@ -1096,6 +1098,9 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
> >  			       struct xfrm_state **best, int *acq_in_progress,
> >  			       int *error)
> >  {
> > +	unsigned int pcpu_id = get_cpu();
> > +	put_cpu();
> 
> That looks really strange to me. Is it safe? If it is, I guess you
> could just use smp_processor_id(), since you don't get anything out of
> the extra preempt_disable/enable pair.

We can use use smp_processor_id() as we just need the ID as a lookup
key.

> 
> (same in xfrm_state_find)
> 
> 
> [...]
> > @@ -2458,6 +2478,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
> >  	err = xfrm_if_id_put(skb, x->if_id);
> >  	if (err)
> >  		goto out_cancel;
> > +	if (x->pcpu_num != UINT_MAX)
> > +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
> 
> Missing the corresponding change to xfrm_aevent_msgsize?

Right, fixed.

> [...]
> > @@ -3049,6 +3078,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
> >  	[XFRMA_SET_MARK_MASK]	= { .type = NLA_U32 },
> >  	[XFRMA_IF_ID]		= { .type = NLA_U32 },
> >  	[XFRMA_MTIMER_THRESH]   = { .type = NLA_U32 },
> > +	[XFRMA_SA_PCPU]		= { .type = NLA_U32 },
> 
> What about xfrm_compat? Don't we need to add XFRMA_SA_PCPU to
> compat_policy, and then some changes to the translators?

Yeah, I forgot this. The compat layer did not yet exist when
I wrote the initial pachset. The IETF standardization process
held this pachset off for about 5 years :-/

> [...]
> > @@ -3216,6 +3246,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
> >  	err = xfrm_if_id_put(skb, x->if_id);
> >  	if (err)
> >  		return err;
> > +	if (x->pcpu_num != UINT_MAX) {
> > +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
> 
> Missing the corresponding change to xfrm_expire_msgsize?

Fixed.

> [...]
> > @@ -3453,6 +3490,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
> >  		err = xfrm_if_id_put(skb, xp->if_id);
> >  	if (!err && xp->xdo.dev)
> >  		err = copy_user_offload(&xp->xdo, skb);
> > +	if (!err && x->pcpu_num != UINT_MAX)
> > +		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
> 
> Missing the corresponding change to xfrm_acquire_msgsize?

Fixed.

Thanks for the review Sabrina!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 2/3] xfrm: Cache used outbound xfrm states at the policy.
  2024-04-16 21:51   ` Daniel Xu
@ 2024-04-18 10:01     ` Steffen Klassert
  0 siblings, 0 replies; 16+ messages in thread
From: Steffen Klassert @ 2024-04-18 10:01 UTC (permalink / raw)
  To: Daniel Xu; +Cc: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner

On Tue, Apr 16, 2024 at 03:51:55PM -0600, Daniel Xu wrote:
> > +
> > +	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
> > +		if (x->props.family == encap_family &&
> > +		    x->props.reqid == tmpl->reqid &&
> > +		    (mark & x->mark.m) == x->mark.v &&
> > +		    x->if_id == if_id &&
> > +		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
> > +		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
> > +		    tmpl->mode == x->props.mode &&
> > +		    tmpl->id.proto == x->id.proto &&
> > +		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
> > +			xfrm_state_look_at(pol, x, fl, family,
> > +					   &best, &acquire_in_progress, &error);
> > +	}
> > +
> > +cached:
> > +	if (best)
> 
> Need to set `cached = true` here otherwise slowpath will always be
> taken.

Fixed, thanks Daniel!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 0/3] Add support for per cpu xfrm states.
  2024-04-12  6:05 [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Steffen Klassert
                   ` (2 preceding siblings ...)
  2024-04-12  6:05 ` [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache Steffen Klassert
@ 2024-05-02 12:20 ` Antony Antony
  2024-05-02 12:38   ` Steffen Klassert
  3 siblings, 1 reply; 16+ messages in thread
From: Antony Antony @ 2024-05-02 12:20 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu


On Fri, Apr 12, 2024 at 08:05:50AM +0200, Steffen Klassert via Devel wrote:
> Add support for per cpu xfrm states.
> 
> This patchset implements the xfrm part of per cpu SAs as specified in:
> 
> https://datatracker.ietf.org/doc/draft-ietf-ipsecme-multi-sa-performance/
> 
> Patch 1 adds the cpu as a lookup key and config option to to generate
> acquire messages for each cpu.
> 
> Patch 2 caches outbound states at the policy.
> 
> Patch 3 caches inbound states on a new percpu state cache.
> 
> Please review and test.

Hi Steffen,

I tried xfrm-pcpu-v8 branch, and get these kernel splats. I think it happens 
of the pervious version too. This kernel build has  KASAN enabled.
On AWS I didn't get it to work, with UDP encap enabled. I will try again.
Each ping packet seems to new CHILD_SA. Let me try again.

I am sending ping from the IPsc gateway. It does not appear on the receiver.  
And also appears with iperf too.

[   92.935674] BUG: using smp_processor_id() in preemptible [00000000] code: ping/582
[   92.936489] caller is xfrm_state_find+0x131/0x19c3
[   92.937008] CPU: 0 PID: 582 Comm: ping Not tainted 6.9.0-rc2-00675-gcc50d6985093 #118
[   92.937807] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   92.938731] Call Trace:
[   92.939010]  <TASK>
[   92.939256]  dump_stack_lvl+0x47/0x5f
[   92.939654]  check_preemption_disabled+0xc1/0xd2
[   92.940142]  xfrm_state_find+0x131/0x19c3
[   92.940568]  ? stack_access_ok+0x17/0x58
[   92.941001]  ? on_stack+0x34/0x5f
[   92.941371]  ? xfrm_alloc_spi+0x358/0x358
[   92.941808]  ? preempt_count_sub+0x14/0xb5
[   92.942243]  ? unwind_next_frame+0x9d3/0xa68
[   92.942698]  ? entry_SYSCALL_64_after_hwframe+0x46/0x4e
[   92.943242]  ? preempt_latency_start+0x40/0x4b
[   92.943711]  ? __rcu_read_unlock+0x32/0x224
[   92.944156]  ? __rcu_read_unlock+0x32/0x224
[   92.944597]  ? write_profile+0x1da/0x1da
[   92.945023]  xfrm_tmpl_resolve+0x246/0x497
[   92.945469]  ? policy_hash_bysel+0x208/0x208
[   92.945933]  ? fib_lookup_good_nhc+0x60/0xff
[   92.946388]  ? fib_table_lookup+0x673/0x738
[   92.946836]  ? __addr_hash+0x6a/0x7e
[   92.947225]  ? __addr_hash+0x6a/0x7e
[   92.947613]  xfrm_resolve_and_create_bundle+0xf7/0xac2
[   92.948149]  ? xfrm_policy_addr_delta+0x52/0xe0
[   92.948625]  ? __refcount_add_not_zero.constprop.0+0x91/0xe7
[   92.949214]  ? xfrm_flowi_sport.isra.0+0x46/0x6c
[   92.949718]  ? __rcu_read_unlock+0x4e/0x224
[   92.950163]  ? xfrm_tmpl_resolve+0x497/0x497
[   92.950619]  ? xfrm_policy_lookup_bytype+0x47a/0x4ca
[   92.951141]  ? xfrm_sk_policy_lookup+0xf7/0xf7
[   92.951614]  ? rcuref_get+0xf/0x23
[   92.951989]  xfrm_lookup_with_ifid+0x2af/0x768
[   92.952461]  ? xfrm_expand_policies.constprop.0+0x1cd/0x1cd
[   92.953053]  ? ip_route_output_key_hash+0xd0/0x110
[   92.953562]  ? ip_route_output_key_hash+0xd0/0x110
[   92.954069]  xfrm_lookup_route+0x18/0x8b
[   92.954491]  __ip4_datagram_connect+0x36a/0x522
[   92.954973]  ip4_datagram_connect+0x28/0x3c
[   92.955418]  __sys_connect+0xaa/0xfa
[   92.955808]  ? __sys_connect_file+0xa9/0xa9
[   92.956254]  ? fd_install+0x11e/0x130
[   92.956650]  ? preempt_count_sub+0x14/0xb5
[   92.957091]  ? rcu_read_unlock_sched+0xa/0x1b
[   92.957568]  ? __sys_socket+0xe0/0x120
[   92.957974]  ? update_socket_protocol+0x8/0x8
[   92.958436]  ? __rcu_read_unlock+0x4e/0x224
[   92.958881]  __x64_sys_connect+0x3c/0x43
[   92.959302]  do_syscall_64+0x6d/0xd9
[   92.959689]  entry_SYSCALL_64_after_hwframe+0x46/0x4e
[   92.960218] RIP: 0033:0x7fa4d3443580
[   92.960603] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 80 3d 61 10 0d 00 00 74 17 b8 2a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 54
[   92.962448] RSP: 002b:00007ffd6336e968 EFLAGS: 00000202 ORIG_RAX: 000000000000002a
[   92.963215] RAX: ffffffffffffffda RBX: 00007ffd6336e990 RCX: 00007fa4d3443580
[   92.963941] RDX: 0000000000000010 RSI: 00007ffd6336e990 RDI: 0000000000000005
[   92.964660] RBP: 0000000000000000 R08: 1999999999999999 R09: 0000000000000000
[   92.965406] R10: 00007fa4d3350980 R11: 0000000000000202 R12: 0000000000000005
[   92.966138] R13: 000055e34bbf45c2 R14: 000055e34bbf9240 R15: 000055e34bbf2340
[   92.966863]  </TASK>
[   92.967531] BUG: using smp_processor_id() in preemptible [00000000] code: ping/582
[   92.968305] caller is xfrm_state_find+0x131/0x19c3
[   92.968809] CPU: 0 PID: 582 Comm: ping Not tainted 6.9.0-rc2-00675-gcc50d6985093 #118
[   92.969622] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   92.970549] Call Trace:
[   92.970828]  <TASK>
[   92.971106]  dump_stack_lvl+0x47/0x5f
[   92.971553]  check_preemption_disabled+0xc1/0xd2
[   92.972047]  xfrm_state_find+0x131/0x19c3
[   92.972476]  ? mas_push_data+0x3d3/0x400
[   92.972902]  ? xfrm_alloc_spi+0x358/0x358
[   92.973346]  ? _lookup_address_cpa.isra.0+0x2d/0x2d
[   92.973867]  ? on_stack+0x34/0x5f
[   92.974232]  ? on_stack+0x34/0x5f
[   92.974596]  ? preempt_count_sub+0x14/0xb5
[   92.975033]  ? unwind_next_frame+0x9d3/0xa68
[   92.975488]  ? entry_SYSCALL_64_after_hwframe+0x46/0x4e
[   92.976033]  xfrm_tmpl_resolve+0x246/0x497
[   92.976504]  ? policy_hash_bysel+0x208/0x208
[   92.977049]  ? entry_SYSCALL_64_after_hwframe+0x46/0x4e
[   92.977616]  ? fib_lookup_good_nhc+0x60/0xff
[   92.978074]  ? fib_table_lookup+0x673/0x738
[   92.978519]  ? __addr_hash+0x6a/0x7e
[   92.978910]  ? __addr_hash+0x6a/0x7e
[   92.979299]  xfrm_resolve_and_create_bundle+0xf7/0xac2
[   92.979835]  ? xfrm_policy_addr_delta+0x52/0xe0
[   92.980314]  ? __refcount_add_not_zero.constprop.0+0x91/0xe7
[   92.980902]  ? xfrm_flowi_sport.isra.0+0x54/0x6c
[   92.981407]  ? __rcu_read_unlock+0x4e/0x224
[   92.981860]  ? xfrm_tmpl_resolve+0x497/0x497
[   92.982316]  ? xfrm_policy_lookup_bytype+0x47a/0x4ca
[   92.982837]  ? xfrm_sk_policy_lookup+0xf7/0xf7
[   92.983309]  ? __kernel_text_address+0xe/0x30
[   92.983773]  xfrm_lookup_with_ifid+0x2af/0x768
[   92.984245]  ? xfrm_expand_policies.constprop.0+0x1cd/0x1cd
[   92.984819]  ? ip_route_output_key_hash+0xd0/0x110
[   92.985329]  ? copy_page_to_iter_nofault+0x61e/0x61e
[   92.985859]  ? preempt_latency_start+0x40/0x4b
[   92.986332]  xfrm_lookup_route+0x18/0x8b
[   92.986752]  raw_sendmsg+0x683/0x1014
[   92.987149]  ? native_flush_tlb_one_user+0x23/0xe7
[   92.987653]  ? raw_hash_sk+0x224/0x224
[   92.988056]  ? kasan_unpoison+0x28/0x33
[   92.988469]  ? kernel_init_pages+0x42/0x51
[   92.988905]  ? prep_new_page+0x44/0x51
[   92.989315]  ? get_page_from_freelist+0x722/0x8e6
[   92.989823]  ? zone_watermark_fast.isra.0+0x12b/0x12b
[   92.990351]  ? __might_resched+0x8c/0x246
[   92.990778]  ? __might_sleep+0x26/0xa1
[   92.991183]  ? first_zones_zonelist+0x2c/0x43
[   92.991646]  ? do_raw_spin_lock+0x72/0xbb
[   92.992076]  ? queued_read_unlock+0x19/0x19
[   92.992520]  ? walk_page_mapping+0x1ba/0x1ba
[   92.992988]  ? __might_resched+0x8c/0x246
[   92.993418]  ? __might_sleep+0x26/0xa1
[   92.993833]  ? inet_send_prepare+0x59/0x59
[   92.994271]  ? sock_sendmsg_nosec+0x42/0x6c
[   92.994713]  sock_sendmsg_nosec+0x42/0x6c
[   92.995141]  __sys_sendto+0x15d/0x1cc
[   92.995537]  ? __x64_sys_getpeername+0x44/0x44
[   92.996009]  ? __handle_mm_fault+0x667/0xad4
[   92.996466]  ? find_vma+0x6b/0x8b
[   92.996830]  ? find_vma_intersection+0x8a/0x8a
[   92.997308]  ? handle_mm_fault+0x39/0x167
[   92.997741]  ? handle_mm_fault+0xfd/0x167
[   92.998168]  ? preempt_latency_start+0x40/0x4b
[   92.998637]  ? preempt_count_sub+0x14/0xb5
[   92.999073]  __x64_sys_sendto+0x76/0x82
[   92.999486]  do_syscall_64+0x6d/0xd9
[   92.999879]  entry_SYSCALL_64_after_hwframe+0x46/0x4e
[   93.000407] RIP: 0033:0x7fa4d3443a73
[   93.000795] Code: 8b 15 a9 83 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 80 3d 71 0b 0d 00 00 41 89 ca 74 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 55 48 83 ec 30 44 89 4c 24
[   93.002626] RSP: 002b:00007ffd6336d758 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
[   93.003392] RAX: ffffffffffffffda RBX: 000055e34bbf2340 RCX: 00007fa4d3443a73
[   93.004114] RDX: 0000000000000040 RSI: 000055e34bbf83c0 RDI: 0000000000000003
[   93.004835] RBP: 000055e34bbf83c0 R08: 000055e34bbf45c0 R09: 0000000000000010
[   93.005578] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000040
[   93.006301] R13: 00007ffd6336ee40 R14: 0000001d00000001 R15: 000055e34bbf5680
[   93.007033]  </TASK>
[   93.007334] BUG: using smp_processor_id() in preemptible [00000000] code: ping/582
[   93.008285] caller is xfrm_state_look_at.isra.0+0x28/0x1b6
[   93.008853] CPU: 0 PID: 582 Comm: ping Not tainted 6.9.0-rc2-00675-gcc50d6985093 #118
[   93.009661] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   93.010589] Call Trace:
[   93.010868]  <TASK>
[   93.011115]  dump_stack_lvl+0x47/0x5f
[   93.011512]  check_preemption_disabled+0xc1/0xd2
[   93.012000]  xfrm_state_look_at.isra.0+0x28/0x1b6

Breakpoint 1 at 0xffffffff81e13cb7: file net/xfrm/xfrm_user.c, line 790.
(gdb) list *xfrm_state_find+0x131
0xffffffff81e0166f is in xfrm_state_find (net/xfrm/xfrm_state.c:1215).
1210		u32 mark = pol->mark.v & pol->mark.m;
1211		unsigned short encap_family = tmpl->encap_family;
1212		unsigned int sequence;
1213		struct km_event c;
1214		bool cached = false;
1215		unsigned int pcpu_id = smp_processor_id();
1216
1217		to_put = NULL;
1218
1219		sequence = 
read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH ipsec-next 0/3] Add support for per cpu xfrm states.
  2024-05-02 12:20 ` [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Antony Antony
@ 2024-05-02 12:38   ` Steffen Klassert
  0 siblings, 0 replies; 16+ messages in thread
From: Steffen Klassert @ 2024-05-02 12:38 UTC (permalink / raw)
  To: Antony Antony
  Cc: netdev, devel, Paul Wouters, Antony Antony, Tobias Brunner, Daniel Xu

On Thu, May 02, 2024 at 02:20:25PM +0200, Antony Antony wrote:
> 
> On Fri, Apr 12, 2024 at 08:05:50AM +0200, Steffen Klassert via Devel wrote:
> > Add support for per cpu xfrm states.
> > 
> > This patchset implements the xfrm part of per cpu SAs as specified in:
> > 
> > https://datatracker.ietf.org/doc/draft-ietf-ipsecme-multi-sa-performance/
> > 
> > Patch 1 adds the cpu as a lookup key and config option to to generate
> > acquire messages for each cpu.
> > 
> > Patch 2 caches outbound states at the policy.
> > 
> > Patch 3 caches inbound states on a new percpu state cache.
> > 
> > Please review and test.
> 
> Hi Steffen,
> 
> I tried xfrm-pcpu-v8 branch, and get these kernel splats. I think it happens 
> of the pervious version too. This kernel build has  KASAN enabled.

I've introduced this in v8 when I replaced get_cpu by smp_processor_id.
I'll fix this when I rebase next time.


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2024-05-02 12:38 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-12  6:05 [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Steffen Klassert
2024-04-12  6:05 ` [PATCH ipsec-next 1/3] xfrm: Add support for per cpu xfrm state handling Steffen Klassert
2024-04-12 10:37   ` Tobias Brunner
2024-04-18  9:45     ` Steffen Klassert
2024-04-14 11:05   ` Simon Horman
2024-04-18  9:46     ` Steffen Klassert
2024-04-15 14:12   ` Sabrina Dubroca
2024-04-18  9:50     ` Steffen Klassert
2024-04-12  6:05 ` [PATCH ipsec-next 2/3] xfrm: Cache used outbound xfrm states at the policy Steffen Klassert
2024-04-16 21:51   ` Daniel Xu
2024-04-18 10:01     ` Steffen Klassert
2024-04-12  6:05 ` [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache Steffen Klassert
2024-05-02 12:20 ` [PATCH ipsec-next 0/3] Add support for per cpu xfrm states Antony Antony
2024-05-02 12:38   ` Steffen Klassert
2024-04-13  0:36 [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache kernel test robot
2024-04-16  2:33 ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.