All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH ipsec-next] xfrm: allow to avoid copying DSCP during encapsulation
@ 2013-02-18 15:32 Nicolas Dichtel
  2013-02-22  6:06 ` Steffen Klassert
  0 siblings, 1 reply; 4+ messages in thread
From: Nicolas Dichtel @ 2013-02-18 15:32 UTC (permalink / raw)
  To: steffen.klassert, herbert, davem; +Cc: netdev, Nicolas Dichtel

By default, DSCP is copied during encapsulation.
Copying the DSCP in IPsec tunneling may be a bit dangerous because packets with
different DSCP may get reordered relative to each other in the network and then
dropped by the remote IPsec GW if the reordering becomes too big compared to the
replay window.

It is possible to avoid this copy with netfilter rules, but it's very convenient
to be able to configure it for each SA directly.

This patch adds a toggle for this purpose. By default, it is not set, to maintain
backward compatibility.

The flags field in struct xfrm_usersa_info is full, hence I add a new attribute.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 include/net/xfrm.h           |  1 +
 include/uapi/linux/xfrm.h    |  3 +++
 net/ipv4/xfrm4_mode_tunnel.c |  8 ++++++--
 net/ipv6/xfrm6_mode_tunnel.c |  7 +++++--
 net/xfrm/xfrm_user.c         | 13 +++++++++++++
 5 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 30f3e5b..c5f12da 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -162,6 +162,7 @@ struct xfrm_state {
 		xfrm_address_t	saddr;
 		int		header_len;
 		int		trailer_len;
+		u32		extra_flags;
 	} props;
 
 	struct xfrm_lifetime_cfg lft;
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 28e493b..f07f422 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -297,6 +297,7 @@ enum xfrm_attr_type_t {
 	XFRMA_MARK,		/* struct xfrm_mark */
 	XFRMA_TFCPAD,		/* __u32 */
 	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
+	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -367,6 +368,8 @@ struct xfrm_usersa_info {
 #define XFRM_STATE_ESN		128
 };
 
+#define XFRM_STATE_EXTRA_FLAGS_DONT_ENCAP_DSCP		1
+
 struct xfrm_usersa_id {
 	xfrm_address_t			daddr;
 	__be32				spi;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ddee0a0..0273555 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
 
-	/* DS disclosed */
-	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
+	/* DS disclosing depends on XFRM_STATE_EXTRA_FLAGS_DONT_ENCAP_DSCP */
+	if (x->props.extra_flags & XFRM_STATE_EXTRA_FLAGS_DONT_ENCAP_DSCP)
+		top_iph->tos = 0;
+	else
+		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
 					    XFRM_MODE_SKB_CB(skb)->tos);
 
 	flags = x->props.flags;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9f2095b..7c1cc59 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	       sizeof(top_iph->flow_lbl));
 	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
 
-	dsfield = XFRM_MODE_SKB_CB(skb)->tos;
-	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+	if (x->props.extra_flags & XFRM_STATE_EXTRA_FLAGS_DONT_ENCAP_DSCP)
+		dsfield = 0;
+	else
+		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
 	if (x->props.flags & XFRM_STATE_NOECN)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index eb872b2..8cf7f5f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	copy_from_user_state(x, p);
 
+	if (attrs[XFRMA_SA_EXTRA_FLAGS])
+		x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
 	if ((err = attach_aead(&x->aead, &x->props.ealgo,
 			       attrs[XFRMA_ALG_AEAD])))
 		goto error;
@@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 
 	copy_to_user_state(x, p);
 
+	if (x->props.extra_flags) {
+		ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
+				  x->props.extra_flags);
+		if (ret)
+			goto out;
+	}
+
 	if (x->coaddr) {
 		ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
 		if (ret)
@@ -2302,6 +2312,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
+	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
 };
 
 static struct xfrm_link {
@@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 				    x->security->ctx_len);
 	if (x->coaddr)
 		l += nla_total_size(sizeof(*x->coaddr));
+	if (x->props.extra_flags)
+		l += nla_total_size(sizeof(x->props.extra_flags));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size(sizeof(u64));
-- 
1.8.0.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH ipsec-next] xfrm: allow to avoid copying DSCP during encapsulation
  2013-02-18 15:32 [PATCH ipsec-next] xfrm: allow to avoid copying DSCP during encapsulation Nicolas Dichtel
@ 2013-02-22  6:06 ` Steffen Klassert
  2013-02-22  9:54   ` [PATCH ipsec-next v2] " Nicolas Dichtel
  0 siblings, 1 reply; 4+ messages in thread
From: Steffen Klassert @ 2013-02-22  6:06 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: herbert, davem, netdev

On Mon, Feb 18, 2013 at 04:32:39PM +0100, Nicolas Dichtel wrote:
> By default, DSCP is copied during encapsulation.
> Copying the DSCP in IPsec tunneling may be a bit dangerous because packets with
> different DSCP may get reordered relative to each other in the network and then
> dropped by the remote IPsec GW if the reordering becomes too big compared to the
> replay window.
> 
> It is possible to avoid this copy with netfilter rules, but it's very convenient
> to be able to configure it for each SA directly.
> 
> This patch adds a toggle for this purpose. By default, it is not set, to maintain
> backward compatibility.
> 
> Field flags in struct xfrm_usersa_info is full, hence I add a new attribute.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> ---
>  include/net/xfrm.h           |  1 +
>  include/uapi/linux/xfrm.h    |  3 +++
>  net/ipv4/xfrm4_mode_tunnel.c |  8 ++++++--
>  net/ipv6/xfrm6_mode_tunnel.c |  7 +++++--
>  net/xfrm/xfrm_user.c         | 13 +++++++++++++
>  5 files changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 30f3e5b..c5f12da 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -162,6 +162,7 @@ struct xfrm_state {
>  		xfrm_address_t	saddr;
>  		int		header_len;
>  		int		trailer_len;
> +		u32		extra_flags;
>  	} props;

Please ensure that your new extra_flags field is copied wherever needed.
Right now we would lose it when we clone a state with
xfrm_state_clone().

>  
>  	struct xfrm_lifetime_cfg lft;
> diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
> index 28e493b..f07f422 100644
> --- a/include/uapi/linux/xfrm.h
> +++ b/include/uapi/linux/xfrm.h
> @@ -297,6 +297,7 @@ enum xfrm_attr_type_t {
>  	XFRMA_MARK,		/* struct xfrm_mark */
>  	XFRMA_TFCPAD,		/* __u32 */
>  	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
> +	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
>  	__XFRMA_MAX
>  
>  #define XFRMA_MAX (__XFRMA_MAX - 1)
> @@ -367,6 +368,8 @@ struct xfrm_usersa_info {
>  #define XFRM_STATE_ESN		128
>  };
>  
> +#define XFRM_STATE_EXTRA_FLAGS_DONT_ENCAP_DSCP		1

Can we have a shorter name for this flag, please?

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH ipsec-next v2] xfrm: allow to avoid copying DSCP during encapsulation
  2013-02-22  6:06 ` Steffen Klassert
@ 2013-02-22  9:54   ` Nicolas Dichtel
  2013-03-07  7:46     ` Steffen Klassert
  0 siblings, 1 reply; 4+ messages in thread
From: Nicolas Dichtel @ 2013-02-22  9:54 UTC (permalink / raw)
  To: steffen.klassert, herbert, davem; +Cc: netdev, Nicolas Dichtel

By default, DSCP is copied during encapsulation.
Copying the DSCP in IPsec tunneling may be a bit dangerous because packets with
different DSCP may get reordered relative to each other in the network and then
dropped by the remote IPsec GW if the reordering becomes too big compared to the
replay window.

It is possible to avoid this copy with netfilter rules, but it's very convenient
to be able to configure it for each SA directly.

This patch adds a toggle for this purpose. By default, it is not set, to maintain
backward compatibility.

The flags field in struct xfrm_usersa_info is full, hence I add a new attribute.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---

v2: rename the DONT_ENCAP_DSCP flag to a shorter name
    ensure extra_flags is copied in xfrm_state_clone() and ipcomp_tunnel_create()

Note that I will be off next week, so I will only reply to comments the week
after.

 include/net/xfrm.h           |  1 +
 include/uapi/linux/xfrm.h    |  3 +++
 net/ipv4/ipcomp.c            |  1 +
 net/ipv4/xfrm4_mode_tunnel.c |  8 ++++++--
 net/ipv6/xfrm6_mode_tunnel.c |  7 +++++--
 net/xfrm/xfrm_state.c        |  1 +
 net/xfrm/xfrm_user.c         | 13 +++++++++++++
 7 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 24c8886..ae16531 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -162,6 +162,7 @@ struct xfrm_state {
 		xfrm_address_t	saddr;
 		int		header_len;
 		int		trailer_len;
+		u32		extra_flags;
 	} props;
 
 	struct xfrm_lifetime_cfg lft;
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 28e493b..a8cd6a4 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -297,6 +297,7 @@ enum xfrm_attr_type_t {
 	XFRMA_MARK,		/* struct xfrm_mark */
 	XFRMA_TFCPAD,		/* __u32 */
 	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
+	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -367,6 +368,8 @@ struct xfrm_usersa_info {
 #define XFRM_STATE_ESN		128
 };
 
+#define XFRM_SA_XFLAG_DONT_ENCAP_DSCP	1
+
 struct xfrm_usersa_id {
 	xfrm_address_t			daddr;
 	__be32				spi;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index f01d1b1..59cb8c7 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 	t->props.mode = x->props.mode;
 	t->props.saddr.a4 = x->props.saddr.a4;
 	t->props.flags = x->props.flags;
+	t->props.extra_flags = x->props.extra_flags;
 	memcpy(&t->mark, &x->mark, sizeof(t->mark));
 
 	if (xfrm_init_state(t))
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index fe5189e..eb1dd4d 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
 
-	/* DS disclosed */
-	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
+	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		top_iph->tos = 0;
+	else
+		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
 					    XFRM_MODE_SKB_CB(skb)->tos);
 
 	flags = x->props.flags;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9bf6a74..4770d51 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	       sizeof(top_iph->flow_lbl));
 	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
 
-	dsfield = XFRM_MODE_SKB_CB(skb)->tos;
-	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		dsfield = 0;
+	else
+		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
 	if (x->props.flags & XFRM_STATE_NOECN)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ae01bdb..2a34167 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1195,6 +1195,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 		goto error;
 
 	x->props.flags = orig->props.flags;
+	x->props.extra_flags = orig->props.extra_flags;
 
 	x->curlft.add_time = orig->curlft.add_time;
 	x->km.state = orig->km.state;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fbd9e6c..204cba1 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	copy_from_user_state(x, p);
 
+	if (attrs[XFRMA_SA_EXTRA_FLAGS])
+		x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
 	if ((err = attach_aead(&x->aead, &x->props.ealgo,
 			       attrs[XFRMA_ALG_AEAD])))
 		goto error;
@@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 
 	copy_to_user_state(x, p);
 
+	if (x->props.extra_flags) {
+		ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
+				  x->props.extra_flags);
+		if (ret)
+			goto out;
+	}
+
 	if (x->coaddr) {
 		ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
 		if (ret)
@@ -2302,6 +2312,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
+	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
 };
 
 static struct xfrm_link {
@@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 				    x->security->ctx_len);
 	if (x->coaddr)
 		l += nla_total_size(sizeof(*x->coaddr));
+	if (x->props.extra_flags)
+		l += nla_total_size(sizeof(x->props.extra_flags));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size(sizeof(u64));
-- 
1.8.0.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH ipsec-next v2] xfrm: allow to avoid copying DSCP during encapsulation
  2013-02-22  9:54   ` [PATCH ipsec-next v2] " Nicolas Dichtel
@ 2013-03-07  7:46     ` Steffen Klassert
  0 siblings, 0 replies; 4+ messages in thread
From: Steffen Klassert @ 2013-03-07  7:46 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: herbert, davem, netdev

On Fri, Feb 22, 2013 at 10:54:54AM +0100, Nicolas Dichtel wrote:
> By default, DSCP is copied during encapsulation.
> Copying the DSCP in IPsec tunneling may be a bit dangerous because packets with
> different DSCP may get reordered relative to each other in the network and then
> dropped by the remote IPsec GW if the reordering becomes too big compared to the
> replay window.
> 
> It is possible to avoid this copy with netfilter rules, but it's very convenient
> to be able to configure it for each SA directly.
> 
> This patch adds a toggle for this purpose. By default, it is not set, to maintain
> backward compatibility.
> 
> Field flags in struct xfrm_usersa_info is full, hence I add a new attribute.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>

Applied to ipsec-next, thanks!

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-03-07  7:46 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-18 15:32 [PATCH ipsec-next] xfrm: allow to avoid copying DSCP during encapsulation Nicolas Dichtel
2013-02-22  6:06 ` Steffen Klassert
2013-02-22  9:54   ` [PATCH ipsec-next v2] " Nicolas Dichtel
2013-03-07  7:46     ` Steffen Klassert

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.