All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message
       [not found] <YTXGGiMzsda6mcm2@AntonyAntony.local>
@ 2021-12-19  0:28 ` Antony Antony
  2023-10-26 14:45   ` [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
                     ` (9 more replies)
  0 siblings, 10 replies; 27+ messages in thread
From: Antony Antony @ 2021-12-19  0:28 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: Herbert Xu, Antony Antony, David S. Miller, netdev

IETF RFC 4301, Section 6, requires a configurable option to forward
unauthenticated ICMP error message that does not match any
policies using. Use reverse of ICMP payload, which will be partial IP
packet.
Add this reverse lookup (using ICMP payload as skb) for xfrm forward path.

To enable this add the flag XFRM_POLICY_ICMP to fwd and out policy
and the flag XFRM_STATE_ICMP on incoming SA.

ip xfrm policy add flag icmp tmpl

ip xfrm policy
src 192.0.2.0/24 dst 192.0.1.0/25
	dir fwd priority 2084302 ptype main flag icmp

ip xfrm state add ...flag icmp

ip xfrm state
root@west:~#ip x s
src 192.1.2.23 dst 192.1.2.45
	proto esp spi 0xa7b76872 reqid 16389 mode tunnel
	replay-window 32 flag icmp af-unspec

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/xfrm/xfrm_policy.c | 137 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 135 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9341298b2a70..8505c36413c7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
 #include <linux/audit.h>
 #include <linux/rhashtable.h>
 #include <linux/if_tunnel.h>
+#include <linux/icmp.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/xfrm.h>
@@ -3475,6 +3476,123 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
 	return 0;
 }

+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+	const struct flowi6 *fl6 = &fl->u.ip6;
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (family == AF_INET)
+		if (fl4->flowi4_proto == IPPROTO_ICMP &&
+		    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+		      fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+			return false;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+	    (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+	      fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+		return false;
+#endif
+	return true;
+}
+
+static struct sk_buff *xfrm_icmp_flow_decode(struct sk_buff *skb,
+					     unsigned short family,
+					     struct flowi *fl,
+					     struct flowi *fl1)
+{
+	struct net *net = dev_net(skb->dev);
+	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+
+	if (!pskb_pull(newskb, (sizeof(struct iphdr) + sizeof(struct icmphdr))))
+		return NULL;
+	skb_reset_network_header(newskb);
+
+	if (xfrm_decode_session_reverse(newskb, fl1, family) < 0) {
+		kfree_skb(newskb);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+		return NULL;
+	}
+
+	/* inherit more from the old flow ???
+	 * the inner skb may have these values different from outer skb
+	 */
+
+	fl1->flowi_oif = fl->flowi_oif;
+	fl1->flowi_mark = fl->flowi_mark;
+	fl1->flowi_tos = fl->flowi_tos;
+	nf_nat_decode_session(newskb, fl1, family);
+
+	return newskb;
+}
+
+static bool xfrm_sa_icmp_flow(struct sk_buff *skb,
+			      unsigned short family, const struct xfrm_selector *sel,
+			      struct flowi *fl)
+{
+	bool ret = false;
+
+	if (!icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return ret;
+
+		ret = xfrm_selector_match(sel, &fl1, family);
+		kfree_skb(newskb);
+	}
+
+	return ret;
+}
+
+static inline
+struct xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+				     unsigned short family, u32 if_id)
+{
+	struct xfrm_policy *pol = NULL;
+
+	if (!icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct net *net = dev_net(skb->dev);
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return pol;
+		pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+		kfree_skb(newskb);
+	}
+
+	return pol;
+}
+
+static inline
+struct dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+				    unsigned short family, struct dst_entry *dst)
+{
+	if (!icmp_err_packet(fl, family)) {
+		struct net *net = dev_net(skb->dev);
+		struct dst_entry *dst2;
+		struct flowi fl1;
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return dst;
+
+		dst2 = xfrm_lookup(net, dst, &fl1, NULL, XFRM_LOOKUP_QUEUE);
+
+		kfree_skb(newskb);
+
+		if (IS_ERR(dst2))
+			return dst;
+
+		if (dst2->xfrm)
+			dst = dst2;
+	}
+
+	return dst;
+}
+
 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
@@ -3521,9 +3639,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,

 		for (i = sp->len - 1; i >= 0; i--) {
 			struct xfrm_state *x = sp->xvec[i];
+			int ret = 0;
+
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
-				return 0;
+				ret = true;
+				if (x->props.flags & XFRM_STATE_ICMP &&
+				    xfrm_sa_icmp_flow(skb, family, &x->sel, &fl))
+					ret = false;
+				if (ret) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+					return 0;
+				}
 			}
 		}
 	}
@@ -3546,6 +3672,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 0;
 	}

+	if (!pol && dir == XFRM_POLICY_FWD)
+		pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
 	if (!pol) {
 		if (!xfrm_default_allow(net, dir)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3675,6 +3804,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		res = 0;
 		dst = NULL;
 	}
+
+	if (dst && !dst->xfrm)
+		dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
 	skb_dst_set(skb, dst);
 	return res;
 }
--
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
@ 2023-10-26 14:45   ` Antony Antony
  2023-10-26 19:41     ` kernel test robot
  2024-01-30 10:36     ` Dan Carpenter
  2023-10-26 14:45   ` [PATCH ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
                     ` (8 subsequent siblings)
  9 siblings, 2 replies; 27+ messages in thread
From: Antony Antony @ 2023-10-26 14:45 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

This commit aligns with RFC 4301, Section 6, and addresses the
requirement to forward unauthenticated ICMP error messages that do not
match any xfrm policies. It utilizes the ICMP payload as an skb and
performs a reverse lookup. If a policy match is found, forward
the packet.

The ICMP payload typically contains a partial IP packet that is likely
responsible for the error message.

The following error types will be forwarded:
- IPv4 ICMP error types: ICMP_DEST_UNREACH & ICMP_TIME_EXCEEDED
- IPv6 ICMPv6 error types: ICMPV6_DEST_UNREACH, ICMPV6_PKT_TOOBIG,
			   ICMPV6_TIME_EXCEED

To implement this feature, a reverse lookup has been added to the xfrm
forward path, making use of the ICMP payload as the skb.

To enable this functionality from user space, the XFRM_POLICY_ICMP flag
should be added to the outgoing and forward policies, and the
XFRM_STATE_ICMP flag should be set on incoming states.

e.g.
ip xfrm policy add flag icmp tmpl

ip xfrm policy
src 192.0.2.0/24 dst 192.0.1.0/25
	dir out priority 2084302 ptype main flag icmp

ip xfrm state add ...flag icmp

ip xfrm state
root@west:~#ip x s
src 192.1.2.23 dst 192.1.2.45
	proto esp spi 0xa7b76872 reqid 16389 mode tunnel
	replay-window 32 flag icmp af-unspec

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/xfrm/xfrm_policy.c | 146 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 144 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e8c406eba11b..683acc4ad94b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
 #include <linux/audit.h>
 #include <linux/rhashtable.h>
 #include <linux/if_tunnel.h>
+#include <linux/icmp.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/inet_ecn.h>
@@ -3498,6 +3499,132 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
 	return 0;
 }
 
+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+	const struct flowi6 *fl6 = &fl->u.ip6;
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (family == AF_INET &&
+	    fl4->flowi4_proto == IPPROTO_ICMP &&
+	    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+	     fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+		return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6 &&
+	    fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+	    (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+	      fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+	      fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+		return true;
+#endif
+	return false;
+}
+
+static struct sk_buff *xfrm_icmp_flow_decode(struct sk_buff *skb,
+					     unsigned short family,
+					     struct flowi *fl,
+					     struct flowi *fl1)
+{
+	struct net *net = dev_net(skb->dev);
+	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+	int hl = family == AF_INET ? (sizeof(struct iphdr) +  sizeof(struct icmphdr)) :
+		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+	skb_reset_network_header(newskb);
+
+	if (!pskb_pull(newskb, hl))
+		return NULL;
+	skb_reset_network_header(newskb);
+
+	if (xfrm_decode_session_reverse(net, newskb, fl1, family) < 0) {
+		kfree_skb(newskb);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+		return NULL;
+	}
+
+	fl1->flowi_oif = fl->flowi_oif;
+	fl1->flowi_mark = fl->flowi_mark;
+	fl1->flowi_tos = fl->flowi_tos;
+	nf_nat_decode_session(newskb, fl1, family);
+
+	return newskb;
+}
+
+static bool xfrm_sa_icmp_flow(struct sk_buff *skb,
+			      unsigned short family, const struct xfrm_selector *sel,
+			      struct flowi *fl)
+{
+	bool ret = false;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return ret;
+
+		ret = xfrm_selector_match(sel, &fl1, family);
+		kfree_skb(newskb);
+	}
+
+	return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+			      struct flowi *fl, unsigned short family,
+			      u32 if_id)
+{
+	struct xfrm_policy *pol = NULL;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct net *net = dev_net(skb->dev);
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return pol;
+		pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+
+		kfree_skb(newskb);
+	}
+
+	return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+			     unsigned short family, struct dst_entry *dst)
+{
+	if (icmp_err_packet(fl, family)) {
+		struct net *net = dev_net(skb->dev);
+		struct dst_entry *dst2;
+		struct flowi fl1;
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return dst;
+
+		dst_hold(dst);
+
+		dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+		kfree_skb(newskb);
+
+		if (IS_ERR(dst2))
+			return dst;
+
+		if (dst2->xfrm) {
+			dst_release(dst);
+			dst = dst2;
+		} else {
+			dst_release(dst2);
+		}
+	}
+
+	return dst;
+}
+
 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
@@ -3544,9 +3671,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 		for (i = sp->len - 1; i >= 0; i--) {
 			struct xfrm_state *x = sp->xvec[i];
+			int ret = 0;
+
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
-				return 0;
+				ret = true;
+				if (x->props.flags & XFRM_STATE_ICMP &&
+				    xfrm_sa_icmp_flow(skb, family, &x->sel, &fl))
+					ret = false;
+				if (ret) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+					return 0;
+				}
 			}
 		}
 	}
@@ -3569,6 +3704,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 0;
 	}
 
+	if (!pol && dir == XFRM_POLICY_FWD)
+		pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
 	if (!pol) {
 		if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3702,6 +3840,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		res = 0;
 		dst = NULL;
 	}
+
+	if (dst && !dst->xfrm)
+		dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
 	skb_dst_set(skb, dst);
 	return res;
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
  2023-10-26 14:45   ` [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
@ 2023-10-26 14:45   ` Antony Antony
  2023-10-27  8:16   ` [PATCH v2 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
                     ` (7 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Antony Antony @ 2023-10-26 14:45 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

When enabling support for xfrm lookup using reverse ICMP payload,
We have identified an issue where the source address of the IPv4 e.g
"Destination Host Unreachable" message is incorrect. The IPv6 appear
to do the right thing.

Here is example of incorrect source address for ICMP error response.
When sending a ping to an unreachable host, the sender would receive an
ICMP unreachable response with a fake source address. Rather the address
of the host that generated ICMP Unreachable message. This is confusing
and incorrect.

Example:
ping -W 9 -w 5 -c 1 10.1.4.3
PING 10.1.4.3 (10.1.4.3) 56(84) bytes of data.
From 10.1.4.3 icmp_seq=1 Destination Host Unreachable

Notice : packet has the source address of the ICMP "Unreachable host!"

This issue can be traced back to commit
415b3334a21a ("icmp: Fix regression in nexthop resolution during replies.")
which introduced a change that copied the source address from the ICMP
payload.

This commit would force to use source address from the gatway/host.
The ICMP error message source address correctly set from the host.

After fixing:
ping -W 5 -c 1 10.1.4.3
PING 10.1.4.3 (10.1.4.3) 56(84) bytes of data.
From 10.1.3.2 icmp_seq=1 Destination Host Unreachable

Here is an example to reporduce:

export AB="10.1"
for i in 1 2 3 4 5; do
        h="host${i}"
        ip netns add ${h}
        ip -netns ${h} link set lo up
        ip netns exec ${h} sysctl -wq net.ipv4.ip_forward=1
        if [ $i -lt 5 ]; then
                ip -netns ${h} link add eth0 type veth peer name eth10${i}
                ip -netns ${h} addr add "${AB}.${i}.1/24" dev eth0
                ip -netns ${h} link set up dev eth0
        fi
done

for i in 1 2 3 4 5; do
        h="host${i}"
        p=$((i - 1))
        ph="host${p}"
        # connect to previous host
        if [ $i -gt 1 ]; then
                ip -netns ${ph} link set eth10${p} netns ${h}
                ip -netns ${h} link set eth10${p} name eth1
                ip -netns ${h} link set up dev eth1
                ip -netns ${h} addr add "${AB}.${p}.2/24" dev eth1
        fi
        # add forward routes
        for k in $(seq ${i} $((5 - 1))); do
                ip -netns ${h} route 2>/dev/null | (grep "${AB}.${k}.0" 2>/dev/null) || \
                ip -netns ${h} route add "${AB}.${k}.0/24" via "${AB}.${i}.2" 2>/dev/nul
        done

        # add reverse routes
        for k in $(seq 1 $((i - 2))); do
                ip -netns ${h} route 2>/dev/null | grep "${AB}.${k}.0" 2>/dev/null || \
                ip -netns ${h} route add "${AB}.${k}.0/24" via "${AB}.${p}.1" 2>/dev/nul
        done
done

ip netns exec host1 ping -q -W 2 -w 1 -c 1 10.1.4.2 2>&1>/dev/null && echo "success 10.1.4.2 reachable" || echo "ERROR"
ip netns exec host1 ping -W 9 -w 5 -c 1 10.1.4.3 || echo  "note the source address of unreachble"
ip -netns host1 route flush cache

ip netns exec host3 nft add table inet filter
ip netns exec host3 nft add chain inet filter FORWARD { type filter hook forward priority filter\; policy drop \; }
ip netns exec host3 nft add rule inet filter FORWARD counter ip protocol icmp drop
ip netns exec host3 nft add rule inet filter FORWARD counter ip protocol esp accept
ip netns exec host3 nft add rule inet filter FORWARD counter drop

ip -netns host2 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir out \
        flag icmp tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 1 mode tunnel

ip -netns host2 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir in \
        tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 2 mode tunnel

ip -netns host2 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir fwd \
        flag icmp tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 2 mode tunnel

ip -netns host2 xfrm state add src 10.1.2.1 dst 10.1.3.2 proto esp spi 1 \
        reqid 1 replay-window 1  mode tunnel aead 'rfc4106(gcm(aes))' \
        0x1111111111111111111111111111111111111111 96 \
        sel src 10.1.1.0/24 dst 10.1.4.0/24

ip -netns host2 xfrm state add src 10.1.3.2 dst 10.1.2.1 proto esp spi 2 \
        flag icmp reqid 2 replay-window 10 mode tunnel aead 'rfc4106(gcm(aes))' \
        0x2222222222222222222222222222222222222222 96

ip -netns host4 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir out \
        flag icmp tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 1 mode tunnel

ip -netns host4 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir in \
        tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 2  mode tunnel

ip -netns host4 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir fwd \
                flag icmp tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 2 mode tunnel

ip -netns host4 xfrm state add src 10.1.3.2 dst 10.1.2.1 proto esp spi 2 \
        reqid 1 replay-window 1 mode tunnel aead 'rfc4106(gcm(aes))' \
        0x2222222222222222222222222222222222222222 96

ip -netns host4 xfrm state add src 10.1.2.1 dst 10.1.3.2 proto esp spi 1 \
        reqid 2 replay-window 20 flag icmp  mode tunnel aead 'rfc4106(gcm(aes))' \
        0x1111111111111111111111111111111111111111 96 \
        sel src 10.1.1.0/24 dst 10.1.4.0/24

ip netns exec host1 ping -W 5 -c 1 10.1.4.2 2>&1 > /dev/null && echo ""
ip netns exec host1 ping -W 5 -c 1 10.1.4.3 || echo "note source address"

Again before the fix
ping -W 5 -c 1 10.1.4.3
From 10.1.4.3 icmp_seq=1 Destination Host Unreachable

After the fix
From 10.1.3.2 icmp_seq=1 Destination Host Unreachable

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/icmp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e63a3bf99617..bec234637122 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -555,7 +555,6 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					    XFRM_LOOKUP_ICMP);
 	if (!IS_ERR(rt2)) {
 		dst_release(&rt->dst);
-		memcpy(fl4, &fl4_dec, sizeof(*fl4));
 		rt = rt2;
 	} else if (PTR_ERR(rt2) == -EPERM) {
 		if (rt)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2023-10-26 14:45   ` [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
@ 2023-10-26 19:41     ` kernel test robot
  2024-01-30 10:36     ` Dan Carpenter
  1 sibling, 0 replies; 27+ messages in thread
From: kernel test robot @ 2023-10-26 19:41 UTC (permalink / raw)
  To: Antony Antony, Steffen Klassert
  Cc: oe-kbuild-all, Herbert Xu, Antony Antony, devel, Jakub Kicinski, netdev

Hi Antony,

kernel test robot noticed the following build warnings:

[auto build test WARNING on klassert-ipsec-next/master]
[also build test WARNING on klassert-ipsec/master net-next/main net/main linus/master v6.6-rc7 next-20231026]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Antony-Antony/xfrm-fix-source-address-in-icmp-error-generation-from-IPsec-gateway/20231026-234542
base:   https://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git master
patch link:    https://lore.kernel.org/r/4b30e07300159db93ec0f6b31778aa0f6a41ef21.1698331320.git.antony.antony%40secunet.com
patch subject: [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
config: csky-randconfig-002-20231027 (https://download.01.org/0day-ci/archive/20231027/202310270353.sobcrQay-lkp@intel.com/config)
compiler: csky-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231027/202310270353.sobcrQay-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310270353.sobcrQay-lkp@intel.com/

All warnings (new ones prefixed by >>):

   net/xfrm/xfrm_policy.c: In function 'icmp_err_packet':
>> net/xfrm/xfrm_policy.c:3490:30: warning: unused variable 'fl6' [-Wunused-variable]
    3490 |         const struct flowi6 *fl6 = &fl->u.ip6;
         |                              ^~~


vim +/fl6 +3490 net/xfrm/xfrm_policy.c

  3487	
  3488	static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
  3489	{
> 3490		const struct flowi6 *fl6 = &fl->u.ip6;
  3491		const struct flowi4 *fl4 = &fl->u.ip4;
  3492	
  3493		if (family == AF_INET &&
  3494		    fl4->flowi4_proto == IPPROTO_ICMP &&
  3495		    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
  3496		     fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
  3497			return true;
  3498	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v2 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
  2023-10-26 14:45   ` [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
  2023-10-26 14:45   ` [PATCH ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
@ 2023-10-27  8:16   ` Antony Antony
  2023-11-17  9:13     ` Steffen Klassert
  2023-10-27  8:16   ` [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
                     ` (6 subsequent siblings)
  9 siblings, 1 reply; 27+ messages in thread
From: Antony Antony @ 2023-10-27  8:16 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

This commit aligns with RFC 4301, Section 6, and addresses the
requirement to forward unauthenticated ICMP error messages that do not
match any xfrm policies. It utilizes the ICMP payload as an skb and
performs a reverse lookup. If a policy match is found, forward
the packet.

The ICMP payload typically contains a partial IP packet that is likely
responsible for the error message.

The following error types will be forwarded:
- IPv4 ICMP error types: ICMP_DEST_UNREACH & ICMP_TIME_EXCEEDED
- IPv6 ICMPv6 error types: ICMPV6_DEST_UNREACH, ICMPV6_PKT_TOOBIG,
			   ICMPV6_TIME_EXCEED

To implement this feature, a reverse lookup has been added to the xfrm
forward path, making use of the ICMP payload as the skb.

To enable this functionality from user space, the XFRM_POLICY_ICMP flag
should be added to the outgoing and forward policies, and the
XFRM_STATE_ICMP flag should be set on incoming states.

e.g.
ip xfrm policy add flag icmp tmpl

ip xfrm policy
src 192.0.2.0/24 dst 192.0.1.0/25
	dir out priority 2084302 ptype main flag icmp

ip xfrm state add ...flag icmp

ip xfrm state
root@west:~#ip x s
src 192.1.2.23 dst 192.1.2.45
	proto esp spi 0xa7b76872 reqid 16389 mode tunnel
	replay-window 32 flag icmp af-unspec

Changes since v1:
- Move IPv6 variable declaration inside IS_ENABLED(CONFIG_IPV6)

Changes since RFC:
- Fix calculation of ICMPv6 header length

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/xfrm/xfrm_policy.c | 148 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 146 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6aea8b2f45e0..0ab6989d5b20 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
 #include <linux/audit.h>
 #include <linux/rhashtable.h>
 #include <linux/if_tunnel.h>
+#include <linux/icmp.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/inet_ecn.h>
@@ -3484,6 +3485,134 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
 	return 0;
 }
 
+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (family == AF_INET &&
+	    fl4->flowi4_proto == IPPROTO_ICMP &&
+	    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+	     fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+		return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6) {
+		const struct flowi6 *fl6 = &fl->u.ip6;
+
+		if(fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+		   (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+		    fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+		    fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+			return true;
+	}
+#endif
+	return false;
+}
+
+static struct sk_buff *xfrm_icmp_flow_decode(struct sk_buff *skb,
+					     unsigned short family,
+					     struct flowi *fl,
+					     struct flowi *fl1)
+{
+	struct net *net = dev_net(skb->dev);
+	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+	int hl = family == AF_INET ? (sizeof(struct iphdr) +  sizeof(struct icmphdr)) :
+		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+	skb_reset_network_header(newskb);
+
+	if (!pskb_pull(newskb, hl))
+		return NULL;
+	skb_reset_network_header(newskb);
+
+	if (xfrm_decode_session_reverse(net, newskb, fl1, family) < 0) {
+		kfree_skb(newskb);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+		return NULL;
+	}
+
+	fl1->flowi_oif = fl->flowi_oif;
+	fl1->flowi_mark = fl->flowi_mark;
+	fl1->flowi_tos = fl->flowi_tos;
+	nf_nat_decode_session(newskb, fl1, family);
+
+	return newskb;
+}
+
+static bool xfrm_sa_icmp_flow(struct sk_buff *skb,
+			      unsigned short family, const struct xfrm_selector *sel,
+			      struct flowi *fl)
+{
+	bool ret = false;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return ret;
+
+		ret = xfrm_selector_match(sel, &fl1, family);
+		kfree_skb(newskb);
+	}
+
+	return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+			      struct flowi *fl, unsigned short family,
+			      u32 if_id)
+{
+	struct xfrm_policy *pol = NULL;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct net *net = dev_net(skb->dev);
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return pol;
+		pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+
+		kfree_skb(newskb);
+	}
+
+	return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+			     unsigned short family, struct dst_entry *dst)
+{
+	if (icmp_err_packet(fl, family)) {
+		struct net *net = dev_net(skb->dev);
+		struct dst_entry *dst2;
+		struct flowi fl1;
+		struct sk_buff *newskb = xfrm_icmp_flow_decode(skb, family, fl, &fl1);
+
+		if (!newskb)
+			return dst;
+
+		dst_hold(dst);
+
+		dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+		kfree_skb(newskb);
+
+		if (IS_ERR(dst2))
+			return dst;
+
+		if (dst2->xfrm) {
+			dst_release(dst);
+			dst = dst2;
+		} else {
+			dst_release(dst2);
+		}
+	}
+
+	return dst;
+}
+
 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
@@ -3530,9 +3659,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 		for (i = sp->len - 1; i >= 0; i--) {
 			struct xfrm_state *x = sp->xvec[i];
+			int ret = 0;
+
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
-				return 0;
+				ret = true;
+				if (x->props.flags & XFRM_STATE_ICMP &&
+				    xfrm_sa_icmp_flow(skb, family, &x->sel, &fl))
+					ret = false;
+				if (ret) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+					return 0;
+				}
 			}
 		}
 	}
@@ -3555,6 +3692,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 0;
 	}
 
+	if (!pol && dir == XFRM_POLICY_FWD)
+		pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
 	if (!pol) {
 		if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3688,6 +3828,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		res = 0;
 		dst = NULL;
 	}
+
+	if (dst && !dst->xfrm)
+		dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
 	skb_dst_set(skb, dst);
 	return res;
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
                     ` (2 preceding siblings ...)
  2023-10-27  8:16   ` [PATCH v2 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
@ 2023-10-27  8:16   ` Antony Antony
  2023-10-27 13:30     ` [devel-ipsec] " Michael Richardson
  2023-11-17  9:21     ` Steffen Klassert
  2023-12-19 20:29   ` [PATCH v3 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
                     ` (5 subsequent siblings)
  9 siblings, 2 replies; 27+ messages in thread
From: Antony Antony @ 2023-10-27  8:16 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

When enabling support for xfrm lookup using reverse ICMP payload,
We have identified an issue where the source address of the IPv4 e.g
"Destination Host Unreachable" message is incorrect. The IPv6 appear
to do the right thing.

Here is example of incorrect source address for ICMP error response.
When sending a ping to an unreachable host, the sender would receive an
ICMP unreachable response with a fake source address. Rather the address
of the host that generated ICMP Unreachable message. This is confusing
and incorrect.

Example:
ping -W 9 -w 5 -c 1 10.1.4.3
PING 10.1.4.3 (10.1.4.3) 56(84) bytes of data.
From 10.1.4.3 icmp_seq=1 Destination Host Unreachable

Notice : packet has the source address of the ICMP "Unreachable host!"

This issue can be traced back to commit
415b3334a21a ("icmp: Fix regression in nexthop resolution during replies.")
which introduced a change that copied the source address from the ICMP
payload.

This commit would force to use source address from the gatway/host.
The ICMP error message source address correctly set from the host.

After fixing:
ping -W 5 -c 1 10.1.4.3
PING 10.1.4.3 (10.1.4.3) 56(84) bytes of data.
From 10.1.3.2 icmp_seq=1 Destination Host Unreachable

Here is an example to reporduce:

export AB="10.1"
for i in 1 2 3 4 5; do
        h="host${i}"
        ip netns add ${h}
        ip -netns ${h} link set lo up
        ip netns exec ${h} sysctl -wq net.ipv4.ip_forward=1
        if [ $i -lt 5 ]; then
                ip -netns ${h} link add eth0 type veth peer name eth10${i}
                ip -netns ${h} addr add "${AB}.${i}.1/24" dev eth0
                ip -netns ${h} link set up dev eth0
        fi
done

for i in 1 2 3 4 5; do
        h="host${i}"
        p=$((i - 1))
        ph="host${p}"
        # connect to previous host
        if [ $i -gt 1 ]; then
                ip -netns ${ph} link set eth10${p} netns ${h}
                ip -netns ${h} link set eth10${p} name eth1
                ip -netns ${h} link set up dev eth1
                ip -netns ${h} addr add "${AB}.${p}.2/24" dev eth1
        fi
        # add forward routes
        for k in $(seq ${i} $((5 - 1))); do
                ip -netns ${h} route 2>/dev/null | (grep "${AB}.${k}.0" 2>/dev/null) || \
                ip -netns ${h} route add "${AB}.${k}.0/24" via "${AB}.${i}.2" 2>/dev/nul
        done

        # add reverse routes
        for k in $(seq 1 $((i - 2))); do
                ip -netns ${h} route 2>/dev/null | grep "${AB}.${k}.0" 2>/dev/null || \
                ip -netns ${h} route add "${AB}.${k}.0/24" via "${AB}.${p}.1" 2>/dev/nul
        done
done

ip netns exec host1 ping -q -W 2 -w 1 -c 1 10.1.4.2 2>&1>/dev/null && echo "success 10.1.4.2 reachable" || echo "ERROR"
ip netns exec host1 ping -W 9 -w 5 -c 1 10.1.4.3 || echo  "note the source address of unreachble"
ip -netns host1 route flush cache

ip netns exec host3 nft add table inet filter
ip netns exec host3 nft add chain inet filter FORWARD { type filter hook forward priority filter\; policy drop \; }
ip netns exec host3 nft add rule inet filter FORWARD counter ip protocol icmp drop
ip netns exec host3 nft add rule inet filter FORWARD counter ip protocol esp accept
ip netns exec host3 nft add rule inet filter FORWARD counter drop

ip -netns host2 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir out \
        flag icmp tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 1 mode tunnel

ip -netns host2 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir in \
        tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 2 mode tunnel

ip -netns host2 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir fwd \
        flag icmp tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 2 mode tunnel

ip -netns host2 xfrm state add src 10.1.2.1 dst 10.1.3.2 proto esp spi 1 \
        reqid 1 replay-window 1  mode tunnel aead 'rfc4106(gcm(aes))' \
        0x1111111111111111111111111111111111111111 96 \
        sel src 10.1.1.0/24 dst 10.1.4.0/24

ip -netns host2 xfrm state add src 10.1.3.2 dst 10.1.2.1 proto esp spi 2 \
        flag icmp reqid 2 replay-window 10 mode tunnel aead 'rfc4106(gcm(aes))' \
        0x2222222222222222222222222222222222222222 96

ip -netns host4 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir out \
        flag icmp tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 1 mode tunnel

ip -netns host4 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir in \
        tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 2  mode tunnel

ip -netns host4 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir fwd \
                flag icmp tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 2 mode tunnel

ip -netns host4 xfrm state add src 10.1.3.2 dst 10.1.2.1 proto esp spi 2 \
        reqid 1 replay-window 1 mode tunnel aead 'rfc4106(gcm(aes))' \
        0x2222222222222222222222222222222222222222 96

ip -netns host4 xfrm state add src 10.1.2.1 dst 10.1.3.2 proto esp spi 1 \
        reqid 2 replay-window 20 flag icmp  mode tunnel aead 'rfc4106(gcm(aes))' \
        0x1111111111111111111111111111111111111111 96 \
        sel src 10.1.1.0/24 dst 10.1.4.0/24

ip netns exec host1 ping -W 5 -c 1 10.1.4.2 2>&1 > /dev/null && echo ""
ip netns exec host1 ping -W 5 -c 1 10.1.4.3 || echo "note source address"

Again before the fix
ping -W 5 -c 1 10.1.4.3
From 10.1.4.3 icmp_seq=1 Destination Host Unreachable

After the fix
From 10.1.3.2 icmp_seq=1 Destination Host Unreachable

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/icmp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e63a3bf99617..bec234637122 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -555,7 +555,6 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					    XFRM_LOOKUP_ICMP);
 	if (!IS_ERR(rt2)) {
 		dst_release(&rt->dst);
-		memcpy(fl4, &fl4_dec, sizeof(*fl4));
 		rt = rt2;
 	} else if (PTR_ERR(rt2) == -EPERM) {
 		if (rt)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [devel-ipsec] [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2023-10-27  8:16   ` [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
@ 2023-10-27 13:30     ` Michael Richardson
  2023-10-29 10:26       ` Antony Antony
  2023-11-17  9:21     ` Steffen Klassert
  1 sibling, 1 reply; 27+ messages in thread
From: Michael Richardson @ 2023-10-27 13:30 UTC (permalink / raw)
  To: antony.antony
  Cc: Steffen Klassert, Herbert Xu, netdev, devel, Jakub Kicinski,
	David S. Miller

[-- Attachment #1: Type: text/plain, Size: 883 bytes --]


Antony Antony via Devel <devel@linux-ipsec.org> wrote:
    > When enabling support for xfrm lookup using reverse ICMP payload,
    > We have identified an issue where the source address of the IPv4 e.g
    > "Destination Host Unreachable" message is incorrect. The IPv6 appear
    > to do the right thing.

One thing that operators of routers with a multitude of interfaces want to do
is send all ICMP messages from a specific IP address.  Often the public
address, that has the sane reverse DNS name.
AFAIK, this is not an option on Linux, but Cisco/Juniper/etc. devices usually
can do this.  I can't recall how today. (I was actually looking that up this week)

This can conflict however, with the need to get the result back into the
tunnel.  I don't have a good answer, except that we probably need a fair bit
of flexibility, with some good automatically discovered defaults.



[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 511 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [devel-ipsec] [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2023-10-27 13:30     ` [devel-ipsec] " Michael Richardson
@ 2023-10-29 10:26       ` Antony Antony
  2023-10-31  7:59         ` Michael Richardson
  0 siblings, 1 reply; 27+ messages in thread
From: Antony Antony @ 2023-10-29 10:26 UTC (permalink / raw)
  To: Michael Richardson
  Cc: antony.antony, Herbert Xu, netdev, devel, Jakub Kicinski,
	David S. Miller

On Fri, Oct 27, 2023 at 09:30:07AM -0400, Michael Richardson via Devel wrote:
> 
> Antony Antony via Devel <devel@linux-ipsec.org> wrote:
>     > When enabling support for xfrm lookup using reverse ICMP payload,
>     > We have identified an issue where the source address of the IPv4 e.g
>     > "Destination Host Unreachable" message is incorrect. The IPv6 appear
>     > to do the right thing.
> 
> One thing that operators of routers with a multitude of interfaces want to do
> is send all ICMP messages from a specific IP address.  Often the public
> address, that has the sane reverse DNS name.

While it makes sense for routers with multiple interfaces, receiving ICMP 
errors from private addresses can be confusing. However, wouldn't this also 
make it more challenging to adhere to BCP 32 and BCP 38? Routing with 
multiple interfaces is tricky on Linux, especially when it comes to 
compliance with these BCPs.

> AFAIK, this is not an option on Linux, but Cisco/Juniper/etc. devices usually
> can do this.  I can't recall how today. (I was actually looking that up this week)

I wonder if a netfilter rule would be a solution for you, something like:

"ip protocol icmp type <error codes> snat to x.x.x.x"

I would love see a simple option instead of a SNAT hack. May be a routing 
rule that will choose sourse address for ICMP error code.

> This can conflict however, with the need to get the result back into the
> tunnel.  I don't have a good answer, except that we probably need a fair bit

Forwarding that ICMP packet, which is not covered by your forward IPsec 
policy, would be fixed with the second patch in this series. In that case 
lookup would using the orginal packet as describe in RFC 4301, Section 6.

-antony

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [devel-ipsec] [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2023-10-29 10:26       ` Antony Antony
@ 2023-10-31  7:59         ` Michael Richardson
  0 siblings, 0 replies; 27+ messages in thread
From: Michael Richardson @ 2023-10-31  7:59 UTC (permalink / raw)
  To: Antony Antony
  Cc: antony.antony, Herbert Xu, netdev, devel, Jakub Kicinski,
	David S. Miller

[-- Attachment #1: Type: text/plain, Size: 1775 bytes --]


Antony Antony <antony@phenome.org> wrote:
    > On Fri, Oct 27, 2023 at 09:30:07AM -0400, Michael Richardson via Devel
    > wrote:
    >>
    >> Antony Antony via Devel <devel@linux-ipsec.org> wrote: > When enabling
    >> support for xfrm lookup using reverse ICMP payload, > We have
    >> identified an issue where the source address of the IPv4 e.g >
    >> "Destination Host Unreachable" message is incorrect. The IPv6 appear >
    >> to do the right thing.
    >>
    >> One thing that operators of routers with a multitude of interfaces
    >> want to do is send all ICMP messages from a specific IP address.
    >> Often the public address, that has the sane reverse DNS name.

    > While it makes sense for routers with multiple interfaces, receiving
    > ICMP errors from private addresses can be confusing. However, wouldn't
    > this also make it more challenging to adhere to BCP 32 and BCP 38?
    > Routing with multiple interfaces is tricky on Linux, especially when it
    > comes to compliance with these BCPs.

Yes, that's why sending from a public, topically significant source address
is really important.  Yet, many links are numbered in 1918 because..

    > I wonder if a netfilter rule would be a solution for you, something
    > like:

    > I would love see a simple option instead of a SNAT hack. May be a
    > routing rule that will choose sourse address for ICMP error code.

yeah, I really don't want to do SNAT stuff.
I'd like to have a flag on each interface that says to use the "global" ICMP
source or use the heuristic we have now.  And then we need a way to set that
source address.  Most routing platforms put a /32 address (and /128) on lo
(or a dummy) as the device's reachable address, and then spread that through
OSPF.







[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 658 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2023-10-27  8:16   ` [PATCH v2 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
@ 2023-11-17  9:13     ` Steffen Klassert
  2023-11-25 22:48       ` [devel-ipsec] " Antony Antony
  0 siblings, 1 reply; 27+ messages in thread
From: Steffen Klassert @ 2023-11-17  9:13 UTC (permalink / raw)
  To: Antony Antony; +Cc: Herbert Xu, David S. Miller, devel, Jakub Kicinski, netdev

On Fri, Oct 27, 2023 at 10:16:29AM +0200, Antony Antony wrote:
> +
> +static struct sk_buff *xfrm_icmp_flow_decode(struct sk_buff *skb,
> +					     unsigned short family,
> +					     struct flowi *fl,
> +					     struct flowi *fl1)
> +{
> +	struct net *net = dev_net(skb->dev);
> +	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
> +	int hl = family == AF_INET ? (sizeof(struct iphdr) +  sizeof(struct icmphdr)) :
> +		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
> +	skb_reset_network_header(newskb);

This is not needed there.

> +
> +	if (!pskb_pull(newskb, hl))
> +		return NULL;

This leaks newskb.

> +	skb_reset_network_header(newskb);
> +
> +	if (xfrm_decode_session_reverse(net, newskb, fl1, family) < 0) {
> +		kfree_skb(newskb);

The newskb is not dropped because of an error, maybe better use
consume_skb().

> +		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);

This might bump a second stat counter for this packet. Also
this is not really an error, because you just can't parse
the payload of the icmp packet.

> +		return NULL;
> +	}
> +
> +	fl1->flowi_oif = fl->flowi_oif;
> +	fl1->flowi_mark = fl->flowi_mark;
> +	fl1->flowi_tos = fl->flowi_tos;
> +	nf_nat_decode_session(newskb, fl1, family);
> +
> +	return newskb;

Why do you return newskb here? It is not used in all
of the calling functions.

The rest looks good, thanks!

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2023-10-27  8:16   ` [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
  2023-10-27 13:30     ` [devel-ipsec] " Michael Richardson
@ 2023-11-17  9:21     ` Steffen Klassert
  2023-11-25 23:15       ` [devel-ipsec] " Antony Antony
  1 sibling, 1 reply; 27+ messages in thread
From: Steffen Klassert @ 2023-11-17  9:21 UTC (permalink / raw)
  To: Antony Antony; +Cc: Herbert Xu, David S. Miller, devel, Jakub Kicinski, netdev

On Fri, Oct 27, 2023 at 10:16:52AM +0200, Antony Antony wrote:
> 
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index e63a3bf99617..bec234637122 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -555,7 +555,6 @@ static struct rtable *icmp_route_lookup(struct net *net,
>  					    XFRM_LOOKUP_ICMP);
>  	if (!IS_ERR(rt2)) {
>  		dst_release(&rt->dst);
> -		memcpy(fl4, &fl4_dec, sizeof(*fl4));

This is not really IPsec code. The change needs either an
Ack of one of the netdev Maintainers, or it has to go
through the nedev tree. Also, please consider this as
a fix.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [devel-ipsec] [PATCH v2 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2023-11-17  9:13     ` Steffen Klassert
@ 2023-11-25 22:48       ` Antony Antony
  0 siblings, 0 replies; 27+ messages in thread
From: Antony Antony @ 2023-11-25 22:48 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Antony Antony, Herbert Xu, David S. Miller, devel,
	Jakub Kicinski, netdev

On Fri, Nov 17, 2023 at 10:13:15AM +0100, Steffen Klassert via Devel wrote:
> On Fri, Oct 27, 2023 at 10:16:29AM +0200, Antony Antony wrote:
> > +
> > +static struct sk_buff *xfrm_icmp_flow_decode(struct sk_buff *skb,
> > +					     unsigned short family,
> > +					     struct flowi *fl,
> > +					     struct flowi *fl1)
> > +{
> > +	struct net *net = dev_net(skb->dev);
> > +	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
> > +	int hl = family == AF_INET ? (sizeof(struct iphdr) +  sizeof(struct icmphdr)) :
> > +		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
> > +	skb_reset_network_header(newskb);
> 
> This is not needed there.

fixed.
> 
> > +
> > +	if (!pskb_pull(newskb, hl))
> > +		return NULL;
> 
> This leaks newskb.

fixedd

> 
> > +	skb_reset_network_header(newskb);
> > +
> > +	if (xfrm_decode_session_reverse(net, newskb, fl1, family) < 0) {
> > +		kfree_skb(newskb);
> 
> The newskb is not dropped because of an error, maybe better use
> consume_skb().

good idea. I will change to consume_skb().

> 
> > +		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
> 
> This might bump a second stat counter for this packet. Also
> this is not really an error, because you just can't parse
> the payload of the icmp packet. 

yes fixed

> 
> > +		return NULL;
> > +	}
> > +
> > +	fl1->flowi_oif = fl->flowi_oif;
> > +	fl1->flowi_mark = fl->flowi_mark;
> > +	fl1->flowi_tos = fl->flowi_tos;
> > +	nf_nat_decode_session(newskb, fl1, family);
> > +
> > +	return newskb;
> 
> Why do you return newskb here? It is not used in all
> of the Acalling functions.

I thought fl1,  decode session, had pointers to skb fl1. That is not the 
case. Now the skb is consumed and not returned.


> The rest looks good, thanks!

thanks for the review. I will send out new version soon.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [devel-ipsec] [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2023-11-17  9:21     ` Steffen Klassert
@ 2023-11-25 23:15       ` Antony Antony
  0 siblings, 0 replies; 27+ messages in thread
From: Antony Antony @ 2023-11-25 23:15 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Antony Antony, Herbert Xu, David S. Miller, devel,
	Jakub Kicinski, netdev

On Fri, Nov 17, 2023 at 10:21:37AM +0100, Steffen Klassert via Devel wrote:
> On Fri, Oct 27, 2023 at 10:16:52AM +0200, Antony Antony wrote:
> > 
> > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> > index e63a3bf99617..bec234637122 100644
> > --- a/net/ipv4/icmp.c
> > +++ b/net/ipv4/icmp.c
> > @@ -555,7 +555,6 @@ static struct rtable *icmp_route_lookup(struct net *net,
> >  					    XFRM_LOOKUP_ICMP);
> >  	if (!IS_ERR(rt2)) {
> >  		dst_release(&rt->dst);
> > -		memcpy(fl4, &fl4_dec, sizeof(*fl4));
> 
> This is not really IPsec code. The change needs either an
> Ack of one of the netdev Maintainers, or it has to go

I understand your concern. I chose to submit the change to ipsec-next as it 
is directly related to the outcome of a successful xfrm_lookup().

> through the nedev tree. Also, please consider this as
> a fix.

It is a fix:) I considered including a 'Fixes:' tag initially but ultimately 
decided against it.  My hesitation stemmed from the concern that if this fix 
were backported, it could inadvertently trigger regressions in someone’s 
test suite. This might lead to requests for a revert through the ipsec tree, 
which I am keen to avoid.

However, I do concur that this submission qualifies as a fix. Is there a way
to include the 'Fixes:' tag while also advising against backporting it to 
reduce the risk of potential regressions?

I will add the 'Fixes:' tag to the new version. When it comes to backport I 
will recomend not to backport this fix. Please keep an eye out for those 
messages. This could get backported to all curently maintained releases!

The key reason for pairing this update with my other patch ("xfrm: introduce 
forwarding of ICMP Error messages") is to proactively address any potential 
claims of a regression. Without this new patch, it's  conceivable that the 
changes could be misinterpreted as causing a regression, especially 
considering that the commit this patch addresses is 12 years old! By 
submitting them together, it should help clarify that these changes are, in 
fact, rectifying long-standing issues rather than introducing new ones.

I believe applying two patches together will provide a clearer context for 
both the changes and help streamline their acceptance and integration.

thanks,
-antony

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v3 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
                     ` (3 preceding siblings ...)
  2023-10-27  8:16   ` [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
@ 2023-12-19 20:29   ` Antony Antony
  2023-12-19 20:30   ` [PATCH v3 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
                     ` (4 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Antony Antony @ 2023-12-19 20:29 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

This commit aligns with RFC 4301, Section 6, and addresses the
requirement to forward unauthenticated ICMP error messages that do not
match any xfrm policies. It utilizes the ICMP payload as an skb and
performs a reverse lookup. If a policy match is found, forward
the packet.

The ICMP payload typically contains a partial IP packet that is likely
responsible for the error message.

The following error types will be forwarded:
- IPv4 ICMP error types: ICMP_DEST_UNREACH & ICMP_TIME_EXCEEDED
- IPv6 ICMPv6 error types: ICMPV6_DEST_UNREACH, ICMPV6_PKT_TOOBIG,
			   ICMPV6_TIME_EXCEED

To implement this feature, a reverse lookup has been added to the xfrm
forward path, making use of the ICMP payload as the skb.

To enable this functionality from user space, the XFRM_POLICY_ICMP flag
should be added to the outgoing and forward policies, and the
XFRM_STATE_ICMP flag should be set on incoming states.

e.g.
ip xfrm policy add flag icmp tmpl

ip xfrm policy
src 192.0.2.0/24 dst 192.0.1.0/25
	dir out priority 2084302 ptype main flag icmp

ip xfrm state add ...flag icmp

ip xfrm state
root@west:~#ip x s
src 192.1.2.23 dst 192.1.2.45
	proto esp spi 0xa7b76872 reqid 16389 mode tunnel
	replay-window 32 flag icmp af-unspec

Changes since v2: reviewed by Steffen Klassert
 - user consume_skb instead of kfree_skb for the inner skb
 - fixed newskb leaks in error paths
 - free the newskb once inner flow is decoded with change due to
   7a0207094f1b ("xfrm: policy: replace session decode with flow dissector")
 - if xfrm_decode_session_reverse() on inner payload fails ignore.
   do not increment error counter

Changes since v1:
- Move IPv6 variable declaration inside IS_ENABLED(CONFIG_IPV6)

Changes since RFC:
- Fix calculation of ICMPv6 header length

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/xfrm/xfrm_policy.c | 141 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 139 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c13dc3ef7910..683d4cdb235f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
 #include <linux/audit.h>
 #include <linux/rhashtable.h>
 #include <linux/if_tunnel.h>
+#include <linux/icmp.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/inet_ecn.h>
@@ -3503,6 +3504,127 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
 	return 0;
 }
 
+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (family == AF_INET &&
+	    fl4->flowi4_proto == IPPROTO_ICMP &&
+	    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+	     fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+		return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6) {
+		const struct flowi6 *fl6 = &fl->u.ip6;
+
+		if(fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+		   (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+		    fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+		    fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+			return true;
+	}
+#endif
+	return false;
+}
+
+static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
+				  const struct flowi *fl, struct flowi *fl1)
+{
+	bool ret = true;
+	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+	int hl = family == AF_INET ? (sizeof(struct iphdr) +  sizeof(struct icmphdr)) :
+		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+
+	if (!newskb)
+		return true;
+
+	if (!pskb_pull(newskb, hl))
+		goto out;
+
+	skb_reset_network_header(newskb);
+
+	if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
+		goto out;
+
+	fl1->flowi_oif = fl->flowi_oif;
+	fl1->flowi_mark = fl->flowi_mark;
+	fl1->flowi_tos = fl->flowi_tos;
+	nf_nat_decode_session(newskb, fl1, family);
+	ret = false;
+
+out:
+	consume_skb(newskb);
+	return ret;
+}
+
+static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
+					   const struct xfrm_selector *sel,
+					   const struct flowi *fl)
+{
+	bool ret = false;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return ret;
+
+		ret = xfrm_selector_match(sel, &fl1, family);
+	}
+
+	return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+			      const struct flowi *fl, unsigned short family,
+			      u32 if_id)
+{
+	struct xfrm_policy *pol = NULL;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct net *net = dev_net(skb->dev);
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return pol;
+
+		pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+	}
+
+	return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+			     unsigned short family, struct dst_entry *dst)
+{
+	if (icmp_err_packet(fl, family)) {
+		struct net *net = dev_net(skb->dev);
+		struct dst_entry *dst2;
+		struct flowi fl1;
+
+		if(xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return dst;
+
+		dst_hold(dst);
+
+		dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+		if (IS_ERR(dst2))
+			return dst;
+
+		if (dst2->xfrm) {
+			dst_release(dst);
+			dst = dst2;
+		} else {
+			dst_release(dst2);
+		}
+	}
+
+	return dst;
+}
+
 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
@@ -3549,9 +3671,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 		for (i = sp->len - 1; i >= 0; i--) {
 			struct xfrm_state *x = sp->xvec[i];
+			int ret = 0;
+
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
-				return 0;
+				ret = true;
+				if (x->props.flags & XFRM_STATE_ICMP &&
+				    xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
+					ret = false;
+				if (ret) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+					return 0;
+				}
 			}
 		}
 	}
@@ -3574,6 +3704,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 0;
 	}
 
+	if (!pol && dir == XFRM_POLICY_FWD)
+		pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
 	if (!pol) {
 		if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3707,6 +3840,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		res = 0;
 		dst = NULL;
 	}
+
+	if (dst && !dst->xfrm)
+		dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
 	skb_dst_set(skb, dst);
 	return res;
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
                     ` (4 preceding siblings ...)
  2023-12-19 20:29   ` [PATCH v3 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
@ 2023-12-19 20:30   ` Antony Antony
  2023-12-21 13:12   ` [PATCH v4 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
                     ` (3 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Antony Antony @ 2023-12-19 20:30 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

When enabling support for xfrm lookup using reverse ICMP payload,
We have identified an issue where the source address of the IPv4 e.g
"Destination Host Unreachable" message is incorrect. The IPv6 appear
to do the right thing.

Here is example of incorrect source address for ICMP error response.
When sending a ping to an unreachable host, the sender would receive an
ICMP unreachable response with a fake source address. Rather the address
of the host that generated ICMP Unreachable message. This is confusing
and incorrect.

Example:
ping -W 9 -w 5 -c 1 10.1.4.3
PING 10.1.4.3 (10.1.4.3) 56(84) bytes of data.
From 10.1.4.3 icmp_seq=1 Destination Host Unreachable

Notice : packet has the source address of the ICMP "Unreachable host!"

This issue can be traced back to commit
415b3334a21a ("icmp: Fix regression in nexthop resolution during replies.")
which introduced a change that copied the source address from the ICMP
payload.

This commit would force to use source address from the gatway/host.
The ICMP error message source address correctly set from the host.

After fixing:
ping -W 5 -c 1 10.1.4.3
PING 10.1.4.3 (10.1.4.3) 56(84) bytes of data.
From 10.1.3.2 icmp_seq=1 Destination Host Unreachable

Here is an example to reporduce:

export AB="10.1"
for i in 1 2 3 4 5; do
        h="host${i}"
        ip netns add ${h}
        ip -netns ${h} link set lo up
        ip netns exec ${h} sysctl -wq net.ipv4.ip_forward=1
        if [ $i -lt 5 ]; then
                ip -netns ${h} link add eth0 type veth peer name eth10${i}
                ip -netns ${h} addr add "${AB}.${i}.1/24" dev eth0
                ip -netns ${h} link set up dev eth0
        fi
done

for i in 1 2 3 4 5; do
        h="host${i}"
        p=$((i - 1))
        ph="host${p}"
        # connect to previous host
        if [ $i -gt 1 ]; then
                ip -netns ${ph} link set eth10${p} netns ${h}
                ip -netns ${h} link set eth10${p} name eth1
                ip -netns ${h} link set up dev eth1
                ip -netns ${h} addr add "${AB}.${p}.2/24" dev eth1
        fi
        # add forward routes
        for k in $(seq ${i} $((5 - 1))); do
                ip -netns ${h} route 2>/dev/null | (grep "${AB}.${k}.0" 2>/dev/null) || \
                ip -netns ${h} route add "${AB}.${k}.0/24" via "${AB}.${i}.2" 2>/dev/nul
        done

        # add reverse routes
        for k in $(seq 1 $((i - 2))); do
                ip -netns ${h} route 2>/dev/null | grep "${AB}.${k}.0" 2>/dev/null || \
                ip -netns ${h} route add "${AB}.${k}.0/24" via "${AB}.${p}.1" 2>/dev/nul
        done
done

ip netns exec host1 ping -q -W 2 -w 1 -c 1 10.1.4.2 2>&1>/dev/null && echo "success 10.1.4.2 reachable" || echo "ERROR"
ip netns exec host1 ping -W 9 -w 5 -c 1 10.1.4.3 || echo  "note the source address of unreachble"
ip -netns host1 route flush cache

ip netns exec host3 nft add table inet filter
ip netns exec host3 nft add chain inet filter FORWARD { type filter hook forward priority filter\; policy drop \; }
ip netns exec host3 nft add rule inet filter FORWARD counter ip protocol icmp drop
ip netns exec host3 nft add rule inet filter FORWARD counter ip protocol esp accept
ip netns exec host3 nft add rule inet filter FORWARD counter drop

ip -netns host2 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir out \
        flag icmp tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 1 mode tunnel

ip -netns host2 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir in \
        tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 2 mode tunnel

ip -netns host2 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir fwd \
        flag icmp tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 2 mode tunnel

ip -netns host2 xfrm state add src 10.1.2.1 dst 10.1.3.2 proto esp spi 1 \
        reqid 1 replay-window 1  mode tunnel aead 'rfc4106(gcm(aes))' \
        0x1111111111111111111111111111111111111111 96 \
        sel src 10.1.1.0/24 dst 10.1.4.0/24

ip -netns host2 xfrm state add src 10.1.3.2 dst 10.1.2.1 proto esp spi 2 \
        flag icmp reqid 2 replay-window 10 mode tunnel aead 'rfc4106(gcm(aes))' \
        0x2222222222222222222222222222222222222222 96

ip -netns host4 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir out \
        flag icmp tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 1 mode tunnel

ip -netns host4 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir in \
        tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 2  mode tunnel

ip -netns host4 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir fwd \
                flag icmp tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 2 mode tunnel

ip -netns host4 xfrm state add src 10.1.3.2 dst 10.1.2.1 proto esp spi 2 \
        reqid 1 replay-window 1 mode tunnel aead 'rfc4106(gcm(aes))' \
        0x2222222222222222222222222222222222222222 96

ip -netns host4 xfrm state add src 10.1.2.1 dst 10.1.3.2 proto esp spi 1 \
        reqid 2 replay-window 20 flag icmp  mode tunnel aead 'rfc4106(gcm(aes))' \
        0x1111111111111111111111111111111111111111 96 \
        sel src 10.1.1.0/24 dst 10.1.4.0/24

ip netns exec host1 ping -W 5 -c 1 10.1.4.2 2>&1 > /dev/null && echo ""
ip netns exec host1 ping -W 5 -c 1 10.1.4.3 || echo "note source address"

Again before the fix
ping -W 5 -c 1 10.1.4.3
From 10.1.4.3 icmp_seq=1 Destination Host Unreachable

After the fix
From 10.1.3.2 icmp_seq=1 Destination Host Unreachable

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/icmp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e63a3bf99617..bec234637122 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -555,7 +555,6 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					    XFRM_LOOKUP_ICMP);
 	if (!IS_ERR(rt2)) {
 		dst_release(&rt->dst);
-		memcpy(fl4, &fl4_dec, sizeof(*fl4));
 		rt = rt2;
 	} else if (PTR_ERR(rt2) == -EPERM) {
 		if (rt)
--
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v4 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
                     ` (5 preceding siblings ...)
  2023-12-19 20:30   ` [PATCH v3 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
@ 2023-12-21 13:12   ` Antony Antony
  2023-12-21 13:12   ` [PATCH v4 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
                     ` (2 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Antony Antony @ 2023-12-21 13:12 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, Jakub Kicinski,
	devel, netdev

This commit aligns with RFC 4301, Section 6, and addresses the
requirement to forward unauthenticated ICMP error messages that do not
match any xfrm policies. It utilizes the ICMP payload as an skb and
performs a reverse lookup. If a policy match is found, forward
the packet.

The ICMP payload typically contains a partial IP packet that is likely
responsible for the error message.

The following error types will be forwarded:
- IPv4 ICMP error types: ICMP_DEST_UNREACH & ICMP_TIME_EXCEEDED
- IPv6 ICMPv6 error types: ICMPV6_DEST_UNREACH, ICMPV6_PKT_TOOBIG,
			   ICMPV6_TIME_EXCEED

To implement this feature, a reverse lookup has been added to the xfrm
forward path, making use of the ICMP payload as the skb.

To enable this functionality from user space, the XFRM_POLICY_ICMP flag
should be added to the outgoing and forward policies, and the
XFRM_STATE_ICMP flag should be set on incoming states.

e.g.
ip xfrm policy add flag icmp tmpl

ip xfrm policy
src 192.0.2.0/24 dst 192.0.1.0/25
	dir out priority 2084302 ptype main flag icmp

ip xfrm state add ...flag icmp

ip xfrm state
root@west:~#ip x s
src 192.1.2.23 dst 192.1.2.45
	proto esp spi 0xa7b76872 reqid 16389 mode tunnel
	replay-window 32 flag icmp af-unspec

Changes since v3: no code chage
 - add missing white spaces detected by checkpatch.pl

Changes since v2: reviewed by Steffen Klassert
 - user consume_skb instead of kfree_skb for the inner skb
 - fixed newskb leaks in error paths
 - free the newskb once inner flow is decoded with change due to
   commit 7a0207094f1b ("xfrm: policy: replace session decode with flow dissector")
 - if xfrm_decode_session_reverse() on inner payload fails ignore.
   do not increment error counter

Changes since v1:
- Move IPv6 variable declaration inside IS_ENABLED(CONFIG_IPV6)

Changes since RFC:
- Fix calculation of ICMPv6 header length

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/xfrm/xfrm_policy.c | 142 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c13dc3ef7910..bc99984e8d39 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
 #include <linux/audit.h>
 #include <linux/rhashtable.h>
 #include <linux/if_tunnel.h>
+#include <linux/icmp.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/inet_ecn.h>
@@ -3503,6 +3504,128 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
 	return 0;
 }

+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (family == AF_INET &&
+	    fl4->flowi4_proto == IPPROTO_ICMP &&
+	    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+	     fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+		return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6) {
+		const struct flowi6 *fl6 = &fl->u.ip6;
+
+		if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+		    (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+		    fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+		    fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+			return true;
+	}
+#endif
+	return false;
+}
+
+static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
+				  const struct flowi *fl, struct flowi *fl1)
+{
+	bool ret = true;
+	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+	int hl = family == AF_INET ? (sizeof(struct iphdr) +  sizeof(struct icmphdr)) :
+		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+
+	if (!newskb)
+		return true;
+
+	if (!pskb_pull(newskb, hl))
+		goto out;
+
+	skb_reset_network_header(newskb);
+
+	if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
+		goto out;
+
+	fl1->flowi_oif = fl->flowi_oif;
+	fl1->flowi_mark = fl->flowi_mark;
+	fl1->flowi_tos = fl->flowi_tos;
+	nf_nat_decode_session(newskb, fl1, family);
+	ret = false;
+
+out:
+	consume_skb(newskb);
+	return ret;
+}
+
+static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
+					   const struct xfrm_selector *sel,
+					   const struct flowi *fl)
+{
+	bool ret = false;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return ret;
+
+		ret = xfrm_selector_match(sel, &fl1, family);
+	}
+
+	return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+			      const struct flowi *fl, unsigned short family,
+			      u32 if_id)
+{
+	struct xfrm_policy *pol = NULL;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct net *net = dev_net(skb->dev);
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return pol;
+
+		pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+	}
+
+	return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+			     unsigned short family, struct dst_entry *dst)
+{
+	if (icmp_err_packet(fl, family)) {
+		struct net *net = dev_net(skb->dev);
+		struct dst_entry *dst2;
+		struct flowi fl1;
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return dst;
+
+		dst_hold(dst);
+
+		dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+		if (IS_ERR(dst2))
+			return dst;
+
+		if (dst2->xfrm) {
+			dst_release(dst);
+			dst = dst2;
+		} else {
+			dst_release(dst2);
+		}
+	}
+
+	return dst;
+}
+
 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
@@ -3549,9 +3672,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,

 		for (i = sp->len - 1; i >= 0; i--) {
 			struct xfrm_state *x = sp->xvec[i];
+			int ret = 0;
+
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
-				return 0;
+				ret = true;
+				if (x->props.flags & XFRM_STATE_ICMP &&
+				    xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
+					ret = false;
+				if (ret) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+					return 0;
+				}
 			}
 		}
 	}
@@ -3574,6 +3705,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 0;
 	}

+	if (!pol && dir == XFRM_POLICY_FWD)
+		pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
 	if (!pol) {
 		if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3707,6 +3841,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		res = 0;
 		dst = NULL;
 	}
+
+	if (dst && !dst->xfrm)
+		dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
 	skb_dst_set(skb, dst);
 	return res;
 }
--
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v4 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
                     ` (6 preceding siblings ...)
  2023-12-21 13:12   ` [PATCH v4 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
@ 2023-12-21 13:12   ` Antony Antony
  2023-12-22  7:23     ` Steffen Klassert
  2023-12-22 12:57   ` [PATCH v5 ipsec-next] xfrm: introduce forwarding of ICMP Error messages Antony Antony
  2024-01-19 11:27   ` [PATCH v6 " Antony Antony
  9 siblings, 1 reply; 27+ messages in thread
From: Antony Antony @ 2023-12-21 13:12 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

When enabling support for xfrm lookup using reverse ICMP payload,
We have identified an issue where the source address of the IPv4 e.g
"Destination Host Unreachable" message is incorrect. The IPv6 appear
to do the right thing.

Here is example of incorrect source address for ICMP error response.
When sending a ping to an unreachable host, the sender would receive an
ICMP unreachable response with a fake source address. Rather the address
of the host that generated ICMP Unreachable message. This is confusing
and incorrect.

Example:
ping -W 9 -w 5 -c 1 10.1.4.3
PING 10.1.4.3 (10.1.4.3) 56(84) bytes of data.
From 10.1.4.3 icmp_seq=1 Destination Host Unreachable

Notice : packet has the source address of the ICMP "Unreachable host!"

This issue can be traced back to commit
415b3334a21a ("icmp: Fix regression in nexthop resolution during replies.")
which introduced a change that copied the source address from the ICMP
payload.

This commit would force to use source address from the gatway/host.
The ICMP error message source address correctly set from the host.

After fixing:
ping -W 5 -c 1 10.1.4.3
PING 10.1.4.3 (10.1.4.3) 56(84) bytes of data.
From 10.1.3.2 icmp_seq=1 Destination Host Unreachable

Here is an example to reporduce:

export AB="10.1"
for i in 1 2 3 4 5; do
        h="host${i}"
        ip netns add ${h}
        ip -netns ${h} link set lo up
        ip netns exec ${h} sysctl -wq net.ipv4.ip_forward=1
        if [ $i -lt 5 ]; then
                ip -netns ${h} link add eth0 type veth peer name eth10${i}
                ip -netns ${h} addr add "${AB}.${i}.1/24" dev eth0
                ip -netns ${h} link set up dev eth0
        fi
done

for i in 1 2 3 4 5; do
        h="host${i}"
        p=$((i - 1))
        ph="host${p}"
        # connect to previous host
        if [ $i -gt 1 ]; then
                ip -netns ${ph} link set eth10${p} netns ${h}
                ip -netns ${h} link set eth10${p} name eth1
                ip -netns ${h} link set up dev eth1
                ip -netns ${h} addr add "${AB}.${p}.2/24" dev eth1
        fi
        # add forward routes
        for k in $(seq ${i} $((5 - 1))); do
                ip -netns ${h} route 2>/dev/null | (grep "${AB}.${k}.0" 2>/dev/null) || \
                ip -netns ${h} route add "${AB}.${k}.0/24" via "${AB}.${i}.2" 2>/dev/nul
        done

        # add reverse routes
        for k in $(seq 1 $((i - 2))); do
                ip -netns ${h} route 2>/dev/null | grep "${AB}.${k}.0" 2>/dev/null || \
                ip -netns ${h} route add "${AB}.${k}.0/24" via "${AB}.${p}.1" 2>/dev/nul
        done
done

ip netns exec host1 ping -q -W 2 -w 1 -c 1 10.1.4.2 2>&1>/dev/null && echo "success 10.1.4.2 reachable" || echo "ERROR"
ip netns exec host1 ping -W 9 -w 5 -c 1 10.1.4.3 || echo  "note the source address of unreachble"
ip -netns host1 route flush cache

ip netns exec host3 nft add table inet filter
ip netns exec host3 nft add chain inet filter FORWARD { type filter hook forward priority filter\; policy drop \; }
ip netns exec host3 nft add rule inet filter FORWARD counter ip protocol icmp drop
ip netns exec host3 nft add rule inet filter FORWARD counter ip protocol esp accept
ip netns exec host3 nft add rule inet filter FORWARD counter drop

ip -netns host2 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir out \
        flag icmp tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 1 mode tunnel

ip -netns host2 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir in \
        tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 2 mode tunnel

ip -netns host2 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir fwd \
        flag icmp tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 2 mode tunnel

ip -netns host2 xfrm state add src 10.1.2.1 dst 10.1.3.2 proto esp spi 1 \
        reqid 1 replay-window 1  mode tunnel aead 'rfc4106(gcm(aes))' \
        0x1111111111111111111111111111111111111111 96 \
        sel src 10.1.1.0/24 dst 10.1.4.0/24

ip -netns host2 xfrm state add src 10.1.3.2 dst 10.1.2.1 proto esp spi 2 \
        flag icmp reqid 2 replay-window 10 mode tunnel aead 'rfc4106(gcm(aes))' \
        0x2222222222222222222222222222222222222222 96

ip -netns host4 xfrm policy add src 10.1.4.0/24 dst 10.1.1.0/24 dir out \
        flag icmp tmpl src 10.1.3.2 dst 10.1.2.1 proto esp reqid 1 mode tunnel

ip -netns host4 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir in \
        tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 2  mode tunnel

ip -netns host4 xfrm policy add src 10.1.1.0/24 dst 10.1.4.0/24 dir fwd \
                flag icmp tmpl src 10.1.2.1 dst 10.1.3.2 proto esp reqid 2 mode tunnel

ip -netns host4 xfrm state add src 10.1.3.2 dst 10.1.2.1 proto esp spi 2 \
        reqid 1 replay-window 1 mode tunnel aead 'rfc4106(gcm(aes))' \
        0x2222222222222222222222222222222222222222 96

ip -netns host4 xfrm state add src 10.1.2.1 dst 10.1.3.2 proto esp spi 1 \
        reqid 2 replay-window 20 flag icmp  mode tunnel aead 'rfc4106(gcm(aes))' \
        0x1111111111111111111111111111111111111111 96 \
        sel src 10.1.1.0/24 dst 10.1.4.0/24

ip netns exec host1 ping -W 5 -c 1 10.1.4.2 2>&1 > /dev/null && echo ""
ip netns exec host1 ping -W 5 -c 1 10.1.4.3 || echo "note source address"

Again before the fix
ping -W 5 -c 1 10.1.4.3
From 10.1.4.3 icmp_seq=1 Destination Host Unreachable

After the fix
From 10.1.3.2 icmp_seq=1 Destination Host Unreachable

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/ipv4/icmp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e63a3bf99617..bec234637122 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -555,7 +555,6 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					    XFRM_LOOKUP_ICMP);
 	if (!IS_ERR(rt2)) {
 		dst_release(&rt->dst);
-		memcpy(fl4, &fl4_dec, sizeof(*fl4));
 		rt = rt2;
 	} else if (PTR_ERR(rt2) == -EPERM) {
 		if (rt)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v4 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2023-12-21 13:12   ` [PATCH v4 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
@ 2023-12-22  7:23     ` Steffen Klassert
  2023-12-22 12:56       ` Antony Antony
  0 siblings, 1 reply; 27+ messages in thread
From: Steffen Klassert @ 2023-12-22  7:23 UTC (permalink / raw)
  To: Antony Antony; +Cc: Herbert Xu, David S. Miller, devel, Jakub Kicinski, netdev

On Thu, Dec 21, 2023 at 02:12:36PM +0100, Antony Antony wrote:
> ---
>  net/ipv4/icmp.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index e63a3bf99617..bec234637122 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -555,7 +555,6 @@ static struct rtable *icmp_route_lookup(struct net *net,
>  					    XFRM_LOOKUP_ICMP);
>  	if (!IS_ERR(rt2)) {
>  		dst_release(&rt->dst);
> -		memcpy(fl4, &fl4_dec, sizeof(*fl4));
>  		rt = rt2;
>  	} else if (PTR_ERR(rt2) == -EPERM) {
>  		if (rt)

This is generic icmp code, so I want to see an Ack from one of
the netdev Maintainers before I merge this into the ipsec-next
tree. Alternatively, you can just split the patchset and route
this one to net or net-next.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v4 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway
  2023-12-22  7:23     ` Steffen Klassert
@ 2023-12-22 12:56       ` Antony Antony
  0 siblings, 0 replies; 27+ messages in thread
From: Antony Antony @ 2023-12-22 12:56 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Antony Antony, Herbert Xu, David S. Miller, devel,
	Jakub Kicinski, netdev

On Fri, Dec 22, 2023 at 08:23:05 +0100, Steffen Klassert wrote:
> On Thu, Dec 21, 2023 at 02:12:36PM +0100, Antony Antony wrote:
> > ---
> >  net/ipv4/icmp.c | 1 -
> >  1 file changed, 1 deletion(-)
> > 
> > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> > index e63a3bf99617..bec234637122 100644
> > --- a/net/ipv4/icmp.c
> > +++ b/net/ipv4/icmp.c
> > @@ -555,7 +555,6 @@ static struct rtable *icmp_route_lookup(struct net *net,
> >  					    XFRM_LOOKUP_ICMP);
> >  	if (!IS_ERR(rt2)) {
> >  		dst_release(&rt->dst);
> > -		memcpy(fl4, &fl4_dec, sizeof(*fl4));
> >  		rt = rt2;
> >  	} else if (PTR_ERR(rt2) == -EPERM) {
> >  		if (rt)
> 
> This is generic icmp code, so I want to see an Ack from one of
> the netdev Maintainers before I merge this into the ipsec-next
> tree. Alternatively, you can just split the patchset and route
> this one to net or net-next.

Let me split the series. In case we get an Ack take the v4 series!

thank,
-antony

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v5 ipsec-next] xfrm: introduce forwarding of ICMP Error messages
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
                     ` (7 preceding siblings ...)
  2023-12-21 13:12   ` [PATCH v4 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
@ 2023-12-22 12:57   ` Antony Antony
  2024-01-04  9:39     ` Steffen Klassert
  2024-01-19 11:27   ` [PATCH v6 " Antony Antony
  9 siblings, 1 reply; 27+ messages in thread
From: Antony Antony @ 2023-12-22 12:57 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

This commit aligns with RFC 4301, Section 6, and addresses the
requirement to forward unauthenticated ICMP error messages that do not
match any xfrm policies. It utilizes the ICMP payload as an skb and
performs a reverse lookup. If a policy match is found, forward
the packet.

The ICMP payload typically contains a partial IP packet that is likely
responsible for the error message.

The following error types will be forwarded:
- IPv4 ICMP error types: ICMP_DEST_UNREACH & ICMP_TIME_EXCEEDED
- IPv6 ICMPv6 error types: ICMPV6_DEST_UNREACH, ICMPV6_PKT_TOOBIG,
			   ICMPV6_TIME_EXCEED

To implement this feature, a reverse lookup has been added to the xfrm
forward path, making use of the ICMP payload as the skb.

To enable this functionality from user space, the XFRM_POLICY_ICMP flag
should be added to the outgoing and forward policies, and the
XFRM_STATE_ICMP flag should be set on incoming states.

e.g.
ip xfrm policy add flag icmp tmpl

ip xfrm policy
src 192.0.2.0/24 dst 192.0.1.0/25
	dir out priority 2084302 ptype main flag icmp

ip xfrm state add ...flag icmp

ip xfrm state
root@west:~#ip x s
src 192.1.2.23 dst 192.1.2.45
	proto esp spi 0xa7b76872 reqid 16389 mode tunnel
	replay-window 32 flag icmp af-unspec

Changes since v4:
- split the series to only ICMP erorr forwarding

Changes since v3: no code chage
 - add missing white spaces detected by checkpatch.pl

Changes since v2: reviewed by Steffen Klassert
 - user consume_skb instead of kfree_skb for the inner skb
 - fixed newskb leaks in error paths
 - free the newskb once inner flow is decoded with change due to
   commit 7a0207094f1b ("xfrm: policy: replace session decode with flow dissector")
 - if xfrm_decode_session_reverse() on inner payload fails ignore.
   do not increment error counter

Changes since v1:
- Move IPv6 variable declaration inside IS_ENABLED(CONFIG_IPV6)

Changes since RFC:
- Fix calculation of ICMPv6 header length

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/xfrm/xfrm_policy.c | 142 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c13dc3ef7910..bc99984e8d39 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
 #include <linux/audit.h>
 #include <linux/rhashtable.h>
 #include <linux/if_tunnel.h>
+#include <linux/icmp.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/inet_ecn.h>
@@ -3503,6 +3504,128 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
 	return 0;
 }

+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (family == AF_INET &&
+	    fl4->flowi4_proto == IPPROTO_ICMP &&
+	    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+	     fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+		return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6) {
+		const struct flowi6 *fl6 = &fl->u.ip6;
+
+		if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+		    (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+		    fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+		    fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+			return true;
+	}
+#endif
+	return false;
+}
+
+static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
+				  const struct flowi *fl, struct flowi *fl1)
+{
+	bool ret = true;
+	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+	int hl = family == AF_INET ? (sizeof(struct iphdr) +  sizeof(struct icmphdr)) :
+		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+
+	if (!newskb)
+		return true;
+
+	if (!pskb_pull(newskb, hl))
+		goto out;
+
+	skb_reset_network_header(newskb);
+
+	if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
+		goto out;
+
+	fl1->flowi_oif = fl->flowi_oif;
+	fl1->flowi_mark = fl->flowi_mark;
+	fl1->flowi_tos = fl->flowi_tos;
+	nf_nat_decode_session(newskb, fl1, family);
+	ret = false;
+
+out:
+	consume_skb(newskb);
+	return ret;
+}
+
+static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
+					   const struct xfrm_selector *sel,
+					   const struct flowi *fl)
+{
+	bool ret = false;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return ret;
+
+		ret = xfrm_selector_match(sel, &fl1, family);
+	}
+
+	return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+			      const struct flowi *fl, unsigned short family,
+			      u32 if_id)
+{
+	struct xfrm_policy *pol = NULL;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct net *net = dev_net(skb->dev);
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return pol;
+
+		pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+	}
+
+	return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+			     unsigned short family, struct dst_entry *dst)
+{
+	if (icmp_err_packet(fl, family)) {
+		struct net *net = dev_net(skb->dev);
+		struct dst_entry *dst2;
+		struct flowi fl1;
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return dst;
+
+		dst_hold(dst);
+
+		dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+		if (IS_ERR(dst2))
+			return dst;
+
+		if (dst2->xfrm) {
+			dst_release(dst);
+			dst = dst2;
+		} else {
+			dst_release(dst2);
+		}
+	}
+
+	return dst;
+}
+
 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
@@ -3549,9 +3672,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,

 		for (i = sp->len - 1; i >= 0; i--) {
 			struct xfrm_state *x = sp->xvec[i];
+			int ret = 0;
+
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
-				return 0;
+				ret = true;
+				if (x->props.flags & XFRM_STATE_ICMP &&
+				    xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
+					ret = false;
+				if (ret) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+					return 0;
+				}
 			}
 		}
 	}
@@ -3574,6 +3705,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 0;
 	}

+	if (!pol && dir == XFRM_POLICY_FWD)
+		pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
 	if (!pol) {
 		if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3707,6 +3841,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		res = 0;
 		dst = NULL;
 	}
+
+	if (dst && !dst->xfrm)
+		dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
 	skb_dst_set(skb, dst);
 	return res;
 }
--
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 ipsec-next] xfrm: introduce forwarding of ICMP Error messages
  2023-12-22 12:57   ` [PATCH v5 ipsec-next] xfrm: introduce forwarding of ICMP Error messages Antony Antony
@ 2024-01-04  9:39     ` Steffen Klassert
  0 siblings, 0 replies; 27+ messages in thread
From: Steffen Klassert @ 2024-01-04  9:39 UTC (permalink / raw)
  To: Antony Antony; +Cc: Herbert Xu, David S. Miller, devel, Jakub Kicinski, netdev

On Fri, Dec 22, 2023 at 01:57:04PM +0100, Antony Antony wrote:
> +
>  int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
>  			unsigned short family)
>  {
> @@ -3549,9 +3672,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
> 
>  		for (i = sp->len - 1; i >= 0; i--) {
>  			struct xfrm_state *x = sp->xvec[i];
> +			int ret = 0;
> +
>  			if (!xfrm_selector_match(&x->sel, &fl, family)) {
> -				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
> -				return 0;
> +				ret = true;
> +				if (x->props.flags & XFRM_STATE_ICMP &&
> +				    xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
> +					ret = false;

__xfrm_policy_check returns int, not bool. Please fix these
return values.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v6 ipsec-next] xfrm: introduce forwarding of ICMP Error messages
  2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
                     ` (8 preceding siblings ...)
  2023-12-22 12:57   ` [PATCH v5 ipsec-next] xfrm: introduce forwarding of ICMP Error messages Antony Antony
@ 2024-01-19 11:27   ` Antony Antony
  2024-01-26  9:11     ` Steffen Klassert
  9 siblings, 1 reply; 27+ messages in thread
From: Antony Antony @ 2024-01-19 11:27 UTC (permalink / raw)
  To: Steffen Klassert
  Cc: Herbert Xu, Antony Antony, David S. Miller, devel,
	Jakub Kicinski, netdev

This commit aligns with RFC 4301, Section 6, and addresses the
requirement to forward unauthenticated ICMP error messages that do not
match any xfrm policies. It utilizes the ICMP payload as an skb and
performs a reverse lookup. If a policy match is found, forward
the packet.

The ICMP payload typically contains a partial IP packet that is likely
responsible for the error message.

The following error types will be forwarded:
- IPv4 ICMP error types: ICMP_DEST_UNREACH & ICMP_TIME_EXCEEDED
- IPv6 ICMPv6 error types: ICMPV6_DEST_UNREACH, ICMPV6_PKT_TOOBIG,
			   ICMPV6_TIME_EXCEED

To implement this feature, a reverse lookup has been added to the xfrm
forward path, making use of the ICMP payload as the skb.

To enable this functionality from user space, the XFRM_POLICY_ICMP flag
should be added to the outgoing and forward policies, and the
XFRM_STATE_ICMP flag should be set on incoming states.

e.g.
ip xfrm policy add flag icmp tmpl

ip xfrm policy
src 192.0.2.0/24 dst 192.0.1.0/25
	dir out priority 2084302 ptype main flag icmp

ip xfrm state add ...flag icmp

ip xfrm state
root@west:~#ip x s
src 192.1.2.23 dst 192.1.2.45
	proto esp spi 0xa7b76872 reqid 16389 mode tunnel
	replay-window 32 flag icmp af-unspec

Changes since v5:
- fix return values bool->int, feedback from Steffen

Changes since v4:
- split the series to only ICMP erorr forwarding

Changes since v3: no code chage
 - add missing white spaces detected by checkpatch.pl

Changes since v2: reviewed by Steffen Klassert
 - user consume_skb instead of kfree_skb for the inner skb
 - fixed newskb leaks in error paths
 - free the newskb once inner flow is decoded with change due to
   commit 7a0207094f1b ("xfrm: policy: replace session decode with flow dissector")
 - if xfrm_decode_session_reverse() on inner payload fails ignore.
   do not increment error counter

Changes since v1:
- Move IPv6 variable declaration inside IS_ENABLED(CONFIG_IPV6)

Changes since RFC:
- Fix calculation of ICMPv6 header length

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/xfrm/xfrm_policy.c | 142 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1b7e75159727..b4850a8f14ad 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
 #include <linux/audit.h>
 #include <linux/rhashtable.h>
 #include <linux/if_tunnel.h>
+#include <linux/icmp.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/inet_ecn.h>
@@ -3503,6 +3504,128 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
 	return 0;
 }
 
+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (family == AF_INET &&
+	    fl4->flowi4_proto == IPPROTO_ICMP &&
+	    (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+	     fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+		return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6) {
+		const struct flowi6 *fl6 = &fl->u.ip6;
+
+		if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+		    (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+		    fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+		    fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+			return true;
+	}
+#endif
+	return false;
+}
+
+static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
+				  const struct flowi *fl, struct flowi *fl1)
+{
+	bool ret = true;
+	struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+	int hl = family == AF_INET ? (sizeof(struct iphdr) +  sizeof(struct icmphdr)) :
+		 (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+
+	if (!newskb)
+		return true;
+
+	if (!pskb_pull(newskb, hl))
+		goto out;
+
+	skb_reset_network_header(newskb);
+
+	if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
+		goto out;
+
+	fl1->flowi_oif = fl->flowi_oif;
+	fl1->flowi_mark = fl->flowi_mark;
+	fl1->flowi_tos = fl->flowi_tos;
+	nf_nat_decode_session(newskb, fl1, family);
+	ret = false;
+
+out:
+	consume_skb(newskb);
+	return ret;
+}
+
+static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
+					   const struct xfrm_selector *sel,
+					   const struct flowi *fl)
+{
+	bool ret = false;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return ret;
+
+		ret = xfrm_selector_match(sel, &fl1, family);
+	}
+
+	return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+			      const struct flowi *fl, unsigned short family,
+			      u32 if_id)
+{
+	struct xfrm_policy *pol = NULL;
+
+	if (icmp_err_packet(fl, family)) {
+		struct flowi fl1;
+		struct net *net = dev_net(skb->dev);
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return pol;
+
+		pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+	}
+
+	return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+			     unsigned short family, struct dst_entry *dst)
+{
+	if (icmp_err_packet(fl, family)) {
+		struct net *net = dev_net(skb->dev);
+		struct dst_entry *dst2;
+		struct flowi fl1;
+
+		if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+			return dst;
+
+		dst_hold(dst);
+
+		dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+		if (IS_ERR(dst2))
+			return dst;
+
+		if (dst2->xfrm) {
+			dst_release(dst);
+			dst = dst2;
+		} else {
+			dst_release(dst2);
+		}
+	}
+
+	return dst;
+}
+
 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
@@ -3549,9 +3672,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 		for (i = sp->len - 1; i >= 0; i--) {
 			struct xfrm_state *x = sp->xvec[i];
+			int ret = 0;
+
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
-				return 0;
+				ret = 1;
+				if (x->props.flags & XFRM_STATE_ICMP &&
+				    xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
+					ret = 0;
+				if (ret) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+					return 0;
+				}
 			}
 		}
 	}
@@ -3574,6 +3705,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 0;
 	}
 
+	if (!pol && dir == XFRM_POLICY_FWD)
+		pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
 	if (!pol) {
 		if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3707,6 +3841,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		res = 0;
 		dst = NULL;
 	}
+
+	if (dst && !dst->xfrm)
+		dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
 	skb_dst_set(skb, dst);
 	return res;
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v6 ipsec-next] xfrm: introduce forwarding of ICMP Error messages
  2024-01-19 11:27   ` [PATCH v6 " Antony Antony
@ 2024-01-26  9:11     ` Steffen Klassert
  0 siblings, 0 replies; 27+ messages in thread
From: Steffen Klassert @ 2024-01-26  9:11 UTC (permalink / raw)
  To: Antony Antony; +Cc: Herbert Xu, David S. Miller, devel, Jakub Kicinski, netdev

On Fri, Jan 19, 2024 at 12:27:42PM +0100, Antony Antony wrote:
> This commit aligns with RFC 4301, Section 6, and addresses the
> requirement to forward unauthenticated ICMP error messages that do not
> match any xfrm policies. It utilizes the ICMP payload as an skb and
> performs a reverse lookup. If a policy match is found, forward
> the packet.
> 
> The ICMP payload typically contains a partial IP packet that is likely
> responsible for the error message.
> 
> The following error types will be forwarded:
> - IPv4 ICMP error types: ICMP_DEST_UNREACH & ICMP_TIME_EXCEEDED
> - IPv6 ICMPv6 error types: ICMPV6_DEST_UNREACH, ICMPV6_PKT_TOOBIG,
> 			   ICMPV6_TIME_EXCEED
> 
> To implement this feature, a reverse lookup has been added to the xfrm
> forward path, making use of the ICMP payload as the skb.
> 
> To enable this functionality from user space, the XFRM_POLICY_ICMP flag
> should be added to the outgoing and forward policies, and the
> XFRM_STATE_ICMP flag should be set on incoming states.
> 
> e.g.
> ip xfrm policy add flag icmp tmpl
> 
> ip xfrm policy
> src 192.0.2.0/24 dst 192.0.1.0/25
> 	dir out priority 2084302 ptype main flag icmp
> 
> ip xfrm state add ...flag icmp
> 
> ip xfrm state
> root@west:~#ip x s
> src 192.1.2.23 dst 192.1.2.45
> 	proto esp spi 0xa7b76872 reqid 16389 mode tunnel
> 	replay-window 32 flag icmp af-unspec
> 
> Changes since v5:
> - fix return values bool->int, feedback from Steffen
> 
> Changes since v4:
> - split the series to only ICMP erorr forwarding
> 
> Changes since v3: no code chage
>  - add missing white spaces detected by checkpatch.pl
> 
> Changes since v2: reviewed by Steffen Klassert
>  - user consume_skb instead of kfree_skb for the inner skb
>  - fixed newskb leaks in error paths
>  - free the newskb once inner flow is decoded with change due to
>    commit 7a0207094f1b ("xfrm: policy: replace session decode with flow dissector")
>  - if xfrm_decode_session_reverse() on inner payload fails ignore.
>    do not increment error counter
> 
> Changes since v1:
> - Move IPv6 variable declaration inside IS_ENABLED(CONFIG_IPV6)
> 
> Changes since RFC:
> - Fix calculation of ICMPv6 header length
> 
> Signed-off-by: Antony Antony <antony.antony@secunet.com>

Patch applied, thanks a lot Antony!

^ permalink raw reply	[flat|nested] 27+ messages in thread

* re: [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2023-10-26 14:45   ` [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
  2023-10-26 19:41     ` kernel test robot
@ 2024-01-30 10:36     ` Dan Carpenter
  2024-01-31 19:38       ` Antony Antony
  1 sibling, 1 reply; 27+ messages in thread
From: Dan Carpenter @ 2024-01-30 10:36 UTC (permalink / raw)
  To: Antony Antony
  Cc: Steffen Klassert, Herbert Xu, David S. Miller, devel,
	Jakub Kicinski, netdev


Hello Antony Antony,

The patch 63b21caba17e: "xfrm: introduce forwarding of ICMP Error
messages" from Jan 19, 2024 (linux-next), leads to the following
Smatch static checker warning:

	net/xfrm/xfrm_policy.c:3708 __xfrm_policy_check()
	error: testing array offset 'dir' after use.

net/xfrm/xfrm_policy.c
  3689  
  3690          pol = NULL;
  3691          sk = sk_to_full_sk(sk);
  3692          if (sk && sk->sk_policy[dir]) {
                            ^^^^^^^^^^^^^^^^
If dir is XFRM_POLICY_FWD (2) then it is one element beyond the end of
the ->sk_policy[] array.

  3693                  pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
  3694                  if (IS_ERR(pol)) {
  3695                          XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
  3696                          return 0;
  3697                  }
  3698          }
  3699  
  3700          if (!pol)
  3701                  pol = xfrm_policy_lookup(net, &fl, family, dir, if_id);
  3702  
  3703          if (IS_ERR(pol)) {
  3704                  XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
  3705                  return 0;
  3706          }
  3707  
  3708          if (!pol && dir == XFRM_POLICY_FWD)
                            ^^^^^^^^^^^^^^^^^^^^^^
This assumes that dir can be 2.

  3709                  pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
  3710  
  3711          if (!pol) {
  3712                  if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
  3713                          XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
  3714                          return 0;
  3715                  }
  3716  
  3717                  if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
  3718                          xfrm_secpath_reject(xerr_idx, skb, &fl);
  3719                          XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
  3720                          return 0;
  3721                  }
  3722                  return 1;

regards,
dan carpenter

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2024-01-30 10:36     ` Dan Carpenter
@ 2024-01-31 19:38       ` Antony Antony
  2024-01-31 19:48         ` Dan Carpenter
  0 siblings, 1 reply; 27+ messages in thread
From: Antony Antony @ 2024-01-31 19:38 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Antony Antony, Steffen Klassert, Herbert Xu, David S. Miller,
	devel, Jakub Kicinski, netdev

HI Dan,

Thanks for reporting the warning.

On Tue, Jan 30, 2024 at 01:36:28PM +0300, Dan Carpenter wrote:
> 
> Hello Antony Antony,
> 
> The patch 63b21caba17e: "xfrm: introduce forwarding of ICMP Error
> messages" from Jan 19, 2024 (linux-next), leads to the following
> Smatch static checker warning:
> 
> 	net/xfrm/xfrm_policy.c:3708 __xfrm_policy_check()
> 	error: testing array offset 'dir' after use.

> 
> net/xfrm/xfrm_policy.c
>   3689  
>   3690          pol = NULL;
>   3691          sk = sk_to_full_sk(sk);
>   3692          if (sk && sk->sk_policy[dir]) {
>                             ^^^^^^^^^^^^^^^^
> If dir is XFRM_POLICY_FWD (2) then it is one element beyond the end of
> the ->sk_policy[] array.

Yes, that's correct. However, for this patch, it's necessary that sk != NULL 
at the same time. As far as I know, there isn't any code that would call dir 
= XFRM_POLICY_FWD with sk != NULL. What am I missing? Did Smatch give any 
hints for such a code path?

> 
>   3693                  pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
>   3694                  if (IS_ERR(pol)) {
>   3695                          XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
>   3696                          return 0;
>   3697                  }
>   3698          }
>   3699  
>   3700          if (!pol)
>   3701                  pol = xfrm_policy_lookup(net, &fl, family, dir, if_id);
>   3702  
>   3703          if (IS_ERR(pol)) {
>   3704                  XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
>   3705                  return 0;
>   3706          }
>   3707  
>   3708          if (!pol && dir == XFRM_POLICY_FWD)
>                             ^^^^^^^^^^^^^^^^^^^^^^
> This assumes that dir can be 2

Yes that is correct. However, this patch does not need sk != NULL at the 
same time.

>   3709                  pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
>   3710  
>   3711          if (!pol) {
>   3712                  if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
>   3713                          XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
>   3714                          return 0;
>   3715                  }
>   3716  
>   3717                  if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
>   3718                          xfrm_secpath_reject(xerr_idx, skb, &fl);
>   3719                          XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
>   3720                          return 0;
>   3721                  }
>   3722                  return 1;
> 
> regards,
> dan carpenter
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2024-01-31 19:38       ` Antony Antony
@ 2024-01-31 19:48         ` Dan Carpenter
  2024-01-31 19:50           ` Dan Carpenter
  0 siblings, 1 reply; 27+ messages in thread
From: Dan Carpenter @ 2024-01-31 19:48 UTC (permalink / raw)
  To: Antony Antony
  Cc: Antony Antony, Steffen Klassert, Herbert Xu, David S. Miller,
	devel, Jakub Kicinski, netdev

On Wed, Jan 31, 2024 at 08:38:51PM +0100, Antony Antony wrote:
> HI Dan,
> 
> Thanks for reporting the warning.
> 
> On Tue, Jan 30, 2024 at 01:36:28PM +0300, Dan Carpenter wrote:
> > 
> > Hello Antony Antony,
> > 
> > The patch 63b21caba17e: "xfrm: introduce forwarding of ICMP Error
> > messages" from Jan 19, 2024 (linux-next), leads to the following
> > Smatch static checker warning:
> > 
> > 	net/xfrm/xfrm_policy.c:3708 __xfrm_policy_check()
> > 	error: testing array offset 'dir' after use.
> 
> > 
> > net/xfrm/xfrm_policy.c
> >   3689  
> >   3690          pol = NULL;
> >   3691          sk = sk_to_full_sk(sk);
> >   3692          if (sk && sk->sk_policy[dir]) {
> >                             ^^^^^^^^^^^^^^^^
> > If dir is XFRM_POLICY_FWD (2) then it is one element beyond the end of
> > the ->sk_policy[] array.
> 
> Yes, that's correct. However, for this patch, it's necessary that sk != NULL 
> at the same time. As far as I know, there isn't any code that would call dir 
> = XFRM_POLICY_FWD with sk != NULL. What am I missing? Did Smatch give any 
> hints for such a code path?
> 

I wondered if that might be the case.  The truth is that this sort of
dependency is too compicated for any static analysis tools that
currently exist.  Smatch tries to track the relationship between
"dir" and "sk" as they are passed in, but it will look the relationship
information when we re-assign sk.  "sk = sk_to_full_sk(sk);".

So what we do in this case, is we just ignore the warning and if anyone
has questions about it they will look up this conversation on
lore.kernel.org to find the explanation.

No need to worry about trying to silence the checker...

regards,
dan carpenter


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages
  2024-01-31 19:48         ` Dan Carpenter
@ 2024-01-31 19:50           ` Dan Carpenter
  0 siblings, 0 replies; 27+ messages in thread
From: Dan Carpenter @ 2024-01-31 19:50 UTC (permalink / raw)
  To: Antony Antony
  Cc: Antony Antony, Steffen Klassert, Herbert Xu, David S. Miller,
	devel, Jakub Kicinski, netdev

On Wed, Jan 31, 2024 at 10:48:02PM +0300, Dan Carpenter wrote:
> On Wed, Jan 31, 2024 at 08:38:51PM +0100, Antony Antony wrote:
> > HI Dan,
> > 
> > Thanks for reporting the warning.
> > 
> > On Tue, Jan 30, 2024 at 01:36:28PM +0300, Dan Carpenter wrote:
> > > 
> > > Hello Antony Antony,
> > > 
> > > The patch 63b21caba17e: "xfrm: introduce forwarding of ICMP Error
> > > messages" from Jan 19, 2024 (linux-next), leads to the following
> > > Smatch static checker warning:
> > > 
> > > 	net/xfrm/xfrm_policy.c:3708 __xfrm_policy_check()
> > > 	error: testing array offset 'dir' after use.
> > 
> > > 
> > > net/xfrm/xfrm_policy.c
> > >   3689  
> > >   3690          pol = NULL;
> > >   3691          sk = sk_to_full_sk(sk);
> > >   3692          if (sk && sk->sk_policy[dir]) {
> > >                             ^^^^^^^^^^^^^^^^
> > > If dir is XFRM_POLICY_FWD (2) then it is one element beyond the end of
> > > the ->sk_policy[] array.
> > 
> > Yes, that's correct. However, for this patch, it's necessary that sk != NULL 
> > at the same time. As far as I know, there isn't any code that would call dir 
> > = XFRM_POLICY_FWD with sk != NULL. What am I missing? Did Smatch give any 
> > hints for such a code path?
> > 
> 
> I wondered if that might be the case.  The truth is that this sort of
> dependency is too compicated for any static analysis tools that
> currently exist.  Smatch tries to track the relationship between
> "dir" and "sk" as they are passed in, but it will look the relationship
> information when we re-assign sk.  "sk = sk_to_full_sk(sk);".

s/look/lose/.  I'm tired.  I should go to bed.

regards,
dan carpenter


^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2024-01-31 19:50 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <YTXGGiMzsda6mcm2@AntonyAntony.local>
2021-12-19  0:28 ` [RFC PATCH ipsec-next] xfrm: add forwarding ICMP error message Antony Antony
2023-10-26 14:45   ` [PATCH ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
2023-10-26 19:41     ` kernel test robot
2024-01-30 10:36     ` Dan Carpenter
2024-01-31 19:38       ` Antony Antony
2024-01-31 19:48         ` Dan Carpenter
2024-01-31 19:50           ` Dan Carpenter
2023-10-26 14:45   ` [PATCH ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
2023-10-27  8:16   ` [PATCH v2 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
2023-11-17  9:13     ` Steffen Klassert
2023-11-25 22:48       ` [devel-ipsec] " Antony Antony
2023-10-27  8:16   ` [PATCH v2 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
2023-10-27 13:30     ` [devel-ipsec] " Michael Richardson
2023-10-29 10:26       ` Antony Antony
2023-10-31  7:59         ` Michael Richardson
2023-11-17  9:21     ` Steffen Klassert
2023-11-25 23:15       ` [devel-ipsec] " Antony Antony
2023-12-19 20:29   ` [PATCH v3 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
2023-12-19 20:30   ` [PATCH v3 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
2023-12-21 13:12   ` [PATCH v4 ipsec-next 1/2] xfrm: introduce forwarding of ICMP Error messages Antony Antony
2023-12-21 13:12   ` [PATCH v4 ipsec-next 2/2] xfrm: fix source address in icmp error generation from IPsec gateway Antony Antony
2023-12-22  7:23     ` Steffen Klassert
2023-12-22 12:56       ` Antony Antony
2023-12-22 12:57   ` [PATCH v5 ipsec-next] xfrm: introduce forwarding of ICMP Error messages Antony Antony
2024-01-04  9:39     ` Steffen Klassert
2024-01-19 11:27   ` [PATCH v6 " Antony Antony
2024-01-26  9:11     ` Steffen Klassert

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.