All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
@ 2021-04-22  2:35 Cole Dishington
  2021-04-22  4:10 ` kernel test robot
                   ` (3 more replies)
  0 siblings, 4 replies; 26+ messages in thread
From: Cole Dishington @ 2021-04-22  2:35 UTC (permalink / raw)
  To: pablo
  Cc: Cole Dishington, Anthony Lineham, Scott Parlane, Blair Steven,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, open list:NETFILTER, open list:NETFILTER,
	open list:NETWORKING [GENERAL],
	open list

This adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---
 include/net/netfilter/nf_conntrack.h          |   2 +
 .../netfilter/nf_conntrack_tuple_common.h     |   5 +
 include/uapi/linux/netfilter/nf_nat.h         |   3 +-
 net/netfilter/nf_nat_core.c                   | 101 ++++++++++++++++--
 net/netfilter/nf_nat_ftp.c                    |  23 ++--
 net/netfilter/nf_nat_helper.c                 |  15 ++-
 6 files changed, 120 insertions(+), 29 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 439379ca9ffa..d63d38aa7188 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -92,6 +92,8 @@ struct nf_conn {
 	/* If we were expected by an expectation, this will be it */
 	struct nf_conn *master;
 
+	struct nf_nat_range2 *range;
+
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	u_int32_t mark;
 #endif
diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
index 64390fac6f7e..36d16d47c2b0 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
@@ -39,6 +39,11 @@ union nf_conntrack_man_proto {
 	struct {
 		__be16 key;	/* GRE key is 32bit, PPtP only uses 16bit */
 	} gre;
+	struct {
+		unsigned char psid_length;
+		unsigned char offset;
+		__be16 psid;
+	} psid;
 };
 
 #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 #define NF_NAT_RANGE_NETMAP			(1 << 6)
+#define NF_NAT_RANGE_PSID			(1 << 7)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
 	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
-	 NF_NAT_RANGE_NETMAP)
+	 NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b7c3c902290f..7730ce4ca9a9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -232,13 +232,33 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     bool is_psid)
 {
 	__be16 port;
 
+	int m = 0;
+	u16 offset_mask = 0;
+	u16 psid_mask = 0;
+
+	/* In this case we are in PSID mode and the rules are all different */
+	if (is_psid) {
+		/* m = number of bits in each valid range */
+		m = 16 - min->psid.psid_length - min->psid.offset;
+		offset_mask = ((1 << min->psid.offset) - 1) <<
+				(16 - min->psid.offset);
+		psid_mask = ((1 << min->psid.psid_length) - 1) << m;
+	}
+
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return ((ntohs(tuple->src.u.icmp.id) & offset_mask) !=
+				0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) ==
+				min->psid.psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -252,6 +272,11 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return ((ntohs(port) & offset_mask) != 0) &&
+				(((ntohs(port) & psid_mask) >> m) ==
+				  min->psid.psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -274,9 +299,9 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 
 	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
 		return 1;
-
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -397,10 +422,10 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
  *
  * Per-protocol part of tuple is initialized to the incoming packet.
  */
-static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-					const struct nf_nat_range2 *range,
-					enum nf_nat_manip_type maniptype,
-					const struct nf_conn *ct)
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range2 *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct)
 {
 	unsigned int range_size, min, max, i, attempts;
 	__be16 *keyptr;
@@ -457,6 +482,50 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		return;
 	}
 
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		/* Find the non-PSID parts of the port.
+		 * To do this we look for an unused port that is
+		 * comprised of [t_chunk|PSID|b_chunk]. The size of
+		 * these pieces is defined by the psid_length and
+		 * offset.
+		 */
+		int m = 16 - range->min_proto.psid.psid_length -
+		    range->min_proto.psid.offset;
+		int available;
+		int range_count = ((1 << range->min_proto.psid.offset) - 1);
+
+		/* Calculate the size of the bottom block */
+		range_size = (1 << m);
+
+		/* Calculate the total IDs to check */
+		available = range_size * range_count;
+		if (!available)
+			available = range_size;
+
+		off = ntohs(*keyptr);
+		for (i = 0;; ++off) {
+			int b_chunk = off % range_size;
+			int t_chunk = 0;
+
+			/* Move up to avoid the all-zeroes reserved chunk
+			 * (if there is one).
+			 */
+			if (range->min_proto.psid.offset > 0) {
+				t_chunk = (off >> m) % range_count;
+				++t_chunk;
+				t_chunk <<= (m +
+					     range->min_proto.psid.psid_length);
+			}
+
+			*keyptr = htons(t_chunk |
+					 (range->min_proto.psid.psid << m)
+					 | b_chunk);
+
+			if (++i >= available || !nf_nat_used_tuple(tuple, ct))
+				return;
+		}
+	}
+
 	/* If no range specified... */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		/* If it's dst rewrite, can't change port */
@@ -566,11 +635,18 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 	/* Only bother mapping if it's not already in range and unique */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
-		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+		/* Now that the PSID mode is present we always need to check
+		 * to see if the source ports are in range.
+		 */
+		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED ||
+		    (range->flags & NF_NAT_RANGE_PSID &&
+		    !in_range(orig_tuple, range))) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+					     &range->min_proto,
+					     &range->max_proto,
+					     range->flags &
+					     NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
@@ -623,6 +699,11 @@ nf_nat_setup_info(struct nf_conn *ct,
 			   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
+	if (range) {
+		if (!ct->range)
+			ct->range = kmalloc(sizeof(*ct->range), 0);
+		memcpy(ct->range, range, sizeof(*ct->range));
+	}
 
 	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
 		struct nf_conntrack_tuple reply;
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index aace6768a64e..006b7e1836ff 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -17,6 +17,10 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_ftp.h>
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range2 *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct);
 
 #define NAT_HELPER_NAME "ftp"
 
@@ -86,19 +90,12 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
 	 * this one. */
 	exp->expectfn = nf_nat_follow_master;
 
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		int ret;
-
-		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp, 0);
-		if (ret == 0)
-			break;
-		else if (ret != -EBUSY) {
-			port = 0;
-			break;
-		}
-	}
+	/* Find a port that matches the MASQ rule. */
+	nf_nat_l4proto_unique_tuple(&exp->tuple, ct->range,
+				    dir ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST,
+				    ct);
+	port = ntohs(exp->tuple.dst.u.tcp.port);
+	nf_ct_expect_related(exp, 0);
 
 	if (port == 0) {
 		nf_ct_helper_log(skb, ct, "all ports in use");
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index a263505455fc..090153475d4d 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -184,11 +184,16 @@ void nf_nat_follow_master(struct nf_conn *ct,
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
-	/* Change src to where master sends to */
-	range.flags = NF_NAT_RANGE_MAP_IPS;
-	range.min_addr = range.max_addr
-		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
-	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+	if (exp->master && exp->master->range && !exp->dir) {
+		range = *exp->master->range;
+		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+	} else {
+		/* Change src to where master sends to */
+		range.flags = NF_NAT_RANGE_MAP_IPS;
+		range.min_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+		range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+	}
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-04-22  2:35 [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-04-22  4:10 ` kernel test robot
  2021-04-22  6:54 ` kernel test robot
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2021-04-22  4:10 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 7506 bytes --]

Hi Cole,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on nf-next/master]
[also build test WARNING on nf/master ipvs/master v5.12-rc8 next-20210421]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Cole-Dishington/net-netfilter-Add-RFC-7597-Section-5-1-PSID-support/20210422-103613
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: arc-allyesconfig (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/2198990eeb54f0fc1517731200e48b17851443af
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Cole-Dishington/net-netfilter-Add-RFC-7597-Section-5-1-PSID-support/20210422-103613
        git checkout 2198990eeb54f0fc1517731200e48b17851443af
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross W=1 ARCH=arc 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> net/netfilter/nf_nat_core.c:425:6: warning: no previous prototype for 'nf_nat_l4proto_unique_tuple' [-Wmissing-prototypes]
     425 | void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +/nf_nat_l4proto_unique_tuple +425 net/netfilter/nf_nat_core.c

   419	
   420	/* Alter the per-proto part of the tuple (depending on maniptype), to
   421	 * give a unique tuple in the given range if possible.
   422	 *
   423	 * Per-protocol part of tuple is initialized to the incoming packet.
   424	 */
 > 425	void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
   426					 const struct nf_nat_range2 *range,
   427					 enum nf_nat_manip_type maniptype,
   428					 const struct nf_conn *ct)
   429	{
   430		unsigned int range_size, min, max, i, attempts;
   431		__be16 *keyptr;
   432		u16 off;
   433		static const unsigned int max_attempts = 128;
   434	
   435		switch (tuple->dst.protonum) {
   436		case IPPROTO_ICMP:
   437		case IPPROTO_ICMPV6:
   438			/* id is same for either direction... */
   439			keyptr = &tuple->src.u.icmp.id;
   440			if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
   441				min = 0;
   442				range_size = 65536;
   443			} else {
   444				min = ntohs(range->min_proto.icmp.id);
   445				range_size = ntohs(range->max_proto.icmp.id) -
   446					     ntohs(range->min_proto.icmp.id) + 1;
   447			}
   448			goto find_free_id;
   449	#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
   450		case IPPROTO_GRE:
   451			/* If there is no master conntrack we are not PPTP,
   452			   do not change tuples */
   453			if (!ct->master)
   454				return;
   455	
   456			if (maniptype == NF_NAT_MANIP_SRC)
   457				keyptr = &tuple->src.u.gre.key;
   458			else
   459				keyptr = &tuple->dst.u.gre.key;
   460	
   461			if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
   462				min = 1;
   463				range_size = 65535;
   464			} else {
   465				min = ntohs(range->min_proto.gre.key);
   466				range_size = ntohs(range->max_proto.gre.key) - min + 1;
   467			}
   468			goto find_free_id;
   469	#endif
   470		case IPPROTO_UDP:
   471		case IPPROTO_UDPLITE:
   472		case IPPROTO_TCP:
   473		case IPPROTO_SCTP:
   474		case IPPROTO_DCCP:
   475			if (maniptype == NF_NAT_MANIP_SRC)
   476				keyptr = &tuple->src.u.all;
   477			else
   478				keyptr = &tuple->dst.u.all;
   479	
   480			break;
   481		default:
   482			return;
   483		}
   484	
   485		if (range->flags & NF_NAT_RANGE_PSID) {
   486			/* Find the non-PSID parts of the port.
   487			 * To do this we look for an unused port that is
   488			 * comprised of [t_chunk|PSID|b_chunk]. The size of
   489			 * these pieces is defined by the psid_length and
   490			 * offset.
   491			 */
   492			int m = 16 - range->min_proto.psid.psid_length -
   493			    range->min_proto.psid.offset;
   494			int available;
   495			int range_count = ((1 << range->min_proto.psid.offset) - 1);
   496	
   497			/* Calculate the size of the bottom block */
   498			range_size = (1 << m);
   499	
   500			/* Calculate the total IDs to check */
   501			available = range_size * range_count;
   502			if (!available)
   503				available = range_size;
   504	
   505			off = ntohs(*keyptr);
   506			for (i = 0;; ++off) {
   507				int b_chunk = off % range_size;
   508				int t_chunk = 0;
   509	
   510				/* Move up to avoid the all-zeroes reserved chunk
   511				 * (if there is one).
   512				 */
   513				if (range->min_proto.psid.offset > 0) {
   514					t_chunk = (off >> m) % range_count;
   515					++t_chunk;
   516					t_chunk <<= (m +
   517						     range->min_proto.psid.psid_length);
   518				}
   519	
   520				*keyptr = htons(t_chunk |
   521						 (range->min_proto.psid.psid << m)
   522						 | b_chunk);
   523	
   524				if (++i >= available || !nf_nat_used_tuple(tuple, ct))
   525					return;
   526			}
   527		}
   528	
   529		/* If no range specified... */
   530		if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
   531			/* If it's dst rewrite, can't change port */
   532			if (maniptype == NF_NAT_MANIP_DST)
   533				return;
   534	
   535			if (ntohs(*keyptr) < 1024) {
   536				/* Loose convention: >> 512 is credential passing */
   537				if (ntohs(*keyptr) < 512) {
   538					min = 1;
   539					range_size = 511 - min + 1;
   540				} else {
   541					min = 600;
   542					range_size = 1023 - min + 1;
   543				}
   544			} else {
   545				min = 1024;
   546				range_size = 65535 - 1024 + 1;
   547			}
   548		} else {
   549			min = ntohs(range->min_proto.all);
   550			max = ntohs(range->max_proto.all);
   551			if (unlikely(max < min))
   552				swap(max, min);
   553			range_size = max - min + 1;
   554		}
   555	
   556	find_free_id:
   557		if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
   558			off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
   559		else
   560			off = prandom_u32();
   561	
   562		attempts = range_size;
   563		if (attempts > max_attempts)
   564			attempts = max_attempts;
   565	
   566		/* We are in softirq; doing a search of the entire range risks
   567		 * soft lockup when all tuples are already used.
   568		 *
   569		 * If we can't find any free port from first offset, pick a new
   570		 * one and try again, with ever smaller search window.
   571		 */
   572	another_round:
   573		for (i = 0; i < attempts; i++, off++) {
   574			*keyptr = htons(min + off % range_size);
   575			if (!nf_nat_used_tuple(tuple, ct))
   576				return;
   577		}
   578	
   579		if (attempts >= range_size || attempts < 16)
   580			return;
   581		attempts /= 2;
   582		off = prandom_u32();
   583		goto another_round;
   584	}
   585	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 67554 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-04-22  2:35 [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-04-22  4:10 ` kernel test robot
@ 2021-04-22  6:54 ` kernel test robot
  2021-04-22  7:48 ` kernel test robot
  2021-04-26 12:23 ` Florian Westphal
  3 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2021-04-22  6:54 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 7922 bytes --]

Hi Cole,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on nf-next/master]
[also build test WARNING on nf/master ipvs/master v5.12-rc8 next-20210421]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Cole-Dishington/net-netfilter-Add-RFC-7597-Section-5-1-PSID-support/20210422-103613
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: mips-randconfig-r031-20210421 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project f5446b769a7929806f72256fccd4826d66502e59)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install mips cross compiling tool for clang build
        # apt-get install binutils-mips-linux-gnu
        # https://github.com/0day-ci/linux/commit/2198990eeb54f0fc1517731200e48b17851443af
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Cole-Dishington/net-netfilter-Add-RFC-7597-Section-5-1-PSID-support/20210422-103613
        git checkout 2198990eeb54f0fc1517731200e48b17851443af
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=mips 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> net/netfilter/nf_nat_core.c:425:6: warning: no previous prototype for function 'nf_nat_l4proto_unique_tuple' [-Wmissing-prototypes]
   void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
        ^
   net/netfilter/nf_nat_core.c:425:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
   ^
   static 
   1 warning generated.


vim +/nf_nat_l4proto_unique_tuple +425 net/netfilter/nf_nat_core.c

   419	
   420	/* Alter the per-proto part of the tuple (depending on maniptype), to
   421	 * give a unique tuple in the given range if possible.
   422	 *
   423	 * Per-protocol part of tuple is initialized to the incoming packet.
   424	 */
 > 425	void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
   426					 const struct nf_nat_range2 *range,
   427					 enum nf_nat_manip_type maniptype,
   428					 const struct nf_conn *ct)
   429	{
   430		unsigned int range_size, min, max, i, attempts;
   431		__be16 *keyptr;
   432		u16 off;
   433		static const unsigned int max_attempts = 128;
   434	
   435		switch (tuple->dst.protonum) {
   436		case IPPROTO_ICMP:
   437		case IPPROTO_ICMPV6:
   438			/* id is same for either direction... */
   439			keyptr = &tuple->src.u.icmp.id;
   440			if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
   441				min = 0;
   442				range_size = 65536;
   443			} else {
   444				min = ntohs(range->min_proto.icmp.id);
   445				range_size = ntohs(range->max_proto.icmp.id) -
   446					     ntohs(range->min_proto.icmp.id) + 1;
   447			}
   448			goto find_free_id;
   449	#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
   450		case IPPROTO_GRE:
   451			/* If there is no master conntrack we are not PPTP,
   452			   do not change tuples */
   453			if (!ct->master)
   454				return;
   455	
   456			if (maniptype == NF_NAT_MANIP_SRC)
   457				keyptr = &tuple->src.u.gre.key;
   458			else
   459				keyptr = &tuple->dst.u.gre.key;
   460	
   461			if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
   462				min = 1;
   463				range_size = 65535;
   464			} else {
   465				min = ntohs(range->min_proto.gre.key);
   466				range_size = ntohs(range->max_proto.gre.key) - min + 1;
   467			}
   468			goto find_free_id;
   469	#endif
   470		case IPPROTO_UDP:
   471		case IPPROTO_UDPLITE:
   472		case IPPROTO_TCP:
   473		case IPPROTO_SCTP:
   474		case IPPROTO_DCCP:
   475			if (maniptype == NF_NAT_MANIP_SRC)
   476				keyptr = &tuple->src.u.all;
   477			else
   478				keyptr = &tuple->dst.u.all;
   479	
   480			break;
   481		default:
   482			return;
   483		}
   484	
   485		if (range->flags & NF_NAT_RANGE_PSID) {
   486			/* Find the non-PSID parts of the port.
   487			 * To do this we look for an unused port that is
   488			 * comprised of [t_chunk|PSID|b_chunk]. The size of
   489			 * these pieces is defined by the psid_length and
   490			 * offset.
   491			 */
   492			int m = 16 - range->min_proto.psid.psid_length -
   493			    range->min_proto.psid.offset;
   494			int available;
   495			int range_count = ((1 << range->min_proto.psid.offset) - 1);
   496	
   497			/* Calculate the size of the bottom block */
   498			range_size = (1 << m);
   499	
   500			/* Calculate the total IDs to check */
   501			available = range_size * range_count;
   502			if (!available)
   503				available = range_size;
   504	
   505			off = ntohs(*keyptr);
   506			for (i = 0;; ++off) {
   507				int b_chunk = off % range_size;
   508				int t_chunk = 0;
   509	
   510				/* Move up to avoid the all-zeroes reserved chunk
   511				 * (if there is one).
   512				 */
   513				if (range->min_proto.psid.offset > 0) {
   514					t_chunk = (off >> m) % range_count;
   515					++t_chunk;
   516					t_chunk <<= (m +
   517						     range->min_proto.psid.psid_length);
   518				}
   519	
   520				*keyptr = htons(t_chunk |
   521						 (range->min_proto.psid.psid << m)
   522						 | b_chunk);
   523	
   524				if (++i >= available || !nf_nat_used_tuple(tuple, ct))
   525					return;
   526			}
   527		}
   528	
   529		/* If no range specified... */
   530		if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
   531			/* If it's dst rewrite, can't change port */
   532			if (maniptype == NF_NAT_MANIP_DST)
   533				return;
   534	
   535			if (ntohs(*keyptr) < 1024) {
   536				/* Loose convention: >> 512 is credential passing */
   537				if (ntohs(*keyptr) < 512) {
   538					min = 1;
   539					range_size = 511 - min + 1;
   540				} else {
   541					min = 600;
   542					range_size = 1023 - min + 1;
   543				}
   544			} else {
   545				min = 1024;
   546				range_size = 65535 - 1024 + 1;
   547			}
   548		} else {
   549			min = ntohs(range->min_proto.all);
   550			max = ntohs(range->max_proto.all);
   551			if (unlikely(max < min))
   552				swap(max, min);
   553			range_size = max - min + 1;
   554		}
   555	
   556	find_free_id:
   557		if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
   558			off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
   559		else
   560			off = prandom_u32();
   561	
   562		attempts = range_size;
   563		if (attempts > max_attempts)
   564			attempts = max_attempts;
   565	
   566		/* We are in softirq; doing a search of the entire range risks
   567		 * soft lockup when all tuples are already used.
   568		 *
   569		 * If we can't find any free port from first offset, pick a new
   570		 * one and try again, with ever smaller search window.
   571		 */
   572	another_round:
   573		for (i = 0; i < attempts; i++, off++) {
   574			*keyptr = htons(min + off % range_size);
   575			if (!nf_nat_used_tuple(tuple, ct))
   576				return;
   577		}
   578	
   579		if (attempts >= range_size || attempts < 16)
   580			return;
   581		attempts /= 2;
   582		off = prandom_u32();
   583		goto another_round;
   584	}
   585	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 39123 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-04-22  2:35 [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-04-22  4:10 ` kernel test robot
  2021-04-22  6:54 ` kernel test robot
@ 2021-04-22  7:48 ` kernel test robot
  2021-04-26 12:23 ` Florian Westphal
  3 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2021-04-22  7:48 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1738 bytes --]

Hi Cole,

[FYI, it's a private test report for your RFC patch.]
[auto build test ERROR on nf-next/master]
[also build test ERROR on nf/master ipvs/master v5.12-rc8 next-20210421]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Cole-Dishington/net-netfilter-Add-RFC-7597-Section-5-1-PSID-support/20210422-103613
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: m68k-defconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/2198990eeb54f0fc1517731200e48b17851443af
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Cole-Dishington/net-netfilter-Add-RFC-7597-Section-5-1-PSID-support/20210422-103613
        git checkout 2198990eeb54f0fc1517731200e48b17851443af
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross W=1 ARCH=m68k 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "nf_nat_l4proto_unique_tuple" [net/netfilter/nf_nat_ftp.ko] undefined!

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 16954 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-04-22  2:35 [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
                   ` (2 preceding siblings ...)
  2021-04-22  7:48 ` kernel test robot
@ 2021-04-26 12:23 ` Florian Westphal
  2021-06-29  0:48   ` Cole Dishington
  3 siblings, 1 reply; 26+ messages in thread
From: Florian Westphal @ 2021-04-26 12:23 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, Anthony Lineham, Scott Parlane, Blair Steven,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, open list:NETFILTER, open list:NETFILTER,
	open list:NETWORKING [GENERAL],
	open list

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> This adds support for masquerading into a smaller subset of ports -
> defined by the PSID values from RFC-7597 Section 5.1. This is part of
> the support for MAP-E and Lightweight 4over6, which allows multiple
> devices to share an IPv4 address by splitting the L4 port / id into
> ranges.
> 
> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> ---
>  include/net/netfilter/nf_conntrack.h          |   2 +
>  .../netfilter/nf_conntrack_tuple_common.h     |   5 +
>  include/uapi/linux/netfilter/nf_nat.h         |   3 +-
>  net/netfilter/nf_nat_core.c                   | 101 ++++++++++++++++--
>  net/netfilter/nf_nat_ftp.c                    |  23 ++--
>  net/netfilter/nf_nat_helper.c                 |  15 ++-
>  6 files changed, 120 insertions(+), 29 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
> index 439379ca9ffa..d63d38aa7188 100644
> --- a/include/net/netfilter/nf_conntrack.h
> +++ b/include/net/netfilter/nf_conntrack.h
> @@ -92,6 +92,8 @@ struct nf_conn {
>  	/* If we were expected by an expectation, this will be it */
>  	struct nf_conn *master;
>  
> +	struct nf_nat_range2 *range;

Increasing nf_conn size should be avoided unless
absolutely necessary.

> --- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
> +++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
> @@ -39,6 +39,11 @@ union nf_conntrack_man_proto {
>  	struct {
>  		__be16 key;	/* GRE key is 32bit, PPtP only uses 16bit */
>  	} gre;
> +	struct {
> +		unsigned char psid_length;
> +		unsigned char offset;
> +		__be16 psid;
> +	} psid;

This breaks the ABI, you cannot change these structures.

This is the reason there is a 'struct nf_nat_range2', it wasn't
possible to add to the existing 'struct nf_nat_range'.

> diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
> index b7c3c902290f..7730ce4ca9a9 100644
> --- a/net/netfilter/nf_nat_core.c
> +++ b/net/netfilter/nf_nat_core.c
> @@ -232,13 +232,33 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
>  static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
>  			     enum nf_nat_manip_type maniptype,
>  			     const union nf_conntrack_man_proto *min,
> -			     const union nf_conntrack_man_proto *max)
> +			     const union nf_conntrack_man_proto *max,
> +			     bool is_psid)
>  {

...
>  	__be16 port;
>  
> +	int m = 0;
> +	u16 offset_mask = 0;
> +	u16 psid_mask = 0;
> +
> +	/* In this case we are in PSID mode and the rules are all different */
> +	if (is_psid) {
> +		/* m = number of bits in each valid range */
> +		m = 16 - min->psid.psid_length - min->psid.offset;
> +		offset_mask = ((1 << min->psid.offset) - 1) <<
> +				(16 - min->psid.offset);
> +		psid_mask = ((1 << min->psid.psid_length) - 1) << m;
> +	}

...

Is it really needed to place all of this in the nat core?

The only thing that has to be done in the NAT core, afaics, is to
suppress port reallocation attempts when NF_NAT_RANGE_PSID is set.

Is there a reason why nf_nat_masquerade_ipv4/6 can't be changed instead
to do what you want?

AFAICS its enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init the
upper/lower boundaries, i.e. change input given to nf_nat_setup_info().

>  	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
> +	if (range) {
> +		if (!ct->range)
> +			ct->range = kmalloc(sizeof(*ct->range), 0);

If you absolutely have to store extra data in nf_conn, please extend
struct nf_conn_nat, masquerade already stores the interface index, so
you could place the psid len/offset there as well.

> +	/* Find a port that matches the MASQ rule. */
> +	nf_nat_l4proto_unique_tuple(&exp->tuple, ct->range,
> +				    dir ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST,
> +				    ct);
> +	port = ntohs(exp->tuple.dst.u.tcp.port);
> +	nf_ct_expect_related(exp, 0);

This removes the error check for nf_ct_expect_related(), why?

Also, how is this going to be used?

I see no changes to any of the nftables or iptables modules that would
be needed for userspace to enable this feature.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-04-26 12:23 ` Florian Westphal
@ 2021-06-29  0:48   ` Cole Dishington
  2021-06-30 14:20     ` Florian Westphal
  0 siblings, 1 reply; 26+ messages in thread
From: Cole Dishington @ 2021-06-29  0:48 UTC (permalink / raw)
  To: pablo
  Cc: Cole Dishington, Anthony Lineham, Scott Parlane, Blair Steven,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, netfilter-devel, coreteam, netdev, linux-kernel

This adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing. I have also submitted a patch to netfilter iptables for these changes.
    
    Comments:
    Selecting the ports for psid needs to be in nf_nat_core since the PSID ranges are not a single range. e.g. offset=1024, PSID=0, psid_length=8 generates the ranges 1024-1027, 2048-2051, ..., 63488-63491, ... (example taken from RFC7597 B.2).
    This is why it is enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init upper/lower boundaries.
    
    Changes in v2:
    - Moved cached range2 from struct nf_conn to nf_conn_nat.
    - Moved psid fields out of union nf_conntrack_man_proto. Now using range2 fields src, dst, and base to store psid parameters.
    - Readded removed error check for nf_ct_expect_related()
    - Added new version to masquerade iptables extension to use the range2 base field.

 include/net/netfilter/nf_nat.h        |  1 +
 include/uapi/linux/netfilter/nf_nat.h |  3 +-
 net/netfilter/nf_nat_core.c           | 69 +++++++++++++++++++++++----
 net/netfilter/nf_nat_ftp.c            | 29 ++++++-----
 net/netfilter/nf_nat_helper.c         | 16 +++++--
 net/netfilter/nf_nat_masquerade.c     | 13 +++--
 net/netfilter/xt_MASQUERADE.c         | 44 +++++++++++++++--
 7 files changed, 140 insertions(+), 35 deletions(-)

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 987111ae5240..67cc033f76bb 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -32,6 +32,7 @@ struct nf_conn_nat {
 	union nf_conntrack_nat_help help;
 #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE)
 	int masq_index;
+	struct nf_nat_range2 *range;
 #endif
 };
 
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 #define NF_NAT_RANGE_NETMAP			(1 << 6)
+#define NF_NAT_RANGE_PSID			(1 << 7)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
 	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
-	 NF_NAT_RANGE_NETMAP)
+	 NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..7307bb28ece2 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,32 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 offset_mask = 0;
+	u16 psid_mask = 0;
+	u16 psid = 0;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u16 j = ntohs(max->all) - ntohs(min->all) + 1;
+		u16 a = (1 << 16) / ntohs(base->all);
+
+		offset_mask = (a - 1) * ntohs(base->all);
+		psid_mask = ((ntohs(base->all) / j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return ((ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +234,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return ((ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +262,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -360,10 +384,10 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
  *
  * Per-protocol part of tuple is initialized to the incoming packet.
  */
-static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-					const struct nf_nat_range2 *range,
-					enum nf_nat_manip_type maniptype,
-					const struct nf_conn *ct)
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range2 *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct)
 {
 	unsigned int range_size, min, max, i, attempts;
 	__be16 *keyptr;
@@ -420,6 +444,25 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		return;
 	}
 
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		/* PSID defines a group of port ranges, per PSID. PSID
+		 * is already contained in min and max.
+		 */
+		unsigned int min_to_max, base;
+
+		min = ntohs(range->min_proto.all);
+		max = ntohs(range->max_proto.all);
+		base = ntohs(range->base_proto.all);
+		min_to_max = max - min;
+		for (; max <= (1 << 16) - 1; min += base, max = min + min_to_max) {
+			for (off = 0; off <= min_to_max; off++) {
+				*keyptr = htons(min + off);
+				if (!nf_nat_used_tuple(tuple, ct))
+					return;
+			}
+		}
+	}
+
 	/* If no range specified... */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		/* If it's dst rewrite, can't change port */
@@ -529,11 +572,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 	/* Only bother mapping if it's not already in range and unique */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
-		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+		/* PSID mode is present always needs to check
+		 * to see if the source ports are in range.
+		 */
+		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED ||
+		    (range->flags & NF_NAT_RANGE_PSID &&
+		     !in_range(orig_tuple, range))) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index aace6768a64e..f65163278db0 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -17,6 +17,10 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_ftp.h>
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range2 *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct);
 
 #define NAT_HELPER_NAME "ftp"
 
@@ -72,8 +76,13 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
 	u_int16_t port;
 	int dir = CTINFO2DIR(ctinfo);
 	struct nf_conn *ct = exp->master;
+	struct nf_conn_nat *nat = nfct_nat(ct);
 	char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];
 	unsigned int buflen;
+	int ret;
+
+	if (WARN_ON_ONCE(!nat))
+		return NF_DROP;
 
 	pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen);
 
@@ -86,18 +95,14 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
 	 * this one. */
 	exp->expectfn = nf_nat_follow_master;
 
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		int ret;
-
-		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp, 0);
-		if (ret == 0)
-			break;
-		else if (ret != -EBUSY) {
-			port = 0;
-			break;
-		}
+	/* Find a port that matches the MASQ rule. */
+	nf_nat_l4proto_unique_tuple(&exp->tuple, nat->range,
+				    dir ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST,
+				    ct);
+	ret = nf_ct_expect_related(exp, 0);
+	port = ntohs(exp->tuple.dst.u.tcp.port);
+	if (ret != 0 && ret != -EBUSY) {
+		port = 0;
 	}
 
 	if (port == 0) {
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index a263505455fc..2d105e4eb8f8 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -179,15 +179,23 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 void nf_nat_follow_master(struct nf_conn *ct,
 			  struct nf_conntrack_expect *exp)
 {
+	struct nf_conn_nat *nat = NULL;
 	struct nf_nat_range2 range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
-	/* Change src to where master sends to */
-	range.flags = NF_NAT_RANGE_MAP_IPS;
-	range.min_addr = range.max_addr
-		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+	if (exp->master && !exp->dir) {
+		nat = nfct_nat(exp->master);
+		if (nat)
+			range = *nat->range;
+	}
+	if (!nat) {
+		/* Change src to where master sends to */
+		range.flags = NF_NAT_RANGE_MAP_IPS;
+		range.min_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+		range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+	}
 	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..d83cd3d8ad3f 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -45,10 +45,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 		return NF_DROP;
 	}
 
-	nat = nf_ct_nat_ext_add(ct);
-	if (nat)
-		nat->masq_index = out->ifindex;
-
 	/* Transfer from original range. */
 	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
 	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
@@ -57,6 +53,15 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.max_addr.ip = newsrc;
 	newrange.min_proto   = range->min_proto;
 	newrange.max_proto   = range->max_proto;
+	newrange.base_proto  = range->base_proto;
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat) {
+		nat->masq_index = out->ifindex;
+		if (!nat->range)
+			nat->range = kmalloc(sizeof(*nat->range), 0);
+		memcpy(nat->range, &newrange, sizeof(*nat->range));
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
index eae05c178336..dc6870ca2b71 100644
--- a/net/netfilter/xt_MASQUERADE.c
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -16,7 +16,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check_v0(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
@@ -31,8 +31,19 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 	return nf_ct_netns_get(par->net, par->family);
 }
 
+static int masquerade_tg_check_v1(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+		pr_debug("bad MAP_IPS.\n");
+		return -EINVAL;
+	}
+	return nf_ct_netns_get(par->net, par->family);
+}
+
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_nat_range2 range;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
@@ -46,6 +57,15 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				      xt_out(par));
 }
 
+static unsigned int
+masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range,
+				      xt_out(par));
+}
+
 static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	nf_ct_netns_put(par->net, par->family);
@@ -73,6 +93,7 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	{
 #if IS_ENABLED(CONFIG_IPV6)
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV6,
 		.target		= masquerade_tg6,
 		.targetsize	= sizeof(struct nf_nat_range),
@@ -84,15 +105,28 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	}, {
 #endif
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV4,
-		.target		= masquerade_tg,
+		.target		= masquerade_tg_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.table		= "nat",
 		.hooks		= 1 << NF_INET_POST_ROUTING,
-		.checkentry	= masquerade_tg_check,
+		.checkentry	= masquerade_tg_check_v0,
 		.destroy	= masquerade_tg_destroy,
 		.me		= THIS_MODULE,
-	}
+	},
+	{
+		.name		= "MASQUERADE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.target		= masquerade_tg_v1,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg_check_v1,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init masquerade_tg_init(void)
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-06-29  0:48   ` Cole Dishington
@ 2021-06-30 14:20     ` Florian Westphal
       [not found]       ` <20210705040856.25191-1-Cole.Dishington@alliedtelesis.co.nz>
  0 siblings, 1 reply; 26+ messages in thread
From: Florian Westphal @ 2021-06-30 14:20 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, Anthony Lineham, Scott Parlane, Blair Steven,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, netfilter-devel, coreteam, netdev, linux-kernel

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
>     Comments:
>     Selecting the ports for psid needs to be in nf_nat_core since the PSID ranges are not a single range. e.g. offset=1024, PSID=0, psid_length=8 generates the ranges 1024-1027, 2048-2051, ..., 63488-63491, ... (example taken from RFC7597 B.2).
>     This is why it is enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init upper/lower boundaries.

I suspect this misses a NOT.  But current algorithm has problems, see
below.

> +	if (range->flags & NF_NAT_RANGE_PSID) {
> +		/* PSID defines a group of port ranges, per PSID. PSID
> +		 * is already contained in min and max.
> +		 */
> +		unsigned int min_to_max, base;
> +
> +		min = ntohs(range->min_proto.all);
> +		max = ntohs(range->max_proto.all);
> +		base = ntohs(range->base_proto.all);
> +		min_to_max = max - min;
> +		for (; max <= (1 << 16) - 1; min += base, max = min + min_to_max) {
> +			for (off = 0; off <= min_to_max; off++) {
> +				*keyptr = htons(min + off);
> +				if (!nf_nat_used_tuple(tuple, ct))
> +					return;
> +			}
> +		}
> +	}

I fear this searches way too many ports.
We had softlockups in the past because of exhaustive searches.

See a504b703bb1da526a01593da0e4be2af9d9f5fa8
("netfilter: nat: limit port clash resolution attempts").

I suggest you try pre-selecting one of the eligible ranges in
nf_nat_masquerade_ipv4 when the 'newrange' is filled in and set
RANGE_PROTO_SPECIFIED.

Maybe even prandom-based preselection is good enough.

>  	/* If no range specified... */
>  	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
>  		/* If it's dst rewrite, can't change port */
> @@ -529,11 +572,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
>  
>  	/* Only bother mapping if it's not already in range and unique */
>  	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
> -		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
> +		/* PSID mode is present always needs to check
> +		 * to see if the source ports are in range.
> +		 */
> +		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED ||
> +		    (range->flags & NF_NAT_RANGE_PSID &&

Why the extra check?
Can't you set NF_NAT_RANGE_PROTO_SPECIFIED in case PSID is requested by
userspace?

> diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
> index aace6768a64e..f65163278db0 100644
> --- a/net/netfilter/nf_nat_ftp.c
> +++ b/net/netfilter/nf_nat_ftp.c
> @@ -17,6 +17,10 @@
>  #include <net/netfilter/nf_conntrack_helper.h>
>  #include <net/netfilter/nf_conntrack_expect.h>
>  #include <linux/netfilter/nf_conntrack_ftp.h>
> +void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
> +				 const struct nf_nat_range2 *range,
> +				 enum nf_nat_manip_type maniptype,
> +				 const struct nf_conn *ct);
>  
>  #define NAT_HELPER_NAME "ftp"
>  
> @@ -72,8 +76,13 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
>  	u_int16_t port;
>  	int dir = CTINFO2DIR(ctinfo);
>  	struct nf_conn *ct = exp->master;
> +	struct nf_conn_nat *nat = nfct_nat(ct);
>  	char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];
>  	unsigned int buflen;
> +	int ret;
> +
> +	if (WARN_ON_ONCE(!nat))
> +		return NF_DROP;
>  
>  	pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen);
>  
> @@ -86,18 +95,14 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
>  	 * this one. */
>  	exp->expectfn = nf_nat_follow_master;
>  
> -	/* Try to get same port: if not, try to change it. */
> -	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
> -		int ret;
> -
> -		exp->tuple.dst.u.tcp.port = htons(port);
> -		ret = nf_ct_expect_related(exp, 0);
> -		if (ret == 0)
> -			break;
> -		else if (ret != -EBUSY) {
> -			port = 0;
> -			break;
> -		}
> +	/* Find a port that matches the MASQ rule. */
> +	nf_nat_l4proto_unique_tuple(&exp->tuple, nat->range,
> +				    dir ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST,
> +				    ct);

Hmm, I am ignorant of the details here, but is this correct?

This could be an inbound connection, rather than outbound.

> diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
> index a263505455fc..2d105e4eb8f8 100644
> --- a/net/netfilter/nf_nat_helper.c
> +++ b/net/netfilter/nf_nat_helper.c
> @@ -179,15 +179,23 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
>  void nf_nat_follow_master(struct nf_conn *ct,
>  			  struct nf_conntrack_expect *exp)
>  {
> +	struct nf_conn_nat *nat = NULL;
>  	struct nf_nat_range2 range;
>  
>  	/* This must be a fresh one. */
>  	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
>  
> -	/* Change src to where master sends to */
> -	range.flags = NF_NAT_RANGE_MAP_IPS;
> -	range.min_addr = range.max_addr
> -		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
> +	if (exp->master && !exp->dir) {
> +		nat = nfct_nat(exp->master);
> +		if (nat)
> +			range = *nat->range;

Can't you store the psid-relevant parts of the range struct only?
Non-PSID doesn't need the original range, so why do you?

> diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
> index 8e8a65d46345..d83cd3d8ad3f 100644
> --- a/net/netfilter/nf_nat_masquerade.c
> +++ b/net/netfilter/nf_nat_masquerade.c
> @@ -45,10 +45,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
>  		return NF_DROP;
>  	}
>  
> -	nat = nf_ct_nat_ext_add(ct);
> -	if (nat)
> -		nat->masq_index = out->ifindex;
> -
>  	/* Transfer from original range. */
>  	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
>  	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
> @@ -57,6 +53,15 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
>  	newrange.max_addr.ip = newsrc;
>  	newrange.min_proto   = range->min_proto;
>  	newrange.max_proto   = range->max_proto;
> +	newrange.base_proto  = range->base_proto;
> +
> +	nat = nf_ct_nat_ext_add(ct);
> +	if (nat) {
> +		nat->masq_index = out->ifindex;
> +		if (!nat->range)
> +			nat->range = kmalloc(sizeof(*nat->range), 0);
> +		memcpy(nat->range, &newrange, sizeof(*nat->range));

kmemdup.  Also misses error handling.  Should use GFP_ATOMIC.
Where is this freed again?

It would be good if you could chop this up in smaller chunks.
A selftest would be nice as well (see tools/testing/selftests/netfilter).

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
       [not found]       ` <20210705040856.25191-1-Cole.Dishington@alliedtelesis.co.nz>
@ 2021-07-05  4:08         ` Cole Dishington
  2021-07-05  4:08         ` [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-07-05  4:08         ` [PATCH] " Cole Dishington
  2 siblings, 0 replies; 26+ messages in thread
From: Cole Dishington @ 2021-07-05  4:08 UTC (permalink / raw)
  To: pablo
  Cc: Cole Dishington, Anthony Lineham, Scott Parlane, Blair Steven,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, netfilter-devel, coreteam, linux-kernel, netdev

Add support for revision 1 of the xtables MASQUERADE extension.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---
 include/uapi/linux/netfilter/nf_nat.h |  3 +-
 net/netfilter/xt_MASQUERADE.c         | 44 ++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 #define NF_NAT_RANGE_NETMAP			(1 << 6)
+#define NF_NAT_RANGE_PSID			(1 << 7)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
 	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
-	 NF_NAT_RANGE_NETMAP)
+	 NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
index eae05c178336..dc6870ca2b71 100644
--- a/net/netfilter/xt_MASQUERADE.c
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -16,7 +16,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check_v0(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
@@ -31,8 +31,19 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 	return nf_ct_netns_get(par->net, par->family);
 }
 
+static int masquerade_tg_check_v1(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+		pr_debug("bad MAP_IPS.\n");
+		return -EINVAL;
+	}
+	return nf_ct_netns_get(par->net, par->family);
+}
+
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_nat_range2 range;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
@@ -46,6 +57,15 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				      xt_out(par));
 }
 
+static unsigned int
+masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range,
+				      xt_out(par));
+}
+
 static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	nf_ct_netns_put(par->net, par->family);
@@ -73,6 +93,7 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	{
 #if IS_ENABLED(CONFIG_IPV6)
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV6,
 		.target		= masquerade_tg6,
 		.targetsize	= sizeof(struct nf_nat_range),
@@ -84,15 +105,28 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	}, {
 #endif
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV4,
-		.target		= masquerade_tg,
+		.target		= masquerade_tg_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.table		= "nat",
 		.hooks		= 1 << NF_INET_POST_ROUTING,
-		.checkentry	= masquerade_tg_check,
+		.checkentry	= masquerade_tg_check_v0,
 		.destroy	= masquerade_tg_destroy,
 		.me		= THIS_MODULE,
-	}
+	},
+	{
+		.name		= "MASQUERADE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.target		= masquerade_tg_v1,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg_check_v1,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init masquerade_tg_init(void)
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
       [not found]       ` <20210705040856.25191-1-Cole.Dishington@alliedtelesis.co.nz>
  2021-07-05  4:08         ` [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
@ 2021-07-05  4:08         ` Cole Dishington
  2021-07-05 10:39           ` Florian Westphal
  2021-07-05  4:08         ` [PATCH] " Cole Dishington
  2 siblings, 1 reply; 26+ messages in thread
From: Cole Dishington @ 2021-07-05  4:08 UTC (permalink / raw)
  To: pablo
  Cc: Cole Dishington, Anthony Lineham, Scott Parlane, Blair Steven,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, netfilter-devel, coreteam, netdev, linux-kernel

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Changes in v3:
    - Select a pseudo-random port range (for a given psid) to search in
    nf_nat_l4proto_unique_tuple(), rather than an exhaustive search of all
    port ranges (for a given psid).
    - Remove extra check in get_unique_tuple for psid, it is not needed
    if NF_NAT_RANGE_PROTO_SPECIFIED is set.

 net/netfilter/nf_nat_core.c       | 33 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 17 ++++++++++++++--
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..1fbf98cade41 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,30 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u16 j = ntohs(max->all) - ntohs(min->all) + 1;
+		u16 a = (1 << 16) / ntohs(base->all);
+
+		offset_mask = (a - 1) * ntohs(base->all);
+		psid_mask = ((ntohs(base->all) / j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return ((ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +232,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return ((ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +260,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +554,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..423b3774e65c 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,21 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 off = prandom_u32();
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 max_off = ((1 << 16) / base) - 1;
+
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+		newrange.min_proto.all   = htons(min + base * (off % max_off));
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests
       [not found]       ` <20210705040856.25191-1-Cole.Dishington@alliedtelesis.co.nz>
  2021-07-05  4:08         ` [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
  2021-07-05  4:08         ` [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-07-05  4:08         ` Cole Dishington
  2 siblings, 0 replies; 26+ messages in thread
From: Cole Dishington @ 2021-07-05  4:08 UTC (permalink / raw)
  To: pablo; +Cc: Cole Dishington, Shuah Khan, linux-kernel, linux-kselftest

Add selftests for masquerading into a smaller subset of ports defined by
PSID.

Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---
 .../netfilter/nat_masquerade_psid.sh          | 158 ++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

diff --git a/tools/testing/selftests/netfilter/nat_masquerade_psid.sh b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
new file mode 100644
index 000000000000..90e2e5ca4d68
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# <:copyright-gpl
+# Copyright (C) 2021 Allied Telesis Labs NZ
+#
+# check that NAT can masquerade using PSID defined ranges.
+#
+# Setup is:
+#
+# nsclient1(veth0) -> (veth1)nsrouter(veth2) -> (veth0)nsclient2
+# Setup a nat masquerade rule with psid defined ranges.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+ns_all="nsclient1 nsrouter nsclient2"
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+readonly server_port=8080
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iptables tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ipv4() {
+	echo -n 192.168.$1.$2
+}
+
+cleanup() {
+	for n in $ns_all; do ip netns del $n;done
+
+	if [ -f "${outfile}" ]; then
+		rm "$outfile"
+	fi
+	if [ -f "${infile}" ]; then
+		rm "$infile"
+	fi
+}
+
+server_listen() {
+	ip netns exec nsclient2 nc -l -p "$server_port" > "$outfile" &
+	server_pid=$!
+	sleep 0.2
+}
+
+client_connect() {
+	ip netns exec nsclient1 timeout 2 nc -w 1 -p "$port" $(ipv4 2 2) "$server_port" < $infile
+}
+
+verify_data() {
+	local _ret=0
+	wait "$server_pid"
+	cmp "$infile" "$outfile" 2>/dev/null
+	_ret=$?
+	rm "$outfile"
+	return $_ret
+}
+
+test_service() {
+	server_listen
+	client_connect
+	verify_data
+}
+
+check_connection() {
+	entry=$(ip netns exec nsrouter conntrack -p tcp --sport $port -L 2>&1)
+	entry=${entry##*sport=8080 dport=}
+	entry=${entry%% *}
+	[[ "x$(( ($entry & $psid_mask) / $two_power_j ))" = "x$psid" ]]
+}
+
+run_test() {
+	ip netns exec nsrouter iptables -A FORWARD -i veth1 -j ACCEPT
+	ip netns exec nsrouter iptables -P FORWARD DROP
+	ip netns exec nsrouter iptables -A FORWARD -m state --state ESTABLISHED,RELATED -j ACCEPT
+	ip netns exec nsrouter iptables -t nat --new psid
+	ip netns exec nsrouter iptables -t nat --insert psid -j MASQUERADE --psid $offset:$psid:$psid_length
+	ip netns exec nsrouter iptables -t nat -I POSTROUTING -o veth2 -j psid
+
+	# calculate psid mask
+	two_power_j=$(( $offset / (1 << $psid_length) ))
+	psid_mask=$(( ( (1 << $psid_length) - 1) * $two_power_j ))
+
+	# Create file
+	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 >/dev/null 2>&1
+
+	# Test multiple ports
+	for p in 1 2 3 4 5; do
+		port=1080$p
+
+		test_service
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+
+		check_connection
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+	done
+
+	# tidy up test rules
+	ip netns exec nsrouter iptables -F
+	ip netns exec nsrouter iptables -t nat -F
+	ip netns exec nsrouter iptables -t nat -X psid
+}
+
+for n in $ns_all; do
+	ip netns add $n
+	ip -net $n link set lo up
+done
+
+for i in 1 2; do
+	ip link add veth0 netns nsclient$i type veth peer name veth$i netns nsrouter
+
+	ip -net nsclient$i link set veth0 up
+	ip -net nsclient$i addr add $(ipv4 $i 2)/24 dev veth0
+
+	ip -net nsrouter link set veth$i up
+	ip -net nsrouter addr add $(ipv4 $i 1)/24 dev veth$i
+done
+
+ip -net nsclient1 route add default via $(ipv4 1 1)
+ip -net nsclient2 route add default via $(ipv4 2 1)
+
+ip netns exec nsrouter sysctl -q net.ipv4.conf.all.forwarding=1
+
+offset=1024
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+cleanup
+exit $ret
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-05  4:08         ` [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-07-05 10:39           ` Florian Westphal
  2021-07-16  0:27             ` [PATCH 0/3] " Cole Dishington
  0 siblings, 1 reply; 26+ messages in thread
From: Florian Westphal @ 2021-07-05 10:39 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, Anthony Lineham, Scott Parlane, Blair Steven,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, netfilter-devel, coreteam, netdev, linux-kernel

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> Adds support for masquerading into a smaller subset of ports -
> defined by the PSID values from RFC-7597 Section 5.1. This is part of
> the support for MAP-E and Lightweight 4over6, which allows multiple
> devices to share an IPv4 address by splitting the L4 port / id into
> ranges.
> 
> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> ---

Just a quick review:
> +	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
> +	if (is_psid) {
> +		u16 j = ntohs(max->all) - ntohs(min->all) + 1;
> +		u16 a = (1 << 16) / ntohs(base->all);

This gives crash when base->all is 0.
If this is impossible, please add a comment, otherwise this needs
a sanity test on the divisor.

> @@ -55,8 +55,21 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
>  	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
>  	newrange.min_addr.ip = newsrc;
>  	newrange.max_addr.ip = newsrc;
> -	newrange.min_proto   = range->min_proto;
> -	newrange.max_proto   = range->max_proto;
> +
> +	if (range->flags & NF_NAT_RANGE_PSID) {
> +		u16 off = prandom_u32();
> +		u16 base = ntohs(range->base_proto.all);
> +		u16 min =  ntohs(range->min_proto.all);
> +		u16 max_off = ((1 << 16) / base) - 1;
> +
> +		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
> +		newrange.min_proto.all   = htons(min + base * (off % max_off));

Same here for base and max_off.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 0/3] Add RFC-7597 Section 5.1 PSID support
  2021-07-05 10:39           ` Florian Westphal
@ 2021-07-16  0:27             ` Cole Dishington
  2021-07-16  0:27               ` [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
                                 ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Cole Dishington @ 2021-07-16  0:27 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington

Thanks for your time reviewing!

Changes in v4:
- Handle special case of no offset bits (a=0 / A=2^16).

Cole Dishington (3):
  net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  net: netfilter: Add RFC-7597 Section 5.1 PSID support
  selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests

 include/uapi/linux/netfilter/nf_nat.h         |   3 +-
 net/netfilter/nf_nat_core.c                   |  39 +++-
 net/netfilter/nf_nat_masquerade.c             |  20 +-
 net/netfilter/xt_MASQUERADE.c                 |  44 ++++-
 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 5 files changed, 276 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

-- 
2.32.0


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  2021-07-16  0:27             ` [PATCH 0/3] " Cole Dishington
@ 2021-07-16  0:27               ` Cole Dishington
  2021-07-16  0:27               ` [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-07-16  0:27               ` [PATCH " Cole Dishington
  2 siblings, 0 replies; 26+ messages in thread
From: Cole Dishington @ 2021-07-16  0:27 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington,
	Anthony Lineham, Scott Parlane, Blair Steven

Add support for revision 1 of the xtables masquerade extension.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---
 include/uapi/linux/netfilter/nf_nat.h |  3 +-
 net/netfilter/xt_MASQUERADE.c         | 44 ++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 #define NF_NAT_RANGE_NETMAP			(1 << 6)
+#define NF_NAT_RANGE_PSID			(1 << 7)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
 	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
-	 NF_NAT_RANGE_NETMAP)
+	 NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
index eae05c178336..dc6870ca2b71 100644
--- a/net/netfilter/xt_MASQUERADE.c
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -16,7 +16,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check_v0(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
@@ -31,8 +31,19 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 	return nf_ct_netns_get(par->net, par->family);
 }
 
+static int masquerade_tg_check_v1(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+		pr_debug("bad MAP_IPS.\n");
+		return -EINVAL;
+	}
+	return nf_ct_netns_get(par->net, par->family);
+}
+
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_nat_range2 range;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
@@ -46,6 +57,15 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				      xt_out(par));
 }
 
+static unsigned int
+masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range,
+				      xt_out(par));
+}
+
 static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	nf_ct_netns_put(par->net, par->family);
@@ -73,6 +93,7 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	{
 #if IS_ENABLED(CONFIG_IPV6)
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV6,
 		.target		= masquerade_tg6,
 		.targetsize	= sizeof(struct nf_nat_range),
@@ -84,15 +105,28 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	}, {
 #endif
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV4,
-		.target		= masquerade_tg,
+		.target		= masquerade_tg_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.table		= "nat",
 		.hooks		= 1 << NF_INET_POST_ROUTING,
-		.checkentry	= masquerade_tg_check,
+		.checkentry	= masquerade_tg_check_v0,
 		.destroy	= masquerade_tg_destroy,
 		.me		= THIS_MODULE,
-	}
+	},
+	{
+		.name		= "MASQUERADE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.target		= masquerade_tg_v1,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg_check_v1,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init masquerade_tg_init(void)
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-16  0:27             ` [PATCH 0/3] " Cole Dishington
  2021-07-16  0:27               ` [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
@ 2021-07-16  0:27               ` Cole Dishington
  2021-07-16 15:18                 ` Florian Westphal
  2021-07-16  0:27               ` [PATCH " Cole Dishington
  2 siblings, 1 reply; 26+ messages in thread
From: Cole Dishington @ 2021-07-16  0:27 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington,
	Anthony Lineham, Scott Parlane, Blair Steven

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing!
    
    Changes in v4:
    - Add support for a=0 (A=2^16) special case.
    - Now that offset=0 is used for the special case of 2^16 (as it cannot fit
      in a u16) the divide by zero issues are no longer present.

 net/netfilter/nf_nat_core.c       | 39 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 20 ++++++++++++++--
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..4a9448684504 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u16 power_j = ntohs(max->all) - ntohs(min->all) + 1;
+		u32 offset = ntohs(base->all);
+		u16 power_a;
+
+		if (offset == 0)
+			offset = 1 << 16;
+
+		power_a = (1 << 16) / offset;
+		offset_mask = (power_a - 1) * offset;
+		psid_mask = ((offset / power_j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return (offset_mask == 0 ||
+				(ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +238,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return (offset_mask == 0 || (ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +266,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +560,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..7e2fb0da344a 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,24 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 off = 0;
+
+		/* If offset=0, port range is in one contiguous block */
+		if (base)
+			off = prandom_u32() % (((1 << 16) / base) - 1);
+
+		newrange.min_proto.all   = htons(min + base * off);
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests
  2021-07-16  0:27             ` [PATCH 0/3] " Cole Dishington
  2021-07-16  0:27               ` [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
  2021-07-16  0:27               ` [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-07-16  0:27               ` Cole Dishington
  2 siblings, 0 replies; 26+ messages in thread
From: Cole Dishington @ 2021-07-16  0:27 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington

Add selftests for masquerading into a smaller subset of ports defined by
PSID.

Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing!
    
    Changes in v4:
    - Add tests for a=0 (A=2^16) special case.
    - Update to use offset_length (from iptables change).

 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

diff --git a/tools/testing/selftests/netfilter/nat_masquerade_psid.sh b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
new file mode 100644
index 000000000000..56c1b509caf6
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# <:copyright-gpl
+# Copyright (C) 2021 Allied Telesis Labs NZ
+#
+# check that NAT can masquerade using PSID defined ranges.
+#
+# Setup is:
+#
+# nsclient1(veth0) -> (veth1)nsrouter(veth2) -> (veth0)nsclient2
+# Setup a nat masquerade rule with psid defined ranges.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+ns_all="nsclient1 nsrouter nsclient2"
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+readonly server_port=8080
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iptables tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ipv4() {
+	echo -n 192.168.$1.$2
+}
+
+cleanup() {
+	for n in $ns_all; do ip netns del $n;done
+
+	if [ -f "${outfile}" ]; then
+		rm "$outfile"
+	fi
+	if [ -f "${infile}" ]; then
+		rm "$infile"
+	fi
+}
+
+server_listen() {
+	ip netns exec nsclient2 nc -l -p "$server_port" > "$outfile" &
+	server_pid=$!
+	sleep 0.2
+}
+
+client_connect() {
+	ip netns exec nsclient1 timeout 2 nc -w 1 -p "$port" $(ipv4 2 2) "$server_port" < $infile
+}
+
+verify_data() {
+	local _ret=0
+	wait "$server_pid"
+	cmp "$infile" "$outfile" 2>/dev/null
+	_ret=$?
+	rm "$outfile"
+	return $_ret
+}
+
+test_service() {
+	server_listen
+	client_connect
+	verify_data
+}
+
+check_connection() {
+	local _ret=0
+	entry=$(ip netns exec nsrouter conntrack -p tcp --sport $port -L 2>&1)
+	entry=${entry##*sport=8080 dport=}
+	entry=${entry%% *}
+
+	if [[ "x$(( ($entry & $psid_mask) / $two_power_j ))" != "x$psid" ]]; then
+		_ret=1
+		echo "Failed psid mask check for $offset_len:$psid:$psid_length with port $entry"
+	fi
+
+	if [[ "x$_ret" = "x0" ]] &&
+	   [[ "x$offset_mask" != "x0" -a "x$(( ($entry & $offset_mask) ))" == "x0" ]]; then
+		_ret=1
+		echo "Failed offset mask check for $offset_len:$psid:$psid_length with port $entry"
+	fi
+	return $_ret
+}
+
+run_test() {
+	ip netns exec nsrouter iptables -A FORWARD -i veth1 -j ACCEPT
+	ip netns exec nsrouter iptables -P FORWARD DROP
+	ip netns exec nsrouter iptables -A FORWARD -m state --state ESTABLISHED,RELATED -j ACCEPT
+	ip netns exec nsrouter iptables -t nat --new psid
+	ip netns exec nsrouter iptables -t nat --insert psid -j MASQUERADE \
+		--psid $offset_len:$psid:$psid_length
+	ip netns exec nsrouter iptables -t nat -I POSTROUTING -o veth2 -j psid
+
+	# calculate psid mask
+	offset=$(( 1 << (16 - $offset_len) ))
+	two_power_j=$(( $offset / (1 << $psid_length) ))
+	offset_mask=$(( ( (1 << $offset_len) - 1 ) << (16 - $offset_len) ))
+	psid_mask=$(( ( (1 << $psid_length) - 1) * $two_power_j ))
+
+	# Create file
+	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 >/dev/null 2>&1
+
+	# Test multiple ports
+	for p in 1 2 3 4 5; do
+		port=1080$p
+
+		test_service
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+
+		check_connection
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+	done
+
+	# tidy up test rules
+	ip netns exec nsrouter iptables -F
+	ip netns exec nsrouter iptables -t nat -F
+	ip netns exec nsrouter iptables -t nat -X psid
+}
+
+for n in $ns_all; do
+	ip netns add $n
+	ip -net $n link set lo up
+done
+
+for i in 1 2; do
+	ip link add veth0 netns nsclient$i type veth peer name veth$i netns nsrouter
+
+	ip -net nsclient$i link set veth0 up
+	ip -net nsclient$i addr add $(ipv4 $i 2)/24 dev veth0
+
+	ip -net nsrouter link set veth$i up
+	ip -net nsrouter addr add $(ipv4 $i 1)/24 dev veth$i
+done
+
+ip -net nsclient1 route add default via $(ipv4 1 1)
+ip -net nsclient2 route add default via $(ipv4 2 1)
+
+ip netns exec nsrouter sysctl -q net.ipv4.conf.all.forwarding=1
+
+offset_len=0
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+offset_len=6
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+cleanup
+exit $ret
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-16  0:27               ` [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-07-16 15:18                 ` Florian Westphal
  2021-07-19  1:21                   ` Cole Dishington
  0 siblings, 1 reply; 26+ messages in thread
From: Florian Westphal @ 2021-07-16 15:18 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, kadlec, fw, davem, kuba, shuah, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
> index 7de595ead06a..4a9448684504 100644
> --- a/net/netfilter/nf_nat_core.c
> +++ b/net/netfilter/nf_nat_core.c
> @@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
>  static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
>  			     enum nf_nat_manip_type maniptype,
>  			     const union nf_conntrack_man_proto *min,
> -			     const union nf_conntrack_man_proto *max)
> +			     const union nf_conntrack_man_proto *max,
> +			     const union nf_conntrack_man_proto *base,
> +			     bool is_psid)
>  {
>  	__be16 port;
> +	u16 psid, psid_mask, offset_mask;
> +
> +	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
> +	if (is_psid) {
> +		u16 power_j = ntohs(max->all) - ntohs(min->all) + 1;
> +		u32 offset = ntohs(base->all);
> +		u16 power_a;
> +
> +		if (offset == 0)
> +			offset = 1 << 16;
> +
> +		power_a = (1 << 16) / offset;

Since the divide is only needed at NAT setup and not for each packet, I
think it's ok.

> +	if (range->flags & NF_NAT_RANGE_PSID) {
> +		u16 base = ntohs(range->base_proto.all);
> +		u16 min =  ntohs(range->min_proto.all);
> +		u16 off = 0;
> +
> +		/* If offset=0, port range is in one contiguous block */
> +		if (base)
> +			off = prandom_u32() % (((1 << 16) / base) - 1);

Bases >= 32769 give 0 for the modulo value, so perhaps compute that
independently.

You could reject > 32769 in the iptables checkentry target.

Also, bases of 21846 and above always give a 0 result (% 1).

I don't know psid well enough to give a recommendation here.

If such inputs are nonsensical, just reject it when userspace asks for
this and add a 

if (WARN_ON_ONCE(base > bogus))
	return NF_DROP;

with a small comment explaining that xtables is supposed to not provide
such a value.

Other than this I think it's ok.

I still dislike the 'bool is_psid' in the nat core, but I can't find
a better solution.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-16 15:18                 ` Florian Westphal
@ 2021-07-19  1:21                   ` Cole Dishington
  2021-07-22  7:17                     ` Florian Westphal
  0 siblings, 1 reply; 26+ messages in thread
From: Cole Dishington @ 2021-07-19  1:21 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington,
	Anthony Lineham, Scott Parlane, Blair Steven

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing!
    
    Changes in v5:
    - Add WARN_ON_ONCE for invalid value of range->base.

 net/netfilter/nf_nat_core.c       | 39 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 27 +++++++++++++++++++--
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..4a9448684504 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u16 power_j = ntohs(max->all) - ntohs(min->all) + 1;
+		u32 offset = ntohs(base->all);
+		u16 power_a;
+
+		if (offset == 0)
+			offset = 1 << 16;
+
+		power_a = (1 << 16) / offset;
+		offset_mask = (power_a - 1) * offset;
+		psid_mask = ((offset / power_j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return (offset_mask == 0 ||
+				(ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +238,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return (offset_mask == 0 || (ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +266,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +560,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..dea6106f1699 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 off = 0;
+
+		/* xtables should stop base > 2^15 by enforcement of
+		 * 0 <= offset_len < 16 argument, with offset_len=0
+		 * as a special case in which base=0.
+		 */
+		if (WARN_ON_ONCE(base > (1 << 15)))
+			return NF_DROP;
+
+		/* If offset=0, port range is in one contiguous block */
+		if (base)
+			off = prandom_u32() % (((1 << 16) / base) - 1);
+
+		newrange.min_proto.all   = htons(min + base * off);
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-19  1:21                   ` Cole Dishington
@ 2021-07-22  7:17                     ` Florian Westphal
  2021-07-25 23:28                       ` Cole Dishington
  0 siblings, 1 reply; 26+ messages in thread
From: Florian Westphal @ 2021-07-22  7:17 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, kadlec, fw, davem, kuba, shuah, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> Adds support for masquerading into a smaller subset of ports -
> defined by the PSID values from RFC-7597 Section 5.1. This is part of
> the support for MAP-E and Lightweight 4over6, which allows multiple
> devices to share an IPv4 address by splitting the L4 port / id into
> ranges.
> 
> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> ---
> +
> +	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
> +	if (is_psid) {
> +		u16 power_j = ntohs(max->all) - ntohs(min->all) + 1;

I think this needs to be 'u32 power_j' to prevent overflow of
65535 + 1 -> 0.

> +		if (base)
> +			off = prandom_u32() % (((1 << 16) / base) - 1);

I think this can use prandom_u32_max(((1 << 16) / base) - 1).

I have no other comments.  Other kernel patches LGTM.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-22  7:17                     ` Florian Westphal
@ 2021-07-25 23:28                       ` Cole Dishington
  2021-07-26 14:37                         ` Florian Westphal
  0 siblings, 1 reply; 26+ messages in thread
From: Cole Dishington @ 2021-07-25 23:28 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington,
	Anthony Lineham, Scott Parlane, Blair Steven

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing!
    
    Changes in v6:
    - Use prandom_u32_max() rather than prandom_u32() % max for generating PSID sub-range offset.
    - Use u32 for power_j for the case of a=0,psid_len=0.

 net/netfilter/nf_nat_core.c       | 39 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 27 +++++++++++++++++++--
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..f07a3473aab5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u32 power_j = ntohs(max->all) - ntohs(min->all) + 1;
+		u32 offset = ntohs(base->all);
+		u16 power_a;
+
+		if (offset == 0)
+			offset = 1 << 16;
+
+		power_a = (1 << 16) / offset;
+		offset_mask = (power_a - 1) * offset;
+		psid_mask = ((offset / power_j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return (offset_mask == 0 ||
+				(ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +238,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return (offset_mask == 0 || (ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +266,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +560,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..19a4754cda76 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 off = 0;
+
+		/* xtables should stop base > 2^15 by enforcement of
+		 * 0 <= offset_len < 16 argument, with offset_len=0
+		 * as a special case in which base=0.
+		 */
+		if (WARN_ON_ONCE(base > (1 << 15)))
+			return NF_DROP;
+
+		/* If offset=0, port range is in one contiguous block */
+		if (base)
+			off = prandom_u32_max(((1 << 16) / base) - 1);
+
+		newrange.min_proto.all   = htons(min + base * off);
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-25 23:28                       ` Cole Dishington
@ 2021-07-26 14:37                         ` Florian Westphal
  2021-08-09  4:10                           ` [PATCH net-next 0/3] " Cole Dishington
  0 siblings, 1 reply; 26+ messages in thread
From: Florian Westphal @ 2021-07-26 14:37 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, kadlec, fw, davem, kuba, shuah, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> Adds support for masquerading into a smaller subset of ports -
> defined by the PSID values from RFC-7597 Section 5.1. This is part of
> the support for MAP-E and Lightweight 4over6, which allows multiple
> devices to share an IPv4 address by splitting the L4 port / id into
> ranges.
> 
> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> ---

Thanks for your patience and addressing all the comments.

Reviewed-by: Florian Westphal <fw@strlen.de>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH net-next 0/3] Add RFC-7597 Section 5.1 PSID support
  2021-07-26 14:37                         ` Florian Westphal
@ 2021-08-09  4:10                           ` Cole Dishington
  2021-08-09  4:10                             ` [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
                                               ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Cole Dishington @ 2021-08-09  4:10 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, Cole.Dishington, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest

Thanks for your time reviewing!

Changes in v7:
- Added net-next to subject.
- Added Reviewed-by: Florian Westphal <fw@strlen.de> to patch 2/3.

Cole Dishington (3):
  net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  net: netfilter: Add RFC-7597 Section 5.1 PSID support
  selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests

 include/uapi/linux/netfilter/nf_nat.h         |   3 +-
 net/netfilter/nf_nat_core.c                   |  39 +++-
 net/netfilter/nf_nat_masquerade.c             |  27 ++-
 net/netfilter/xt_MASQUERADE.c                 |  44 ++++-
 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 5 files changed, 283 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

-- 
2.32.0


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  2021-08-09  4:10                           ` [PATCH net-next 0/3] " Cole Dishington
@ 2021-08-09  4:10                             ` Cole Dishington
  2021-08-09  4:10                             ` [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-08-09  4:10                             ` [PATCH net-next 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests Cole Dishington
  2 siblings, 0 replies; 26+ messages in thread
From: Cole Dishington @ 2021-08-09  4:10 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, Cole.Dishington, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Add support for revision 2 of xtables masquerade extension.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    No changes.

 include/uapi/linux/netfilter/nf_nat.h |  3 +-
 net/netfilter/xt_MASQUERADE.c         | 44 ++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 #define NF_NAT_RANGE_NETMAP			(1 << 6)
+#define NF_NAT_RANGE_PSID			(1 << 7)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
 	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
-	 NF_NAT_RANGE_NETMAP)
+	 NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
index eae05c178336..dc6870ca2b71 100644
--- a/net/netfilter/xt_MASQUERADE.c
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -16,7 +16,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check_v0(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
@@ -31,8 +31,19 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 	return nf_ct_netns_get(par->net, par->family);
 }
 
+static int masquerade_tg_check_v1(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+		pr_debug("bad MAP_IPS.\n");
+		return -EINVAL;
+	}
+	return nf_ct_netns_get(par->net, par->family);
+}
+
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_nat_range2 range;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
@@ -46,6 +57,15 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				      xt_out(par));
 }
 
+static unsigned int
+masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range,
+				      xt_out(par));
+}
+
 static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	nf_ct_netns_put(par->net, par->family);
@@ -73,6 +93,7 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	{
 #if IS_ENABLED(CONFIG_IPV6)
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV6,
 		.target		= masquerade_tg6,
 		.targetsize	= sizeof(struct nf_nat_range),
@@ -84,15 +105,28 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	}, {
 #endif
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV4,
-		.target		= masquerade_tg,
+		.target		= masquerade_tg_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.table		= "nat",
 		.hooks		= 1 << NF_INET_POST_ROUTING,
-		.checkentry	= masquerade_tg_check,
+		.checkentry	= masquerade_tg_check_v0,
 		.destroy	= masquerade_tg_destroy,
 		.me		= THIS_MODULE,
-	}
+	},
+	{
+		.name		= "MASQUERADE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.target		= masquerade_tg_v1,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg_check_v1,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init masquerade_tg_init(void)
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-08-09  4:10                           ` [PATCH net-next 0/3] " Cole Dishington
  2021-08-09  4:10                             ` [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
@ 2021-08-09  4:10                             ` Cole Dishington
  2021-08-25 17:05                               ` Pablo Neira Ayuso
  2021-08-09  4:10                             ` [PATCH net-next 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests Cole Dishington
  2 siblings, 1 reply; 26+ messages in thread
From: Cole Dishington @ 2021-08-09  4:10 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, Cole.Dishington, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
Reviewed-by: Florian Westphal <fw@strlen.de>
---

Notes:
    Changes:
    - Added Reviewed-by: Florian Westphal <fw@strlen.de>.

 net/netfilter/nf_nat_core.c       | 39 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 27 +++++++++++++++++++--
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..f07a3473aab5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u32 power_j = ntohs(max->all) - ntohs(min->all) + 1;
+		u32 offset = ntohs(base->all);
+		u16 power_a;
+
+		if (offset == 0)
+			offset = 1 << 16;
+
+		power_a = (1 << 16) / offset;
+		offset_mask = (power_a - 1) * offset;
+		psid_mask = ((offset / power_j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return (offset_mask == 0 ||
+				(ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +238,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return (offset_mask == 0 || (ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +266,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +560,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..19a4754cda76 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 off = 0;
+
+		/* xtables should stop base > 2^15 by enforcement of
+		 * 0 <= offset_len < 16 argument, with offset_len=0
+		 * as a special case in which base=0.
+		 */
+		if (WARN_ON_ONCE(base > (1 << 15)))
+			return NF_DROP;
+
+		/* If offset=0, port range is in one contiguous block */
+		if (base)
+			off = prandom_u32_max(((1 << 16) / base) - 1);
+
+		newrange.min_proto.all   = htons(min + base * off);
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests
  2021-08-09  4:10                           ` [PATCH net-next 0/3] " Cole Dishington
  2021-08-09  4:10                             ` [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
  2021-08-09  4:10                             ` [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-08-09  4:10                             ` Cole Dishington
  2 siblings, 0 replies; 26+ messages in thread
From: Cole Dishington @ 2021-08-09  4:10 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, Cole.Dishington, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest

Add selftests for masquerading into a smaller subset of ports defined by
PSID.

Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    No changes.

 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

diff --git a/tools/testing/selftests/netfilter/nat_masquerade_psid.sh b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
new file mode 100644
index 000000000000..56c1b509caf6
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# <:copyright-gpl
+# Copyright (C) 2021 Allied Telesis Labs NZ
+#
+# check that NAT can masquerade using PSID defined ranges.
+#
+# Setup is:
+#
+# nsclient1(veth0) -> (veth1)nsrouter(veth2) -> (veth0)nsclient2
+# Setup a nat masquerade rule with psid defined ranges.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+ns_all="nsclient1 nsrouter nsclient2"
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+readonly server_port=8080
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iptables tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ipv4() {
+	echo -n 192.168.$1.$2
+}
+
+cleanup() {
+	for n in $ns_all; do ip netns del $n;done
+
+	if [ -f "${outfile}" ]; then
+		rm "$outfile"
+	fi
+	if [ -f "${infile}" ]; then
+		rm "$infile"
+	fi
+}
+
+server_listen() {
+	ip netns exec nsclient2 nc -l -p "$server_port" > "$outfile" &
+	server_pid=$!
+	sleep 0.2
+}
+
+client_connect() {
+	ip netns exec nsclient1 timeout 2 nc -w 1 -p "$port" $(ipv4 2 2) "$server_port" < $infile
+}
+
+verify_data() {
+	local _ret=0
+	wait "$server_pid"
+	cmp "$infile" "$outfile" 2>/dev/null
+	_ret=$?
+	rm "$outfile"
+	return $_ret
+}
+
+test_service() {
+	server_listen
+	client_connect
+	verify_data
+}
+
+check_connection() {
+	local _ret=0
+	entry=$(ip netns exec nsrouter conntrack -p tcp --sport $port -L 2>&1)
+	entry=${entry##*sport=8080 dport=}
+	entry=${entry%% *}
+
+	if [[ "x$(( ($entry & $psid_mask) / $two_power_j ))" != "x$psid" ]]; then
+		_ret=1
+		echo "Failed psid mask check for $offset_len:$psid:$psid_length with port $entry"
+	fi
+
+	if [[ "x$_ret" = "x0" ]] &&
+	   [[ "x$offset_mask" != "x0" -a "x$(( ($entry & $offset_mask) ))" == "x0" ]]; then
+		_ret=1
+		echo "Failed offset mask check for $offset_len:$psid:$psid_length with port $entry"
+	fi
+	return $_ret
+}
+
+run_test() {
+	ip netns exec nsrouter iptables -A FORWARD -i veth1 -j ACCEPT
+	ip netns exec nsrouter iptables -P FORWARD DROP
+	ip netns exec nsrouter iptables -A FORWARD -m state --state ESTABLISHED,RELATED -j ACCEPT
+	ip netns exec nsrouter iptables -t nat --new psid
+	ip netns exec nsrouter iptables -t nat --insert psid -j MASQUERADE \
+		--psid $offset_len:$psid:$psid_length
+	ip netns exec nsrouter iptables -t nat -I POSTROUTING -o veth2 -j psid
+
+	# calculate psid mask
+	offset=$(( 1 << (16 - $offset_len) ))
+	two_power_j=$(( $offset / (1 << $psid_length) ))
+	offset_mask=$(( ( (1 << $offset_len) - 1 ) << (16 - $offset_len) ))
+	psid_mask=$(( ( (1 << $psid_length) - 1) * $two_power_j ))
+
+	# Create file
+	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 >/dev/null 2>&1
+
+	# Test multiple ports
+	for p in 1 2 3 4 5; do
+		port=1080$p
+
+		test_service
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+
+		check_connection
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+	done
+
+	# tidy up test rules
+	ip netns exec nsrouter iptables -F
+	ip netns exec nsrouter iptables -t nat -F
+	ip netns exec nsrouter iptables -t nat -X psid
+}
+
+for n in $ns_all; do
+	ip netns add $n
+	ip -net $n link set lo up
+done
+
+for i in 1 2; do
+	ip link add veth0 netns nsclient$i type veth peer name veth$i netns nsrouter
+
+	ip -net nsclient$i link set veth0 up
+	ip -net nsclient$i addr add $(ipv4 $i 2)/24 dev veth0
+
+	ip -net nsrouter link set veth$i up
+	ip -net nsrouter addr add $(ipv4 $i 1)/24 dev veth$i
+done
+
+ip -net nsclient1 route add default via $(ipv4 1 1)
+ip -net nsclient2 route add default via $(ipv4 2 1)
+
+ip netns exec nsrouter sysctl -q net.ipv4.conf.all.forwarding=1
+
+offset_len=0
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+offset_len=6
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+cleanup
+exit $ret
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-08-09  4:10                             ` [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-08-25 17:05                               ` Pablo Neira Ayuso
  2021-08-29 21:30                                 ` Cole Dishington
  0 siblings, 1 reply; 26+ messages in thread
From: Pablo Neira Ayuso @ 2021-08-25 17:05 UTC (permalink / raw)
  To: Cole Dishington
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Anthony Lineham,
	Scott Parlane, Blair Steven

Hi,

On Mon, Aug 09, 2021 at 04:10:36PM +1200, Cole Dishington wrote:
> Adds support for masquerading into a smaller subset of ports -
> defined by the PSID values from RFC-7597 Section 5.1. This is part of
> the support for MAP-E and Lightweight 4over6, which allows multiple
> devices to share an IPv4 address by splitting the L4 port / id into
> ranges.
> 
> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> Reviewed-by: Florian Westphal <fw@strlen.de>
[...]

Looking at the userspace logic:

https://patchwork.ozlabs.org/project/netfilter-devel/patch/20210716002219.30193-1-Cole.Dishington@alliedtelesis.co.nz/

Chunk extracted from void parse_psid(...)

>        offset = (1 << (16 - offset_len));

Assuming offset_len = 6, then you skip 0-1023 ports, OK.

>        psid = psid << (16 - offset_len - psid_len);

This psid calculation is correct? Maybe:

        psid = psid << (16 - offset_len);

instead?

        psid=0  =>      0 << (16 - 6) = 1024
        psid=1  =>      1 << (16 - 6) = 2048

This is implicitly assuming that 64 PSIDs are available, each of them
taking 1024 ports, ie. psid_len is 6 bits. But why are you subtracting
the psid_len above?

>        /* Handle the special case of no offset bits (a=0), so offset loops */
>        min = psid;

OK, this line above is the minimal port in the range

>        if (offset)
>                min += offset;

... which is incremented by the offset (to skip the 0-1023 ports).

>       r->min_proto.all = htons(min);
>       r->max_proto.all = htons(min + ((1 << (16 - offset_len - psid_len)) - 1));

Here, you subtract psid_len again, not sure why.

>       r->base_proto.all = htons(offset);

base is set to offset, ie. 1024.

>       r->flags |= NF_NAT_RANGE_PSID;
>       r->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;

Now looking at the kernel side.

> diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
> index 8e8a65d46345..19a4754cda76 100644
> --- a/net/netfilter/nf_nat_masquerade.c
> +++ b/net/netfilter/nf_nat_masquerade.c
> @@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
>  	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
>  	newrange.min_addr.ip = newsrc;
>  	newrange.max_addr.ip = newsrc;
> -	newrange.min_proto   = range->min_proto;
> -	newrange.max_proto   = range->max_proto;
> +
> +	if (range->flags & NF_NAT_RANGE_PSID) {
> +		u16 base = ntohs(range->base_proto.all);
> +		u16 min =  ntohs(range->min_proto.all);
> +		u16 off = 0;
> +
> +		/* xtables should stop base > 2^15 by enforcement of
> +		 * 0 <= offset_len < 16 argument, with offset_len=0
> +		 * as a special case inwhich base=0.

I don't understand this comment.

> +		 */
> +		if (WARN_ON_ONCE(base > (1 << 15)))
> +			return NF_DROP;
> +
> +		/* If offset=0, port range is in one contiguous block */
> +		if (base)
> +			off = prandom_u32_max(((1 << 16) / base) - 1);

Assuming the example above, base is set to 1024. Then, off is a random
value between UINT16_MAX (you expressed this as 1 << 16) and the base
which is 1024 minus 1.

So this is picking a random off (actually the PSID?) between 0 and 63.
What about clashes? I mean, two different machines behind the NAT
might get the same off.

> +		newrange.min_proto.all   = htons(min + base * off);

min could be 1024, 2048, 3072... you add base which is 1024 * off.

Is this duplicated? Both calculated in user and kernel space?

> +		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);

I'm stopping here, I'm getting lost.

My understanding about this RFC is that you would like to split the
16-bit ports in ranges to uniquely identify the host behind the NAT.

Why don't you just select the port range from userspace
utilizing the existing infrastructure? I mean, why do you need this
kernel patch?

Florian already suggested:

> Is it really needed to place all of this in the nat core?
> 
> The only thing that has to be done in the NAT core, afaics, is to
> suppress port reallocation attmepts when NF_NAT_RANGE_PSID is set.
> 
> Is there a reason why nf_nat_masquerade_ipv4/6 can't be changed instead
> to do what you want?
> 
> AFAICS its enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init the
> upper/lower boundaries, i.e. change input given to nf_nat_setup_info().

extracted from:

https://patchwork.ozlabs.org/project/netfilter-devel/patch/20210422023506.4651-1-Cole.Dishington@alliedtelesis.co.nz/

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-08-25 17:05                               ` Pablo Neira Ayuso
@ 2021-08-29 21:30                                 ` Cole Dishington
  0 siblings, 0 replies; 26+ messages in thread
From: Cole Dishington @ 2021-08-29 21:30 UTC (permalink / raw)
  To: Blair Steven, davem, Anthony Lineham, pablo, shuah,
	Scott Parlane, kadlec, kuba, fw
  Cc: linux-kselftest, linux-kernel, netfilter-devel, coreteam, netdev

Hello,

Thanks for your time reviewing!

On Wed, 2021-08-25 at 19:05 +0200, Pablo Neira Ayuso wrote:
> Hi,
> 
> On Mon, Aug 09, 2021 at 04:10:36PM +1200, Cole Dishington wrote:
> > Adds support for masquerading into a smaller subset of ports -
> > defined by the PSID values from RFC-7597 Section 5.1. This is part of
> > the support for MAP-E and Lightweight 4over6, which allows multiple
> > devices to share an IPv4 address by splitting the L4 port / id into
> > ranges.
> > 
> > Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> > Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> > Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> > Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> > Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> > Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> > Reviewed-by: Florian Westphal <fw@strlen.de>
> [...]
> 
> Looking at the userspace logic:
> 
> https://scanmail.trustwave.com/?c=20988&d=6vim4fcVLjPkIbLUDqz3Tj2W4gXWNCkYa5llWggBjA&u=https%3a%2f%2fpatchwork%2eozlabs%2eorg%2fproject%2fnetfilter-devel%2fpatch%2f20210716002219%2e30193-1-Cole%2eDishington%40alliedtelesis%2eco%2enz%2f
> 
> Chunk extracted from void parse_psid(...)
> 
> >        offset = (1 << (16 - offset_len));
> 
> Assuming offset_len = 6, then you skip 0-1023 ports, OK.
> 
> >        psid = psid << (16 - offset_len - psid_len);
> 
> This psid calculation is correct? Maybe:
> 
>         psid = psid << (16 - offset_len);

PSID port numbers have the form
[offset|PSID|j]
and
16 = offset_length + PSID_length + j_length.
The PSID calculation above is bit shifting the passed psid up j_length.

The userspace tool accepts the unshifted psid to be consistent with how RFC7597 specified it (see RFC7597 Appendix A. Examples).

> 
> instead?
> 
>         psid=0  =>      0 << (16 - 6) = 1024
>         psid=1  =>      1 << (16 - 6) = 2048
> 
> This is implicitly assuming that 64 PSIDs are available, each of them
> taking 1024 ports, ie. psid_len is 6 bits. But why are you subtracting
> the psid_len above?
> 
> >        /* Handle the special case of no offset bits (a=0), so offset loops */
> >        min = psid;
> 
> OK, this line above is the minimal port in the range
> 
> >        if (offset)
> >                min += offset;
> 
> ... which is incremented by the offset (to skip the 0-1023 ports).
> 
> >       r->min_proto.all = htons(min);
> >       r->max_proto.all = htons(min + ((1 << (16 - offset_len - psid_len)) - 1));
> 
> Here, you subtract psid_len again, not sure why.

Each PSID port range is made up of many smaller contiguous port sub-ranges  (except for the special case of offset_len = 0) e.g. for PSID=0x34,psid_length=8,psid_offset=6 the ranges are 1232-1235, 2256-2259, ..., 63696-63699, 64720-64723 (Taken from rfc7597 Appendix A. Examples).
The above calculation is selecting the first sub-range. Max is computed by finding j_length and filling it with 1's.

> 
> >       r->base_proto.all = htons(offset);
> 
> base is set to offset, ie. 1024.
> 
> >       r->flags |= NF_NAT_RANGE_PSID;
> >       r->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
> 
> Now looking at the kernel side.
> 
> > diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
> > index 8e8a65d46345..19a4754cda76 100644
> > --- a/net/netfilter/nf_nat_masquerade.c
> > +++ b/net/netfilter/nf_nat_masquerade.c
> > @@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
> >  	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
> >  	newrange.min_addr.ip = newsrc;
> >  	newrange.max_addr.ip = newsrc;
> > -	newrange.min_proto   = range->min_proto;
> > -	newrange.max_proto   = range->max_proto;
> > +
> > +	if (range->flags & NF_NAT_RANGE_PSID) {
> > +		u16 base = ntohs(range->base_proto.all);
> > +		u16 min =  ntohs(range->min_proto.all);
> > +		u16 off = 0;
> > +
> > +		/* xtables should stop base > 2^15 by enforcement of
> > +		 * 0 <= offset_len < 16 argument, with offset_len=0
> > +		 * as a special case inwhich base=0.
> 
> I don't understand this comment.

This is a sanity check. The userspace tool restricts offset_len to the specified range, and since base = 2^(16 - offset_len) (or base = 0 for the special case of offset_len = 0), the below condition should never be true.
However, if a base greater than 1<<15 were allowed, a divide by zero error would occur in the block below.

> 
> > +		 */
> > +		if (WARN_ON_ONCE(base > (1 << 15)))
> > +			return NF_DROP;
> > +
> > +		/* If offset=0, port range is in one contiguous block */
> > +		if (base)
> > +			off = prandom_u32_max(((1 << 16) / base) - 1);
> 
> Assuming the example above, base is set to 1024. Then, off is a random
> value between UINT16_MAX (you expressed this as 1 << 16) and the base
> which is 1024 minus 1.
> 
> So this is picking a random off (actually the PSID?) between 0 and 63.
> What about clashes? I mean, two different machines behind the NAT
> might get the same off.
> 
> > +		newrange.min_proto.all   = htons(min + base * off);
> 
> min could be 1024, 2048, 3072... you add base which is 1024 * off.
> 
> Is this duplicated? Both calculated in user and kernel space?

Each PSID value defines many contiguous port sub-ranges. The randomly chosen off selects the ith sub-range for a given PSID e.g. off=1 would select 2256-2259 for rfc7597 Appendix A. Examples.

The userspace tool calculates the min and max of the first sub-range for a given psid, whereas the above randomly selects one of the sub-ranges for a given psid.

j_length determines how large each sub-range will be, so for small j_length values there still is the risk the chosen sub-range will be exhausted.

> 
> > +		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
> 
> I'm stopping here, I'm getting lost.
> 
> My understanding about this RFC is that you would like to split the
> 16-bit ports in ranges to uniquely identify the host behind the NAT.
> 
> Why don't you just select the port range from userspace
> utilizing the existing infrastructure? I mean, why do you need this
> kernel patch?

If utilizing existing infrastructure to install PSID port ranges, a lot of rules would be required, as each PSID port range is made up of many smaller sub-ranges.

e.g. (from rfc7597 Appendix A. Examples)
for psid_length=8,offset_length=6 each PSID would need 63 NF_NAT_RANGE_PROTO_SPECIFIED rules, hence a total of 16128 rules if all the PSIDs were allocated.

> 
> Florian already suggested:
> 
> > Is it really needed to place all of this in the nat core?
> > 
> > The only thing that has to be done in the NAT core, afaics, is to
> > suppress port reallocation attmepts when NF_NAT_RANGE_PSID is set.
> > 
> > Is there a reason why nf_nat_masquerade_ipv4/6 can't be changed instead
> > to do what you want?
> > 
> > AFAICS its enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init the
> > upper/lower boundaries, i.e. change input given to nf_nat_setup_info().
> 
> extracted from:
> 
> https://scanmail.trustwave.com/?c=20988&d=6vim4fcVLjPkIbLUDqz3Tj2W4gXWNCkYa5s0Bg8JjA&u=https%3a%2f%2fpatchwork%2eozlabs%2eorg%2fproject%2fnetfilter-devel%2fpatch%2f20210422023506%2e4651-1-Cole%2eDishington%40alliedtelesis%2eco%2enz%2f







^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2021-08-29 21:30 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-22  2:35 [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
2021-04-22  4:10 ` kernel test robot
2021-04-22  6:54 ` kernel test robot
2021-04-22  7:48 ` kernel test robot
2021-04-26 12:23 ` Florian Westphal
2021-06-29  0:48   ` Cole Dishington
2021-06-30 14:20     ` Florian Westphal
     [not found]       ` <20210705040856.25191-1-Cole.Dishington@alliedtelesis.co.nz>
2021-07-05  4:08         ` [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
2021-07-05  4:08         ` [PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
2021-07-05 10:39           ` Florian Westphal
2021-07-16  0:27             ` [PATCH 0/3] " Cole Dishington
2021-07-16  0:27               ` [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
2021-07-16  0:27               ` [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
2021-07-16 15:18                 ` Florian Westphal
2021-07-19  1:21                   ` Cole Dishington
2021-07-22  7:17                     ` Florian Westphal
2021-07-25 23:28                       ` Cole Dishington
2021-07-26 14:37                         ` Florian Westphal
2021-08-09  4:10                           ` [PATCH net-next 0/3] " Cole Dishington
2021-08-09  4:10                             ` [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
2021-08-09  4:10                             ` [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
2021-08-25 17:05                               ` Pablo Neira Ayuso
2021-08-29 21:30                                 ` Cole Dishington
2021-08-09  4:10                             ` [PATCH net-next 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests Cole Dishington
2021-07-16  0:27               ` [PATCH " Cole Dishington
2021-07-05  4:08         ` [PATCH] " Cole Dishington

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.