netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC libnetfilter_queue 0/1] Avoid packet copy in nfq_nlmsg_verdict_put_pkt
@ 2020-06-03  3:55 Duncan Roe
  2020-06-03  3:55 ` [PATCH RFC libnetfilter_queue 1/1] examples: Use sendmsg() to avoid packet copy in nfq_nlmsg_verdict_put_pkt() Duncan Roe
  0 siblings, 1 reply; 2+ messages in thread
From: Duncan Roe @ 2020-06-03  3:55 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel

This patch demonstrates using sendmsg() to avoid the memcpy() in
nfq_nlmsg_verdict_put_pkt().

This is a catch-up to the libnfnetlink-based nfq_set_verdict() function family
which already used sendmsg() to avoid data copy.

The logic is implemented in a number of static functions.
These functions are proposed as a starting point for additions to
libnetfilter_queue and libmnl.

This patch is an RFC on these functions.

Duncan Roe (1):
  examples: Use sendmsg() to avoid packet copy in
    nfq_nlmsg_verdict_put_pkt()

 examples/nf-queue.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 122 insertions(+), 6 deletions(-)

-- 
2.14.5


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH RFC libnetfilter_queue 1/1] examples: Use sendmsg() to avoid packet copy in nfq_nlmsg_verdict_put_pkt()
  2020-06-03  3:55 [PATCH RFC libnetfilter_queue 0/1] Avoid packet copy in nfq_nlmsg_verdict_put_pkt Duncan Roe
@ 2020-06-03  3:55 ` Duncan Roe
  0 siblings, 0 replies; 2+ messages in thread
From: Duncan Roe @ 2020-06-03  3:55 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel

The patch does a null mangle of every 2nd packet (packet is unchanged but
pktb->mangled is set).

Mangling every 2nd packet tests all code paths, but turns out to be essential
for IPv6 on the 'lo' interface, but because such packets always have "checksum
not ready"; and the kernel insists that mangled packets have a good checksum.
So every second UDP is lost, while TCP gets through on the retransmit.

The static functions at the front really belong in the appropriate libraries.

      Diagram of in-memory verdict construction with packet data

KEY:        ++++                     ++++
            |  | Occurs once         <  > Occurs zero or more times
            ++++                     ++++

                      +++++++++
                      |nlmsg  |\
                      |header | \
                      +++++++++  }
                      <attrib >  }
                      <header >  }
                      <------->  } first
                      <attrib >  } IOV
                      <payload>  }
                      +++++++++  }
                      |packet |  }
                      |attrib | /
                      |header |/
                      +++++++++
                      <attrib >\
                      <header > \
                      <------->  } last IOV
                      <attrib > /
                      <payload>/
                      +++++++++

Second IOV covers packet data.
Third IOV covers padding after packet data (if required).

The count in the nlmsg header indexes attributes until all are added.
Add packet data length and pad length to count before sending to kernel.

Without packet data, there is one IOV.

Signed-off-by: Duncan Roe <duncan_roe@optusnet.com.au>
---
 examples/nf-queue.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 122 insertions(+), 6 deletions(-)

diff --git a/examples/nf-queue.c b/examples/nf-queue.c
index 3da2c24..192ec68 100644
--- a/examples/nf-queue.c
+++ b/examples/nf-queue.c
@@ -14,22 +14,118 @@
 #include <linux/netfilter/nfnetlink_queue.h>
 
 #include <libnetfilter_queue/libnetfilter_queue.h>
+#include <libnetfilter_queue/pktbuff.h>
 
 /* only for NFQA_CT, not needed otherwise: */
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
 static struct mnl_socket *nl;
 
+/* Suggested additions to libnetfilter_queue.h */
+
+struct nfq_iovec {
+	struct iovec	iov[4];
+	int		iovidx;
+	char		padbuf[3];
+};
+
+/* Suggested addition to libmnl */
+
+static ssize_t
+mnl_socket_sendmsg(struct mnl_socket *nl, struct iovec *iov, int iovcnt)
+{
+	/* Could have this at top level since > 1 function will now use it */
+	static struct sockaddr_nl snl = {
+		.nl_family = AF_NETLINK
+	};
+
+	const struct msghdr msg = {
+		.msg_name = &snl,
+		.msg_namelen = sizeof snl,
+		.msg_iov = iov,
+		.msg_iovlen = iovcnt,
+		.msg_control = NULL,
+		.msg_controllen = 0,
+		.msg_flags = 0,
+	};
+
+	return sendmsg(mnl_socket_get_fd(nl), &msg, 0);
+}
+
+/* Suggested additions to libnetfilter_queue */
+
+static void
+nfq_verdict_iov_init(struct nlmsghdr *nlh, struct nfq_iovec *vec)
+{
+	vec->iov[0].iov_base = nlh;
+	vec->iovidx = 0;
+}
+
 static void
-nfq_send_verdict(int queue_num, uint32_t id)
+nfq_nlmsg_verdict_iov_put_pkt(struct nlmsghdr *nlh, const void * pkt,
+			      uint32_t plen, struct nfq_iovec *vec)
+{
+	struct nlattr *attrib = mnl_nlmsg_get_payload_tail(nlh);
+	uint16_t payload_len = MNL_ATTR_HDRLEN + plen;
+	int pad;
+
+	/* Duplicate the first part of mnl_attr_put() */
+	attrib->nla_type = NFQA_PAYLOAD;
+	attrib->nla_len = payload_len;
+	nlh->nlmsg_len += MNL_ATTR_HDRLEN;
+
+	vec->iov[vec->iovidx].iov_len = nlh->nlmsg_len;
+	vec->iov[++vec->iovidx].iov_base = (void *)pkt;
+	vec->iov[vec->iovidx].iov_len = plen;
+	pad = MNL_ALIGN(plen) - plen;
+	if (pad) {
+		memset(vec->padbuf, 0, pad);
+		vec->iov[++vec->iovidx].iov_base = vec->padbuf;
+		vec->iov[vec->iovidx].iov_len = pad;
+	}
+	vec->iov[++vec->iovidx].iov_base = vec->iov[0].iov_base +
+					   vec->iov[0].iov_len;
+}
+
+static ssize_t
+nfq_socket_sendmsg(struct mnl_socket *nl, struct nlmsghdr *nlh,
+		   struct nfq_iovec *vec)
+{
+	if (vec->iovidx) {
+		int i;
+
+		vec->iov[vec->iovidx].iov_len = nlh->nlmsg_len -
+						vec->iov[0].iov_len;
+		for (i = 1; i < vec->iovidx; i++)
+			nlh->nlmsg_len += vec->iov[i].iov_len;
+	}
+	else
+		vec->iov[0].iov_len = nlh->nlmsg_len;
+
+	return mnl_socket_sendmsg(nl, vec->iov,
+				  vec->iovidx +
+				  (vec->iov[vec->iovidx].iov_len ? 1 : 0));
+}
+
+/* End suggested additions */
+
+static void
+nfq_send_verdict(int queue_num, uint32_t id, struct pkt_buff *pktb)
 {
 	char buf[MNL_SOCKET_BUFFER_SIZE];
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
+	struct nfq_iovec niov;
 
 	nlh = nfq_nlmsg_put(buf, NFQNL_MSG_VERDICT, queue_num);
+	nfq_verdict_iov_init(nlh, &niov);
 	nfq_nlmsg_verdict_put(nlh, id, NF_ACCEPT);
 
+	/* Example to update packet contents using sendmsg() */
+	if (pktb_mangled(pktb))
+		nfq_nlmsg_verdict_iov_put_pkt(nlh, pktb_data(pktb),
+					      pktb_len(pktb), &niov);
+
 	/* example to set the connmark. First, start NFQA_CT section: */
 	nest = mnl_attr_nest_start(nlh, NFQA_CT);
 
@@ -40,8 +136,9 @@ nfq_send_verdict(int queue_num, uint32_t id)
 	/* end conntrack section */
 	mnl_attr_nest_end(nlh, nest);
 
-	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
-		perror("mnl_socket_send");
+	if (nfq_socket_sendmsg(nl, nlh, &niov) < 0)
+	{
+		perror("nfq_socket_sendmsg");
 		exit(EXIT_FAILURE);
 	}
 }
@@ -50,9 +147,12 @@ static int queue_cb(const struct nlmsghdr *nlh, void *data)
 {
 	struct nfqnl_msg_packet_hdr *ph = NULL;
 	struct nlattr *attr[NFQA_MAX+1] = {};
+	struct pkt_buff *pktb;
 	uint32_t id = 0, skbinfo;
 	struct nfgenmsg *nfg;
 	uint16_t plen;
+	static bool mangle = false;
+	bool csum_bad = false;
 
 	if (nfq_nlmsg_parse(nlh, attr) < 0) {
 		perror("problems parsing");
@@ -69,7 +169,9 @@ static int queue_cb(const struct nlmsghdr *nlh, void *data)
 	ph = mnl_attr_get_payload(attr[NFQA_PACKET_HDR]);
 
 	plen = mnl_attr_get_payload_len(attr[NFQA_PAYLOAD]);
-	/* void *payload = mnl_attr_get_payload(attr[NFQA_PAYLOAD]); */
+
+	pktb = pktb_alloc(AF_INET, mnl_attr_get_payload(attr[NFQA_PAYLOAD]),
+			  plen, 0);
 
 	skbinfo = attr[NFQA_SKB_INFO] ? ntohl(mnl_attr_get_u32(attr[NFQA_SKB_INFO])) : 0;
 
@@ -93,11 +195,25 @@ static int queue_cb(const struct nlmsghdr *nlh, void *data)
 	 * If these packets are later forwarded/sent out, the checksums will
 	 * be corrected by kernel/hardware.
 	 */
-	if (skbinfo & NFQA_SKB_CSUMNOTREADY)
+	if (skbinfo & NFQA_SKB_CSUMNOTREADY) {
 		printf(", checksum not ready");
+		csum_bad = true;
+	}
 	puts(")");
 
-	nfq_send_verdict(ntohs(nfg->res_id), id);
+	/* Mangle every 2nd packet to exercise both code paths */
+	/* Nothing gets changed but set the 'mangled' flag */
+	mangle = !mangle;
+	if (mangle) {
+		pktb_mangle(pktb, 0, 0, 0, NULL, 0);
+		if (csum_bad)
+			puts("Warning: kernel will discard mangled packet "
+			     "owing to bad checksum");
+	}
+
+	nfq_send_verdict(ntohs(nfg->res_id), id, pktb);
+
+	pktb_free(pktb);
 
 	return MNL_CB_OK;
 }
-- 
2.14.5


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-06-03  3:55 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-03  3:55 [PATCH RFC libnetfilter_queue 0/1] Avoid packet copy in nfq_nlmsg_verdict_put_pkt Duncan Roe
2020-06-03  3:55 ` [PATCH RFC libnetfilter_queue 1/1] examples: Use sendmsg() to avoid packet copy in nfq_nlmsg_verdict_put_pkt() Duncan Roe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).