All of lore.kernel.org
 help / color / mirror / Atom feed
* Strange TCP behaviour when appending data to a packet from netfilter
@ 2021-06-03 19:38 Ignat Korchagin
  2021-06-03 19:46 ` Stephen Hemminger
  0 siblings, 1 reply; 3+ messages in thread
From: Ignat Korchagin @ 2021-06-03 19:38 UTC (permalink / raw)
  To: netdev; +Cc: kernel-team

Hi,

I was experimenting with a netfilter module (originally nftables
module) which appends a fixed byte string to outgoing IP packets and
removes it from incoming IP packets. In its simplest form the full
module code is below:

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <net/ip.h>

/* Number of bytes appended to outgoing packets and stripped from incoming ones. */
#define TRAILER_LEN 16
/* Value every trailer byte is filled with. */
#define TRAILER_VAL 0xfe

/* Expected trailer contents; filled with TRAILER_VAL in nf_crypt_init(). */
static u8 trailer_pattern[TRAILER_LEN];

/*
 * Rewrite the IPv4 total length after the trailer has been added
 * (out == true) or removed (out == false), then regenerate the IP header
 * checksum so it matches the new length.
 */
static void adust_net_hdr(struct sk_buff *skb, bool out)
{
    struct iphdr *iph = ip_hdr(skb);
    int delta = out ? TRAILER_LEN : -TRAILER_LEN;

    iph->tot_len = htons(ntohs(iph->tot_len) + delta);
    ip_send_check(iph);
}

/*
 * nf_crypt_trailer - netfilter hook that adds or removes a fixed trailer.
 *
 * NF_INET_LOCAL_OUT: appends TRAILER_LEN bytes of TRAILER_VAL to the packet.
 * NF_INET_LOCAL_IN:  checks that the last TRAILER_LEN bytes match
 *                    trailer_pattern and trims them off.
 * In both cases the IPv4 total length and header checksum are then fixed up
 * by adust_net_hdr().
 *
 * Returns NF_ACCEPT on success, or NF_DROP (after logging) on any failure.
 *
 * NOTE(review): the trailer is added after the transport layer has sized the
 * segment, so an already full-sized packet may no longer fit the device MTU
 * once it grows by TRAILER_LEN - confirm the MSS/MTU leaves that headroom.
 */
static unsigned int nf_crypt_trailer(void *priv, struct sk_buff *skb,
                                     const struct nf_hook_state *state)
{
    if (state->hook == NF_INET_LOCAL_OUT) {
        struct sk_buff *trailer;
        /* make the tail writable with room for TRAILER_LEN extra bytes */
        int num_frags = skb_cow_data(skb, TRAILER_LEN, &trailer);

        if (num_frags < 0) {
            pr_err("skb_cow_data failed for NF_INET_LOCAL_OUT\n");
            return NF_DROP;
        }
        memset(pskb_put(skb, trailer, TRAILER_LEN), TRAILER_VAL, TRAILER_LEN);
    }

    if (state->hook == NF_INET_LOCAL_IN) {
        u8 buf[TRAILER_LEN];
        struct sk_buff *trailer;
        int num_frags;

        /*
         * Every offset below is computed as skb->len - TRAILER_LEN, so make
         * sure the packet holds an IP header plus a complete trailer before
         * doing that subtraction.
         */
        if (skb->len < ip_hdrlen(skb) + TRAILER_LEN) {
            pr_err("packet too short for trailer in NF_INET_LOCAL_IN\n");
            return NF_DROP;
        }

        /* the IP header is rewritten below, so the data must be writable */
        num_frags = skb_cow_data(skb, 0, &trailer);
        if (num_frags < 0) {
            pr_err("skb_cow_data failed for NF_INET_LOCAL_IN\n");
            return NF_DROP;
        }

        if (skb_copy_bits(skb, skb->len - TRAILER_LEN, buf, TRAILER_LEN)) {
            pr_err("skb_copy_bits failed for NF_INET_LOCAL_IN\n");
            return NF_DROP;
        }

        if (memcmp(buf, trailer_pattern, TRAILER_LEN)) {
            pr_err("trailer pattern not found in NF_INET_LOCAL_IN\n");
            return NF_DROP;
        }

        /*
         * Use the _rcsum variant: unlike plain pskb_trim() it keeps the
         * checksum state of a received packet valid after the trim.
         */
        if (pskb_trim_rcsum(skb, skb->len - TRAILER_LEN)) {
            pr_err("pskb_trim failed\n");
            return NF_DROP;
        }
    }

    /* adjust IP total length and header checksum */
    adust_net_hdr(skb, state->hook == NF_INET_LOCAL_OUT);

    return NF_ACCEPT;
}

/*
 * Hook table: run nf_crypt_trailer() for IPv4 on both LOCAL_IN and
 * LOCAL_OUT, at NF_IP_PRI_RAW priority.
 */
static const struct nf_hook_ops nf_crypt_ops[] = {
    {
        .pf       = NFPROTO_IPV4,
        .hooknum  = NF_INET_LOCAL_IN,
        .priority = NF_IP_PRI_RAW,
        .hook     = nf_crypt_trailer,
    },
    {
        .pf       = NFPROTO_IPV4,
        .hooknum  = NF_INET_LOCAL_OUT,
        .priority = NF_IP_PRI_RAW,
        .hook     = nf_crypt_trailer,
    },
};

/*
 * Per-namespace init: register the trailer hooks in every network namespace
 * except the initial one. Returns 0 for init_net, otherwise the result of
 * nf_register_net_hooks().
 */
static int __net_init nf_crypt_net_init(struct net *net)
{
    /* the init namespace is deliberately left untouched */
    if (net != &init_net) {
        return nf_register_net_hooks(net, nf_crypt_ops,
                                     ARRAY_SIZE(nf_crypt_ops));
    }

    return 0;
}

/*
 * Per-namespace exit: unregister the trailer hooks from every network
 * namespace except the initial one, mirroring nf_crypt_net_init().
 */
static void __net_exit nf_crypt_net_exit(struct net *net)
{
    /* no hooks were registered for the init namespace */
    if (net != &init_net) {
        nf_unregister_net_hooks(net, nf_crypt_ops,
                                ARRAY_SIZE(nf_crypt_ops));
    }
}

/* Per-network-namespace setup and teardown callbacks for this module. */
static struct pernet_operations nf_crypt_net_ops = {
    .exit = nf_crypt_net_exit,
    .init = nf_crypt_net_init,
};

/*
 * Module load: build the reference trailer used for comparison on the
 * inbound path, then register the per-namespace operations.
 * Returns the result of register_pernet_subsys().
 */
static int __init nf_crypt_init(void)
{
    int err;

    memset(trailer_pattern, TRAILER_VAL, sizeof(trailer_pattern));

    err = register_pernet_subsys(&nf_crypt_net_ops);
    return err;
}

/* Module unload: undo the register_pernet_subsys() done in nf_crypt_init(). */
static void __exit nf_crypt_fini(void)
{
    unregister_pernet_subsys(&nf_crypt_net_ops);
}

/* Module entry and exit points. */
module_init(nf_crypt_init);
module_exit(nf_crypt_fini);

MODULE_LICENSE("GPL");

Then I set up a test env using two Linux network namespaces:
#!/bin/bash -e

# Create one network namespace per endpoint.
sudo ip netns add alice
sudo ip netns add bob

# Connect the namespaces with a veth pair: a0 lives in alice, b0 in bob.
sudo ip -netns alice link add a0 type veth peer b0 netns bob

# Give each end an address in 192.168.13.0/24.
sudo ip -netns alice address add 192.168.13.5/24 dev a0
sudo ip -netns bob address add 192.168.13.7/24 dev b0

# Bring up loopback and the veth end in alice.
sudo ip -netns alice link set lo up
sudo ip -netns alice link set a0 up

# Bring up loopback and the veth end in bob.
sudo ip -netns bob link set lo up
sudo ip -netns bob link set b0 up

Everything works except when I try to serve a large file over HTTP (around 5Gb):
$ sudo ip netns exec bob python3 -m http.server
and in another terminal
$ sudo ip netns exec alice curl -o /dev/null http://192.168.13.7:8000/test.bin

The download starts, but the download speed almost immediately drops
to 0 and "stalls".

I've explicitly added the pr_err messages for the module to notify me,
if it drops packets for whatever reason, but it doesn't drop any
packets.

Additionally, further debugging showed - if a TCP "ack" packet to
"bob" gets processed on a kernel thread (and not in softirq), "# cat
/proc/<pid>/stack" for the thread produces:

[<0>] wait_woken+0x1f4/0x250
[<0>] sk_stream_wait_memory+0x3fb/0xde0
[<0>] tcp_sendmsg_locked+0x94b/0x2e60
[<0>] tcp_sendmsg+0x28/0x40
[<0>] sock_sendmsg+0xdb/0x110
[<0>] __sys_sendto+0x1a8/0x270
[<0>] __x64_sys_sendto+0xdd/0x1b0
[<0>] do_syscall_64+0x33/0x40
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xae

It seems the server-side sending buffer is full, so one would assume
TCP acks from the client are somehow not getting processed, but I
definitely see client TCP acks at least in the netfilter module. I've
also tried to disable GSO on the veth interfaces as well as lower the
MTU to no avail.

Additionally, if I reduce TRAILER_LEN to 0 (leaving the other
skb_cow_data calls in place) - everything starts working.

Are there any hints why the above code causes this strange behaviour
in TCP given that it seems I'm undoing everything on the incoming path
I did for the outgoing path, so should be totally transparent to TCP?

Kind regards,
Ignat

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-06-04  9:33 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-03 19:38 Strange TCP behaviour when appending data to a packet from netfilter Ignat Korchagin
2021-06-03 19:46 ` Stephen Hemminger
2021-06-04  9:32   ` Ignat Korchagin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.