It's ipvs's duty to do traffic statistic if packets get hit, no matter what mode it is. Signed-off-by: longguang.yue <bigclouds@163.com> --- net/netfilter/ipvs/ip_vs_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e3668a6e54e4..ed523057f07f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1413,8 +1413,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in ipvs, af, skb, &iph); if (likely(cp)) { - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ){ + ip_vs_out_stats(cp, skb); + skb->ipvs_property = 1; goto ignore_cp; + } return handle_response(af, skb, pd, cp, &iph, hooknum); } -- 2.20.1 (Apple Git-117)
especially in public cloud case, statistic is related to monitorring
and billing , both ingress and egress packets will go throught ipvs,
even dr/tun mode.
in dr/tun mode, ipvs need to do nothing except statistic, so
skb->ipvs_property = 1
regards
On Tue, Sep 29, 2020 at 1:04 PM longguang.yue <bigclouds@163.com> wrote:
>
> It's ipvs's duty to do traffic statistic if packets get hit,
> no matter what mode it is.
>
> Signed-off-by: longguang.yue <bigclouds@163.com>
> ---
> net/netfilter/ipvs/ip_vs_core.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> index e3668a6e54e4..ed523057f07f 100644
> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -1413,8 +1413,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
> ipvs, af, skb, &iph);
>
> if (likely(cp)) {
> - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
> + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ){
> + ip_vs_out_stats(cp, skb);
> + skb->ipvs_property = 1;
> goto ignore_cp;
> + }
> return handle_response(af, skb, pd, cp, &iph, hooknum);
> }
>
> --
> 2.20.1 (Apple Git-117)
>
It's ipvs's duty to do traffic statistic if packets get hit, no matter what mode it is. Signed-off-by: longguang.yue <bigclouds@163.com> --- net/netfilter/ipvs/ip_vs_conn.c | 13 +++++++++++-- net/netfilter/ipvs/ip_vs_core.c | 5 ++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a90b8eac16ac..2620c585d0c0 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; + __be16 cport; + const union nf_inet_addr *caddr; struct ip_vs_conn *cp, *ret=NULL; /* @@ -411,10 +413,17 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->vport == cp->cport && p->cport == cp->dport && + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ){ + cport = cp->vport; + caddr = &cp->vaddr; + } else { + cport = cp->dport; + caddr = &cp->daddr; + } + if (p->vport == cp->cport && p->cport == cport && cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && + ip_vs_addr_equal(p->af, p->caddr, caddr) && p->protocol == cp->protocol && cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e3668a6e54e4..ed523057f07f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1413,8 +1413,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in ipvs, af, skb, &iph); if (likely(cp)) { - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ){ + ip_vs_out_stats(cp, skb); + skb->ipvs_property = 1; goto ignore_cp; + } return handle_response(af, skb, pd, cp, &iph, hooknum); } -- 2.20.1 (Apple Git-117)
On Tue, 29 Sep 2020 16:18:11 +0800 longguang.yue wrote:
> @@ -411,10 +413,17 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
> rcu_read_lock();
>
> hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
> - if (p->vport == cp->cport && p->cport == cp->dport &&
> + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ){
> + cport = cp->vport;
checkpatch says:
ERROR: space required before the open brace '{'
#25: FILE: net/netfilter/ipvs/ip_vs_core.c:1416:
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ){
It's ipvs's duty to do traffic statistic if packets get hit, no matter what mode it is. Signed-off-by: longguang.yue <bigclouds@163.com> --- net/netfilter/ipvs/ip_vs_conn.c | 14 ++++++++++++-- net/netfilter/ipvs/ip_vs_core.c | 5 ++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a90b8eac16ac..c4d164ce8ca7 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; + __be16 cport; + const union nf_inet_addr *caddr; struct ip_vs_conn *cp, *ret=NULL; /* @@ -411,10 +413,18 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->vport == cp->cport && p->cport == cp->dport && + cport = cp->dport; + caddr = &cp->daddr; + + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + cport = cp->vport; + caddr = &cp->vaddr; + } + + if (p->vport == cp->cport && p->cport == cport && cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && + ip_vs_addr_equal(p->af, p->caddr, caddr) && p->protocol == cp->protocol && cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e3668a6e54e4..7ba88dab297a 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1413,8 +1413,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in ipvs, af, skb, &iph); if (likely(cp)) { - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + ip_vs_out_stats(cp, skb); + skb->ipvs_property = 1; goto ignore_cp; + } return handle_response(af, skb, pd, cp, &iph, hooknum); } -- 2.20.1 (Apple Git-117)
Hello, On Wed, 30 Sep 2020, longguang.yue wrote: > It's ipvs's duty to do traffic statistic if packets get hit, > no matter what mode it is. > > Signed-off-by: longguang.yue <bigclouds@163.com> > --- > net/netfilter/ipvs/ip_vs_conn.c | 14 ++++++++++++-- > net/netfilter/ipvs/ip_vs_core.c | 5 ++++- > 2 files changed, 16 insertions(+), 3 deletions(-) > > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c > index a90b8eac16ac..c4d164ce8ca7 100644 > --- a/net/netfilter/ipvs/ip_vs_conn.c > +++ b/net/netfilter/ipvs/ip_vs_conn.c > @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) > struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) > { > unsigned int hash; > + __be16 cport; > + const union nf_inet_addr *caddr; > struct ip_vs_conn *cp, *ret=NULL; May be we can do it in few rounds, here is a list of some initial notes... caddr/cport are misleading, can be saddr/sport (source) or laddr/lport (local). > /* > @@ -411,10 +413,18 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) > rcu_read_lock(); > > hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { Lets first check here for cp->cport before touching more cache lines while traversing the list, it eliminates the cost of next checks: if (p->vport != cp->cport) continue; then if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { ... > - if (p->vport == cp->cport && p->cport == cp->dport && > + cport = cp->dport; > + caddr = &cp->daddr; > + > + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { > + cport = cp->vport; > + caddr = &cp->vaddr; > + } Considering the issues solved by commit 3c5ab3f395d6, such check more correctly matches the replies from DR/TUN real server to local clients but also to remote clients if director is used as router. > + > + if (p->vport == cp->cport && p->cport == cport && if (p->cport == sport && ... > cp->af == p->af && > ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && > - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && > + ip_vs_addr_equal(p->af, p->caddr, caddr) && > p->protocol == cp->protocol && > cp->ipvs == p->ipvs) { > if (!__ip_vs_conn_get(cp)) > diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c > index e3668a6e54e4..7ba88dab297a 100644 > --- a/net/netfilter/ipvs/ip_vs_core.c > +++ b/net/netfilter/ipvs/ip_vs_core.c > @@ -1413,8 +1413,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in > ipvs, af, skb, &iph); > > if (likely(cp)) { > - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) > + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { > + ip_vs_out_stats(cp, skb); > + skb->ipvs_property = 1; We will also need: if (!(cp->flags & IP_VS_CONN_F_NFCT)) ip_vs_notrack(skb); Similar code is needed in handle_response_icmp(), so that we account ICMP packets, where a jump to new label before ip_vs_out_stats() can work. But such jump is preferred even for handle_response() because the (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) check should be moved from ip_vs_out() into handle_response(). For this to work the ip_vs_set_state() call in handle_response() should be moved before the new label and ip_vs_out_stats() call. > goto ignore_cp; > + } > return handle_response(af, skb, pd, cp, &iph, hooknum); > } Regards -- Julian Anastasov <ja@ssi.bg>
It's ipvs's duty to do traffic statistic if packets get hit, no matter what mode it is. Changes in v1: support DR/TUN mode statistic Changes in v2: ip_vs_conn_out_get handles DR/TUN mode's conn Changes in v3: fix checkpatch Changes in v4: restructure and optimise this feature Signed-off-by: longguang.yue <bigclouds@163.com> --- net/netfilter/ipvs/ip_vs_conn.c | 18 +++++++++++++++--- net/netfilter/ipvs/ip_vs_core.c | 24 +++++++++++++++++------- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a90b8eac16ac..af08ca2d9174 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; + __be16 sport; + const union nf_inet_addr *saddr; struct ip_vs_conn *cp, *ret=NULL; /* @@ -411,10 +413,20 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->vport == cp->cport && p->cport == cp->dport && - cp->af == p->af && + if (p->vport != cp->cport) + continue; + + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + sport = cp->vport; + saddr = &cp->vaddr; + } else { + sport = cp->dport; + saddr = &cp->daddr; + } + + if (p->cport == sport && cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && + ip_vs_addr_equal(p->af, p->caddr, saddr) && p->protocol == cp->protocol && cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e3668a6e54e4..315289aecad7 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -911,6 +911,10 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_update_conntrack(skb, cp, 0); ignore_cp: + ip_vs_out_stats(cp, skb); + skb->ipvs_property = 1; + if (!(cp->flags & IP_VS_CONN_F_NFCT)) + ip_vs_notrack(skb); verdict = NF_ACCEPT; out: @@ -1276,6 +1280,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct ip_vs_protocol *pp = pd->pp; + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; + IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (skb_ensure_writable(skb, iph->len)) @@ -1328,6 +1335,16 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, LeaveFunction(11); return NF_ACCEPT; +ignore_cp: + ip_vs_out_stats(cp, skb); + skb->ipvs_property = 1; + if (!(cp->flags & IP_VS_CONN_F_NFCT)) + ip_vs_notrack(skb); + __ip_vs_conn_put(cp); + + LeaveFunction(11); + return NF_ACCEPT; + drop: ip_vs_conn_put(cp); kfree_skb(skb); @@ -1413,8 +1430,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in ipvs, af, skb, &iph); if (likely(cp)) { - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); } @@ -1475,14 +1490,9 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } -out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; - -ignore_cp: - __ip_vs_conn_put(cp); - goto out; } /* -- 2.20.1 (Apple Git-117)
Hello, On Sat, 3 Oct 2020, longguang.yue wrote: > It's ipvs's duty to do traffic statistic if packets get hit, > no matter what mode it is. > > Changes in v1: support DR/TUN mode statistic > Changes in v2: ip_vs_conn_out_get handles DR/TUN mode's conn > Changes in v3: fix checkpatch > Changes in v4: restructure and optimise this feature Changes should be after --- > Signed-off-by: longguang.yue <bigclouds@163.com> > --- > net/netfilter/ipvs/ip_vs_conn.c | 18 +++++++++++++++--- > net/netfilter/ipvs/ip_vs_core.c | 24 +++++++++++++++++------- > 2 files changed, 32 insertions(+), 10 deletions(-) > > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c > index a90b8eac16ac..af08ca2d9174 100644 > --- a/net/netfilter/ipvs/ip_vs_conn.c > +++ b/net/netfilter/ipvs/ip_vs_conn.c > @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) > struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) > { > unsigned int hash; > + __be16 sport; > + const union nf_inet_addr *saddr; > struct ip_vs_conn *cp, *ret=NULL; > > /* > @@ -411,10 +413,20 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) > rcu_read_lock(); > > hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { > - if (p->vport == cp->cport && p->cport == cp->dport && > - cp->af == p->af && > + if (p->vport != cp->cport) > + continue; > + > + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { > + sport = cp->vport; > + saddr = &cp->vaddr; > + } else { > + sport = cp->dport; > + saddr = &cp->daddr; > + } > + > + if (p->cport == sport && cp->af == p->af && > ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && > - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && > + ip_vs_addr_equal(p->af, p->caddr, saddr) && > p->protocol == cp->protocol && > cp->ipvs == p->ipvs) { > if (!__ip_vs_conn_get(cp)) > diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c > index e3668a6e54e4..315289aecad7 100644 > --- a/net/netfilter/ipvs/ip_vs_core.c > +++ b/net/netfilter/ipvs/ip_vs_core.c > @@ -911,6 +911,10 @@ static int handle_response_icmp(int af, struct sk_buff *skb, > ip_vs_update_conntrack(skb, cp, 0); > Something is wrong here. May be it should be as simple as that (ignore_cp is moved and renamed to after_nat): @@ -875,7 +875,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; + goto after_nat; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -901,6 +901,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; +after_nat: /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); @@ -909,8 +910,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); - -ignore_cp: verdict = NF_ACCEPT; out: > ignore_cp: > + ip_vs_out_stats(cp, skb); > + skb->ipvs_property = 1; > + if (!(cp->flags & IP_VS_CONN_F_NFCT)) > + ip_vs_notrack(skb); > verdict = NF_ACCEPT; > > out: > @@ -1276,6 +1280,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, > { > struct ip_vs_protocol *pp = pd->pp; > > + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) > + goto ignore_cp; > + > IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); > > if (skb_ensure_writable(skb, iph->len)) > @@ -1328,6 +1335,16 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, > LeaveFunction(11); > return NF_ACCEPT; > > +ignore_cp: > + ip_vs_out_stats(cp, skb); > + skb->ipvs_property = 1; > + if (!(cp->flags & IP_VS_CONN_F_NFCT)) > + ip_vs_notrack(skb); > + __ip_vs_conn_put(cp); > + > + LeaveFunction(11); > + return NF_ACCEPT; > + For handle_response() too, the code is already there: @@ -1278,6 +1277,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto after_nat; + if (skb_ensure_writable(skb, iph->len)) goto drop; @@ -1316,6 +1318,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); +after_nat: ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; What we do is: - switch to bidirectional state changes by calling ip_vs_set_state with IP_VS_DIR_OUTPUT, we have packets in both directions just like MASQ. See set_tcp_state(). - use ip_vs_conn_put() like MASQ because this packet is part of the connection, not __ip_vs_conn_put(). This will schedule proper timeout according to the state. - __ip_vs_conn_put() is inappropriate for the last handle_response() call because real server can initiate new connection (see ip_vs_new_conn_out) with service using IP_VS_SVC_F_ONEPACKET flag. > drop: > ip_vs_conn_put(cp); > kfree_skb(skb); > @@ -1413,8 +1430,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in > ipvs, af, skb, &iph); > > if (likely(cp)) { > - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) > - goto ignore_cp; > return handle_response(af, skb, pd, cp, &iph, hooknum); > } > > @@ -1475,14 +1490,9 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in > } > } > > -out: > IP_VS_DBG_PKT(12, af, pp, skb, iph.off, > "ip_vs_out: packet continues traversal as normal"); > return NF_ACCEPT; > - > -ignore_cp: > - __ip_vs_conn_put(cp); > - goto out; > } > > /* Regards -- Julian Anastasov <ja@ssi.bg>
It's ipvs's duty to do traffic statistic if packets get hit, no matter what mode it is. ---------------------- Changes in v1: support DR/TUN mode statistic Changes in v2: ip_vs_conn_out_get handles DR/TUN mode's conn Changes in v3: fix checkpatch Changes in v4, v5: restructure and optimise this feature ---------------------- Signed-off-by: longguang.yue <bigclouds@163.com> --- net/netfilter/ipvs/ip_vs_conn.c | 18 +++++++++++++++--- net/netfilter/ipvs/ip_vs_core.c | 17 ++++++----------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a90b8eac16ac..af08ca2d9174 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; + __be16 sport; + const union nf_inet_addr *saddr; struct ip_vs_conn *cp, *ret=NULL; /* @@ -411,10 +413,20 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->vport == cp->cport && p->cport == cp->dport && - cp->af == p->af && + if (p->vport != cp->cport) + continue; + + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + sport = cp->vport; + saddr = &cp->vaddr; + } else { + sport = cp->dport; + saddr = &cp->daddr; + } + + if (p->cport == sport && cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && + ip_vs_addr_equal(p->af, p->caddr, saddr) && p->protocol == cp->protocol && cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e3668a6e54e4..494ea1fcf4d8 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -875,7 +875,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; + goto after_nat; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -900,7 +900,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; - +after_nat: /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); @@ -909,8 +909,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); - -ignore_cp: verdict = NF_ACCEPT; out: @@ -1276,6 +1274,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct ip_vs_protocol *pp = pd->pp; + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto after_nat; + IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (skb_ensure_writable(skb, iph->len)) @@ -1316,6 +1317,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); +after_nat: ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; @@ -1413,8 +1415,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in ipvs, af, skb, &iph); if (likely(cp)) { - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); } @@ -1475,14 +1475,9 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } -out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; - -ignore_cp: - __ip_vs_conn_put(cp); - goto out; } /* -- 2.20.1 (Apple Git-117)
Hello, On Sun, 4 Oct 2020, longguang.yue wrote: > It's ipvs's duty to do traffic statistic if packets get hit, > no matter what mode it is. > > ---------------------- > Changes in v1: support DR/TUN mode statistic > Changes in v2: ip_vs_conn_out_get handles DR/TUN mode's conn > Changes in v3: fix checkpatch > Changes in v4, v5: restructure and optimise this feature Tested it, patch works. But we should add info about all new changes that v5 brings. First, the changelog between versions is usually to inform maintainers what is changed. Its place is after the "---" line when we do not want it to be part of the commit log. Such long "-" lines are not needed. See "14) The canonical patch format" from Documentation/process/submitting-patches.rst As for commit log, we can add such info before your two lines, for example: Just like for MASQ, inspect the reply packets coming from DR/TUN real servers and alter the connection's state and timeout according to the protocol. Then your 2 lines will specify that stats are accounted too. The Subject can be changed too, for example: [PATCH v6 net-next] ipvs: inspect reply packets from DR/TUN real servers > ---------------------- > > Signed-off-by: longguang.yue <bigclouds@163.com> > --- > net/netfilter/ipvs/ip_vs_conn.c | 18 +++++++++++++++--- > net/netfilter/ipvs/ip_vs_core.c | 17 ++++++----------- > 2 files changed, 21 insertions(+), 14 deletions(-) > > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c > index a90b8eac16ac..af08ca2d9174 100644 > --- a/net/netfilter/ipvs/ip_vs_conn.c > +++ b/net/netfilter/ipvs/ip_vs_conn.c > @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) > struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) > { > unsigned int hash; > + __be16 sport; > + const union nf_inet_addr *saddr; > struct ip_vs_conn *cp, *ret=NULL; Developers prefer the reverse xmas tree order of variables where they are listed from longest to shortest, so we can do it at least for new changes: struct ip_vs_conn *cp, *ret=NULL; + const union nf_inet_addr *saddr; + __be16 sport; > /* > @@ -411,10 +413,20 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) > rcu_read_lock(); > > hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { > - if (p->vport == cp->cport && p->cport == cp->dport && > - cp->af == p->af && > + if (p->vport != cp->cport) > + continue; > + > + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { > + sport = cp->vport; > + saddr = &cp->vaddr; > + } else { > + sport = cp->dport; > + saddr = &cp->daddr; > + } > + > + if (p->cport == sport && cp->af == p->af && > ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && > - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && > + ip_vs_addr_equal(p->af, p->caddr, saddr) && > p->protocol == cp->protocol && > cp->ipvs == p->ipvs) { > if (!__ip_vs_conn_get(cp)) > diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c > index e3668a6e54e4..494ea1fcf4d8 100644 > --- a/net/netfilter/ipvs/ip_vs_core.c > +++ b/net/netfilter/ipvs/ip_vs_core.c > @@ -875,7 +875,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, > unsigned int verdict = NF_DROP; > > if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) > - goto ignore_cp; > + goto after_nat; > > /* Ensure the checksum is correct */ > if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { > @@ -900,7 +900,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, > > if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) > goto out; > - > +after_nat: > /* do the statistics and put it back */ > ip_vs_out_stats(cp, skb); > > @@ -909,8 +909,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, > ip_vs_notrack(skb); > else > ip_vs_update_conntrack(skb, cp, 0); > - > -ignore_cp: > verdict = NF_ACCEPT; > > out: > @@ -1276,6 +1274,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, > { > struct ip_vs_protocol *pp = pd->pp; > > + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) > + goto after_nat; > + > IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); > > if (skb_ensure_writable(skb, iph->len)) > @@ -1316,6 +1317,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, > > IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); > > +after_nat: > ip_vs_out_stats(cp, skb); > ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); > skb->ipvs_property = 1; > @@ -1413,8 +1415,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in > ipvs, af, skb, &iph); > > if (likely(cp)) { > - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) > - goto ignore_cp; > return handle_response(af, skb, pd, cp, &iph, hooknum); > } > > @@ -1475,14 +1475,9 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in > } > } > > -out: > IP_VS_DBG_PKT(12, af, pp, skb, iph.off, > "ip_vs_out: packet continues traversal as normal"); > return NF_ACCEPT; > - > -ignore_cp: > - __ip_vs_conn_put(cp); > - goto out; > } > > /* > -- > 2.20.1 (Apple Git-117) Regards -- Julian Anastasov <ja@ssi.bg>
Just like for MASQ, inspect the reply packets coming from DR/TUN real servers and alter the connection's state and timeout according to the protocol. It's ipvs's duty to do traffic statistic if packets get hit, no matter what mode it is. --- Changes in v1: support DR/TUN mode statistic Changes in v2: ip_vs_conn_out_get handles DR/TUN mode's conn Changes in v3: fix checkpatch Changes in v4, v5: restructure and optimise this feature Changes in v6: rewrite subject and patch description Signed-off-by: longguang.yue <bigclouds@163.com> --- net/netfilter/ipvs/ip_vs_conn.c | 18 +++++++++++++++--- net/netfilter/ipvs/ip_vs_core.c | 17 ++++++----------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a90b8eac16ac..af08ca2d9174 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -401,6 +401,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; + __be16 sport; + const union nf_inet_addr *saddr; struct ip_vs_conn *cp, *ret=NULL; /* @@ -411,10 +413,20 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->vport == cp->cport && p->cport == cp->dport && - cp->af == p->af && + if (p->vport != cp->cport) + continue; + + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + sport = cp->vport; + saddr = &cp->vaddr; + } else { + sport = cp->dport; + saddr = &cp->daddr; + } + + if (p->cport == sport && cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && + ip_vs_addr_equal(p->af, p->caddr, saddr) && p->protocol == cp->protocol && cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e3668a6e54e4..494ea1fcf4d8 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -875,7 +875,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; + goto after_nat; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -900,7 +900,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; - +after_nat: /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); @@ -909,8 +909,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); - -ignore_cp: verdict = NF_ACCEPT; out: @@ -1276,6 +1274,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct ip_vs_protocol *pp = pd->pp; + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto after_nat; + IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (skb_ensure_writable(skb, iph->len)) @@ -1316,6 +1317,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); +after_nat: ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; @@ -1413,8 +1415,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in ipvs, af, skb, &iph); if (likely(cp)) { - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); } @@ -1475,14 +1475,9 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } -out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; - -ignore_cp: - __ip_vs_conn_put(cp); - goto out; } /* -- 2.20.1 (Apple Git-117)