linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: stable-review@kernel.org, torvalds@linux-foundation.org,
	akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk,
	Eric Dumazet <eric.dumazet@gmail.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [13/80] udp: add rehash on connect()
Date: Fri, 24 Sep 2010 09:24:01 -0700	[thread overview]
Message-ID: <20100924162616.028002259@clark.site> (raw)
In-Reply-To: <20100924162706.GA7381@kroah.com>

2.6.35-stable review patch.  If anyone has any objections, please let us know.

------------------


From: Eric Dumazet <eric.dumazet@gmail.com>

commit 719f835853a92f6090258114a72ffe41f09155cd upstream

commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation)
added a secondary hash on UDP, hashed on (local addr, local port).

The problem is that the following sequence:

fd = socket(...)
connect(fd, &remote, ...)

not only selects the remote endpoint (address and port), but also sets
the local address, whereas the UDP stack inserted the socket into the
secondary hash table while its local address was still INADDR_ANY (or
the IPv6 equivalent).

Sequence is :
 - autobind() : choose a random local port, insert socket in hash tables
              [while local address is INADDR_ANY]
 - connect() : set remote address and port, change local address to IP
              given by a route lookup.

When an incoming UDP frame arrives, if more than 10 sockets are found in
the primary hash table, we switch to the secondary table, and fail to
find the socket because its local address changed.

One solution to this problem is to rehash datagram socket if needed.

We add a new rehash(struct sock *) method in "struct proto", and
implement this method for UDP v4 & v6, using a common helper.

This rehashing only takes care of secondary hash table, since primary
hash (based on local port only) is not changed.

Reported-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/net/sock.h  |    1 +
 include/net/udp.h   |    1 +
 net/ipv4/datagram.c |    5 ++++-
 net/ipv4/udp.c      |   44 ++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/datagram.c |    7 ++++++-
 net/ipv6/udp.c      |   10 ++++++++++
 6 files changed, 66 insertions(+), 2 deletions(-)

--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -749,6 +749,7 @@ struct proto {
 	/* Keeping track of sk's, looking them up, and port selection methods. */
 	void			(*hash)(struct sock *sk);
 	void			(*unhash)(struct sock *sk);
+	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
 
 	/* Keeping track of sockets in use */
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct s
 }
 
 extern void udp_lib_unhash(struct sock *sk);
+extern void udp_lib_rehash(struct sock *sk, u16 new_hash);
 
 static inline void udp_lib_close(struct sock *sk, long timeout)
 {
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk
 	}
 	if (!inet->inet_saddr)
 		inet->inet_saddr = rt->rt_src;	/* Update source address */
-	if (!inet->inet_rcv_saddr)
+	if (!inet->inet_rcv_saddr) {
 		inet->inet_rcv_saddr = rt->rt_src;
+		if (sk->sk_prot->rehash)
+			sk->sk_prot->rehash(sk);
+	}
 	inet->inet_daddr = rt->rt_dst;
 	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk)
 }
 EXPORT_SYMBOL(udp_lib_unhash);
 
+/*
+ * inet_rcv_saddr was changed, we must rehash secondary hash
+ */
+void udp_lib_rehash(struct sock *sk, u16 newhash)
+{
+	if (sk_hashed(sk)) {
+		struct udp_table *udptable = sk->sk_prot->h.udp_table;
+		struct udp_hslot *hslot, *hslot2, *nhslot2;
+
+		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+		nhslot2 = udp_hashslot2(udptable, newhash);
+		udp_sk(sk)->udp_portaddr_hash = newhash;
+		if (hslot2 != nhslot2) {
+			hslot = udp_hashslot(udptable, sock_net(sk),
+					     udp_sk(sk)->udp_port_hash);
+			/* we must lock primary chain too */
+			spin_lock_bh(&hslot->lock);
+
+			spin_lock(&hslot2->lock);
+			hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+			hslot2->count--;
+			spin_unlock(&hslot2->lock);
+
+			spin_lock(&nhslot2->lock);
+			hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+						 &nhslot2->head);
+			nhslot2->count++;
+			spin_unlock(&nhslot2->lock);
+
+			spin_unlock_bh(&hslot->lock);
+		}
+	}
+}
+EXPORT_SYMBOL(udp_lib_rehash);
+
+static void udp_v4_rehash(struct sock *sk)
+{
+	u16 new_hash = udp4_portaddr_hash(sock_net(sk),
+					  inet_sk(sk)->inet_rcv_saddr,
+					  inet_sk(sk)->inet_num);
+	udp_lib_rehash(sk, new_hash);
+}
+
 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
@@ -1843,6 +1886,7 @@ struct proto udp_prot = {
 	.backlog_rcv	   = __udp_queue_rcv_skb,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
+	.rehash		   = udp_v4_rehash,
 	.get_port	   = udp_v4_get_port,
 	.memory_allocated  = &udp_memory_allocated,
 	.sysctl_mem	   = sysctl_udp_mem,
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -104,9 +104,12 @@ ipv4_connected:
 		if (ipv6_addr_any(&np->saddr))
 			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 
-		if (ipv6_addr_any(&np->rcv_saddr))
+		if (ipv6_addr_any(&np->rcv_saddr)) {
 			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
 					       &np->rcv_saddr);
+			if (sk->sk_prot->rehash)
+				sk->sk_prot->rehash(sk);
+		}
 
 		goto out;
 	}
@@ -191,6 +194,8 @@ ipv4_connected:
 	if (ipv6_addr_any(&np->rcv_saddr)) {
 		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
 		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
+		if (sk->sk_prot->rehash)
+			sk->sk_prot->rehash(sk);
 	}
 
 	ip6_dst_store(sk, dst,
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, uns
 	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
 }
 
+static void udp_v6_rehash(struct sock *sk)
+{
+	u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+					  &inet6_sk(sk)->rcv_saddr,
+					  inet_sk(sk)->inet_num);
+
+	udp_lib_rehash(sk, new_hash);
+}
+
 static inline int compute_score(struct sock *sk, struct net *net,
 				unsigned short hnum,
 				struct in6_addr *saddr, __be16 sport,
@@ -1452,6 +1461,7 @@ struct proto udpv6_prot = {
 	.backlog_rcv	   = udpv6_queue_rcv_skb,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
+	.rehash		   = udp_v6_rehash,
 	.get_port	   = udp_v6_get_port,
 	.memory_allocated  = &udp_memory_allocated,
 	.sysctl_mem	   = sysctl_udp_mem,



  parent reply	other threads:[~2010-09-24 16:59 UTC|newest]

Thread overview: 99+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-24 16:27 [00/80] 2.6.35.6 stable review Greg KH
2010-09-24 16:23 ` [01/80] usb: musb_debugfs: dont use the struct file private_data field with seq_files Greg KH
2010-09-24 16:23 ` [02/80] USB: serial/mos*: prevent reading uninitialized stack memory Greg KH
2010-09-24 16:23 ` [03/80] bridge: Clear INET control block of SKBs passed into ip_fragment() Greg KH
2010-09-24 16:23 ` [04/80] gro: fix different skb headrooms Greg KH
2010-09-24 16:23 ` [05/80] gro: Re-fix " Greg KH
2010-09-24 16:23 ` [06/80] irda: Correctly clean up self->ias_obj on irda_bind() failure Greg KH
2010-09-24 16:23 ` [07/80] rds: fix a leak of kernel memory Greg KH
2010-09-24 16:23 ` [08/80] net: RPS needs to depend upon USE_GENERIC_SMP_HELPERS Greg KH
2010-09-24 16:23 ` [09/80] tcp: Combat per-cpu skew in orphan tests Greg KH
2010-09-24 16:23 ` [10/80] tcp: fix three tcp sysctls tuning Greg KH
2010-09-24 16:23 ` [11/80] tcp: select(writefds) dont hang up when a peer close connection Greg KH
2010-09-24 16:24 ` [12/80] tcp: Prevent overzealous packetization by SWS logic Greg KH
2010-09-24 16:24 ` Greg KH [this message]
2010-09-24 16:24 ` [14/80] UNIX: Do not loop forever at unix_autobind() Greg KH
2010-09-24 16:24 ` [15/80] l2tp: test for ethernet header in l2tp_eth_dev_recv() Greg KH
2010-09-24 16:24 ` [16/80] net: blackhole route should always be recalculated Greg KH
2010-09-24 16:24 ` [17/80] sparc64: Get rid of indirect p1275 PROM call buffer Greg KH
2010-09-24 16:24 ` [18/80] drivers/net/usb/hso.c: prevent reading uninitialized memory Greg KH
2010-09-24 16:24 ` [19/80] drivers/net/cxgb3/cxgb3_main.c: prevent reading uninitialized stack memory Greg KH
2010-09-24 16:24 ` [20/80] drivers/net/eql.c: " Greg KH
2010-09-24 16:24 ` [21/80] bonding: correctly process non-linear skbs Greg KH
2010-09-24 16:24 ` [22/80] Staging: vt6655: fix buffer overflow Greg KH
2010-09-24 16:24 ` [23/80] net/llc: make opt unsigned in llc_ui_setsockopt() Greg KH
2010-09-24 16:24 ` [24/80] mm: fix swapin race condition Greg KH
2010-09-24 16:24 ` [25/80] mm: further " Greg KH
2010-09-24 16:24 ` [26/80] virtio: console: Prevent userspace from submitting NULL buffers Greg KH
2010-09-24 16:24 ` [27/80] virtio: console: Fix poll blocking even though there is data to read Greg KH
2010-09-24 16:24 ` [28/80] intel_agp, drm/i915: Add all sandybridge graphics devices support Greg KH
2010-09-24 16:24 ` [29/80] agp/intel: fix physical address mask bits for sandybridge Greg KH
2010-09-24 16:24 ` [30/80] agp/intel: fix dma mask bits on sandybridge Greg KH
2010-09-24 16:24 ` [31/80] hw breakpoints: Fix pid namespace bug Greg KH
2010-09-24 16:24 ` [32/80] pid: make setpgid() system call use RCU read-side critical section Greg KH
2010-09-24 16:24 ` [33/80] sched: Fix user time incorrectly accounted as system time on 32-bit Greg KH
2010-09-24 16:24 ` [34/80] oprofile: Add Support for Intel CPU Family 6 / Model 22 (Intel Celeron 540) Greg KH
2010-09-24 16:24 ` [35/80] drm/i915,agp/intel: Add second set of PCI-IDs for B43 Greg KH
2010-09-24 16:24 ` [36/80] bdi: Initialize noop_backing_dev_info properly Greg KH
2010-09-24 16:24 ` [37/80] bdi: Fix warnings in __mark_inode_dirty for /dev/zero and friends Greg KH
2010-09-24 16:24 ` [38/80] char: Mark /dev/zero and /dev/kmem as not capable of writeback Greg KH
2010-09-24 16:24 ` [39/80] drivers/pci/intel-iommu.c: fix build with older gccs Greg KH
2010-09-24 16:24 ` [40/80] mmap: call unlink_anon_vmas() in __split_vma() in case of error Greg KH
2010-09-24 16:24 ` [41/80] drivers/video/sis/sis_main.c: prevent reading uninitialized stack memory Greg KH
2010-09-24 16:24 ` [42/80] rtc: s3c: balance state changes of wakeup flag Greg KH
2010-09-24 16:24 ` [43/80] Prevent freeing uninitialized pointer in compat_do_readv_writev Greg KH
2010-09-24 16:24 ` [44/80] /proc/vmcore: fix seeking Greg KH
2010-09-24 16:24 ` [45/80] vmscan: check all_unreclaimable in direct reclaim path Greg KH
2010-09-24 16:24 ` [46/80] percpu: fix pcpu_last_unit_cpu Greg KH
2010-09-24 16:24 ` [47/80] aio: do not return ERESTARTSYS as a result of AIO Greg KH
2010-09-24 16:24 ` [48/80] aio: check for multiplication overflow in do_io_submit Greg KH
2010-09-24 16:24 ` [49/80] x86 platform drivers: hp-wmi Reorder event id processing Greg KH
2010-09-24 16:24 ` [50/80] GFS2: gfs2_logd should be using interruptible waits Greg KH
2010-09-24 16:24 ` [51/80] drm/nv50: initialize ramht_refs list for faked 0 channel Greg KH
2010-09-24 16:24 ` [52/80] inotify: send IN_UNMOUNT events Greg KH
2010-09-24 16:24 ` [53/80] SCSI: mptsas: fix hangs caused by ATA pass-through Greg KH
2010-09-27 17:47   ` John Drescher
2010-09-24 16:24 ` [54/80] KVM: Keep slot ID in memory slot structure Greg KH
2010-09-24 16:24 ` [55/80] KVM: Prevent internal slots from being COWed Greg KH
2010-09-24 16:24 ` [56/80] KVM: MMU: fix direct sps access corrupted Greg KH
2010-09-24 16:24 ` [57/80] KVM: x86: emulator: inc/dec can have lock prefix Greg KH
2010-09-24 16:24 ` [58/80] KVM: MMU: fix mmu notifier invalidate handler for huge spte Greg KH
2010-09-24 16:24 ` [59/80] KVM: VMX: Fix host GDT.LIMIT corruption Greg KH
2010-09-24 16:24 ` [60/80] IA64: fix siglock Greg KH
2010-09-24 16:24 ` [61/80] IA64: Optimize ticket spinlocks in fsys_rt_sigprocmask Greg KH
2010-09-24 16:24 ` [62/80] KEYS: Fix RCU no-lock warning in keyctl_session_to_parent() Greg KH
2010-09-24 16:24 ` [63/80] KEYS: Fix bug in keyctl_session_to_parent() if parent has no session keyring Greg KH
2010-09-24 16:24 ` [64/80] xfs: prevent reading uninitialized stack memory Greg KH
2010-09-24 16:24 ` [65/80] drivers/video/via/ioctl.c: " Greg KH
2010-09-24 16:24 ` [66/80] AT91: change dma resource index Greg KH
2010-09-24 16:24 ` [67/80] PM: Prevent waiting forever on asynchronous resume after failing suspend Greg KH
2010-09-24 16:24 ` [68/80] PM / Hibernate: Avoid hitting OOM during preallocation of memory Greg KH
2010-09-24 16:24 ` [69/80] x86, asm: Use a lower case name for the end macro in atomic64_386_32.S Greg KH
2010-09-24 16:24 ` [70/80] ALSA: hda - Fix beep frequency on IDT 92HD73xx and 92HD71Bxx codecs Greg KH
2010-09-24 16:24 ` [71/80] Fix call to replaced SuperIO functions Greg KH
2010-09-24 16:25 ` [72/80] dell-wmi: Add support for eject key on Dell Studio 1555 Greg KH
2010-09-24 16:25 ` [73/80] mm: page allocator: drain per-cpu lists after direct reclaim allocation fails Greg KH
2010-09-24 16:25 ` [74/80] mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake Greg KH
2010-09-24 16:25 ` [75/80] mm: page allocator: update free page counters after pages are placed on the free list Greg KH
2010-09-24 16:25 ` [76/80] guard page for stacks that grow upwards Greg KH
2010-09-24 16:25 ` [77/80] Fix unprotected access to task credentials in waitid() Greg KH
2010-09-24 16:25 ` [78/80] sctp: Do not reset the packet during sctp_packet_config() Greg KH
2010-09-24 16:25 ` [79/80] drm/i915: Ensure that the crtcinfo is populated during mode_fixup() Greg KH
2010-09-24 16:25 ` [80/80] alpha: Fix printk format errors Greg KH
2010-09-24 20:49 ` [00/80] 2.6.35.6 stable review Gene Heskett
2010-09-25 15:02   ` Greg KH
2010-09-25 15:16     ` Gene Heskett
2010-09-25 23:52       ` Gene Heskett
2010-09-25 16:49 ` Piotr Hosowicz
2010-09-25 17:24   ` Greg KH
2010-09-25 17:30     ` Piotr Hosowicz
2010-09-25 17:42       ` Greg KH
2010-09-25 17:52         ` Piotr Hosowicz
2010-09-26 11:32           ` Greg KH
2010-09-26 13:02             ` Piotr Hosowicz
2010-09-26 13:10               ` Sven Joachim
2010-09-26 13:15                 ` Piotr Hosowicz
2010-09-25 17:34     ` Piotr Hosowicz
2010-09-25 17:41       ` Greg KH
2010-09-25 17:47         ` Piotr Hosowicz
2010-09-25 17:49         ` Piotr Hosowicz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100924162616.028002259@clark.site \
    --to=gregkh@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable-review@kernel.org \
    --cc=stable@kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).