stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Mike Galbraith <efault@gmx.de>, Jakub Kicinski <kuba@kernel.org>,
	Sasha Levin <sashal@kernel.org>,
	davem@davemloft.net, yoshfuji@linux-ipv6.org, dsahern@kernel.org,
	pabeni@redhat.com, netdev@vger.kernel.org
Subject: [PATCH AUTOSEL 5.10 25/65] tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.
Date: Fri,  1 Apr 2022 10:41:26 -0400	[thread overview]
Message-ID: <20220401144206.1953700-25-sashal@kernel.org> (raw)
In-Reply-To: <20220401144206.1953700-1-sashal@kernel.org>

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

[ Upstream commit 4f9bf2a2f5aacf988e6d5e56b961ba45c5a25248 ]

Commit
   9652dc2eb9e40 ("tcp: relax listening_hash operations")

removed the need to disable bottom halves while acquiring
listening_hash.lock. There are still two callers left which disable
bottom halves before the lock is acquired.

On PREEMPT_RT the softirqs are preemptible and local_bh_disable() acts
as a lock to ensure that resources that are protected by disabling
bottom halves remain protected.
This leads to a circular locking dependency if a lock that is acquired
with bottom halves disabled is also acquired with bottom halves enabled
and bottom halves are then disabled while it is held. This is the
reverse locking order.
It has been observed with inet_listen_hashbucket::lock:

local_bh_disable() + spin_lock(&ilb->lock):
  inet_listen()
    inet_csk_listen_start()
      sk->sk_prot->hash() := inet_hash()
	local_bh_disable()
	__inet_hash()
	  spin_lock(&ilb->lock);
	    acquire(&ilb->lock);

Reverse order: spin_lock(&ilb2->lock) + local_bh_disable():
  tcp_seq_next()
    listening_get_next()
      spin_lock(&ilb2->lock);
	acquire(&ilb2->lock);

  tcp4_seq_show()
    get_tcp4_sock()
      sock_i_ino()
	read_lock_bh(&sk->sk_callback_lock);
	  acquire(softirq_ctrl)	// <---- whoops
	  acquire(&sk->sk_callback_lock)

Drop local_bh_disable() around __inet_hash(), which acquires
listening_hash->lock. Split inet_unhash() so that the listen_hashbucket
lock is acquired without disabling bottom halves, while the inet_ehash
lock is still acquired with bottom halves disabled.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/12d6f9879a97cd56c09fb53dee343cbb14f7f1f7.camel@gmx.de
Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net
Link: https://lore.kernel.org/r/YgQOebeZ10eNx1W6@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/ipv4/inet_hashtables.c  | 53 ++++++++++++++++++++++---------------
 net/ipv6/inet6_hashtables.c |  5 +---
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e093847c334d..915b8e1bd9ef 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
 	int err = 0;
 
 	if (sk->sk_state != TCP_LISTEN) {
+		local_bh_disable();
 		inet_ehash_nolisten(sk, osk, NULL);
+		local_bh_enable();
 		return 0;
 	}
 	WARN_ON(!sk_unhashed(sk));
@@ -669,45 +671,54 @@ int inet_hash(struct sock *sk)
 {
 	int err = 0;
 
-	if (sk->sk_state != TCP_CLOSE) {
-		local_bh_disable();
+	if (sk->sk_state != TCP_CLOSE)
 		err = __inet_hash(sk, NULL);
-		local_bh_enable();
-	}
 
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_hash);
 
-void inet_unhash(struct sock *sk)
+static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
 {
-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	struct inet_listen_hashbucket *ilb = NULL;
-	spinlock_t *lock;
-
 	if (sk_unhashed(sk))
 		return;
 
-	if (sk->sk_state == TCP_LISTEN) {
-		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
-		lock = &ilb->lock;
-	} else {
-		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-	}
-	spin_lock_bh(lock);
-	if (sk_unhashed(sk))
-		goto unlock;
-
 	if (rcu_access_pointer(sk->sk_reuseport_cb))
 		reuseport_detach_sock(sk);
 	if (ilb) {
+		struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
 		inet_unhash2(hashinfo, sk);
 		ilb->count--;
 	}
 	__sk_nulls_del_node_init_rcu(sk);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-unlock:
-	spin_unlock_bh(lock);
+}
+
+void inet_unhash(struct sock *sk)
+{
+	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
+	if (sk_unhashed(sk))
+		return;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		struct inet_listen_hashbucket *ilb;
+
+		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+		/* Don't disable bottom halves while acquiring the lock to
+		 * avoid circular locking dependency on PREEMPT_RT.
+		 */
+		spin_lock(&ilb->lock);
+		__inet_unhash(sk, ilb);
+		spin_unlock(&ilb->lock);
+	} else {
+		spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+		spin_lock_bh(lock);
+		__inet_unhash(sk, NULL);
+		spin_unlock_bh(lock);
+	}
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 67c9114835c8..0a2e7f228391 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
 {
 	int err = 0;
 
-	if (sk->sk_state != TCP_CLOSE) {
-		local_bh_disable();
+	if (sk->sk_state != TCP_CLOSE)
 		err = __inet_hash(sk, NULL);
-		local_bh_enable();
-	}
 
 	return err;
 }
-- 
2.34.1


  parent reply	other threads:[~2022-04-01 15:02 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-01 14:41 [PATCH AUTOSEL 5.10 01/65] drm: Add orientation quirk for GPD Win Max Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 02/65] ath5k: fix OOB in ath5k_eeprom_read_pcal_info_5111 Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 03/65] drm/amd/display: Add signal type check when verify stream backends same Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 04/65] drm/amd/amdgpu/amdgpu_cs: fix refcount leak of a dma_fence obj Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 05/65] usb: gadget: tegra-xudc: Do not program SPARAM Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 06/65] usb: gadget: tegra-xudc: Fix control endpoint's definitions Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 07/65] ptp: replace snprintf with sysfs_emit Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 08/65] powerpc: dts: t104xrdb: fix phy type for FMAN 4/5 Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 09/65] ath11k: fix kernel panic during unload/load ath11k modules Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 10/65] ath11k: mhi: use mhi_sync_power_up() Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 11/65] bpf: Make dst_port field in struct bpf_sock 16-bit wide Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 12/65] scsi: mvsas: Replace snprintf() with sysfs_emit() Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 13/65] scsi: bfa: " Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 14/65] power: supply: axp20x_battery: properly report current when discharging Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 15/65] mt76: dma: initialize skip_unmap in mt76_dma_rx_fill Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 16/65] cfg80211: don't add non transmitted BSS to 6GHz scanned channels Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 17/65] libbpf: Fix build issue with llvm-readelf Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 18/65] ipv6: make mc_forwarding atomic Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 19/65] powerpc: Set crashkernel offset to mid of RMA region Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 20/65] drm/amdgpu: Fix recursive locking warning Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 21/65] PCI: aardvark: Fix support for MSI interrupts Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 22/65] iommu/arm-smmu-v3: fix event handling soft lockup Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 23/65] usb: ehci: add pci device support for Aspeed platforms Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 24/65] PCI: endpoint: Fix alignment fault error in copy tests Sasha Levin
2022-04-01 14:41 ` Sasha Levin [this message]
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 26/65] PCI: pciehp: Add Qualcomm quirk for Command Completed erratum Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 27/65] power: supply: axp288-charger: Set Vhold to 4.4V Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 28/65] iwlwifi: mvm: Correctly set fragmented EBS Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 29/65] ipv4: Invalidate neighbour for broadcast address upon address addition Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 30/65] dm ioctl: prevent potential spectre v1 gadget Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 31/65] dm: requeue IO if mapping table not yet available Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 32/65] drm/amdkfd: make CRAT table missing message informational only Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 33/65] scsi: pm8001: Fix pm80xx_pci_mem_copy() interface Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 34/65] scsi: pm8001: Fix pm8001_mpi_task_abort_resp() Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 35/65] scsi: pm8001: Fix task leak in pm8001_send_abort_all() Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 36/65] scsi: pm8001: Fix tag leaks on error Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 37/65] scsi: pm8001: Fix memory leak in pm8001_chip_fw_flash_update_req() Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 38/65] mt76: mt7615: Fix assigning negative values to unsigned variable Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 39/65] scsi: aha152x: Fix aha152x_setup() __setup handler return value Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 40/65] scsi: hisi_sas: Free irq vectors in order for v3 HW Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 41/65] net/smc: correct settings of RMB window update limit Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 42/65] mips: ralink: fix a refcount leak in ill_acc_of_setup() Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 43/65] macvtap: advertise link netns via netlink Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 44/65] tuntap: add sanity checks about msg_controllen in sendmsg Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 45/65] iommu/iova: Improve 32-bit free space estimate Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 46/65] Bluetooth: Fix not checking for valid hdev on bt_dev_{info,warn,err,dbg} Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 47/65] Bluetooth: use memset avoid memory leaks Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 48/65] bnxt_en: Eliminate unintended link toggle during FW reset Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 49/65] PCI: endpoint: Fix misused goto label Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 50/65] MIPS: fix fortify panic when copying asm exception handlers Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 51/65] powerpc/code-patching: Pre-map patch area Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 52/65] powerpc/secvar: fix refcount leak in format_show() Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 53/65] scsi: libfc: Fix use after free in fc_exch_abts_resp() Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 54/65] can: isotp: set default value for N_As to 50 micro seconds Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 55/65] net: account alternate interface name memory Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 56/65] net: limit altnames to 64k total Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 57/65] net: sfp: add 2500base-X quirk for Lantech SFP module Sasha Levin
2022-04-01 14:41 ` [PATCH AUTOSEL 5.10 58/65] usb: dwc3: omap: fix "unbalanced disables for smps10_out1" on omap5evm Sasha Levin
2022-04-01 14:42 ` [PATCH AUTOSEL 5.10 59/65] xtensa: fix DTC warning unit_address_format Sasha Levin
2022-04-01 14:42 ` [PATCH AUTOSEL 5.10 60/65] MIPS: ingenic: correct unit node address Sasha Levin
2022-04-01 14:42 ` [PATCH AUTOSEL 5.10 61/65] Bluetooth: Fix use after free in hci_send_acl Sasha Levin
2022-04-01 14:42 ` [PATCH AUTOSEL 5.10 62/65] netlabel: fix out-of-bounds memory accesses Sasha Levin
2022-04-01 14:42 ` [PATCH AUTOSEL 5.10 63/65] ceph: fix memory leak in ceph_readdir when note_last_dentry returns error Sasha Levin
2022-04-01 14:42 ` [PATCH AUTOSEL 5.10 64/65] init/main.c: return 1 from handled __setup() functions Sasha Levin
2022-04-01 14:42 ` [PATCH AUTOSEL 5.10 65/65] minix: fix bug when opening a file with O_DIRECT Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220401144206.1953700-25-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=bigeasy@linutronix.de \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=efault@gmx.de \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=stable@vger.kernel.org \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).