linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, "zhengbin (A)" <zhengbin13@huawei.com>,
	Al Viro <viro@zeniv.linux.org.uk>
Subject: [PATCH 4.19 71/81] Fix the locking in dcache_readdir() and friends
Date: Wed, 16 Oct 2019 14:51:22 -0700	[thread overview]
Message-ID: <20191016214846.589242144@linuxfoundation.org> (raw)
In-Reply-To: <20191016214805.727399379@linuxfoundation.org>

From: Al Viro <viro@zeniv.linux.org.uk>

commit d4f4de5e5ef8efde85febb6876cd3c8ab1631999 upstream.

There are two problems in dcache_readdir() - one is that lockless traversal
of the list needs non-trivial cooperation of d_alloc() (at least a switch
to list_add_rcu(), and probably more than just that) and another is that
it assumes that no removal will happen without the directory locked exclusive.
Said assumption had always been there, never had been stated explicitly and
is violated by several places in the kernel (devpts and selinuxfs).

        * replacement of next_positive() with different calling conventions:
it returns struct list_head * instead of struct dentry *; the latter is
passed in and out by reference, grabbing the result and dropping the original
value.
        * scan is under ->d_lock.  If we run out of timeslice, cursor is moved
after the last position we'd reached and we reschedule; then the scan continues
from that place.  To avoid livelocks between multiple lseek() (with cursors
getting moved past each other, never reaching the real entries) we always
skip the cursors, need_resched() or not.
        * returned list_head * is either ->d_child of dentry we'd found or
->d_subdirs of parent (if we got to the end of the list).
        * dcache_readdir() and dcache_dir_lseek() switched to new helper.
dcache_readdir() always holds a reference to dentry passed to dir_emit() now.
Cursor is moved to just before the entry where dir_emit() has failed or into
the very end of the list, if we'd run out.
        * move_cursor() eliminated - it had sucky calling conventions and
after fixing that it became simply list_move() (in lseek and scan_positives)
or list_move_tail() (in readdir).

        All operations with the list are under ->d_lock now, and we do not
depend upon having all file removals done with parent locked exclusive
anymore.

Cc: stable@vger.kernel.org
Reported-by: "zhengbin (A)" <zhengbin13@huawei.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 fs/libfs.c |  134 +++++++++++++++++++++++++++++++------------------------------
 1 file changed, 69 insertions(+), 65 deletions(-)

--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -86,58 +86,47 @@ int dcache_dir_close(struct inode *inode
 EXPORT_SYMBOL(dcache_dir_close);
 
 /* parent is locked at least shared */
-static struct dentry *next_positive(struct dentry *parent,
-				    struct list_head *from,
-				    int count)
+/*
+ * Returns an element of siblings' list.
+ * We are looking for <count>th positive after <p>; if
+ * found, dentry is grabbed and passed to caller via *<res>.
+ * If no such element exists, the anchor of list is returned
+ * and *<res> is set to NULL.
+ */
+static struct list_head *scan_positives(struct dentry *cursor,
+					struct list_head *p,
+					loff_t count,
+					struct dentry **res)
 {
-	unsigned *seq = &parent->d_inode->i_dir_seq, n;
-	struct dentry *res;
-	struct list_head *p;
-	bool skipped;
-	int i;
+	struct dentry *dentry = cursor->d_parent, *found = NULL;
 
-retry:
-	i = count;
-	skipped = false;
-	n = smp_load_acquire(seq) & ~1;
-	res = NULL;
-	rcu_read_lock();
-	for (p = from->next; p != &parent->d_subdirs; p = p->next) {
+	spin_lock(&dentry->d_lock);
+	while ((p = p->next) != &dentry->d_subdirs) {
 		struct dentry *d = list_entry(p, struct dentry, d_child);
-		if (!simple_positive(d)) {
-			skipped = true;
-		} else if (!--i) {
-			res = d;
-			break;
+		// we must at least skip cursors, to avoid livelocks
+		if (d->d_flags & DCACHE_DENTRY_CURSOR)
+			continue;
+		if (simple_positive(d) && !--count) {
+			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+			if (simple_positive(d))
+				found = dget_dlock(d);
+			spin_unlock(&d->d_lock);
+			if (likely(found))
+				break;
+			count = 1;
+		}
+		if (need_resched()) {
+			list_move(&cursor->d_child, p);
+			p = &cursor->d_child;
+			spin_unlock(&dentry->d_lock);
+			cond_resched();
+			spin_lock(&dentry->d_lock);
 		}
 	}
-	rcu_read_unlock();
-	if (skipped) {
-		smp_rmb();
-		if (unlikely(*seq != n))
-			goto retry;
-	}
-	return res;
-}
-
-static void move_cursor(struct dentry *cursor, struct list_head *after)
-{
-	struct dentry *parent = cursor->d_parent;
-	unsigned n, *seq = &parent->d_inode->i_dir_seq;
-	spin_lock(&parent->d_lock);
-	for (;;) {
-		n = *seq;
-		if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
-			break;
-		cpu_relax();
-	}
-	__list_del(cursor->d_child.prev, cursor->d_child.next);
-	if (after)
-		list_add(&cursor->d_child, after);
-	else
-		list_add_tail(&cursor->d_child, &parent->d_subdirs);
-	smp_store_release(seq, n + 2);
-	spin_unlock(&parent->d_lock);
+	spin_unlock(&dentry->d_lock);
+	dput(*res);
+	*res = found;
+	return p;
 }
 
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
@@ -153,17 +142,28 @@ loff_t dcache_dir_lseek(struct file *fil
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
+		struct dentry *cursor = file->private_data;
+		struct dentry *to = NULL;
+		struct list_head *p;
+
 		file->f_pos = offset;
-		if (file->f_pos >= 2) {
-			struct dentry *cursor = file->private_data;
-			struct dentry *to;
-			loff_t n = file->f_pos - 2;
-
-			inode_lock_shared(dentry->d_inode);
-			to = next_positive(dentry, &dentry->d_subdirs, n);
-			move_cursor(cursor, to ? &to->d_child : NULL);
-			inode_unlock_shared(dentry->d_inode);
+		inode_lock_shared(dentry->d_inode);
+
+		if (file->f_pos > 2) {
+			p = scan_positives(cursor, &dentry->d_subdirs,
+					   file->f_pos - 2, &to);
+			spin_lock(&dentry->d_lock);
+			list_move(&cursor->d_child, p);
+			spin_unlock(&dentry->d_lock);
+		} else {
+			spin_lock(&dentry->d_lock);
+			list_del_init(&cursor->d_child);
+			spin_unlock(&dentry->d_lock);
 		}
+
+		dput(to);
+
+		inode_unlock_shared(dentry->d_inode);
 	}
 	return offset;
 }
@@ -185,25 +185,29 @@ int dcache_readdir(struct file *file, st
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct dentry *cursor = file->private_data;
-	struct list_head *p = &cursor->d_child;
-	struct dentry *next;
-	bool moved = false;
+	struct list_head *anchor = &dentry->d_subdirs;
+	struct dentry *next = NULL;
+	struct list_head *p;
 
 	if (!dir_emit_dots(file, ctx))
 		return 0;
 
 	if (ctx->pos == 2)
-		p = &dentry->d_subdirs;
-	while ((next = next_positive(dentry, p, 1)) != NULL) {
+		p = anchor;
+	else
+		p = &cursor->d_child;
+
+	while ((p = scan_positives(cursor, p, 1, &next)) != anchor) {
 		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
 			      d_inode(next)->i_ino, dt_type(d_inode(next))))
 			break;
-		moved = true;
-		p = &next->d_child;
 		ctx->pos++;
 	}
-	if (moved)
-		move_cursor(cursor, p);
+	spin_lock(&dentry->d_lock);
+	list_move_tail(&cursor->d_child, p);
+	spin_unlock(&dentry->d_lock);
+	dput(next);
+
 	return 0;
 }
 EXPORT_SYMBOL(dcache_readdir);



  parent reply	other threads:[~2019-10-16 22:08 UTC|newest]

Thread overview: 104+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-16 21:50 [PATCH 4.19 00/81] 4.19.80-stable review Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 01/81] panic: ensure preemption is disabled during panic() Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 02/81] f2fs: use EINVAL for superblock with invalid magic Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 03/81] USB: rio500: Remove Rio 500 kernel driver Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 04/81] USB: yurex: Dont retry on unexpected errors Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 05/81] USB: yurex: fix NULL-derefs on disconnect Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 06/81] USB: usb-skeleton: fix runtime PM after driver unbind Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 07/81] USB: usb-skeleton: fix NULL-deref on disconnect Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 08/81] xhci: Fix false warning message about wrong bounce buffer write length Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 09/81] xhci: Prevent device initiated U1/U2 link pm if exit latency is too long Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 10/81] xhci: Check all endpoints for LPM timeout Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 11/81] xhci: Fix USB 3.1 capability detection on early xHCI 1.1 spec based hosts Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 12/81] usb: xhci: wait for CNR controller not ready bit in xhci resume Greg Kroah-Hartman
2019-10-18 17:28   ` Pavel Machek
2019-10-16 21:50 ` [PATCH 4.19 13/81] xhci: Prevent deadlock when xhci adapter breaks during init Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 14/81] xhci: Increase STS_SAVE timeout in xhci_suspend() Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 15/81] USB: adutux: fix use-after-free on disconnect Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 16/81] USB: adutux: fix NULL-derefs " Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 17/81] USB: adutux: fix use-after-free on release Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 18/81] USB: iowarrior: fix use-after-free on disconnect Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 19/81] USB: iowarrior: fix use-after-free on release Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 20/81] USB: iowarrior: fix use-after-free after driver unbind Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 21/81] USB: usblp: fix runtime PM " Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 22/81] USB: chaoskey: fix use-after-free on release Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 23/81] USB: ldusb: fix NULL-derefs on driver unbind Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 24/81] serial: uartlite: fix exit path null pointer Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 25/81] USB: serial: keyspan: fix NULL-derefs on open() and write() Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 26/81] USB: serial: ftdi_sio: add device IDs for Sienna and Echelon PL-20 Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 27/81] USB: serial: option: add Telit FN980 compositions Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 28/81] USB: serial: option: add support for Cinterion CLS8 devices Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 29/81] USB: serial: fix runtime PM after driver unbind Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 30/81] USB: usblcd: fix I/O after disconnect Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 31/81] USB: microtek: fix info-leak at probe Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 32/81] USB: dummy-hcd: fix power budget for SuperSpeed mode Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 33/81] usb: renesas_usbhs: gadget: Do not discard queues in usb_ep_set_{halt,wedge}() Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 34/81] usb: renesas_usbhs: gadget: Fix usb_ep_set_{halt,wedge}() behavior Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 35/81] USB: legousbtower: fix slab info leak at probe Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 36/81] USB: legousbtower: fix deadlock on disconnect Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 37/81] USB: legousbtower: fix potential NULL-deref " Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 38/81] USB: legousbtower: fix open after failed reset request Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 39/81] USB: legousbtower: fix use-after-free on release Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 40/81] mei: me: add comet point (lake) LP device ids Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 41/81] mei: avoid FW version request on Ibex Peak and earlier Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 42/81] gpio: eic: sprd: Fix the incorrect EIC offset when toggling Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 43/81] Staging: fbtft: fix memory leak in fbtft_framebuffer_alloc Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 44/81] staging: vt6655: Fix memory leak in vt6655_probe Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 45/81] iio: adc: hx711: fix bug in sampling of data Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 46/81] iio: adc: ad799x: fix probe error handling Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 47/81] iio: adc: axp288: Override TS pin bias current for some models Greg Kroah-Hartman
2019-10-16 21:50 ` [PATCH 4.19 48/81] iio: light: opt3001: fix mutex unlock race Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 49/81] efivar/ssdt: Dont iterate over EFI vars if no SSDT override was specified Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 50/81] perf llvm: Dont access out-of-scope array Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 51/81] perf inject jit: Fix JIT_CODE_MOVE filename Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 52/81] blk-wbt: fix performance regression in wbt scale_up/scale_down Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 53/81] CIFS: Gracefully handle QueryInfo errors during open Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 54/81] CIFS: Force revalidate inode when dentry is stale Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 55/81] CIFS: Force reval dentry if LOOKUP_REVAL flag is set Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 56/81] kernel/sysctl.c: do not override max_threads provided by userspace Greg Kroah-Hartman
2019-10-17 10:59   ` Pavel Machek
2019-10-17 11:05     ` Michal Hocko
2019-10-17 11:25       ` David Laight
2019-10-17 11:39         ` Michal Hocko
2019-11-18 15:25       ` Pavel Machek
2019-11-18 15:52         ` Michal Hocko
2019-10-16 21:51 ` [PATCH 4.19 57/81] mm/vmpressure.c: fix a signedness bug in vmpressure_register_event() Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 58/81] firmware: google: increment VPD key_len properly Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 59/81] gpiolib: dont clear FLAG_IS_OUT when emulating open-drain/open-source Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 60/81] iio: adc: stm32-adc: move registers definitions Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 61/81] iio: adc: stm32-adc: fix a race when using several adcs with dma and irq Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 62/81] cifs: use cifsInodeInfo->open_file_lock while iterating to avoid a panic Greg Kroah-Hartman
2019-10-17  8:55   ` Pavel Machek
2019-10-17 16:01     ` Greg Kroah-Hartman
2019-10-17 17:19       ` Sasha Levin
2019-10-16 21:51 ` [PATCH 4.19 63/81] btrfs: fix incorrect updating of log root tree Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 64/81] btrfs: fix uninitialized ret in ref-verify Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 65/81] NFS: Fix O_DIRECT accounting of number of bytes read/written Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 66/81] MIPS: Disable Loongson MMI instructions for kernel build Greg Kroah-Hartman
2020-08-26 21:06   ` Guenter Roeck
2020-09-03  9:26     ` Greg Kroah-Hartman
2020-09-07  3:35       ` Philippe Mathieu-Daudé
2020-09-24 13:54         ` Thomas Bogendoerfer
2019-10-16 21:51 ` [PATCH 4.19 67/81] MIPS: elf_hwcap: Export userspace ASEs Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 68/81] ACPICA: ACPI 6.3: PPTT add additional fields in Processor Structure Flags Greg Kroah-Hartman
2019-10-17  8:59   ` Pavel Machek
2019-10-17 15:59     ` Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 69/81] ACPI/PPTT: Add support for ACPI 6.3 thread flag Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 70/81] arm64: topology: Use PPTT to determine if PE is a thread Greg Kroah-Hartman
2019-10-16 21:51 ` Greg Kroah-Hartman [this message]
2019-10-16 21:51 ` [PATCH 4.19 72/81] media: stkwebcam: fix runtime PM after driver unbind Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 73/81] arm64/sve: Fix wrong free for task->thread.sve_state Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 74/81] tracing/hwlat: Report total time spent in all NMIs during the sample Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 75/81] tracing/hwlat: Dont ignore outer-loop duration when calculating max_latency Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 76/81] ftrace: Get a reference counter for the trace_array on filter files Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 77/81] tracing: Get trace_array reference for available_tracers files Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 78/81] hwmon: Fix HWMON_P_MIN_ALARM mask Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 79/81] x86/asm: Fix MWAITX C-state hint value Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 80/81] PCI: vmd: Fix config addressing when using bus offsets Greg Kroah-Hartman
2019-10-16 21:51 ` [PATCH 4.19 81/81] perf/hw_breakpoint: Fix arch_hw_breakpoint use-before-initialization Greg Kroah-Hartman
2019-10-17  4:42 ` [PATCH 4.19 00/81] 4.19.80-stable review kernelci.org bot
2019-10-17 13:50 ` Naresh Kamboju
2019-10-17 15:02 ` shuah
2019-10-17 18:04 ` Guenter Roeck
2019-10-17 18:35 ` Didik Setiawan
2019-10-18  8:00 ` Jon Hunter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191016214846.589242144@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=zhengbin13@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).