All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] e2fsprogs: fix deadlock issus in unix_io on write errors
@ 2023-01-31  6:15 Theodore Ts'o
  2023-01-31  6:15 ` [PATCH 1/3] libext2fs: unix_io: add flag which suppresses calling the write error handler Theodore Ts'o
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Theodore Ts'o @ 2023-01-31  6:15 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: zhanchengbin1, linfeilong, Theodore Ts'o

This problem was reported by [1] but the fix was incorrect.  The issue
is that when unix_io was made thread-safe, it was necessary that to
add a CACHE_MUTEX to protect multiple threads from potentially
colliding with the very simple writeback cache used by the unix_io I/O
manager.  The original I/O manager was purposefully kept simple, used
a fixed-size cache; accordingly, the locking used also kept simple,
and used a single global mutex.

[1] https://lore.kernel.org/r/310fb77f-dfed-1196-c4ee-30d5138ee5a2@huawei.com

The problem was that if an application (such as e2fsck) registers a
write error handler, that handler would be called with the CACHE_MUTEX
still held, and if that application tried to do any I/O --- for
example, closing the file system using ext2fs_close() and then exiting
--- the application would deadlock.

The fixes here are good enough for the use case found in e2fsprogs,
and in practice there are very few other users of the error handler
feature in libext2fs.





*** BLURB HERE ***

Theodore Ts'o (3):
  libext2fs: unix_io: add flag which suppresses calling the write error
    handler
  libext2fs: unix_io: fix potential error path deadlock in reuse_cache()
  libext2fs: unix_io: fix_potential error path deadlock in
    flush_cached_blocks()

 lib/ext2fs/unix_io.c | 151 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 126 insertions(+), 25 deletions(-)

-- 
2.31.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] libext2fs: unix_io: add flag which suppresses calling the write error handler
  2023-01-31  6:15 [PATCH 0/3] e2fsprogs: fix deadlock issus in unix_io on write errors Theodore Ts'o
@ 2023-01-31  6:15 ` Theodore Ts'o
  2023-01-31  6:15 ` [PATCH 2/3] libext2fs: unix_io: fix potential error path deadlock in reuse_cache() Theodore Ts'o
  2023-01-31  6:15 ` [PATCH 3/3] libext2fs: unix_io: fix_potential error path deadlock in flush_cached_blocks() Theodore Ts'o
  2 siblings, 0 replies; 4+ messages in thread
From: Theodore Ts'o @ 2023-01-31  6:15 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: zhanchengbin1, linfeilong, Theodore Ts'o

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 lib/ext2fs/unix_io.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c
index e53db333..02d7fe1a 100644
--- a/lib/ext2fs/unix_io.c
+++ b/lib/ext2fs/unix_io.c
@@ -337,10 +337,13 @@ error_unlock:
 	return retval;
 }
 
+#define RAW_WRITE_NO_HANDLER	1
+
 static errcode_t raw_write_blk(io_channel channel,
 			       struct unix_private_data *data,
 			       unsigned long long block,
-			       int count, const void *bufv)
+			       int count, const void *bufv,
+			       int flags)
 {
 	ssize_t		size;
 	ext2_loff_t	location;
@@ -482,7 +485,7 @@ bounce_write:
 error_unlock:
 	mutex_unlock(data, BOUNCE_MTX);
 error_out:
-	if (channel->write_error)
+	if (((flags & RAW_WRITE_NO_HANDLER) == 0) && channel->write_error)
 		retval = (channel->write_error)(channel, block, count, buf,
 						size, actual, retval);
 	return retval;
@@ -580,7 +583,7 @@ static void reuse_cache(io_channel channel, struct unix_private_data *data,
 		 struct unix_cache *cache, unsigned long long block)
 {
 	if (cache->dirty && cache->in_use)
-		raw_write_blk(channel, data, cache->block, 1, cache->buf);
+		raw_write_blk(channel, data, cache->block, 1, cache->buf, 0);
 
 	cache->in_use = 1;
 	cache->dirty = 0;
@@ -616,7 +619,7 @@ static errcode_t flush_cached_blocks(io_channel channel,
 			continue;
 
 		retval = raw_write_blk(channel, data,
-				       cache->block, 1, cache->buf);
+				       cache->block, 1, cache->buf, 0);
 		if (retval)
 			retval2 = retval;
 		else
@@ -1067,10 +1070,10 @@ static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
 
 #ifdef NO_IO_CACHE
-	return raw_write_blk(channel, data, block, count, buf);
+	return raw_write_blk(channel, data, block, count, buf, 0);
 #else
 	if (data->flags & IO_FLAG_NOCACHE)
-		return raw_write_blk(channel, data, block, count, buf);
+		return raw_write_blk(channel, data, block, count, buf, 0);
 	/*
 	 * If we're doing an odd-sized write or a very large write,
 	 * flush out the cache completely and then do a direct write.
@@ -1079,7 +1082,7 @@ static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
 		if ((retval = flush_cached_blocks(channel, data,
 						  FLUSH_INVALIDATE)))
 			return retval;
-		return raw_write_blk(channel, data, block, count, buf);
+		return raw_write_blk(channel, data, block, count, buf, 0);
 	}
 
 	/*
@@ -1089,7 +1092,7 @@ static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
 	 */
 	writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
 	if (writethrough)
-		retval = raw_write_blk(channel, data, block, count, buf);
+		retval = raw_write_blk(channel, data, block, count, buf, 0);
 
 	cp = buf;
 	mutex_lock(data, CACHE_MTX);
-- 
2.31.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] libext2fs: unix_io: fix potential error path deadlock in reuse_cache()
  2023-01-31  6:15 [PATCH 0/3] e2fsprogs: fix deadlock issus in unix_io on write errors Theodore Ts'o
  2023-01-31  6:15 ` [PATCH 1/3] libext2fs: unix_io: add flag which suppresses calling the write error handler Theodore Ts'o
@ 2023-01-31  6:15 ` Theodore Ts'o
  2023-01-31  6:15 ` [PATCH 3/3] libext2fs: unix_io: fix_potential error path deadlock in flush_cached_blocks() Theodore Ts'o
  2 siblings, 0 replies; 4+ messages in thread
From: Theodore Ts'o @ 2023-01-31  6:15 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: zhanchengbin1, linfeilong, Theodore Ts'o

This was reported by [1] but the fix was incorrect.  The issue is that
when unix_io was made thread-safe, it was necessary that to add a
CACHE_MUTEX to protect multiple threads from potentially colliding
with the very simple writeback cache used by the unix_io I/O manager.
The original I/O manager was purposefully kept simple, used a
fixed-size cache; accordingly, the locking used also kept simple, and
used a single global mutex.

[1] https://lore.kernel.org/r/310fb77f-dfed-1196-c4ee-30d5138ee5a2@huawei.com

The problem was that if an application (such as e2fsck) registers a
write error handler, that handler would be called with the CACHE_MUTEX
still held, and if that application tried to do any I/O --- for
example, closing the file system using ext2fs_close() and then exiting
--- the application would deadlock.

We should perhaps fix this either by deciding that the simple Unix I/O
cache doesn't actually buy much beyond some system call overhead, or
by putting in a full-fledged buffer I/O cache system which uses a much
larger cache with allocated memory, fine-grained locking and Direct
I/O to prevent double cache at the kernel and userspace level.
However, for now, fix the problem by waiting until after we have
released the CACHE_MUTEX before calling the write handler.  This is
good enough given how e2fsck's ehandler.c use case, and in practice no
one else really uses the error handler in any case.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 lib/ext2fs/unix_io.c | 75 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 69 insertions(+), 6 deletions(-)

diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c
index 02d7fe1a..2e108a2f 100644
--- a/lib/ext2fs/unix_io.c
+++ b/lib/ext2fs/unix_io.c
@@ -94,6 +94,7 @@ struct unix_cache {
 	int			access_time;
 	unsigned		dirty:1;
 	unsigned		in_use:1;
+	unsigned		write_err:1;
 };
 
 #define CACHE_SIZE 8
@@ -579,16 +580,27 @@ static struct unix_cache *find_cached_block(struct unix_private_data *data,
 /*
  * Reuse a particular cache entry for another block.
  */
-static void reuse_cache(io_channel channel, struct unix_private_data *data,
-		 struct unix_cache *cache, unsigned long long block)
+static errcode_t reuse_cache(io_channel channel,
+		struct unix_private_data *data, struct unix_cache *cache,
+		unsigned long long block)
 {
-	if (cache->dirty && cache->in_use)
-		raw_write_blk(channel, data, cache->block, 1, cache->buf, 0);
+	if (cache->dirty && cache->in_use) {
+		errcode_t retval;
+
+		retval = raw_write_blk(channel, data, cache->block, 1,
+				       cache->buf, RAW_WRITE_NO_HANDLER);
+		if (retval) {
+			cache->write_err = 1;
+			return retval;
+		}
+	}
 
 	cache->in_use = 1;
 	cache->dirty = 0;
+	cache->write_err = 0;
 	cache->block = block;
 	cache->access_time = ++data->access_time;
+	return 0;
 }
 
 #define FLUSH_INVALIDATE	0x01
@@ -1037,7 +1049,10 @@ static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
 		/* Save the results in the cache */
 		for (j=0; j < i; j++) {
 			if (!find_cached_block(data, block, &cache)) {
-				reuse_cache(channel, data, cache, block);
+				retval = reuse_cache(channel, data,
+						     cache, block);
+				if (retval)
+					goto call_write_handler;
 				memcpy(cache->buf, cp, channel->block_size);
 			}
 			count--;
@@ -1047,6 +1062,28 @@ static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
 	}
 	mutex_unlock(data, CACHE_MTX);
 	return 0;
+
+call_write_handler:
+	if (cache->write_err && channel->write_error) {
+		char *err_buf = NULL;
+		unsigned long long err_block = cache->block;
+
+		cache->dirty = 0;
+		cache->in_use = 0;
+		cache->write_err = 0;
+		if (io_channel_alloc_buf(channel, 0, &err_buf))
+			err_buf = NULL;
+		else
+			memcpy(err_buf, cache->buf, channel->block_size);
+		mutex_unlock(data, CACHE_MTX);
+		(channel->write_error)(channel, err_block, 1, err_buf,
+				       channel->block_size, -1,
+				       retval);
+		if (err_buf)
+			ext2fs_free_mem(&err_buf);
+	} else
+		mutex_unlock(data, CACHE_MTX);
+	return retval;
 #endif /* NO_IO_CACHE */
 }
 
@@ -1099,8 +1136,12 @@ static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
 	while (count > 0) {
 		cache = find_cached_block(data, block, &reuse);
 		if (!cache) {
+			errcode_t err;
+
 			cache = reuse;
-			reuse_cache(channel, data, cache, block);
+			err = reuse_cache(channel, data, cache, block);
+			if (err)
+				goto call_write_handler;
 		}
 		if (cache->buf != cp)
 			memcpy(cache->buf, cp, channel->block_size);
@@ -1111,6 +1152,28 @@ static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
 	}
 	mutex_unlock(data, CACHE_MTX);
 	return retval;
+
+call_write_handler:
+	if (cache->write_err && channel->write_error) {
+		char *err_buf = NULL;
+		unsigned long long err_block = cache->block;
+
+		cache->dirty = 0;
+		cache->in_use = 0;
+		cache->write_err = 0;
+		if (io_channel_alloc_buf(channel, 0, &err_buf))
+			err_buf = NULL;
+		else
+			memcpy(err_buf, cache->buf, channel->block_size);
+		mutex_unlock(data, CACHE_MTX);
+		(channel->write_error)(channel, err_block, 1, err_buf,
+				       channel->block_size, -1,
+				       retval);
+		if (err_buf)
+			ext2fs_free_mem(&err_buf);
+	} else
+		mutex_unlock(data, CACHE_MTX);
+	return retval;
 #endif /* NO_IO_CACHE */
 }
 
-- 
2.31.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] libext2fs: unix_io: fix_potential error path deadlock in flush_cached_blocks()
  2023-01-31  6:15 [PATCH 0/3] e2fsprogs: fix deadlock issus in unix_io on write errors Theodore Ts'o
  2023-01-31  6:15 ` [PATCH 1/3] libext2fs: unix_io: add flag which suppresses calling the write error handler Theodore Ts'o
  2023-01-31  6:15 ` [PATCH 2/3] libext2fs: unix_io: fix potential error path deadlock in reuse_cache() Theodore Ts'o
@ 2023-01-31  6:15 ` Theodore Ts'o
  2 siblings, 0 replies; 4+ messages in thread
From: Theodore Ts'o @ 2023-01-31  6:15 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: zhanchengbin1, linfeilong, Theodore Ts'o

We can't call the error handler while holding the CACHE_MUTEX (see
previous commit, "libext2fs: unix_io: fix_potential error path
deadlock in reuse_cache()" for details), so first try to write out all
of the dirty blocks in the cache, and then for those where we had
errors, then call the error handler.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 lib/ext2fs/unix_io.c | 61 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c
index 2e108a2f..353d85af 100644
--- a/lib/ext2fs/unix_io.c
+++ b/lib/ext2fs/unix_io.c
@@ -614,31 +614,66 @@ static errcode_t flush_cached_blocks(io_channel channel,
 				     int flags)
 {
 	struct unix_cache	*cache;
-	errcode_t		retval, retval2;
+	errcode_t		retval, retval2 = 0;
 	int			i;
+	int			errors_found = 0;
 
-	retval2 = 0;
 	if ((flags & FLUSH_NOLOCK) == 0)
 		mutex_lock(data, CACHE_MTX);
 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
-		if (!cache->in_use)
+		if (!cache->in_use || !cache->dirty)
 			continue;
-
-		if (flags & FLUSH_INVALIDATE)
-			cache->in_use = 0;
-
-		if (!cache->dirty)
-			continue;
-
 		retval = raw_write_blk(channel, data,
-				       cache->block, 1, cache->buf, 0);
-		if (retval)
+				       cache->block, 1, cache->buf,
+				       RAW_WRITE_NO_HANDLER);
+		if (retval) {
+			cache->write_err = 1;
+			errors_found = 1;
 			retval2 = retval;
-		else
+		} else {
 			cache->dirty = 0;
+			cache->write_err = 0;
+			if (flags & FLUSH_INVALIDATE)
+				cache->in_use = 0;
+		}
 	}
 	if ((flags & FLUSH_NOLOCK) == 0)
 		mutex_unlock(data, CACHE_MTX);
+retry:
+	while (errors_found) {
+		if ((flags & FLUSH_NOLOCK) == 0)
+			mutex_lock(data, CACHE_MTX);
+		errors_found = 0;
+		for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
+			if (!cache->in_use || !cache->write_err)
+				continue;
+			errors_found = 1;
+			if (cache->write_err && channel->write_error) {
+				char *err_buf = NULL;
+				unsigned long long err_block = cache->block;
+
+				cache->dirty = 0;
+				cache->in_use = 0;
+				cache->write_err = 0;
+				if (io_channel_alloc_buf(channel, 0,
+							 &err_buf))
+					err_buf = NULL;
+				else
+					memcpy(err_buf, cache->buf,
+					       channel->block_size);
+				mutex_unlock(data, CACHE_MTX);
+				(channel->write_error)(channel, err_block,
+					1, err_buf, channel->block_size, -1,
+					retval2);
+				if (err_buf)
+					ext2fs_free_mem(&err_buf);
+				goto retry;
+			} else
+				cache->write_err = 0;
+		}
+		if ((flags & FLUSH_NOLOCK) == 0)
+			mutex_unlock(data, CACHE_MTX);
+	}
 	return retval2;
 }
 #endif /* NO_IO_CACHE */
-- 
2.31.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-01-31  6:16 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-31  6:15 [PATCH 0/3] e2fsprogs: fix deadlock issus in unix_io on write errors Theodore Ts'o
2023-01-31  6:15 ` [PATCH 1/3] libext2fs: unix_io: add flag which suppresses calling the write error handler Theodore Ts'o
2023-01-31  6:15 ` [PATCH 2/3] libext2fs: unix_io: fix potential error path deadlock in reuse_cache() Theodore Ts'o
2023-01-31  6:15 ` [PATCH 3/3] libext2fs: unix_io: fix_potential error path deadlock in flush_cached_blocks() Theodore Ts'o

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.