From: "Darrick J. Wong" <djwong@kernel.org>
To: djwong@kernel.org, jane.chu@oracle.com
Cc: linux-xfs@vger.kernel.org, hch@infradead.org,
dan.j.williams@intel.com, linux-fsdevel@vger.kernel.org
Subject: [PATCH 5/5] ext4: implement FALLOC_FL_ZEROINIT_RANGE
Date: Fri, 17 Sep 2021 18:31:12 -0700 [thread overview]
Message-ID: <163192867220.417973.4913917281472586603.stgit@magnolia> (raw)
In-Reply-To: <163192864476.417973.143014658064006895.stgit@magnolia>
From: Darrick J. Wong <djwong@kernel.org>
Implement this new fallocate mode so that persistent memory users can,
upon receipt of a pmem poison notification, cause the pmem to be
reinitialized to a known value (zero) and clear any hardware poison
state that might be lurking.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/ext4/extents.c | 93 +++++++++++++++++++++++++++++++++++++++++++
include/trace/events/ext4.h | 7 +++
2 files changed, 99 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c0de30f25185..c345002e2da6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -29,6 +29,7 @@
#include <linux/fiemap.h>
#include <linux/backing-dev.h>
#include <linux/iomap.h>
+#include <linux/dax.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "xattr.h"
@@ -4475,6 +4476,90 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
+static long ext4_zeroinit_range(struct file *file, loff_t offset, loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ struct address_space *mapping = inode->i_mapping;
+ handle_t *handle = NULL;
+ loff_t end = offset + len;
+ long ret;
+ /*
+ * Handle FALLOC_FL_ZEROINIT_RANGE for the byte range
+ * [offset, offset + len) of @file.
+ *
+ * The range is dropped from the page cache and then handed to
+ * dax_zeroinit_range() (DAX inodes) or iomap_zeroout_range() (all
+ * other inodes); afterwards the timestamp update is journalled.
+ *
+ * Returns 0 on success or a negative errno. data=journal inodes and
+ * inodes without the extents flag are rejected with -EOPNOTSUPP, and
+ * -ECANCELED from the zeroing helper is reported as -EOPNOTSUPP too.
+ * Nothing in this function itself updates i_size.
+ */
+ trace_ext4_zeroinit_range(inode, offset, len,
+ FALLOC_FL_ZEROINIT_RANGE | FALLOC_FL_KEEP_SIZE);
+
+ /* data=journal mode is not supported; bail out before taking any lock */
+ if (ext4_should_journal_data(inode))
+ return -EOPNOTSUPP;
+
+ inode_lock(inode);
+
+ /*
+ * Indirect files do not support unwritten extents
+ *
+ * NOTE(review): nothing visible in this function creates unwritten
+ * extents, so this rationale may be inherited from another fallocate
+ * mode -- confirm that extent-mapped files are really the intended
+ * restriction here.
+ */
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+ ret = -EOPNOTSUPP;
+ goto out_mutex;
+ }
+
+ /*
+ * Wait for all existing dio workers to finish; newcomers will block
+ * on the inode lock taken above.
+ */
+ inode_dio_wait(inode);
+
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache. The invalidate lock is held until out_mmap.
+ */
+ filemap_invalidate_lock(mapping);
+
+ ret = ext4_break_layouts(inode);
+ if (ret)
+ goto out_mmap;
+
+ /*
+ * Now release the pages and zero block aligned part of pages. The
+ * last argument is end - 1, i.e. the final byte of the range rather
+ * than one past it. The timestamps are bumped in memory here, before
+ * the zeroing is attempted; they are set again and marked dirty under
+ * the journal handle below once the zeroing has succeeded.
+ */
+ truncate_pagecache_range(inode, offset, end - 1);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
+
+ if (IS_DAX(inode))
+ ret = dax_zeroinit_range(inode, offset, len,
+ &ext4_iomap_report_ops);
+ else
+ ret = iomap_zeroout_range(inode, offset, len,
+ &ext4_iomap_report_ops);
+ if (ret == -ECANCELED)
+ ret = -EOPNOTSUPP;
+ if (ret)
+ goto out_mmap;
+
+ /*
+ * Start a transaction so the timestamp update can be journalled.
+ *
+ * NOTE(review): this comment used to say that in the worst case two
+ * nonadjacent unwritten blocks have to be written out in addition to
+ * the inode update, but only one credit is requested and only the
+ * inode is dirtied below -- confirm that one credit is sufficient.
+ */
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ ext4_std_error(inode->i_sb, ret);
+ goto out_mmap;
+ }
+
+ inode->i_mtime = inode->i_ctime = current_time(inode);
+ ret = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(ret))
+ goto out_handle;
+ ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
+ (offset + len - 1) >> inode->i_sb->s_blocksize_bits);
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+
+ if (file->f_flags & O_SYNC)
+ ext4_handle_sync(handle);
+
+out_handle:
+ ext4_journal_stop(handle);
+out_mmap:
+ filemap_invalidate_unlock(mapping);
+out_mutex:
+ inode_unlock(inode);
+ return ret;
+}
+
static long ext4_zero_range(struct file *file, loff_t offset,
loff_t len, int mode)
{
@@ -4659,7 +4744,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
/* Return error if mode is not supported */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
- FALLOC_FL_INSERT_RANGE))
+ FALLOC_FL_INSERT_RANGE | FALLOC_FL_ZEROINIT_RANGE))
return -EOPNOTSUPP;
ext4_fc_start_update(inode);
@@ -4687,6 +4772,12 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
ret = ext4_zero_range(file, offset, len, mode);
goto exit;
}
+
+ if (mode & FALLOC_FL_ZEROINIT_RANGE) {
+ ret = ext4_zeroinit_range(file, offset, len);
+ goto exit;
+ }
+
trace_ext4_fallocate_enter(inode, offset, len, mode);
lblk = offset >> blkbits;
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 0ea36b2b0662..282f1208067f 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -1407,6 +1407,13 @@ DEFINE_EVENT(ext4__fallocate_mode, ext4_zero_range,
TP_ARGS(inode, offset, len, mode)
);
+DEFINE_EVENT(ext4__fallocate_mode, ext4_zeroinit_range,
+
+ TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
+
+ TP_ARGS(inode, offset, len, mode)
+);
+
TRACE_EVENT(ext4_fallocate_exit,
TP_PROTO(struct inode *inode, loff_t offset,
unsigned int max_blocks, int ret),
next prev parent reply other threads:[~2021-09-18 1:31 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-18 1:30 [PATCHSET RFC v2 jane 0/5] vfs: enable userspace to reset damaged file storage Darrick J. Wong
2021-09-18 1:30 ` [PATCH 1/5] dax: prepare pmem for use by zero-initializing contents and clearing poisons Darrick J. Wong
2021-09-18 16:54 ` riteshh
2021-09-20 17:22 ` Darrick J. Wong
2021-09-21 4:07 ` riteshh
2021-09-22 18:26 ` Darrick J. Wong
2021-09-22 19:47 ` riteshh
2021-09-22 20:26 ` Dan Williams
2021-09-21 8:34 ` Christoph Hellwig
2021-09-22 18:10 ` Darrick J. Wong
2021-09-18 1:30 ` [PATCH 2/5] iomap: use accelerated zeroing on a block device to zero a file range Darrick J. Wong
2021-09-18 16:55 ` riteshh
2021-09-21 8:29 ` Christoph Hellwig
2021-09-22 18:53 ` Darrick J. Wong
2021-09-21 22:33 ` Dave Chinner
2021-09-22 18:54 ` Darrick J. Wong
2021-09-18 1:31 ` [PATCH 3/5] vfs: add a zero-initialization mode to fallocate Darrick J. Wong
2021-09-18 16:58 ` riteshh
2021-09-20 17:52 ` Eric Biggers
2021-09-20 18:06 ` Darrick J. Wong
2021-09-21 0:44 ` Dave Chinner
2021-09-21 8:31 ` Christoph Hellwig
2021-09-22 2:16 ` Dan Williams
2021-09-22 2:38 ` Darrick J. Wong
2021-09-22 3:59 ` Dave Chinner
2021-09-22 4:13 ` Darrick J. Wong
2021-09-22 5:49 ` Dave Chinner
2021-09-22 21:27 ` Darrick J. Wong
2021-09-23 0:02 ` Darrick J. Wong
2021-09-23 0:44 ` Darrick J. Wong
2021-09-23 1:42 ` Dave Chinner
2021-09-23 2:43 ` Dan Williams
2021-09-23 5:42 ` Dan Williams
2021-09-23 22:54 ` Dave Chinner
2021-09-24 1:18 ` Dan Williams
2021-09-24 1:21 ` Jane Chu
2021-09-24 1:35 ` Darrick J. Wong
2021-09-27 21:07 ` Dave Chinner
2021-09-27 21:57 ` Jane Chu
2021-09-28 0:08 ` Dan Williams
2021-09-22 5:28 ` riteshh
2021-09-18 1:31 ` [PATCH 4/5] xfs: implement FALLOC_FL_ZEROINIT_RANGE Darrick J. Wong
2021-09-18 1:31 ` Darrick J. Wong [this message]
2021-09-18 17:07 ` [PATCH 5/5] ext4: " riteshh
2021-09-20 18:11 ` Darrick J. Wong
2021-09-21 6:10 ` riteshh
2021-09-18 18:05 ` [PATCHSET RFC v2 jane 0/5] vfs: enable userspace to reset damaged file storage Dan Williams
2021-09-23 0:51 ` Darrick J. Wong
2021-09-23 1:17 ` Dan Williams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=163192867220.417973.4913917281472586603.stgit@magnolia \
--to=djwong@kernel.org \
--cc=dan.j.williams@intel.com \
--cc=hch@infradead.org \
--cc=jane.chu@oracle.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).