linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
@ 2020-06-11  3:16 Daeho Jeong
  2020-06-11  8:54 ` [f2fs-dev] " Chao Yu
  2020-06-11 16:27 ` Eric Biggers
  0 siblings, 2 replies; 9+ messages in thread
From: Daeho Jeong @ 2020-06-11  3:16 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel, kernel-team; +Cc: Daeho Jeong

From: Daeho Jeong <daehojeong@google.com>

Added a new ioctl to send discard commands or/and zero out
to whole data area of a regular file for security reason.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
---
 fs/f2fs/f2fs.h |   8 +++
 fs/f2fs/file.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c812fb8e2d9c..ca139fac5a73 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -434,6 +434,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 					_IOR(F2FS_IOCTL_MAGIC, 18, __u64)
 #define F2FS_IOC_RESERVE_COMPRESS_BLOCKS				\
 					_IOR(F2FS_IOCTL_MAGIC, 19, __u64)
+#define F2FS_IOC_SEC_TRIM_FILE		_IOW(F2FS_IOCTL_MAGIC, 20, __u32)
 
 #define F2FS_IOC_GET_VOLUME_NAME	FS_IOC_GETFSLABEL
 #define F2FS_IOC_SET_VOLUME_NAME	FS_IOC_SETFSLABEL
@@ -453,6 +454,13 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 #define F2FS_GOING_DOWN_METAFLUSH	0x3	/* going down with meta flush */
 #define F2FS_GOING_DOWN_NEED_FSCK	0x4	/* going down to trigger fsck */
 
+/*
+ * Flags used by F2FS_IOC_SEC_TRIM_FILE
+ */
+#define F2FS_TRIM_FILE_DISCARD		0x1	/* send discard command */
+#define F2FS_TRIM_FILE_ZEROOUT		0x2	/* zero out */
+#define F2FS_TRIM_FILE_MASK		0x3
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index dfa1ac2d751a..ba9b7ec5d6bf 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3749,6 +3749,146 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 	return ret;
 }
 
+static int f2fs_secure_erase(struct block_device *bdev, block_t block,
+					block_t len, u32 flags)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	sector_t sector = SECTOR_FROM_BLOCK(block);
+	sector_t nr_sects = SECTOR_FROM_BLOCK(len);
+	int ret = 0;
+
+	if (!q)
+		return -ENXIO;
+
+	if (flags & F2FS_TRIM_FILE_DISCARD)
+		ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
+						blk_queue_secure_erase(q) ?
+						BLKDEV_DISCARD_SECURE : 0);
+
+	if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT))
+		ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS, 0);
+
+	return ret;
+}
+
+static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct address_space *mapping = inode->i_mapping;
+	struct block_device *prev_bdev = NULL;
+	pgoff_t index, pg_start = 0, pg_end;
+	block_t prev_block = 0, len = 0;
+	u32 flags;
+	int ret = 0;
+
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;
+
+	if (get_user(flags, (u32 __user *)arg))
+		return -EFAULT;
+	if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
+		return -EINVAL;
+
+	if ((flags & F2FS_TRIM_FILE_DISCARD) && !f2fs_hw_support_discard(sbi))
+		return -EOPNOTSUPP;
+
+	file_start_write(filp);
+	inode_lock(inode);
+
+	if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode) ||
+			f2fs_compressed_file(inode)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (!inode->i_size)
+		goto err;
+	pg_end = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+
+	ret = f2fs_convert_inline_inode(inode);
+	if (ret)
+		goto err;
+
+	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	down_write(&F2FS_I(inode)->i_mmap_sem);
+
+	ret = filemap_write_and_wait(mapping);
+	if (ret)
+		goto out;
+
+	truncate_inode_pages(mapping, 0);
+
+	for (index = pg_start; index < pg_end;) {
+		struct dnode_of_data dn;
+		unsigned int end_offset;
+
+		set_new_dnode(&dn, inode, NULL, NULL, 0);
+		ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+		if (ret)
+			goto out;
+
+		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+		if (pg_end < end_offset + index)
+			end_offset = pg_end - index;
+
+		for (; dn.ofs_in_node < end_offset;
+				dn.ofs_in_node++, index++) {
+			struct block_device *cur_bdev;
+			block_t blkaddr = f2fs_data_blkaddr(&dn);
+
+			if (__is_valid_data_blkaddr(blkaddr)) {
+				if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+					blkaddr, DATA_GENERIC_ENHANCE)) {
+					ret = -EFSCORRUPTED;
+					goto out;
+				}
+			} else
+				continue;
+
+			cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
+			if (f2fs_is_multi_device(sbi)) {
+				int i = f2fs_target_device_index(sbi, blkaddr);
+
+				blkaddr -= FDEV(i).start_blk;
+			}
+
+			if (len) {
+				if (prev_bdev == cur_bdev &&
+					blkaddr == prev_block + len) {
+					len++;
+				} else {
+					ret = f2fs_secure_erase(prev_bdev,
+							prev_block, len, flags);
+					if (ret)
+						goto out;
+
+					len = 0;
+				}
+			}
+
+			if (!len) {
+				prev_bdev = cur_bdev;
+				prev_block = blkaddr;
+				len = 1;
+			}
+		}
+
+		f2fs_put_dnode(&dn);
+	}
+
+	if (len)
+		ret = f2fs_secure_erase(prev_bdev, prev_block, len, flags);
+out:
+	up_write(&F2FS_I(inode)->i_mmap_sem);
+	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+err:
+	inode_unlock(inode);
+	file_end_write(filp);
+
+	return ret;
+}
+
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3835,6 +3975,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return f2fs_release_compress_blocks(filp, arg);
 	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
 		return f2fs_reserve_compress_blocks(filp, arg);
+	case F2FS_IOC_SEC_TRIM_FILE:
+		return f2fs_sec_trim_file(filp, arg);
 	default:
 		return -ENOTTY;
 	}
@@ -4004,6 +4146,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_GET_COMPRESS_BLOCKS:
 	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
 	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
+	case F2FS_IOC_SEC_TRIM_FILE:
 		break;
 	default:
 		return -ENOIOCTLCMD;
-- 
2.27.0.278.ge193c7cf3a9-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  2020-06-11  3:16 [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl Daeho Jeong
@ 2020-06-11  8:54 ` Chao Yu
  2020-06-11 11:04   ` Daeho Jeong
  2020-06-11 16:27 ` Eric Biggers
  1 sibling, 1 reply; 9+ messages in thread
From: Chao Yu @ 2020-06-11  8:54 UTC (permalink / raw)
  To: Daeho Jeong, linux-kernel, linux-f2fs-devel, kernel-team; +Cc: Daeho Jeong

On 2020/6/11 11:16, Daeho Jeong wrote:
> From: Daeho Jeong <daehojeong@google.com>
> 
> Added a new ioctl to send discard commands or/and zero out
> to whole data area of a regular file for security reason.
> 
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> ---
>  fs/f2fs/f2fs.h |   8 +++
>  fs/f2fs/file.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 151 insertions(+)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index c812fb8e2d9c..ca139fac5a73 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -434,6 +434,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
>  					_IOR(F2FS_IOCTL_MAGIC, 18, __u64)
>  #define F2FS_IOC_RESERVE_COMPRESS_BLOCKS				\
>  					_IOR(F2FS_IOCTL_MAGIC, 19, __u64)
> +#define F2FS_IOC_SEC_TRIM_FILE		_IOW(F2FS_IOCTL_MAGIC, 20, __u32)
>  
>  #define F2FS_IOC_GET_VOLUME_NAME	FS_IOC_GETFSLABEL
>  #define F2FS_IOC_SET_VOLUME_NAME	FS_IOC_SETFSLABEL
> @@ -453,6 +454,13 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
>  #define F2FS_GOING_DOWN_METAFLUSH	0x3	/* going down with meta flush */
>  #define F2FS_GOING_DOWN_NEED_FSCK	0x4	/* going down to trigger fsck */
>  
> +/*
> + * Flags used by F2FS_IOC_SEC_TRIM_FILE
> + */
> +#define F2FS_TRIM_FILE_DISCARD		0x1	/* send discard command */
> +#define F2FS_TRIM_FILE_ZEROOUT		0x2	/* zero out */
> +#define F2FS_TRIM_FILE_MASK		0x3
> +
>  #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
>  /*
>   * ioctl commands in 32 bit emulation
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index dfa1ac2d751a..ba9b7ec5d6bf 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -3749,6 +3749,146 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
>  	return ret;
>  }
>  
> +static int f2fs_secure_erase(struct block_device *bdev, block_t block,
> +					block_t len, u32 flags)
> +{
> +	struct request_queue *q = bdev_get_queue(bdev);
> +	sector_t sector = SECTOR_FROM_BLOCK(block);
> +	sector_t nr_sects = SECTOR_FROM_BLOCK(len);
> +	int ret = 0;
> +
> +	if (!q)
> +		return -ENXIO;
> +
> +	if (flags & F2FS_TRIM_FILE_DISCARD)
> +		ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
> +						blk_queue_secure_erase(q) ?
> +						BLKDEV_DISCARD_SECURE : 0);
> +
> +	if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT))
> +		ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS, 0);
> +
> +	return ret;
> +}
> +
> +static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
> +{
> +	struct inode *inode = file_inode(filp);
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +	struct address_space *mapping = inode->i_mapping;
> +	struct block_device *prev_bdev = NULL;
> +	pgoff_t index, pg_start = 0, pg_end;
> +	block_t prev_block = 0, len = 0;
> +	u32 flags;
> +	int ret = 0;
> +
> +	if (!(filp->f_mode & FMODE_WRITE))
> +		return -EBADF;
> +
> +	if (get_user(flags, (u32 __user *)arg))
> +		return -EFAULT;
> +	if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
> +		return -EINVAL;
> +
> +	if ((flags & F2FS_TRIM_FILE_DISCARD) && !f2fs_hw_support_discard(sbi))
> +		return -EOPNOTSUPP;
> +
> +	file_start_write(filp);

Now, I'm a little confused about when we need to call __mnt_want_write_file(),
you know, vfs_write() still will call this function when updating time.
- __generic_file_write_iter
 - file_update_time
  - __mnt_want_write_file

And previously, f2fs ioctl uses mnt_{want,drop}_write_file() whenever there is
any updates on fs/file, if Eric is correct, we need to clean up most of ioctl
interface as well.

> +	inode_lock(inode);
> +
> +	if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode) ||
> +			f2fs_compressed_file(inode)) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (!inode->i_size)
> +		goto err;
> +	pg_end = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
> +
> +	ret = f2fs_convert_inline_inode(inode);
> +	if (ret)
> +		goto err;
> +
> +	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> +	down_write(&F2FS_I(inode)->i_mmap_sem);
> +
> +	ret = filemap_write_and_wait(mapping);
> +	if (ret)
> +		goto out;
> +
> +	truncate_inode_pages(mapping, 0);
> +
> +	for (index = pg_start; index < pg_end;) {
> +		struct dnode_of_data dn;
> +		unsigned int end_offset;
> +
> +		set_new_dnode(&dn, inode, NULL, NULL, 0);
> +		ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
> +		if (ret)

if (ret == -ENOENT) {
	index = f2fs_get_next_page_offset(&dn, index);
	continue;
}

> +			goto out;
> +
> +		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
> +		if (pg_end < end_offset + index)
> +			end_offset = pg_end - index;
> +
> +		for (; dn.ofs_in_node < end_offset;
> +				dn.ofs_in_node++, index++) {
> +			struct block_device *cur_bdev;
> +			block_t blkaddr = f2fs_data_blkaddr(&dn);
> +
> +			if (__is_valid_data_blkaddr(blkaddr)) {
> +				if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
> +					blkaddr, DATA_GENERIC_ENHANCE)) {
> +					ret = -EFSCORRUPTED;
> +					goto out;


if we goto out label here, we will miss to call f2fs_put_dnode()?

> +				}
> +			} else

How about this?

if (!__is_valid_data_blkaddr())
	continue;

if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), blkaddr, DATA_GENERIC_ENHANCE)) {
	ret = -EFSCORRUPTED;
	goto out;
}

> +				continue;
> +
> +			cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
> +			if (f2fs_is_multi_device(sbi)) {
> +				int i = f2fs_target_device_index(sbi, blkaddr);
> +
> +				blkaddr -= FDEV(i).start_blk;
> +			}
> +
> +			if (len) {
> +				if (prev_bdev == cur_bdev &&
> +					blkaddr == prev_block + len) {
> +					len++;
> +				} else {
> +					ret = f2fs_secure_erase(prev_bdev,
> +							prev_block, len, flags);
> +					if (ret)
> +						goto out;

Ditto,

Thanks,

> +
> +					len = 0;
> +				}
> +			}
> +
> +			if (!len) {
> +				prev_bdev = cur_bdev;
> +				prev_block = blkaddr;
> +				len = 1;
> +			}
> +		}
> +
> +		f2fs_put_dnode(&dn);
> +	}
> +
> +	if (len)
> +		ret = f2fs_secure_erase(prev_bdev, prev_block, len, flags);
> +out:
> +	up_write(&F2FS_I(inode)->i_mmap_sem);
> +	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> +err:
> +	inode_unlock(inode);
> +	file_end_write(filp);
> +
> +	return ret;
> +}
> +
>  long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  {
>  	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
> @@ -3835,6 +3975,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  		return f2fs_release_compress_blocks(filp, arg);
>  	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
>  		return f2fs_reserve_compress_blocks(filp, arg);
> +	case F2FS_IOC_SEC_TRIM_FILE:
> +		return f2fs_sec_trim_file(filp, arg);
>  	default:
>  		return -ENOTTY;
>  	}
> @@ -4004,6 +4146,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  	case F2FS_IOC_GET_COMPRESS_BLOCKS:
>  	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
>  	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
> +	case F2FS_IOC_SEC_TRIM_FILE:
>  		break;
>  	default:
>  		return -ENOIOCTLCMD;
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  2020-06-11  8:54 ` [f2fs-dev] " Chao Yu
@ 2020-06-11 11:04   ` Daeho Jeong
  2020-06-11 16:19     ` Eric Biggers
  0 siblings, 1 reply; 9+ messages in thread
From: Daeho Jeong @ 2020-06-11 11:04 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel, kernel-team, Daeho Jeong

2020년 6월 11일 (목) 오후 5:56, Chao Yu <yuchao0@huawei.com>님이 작성:
>
> On 2020/6/11 11:16, Daeho Jeong wrote:
> > From: Daeho Jeong <daehojeong@google.com>
> >
> > Added a new ioctl to send discard commands or/and zero out
> > to whole data area of a regular file for security reason.
> >
> > Signed-off-by: Daeho Jeong <daehojeong@google.com>
> > ---
> >  fs/f2fs/f2fs.h |   8 +++
> >  fs/f2fs/file.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 151 insertions(+)
> >
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index c812fb8e2d9c..ca139fac5a73 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -434,6 +434,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
> >                                       _IOR(F2FS_IOCTL_MAGIC, 18, __u64)
> >  #define F2FS_IOC_RESERVE_COMPRESS_BLOCKS                             \
> >                                       _IOR(F2FS_IOCTL_MAGIC, 19, __u64)
> > +#define F2FS_IOC_SEC_TRIM_FILE               _IOW(F2FS_IOCTL_MAGIC, 20, __u32)
> >
> >  #define F2FS_IOC_GET_VOLUME_NAME     FS_IOC_GETFSLABEL
> >  #define F2FS_IOC_SET_VOLUME_NAME     FS_IOC_SETFSLABEL
> > @@ -453,6 +454,13 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
> >  #define F2FS_GOING_DOWN_METAFLUSH    0x3     /* going down with meta flush */
> >  #define F2FS_GOING_DOWN_NEED_FSCK    0x4     /* going down to trigger fsck */
> >
> > +/*
> > + * Flags used by F2FS_IOC_SEC_TRIM_FILE
> > + */
> > +#define F2FS_TRIM_FILE_DISCARD               0x1     /* send discard command */
> > +#define F2FS_TRIM_FILE_ZEROOUT               0x2     /* zero out */
> > +#define F2FS_TRIM_FILE_MASK          0x3
> > +
> >  #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
> >  /*
> >   * ioctl commands in 32 bit emulation
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index dfa1ac2d751a..ba9b7ec5d6bf 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -3749,6 +3749,146 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
> >       return ret;
> >  }
> >
> > +static int f2fs_secure_erase(struct block_device *bdev, block_t block,
> > +                                     block_t len, u32 flags)
> > +{
> > +     struct request_queue *q = bdev_get_queue(bdev);
> > +     sector_t sector = SECTOR_FROM_BLOCK(block);
> > +     sector_t nr_sects = SECTOR_FROM_BLOCK(len);
> > +     int ret = 0;
> > +
> > +     if (!q)
> > +             return -ENXIO;
> > +
> > +     if (flags & F2FS_TRIM_FILE_DISCARD)
> > +             ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
> > +                                             blk_queue_secure_erase(q) ?
> > +                                             BLKDEV_DISCARD_SECURE : 0);
> > +
> > +     if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT))
> > +             ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS, 0);
> > +
> > +     return ret;
> > +}
> > +
> > +static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
> > +{
> > +     struct inode *inode = file_inode(filp);
> > +     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +     struct address_space *mapping = inode->i_mapping;
> > +     struct block_device *prev_bdev = NULL;
> > +     pgoff_t index, pg_start = 0, pg_end;
> > +     block_t prev_block = 0, len = 0;
> > +     u32 flags;
> > +     int ret = 0;
> > +
> > +     if (!(filp->f_mode & FMODE_WRITE))
> > +             return -EBADF;
> > +
> > +     if (get_user(flags, (u32 __user *)arg))
> > +             return -EFAULT;
> > +     if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
> > +             return -EINVAL;
> > +
> > +     if ((flags & F2FS_TRIM_FILE_DISCARD) && !f2fs_hw_support_discard(sbi))
> > +             return -EOPNOTSUPP;
> > +
> > +     file_start_write(filp);
>
> Now, I'm a little confused about when we need to call __mnt_want_write_file(),
> you know, vfs_write() still will call this function when updating time.
> - __generic_file_write_iter
>  - file_update_time
>   - __mnt_want_write_file
>
> And previously, f2fs ioctl uses mnt_{want,drop}_write_file() whenever there is
> any updates on fs/file, if Eric is correct, we need to clean up most of ioctl
> interface as well.

I also saw most filesytem codes use just mnt_{want,drop}_write_file()
and actually it doesn't affect code working. It's a matter of doing a
redundant job or not.
AFAIUI, if the file is not open for writing (FMODE_WRITE), we have to
call mnt_want_write_file() to increase mnt_writers.
In this case, we already checked it has FMODE_WRITE flag.

>
> > +     inode_lock(inode);
> > +
> > +     if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode) ||
> > +                     f2fs_compressed_file(inode)) {
> > +             ret = -EINVAL;
> > +             goto err;
> > +     }
> > +
> > +     if (!inode->i_size)
> > +             goto err;
> > +     pg_end = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
> > +
> > +     ret = f2fs_convert_inline_inode(inode);
> > +     if (ret)
> > +             goto err;
> > +
> > +     down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > +     down_write(&F2FS_I(inode)->i_mmap_sem);
> > +
> > +     ret = filemap_write_and_wait(mapping);
> > +     if (ret)
> > +             goto out;
> > +
> > +     truncate_inode_pages(mapping, 0);
> > +
> > +     for (index = pg_start; index < pg_end;) {
> > +             struct dnode_of_data dn;
> > +             unsigned int end_offset;
> > +
> > +             set_new_dnode(&dn, inode, NULL, NULL, 0);
> > +             ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
> > +             if (ret)
>
> if (ret == -ENOENT) {
>         index = f2fs_get_next_page_offset(&dn, index);
>         continue;
> }

Got it.

>
> > +                     goto out;
> > +
> > +             end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
> > +             if (pg_end < end_offset + index)
> > +                     end_offset = pg_end - index;
> > +
> > +             for (; dn.ofs_in_node < end_offset;
> > +                             dn.ofs_in_node++, index++) {
> > +                     struct block_device *cur_bdev;
> > +                     block_t blkaddr = f2fs_data_blkaddr(&dn);
> > +
> > +                     if (__is_valid_data_blkaddr(blkaddr)) {
> > +                             if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
> > +                                     blkaddr, DATA_GENERIC_ENHANCE)) {
> > +                                     ret = -EFSCORRUPTED;
> > +                                     goto out;
>
>
> if we goto out label here, we will miss to call f2fs_put_dnode()?
>

Oops, I missed this, when I changed the code flow. Thanks!

> > +                             }
> > +                     } else
>
> How about this?
>
> if (!__is_valid_data_blkaddr())
>         continue;
>
> if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), blkaddr, DATA_GENERIC_ENHANCE)) {
>         ret = -EFSCORRUPTED;
>         goto out;
> }
>

Looks better.

> > +                             continue;
> > +
> > +                     cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
> > +                     if (f2fs_is_multi_device(sbi)) {
> > +                             int i = f2fs_target_device_index(sbi, blkaddr);
> > +
> > +                             blkaddr -= FDEV(i).start_blk;
> > +                     }
> > +
> > +                     if (len) {
> > +                             if (prev_bdev == cur_bdev &&
> > +                                     blkaddr == prev_block + len) {
> > +                                     len++;
> > +                             } else {
> > +                                     ret = f2fs_secure_erase(prev_bdev,
> > +                                                     prev_block, len, flags);
> > +                                     if (ret)
> > +                                             goto out;
>
> Ditto,

Got it.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  2020-06-11 11:04   ` Daeho Jeong
@ 2020-06-11 16:19     ` Eric Biggers
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Biggers @ 2020-06-11 16:19 UTC (permalink / raw)
  To: Daeho Jeong
  Cc: Chao Yu, Daeho Jeong, kernel-team, linux-kernel, linux-f2fs-devel

On Thu, Jun 11, 2020 at 08:04:06PM +0900, Daeho Jeong wrote:
> > > +static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
> > > +{
> > > +     struct inode *inode = file_inode(filp);
> > > +     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > > +     struct address_space *mapping = inode->i_mapping;
> > > +     struct block_device *prev_bdev = NULL;
> > > +     pgoff_t index, pg_start = 0, pg_end;
> > > +     block_t prev_block = 0, len = 0;
> > > +     u32 flags;
> > > +     int ret = 0;
> > > +
> > > +     if (!(filp->f_mode & FMODE_WRITE))
> > > +             return -EBADF;
> > > +
> > > +     if (get_user(flags, (u32 __user *)arg))
> > > +             return -EFAULT;
> > > +     if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
> > > +             return -EINVAL;
> > > +
> > > +     if ((flags & F2FS_TRIM_FILE_DISCARD) && !f2fs_hw_support_discard(sbi))
> > > +             return -EOPNOTSUPP;
> > > +
> > > +     file_start_write(filp);
> >
> > Now, I'm a little confused about when we need to call __mnt_want_write_file(),
> > you know, vfs_write() still will call this function when updating time.
> > - __generic_file_write_iter
> >  - file_update_time
> >   - __mnt_want_write_file
> >
> > And previously, f2fs ioctl uses mnt_{want,drop}_write_file() whenever there is
> > any updates on fs/file, if Eric is correct, we need to clean up most of ioctl
> > interface as well.
> 
> I also saw most filesytem codes use just mnt_{want,drop}_write_file()
> and actually it doesn't affect code working. It's a matter of doing a
> redundant job or not.
> AFAIUI, if the file is not open for writing (FMODE_WRITE), we have to
> call mnt_want_write_file() to increase mnt_writers.
> In this case, we already checked it has FMODE_WRITE flag.

If the fd isn't writable (or may not be writable), mnt_want_write_file() is
needed.  That includes all ioctls that operate (or may operate) on directories,
since directories can't be opened for writing.

But when the fd is guaranteed to be writable, incrementing mnt_writers is
pointless.  I'm trying to clean this up in the VFS:
https://lkml.kernel.org/r/20200611160534.55042-1-ebiggers@kernel.org.

mnt_want_write_file() still does the freeze protection, which file_start_write()
achieves more directly.

The only other thing that mnt_want_write_file() does is the check for emergency
remount r/o, which I doubt is very important.  It's racy, so the filesystem
needs to detect it in other places too.

I'm not sure why file_update_time() uses __mnt_want_write_file().  Either it
assumes the fd might not be writable, or it just wants the check for emergency
remount r/o, or it's just a mistake.  Note also that mtime isn't always updated,
so just because file_update_time() calls __mnt_want_write_file() doesn't mean
that write() always calls __mnt_want_write_file().

- Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  2020-06-11  3:16 [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl Daeho Jeong
  2020-06-11  8:54 ` [f2fs-dev] " Chao Yu
@ 2020-06-11 16:27 ` Eric Biggers
  2020-06-11 22:49   ` Daeho Jeong
  1 sibling, 1 reply; 9+ messages in thread
From: Eric Biggers @ 2020-06-11 16:27 UTC (permalink / raw)
  To: Daeho Jeong; +Cc: linux-kernel, linux-f2fs-devel, kernel-team, Daeho Jeong

On Thu, Jun 11, 2020 at 12:16:52PM +0900, Daeho Jeong wrote:
> +	for (index = pg_start; index < pg_end;) {
> +		struct dnode_of_data dn;
> +		unsigned int end_offset;
> +
> +		set_new_dnode(&dn, inode, NULL, NULL, 0);
> +		ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
> +		if (ret)
> +			goto out;
> +
> +		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
> +		if (pg_end < end_offset + index)
> +			end_offset = pg_end - index;
> +
> +		for (; dn.ofs_in_node < end_offset;
> +				dn.ofs_in_node++, index++) {
> +			struct block_device *cur_bdev;
> +			block_t blkaddr = f2fs_data_blkaddr(&dn);
> +
> +			if (__is_valid_data_blkaddr(blkaddr)) {
> +				if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
> +					blkaddr, DATA_GENERIC_ENHANCE)) {
> +					ret = -EFSCORRUPTED;
> +					goto out;
> +				}
> +			} else
> +				continue;
> +
> +			cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
> +			if (f2fs_is_multi_device(sbi)) {
> +				int i = f2fs_target_device_index(sbi, blkaddr);
> +
> +				blkaddr -= FDEV(i).start_blk;
> +			}
> +
> +			if (len) {
> +				if (prev_bdev == cur_bdev &&
> +					blkaddr == prev_block + len) {
> +					len++;
> +				} else {
> +					ret = f2fs_secure_erase(prev_bdev,
> +							prev_block, len, flags);
> +					if (ret)
> +						goto out;
> +
> +					len = 0;
> +				}
> +			}
> +
> +			if (!len) {
> +				prev_bdev = cur_bdev;
> +				prev_block = blkaddr;
> +				len = 1;
> +			}
> +		}
> +
> +		f2fs_put_dnode(&dn);
> +	}

This loop processes the entire file, which may be very large.  So it could take
a very long time to execute.

It should at least use the following to make the task killable and preemptible:

		if (fatal_signal_pending(current)) {
			err = -EINTR;
			goto out;
		}
		cond_resched();

Also, perhaps this ioctl should be made incremental, i.e. take in an
(offset, length) like pwrite()?

- Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  2020-06-11 16:27 ` Eric Biggers
@ 2020-06-11 22:49   ` Daeho Jeong
  2020-06-11 23:00     ` Eric Biggers
  0 siblings, 1 reply; 9+ messages in thread
From: Daeho Jeong @ 2020-06-11 22:49 UTC (permalink / raw)
  To: Eric Biggers; +Cc: linux-kernel, linux-f2fs-devel, kernel-team, Daeho Jeong

2020년 6월 12일 (금) 오전 1:27, Eric Biggers <ebiggers@kernel.org>님이 작성:
>
> On Thu, Jun 11, 2020 at 12:16:52PM +0900, Daeho Jeong wrote:
> > +     for (index = pg_start; index < pg_end;) {
> > +             struct dnode_of_data dn;
> > +             unsigned int end_offset;
> > +
> > +             set_new_dnode(&dn, inode, NULL, NULL, 0);
> > +             ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
> > +             if (ret)
> > +                     goto out;
> > +
> > +             end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
> > +             if (pg_end < end_offset + index)
> > +                     end_offset = pg_end - index;
> > +
> > +             for (; dn.ofs_in_node < end_offset;
> > +                             dn.ofs_in_node++, index++) {
> > +                     struct block_device *cur_bdev;
> > +                     block_t blkaddr = f2fs_data_blkaddr(&dn);
> > +
> > +                     if (__is_valid_data_blkaddr(blkaddr)) {
> > +                             if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
> > +                                     blkaddr, DATA_GENERIC_ENHANCE)) {
> > +                                     ret = -EFSCORRUPTED;
> > +                                     goto out;
> > +                             }
> > +                     } else
> > +                             continue;
> > +
> > +                     cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
> > +                     if (f2fs_is_multi_device(sbi)) {
> > +                             int i = f2fs_target_device_index(sbi, blkaddr);
> > +
> > +                             blkaddr -= FDEV(i).start_blk;
> > +                     }
> > +
> > +                     if (len) {
> > +                             if (prev_bdev == cur_bdev &&
> > +                                     blkaddr == prev_block + len) {
> > +                                     len++;
> > +                             } else {
> > +                                     ret = f2fs_secure_erase(prev_bdev,
> > +                                                     prev_block, len, flags);
> > +                                     if (ret)
> > +                                             goto out;
> > +
> > +                                     len = 0;
> > +                             }
> > +                     }
> > +
> > +                     if (!len) {
> > +                             prev_bdev = cur_bdev;
> > +                             prev_block = blkaddr;
> > +                             len = 1;
> > +                     }
> > +             }
> > +
> > +             f2fs_put_dnode(&dn);
> > +     }
>
> This loop processes the entire file, which may be very large.  So it could take
> a very long time to execute.
>
> It should at least use the following to make the task killable and preemptible:
>
>                 if (fatal_signal_pending(current)) {
>                         err = -EINTR;
>                         goto out;
>                 }
>                 cond_resched();
>

Good point!

> Also, perhaps this ioctl should be made incremental, i.e. take in an
> (offset, length) like pwrite()?
>
> - Eric

Discard and Zeroing will be treated in a unit of block, which is 4KB
in F2FS case.
Do you really need the (offset, length) option here even if the unit
is a 4KB block? I guess this option might make the user even
inconvenienced to use this ioctl, because they have to bear 4KB
alignment in mind.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  2020-06-11 22:49   ` Daeho Jeong
@ 2020-06-11 23:00     ` Eric Biggers
  2020-06-12  0:00       ` Daeho Jeong
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Biggers @ 2020-06-11 23:00 UTC (permalink / raw)
  To: Daeho Jeong; +Cc: linux-kernel, linux-f2fs-devel, kernel-team, Daeho Jeong

On Fri, Jun 12, 2020 at 07:49:12AM +0900, Daeho Jeong wrote:
> 2020년 6월 12일 (금) 오전 1:27, Eric Biggers <ebiggers@kernel.org>님이 작성:
> >
> > On Thu, Jun 11, 2020 at 12:16:52PM +0900, Daeho Jeong wrote:
> > > +     for (index = pg_start; index < pg_end;) {
> > > +             struct dnode_of_data dn;
> > > +             unsigned int end_offset;
> > > +
> > > +             set_new_dnode(&dn, inode, NULL, NULL, 0);
> > > +             ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
> > > +             if (ret)
> > > +                     goto out;
> > > +
> > > +             end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
> > > +             if (pg_end < end_offset + index)
> > > +                     end_offset = pg_end - index;
> > > +
> > > +             for (; dn.ofs_in_node < end_offset;
> > > +                             dn.ofs_in_node++, index++) {
> > > +                     struct block_device *cur_bdev;
> > > +                     block_t blkaddr = f2fs_data_blkaddr(&dn);
> > > +
> > > +                     if (__is_valid_data_blkaddr(blkaddr)) {
> > > +                             if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
> > > +                                     blkaddr, DATA_GENERIC_ENHANCE)) {
> > > +                                     ret = -EFSCORRUPTED;
> > > +                                     goto out;
> > > +                             }
> > > +                     } else
> > > +                             continue;
> > > +
> > > +                     cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
> > > +                     if (f2fs_is_multi_device(sbi)) {
> > > +                             int i = f2fs_target_device_index(sbi, blkaddr);
> > > +
> > > +                             blkaddr -= FDEV(i).start_blk;
> > > +                     }
> > > +
> > > +                     if (len) {
> > > +                             if (prev_bdev == cur_bdev &&
> > > +                                     blkaddr == prev_block + len) {
> > > +                                     len++;
> > > +                             } else {
> > > +                                     ret = f2fs_secure_erase(prev_bdev,
> > > +                                                     prev_block, len, flags);
> > > +                                     if (ret)
> > > +                                             goto out;
> > > +
> > > +                                     len = 0;
> > > +                             }
> > > +                     }
> > > +
> > > +                     if (!len) {
> > > +                             prev_bdev = cur_bdev;
> > > +                             prev_block = blkaddr;
> > > +                             len = 1;
> > > +                     }
> > > +             }
> > > +
> > > +             f2fs_put_dnode(&dn);
> > > +     }
> >
> > This loop processes the entire file, which may be very large.  So it could take
> > a very long time to execute.
> >
> > It should at least use the following to make the task killable and preemptible:
> >
> >                 if (fatal_signal_pending(current)) {
> >                         err = -EINTR;
> >                         goto out;
> >                 }
> >                 cond_resched();
> >
> 
> Good point!
> 
> > Also, perhaps this ioctl should be made incremental, i.e. take in an
> > (offset, length) like pwrite()?
> >
> > - Eric
> 
> Discard and Zeroing will be treated in a unit of block, which is 4KB
> in F2FS case.
> Do you really need the (offset, length) option here even if the unit
> is a 4KB block? I guess this option might make the user even
> inconvenienced to use this ioctl, because they have to bear 4KB
> alignment in mind.

The ioctl as currently proposed always erases the entire file, which could be
gigabytes.  That could take a very long time.

I'm suggesting considering making it possible to call the ioctl multiple times
to process the file incrementally, like you would do with write() or pwrite().

That implies that for each ioctl call, the length would need to be specified
unless it's hardcoded to 4KiB which would be very inefficient.  Users would
probably want to process a larger amount at a time, like 1 MiB, right?

Likewise the offset would need to be specified as well, unless it were to be
taken implicitly from f_pos.

- Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  2020-06-11 23:00     ` Eric Biggers
@ 2020-06-12  0:00       ` Daeho Jeong
  2020-06-12  0:13         ` Eric Biggers
  0 siblings, 1 reply; 9+ messages in thread
From: Daeho Jeong @ 2020-06-12  0:00 UTC (permalink / raw)
  To: Eric Biggers; +Cc: linux-kernel, linux-f2fs-devel, kernel-team, Daeho Jeong

For the incremental way of erasing, we might as well support the
(offset, length) option in a unit of 4KiB.

So, you might use this ioctl like the below. Does it work for you?
struct f2fs_sec_trim {
        u64 startblk;
        u64 blklen;
        u32 flags;
};

sectrim.startblk = 0;
sectrim.blklen = 256;     // 1MiB
sectrim.flags = F2FS_TRIM_FILE_DISCARD | F2FS_TRIM_FILE_ZEROOUT;
ret = ioctl(fd, F2FS_SEC_TRIM_FILE, &sectrim);

2020년 6월 12일 (금) 오전 8:00, Eric Biggers <ebiggers@kernel.org>님이 작성:

>
> On Fri, Jun 12, 2020 at 07:49:12AM +0900, Daeho Jeong wrote:
> > 2020년 6월 12일 (금) 오전 1:27, Eric Biggers <ebiggers@kernel.org>님이 작성:
> > >
> > > On Thu, Jun 11, 2020 at 12:16:52PM +0900, Daeho Jeong wrote:
> > > > +     for (index = pg_start; index < pg_end;) {
> > > > +             struct dnode_of_data dn;
> > > > +             unsigned int end_offset;
> > > > +
> > > > +             set_new_dnode(&dn, inode, NULL, NULL, 0);
> > > > +             ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
> > > > +             if (ret)
> > > > +                     goto out;
> > > > +
> > > > +             end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
> > > > +             if (pg_end < end_offset + index)
> > > > +                     end_offset = pg_end - index;
> > > > +
> > > > +             for (; dn.ofs_in_node < end_offset;
> > > > +                             dn.ofs_in_node++, index++) {
> > > > +                     struct block_device *cur_bdev;
> > > > +                     block_t blkaddr = f2fs_data_blkaddr(&dn);
> > > > +
> > > > +                     if (__is_valid_data_blkaddr(blkaddr)) {
> > > > +                             if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
> > > > +                                     blkaddr, DATA_GENERIC_ENHANCE)) {
> > > > +                                     ret = -EFSCORRUPTED;
> > > > +                                     goto out;
> > > > +                             }
> > > > +                     } else
> > > > +                             continue;
> > > > +
> > > > +                     cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
> > > > +                     if (f2fs_is_multi_device(sbi)) {
> > > > +                             int i = f2fs_target_device_index(sbi, blkaddr);
> > > > +
> > > > +                             blkaddr -= FDEV(i).start_blk;
> > > > +                     }
> > > > +
> > > > +                     if (len) {
> > > > +                             if (prev_bdev == cur_bdev &&
> > > > +                                     blkaddr == prev_block + len) {
> > > > +                                     len++;
> > > > +                             } else {
> > > > +                                     ret = f2fs_secure_erase(prev_bdev,
> > > > +                                                     prev_block, len, flags);
> > > > +                                     if (ret)
> > > > +                                             goto out;
> > > > +
> > > > +                                     len = 0;
> > > > +                             }
> > > > +                     }
> > > > +
> > > > +                     if (!len) {
> > > > +                             prev_bdev = cur_bdev;
> > > > +                             prev_block = blkaddr;
> > > > +                             len = 1;
> > > > +                     }
> > > > +             }
> > > > +
> > > > +             f2fs_put_dnode(&dn);
> > > > +     }
> > >
> > > This loop processes the entire file, which may be very large.  So it could take
> > > a very long time to execute.
> > >
> > > It should at least use the following to make the task killable and preemptible:
> > >
> > >                 if (fatal_signal_pending(current)) {
> > >                         err = -EINTR;
> > >                         goto out;
> > >                 }
> > >                 cond_resched();
> > >
> >
> > Good point!
> >
> > > Also, perhaps this ioctl should be made incremental, i.e. take in an
> > > (offset, length) like pwrite()?
> > >
> > > - Eric
> >
> > Discard and Zeroing will be treated in a unit of block, which is 4KB
> > in F2FS case.
> > Do you really need the (offset, length) option here even if the unit
> > is a 4KB block? I guess this option might make the user even
> > inconvenienced to use this ioctl, because they have to bear 4KB
> > alignment in mind.
>
> The ioctl as currently proposed always erases the entire file, which could be
> gigabytes.  That could take a very long time.
>
> I'm suggesting considering making it possible to call the ioctl multiple times
> to process the file incrementally, like you would do with write() or pwrite().
>
> That implies that for each ioctl call, the length would need to be specified
> unless it's hardcoded to 4KiB which would be very inefficient.  Users would
> probably want to process a larger amount at a time, like 1 MiB, right?
>
> Likewise the offset would need to be specified as well, unless it were to be
> taken implicitly from f_pos.
>
> - Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  2020-06-12  0:00       ` Daeho Jeong
@ 2020-06-12  0:13         ` Eric Biggers
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Biggers @ 2020-06-12  0:13 UTC (permalink / raw)
  To: Daeho Jeong; +Cc: linux-kernel, linux-f2fs-devel, kernel-team, Daeho Jeong

On Fri, Jun 12, 2020 at 09:00:58AM +0900, Daeho Jeong wrote:
> For the incremental way of erasing, we might as well support the
> (offset, length) option in a unit of 4KiB.
> 
> So, you might use this ioctl like the below. Does it work for you?
> struct f2fs_sec_trim {
>         u64 startblk;
>         u64 blklen;
>         u32 flags;
> };
> 
> sectrim.startblk = 0;
> sectrim.blklen = 256;     // 1MiB
> sectrim.flags = F2FS_TRIM_FILE_DISCARD | F2FS_TRIM_FILE_ZEROOUT;
> ret = ioctl(fd, F2FS_SEC_TRIM_FILE, &sectrim);

Most filesystem ioctls and syscalls take offsets and lengths in bytes,
especially newer ones.  This way implementations of these APIs can support other
alignments later.  The implementation can still require alignment for now, i.e.
return -EINVAL if !IS_ALIGNED(arg.pos | arg.len, sb->s_blocksize).

Also, flags should be a u64, especially since it wouldn't actually increase the
size of the struct (due to padding).

- Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-06-12  0:13 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-11  3:16 [PATCH v2] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl Daeho Jeong
2020-06-11  8:54 ` [f2fs-dev] " Chao Yu
2020-06-11 11:04   ` Daeho Jeong
2020-06-11 16:19     ` Eric Biggers
2020-06-11 16:27 ` Eric Biggers
2020-06-11 22:49   ` Daeho Jeong
2020-06-11 23:00     ` Eric Biggers
2020-06-12  0:00       ` Daeho Jeong
2020-06-12  0:13         ` Eric Biggers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).