linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] iomap: add a swapfile activation function
@ 2018-04-18  2:50 Darrick J. Wong
  2018-04-21 12:33 ` Jan Kara
  2018-04-24 17:35 ` Christoph Hellwig
  0 siblings, 2 replies; 7+ messages in thread
From: Darrick J. Wong @ 2018-04-18  2:50 UTC (permalink / raw)
  To: xfs, linux-fsdevel; +Cc: Christoph Hellwig, linux-mm

From: Darrick J. Wong <darrick.wong@oracle.com>

Add a new iomap_swapfile_activate function so that filesystems can
activate swap files without having to use the obsolete and slow bmap
function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            |   99 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_aops.c     |   12 ++++++
 include/linux/iomap.h |    7 +++
 3 files changed, 118 insertions(+)

diff --git a/fs/iomap.c b/fs/iomap.c
index afd1635..ace921b 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1089,3 +1089,102 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);
+
+/* Swapfile activation */
+
+struct iomap_swapfile_info {
+	struct swap_info_struct *sis;
+	uint64_t lowest_ppage;		/* lowest physical addr seen (pages) */
+	uint64_t highest_ppage;		/* highest physical addr seen (pages) */
+	unsigned long expected_page_no;	/* next logical offset wanted (pages) */
+	int nr_extents;			/* extent count */
+};
+
+static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
+		loff_t count, void *data, struct iomap *iomap)
+{
+	struct iomap_swapfile_info *isi = data;
+	unsigned long page_no = iomap->offset >> PAGE_SHIFT;
+	unsigned long nr_pages = iomap->length >> PAGE_SHIFT;
+	uint64_t first_ppage = iomap->addr >> PAGE_SHIFT;
+	uint64_t last_ppage = ((iomap->addr + iomap->length) >> PAGE_SHIFT) - 1;
+
+	/* Only one bdev per swap file. */
+	if (iomap->bdev != isi->sis->bdev)
+		goto err;
+
+	/* Must be aligned to a page boundary. */
+	if ((iomap->offset & ~PAGE_MASK) || (iomap->addr & ~PAGE_MASK) ||
+	    (iomap->length & ~PAGE_MASK))
+		goto err;
+
+	/* Only real or unwritten extents. */
+	if (iomap->type != IOMAP_MAPPED && iomap->type != IOMAP_UNWRITTEN)
+		goto err;
+
+	/* No sparse files. */
+	if (isi->expected_page_no != page_no)
+		goto err;
+
+	/* No uncommitted metadata or shared blocks or inline data. */
+	if (iomap->flags & (IOMAP_F_DIRTY | IOMAP_F_SHARED |
+			    IOMAP_F_DATA_INLINE))
+		goto err;
+
+	/*
+	 * Calculate how much swap space we're adding; the first page contains
+	 * the swap header and doesn't count.
+	 */
+	if (page_no == 0)
+		first_ppage++;
+	if (isi->lowest_ppage > first_ppage)
+		isi->lowest_ppage = first_ppage;
+	if (isi->highest_ppage < last_ppage)
+		isi->highest_ppage = last_ppage;
+
+	/* Add extent, set up for the next call. */
+	isi->nr_extents += add_swap_extent(isi->sis, page_no, nr_pages,
+			first_ppage);
+	isi->expected_page_no = page_no + nr_pages;
+
+	return count;
+err:
+	pr_err("swapon: swapfile has holes\n");
+	return -EINVAL;
+}
+
+int iomap_swapfile_activate(struct swap_info_struct *sis,
+		struct file *swap_file, sector_t *pagespan,
+		const struct iomap_ops *ops)
+{
+	struct iomap_swapfile_info isi = {
+		.sis = sis,
+		.lowest_ppage = (sector_t)-1ULL,
+	};
+	struct address_space *mapping = swap_file->f_mapping;
+	struct inode *inode = mapping->host;
+	loff_t pos = 0;
+	loff_t len = i_size_read(inode);
+	loff_t ret;
+
+	ret = filemap_write_and_wait(inode->i_mapping);
+	if (ret)
+		return ret;
+
+	while (len > 0) {
+		ret = iomap_apply(inode, pos, len, IOMAP_REPORT,
+				ops, &isi, iomap_swapfile_activate_actor);
+		if (ret <= 0)
+			return ret;
+
+		pos += ret;
+		len -= ret;
+	}
+
+	*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
+	sis->max = isi.expected_page_no;
+	sis->pages = isi.expected_page_no - 1;
+	sis->highest_bit = isi.expected_page_no - 1;
+	return isi.nr_extents;
+}
+EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0ab824f..80de476 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1475,6 +1475,16 @@ xfs_vm_set_page_dirty(
 	return newly_dirty;
 }
 
+static int
+xfs_iomap_swapfile_activate(
+	struct swap_info_struct		*sis,
+	struct file			*swap_file,
+	sector_t			*span)
+{
+	sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
+	return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
+}
+
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
@@ -1488,6 +1498,7 @@ const struct address_space_operations xfs_address_space_operations = {
 	.migratepage		= buffer_migrate_page,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
+	.swap_activate		= xfs_iomap_swapfile_activate,
 };
 
 const struct address_space_operations xfs_dax_aops = {
@@ -1495,4 +1506,5 @@ const struct address_space_operations xfs_dax_aops = {
 	.direct_IO		= noop_direct_IO,
 	.set_page_dirty		= noop_set_page_dirty,
 	.invalidatepage		= noop_invalidatepage,
+	.swap_activate		= xfs_iomap_swapfile_activate,
 };
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 19a07de..66d1c35 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -106,4 +106,11 @@ typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
 
+struct file;
+struct swap_info_struct;
+
+int iomap_swapfile_activate(struct swap_info_struct *sis,
+		struct file *swap_file, sector_t *pagespan,
+		const struct iomap_ops *ops);
+
 #endif /* LINUX_IOMAP_H */

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] iomap: add a swapfile activation function
  2018-04-18  2:50 [PATCH] iomap: add a swapfile activation function Darrick J. Wong
@ 2018-04-21 12:33 ` Jan Kara
  2018-05-02 20:29   ` Darrick J. Wong
  2018-04-24 17:35 ` Christoph Hellwig
  1 sibling, 1 reply; 7+ messages in thread
From: Jan Kara @ 2018-04-21 12:33 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: xfs, linux-fsdevel, Christoph Hellwig, linux-mm

On Tue 17-04-18 19:50:23, Darrick J. Wong wrote:
> +
> +/* Swapfile activation */
> +
> +struct iomap_swapfile_info {
> +	struct swap_info_struct *sis;
> +	uint64_t lowest_ppage;		/* lowest physical addr seen (pages) */
> +	uint64_t highest_ppage;		/* highest physical addr seen (pages) */
> +	unsigned long expected_page_no;	/* next logical offset wanted (pages) */
> +	int nr_extents;			/* extent count */
> +};
> +
> +static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
> +		loff_t count, void *data, struct iomap *iomap)
> +{
> +	struct iomap_swapfile_info *isi = data;
> +	unsigned long page_no = iomap->offset >> PAGE_SHIFT;
> +	unsigned long nr_pages = iomap->length >> PAGE_SHIFT;
> +	uint64_t first_ppage = iomap->addr >> PAGE_SHIFT;
> +	uint64_t last_ppage = ((iomap->addr + iomap->length) >> PAGE_SHIFT) - 1;
> +
> +	/* Only one bdev per swap file. */
> +	if (iomap->bdev != isi->sis->bdev)
> +		goto err;
> +
> +	/* Must be aligned to a page boundary. */
> +	if ((iomap->offset & ~PAGE_MASK) || (iomap->addr & ~PAGE_MASK) ||
> +	    (iomap->length & ~PAGE_MASK))
> +		goto err;

Reporting an error in this case does not look equivalent to
generic_swapfile_activate()? That function just skips blocks with
insufficient alignment... And I'm actually puzzled why alignment of
the physical block is needed, but that's an independent question.

> +	/* Only real or unwritten extents. */
> +	if (iomap->type != IOMAP_MAPPED && iomap->type != IOMAP_UNWRITTEN)
> +		goto err;
> +
> +	/* No sparse files. */
> +	if (isi->expected_page_no != page_no)
> +		goto err;
> +
> +	/* No uncommitted metadata or shared blocks or inline data. */
> +	if (iomap->flags & (IOMAP_F_DIRTY | IOMAP_F_SHARED |
> +			    IOMAP_F_DATA_INLINE))
> +		goto err;
> +
> +	/*
> +	 * Calculate how much swap space we're adding; the first page contains
> +	 * the swap header and doesn't count.
> +	 */
> +	if (page_no == 0)
> +		first_ppage++;
> +	if (isi->lowest_ppage > first_ppage)
> +		isi->lowest_ppage = first_ppage;
> +	if (isi->highest_ppage < last_ppage)
> +		isi->highest_ppage = last_ppage;
> +
> +	/* Add extent, set up for the next call. */
> +	isi->nr_extents += add_swap_extent(isi->sis, page_no, nr_pages,
> +			first_ppage);

And here add_swap_extent() can return an error.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] iomap: add a swapfile activation function
  2018-04-18  2:50 [PATCH] iomap: add a swapfile activation function Darrick J. Wong
  2018-04-21 12:33 ` Jan Kara
@ 2018-04-24 17:35 ` Christoph Hellwig
  2018-04-25 23:46   ` Darrick J. Wong
  1 sibling, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2018-04-24 17:35 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: xfs, linux-fsdevel, Christoph Hellwig, linux-mm

This looks much better than using bmap, but I still think that
having the swap code build its own inefficient extent maps is
a horrible idea.

On Tue, Apr 17, 2018 at 07:50:23PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Add a new iomap_swapfile_activate function so that filesystems can
> activate swap files without having to use the obsolete and slow bmap
> function.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/iomap.c            |   99 +++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_aops.c     |   12 ++++++
>  include/linux/iomap.h |    7 +++
>  3 files changed, 118 insertions(+)
> 
> diff --git a/fs/iomap.c b/fs/iomap.c
> index afd1635..ace921b 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -1089,3 +1089,102 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(iomap_dio_rw);
> +
> +/* Swapfile activation */
> +
> +struct iomap_swapfile_info {
> +	struct swap_info_struct *sis;
> +	uint64_t lowest_ppage;		/* lowest physical addr seen (pages) */
> +	uint64_t highest_ppage;		/* highest physical addr seen (pages) */
> +	unsigned long expected_page_no;	/* next logical offset wanted (pages) */
> +	int nr_extents;			/* extent count */
> +};
> +
> +static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
> +		loff_t count, void *data, struct iomap *iomap)
> +{
> +	struct iomap_swapfile_info *isi = data;
> +	unsigned long page_no = iomap->offset >> PAGE_SHIFT;
> +	unsigned long nr_pages = iomap->length >> PAGE_SHIFT;
> +	uint64_t first_ppage = iomap->addr >> PAGE_SHIFT;
> +	uint64_t last_ppage = ((iomap->addr + iomap->length) >> PAGE_SHIFT) - 1;
> +
> +	/* Only one bdev per swap file. */
> +	if (iomap->bdev != isi->sis->bdev)
> +		goto err;
> +
> +	/* Must be aligned to a page boundary. */
> +	if ((iomap->offset & ~PAGE_MASK) || (iomap->addr & ~PAGE_MASK) ||
> +	    (iomap->length & ~PAGE_MASK))
> +		goto err;
> +
> +	/* Only real or unwritten extents. */
> +	if (iomap->type != IOMAP_MAPPED && iomap->type != IOMAP_UNWRITTEN)
> +		goto err;
> +
> +	/* No sparse files. */
> +	if (isi->expected_page_no != page_no)
> +		goto err;
> +
> +	/* No uncommitted metadata or shared blocks or inline data. */
> +	if (iomap->flags & (IOMAP_F_DIRTY | IOMAP_F_SHARED |
> +			    IOMAP_F_DATA_INLINE))
> +		goto err;
> +
> +	/*
> +	 * Calculate how much swap space we're adding; the first page contains
> +	 * the swap header and doesn't count.
> +	 */
> +	if (page_no == 0)
> +		first_ppage++;
> +	if (isi->lowest_ppage > first_ppage)
> +		isi->lowest_ppage = first_ppage;
> +	if (isi->highest_ppage < last_ppage)
> +		isi->highest_ppage = last_ppage;
> +
> +	/* Add extent, set up for the next call. */
> +	isi->nr_extents += add_swap_extent(isi->sis, page_no, nr_pages,
> +			first_ppage);
> +	isi->expected_page_no = page_no + nr_pages;
> +
> +	return count;
> +err:
> +	pr_err("swapon: swapfile has holes\n");
> +	return -EINVAL;
> +}
> +
> +int iomap_swapfile_activate(struct swap_info_struct *sis,
> +		struct file *swap_file, sector_t *pagespan,
> +		const struct iomap_ops *ops)
> +{
> +	struct iomap_swapfile_info isi = {
> +		.sis = sis,
> +		.lowest_ppage = (sector_t)-1ULL,
> +	};
> +	struct address_space *mapping = swap_file->f_mapping;
> +	struct inode *inode = mapping->host;
> +	loff_t pos = 0;
> +	loff_t len = i_size_read(inode);
> +	loff_t ret;
> +
> +	ret = filemap_write_and_wait(inode->i_mapping);
> +	if (ret)
> +		return ret;
> +
> +	while (len > 0) {
> +		ret = iomap_apply(inode, pos, len, IOMAP_REPORT,
> +				ops, &isi, iomap_swapfile_activate_actor);
> +		if (ret <= 0)
> +			return ret;
> +
> +		pos += ret;
> +		len -= ret;
> +	}
> +
> +	*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
> +	sis->max = isi.expected_page_no;
> +	sis->pages = isi.expected_page_no - 1;
> +	sis->highest_bit = isi.expected_page_no - 1;
> +	return isi.nr_extents;
> +}
> +EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 0ab824f..80de476 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -1475,6 +1475,16 @@ xfs_vm_set_page_dirty(
>  	return newly_dirty;
>  }
>  
> +static int
> +xfs_iomap_swapfile_activate(
> +	struct swap_info_struct		*sis,
> +	struct file			*swap_file,
> +	sector_t			*span)
> +{
> +	sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
> +	return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
> +}
> +
>  const struct address_space_operations xfs_address_space_operations = {
>  	.readpage		= xfs_vm_readpage,
>  	.readpages		= xfs_vm_readpages,
> @@ -1488,6 +1498,7 @@ const struct address_space_operations xfs_address_space_operations = {
>  	.migratepage		= buffer_migrate_page,
>  	.is_partially_uptodate  = block_is_partially_uptodate,
>  	.error_remove_page	= generic_error_remove_page,
> +	.swap_activate		= xfs_iomap_swapfile_activate,
>  };
>  
>  const struct address_space_operations xfs_dax_aops = {
> @@ -1495,4 +1506,5 @@ const struct address_space_operations xfs_dax_aops = {
>  	.direct_IO		= noop_direct_IO,
>  	.set_page_dirty		= noop_set_page_dirty,
>  	.invalidatepage		= noop_invalidatepage,
> +	.swap_activate		= xfs_iomap_swapfile_activate,
>  };
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 19a07de..66d1c35 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -106,4 +106,11 @@ typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
>  ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
>  
> +struct file;
> +struct swap_info_struct;
> +
> +int iomap_swapfile_activate(struct swap_info_struct *sis,
> +		struct file *swap_file, sector_t *pagespan,
> +		const struct iomap_ops *ops);
> +
>  #endif /* LINUX_IOMAP_H */
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
---end quoted text---

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] iomap: add a swapfile activation function
  2018-04-24 17:35 ` Christoph Hellwig
@ 2018-04-25 23:46   ` Darrick J. Wong
  2018-04-26  5:57     ` Christoph Hellwig
  0 siblings, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2018-04-25 23:46 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs, linux-fsdevel, linux-mm

On Tue, Apr 24, 2018 at 10:35:39AM -0700, Christoph Hellwig wrote:
> This looks much better than using bmap, but I still think that
> having the swap code build its own inefficient extent maps is
> a horrible idea.

The iomaps that are fed to the actor function are limited in length by
the underlying filesystem's extent records, so if we allocate a single
16GB physical extent, xfs creates two 2^20 block extent records and calls
the actor on both extents.  add_swap_extent merges those into a single
internal extent record behind the scenes to satisfy its own requirements
and reduce memory usage.  The current separation of duties means that
the mm code doesn't care about what the fs does internally and the fs
doesn't know or care about what the mm does with the information
afterwards.

Hey memory management developers, what do you all think of this?
Nobody I cornered at LSF pointed out any problems.

(I mean, we /could/ just treat the swapfile as an unbreakable rdma/dax
style lease, but ugh...)

--D

> 
> On Tue, Apr 17, 2018 at 07:50:23PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Add a new iomap_swapfile_activate function so that filesystems can
> > activate swap files without having to use the obsolete and slow bmap
> > function.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/iomap.c            |   99 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  fs/xfs/xfs_aops.c     |   12 ++++++
> >  include/linux/iomap.h |    7 +++
> >  3 files changed, 118 insertions(+)
> > 
> > diff --git a/fs/iomap.c b/fs/iomap.c
> > index afd1635..ace921b 100644
> > --- a/fs/iomap.c
> > +++ b/fs/iomap.c
> > @@ -1089,3 +1089,102 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
> >  	return ret;
> >  }
> >  EXPORT_SYMBOL_GPL(iomap_dio_rw);
> > +
> > +/* Swapfile activation */
> > +
> > +struct iomap_swapfile_info {
> > +	struct swap_info_struct *sis;
> > +	uint64_t lowest_ppage;		/* lowest physical addr seen (pages) */
> > +	uint64_t highest_ppage;		/* highest physical addr seen (pages) */
> > +	unsigned long expected_page_no;	/* next logical offset wanted (pages) */
> > +	int nr_extents;			/* extent count */
> > +};
> > +
> > +static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
> > +		loff_t count, void *data, struct iomap *iomap)
> > +{
> > +	struct iomap_swapfile_info *isi = data;
> > +	unsigned long page_no = iomap->offset >> PAGE_SHIFT;
> > +	unsigned long nr_pages = iomap->length >> PAGE_SHIFT;
> > +	uint64_t first_ppage = iomap->addr >> PAGE_SHIFT;
> > +	uint64_t last_ppage = ((iomap->addr + iomap->length) >> PAGE_SHIFT) - 1;
> > +
> > +	/* Only one bdev per swap file. */
> > +	if (iomap->bdev != isi->sis->bdev)
> > +		goto err;
> > +
> > +	/* Must be aligned to a page boundary. */
> > +	if ((iomap->offset & ~PAGE_MASK) || (iomap->addr & ~PAGE_MASK) ||
> > +	    (iomap->length & ~PAGE_MASK))
> > +		goto err;
> > +
> > +	/* Only real or unwritten extents. */
> > +	if (iomap->type != IOMAP_MAPPED && iomap->type != IOMAP_UNWRITTEN)
> > +		goto err;
> > +
> > +	/* No sparse files. */
> > +	if (isi->expected_page_no != page_no)
> > +		goto err;
> > +
> > +	/* No uncommitted metadata or shared blocks or inline data. */
> > +	if (iomap->flags & (IOMAP_F_DIRTY | IOMAP_F_SHARED |
> > +			    IOMAP_F_DATA_INLINE))
> > +		goto err;
> > +
> > +	/*
> > +	 * Calculate how much swap space we're adding; the first page contains
> > +	 * the swap header and doesn't count.
> > +	 */
> > +	if (page_no == 0)
> > +		first_ppage++;
> > +	if (isi->lowest_ppage > first_ppage)
> > +		isi->lowest_ppage = first_ppage;
> > +	if (isi->highest_ppage < last_ppage)
> > +		isi->highest_ppage = last_ppage;
> > +
> > +	/* Add extent, set up for the next call. */
> > +	isi->nr_extents += add_swap_extent(isi->sis, page_no, nr_pages,
> > +			first_ppage);
> > +	isi->expected_page_no = page_no + nr_pages;
> > +
> > +	return count;
> > +err:
> > +	pr_err("swapon: swapfile has holes\n");
> > +	return -EINVAL;
> > +}
> > +
> > +int iomap_swapfile_activate(struct swap_info_struct *sis,
> > +		struct file *swap_file, sector_t *pagespan,
> > +		const struct iomap_ops *ops)
> > +{
> > +	struct iomap_swapfile_info isi = {
> > +		.sis = sis,
> > +		.lowest_ppage = (sector_t)-1ULL,
> > +	};
> > +	struct address_space *mapping = swap_file->f_mapping;
> > +	struct inode *inode = mapping->host;
> > +	loff_t pos = 0;
> > +	loff_t len = i_size_read(inode);
> > +	loff_t ret;
> > +
> > +	ret = filemap_write_and_wait(inode->i_mapping);
> > +	if (ret)
> > +		return ret;
> > +
> > +	while (len > 0) {
> > +		ret = iomap_apply(inode, pos, len, IOMAP_REPORT,
> > +				ops, &isi, iomap_swapfile_activate_actor);
> > +		if (ret <= 0)
> > +			return ret;
> > +
> > +		pos += ret;
> > +		len -= ret;
> > +	}
> > +
> > +	*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
> > +	sis->max = isi.expected_page_no;
> > +	sis->pages = isi.expected_page_no - 1;
> > +	sis->highest_bit = isi.expected_page_no - 1;
> > +	return isi.nr_extents;
> > +}
> > +EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
> > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> > index 0ab824f..80de476 100644
> > --- a/fs/xfs/xfs_aops.c
> > +++ b/fs/xfs/xfs_aops.c
> > @@ -1475,6 +1475,16 @@ xfs_vm_set_page_dirty(
> >  	return newly_dirty;
> >  }
> >  
> > +static int
> > +xfs_iomap_swapfile_activate(
> > +	struct swap_info_struct		*sis,
> > +	struct file			*swap_file,
> > +	sector_t			*span)
> > +{
> > +	sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
> > +	return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
> > +}
> > +
> >  const struct address_space_operations xfs_address_space_operations = {
> >  	.readpage		= xfs_vm_readpage,
> >  	.readpages		= xfs_vm_readpages,
> > @@ -1488,6 +1498,7 @@ const struct address_space_operations xfs_address_space_operations = {
> >  	.migratepage		= buffer_migrate_page,
> >  	.is_partially_uptodate  = block_is_partially_uptodate,
> >  	.error_remove_page	= generic_error_remove_page,
> > +	.swap_activate		= xfs_iomap_swapfile_activate,
> >  };
> >  
> >  const struct address_space_operations xfs_dax_aops = {
> > @@ -1495,4 +1506,5 @@ const struct address_space_operations xfs_dax_aops = {
> >  	.direct_IO		= noop_direct_IO,
> >  	.set_page_dirty		= noop_set_page_dirty,
> >  	.invalidatepage		= noop_invalidatepage,
> > +	.swap_activate		= xfs_iomap_swapfile_activate,
> >  };
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index 19a07de..66d1c35 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -106,4 +106,11 @@ typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
> >  ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
> >  		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
> >  
> > +struct file;
> > +struct swap_info_struct;
> > +
> > +int iomap_swapfile_activate(struct swap_info_struct *sis,
> > +		struct file *swap_file, sector_t *pagespan,
> > +		const struct iomap_ops *ops);
> > +
> >  #endif /* LINUX_IOMAP_H */
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> ---end quoted text---
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] iomap: add a swapfile activation function
  2018-04-25 23:46   ` Darrick J. Wong
@ 2018-04-26  5:57     ` Christoph Hellwig
  2018-04-26  7:27       ` Omar Sandoval
  0 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2018-04-26  5:57 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Christoph Hellwig, xfs, linux-fsdevel, linux-mm, Aleksei Besogonov

On Wed, Apr 25, 2018 at 04:46:22PM -0700, Darrick J. Wong wrote:
> (I mean, we /could/ just treat the swapfile as an unbreakable rdma/dax
> style lease, but ugh...)

That is what I think it should be long term, instead of a strange
parallel I/O path.

But in the meantime we have a real problem with supporting swap files,
so we should merge the approaches from you and Aleksei and get something
in ASAP.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] iomap: add a swapfile activation function
  2018-04-26  5:57     ` Christoph Hellwig
@ 2018-04-26  7:27       ` Omar Sandoval
  0 siblings, 0 replies; 7+ messages in thread
From: Omar Sandoval @ 2018-04-26  7:27 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Darrick J. Wong, xfs, linux-fsdevel, linux-mm, Aleksei Besogonov

On Wed, Apr 25, 2018 at 10:57:27PM -0700, Christoph Hellwig wrote:
> On Wed, Apr 25, 2018 at 04:46:22PM -0700, Darrick J. Wong wrote:
> > (I mean, we /could/ just treat the swapfile as an unbreakable rdma/dax
> > style lease, but ugh...)
> 
> That is what I think it should be long term, instead of a strange
> parallel I/O path.
> 
> But in the mean time we have a real problem with supporting swap files,
> so we should merge the approaches from you and Aleksei and get something
> in ASAP.

I'm planning to do something along these lines for Btrfs, as well (have
swap_activate add the swap extents itself), because the previous thing I
tried with going through ->read_iter() and ->write_iter() ran into too
many locking issues (i.e., GFP_NOFS can suddenly go through FS locks).

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] iomap: add a swapfile activation function
  2018-04-21 12:33 ` Jan Kara
@ 2018-05-02 20:29   ` Darrick J. Wong
  0 siblings, 0 replies; 7+ messages in thread
From: Darrick J. Wong @ 2018-05-02 20:29 UTC (permalink / raw)
  To: Jan Kara; +Cc: xfs, linux-fsdevel, Christoph Hellwig, linux-mm

On Sat, Apr 21, 2018 at 02:33:01PM +0200, Jan Kara wrote:
> On Tue 17-04-18 19:50:23, Darrick J. Wong wrote:
> > +
> > +/* Swapfile activation */
> > +
> > +struct iomap_swapfile_info {
> > +	struct swap_info_struct *sis;
> > +	uint64_t lowest_ppage;		/* lowest physical addr seen (pages) */
> > +	uint64_t highest_ppage;		/* highest physical addr seen (pages) */
> > +	unsigned long expected_page_no;	/* next logical offset wanted (pages) */
> > +	int nr_extents;			/* extent count */
> > +};
> > +
> > +static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
> > +		loff_t count, void *data, struct iomap *iomap)
> > +{
> > +	struct iomap_swapfile_info *isi = data;
> > +	unsigned long page_no = iomap->offset >> PAGE_SHIFT;
> > +	unsigned long nr_pages = iomap->length >> PAGE_SHIFT;
> > +	uint64_t first_ppage = iomap->addr >> PAGE_SHIFT;
> > +	uint64_t last_ppage = ((iomap->addr + iomap->length) >> PAGE_SHIFT) - 1;
> > +
> > +	/* Only one bdev per swap file. */
> > +	if (iomap->bdev != isi->sis->bdev)
> > +		goto err;
> > +
> > +	/* Must be aligned to a page boundary. */
> > +	if ((iomap->offset & ~PAGE_MASK) || (iomap->addr & ~PAGE_MASK) ||
> > +	    (iomap->length & ~PAGE_MASK))
> > +		goto err;
> 
> Reporting error in this case does not look equivalent to
> generic_swapfile_activate()? That function just skips blocks with
> insufficient alignment...

Yes, I've fixed this to emulate more closely the behavior of the old
bmap implementation, so now we collect physically contiguous iomaps
and trim the accumulated iomap to satisfy the page alignment
requirements.

> And I'm actually puzzled why alignment of physical block is needed but
> that's independent question.

I'm not sure why either. :)

> > +	/* Only real or unwritten extents. */
> > +	if (iomap->type != IOMAP_MAPPED && iomap->type != IOMAP_UNWRITTEN)
> > +		goto err;
> > +
> > +	/* No sparse files. */
> > +	if (isi->expected_page_no != page_no)
> > +		goto err;
> > +
> > +	/* No uncommitted metadata or shared blocks or inline data. */
> > +	if (iomap->flags & (IOMAP_F_DIRTY | IOMAP_F_SHARED |
> > +			    IOMAP_F_DATA_INLINE))
> > +		goto err;
> > +
> > +	/*
> > +	 * Calculate how much swap space we're adding; the first page contains
> > +	 * the swap header and doesn't count.
> > +	 */
> > +	if (page_no == 0)
> > +		first_ppage++;
> > +	if (isi->lowest_ppage > first_ppage)
> > +		isi->lowest_ppage = first_ppage;
> > +	if (isi->highest_ppage < last_ppage)
> > +		isi->highest_ppage = last_ppage;
> > +
> > +	/* Add extent, set up for the next call. */
> > +	isi->nr_extents += add_swap_extent(isi->sis, page_no, nr_pages,
> > +			first_ppage);
> 
> And here add_swap_extent() can return error.

Fixed, thanks.

--D

> 
> 								Honza
> -- 
> Jan Kara <jack@suse.com>
> SUSE Labs, CR

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-05-02 20:29 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-18  2:50 [PATCH] iomap: add a swapfile activation function Darrick J. Wong
2018-04-21 12:33 ` Jan Kara
2018-05-02 20:29   ` Darrick J. Wong
2018-04-24 17:35 ` Christoph Hellwig
2018-04-25 23:46   ` Darrick J. Wong
2018-04-26  5:57     ` Christoph Hellwig
2018-04-26  7:27       ` Omar Sandoval

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).