All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing
@ 2021-12-17 15:29 David Howells
  2021-12-17 15:29 ` [PATCH v2 2/2] ceph: Remove some other inline-setting bits David Howells
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: David Howells @ 2021-12-17 15:29 UTC (permalink / raw)
  To: jlayton; +Cc: ceph-devel, idryomov, dhowells, linux-fsdevel

If a ceph file is made up of inline data, uninline that in the ceph_open()
rather than in ceph_page_mkwrite(), ceph_write_iter(), ceph_fallocate() or
ceph_write_begin().

This makes it easier to convert to using the netfs library for VM write
hooks.

Changes
=======
ver #2:
 - Removed the uninline-handling code from ceph_write_begin() also.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: ceph-devel@vger.kernel.org
---

 fs/ceph/addr.c  |   97 +++++++++++++------------------------------------------
 fs/ceph/file.c  |   28 +++++++++-------
 fs/ceph/super.h |    2 +
 3 files changed, 40 insertions(+), 87 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e836f8f1d4f8..6e1b15cc87cf 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1278,45 +1278,11 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = file_inode(file);
-	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct folio *folio = NULL;
-	pgoff_t index = pos >> PAGE_SHIFT;
 	int r;
 
-	/*
-	 * Uninlining should have already been done and everything updated, EXCEPT
-	 * for inline_version sent to the MDS.
-	 */
-	if (ci->i_inline_version != CEPH_INLINE_NONE) {
-		unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
-		if (aop_flags & AOP_FLAG_NOFS)
-			fgp_flags |= FGP_NOFS;
-		folio = __filemap_get_folio(mapping, index, fgp_flags,
-					    mapping_gfp_mask(mapping));
-		if (!folio)
-			return -ENOMEM;
-
-		/*
-		 * The inline_version on a new inode is set to 1. If that's the
-		 * case, then the folio is brand new and isn't yet Uptodate.
-		 */
-		r = 0;
-		if (index == 0 && ci->i_inline_version != 1) {
-			if (!folio_test_uptodate(folio)) {
-				WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
-					  ci->i_inline_version);
-				r = -EINVAL;
-			}
-			goto out;
-		}
-		zero_user_segment(&folio->page, 0, folio_size(folio));
-		folio_mark_uptodate(folio);
-		goto out;
-	}
-
 	r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
 			      &ceph_netfs_read_ops, NULL);
-out:
 	if (r == 0)
 		folio_wait_fscache(folio);
 	if (r < 0) {
@@ -1512,19 +1478,6 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
 	sb_start_pagefault(inode->i_sb);
 	ceph_block_sigs(&oldset);
 
-	if (ci->i_inline_version != CEPH_INLINE_NONE) {
-		struct page *locked_page = NULL;
-		if (off == 0) {
-			lock_page(page);
-			locked_page = page;
-		}
-		err = ceph_uninline_data(vma->vm_file, locked_page);
-		if (locked_page)
-			unlock_page(locked_page);
-		if (err < 0)
-			goto out_free;
-	}
-
 	if (off + thp_size(page) <= size)
 		len = thp_size(page);
 	else
@@ -1649,13 +1602,14 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
 	}
 }
 
-int ceph_uninline_data(struct file *filp, struct page *locked_page)
+int ceph_uninline_data(struct file *file)
 {
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_osd_request *req;
-	struct page *page = NULL;
+	struct folio *folio = NULL;
+	struct page *pages[1];
 	u64 len, inline_version;
 	int err = 0;
 	bool from_pagecache = false;
@@ -1671,34 +1625,30 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 	    inline_version == CEPH_INLINE_NONE)
 		goto out;
 
-	if (locked_page) {
-		page = locked_page;
-		WARN_ON(!PageUptodate(page));
-	} else if (ceph_caps_issued(ci) &
-		   (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
-		page = find_get_page(inode->i_mapping, 0);
-		if (page) {
-			if (PageUptodate(page)) {
+	if (ceph_caps_issued(ci) & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
+		folio = filemap_get_folio(inode->i_mapping, 0);
+		if (folio) {
+			if (folio_test_uptodate(folio)) {
 				from_pagecache = true;
-				lock_page(page);
+				folio_lock(folio);
 			} else {
-				put_page(page);
-				page = NULL;
+				folio_put(folio);
+				folio = NULL;
 			}
 		}
 	}
 
-	if (page) {
+	if (folio) {
 		len = i_size_read(inode);
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
+		if (len >  folio_size(folio))
+			len = folio_size(folio);
 	} else {
-		page = __page_cache_alloc(GFP_NOFS);
-		if (!page) {
+		folio = filemap_alloc_folio(GFP_NOFS, 0);
+		if (!folio) {
 			err = -ENOMEM;
 			goto out;
 		}
-		err = __ceph_do_getattr(inode, page,
+		err = __ceph_do_getattr(inode, folio_page(folio, 0),
 					CEPH_STAT_CAP_INLINE_DATA, true);
 		if (err < 0) {
 			/* no inline data */
@@ -1736,7 +1686,8 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 		goto out;
 	}
 
-	osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
+	pages[0] = folio_page(folio, 0);
+	osd_req_op_extent_osd_data_pages(req, 1, pages, len, 0, false, false);
 
 	{
 		__le64 xattr_buf = cpu_to_le64(inline_version);
@@ -1773,12 +1724,10 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 	if (err == -ECANCELED)
 		err = 0;
 out:
-	if (page && page != locked_page) {
-		if (from_pagecache) {
-			unlock_page(page);
-			put_page(page);
-		} else
-			__free_pages(page, 0);
+	if (folio) {
+		if (from_pagecache)
+			folio_unlock(folio);
+		folio_put(folio);
 	}
 
 	dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index bf1017682d09..d16ba8720783 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -205,6 +205,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_file_info *fi;
+	int ret;
 
 	dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
 			inode->i_mode, isdir ? "dir" : "regular");
@@ -235,7 +236,22 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 	INIT_LIST_HEAD(&fi->rw_contexts);
 	fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
 
+	if ((file->f_mode & FMODE_WRITE) &&
+	    ci->i_inline_version != CEPH_INLINE_NONE) {
+		ret = ceph_uninline_data(file);
+		if (ret < 0)
+			goto error;
+	}
+
 	return 0;
+
+error:
+	ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE);
+	ceph_put_fmode(ci, fi->fmode, 1);
+	kmem_cache_free(ceph_file_cachep, fi);
+	/* wake up anyone waiting for caps on this inode */
+	wake_up_all(&ci->i_cap_wq);
+	return ret;
 }
 
 /*
@@ -1751,12 +1767,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (err)
 		goto out;
 
-	if (ci->i_inline_version != CEPH_INLINE_NONE) {
-		err = ceph_uninline_data(file, NULL);
-		if (err < 0)
-			goto out;
-	}
-
 	dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
 	     inode, ceph_vinop(inode), pos, count, i_size_read(inode));
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
@@ -2082,12 +2092,6 @@ static long ceph_fallocate(struct file *file, int mode,
 		goto unlock;
 	}
 
-	if (ci->i_inline_version != CEPH_INLINE_NONE) {
-		ret = ceph_uninline_data(file, NULL);
-		if (ret < 0)
-			goto unlock;
-	}
-
 	size = i_size_read(inode);
 
 	/* Are we punching a hole beyond EOF? */
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d0142cc5c41b..f1cec05e4eb8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1207,7 +1207,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
 /* addr.c */
 extern const struct address_space_operations ceph_aops;
 extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
-extern int ceph_uninline_data(struct file *filp, struct page *locked_page);
+extern int ceph_uninline_data(struct file *file);
 extern int ceph_pool_perm_check(struct inode *inode, int need);
 extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
 int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate);



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/2] ceph: Remove some other inline-setting bits
  2021-12-17 15:29 [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing David Howells
@ 2021-12-17 15:29 ` David Howells
  2021-12-17 16:07   ` Jeff Layton
  2021-12-17 16:05 ` [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing Jeff Layton
  2021-12-17 16:20 ` Matthew Wilcox
  2 siblings, 1 reply; 7+ messages in thread
From: David Howells @ 2021-12-17 15:29 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, dhowells, linux-fsdevel

Remove some other bits where a ceph file can't be inline because we
uninlined it when we opened it for writing.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/ceph/addr.c |    4 +---
 fs/ceph/file.c |    4 ----
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6e1b15cc87cf..553e2b5653f3 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1534,11 +1534,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
 		ceph_put_snap_context(snapc);
 	} while (err == 0);
 
-	if (ret == VM_FAULT_LOCKED ||
-	    ci->i_inline_version != CEPH_INLINE_NONE) {
+	if (ret == VM_FAULT_LOCKED) {
 		int dirty;
 		spin_lock(&ci->i_ceph_lock);
-		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
 					       &prealloc_cf);
 		spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d16ba8720783..4a0aeed7f660 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1031,7 +1031,6 @@ static void ceph_aio_complete(struct inode *inode,
 		}
 
 		spin_lock(&ci->i_ceph_lock);
-		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
 					       &aio_req->prealloc_cf);
 		spin_unlock(&ci->i_ceph_lock);
@@ -1838,7 +1837,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		int dirty;
 
 		spin_lock(&ci->i_ceph_lock);
-		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
 					       &prealloc_cf);
 		spin_unlock(&ci->i_ceph_lock);
@@ -2116,7 +2114,6 @@ static long ceph_fallocate(struct file *file, int mode,
 
 	if (!ret) {
 		spin_lock(&ci->i_ceph_lock);
-		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
 					       &prealloc_cf);
 		spin_unlock(&ci->i_ceph_lock);
@@ -2509,7 +2506,6 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
 	}
 	/* Mark Fw dirty */
 	spin_lock(&dst_ci->i_ceph_lock);
-	dst_ci->i_inline_version = CEPH_INLINE_NONE;
 	dirty = __ceph_mark_dirty_caps(dst_ci, CEPH_CAP_FILE_WR, &prealloc_cf);
 	spin_unlock(&dst_ci->i_ceph_lock);
 	if (dirty)



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing
  2021-12-17 15:29 [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing David Howells
  2021-12-17 15:29 ` [PATCH v2 2/2] ceph: Remove some other inline-setting bits David Howells
@ 2021-12-17 16:05 ` Jeff Layton
  2021-12-17 16:20 ` Matthew Wilcox
  2 siblings, 0 replies; 7+ messages in thread
From: Jeff Layton @ 2021-12-17 16:05 UTC (permalink / raw)
  To: David Howells; +Cc: ceph-devel, idryomov, linux-fsdevel

On Fri, 2021-12-17 at 15:29 +0000, David Howells wrote:
> If a ceph file is made up of inline data, uninline that in the ceph_open()
> rather than in ceph_page_mkwrite(), ceph_write_iter(), ceph_fallocate() or
> ceph_write_begin().
> 
> This makes it easier to convert to using the netfs library for VM write
> hooks.
> 
> Changes
> =======
> ver #2:
>  - Removed the uninline-handling code from ceph_write_begin() also.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: ceph-devel@vger.kernel.org
> ---
> 
>  fs/ceph/addr.c  |   97 +++++++++++++------------------------------------------
>  fs/ceph/file.c  |   28 +++++++++-------
>  fs/ceph/super.h |    2 +
>  3 files changed, 40 insertions(+), 87 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index e836f8f1d4f8..6e1b15cc87cf 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1278,45 +1278,11 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
>  			    struct page **pagep, void **fsdata)
>  {
>  	struct inode *inode = file_inode(file);
> -	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct folio *folio = NULL;
> -	pgoff_t index = pos >> PAGE_SHIFT;
>  	int r;
>  
> -	/*
> -	 * Uninlining should have already been done and everything updated, EXCEPT
> -	 * for inline_version sent to the MDS.
> -	 */
> -	if (ci->i_inline_version != CEPH_INLINE_NONE) {
> -		unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
> -		if (aop_flags & AOP_FLAG_NOFS)
> -			fgp_flags |= FGP_NOFS;
> -		folio = __filemap_get_folio(mapping, index, fgp_flags,
> -					    mapping_gfp_mask(mapping));
> -		if (!folio)
> -			return -ENOMEM;
> -
> -		/*
> -		 * The inline_version on a new inode is set to 1. If that's the
> -		 * case, then the folio is brand new and isn't yet Uptodate.
> -		 */
> -		r = 0;
> -		if (index == 0 && ci->i_inline_version != 1) {
> -			if (!folio_test_uptodate(folio)) {
> -				WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
> -					  ci->i_inline_version);
> -				r = -EINVAL;
> -			}
> -			goto out;
> -		}
> -		zero_user_segment(&folio->page, 0, folio_size(folio));
> -		folio_mark_uptodate(folio);
> -		goto out;
> -	}
> -
>  	r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
>  			      &ceph_netfs_read_ops, NULL);
> -out:
>  	if (r == 0)
>  		folio_wait_fscache(folio);
>  	if (r < 0) {
> @@ -1512,19 +1478,6 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
>  	sb_start_pagefault(inode->i_sb);
>  	ceph_block_sigs(&oldset);
>  
> -	if (ci->i_inline_version != CEPH_INLINE_NONE) {
> -		struct page *locked_page = NULL;
> -		if (off == 0) {
> -			lock_page(page);
> -			locked_page = page;
> -		}
> -		err = ceph_uninline_data(vma->vm_file, locked_page);
> -		if (locked_page)
> -			unlock_page(locked_page);
> -		if (err < 0)
> -			goto out_free;
> -	}
> -
>  	if (off + thp_size(page) <= size)
>  		len = thp_size(page);
>  	else
> @@ -1649,13 +1602,14 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
>  	}
>  }
>  
> -int ceph_uninline_data(struct file *filp, struct page *locked_page)
> +int ceph_uninline_data(struct file *file)
>  {
> -	struct inode *inode = file_inode(filp);
> +	struct inode *inode = file_inode(file);
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
>  	struct ceph_osd_request *req;
> -	struct page *page = NULL;
> +	struct folio *folio = NULL;
> +	struct page *pages[1];
>  	u64 len, inline_version;
>  	int err = 0;
>  	bool from_pagecache = false;
> @@ -1671,34 +1625,30 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
>  	    inline_version == CEPH_INLINE_NONE)
>  		goto out;
>  
> -	if (locked_page) {
> -		page = locked_page;
> -		WARN_ON(!PageUptodate(page));
> -	} else if (ceph_caps_issued(ci) &
> -		   (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
> -		page = find_get_page(inode->i_mapping, 0);
> -		if (page) {
> -			if (PageUptodate(page)) {
> +	if (ceph_caps_issued(ci) & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
> +		folio = filemap_get_folio(inode->i_mapping, 0);
> +		if (folio) {
> +			if (folio_test_uptodate(folio)) {
>  				from_pagecache = true;
> -				lock_page(page);
> +				folio_lock(folio);
>  			} else {
> -				put_page(page);
> -				page = NULL;
> +				folio_put(folio);
> +				folio = NULL;
>  			}
>  		}
>  	}
>  
> -	if (page) {
> +	if (folio) {
>  		len = i_size_read(inode);
> -		if (len > PAGE_SIZE)
> -			len = PAGE_SIZE;
> +		if (len >  folio_size(folio))
> +			len = folio_size(folio);
>  	} else {
> -		page = __page_cache_alloc(GFP_NOFS);
> -		if (!page) {
> +		folio = filemap_alloc_folio(GFP_NOFS, 0);
> +		if (!folio) {
>  			err = -ENOMEM;
>  			goto out;
>  		}
> -		err = __ceph_do_getattr(inode, page,
> +		err = __ceph_do_getattr(inode, folio_page(folio, 0),
>  					CEPH_STAT_CAP_INLINE_DATA, true);
>  		if (err < 0) {
>  			/* no inline data */
> @@ -1736,7 +1686,8 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
>  		goto out;
>  	}
>  
> -	osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
> +	pages[0] = folio_page(folio, 0);
> +	osd_req_op_extent_osd_data_pages(req, 1, pages, len, 0, false, false);
>  
>  	{
>  		__le64 xattr_buf = cpu_to_le64(inline_version);
> @@ -1773,12 +1724,10 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
>  	if (err == -ECANCELED)
>  		err = 0;
>  out:
> -	if (page && page != locked_page) {
> -		if (from_pagecache) {
> -			unlock_page(page);
> -			put_page(page);
> -		} else
> -			__free_pages(page, 0);
> +	if (folio) {
> +		if (from_pagecache)
> +			folio_unlock(folio);
> +		folio_put(folio);
>  	}
>  
>  	dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index bf1017682d09..d16ba8720783 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -205,6 +205,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
>  {
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_file_info *fi;
> +	int ret;
>  
>  	dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
>  			inode->i_mode, isdir ? "dir" : "regular");
> @@ -235,7 +236,22 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
>  	INIT_LIST_HEAD(&fi->rw_contexts);
>  	fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
>  
> +	if ((file->f_mode & FMODE_WRITE) &&
> +	    ci->i_inline_version != CEPH_INLINE_NONE) {
> +		ret = ceph_uninline_data(file);
> +		if (ret < 0)
> +			goto error;
> +	}
> +
>  	return 0;
> +
> +error:
> +	ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE);
> +	ceph_put_fmode(ci, fi->fmode, 1);
> +	kmem_cache_free(ceph_file_cachep, fi);
> +	/* wake up anyone waiting for caps on this inode */
> +	wake_up_all(&ci->i_cap_wq);
> +	return ret;
>  }
>  
>  /*
> @@ -1751,12 +1767,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  	if (err)
>  		goto out;
>  
> -	if (ci->i_inline_version != CEPH_INLINE_NONE) {
> -		err = ceph_uninline_data(file, NULL);
> -		if (err < 0)
> -			goto out;
> -	}
> -
>  	dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
>  	     inode, ceph_vinop(inode), pos, count, i_size_read(inode));
>  	if (fi->fmode & CEPH_FILE_MODE_LAZY)
> @@ -2082,12 +2092,6 @@ static long ceph_fallocate(struct file *file, int mode,
>  		goto unlock;
>  	}
>  
> -	if (ci->i_inline_version != CEPH_INLINE_NONE) {
> -		ret = ceph_uninline_data(file, NULL);
> -		if (ret < 0)
> -			goto unlock;
> -	}
> -
>  	size = i_size_read(inode);
>  
>  	/* Are we punching a hole beyond EOF? */
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index d0142cc5c41b..f1cec05e4eb8 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1207,7 +1207,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
>  /* addr.c */
>  extern const struct address_space_operations ceph_aops;
>  extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
> -extern int ceph_uninline_data(struct file *filp, struct page *locked_page);
> +extern int ceph_uninline_data(struct file *file);
>  extern int ceph_pool_perm_check(struct inode *inode, int need);
>  extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
>  int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate);
> 
> 

It's a substantial reduction in code and gets the uninlining out of the
write codepaths. I like it.

Looks like this relies on the changes in your fscache-rewrite branch, so
we may need to wait until after the upcoming merge window to take this
in, or just plan to fold your fscache-rewrite branch into the ceph
testing branch for now.

I'll at least do some local testing in the interim, but it's not trivial
to set up inlining these days, so we may not be able to test what this
affects easily.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 2/2] ceph: Remove some other inline-setting bits
  2021-12-17 15:29 ` [PATCH v2 2/2] ceph: Remove some other inline-setting bits David Howells
@ 2021-12-17 16:07   ` Jeff Layton
  0 siblings, 0 replies; 7+ messages in thread
From: Jeff Layton @ 2021-12-17 16:07 UTC (permalink / raw)
  To: David Howells; +Cc: idryomov, linux-fsdevel

On Fri, 2021-12-17 at 15:29 +0000, David Howells wrote:
> Remove some other bits where a ceph file can't be inline because we
> uninlined it when we opened it for writing.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>  fs/ceph/addr.c |    4 +---
>  fs/ceph/file.c |    4 ----
>  2 files changed, 1 insertion(+), 7 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 6e1b15cc87cf..553e2b5653f3 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1534,11 +1534,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
>  		ceph_put_snap_context(snapc);
>  	} while (err == 0);
>  
> -	if (ret == VM_FAULT_LOCKED ||
> -	    ci->i_inline_version != CEPH_INLINE_NONE) {
> +	if (ret == VM_FAULT_LOCKED) {
>  		int dirty;
>  		spin_lock(&ci->i_ceph_lock);
> -		ci->i_inline_version = CEPH_INLINE_NONE;
>  		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
>  					       &prealloc_cf);
>  		spin_unlock(&ci->i_ceph_lock);
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index d16ba8720783..4a0aeed7f660 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1031,7 +1031,6 @@ static void ceph_aio_complete(struct inode *inode,
>  		}
>  
>  		spin_lock(&ci->i_ceph_lock);
> -		ci->i_inline_version = CEPH_INLINE_NONE;
>  		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
>  					       &aio_req->prealloc_cf);
>  		spin_unlock(&ci->i_ceph_lock);
> @@ -1838,7 +1837,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  		int dirty;
>  
>  		spin_lock(&ci->i_ceph_lock);
> -		ci->i_inline_version = CEPH_INLINE_NONE;
>  		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
>  					       &prealloc_cf);
>  		spin_unlock(&ci->i_ceph_lock);
> @@ -2116,7 +2114,6 @@ static long ceph_fallocate(struct file *file, int mode,
>  
>  	if (!ret) {
>  		spin_lock(&ci->i_ceph_lock);
> -		ci->i_inline_version = CEPH_INLINE_NONE;
>  		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
>  					       &prealloc_cf);
>  		spin_unlock(&ci->i_ceph_lock);
> @@ -2509,7 +2506,6 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
>  	}
>  	/* Mark Fw dirty */
>  	spin_lock(&dst_ci->i_ceph_lock);
> -	dst_ci->i_inline_version = CEPH_INLINE_NONE;
>  	dirty = __ceph_mark_dirty_caps(dst_ci, CEPH_CAP_FILE_WR, &prealloc_cf);
>  	spin_unlock(&dst_ci->i_ceph_lock);
>  	if (dirty)
> 
> 

I'll probably just fold this one into the first patch if that's ok.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing
  2021-12-17 15:29 [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing David Howells
  2021-12-17 15:29 ` [PATCH v2 2/2] ceph: Remove some other inline-setting bits David Howells
  2021-12-17 16:05 ` [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing Jeff Layton
@ 2021-12-17 16:20 ` Matthew Wilcox
  2021-12-17 17:18   ` Jeff Layton
  2 siblings, 1 reply; 7+ messages in thread
From: Matthew Wilcox @ 2021-12-17 16:20 UTC (permalink / raw)
  To: David Howells; +Cc: jlayton, ceph-devel, idryomov, linux-fsdevel

On Fri, Dec 17, 2021 at 03:29:45PM +0000, David Howells wrote:
> If a ceph file is made up of inline data, uninline that in the ceph_open()
> rather than in ceph_page_mkwrite(), ceph_write_iter(), ceph_fallocate() or
> ceph_write_begin().

I don't think this is the right approach.  Just opening a file with
O_RDWR shouldn't take it out of inline mode; an actual write (or fault)
should be required to uninline it.

> This makes it easier to convert to using the netfs library for VM write
> hooks.

I don't understand.  You're talking about the fault path?  Surely
the filesystem gets called with the vm_fault parameter only, then
calls into the netfs code, passing vmf and the operations struct?
And ceph could uninline there.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing
  2021-12-17 16:20 ` Matthew Wilcox
@ 2021-12-17 17:18   ` Jeff Layton
  2021-12-17 17:29     ` Matthew Wilcox
  0 siblings, 1 reply; 7+ messages in thread
From: Jeff Layton @ 2021-12-17 17:18 UTC (permalink / raw)
  To: Matthew Wilcox, David Howells; +Cc: ceph-devel, idryomov, linux-fsdevel

On Fri, 2021-12-17 at 16:20 +0000, Matthew Wilcox wrote:
> On Fri, Dec 17, 2021 at 03:29:45PM +0000, David Howells wrote:
> > If a ceph file is made up of inline data, uninline that in the ceph_open()
> > rather than in ceph_page_mkwrite(), ceph_write_iter(), ceph_fallocate() or
> > ceph_write_begin().
> 
> I don't think this is the right approach.  Just opening a file with
> O_RDWR shouldn't take it out of inline mode; an actual write (or fault)
> should be required to uninline it.
> 

This feature is being deprecated in ceph altogether, so more
aggressively uninlining of files is fine. The kernel cephfs client never
supported writes to it anyway so this feature was really only used by a
few brave souls.

We're hoping to have it formally removed by the time the Ceph Quincy
release ships (~April-May timeframe). Unfortunately, we need to keep
support for it around for a bit longer since some still-supported ceph
releases don't have this deprecated.

> > This makes it easier to convert to using the netfs library for VM write
> > hooks.
> 
> I don't understand.  You're talking about the fault path?  Surely
> the filesystem gets called with the vm_fault parameter only, then
> calls into the netfs code, passing vmf and the operations struct?
> And ceph could uninline there.
> 

Taking the uninlining out of the write codepaths is a _good_ thing, IMO.
If we were planning to keep this feature around, then I might disagree
with doing this, but it fits with the current plans for inline data just
fine for now.

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing
  2021-12-17 17:18   ` Jeff Layton
@ 2021-12-17 17:29     ` Matthew Wilcox
  0 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2021-12-17 17:29 UTC (permalink / raw)
  To: Jeff Layton; +Cc: David Howells, ceph-devel, idryomov, linux-fsdevel

On Fri, Dec 17, 2021 at 12:18:04PM -0500, Jeff Layton wrote:
> This feature is being deprecated in ceph altogether, so more
> aggressively uninlining of files is fine. The kernel cephfs client never
> supported writes to it anyway so this feature was really only used by a
> few brave souls.
> 
> We're hoping to have it formally removed by the time the Ceph Quincy
> release ships (~April-May timeframe). Unfortunately, we need to keep
> support for it around for a bit longer since some still-supported ceph
> releases don't have this deprecated.

OK, I shan't bother pointing out the awful bugs and races in the current
uninlining code then ...

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-12-17 17:30 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-17 15:29 [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing David Howells
2021-12-17 15:29 ` [PATCH v2 2/2] ceph: Remove some other inline-setting bits David Howells
2021-12-17 16:07   ` Jeff Layton
2021-12-17 16:05 ` [PATCH v2 1/2] ceph: Uninline the data on a file opened for writing Jeff Layton
2021-12-17 16:20 ` Matthew Wilcox
2021-12-17 17:18   ` Jeff Layton
2021-12-17 17:29     ` Matthew Wilcox

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.