From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Xiubo Li <xiubli@redhat.com>, Jeff Layton <jlayton@kernel.org>,
Ilya Dryomov <idryomov@gmail.com>,
Sasha Levin <sashal@kernel.org>,
ceph-devel@vger.kernel.org
Subject: [PATCH AUTOSEL 5.14 13/25] ceph: remove the capsnaps when removing caps
Date: Mon, 13 Sep 2021 18:33:27 -0400 [thread overview]
Message-ID: <20210913223339.435347-13-sashal@kernel.org> (raw)
In-Reply-To: <20210913223339.435347-1-sashal@kernel.org>
From: Xiubo Li <xiubli@redhat.com>
[ Upstream commit a6d37ccdd240e80f26aaea0e62cda310e0e184d7 ]
capsnaps will take inode references via ihold when queueing to flush.
When force unmounting, the client will just close the sessions and
may never get a flush reply, causing a leak and inode ref leak.
Fix this by removing the capsnaps for an inode when removing the caps.
URL: https://tracker.ceph.com/issues/52295
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/ceph/caps.c | 68 +++++++++++++++++++++++++++++++++-----------
fs/ceph/mds_client.c | 31 +++++++++++++++++++-
fs/ceph/super.h | 6 ++++
3 files changed, 87 insertions(+), 18 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c2d654156783..73d8487a71d8 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3114,7 +3114,16 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
break;
}
}
- BUG_ON(!found);
+
+ if (!found) {
+ /*
+ * The capsnap should already be removed when removing
+ * auth cap in the case of a forced unmount.
+ */
+ WARN_ON_ONCE(ci->i_auth_cap);
+ goto unlock;
+ }
+
capsnap->dirty_pages -= nr;
if (capsnap->dirty_pages == 0) {
complete_capsnap = true;
@@ -3136,6 +3145,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
complete_capsnap ? " (complete capsnap)" : "");
}
+unlock:
spin_unlock(&ci->i_ceph_lock);
if (last) {
@@ -3606,6 +3616,43 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
iput(inode);
}
+void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+ bool *wake_ci, bool *wake_mdsc)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ bool ret;
+
+ lockdep_assert_held(&ci->i_ceph_lock);
+
+ dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
+
+ list_del_init(&capsnap->ci_item);
+ ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
+ if (wake_ci)
+ *wake_ci = ret;
+
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (list_empty(&ci->i_cap_flush_list))
+ list_del_init(&ci->i_flushing_item);
+
+ ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
+ if (wake_mdsc)
+ *wake_mdsc = ret;
+ spin_unlock(&mdsc->cap_dirty_lock);
+}
+
+void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+ bool *wake_ci, bool *wake_mdsc)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ lockdep_assert_held(&ci->i_ceph_lock);
+
+ WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
+ __ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
+}
+
/*
* Handle FLUSHSNAP_ACK. MDS has flushed snap data to disk and we can
* throw away our cap_snap.
@@ -3643,23 +3690,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
capsnap, capsnap->follows);
}
}
- if (flushed) {
- WARN_ON(capsnap->dirty_pages || capsnap->writing);
- dout(" removing %p cap_snap %p follows %lld\n",
- inode, capsnap, follows);
- list_del(&capsnap->ci_item);
- wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
-
- spin_lock(&mdsc->cap_dirty_lock);
-
- if (list_empty(&ci->i_cap_flush_list))
- list_del_init(&ci->i_flushing_item);
-
- wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
- &capsnap->cap_flush);
- spin_unlock(&mdsc->cap_dirty_lock);
- }
+ if (flushed)
+ ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
spin_unlock(&ci->i_ceph_lock);
+
if (flushed) {
ceph_put_snap_context(capsnap->context);
ceph_put_cap_snap(capsnap);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0b69aec23e5c..bbd46b728eeb 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1583,14 +1583,39 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
return ret;
}
+static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap_snap *capsnap;
+ int capsnap_release = 0;
+
+ lockdep_assert_held(&ci->i_ceph_lock);
+
+ dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
+
+ while (!list_empty(&ci->i_cap_snaps)) {
+ capsnap = list_first_entry(&ci->i_cap_snaps,
+ struct ceph_cap_snap, ci_item);
+ __ceph_remove_capsnap(inode, capsnap, NULL, NULL);
+ ceph_put_snap_context(capsnap->context);
+ ceph_put_cap_snap(capsnap);
+ capsnap_release++;
+ }
+ wake_up_all(&ci->i_cap_wq);
+ wake_up_all(&mdsc->cap_flushing_wq);
+ return capsnap_release;
+}
+
static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
void *arg)
{
struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
LIST_HEAD(to_remove);
bool dirty_dropped = false;
bool invalidate = false;
+ int capsnap_release = 0;
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
@@ -1598,7 +1623,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
__ceph_remove_cap(cap, false);
if (!ci->i_auth_cap) {
struct ceph_cap_flush *cf;
- struct ceph_mds_client *mdsc = fsc->mdsc;
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (inode->i_data.nrpages > 0)
@@ -1662,6 +1686,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
+
+ if (!list_empty(&ci->i_cap_snaps))
+ capsnap_release = remove_capsnaps(mdsc, inode);
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
@@ -1678,6 +1705,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
ceph_queue_invalidate(inode);
if (dirty_dropped)
iput(inode);
+ while (capsnap_release--)
+ iput(inode);
return 0;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b1a363641beb..2200ed76b123 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1163,6 +1163,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc);
+extern void __ceph_remove_capsnap(struct inode *inode,
+ struct ceph_cap_snap *capsnap,
+ bool *wake_ci, bool *wake_mdsc);
+extern void ceph_remove_capsnap(struct inode *inode,
+ struct ceph_cap_snap *capsnap,
+ bool *wake_ci, bool *wake_mdsc);
extern void ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession);
extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
--
2.30.2
next prev parent reply other threads:[~2021-09-13 22:36 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-13 22:33 [PATCH AUTOSEL 5.14 01/25] dmaengine: idxd: depends on !UML Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 02/25] dmaengine: sprd: Add missing MODULE_DEVICE_TABLE Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 03/25] cxl: Move cxl_core to new directory Sasha Levin
2021-09-14 8:56 ` Jonathan Cameron
2021-09-14 8:57 ` Jonathan Cameron
2021-09-14 15:05 ` Ben Widawsky
2021-09-14 15:40 ` Dan Williams
2021-09-14 17:00 ` Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 04/25] cxl/pci: Introduce cdevm_file_operations Sasha Levin
2021-09-14 15:42 ` Dan Williams
2021-09-14 17:01 ` Sasha Levin
2021-09-14 17:46 ` Dan Williams
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 05/25] gfs2: Switch to may_setattr in gfs2_setattr Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 06/25] dmaengine: ioat: depends on !UML Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 07/25] dmaengine: xilinx_dma: Set DMA mask for coherent APIs Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 08/25] s390: add kmemleak annotation in stack_alloc() Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 09/25] ASoC: audio-graph: respawn Platform Support Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 10/25] ACPI: PM: s2idle: Run both AMD and Microsoft methods if both are supported Sasha Levin
2021-09-21 14:57 ` Limonciello, Mario
2021-09-24 11:40 ` Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 11/25] ceph: fix memory leak on decode error in ceph_handle_caps Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 12/25] ceph: request Fw caps before updating the mtime in ceph_write_iter Sasha Levin
2021-09-13 22:33 ` Sasha Levin [this message]
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 14/25] ceph: lockdep annotations for try_nonblocking_invalidate Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 15/25] s390/unwind: use current_frame_address() to unwind current task Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 16/25] btrfs: update the bdev time directly when closing Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 17/25] btrfs: delay blkdev_put until after the device remove Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 18/25] btrfs: fix lockdep warning while mounting sprout fs Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 19/25] connector: send event on write to /proc/[pid]/comm Sasha Levin
2021-09-15 13:45 ` Eric W. Biederman
2021-09-16 0:36 ` Sasha Levin
2021-09-16 15:49 ` Eric W. Biederman
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 20/25] nilfs2: fix memory leak in nilfs_sysfs_create_device_group Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 21/25] nilfs2: fix NULL pointer in nilfs_##name##_attr_release Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 22/25] nilfs2: fix memory leak in nilfs_sysfs_create_##name##_group Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 23/25] nilfs2: fix memory leak in nilfs_sysfs_delete_##name##_group Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 24/25] nilfs2: fix memory leak in nilfs_sysfs_create_snapshot_group Sasha Levin
2021-09-13 22:33 ` [PATCH AUTOSEL 5.14 25/25] nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210913223339.435347-13-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=ceph-devel@vger.kernel.org \
--cc=idryomov@gmail.com \
--cc=jlayton@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=xiubli@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).