All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] ceph: file lock related fixes
@ 2017-09-12  2:53 Yan, Zheng
  2017-09-12  2:53 ` [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks Yan, Zheng
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Yan, Zheng @ 2017-09-12  2:53 UTC (permalink / raw)
  To: ceph-devel, jlayton, idryomov; +Cc: Yan, Zheng

Yan, Zheng (5):
  ceph: keep auth cap when inode has flocks or posix locks
  ceph: make lock_to_ceph_filelock() 'static'
  ceph: optimize flock encoding during reconnect
  ceph: handle 'session get evicted while there are file locks'
  ceph: avoid null pointer dereference in case of utsname() return NULL

 fs/ceph/inode.c      |   1 +
 fs/ceph/locks.c      | 177 ++++++++++++++++++++++++++++++++++++---------------
 fs/ceph/mds_client.c |  61 ++++++++++++------
 fs/ceph/mds_client.h |   3 +
 fs/ceph/super.h      |   4 +-
 5 files changed, 174 insertions(+), 72 deletions(-)

-- 
2.13.5


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks
  2017-09-12  2:53 [PATCH 0/5] ceph: file lock related fixes Yan, Zheng
@ 2017-09-12  2:53 ` Yan, Zheng
  2017-09-12 10:56   ` Jeff Layton
  2017-09-12 13:21   ` Jeff Layton
  2017-09-12  2:53 ` [PATCH 2/5] ceph: make lock_to_ceph_filelock() 'static' Yan, Zheng
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 15+ messages in thread
From: Yan, Zheng @ 2017-09-12  2:53 UTC (permalink / raw)
  To: ceph-devel, jlayton, idryomov; +Cc: Yan, Zheng

file locks are tracked by inode's auth mds. releasing auth caps
is equivalent to releasing all file locks.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/inode.c      |  1 +
 fs/ceph/locks.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++--------
 fs/ceph/mds_client.c |  2 ++
 fs/ceph/super.h      |  1 +
 4 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 373dab5173ca..c70b26d2bf8a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -492,6 +492,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_wb_ref = 0;
 	ci->i_wrbuffer_ref = 0;
 	ci->i_wrbuffer_ref_head = 0;
+	atomic_set(&ci->i_filelock_ref, 0);
 	ci->i_shared_gen = 0;
 	ci->i_rdcache_gen = 0;
 	ci->i_rdcache_revoking = 0;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 64ae74472046..69f731e75302 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -29,19 +29,46 @@ void __init ceph_flock_init(void)
 	get_random_bytes(&lock_secret, sizeof(lock_secret));
 }
 
+static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
+{
+	struct inode *inode = file_inode(src->fl_file);
+	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+}
+
+static void ceph_fl_release_lock(struct file_lock *fl)
+{
+	struct inode *inode = file_inode(fl->fl_file);
+	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
+}
+
+static const struct file_lock_operations ceph_fl_lock_ops = {
+	.fl_copy_lock = ceph_fl_copy_lock,
+	.fl_release_private = ceph_fl_release_lock,
+};
+
 /**
  * Implement fcntl and flock locking functions.
  */
-static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
+static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
 			     int cmd, u8 wait, struct file_lock *fl)
 {
-	struct inode *inode = file_inode(file);
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 	u64 length = 0;
 	u64 owner;
 
+	if (operation == CEPH_MDS_OP_SETFILELOCK) {
+		/*
+		 * increasing i_filelock_ref closes race window between
+		 * handling request reply and adding file_lock struct to
+		 * inode. Otherwise, i_auth_cap may get trimmed in the
+		 * window. Caller function will decrease the counter.
+		 */
+		fl->fl_ops = &ceph_fl_lock_ops;
+		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	}
+
 	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
 		wait = 0;
 
@@ -179,10 +206,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
  */
 int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 {
-	u8 lock_cmd;
+	struct inode *inode = file_inode(file);
 	int err;
-	u8 wait = 0;
 	u16 op = CEPH_MDS_OP_SETFILELOCK;
+	u8 lock_cmd;
+	u8 wait = 0;
 
 	if (!(fl->fl_flags & FL_POSIX))
 		return -ENOLCK;
@@ -198,6 +226,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	else if (IS_SETLKW(cmd))
 		wait = 1;
 
+	if (op == CEPH_MDS_OP_SETFILELOCK) {
+		/*
+		 * increasing i_filelock_ref closes race window between
+		 * handling request reply and adding file_lock struct to
+		 * inode. Otherwise, i_auth_cap may get trimmed in the
+		 * window. Caller function will decrease the counter.
+		 */
+		fl->fl_ops = &ceph_fl_lock_ops;
+		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	}
+
 	if (F_RDLCK == fl->fl_type)
 		lock_cmd = CEPH_LOCK_SHARED;
 	else if (F_WRLCK == fl->fl_type)
@@ -205,7 +244,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	else
 		lock_cmd = CEPH_LOCK_UNLOCK;
 
-	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
+	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
 	if (!err) {
 		if (op != CEPH_MDS_OP_GETFILELOCK) {
 			dout("mds locked, locking locally");
@@ -214,7 +253,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 				/* undo! This should only happen if
 				 * the kernel detects local
 				 * deadlock. */
-				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
 						  CEPH_LOCK_UNLOCK, 0, fl);
 				dout("got %d on posix_lock_file, undid lock",
 				     err);
@@ -226,8 +265,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 
 int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 {
-	u8 lock_cmd;
+	struct inode *inode = file_inode(file);
 	int err;
+	u8 lock_cmd;
 	u8 wait = 0;
 
 	if (!(fl->fl_flags & FL_FLOCK))
@@ -238,6 +278,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 
 	dout("ceph_flock, fl_file: %p", fl->fl_file);
 
+	/* see comment in ceph_lock */
+	fl->fl_ops = &ceph_fl_lock_ops;
+	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+
 	if (IS_SETLKW(cmd))
 		wait = 1;
 
@@ -249,13 +293,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 		lock_cmd = CEPH_LOCK_UNLOCK;
 
 	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
-				file, lock_cmd, wait, fl);
+				inode, lock_cmd, wait, fl);
 	if (!err) {
 		err = locks_lock_file_wait(file, fl);
 		if (err) {
 			ceph_lock_message(CEPH_LOCK_FLOCK,
 					  CEPH_MDS_OP_SETFILELOCK,
-					  file, CEPH_LOCK_UNLOCK, 0, fl);
+					  inode, CEPH_LOCK_UNLOCK, 0, fl);
 			dout("got %d on locks_lock_file_wait, undid lock", err);
 		}
 	}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9dd6b836ac9e..6146af4ed03c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1461,6 +1461,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 			goto out;
 		if ((used | wanted) & CEPH_CAP_ANY_WR)
 			goto out;
+		if (atomic_read(&ci->i_filelock_ref) > 0)
+			goto out;
 	}
 	/* The inode has cached pages, but it's no longer used.
 	 * we can safely drop it */
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 279a2f401cf5..9e0de8264257 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -351,6 +351,7 @@ struct ceph_inode_info {
 	int i_pin_ref;
 	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
+	atomic_t i_filelock_ref;
 	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
 	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
-- 
2.13.5


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/5] ceph: make lock_to_ceph_filelock() 'static'
  2017-09-12  2:53 [PATCH 0/5] ceph: file lock related fixes Yan, Zheng
  2017-09-12  2:53 ` [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks Yan, Zheng
@ 2017-09-12  2:53 ` Yan, Zheng
  2017-09-12 12:48   ` Jeff Layton
  2017-09-12  2:53 ` [PATCH 3/5] ceph: optimize flock encoding during reconnect Yan, Zheng
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Yan, Zheng @ 2017-09-12  2:53 UTC (permalink / raw)
  To: ceph-devel, jlayton, idryomov; +Cc: Yan, Zheng

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/locks.c | 62 ++++++++++++++++++++++++++++-----------------------------
 fs/ceph/super.h |  1 -
 2 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 69f731e75302..33e091e6e8d3 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -331,6 +331,37 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
 	     *flock_count, *fcntl_count);
 }
 
+/*
+ * Given a pointer to a lock, convert it to a ceph filelock
+ */
+static int lock_to_ceph_filelock(struct file_lock *lock,
+				 struct ceph_filelock *cephlock)
+{
+	int err = 0;
+	cephlock->start = cpu_to_le64(lock->fl_start);
+	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
+	cephlock->client = cpu_to_le64(0);
+	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
+	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
+
+	switch (lock->fl_type) {
+	case F_RDLCK:
+		cephlock->type = CEPH_LOCK_SHARED;
+		break;
+	case F_WRLCK:
+		cephlock->type = CEPH_LOCK_EXCL;
+		break;
+	case F_UNLCK:
+		cephlock->type = CEPH_LOCK_UNLOCK;
+		break;
+	default:
+		dout("Have unknown lock type %d", lock->fl_type);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
 /**
  * Encode the flock and fcntl locks for the given inode into the ceph_filelock
  * array. Must be called with inode->i_lock already held.
@@ -415,34 +446,3 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 out_fail:
 	return err;
 }
-
-/*
- * Given a pointer to a lock, convert it to a ceph filelock
- */
-int lock_to_ceph_filelock(struct file_lock *lock,
-			  struct ceph_filelock *cephlock)
-{
-	int err = 0;
-	cephlock->start = cpu_to_le64(lock->fl_start);
-	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
-	cephlock->client = cpu_to_le64(0);
-	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
-	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
-
-	switch (lock->fl_type) {
-	case F_RDLCK:
-		cephlock->type = CEPH_LOCK_SHARED;
-		break;
-	case F_WRLCK:
-		cephlock->type = CEPH_LOCK_EXCL;
-		break;
-	case F_UNLCK:
-		cephlock->type = CEPH_LOCK_UNLOCK;
-		break;
-	default:
-		dout("Have unknown lock type %d", lock->fl_type);
-		err = -EINVAL;
-	}
-
-	return err;
-}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 9e0de8264257..88cbc0981c23 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1011,7 +1011,6 @@ extern int ceph_encode_locks_to_buffer(struct inode *inode,
 extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 				  struct ceph_pagelist *pagelist,
 				  int num_fcntl_locks, int num_flock_locks);
-extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
 
 /* debugfs.c */
 extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
-- 
2.13.5


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/5] ceph: optimize flock encoding during reconnect
  2017-09-12  2:53 [PATCH 0/5] ceph: file lock related fixes Yan, Zheng
  2017-09-12  2:53 ` [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks Yan, Zheng
  2017-09-12  2:53 ` [PATCH 2/5] ceph: make lock_to_ceph_filelock() 'static' Yan, Zheng
@ 2017-09-12  2:53 ` Yan, Zheng
  2017-09-12 13:03   ` Jeff Layton
  2017-09-12  2:53 ` [PATCH 4/5] ceph: handle 'session get evicted while there are file locks' Yan, Zheng
  2017-09-12  2:53 ` [PATCH 5/5] ceph: avoid null pointer dereference in case of utsname() return NULL Yan, Zheng
  4 siblings, 1 reply; 15+ messages in thread
From: Yan, Zheng @ 2017-09-12  2:53 UTC (permalink / raw)
  To: ceph-devel, jlayton, idryomov; +Cc: Yan, Zheng

don't malloc if there is no flock

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/locks.c      | 17 ++++++++++-------
 fs/ceph/mds_client.c | 34 ++++++++++++++++++++--------------
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 33e091e6e8d3..4addd18ac6f9 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -430,19 +430,22 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 	if (err)
 		goto out_fail;
 
-	err = ceph_pagelist_append(pagelist, flocks,
-				   num_fcntl_locks * sizeof(*flocks));
-	if (err)
-		goto out_fail;
+	if (num_fcntl_locks > 0) {
+		err = ceph_pagelist_append(pagelist, flocks,
+					   num_fcntl_locks * sizeof(*flocks));
+		if (err)
+			goto out_fail;
+	}
 
 	nlocks = cpu_to_le32(num_flock_locks);
 	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
 	if (err)
 		goto out_fail;
 
-	err = ceph_pagelist_append(pagelist,
-				   &flocks[num_fcntl_locks],
-				   num_flock_locks * sizeof(*flocks));
+	if (num_flock_locks > 0) {
+		err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
+					   num_flock_locks * sizeof(*flocks));
+	}
 out_fail:
 	return err;
 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6146af4ed03c..817ea438aa19 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2895,26 +2895,32 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 
 	if (recon_state->msg_version >= 2) {
 		int num_fcntl_locks, num_flock_locks;
-		struct ceph_filelock *flocks;
+		struct ceph_filelock *flocks = NULL;
 		size_t struct_len, total_len = 0;
 		u8 struct_v = 0;
 
 encode_again:
 		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
-		flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
-				 sizeof(struct ceph_filelock), GFP_NOFS);
-		if (!flocks) {
-			err = -ENOMEM;
-			goto out_free;
-		}
-		err = ceph_encode_locks_to_buffer(inode, flocks,
-						  num_fcntl_locks,
-						  num_flock_locks);
-		if (err) {
+		if (num_fcntl_locks + num_flock_locks > 0) {
+			flocks = kmalloc((num_fcntl_locks + num_flock_locks) *
+					 sizeof(struct ceph_filelock), GFP_NOFS);
+			if (!flocks) {
+				err = -ENOMEM;
+				goto out_free;
+			}
+			err = ceph_encode_locks_to_buffer(inode, flocks,
+							  num_fcntl_locks,
+							  num_flock_locks);
+			if (err) {
+				kfree(flocks);
+				flocks = NULL;
+				if (err == -ENOSPC)
+					goto encode_again;
+				goto out_free;
+			}
+		} else {
 			kfree(flocks);
-			if (err == -ENOSPC)
-				goto encode_again;
-			goto out_free;
+			flocks = NULL;
 		}
 
 		if (recon_state->msg_version >= 3) {
-- 
2.13.5


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/5] ceph: handle 'session get evicted while there are file locks'
  2017-09-12  2:53 [PATCH 0/5] ceph: file lock related fixes Yan, Zheng
                   ` (2 preceding siblings ...)
  2017-09-12  2:53 ` [PATCH 3/5] ceph: optimize flock encoding during reconnect Yan, Zheng
@ 2017-09-12  2:53 ` Yan, Zheng
  2017-09-12 13:13   ` Jeff Layton
  2017-09-12  2:53 ` [PATCH 5/5] ceph: avoid null pointer dereference in case of utsname() return NULL Yan, Zheng
  4 siblings, 1 reply; 15+ messages in thread
From: Yan, Zheng @ 2017-09-12  2:53 UTC (permalink / raw)
  To: ceph-devel, jlayton, idryomov; +Cc: Yan, Zheng

When a session gets evicted, all file locks associated with the session
get released remotely by the MDS. File locks tracked by the kernel become
stale. In this situation, set an error flag on the inode. The flag makes
further file lock operations return -EIO.

Another option to handle this situation is to clean up the file locks
tracked by the kernel. I did not choose that because it is inconvenient
to notify the user program about the error.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/locks.c      | 52 ++++++++++++++++++++++++++++++++++++++++------------
 fs/ceph/mds_client.c | 21 ++++++++++++++++-----
 fs/ceph/super.h      |  2 ++
 3 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 4addd18ac6f9..e240ac0903e4 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -38,7 +38,13 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 static void ceph_fl_release_lock(struct file_lock *fl)
 {
 	struct inode *inode = file_inode(fl->fl_file);
-	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
+		/* clear error when all locks are released */
+		spin_lock(&ci->i_ceph_lock);
+		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
+		spin_unlock(&ci->i_ceph_lock);
+	}
 }
 
 static const struct file_lock_operations ceph_fl_lock_ops = {
@@ -207,10 +213,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
 int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(file);
-	int err;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err = 0;
 	u16 op = CEPH_MDS_OP_SETFILELOCK;
-	u8 lock_cmd;
 	u8 wait = 0;
+	u8 lock_cmd;
 
 	if (!(fl->fl_flags & FL_POSIX))
 		return -ENOLCK;
@@ -226,7 +233,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	else if (IS_SETLKW(cmd))
 		wait = 1;
 
-	if (op == CEPH_MDS_OP_SETFILELOCK) {
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
+		err = -EIO;
+	} else if (op == CEPH_MDS_OP_SETFILELOCK) {
 		/*
 		 * increasing i_filelock_ref closes race window between
 		 * handling request reply and adding file_lock struct to
@@ -234,7 +244,13 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 		 * window. Caller function will decrease the counter.
 		 */
 		fl->fl_ops = &ceph_fl_lock_ops;
-		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+		atomic_inc(&ci->i_filelock_ref);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	if (err < 0) {
+		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type)
+			posix_lock_file(file, fl, NULL);
+		return err;
 	}
 
 	if (F_RDLCK == fl->fl_type)
@@ -246,10 +262,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 
 	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
 	if (!err) {
-		if (op != CEPH_MDS_OP_GETFILELOCK) {
+		if (op == CEPH_MDS_OP_SETFILELOCK) {
 			dout("mds locked, locking locally");
 			err = posix_lock_file(file, fl, NULL);
-			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+			if (err) {
 				/* undo! This should only happen if
 				 * the kernel detects local
 				 * deadlock. */
@@ -266,9 +282,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(file);
-	int err;
-	u8 lock_cmd;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err = 0;
 	u8 wait = 0;
+	u8 lock_cmd;
 
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
@@ -278,9 +295,20 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 
 	dout("ceph_flock, fl_file: %p", fl->fl_file);
 
-	/* see comment in ceph_lock */
-	fl->fl_ops = &ceph_fl_lock_ops;
-	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
+		err = -EIO;
+	} else {
+		/* see comment in ceph_lock */
+		fl->fl_ops = &ceph_fl_lock_ops;
+		atomic_inc(&ci->i_filelock_ref);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	if (err < 0) {
+		if (F_UNLCK == fl->fl_type)
+			locks_lock_file_wait(file, fl);
+		return err;
+	}
 
 	if (IS_SETLKW(cmd))
 		wait = 1;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 817ea438aa19..26893cc1fbee 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1214,6 +1214,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		}
 		spin_unlock(&mdsc->cap_dirty_lock);
 
+		if (atomic_read(&ci->i_filelock_ref) > 0) {
+			/* make further file lock syscall return -EIO */
+			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
+			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
+					    inode, ceph_ino(inode));
+		}
+
 		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
 			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
 			ci->i_prealloc_cap_flush = NULL;
@@ -2828,7 +2835,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		struct ceph_mds_cap_reconnect v2;
 		struct ceph_mds_cap_reconnect_v1 v1;
 	} rec;
-	struct ceph_inode_info *ci;
+	struct ceph_inode_info *ci = cap->ci;
 	struct ceph_reconnect_state *recon_state = arg;
 	struct ceph_pagelist *pagelist = recon_state->pagelist;
 	char *path;
@@ -2837,8 +2844,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	u64 snap_follows;
 	struct dentry *dentry;
 
-	ci = cap->ci;
-
 	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
 	     inode, ceph_vinop(inode), cap, cap->cap_id,
 	     ceph_cap_string(cap->issued));
@@ -2871,7 +2876,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		rec.v2.issued = cpu_to_le32(cap->issued);
 		rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
 		rec.v2.pathbase = cpu_to_le64(pathbase);
-		rec.v2.flock_len = 0;
+		rec.v2.flock_len =
+			(ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1;
 	} else {
 		rec.v1.cap_id = cpu_to_le64(cap->cap_id);
 		rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
@@ -2900,7 +2906,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		u8 struct_v = 0;
 
 encode_again:
-		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+		if (rec.v2.flock_len) {
+			ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+		} else {
+			num_fcntl_locks = 0;
+			num_flock_locks = 0;
+		}
 		if (num_fcntl_locks + num_flock_locks > 0) {
 			flocks = kmalloc((num_fcntl_locks + num_flock_locks) *
 					 sizeof(struct ceph_filelock), GFP_NOFS);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88cbc0981c23..6fe4a2c5fd24 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -487,6 +487,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_KICK_FLUSH	(1 << 9)  /* kick flushing caps */
 #define CEPH_I_FLUSH_SNAPS	(1 << 10) /* need flush snapss */
 #define CEPH_I_ERROR_WRITE	(1 << 11) /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK	(1 << 12) /* have seen file lock errors */
+
 
 /*
  * We set the ERROR_WRITE bit when we start seeing write errors on an inode
-- 
2.13.5


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/5] ceph: avoid null pointer dereference in case of utsname() return NULL
  2017-09-12  2:53 [PATCH 0/5] ceph: file lock related fixes Yan, Zheng
                   ` (3 preceding siblings ...)
  2017-09-12  2:53 ` [PATCH 4/5] ceph: handle 'session get evicted while there are file locks' Yan, Zheng
@ 2017-09-12  2:53 ` Yan, Zheng
  2017-09-12 13:15   ` Jeff Layton
  4 siblings, 1 reply; 15+ messages in thread
From: Yan, Zheng @ 2017-09-12  2:53 UTC (permalink / raw)
  To: ceph-devel, jlayton, idryomov; +Cc: Yan, Zheng

utsname() can return NULL while a process is exiting. The kernel releases
file locks during process exit. We send a request to the MDS when releasing
a file lock. So it's possible that we open an MDS session while a process
is exiting. utsname() is called in create_session_open_msg().

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/mds_client.c | 6 ++++--
 fs/ceph/mds_client.h | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 26893cc1fbee..ab6b998d3f44 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -884,8 +884,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	void *p;
 
 	const char* metadata[][2] = {
-		{"hostname", utsname()->nodename},
-		{"kernel_version", utsname()->release},
+		{"hostname", mdsc->nodename},
+		{"kernel_version", init_utsname()->release},
 		{"entity_id", opt->name ? : ""},
 		{"root", fsopt->server_path ? : "/"},
 		{NULL, NULL}
@@ -3558,6 +3558,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	init_rwsem(&mdsc->pool_perm_rwsem);
 	mdsc->pool_perm_tree = RB_ROOT;
 
+	strncpy(mdsc->nodename, utsname()->nodename,
+		sizeof(mdsc->nodename) - 1);
 	return 0;
 }
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index db57ae98ed34..636d6b2ec49c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -8,6 +8,7 @@
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/refcount.h>
+#include <linux/utsname.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
@@ -368,6 +369,8 @@ struct ceph_mds_client {
 
 	struct rw_semaphore     pool_perm_rwsem;
 	struct rb_root		pool_perm_tree;
+
+	char nodename[__NEW_UTS_LEN + 1];
 };
 
 extern const char *ceph_mds_op_name(int op);
-- 
2.13.5


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks
  2017-09-12  2:53 ` [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks Yan, Zheng
@ 2017-09-12 10:56   ` Jeff Layton
  2017-09-12 12:57     ` Yan, Zheng
  2017-09-12 13:21   ` Jeff Layton
  1 sibling, 1 reply; 15+ messages in thread
From: Jeff Layton @ 2017-09-12 10:56 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel, idryomov

On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
> file locks are tracked by inode's auth mds. releasing auth caps
> is equivalent to releasing all file locks.
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/inode.c      |  1 +
>  fs/ceph/locks.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++--------
>  fs/ceph/mds_client.c |  2 ++
>  fs/ceph/super.h      |  1 +
>  4 files changed, 57 insertions(+), 9 deletions(-)
> 

Uhh, really? So if someone wants to, for instance, change the mode on
the file, they have to wait until the file is unlocked? Or am I missing
the way this works?

FWIW, looking at how file locking works in ceph in detail is still on my
to-do list...

> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 373dab5173ca..c70b26d2bf8a 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -492,6 +492,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>  	ci->i_wb_ref = 0;
>  	ci->i_wrbuffer_ref = 0;
>  	ci->i_wrbuffer_ref_head = 0;
> +	atomic_set(&ci->i_filelock_ref, 0);
>  	ci->i_shared_gen = 0;
>  	ci->i_rdcache_gen = 0;
>  	ci->i_rdcache_revoking = 0;
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index 64ae74472046..69f731e75302 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -29,19 +29,46 @@ void __init ceph_flock_init(void)
>  	get_random_bytes(&lock_secret, sizeof(lock_secret));
>  }
>  
> +static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
> +{
> +	struct inode *inode = file_inode(src->fl_file);
> +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +}
> +
> +static void ceph_fl_release_lock(struct file_lock *fl)
> +{
> +	struct inode *inode = file_inode(fl->fl_file);
> +	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
> +}
> +
> +static const struct file_lock_operations ceph_fl_lock_ops = {
> +	.fl_copy_lock = ceph_fl_copy_lock,
> +	.fl_release_private = ceph_fl_release_lock,
> +};
> +
>  /**
>   * Implement fcntl and flock locking functions.
>   */
> -static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
> +static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
>  			     int cmd, u8 wait, struct file_lock *fl)
>  {
> -	struct inode *inode = file_inode(file);
>  	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>  	struct ceph_mds_request *req;
>  	int err;
>  	u64 length = 0;
>  	u64 owner;
>  
> +	if (operation == CEPH_MDS_OP_SETFILELOCK) {
> +		/*
> +		 * increasing i_filelock_ref closes race window between
> +		 * handling request reply and adding file_lock struct to
> +		 * inode. Otherwise, i_auth_cap may get trimmed in the
> +		 * window. Caller function will decrease the counter.
> +		 */
> +		fl->fl_ops = &ceph_fl_lock_ops;
> +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +	}
> +
>  	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
>  		wait = 0;
>  
> @@ -179,10 +206,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
>   */
>  int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  {
> -	u8 lock_cmd;
> +	struct inode *inode = file_inode(file);
>  	int err;
> -	u8 wait = 0;
>  	u16 op = CEPH_MDS_OP_SETFILELOCK;
> +	u8 lock_cmd;
> +	u8 wait = 0;
>  
>  	if (!(fl->fl_flags & FL_POSIX))
>  		return -ENOLCK;
> @@ -198,6 +226,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  	else if (IS_SETLKW(cmd))
>  		wait = 1;
>  
> +	if (op == CEPH_MDS_OP_SETFILELOCK) {
> +		/*
> +		 * increasing i_filelock_ref closes race window between
> +		 * handling request reply and adding file_lock struct to
> +		 * inode. Otherwise, i_auth_cap may get trimmed in the
> +		 * window. Caller function will decrease the counter.
> +		 */
> +		fl->fl_ops = &ceph_fl_lock_ops;
> +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +	}
> +
>  	if (F_RDLCK == fl->fl_type)
>  		lock_cmd = CEPH_LOCK_SHARED;
>  	else if (F_WRLCK == fl->fl_type)
> @@ -205,7 +244,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  	else
>  		lock_cmd = CEPH_LOCK_UNLOCK;
>  
> -	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
> +	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
>  	if (!err) {
>  		if (op != CEPH_MDS_OP_GETFILELOCK) {
>  			dout("mds locked, locking locally");
> @@ -214,7 +253,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  				/* undo! This should only happen if
>  				 * the kernel detects local
>  				 * deadlock. */
> -				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
> +				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
>  						  CEPH_LOCK_UNLOCK, 0, fl);
>  				dout("got %d on posix_lock_file, undid lock",
>  				     err);
> @@ -226,8 +265,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  
>  int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>  {
> -	u8 lock_cmd;
> +	struct inode *inode = file_inode(file);
>  	int err;
> +	u8 lock_cmd;
>  	u8 wait = 0;
>  
>  	if (!(fl->fl_flags & FL_FLOCK))
> @@ -238,6 +278,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>  
>  	dout("ceph_flock, fl_file: %p", fl->fl_file);
>  
> +	/* see comment in ceph_lock */
> +	fl->fl_ops = &ceph_fl_lock_ops;
> +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +
>  	if (IS_SETLKW(cmd))
>  		wait = 1;
>  
> @@ -249,13 +293,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>  		lock_cmd = CEPH_LOCK_UNLOCK;
>  
>  	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
> -				file, lock_cmd, wait, fl);
> +				inode, lock_cmd, wait, fl);
>  	if (!err) {
>  		err = locks_lock_file_wait(file, fl);
>  		if (err) {
>  			ceph_lock_message(CEPH_LOCK_FLOCK,
>  					  CEPH_MDS_OP_SETFILELOCK,
> -					  file, CEPH_LOCK_UNLOCK, 0, fl);
> +					  inode, CEPH_LOCK_UNLOCK, 0, fl);
>  			dout("got %d on locks_lock_file_wait, undid lock", err);
>  		}
>  	}
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 9dd6b836ac9e..6146af4ed03c 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1461,6 +1461,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>  			goto out;
>  		if ((used | wanted) & CEPH_CAP_ANY_WR)
>  			goto out;
> +		if (atomic_read(&ci->i_filelock_ref) > 0)
> +			goto out;
>  	}
>  	/* The inode has cached pages, but it's no longer used.
>  	 * we can safely drop it */
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 279a2f401cf5..9e0de8264257 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -351,6 +351,7 @@ struct ceph_inode_info {
>  	int i_pin_ref;
>  	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
>  	int i_wrbuffer_ref, i_wrbuffer_ref_head;
> +	atomic_t i_filelock_ref;
>  	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
>  	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
>  	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */

-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/5] ceph: make lock_to_ceph_filelock() 'static'
  2017-09-12  2:53 ` [PATCH 2/5] ceph: make lock_to_ceph_filelock() 'static' Yan, Zheng
@ 2017-09-12 12:48   ` Jeff Layton
  0 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2017-09-12 12:48 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel, idryomov

On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/locks.c | 62 ++++++++++++++++++++++++++++-----------------------------
>  fs/ceph/super.h |  1 -
>  2 files changed, 31 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index 69f731e75302..33e091e6e8d3 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -331,6 +331,37 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
>  	     *flock_count, *fcntl_count);
>  }
>  
> +/*
> + * Given a pointer to a lock, convert it to a ceph filelock
> + */
> +static int lock_to_ceph_filelock(struct file_lock *lock,
> +				 struct ceph_filelock *cephlock)
> +{
> +	int err = 0;
> +	cephlock->start = cpu_to_le64(lock->fl_start);
> +	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
> +	cephlock->client = cpu_to_le64(0);
> +	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
> +	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
> +
> +	switch (lock->fl_type) {
> +	case F_RDLCK:
> +		cephlock->type = CEPH_LOCK_SHARED;
> +		break;
> +	case F_WRLCK:
> +		cephlock->type = CEPH_LOCK_EXCL;
> +		break;
> +	case F_UNLCK:
> +		cephlock->type = CEPH_LOCK_UNLOCK;
> +		break;
> +	default:
> +		dout("Have unknown lock type %d", lock->fl_type);
> +		err = -EINVAL;
> +	}
> +
> +	return err;
> +}
> +
>  /**
>   * Encode the flock and fcntl locks for the given inode into the ceph_filelock
>   * array. Must be called with inode->i_lock already held.
> @@ -415,34 +446,3 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
>  out_fail:
>  	return err;
>  }
> -
> -/*
> - * Given a pointer to a lock, convert it to a ceph filelock
> - */
> -int lock_to_ceph_filelock(struct file_lock *lock,
> -			  struct ceph_filelock *cephlock)
> -{
> -	int err = 0;
> -	cephlock->start = cpu_to_le64(lock->fl_start);
> -	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
> -	cephlock->client = cpu_to_le64(0);
> -	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
> -	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
> -
> -	switch (lock->fl_type) {
> -	case F_RDLCK:
> -		cephlock->type = CEPH_LOCK_SHARED;
> -		break;
> -	case F_WRLCK:
> -		cephlock->type = CEPH_LOCK_EXCL;
> -		break;
> -	case F_UNLCK:
> -		cephlock->type = CEPH_LOCK_UNLOCK;
> -		break;
> -	default:
> -		dout("Have unknown lock type %d", lock->fl_type);
> -		err = -EINVAL;
> -	}
> -
> -	return err;
> -}
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 9e0de8264257..88cbc0981c23 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1011,7 +1011,6 @@ extern int ceph_encode_locks_to_buffer(struct inode *inode,
>  extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
>  				  struct ceph_pagelist *pagelist,
>  				  int num_fcntl_locks, int num_flock_locks);
> -extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
>  
>  /* debugfs.c */
>  extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);

Reviewed-by: Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks
  2017-09-12 10:56   ` Jeff Layton
@ 2017-09-12 12:57     ` Yan, Zheng
  0 siblings, 0 replies; 15+ messages in thread
From: Yan, Zheng @ 2017-09-12 12:57 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Ceph Development, idryomov


> On 12 Sep 2017, at 18:56, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
>> file locks are tracked by inode's auth mds. releasing auth caps
>> is equivalent to releasing all file locks.
>> 
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>> fs/ceph/inode.c      |  1 +
>> fs/ceph/locks.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++--------
>> fs/ceph/mds_client.c |  2 ++
>> fs/ceph/super.h      |  1 +
>> 4 files changed, 57 insertions(+), 9 deletions(-)
>> 
> 
> Uhh, really? So if someone wants to, for instance, change the mode on
> the file, they have to wait until the file is unlocked? Or am I missing
> the way this works?
> 

I mean dropping auth caps completely. For the case where the mds revokes caps,
the client still holds CEPH_CAP_PIN.

Regards
Yan, Zheng

> FWIW, looking at how file locking works in ceph in detail is still on my
> to-do list...
> 
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index 373dab5173ca..c70b26d2bf8a 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -492,6 +492,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>> 	ci->i_wb_ref = 0;
>> 	ci->i_wrbuffer_ref = 0;
>> 	ci->i_wrbuffer_ref_head = 0;
>> +	atomic_set(&ci->i_filelock_ref, 0);
>> 	ci->i_shared_gen = 0;
>> 	ci->i_rdcache_gen = 0;
>> 	ci->i_rdcache_revoking = 0;
>> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
>> index 64ae74472046..69f731e75302 100644
>> --- a/fs/ceph/locks.c
>> +++ b/fs/ceph/locks.c
>> @@ -29,19 +29,46 @@ void __init ceph_flock_init(void)
>> 	get_random_bytes(&lock_secret, sizeof(lock_secret));
>> }
>> 
>> +static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>> +{
>> +	struct inode *inode = file_inode(src->fl_file);
>> +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> +}
>> +
>> +static void ceph_fl_release_lock(struct file_lock *fl)
>> +{
>> +	struct inode *inode = file_inode(fl->fl_file);
>> +	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
>> +}
>> +
>> +static const struct file_lock_operations ceph_fl_lock_ops = {
>> +	.fl_copy_lock = ceph_fl_copy_lock,
>> +	.fl_release_private = ceph_fl_release_lock,
>> +};
>> +
>> /**
>>  * Implement fcntl and flock locking functions.
>>  */
>> -static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
>> +static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
>> 			     int cmd, u8 wait, struct file_lock *fl)
>> {
>> -	struct inode *inode = file_inode(file);
>> 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>> 	struct ceph_mds_request *req;
>> 	int err;
>> 	u64 length = 0;
>> 	u64 owner;
>> 
>> +	if (operation == CEPH_MDS_OP_SETFILELOCK) {
>> +		/*
>> +		 * increasing i_filelock_ref closes race window between
>> +		 * handling request reply and adding file_lock struct to
>> +		 * inode. Otherwise, i_auth_cap may get trimmed in the
>> +		 * window. Caller function will decrease the counter.
>> +		 */
>> +		fl->fl_ops = &ceph_fl_lock_ops;
>> +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> +	}
>> +
>> 	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
>> 		wait = 0;
>> 
>> @@ -179,10 +206,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
>>  */
>> int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> {
>> -	u8 lock_cmd;
>> +	struct inode *inode = file_inode(file);
>> 	int err;
>> -	u8 wait = 0;
>> 	u16 op = CEPH_MDS_OP_SETFILELOCK;
>> +	u8 lock_cmd;
>> +	u8 wait = 0;
>> 
>> 	if (!(fl->fl_flags & FL_POSIX))
>> 		return -ENOLCK;
>> @@ -198,6 +226,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> 	else if (IS_SETLKW(cmd))
>> 		wait = 1;
>> 
>> +	if (op == CEPH_MDS_OP_SETFILELOCK) {
>> +		/*
>> +		 * increasing i_filelock_ref closes race window between
>> +		 * handling request reply and adding file_lock struct to
>> +		 * inode. Otherwise, i_auth_cap may get trimmed in the
>> +		 * window. Caller function will decrease the counter.
>> +		 */
>> +		fl->fl_ops = &ceph_fl_lock_ops;
>> +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> +	}
>> +
>> 	if (F_RDLCK == fl->fl_type)
>> 		lock_cmd = CEPH_LOCK_SHARED;
>> 	else if (F_WRLCK == fl->fl_type)
>> @@ -205,7 +244,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> 	else
>> 		lock_cmd = CEPH_LOCK_UNLOCK;
>> 
>> -	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
>> +	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
>> 	if (!err) {
>> 		if (op != CEPH_MDS_OP_GETFILELOCK) {
>> 			dout("mds locked, locking locally");
>> @@ -214,7 +253,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> 				/* undo! This should only happen if
>> 				 * the kernel detects local
>> 				 * deadlock. */
>> -				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
>> +				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
>> 						  CEPH_LOCK_UNLOCK, 0, fl);
>> 				dout("got %d on posix_lock_file, undid lock",
>> 				     err);
>> @@ -226,8 +265,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> 
>> int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>> {
>> -	u8 lock_cmd;
>> +	struct inode *inode = file_inode(file);
>> 	int err;
>> +	u8 lock_cmd;
>> 	u8 wait = 0;
>> 
>> 	if (!(fl->fl_flags & FL_FLOCK))
>> @@ -238,6 +278,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>> 
>> 	dout("ceph_flock, fl_file: %p", fl->fl_file);
>> 
>> +	/* see comment in ceph_lock */
>> +	fl->fl_ops = &ceph_fl_lock_ops;
>> +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> +
>> 	if (IS_SETLKW(cmd))
>> 		wait = 1;
>> 
>> @@ -249,13 +293,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>> 		lock_cmd = CEPH_LOCK_UNLOCK;
>> 
>> 	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
>> -				file, lock_cmd, wait, fl);
>> +				inode, lock_cmd, wait, fl);
>> 	if (!err) {
>> 		err = locks_lock_file_wait(file, fl);
>> 		if (err) {
>> 			ceph_lock_message(CEPH_LOCK_FLOCK,
>> 					  CEPH_MDS_OP_SETFILELOCK,
>> -					  file, CEPH_LOCK_UNLOCK, 0, fl);
>> +					  inode, CEPH_LOCK_UNLOCK, 0, fl);
>> 			dout("got %d on locks_lock_file_wait, undid lock", err);
>> 		}
>> 	}
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 9dd6b836ac9e..6146af4ed03c 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1461,6 +1461,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>> 			goto out;
>> 		if ((used | wanted) & CEPH_CAP_ANY_WR)
>> 			goto out;
>> +		if (atomic_read(&ci->i_filelock_ref) > 0)
>> +			goto out;
>> 	}
>> 	/* The inode has cached pages, but it's no longer used.
>> 	 * we can safely drop it */
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 279a2f401cf5..9e0de8264257 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -351,6 +351,7 @@ struct ceph_inode_info {
>> 	int i_pin_ref;
>> 	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
>> 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
>> +	atomic_t i_filelock_ref;
>> 	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
>> 	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
>> 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
> 
> -- 
> Jeff Layton <jlayton@redhat.com>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/5] ceph: optimize flock encoding during reconnect
  2017-09-12  2:53 ` [PATCH 3/5] ceph: optimize flock encoding during reconnect Yan, Zheng
@ 2017-09-12 13:03   ` Jeff Layton
  0 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2017-09-12 13:03 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel, idryomov

On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
> don't malloc if there is no flock
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/locks.c      | 17 ++++++++++-------
>  fs/ceph/mds_client.c | 34 ++++++++++++++++++++--------------
>  2 files changed, 30 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index 33e091e6e8d3..4addd18ac6f9 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -430,19 +430,22 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
>  	if (err)
>  		goto out_fail;
>  
> -	err = ceph_pagelist_append(pagelist, flocks,
> -				   num_fcntl_locks * sizeof(*flocks));
> -	if (err)
> -		goto out_fail;
> +	if (num_fcntl_locks > 0) {
> +		err = ceph_pagelist_append(pagelist, flocks,
> +					   num_fcntl_locks * sizeof(*flocks));
> +		if (err)
> +			goto out_fail;
> +	}

Maybe it would be cleaner to just have ceph_pagelist_append immediately
return 0 when len == 0? Looks like it's basically a no-op otherwise.
>  
>  	nlocks = cpu_to_le32(num_flock_locks);
>  	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
>  	if (err)
>  		goto out_fail;
>  
> -	err = ceph_pagelist_append(pagelist,
> -				   &flocks[num_fcntl_locks],
> -				   num_flock_locks * sizeof(*flocks));
> +	if (num_flock_locks > 0) {
> +		err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
> +					   num_flock_locks * sizeof(*flocks));
> +	}
>  out_fail:
>  	return err;
>  }
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 6146af4ed03c..817ea438aa19 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2895,26 +2895,32 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  
>  	if (recon_state->msg_version >= 2) {
>  		int num_fcntl_locks, num_flock_locks;
> -		struct ceph_filelock *flocks;
> +		struct ceph_filelock *flocks = NULL;
>  		size_t struct_len, total_len = 0;
>  		u8 struct_v = 0;
>  
>  encode_again:
>  		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
> -		flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
> -				 sizeof(struct ceph_filelock), GFP_NOFS);
> -		if (!flocks) {
> -			err = -ENOMEM;
> -			goto out_free;
> -		}
> -		err = ceph_encode_locks_to_buffer(inode, flocks,
> -						  num_fcntl_locks,
> -						  num_flock_locks);
> -		if (err) {
> +		if (num_fcntl_locks + num_flock_locks > 0) {
> +			flocks = kmalloc((num_fcntl_locks + num_flock_locks) *
> +					 sizeof(struct ceph_filelock), GFP_NOFS);
> +			if (!flocks) {
> +				err = -ENOMEM;
> +				goto out_free;
> +			}
> +			err = ceph_encode_locks_to_buffer(inode, flocks,
> +							  num_fcntl_locks,
> +							  num_flock_locks);
> +			if (err) {
> +				kfree(flocks);
> +				flocks = NULL;
> +				if (err == -ENOSPC)
> +					goto encode_again;
> +				goto out_free;
> +			}
> +		} else {
>  			kfree(flocks);
> -			if (err == -ENOSPC)
> -				goto encode_again;
> -			goto out_free;
> +			flocks = NULL;
>  		}
>  
>  		if (recon_state->msg_version >= 3) {

Looks fine other than the nit above:

Reviewed-by: Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/5] ceph: handle 'session get evicted while there are file locks'
  2017-09-12  2:53 ` [PATCH 4/5] ceph: handle 'session get evicted while there are file locks' Yan, Zheng
@ 2017-09-12 13:13   ` Jeff Layton
  0 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2017-09-12 13:13 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel, idryomov

On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
> When a session gets evicted, all file locks associated with the session
> get released remotely by the mds. File locks tracked by the kernel become
> stale. In this situation, set an error flag on the inode. The flag makes
> further file lock operations return -EIO.
>
> Another option to handle this situation is to clean up the file locks
> tracked by the kernel. I did not choose it because it is inconvenient to
> notify the user program about the error.
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/locks.c      | 52 ++++++++++++++++++++++++++++++++++++++++------------
>  fs/ceph/mds_client.c | 21 ++++++++++++++++-----
>  fs/ceph/super.h      |  2 ++
>  3 files changed, 58 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index 4addd18ac6f9..e240ac0903e4 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -38,7 +38,13 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>  static void ceph_fl_release_lock(struct file_lock *fl)
>  {
>  	struct inode *inode = file_inode(fl->fl_file);
> -	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
> +		/* clear error when all locks are released */
> +		spin_lock(&ci->i_ceph_lock);
> +		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
> +		spin_unlock(&ci->i_ceph_lock);
> +	}
>  }
>  
>  static const struct file_lock_operations ceph_fl_lock_ops = {
> @@ -207,10 +213,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
>  int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  {
>  	struct inode *inode = file_inode(file);
> -	int err;
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	int err = 0;
>  	u16 op = CEPH_MDS_OP_SETFILELOCK;
> -	u8 lock_cmd;
>  	u8 wait = 0;
> +	u8 lock_cmd;
>  
>  	if (!(fl->fl_flags & FL_POSIX))
>  		return -ENOLCK;
> @@ -226,7 +233,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  	else if (IS_SETLKW(cmd))
>  		wait = 1;
>  
> -	if (op == CEPH_MDS_OP_SETFILELOCK) {
> +	spin_lock(&ci->i_ceph_lock);
> +	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
> +		err = -EIO;
> +	} else if (op == CEPH_MDS_OP_SETFILELOCK) {
>  		/*
>  		 * increasing i_filelock_ref closes race window between
>  		 * handling request reply and adding file_lock struct to
> @@ -234,7 +244,13 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  		 * window. Caller function will decrease the counter.
>  		 */
>  		fl->fl_ops = &ceph_fl_lock_ops;
> -		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +		atomic_inc(&ci->i_filelock_ref);
> +	}
> +	spin_unlock(&ci->i_ceph_lock);
> +	if (err < 0) {
> +		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type)
> +			posix_lock_file(file, fl, NULL);
> +		return err;
>  	}
>  
>  	if (F_RDLCK == fl->fl_type)
> @@ -246,10 +262,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  
>  	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
>  	if (!err) {
> -		if (op != CEPH_MDS_OP_GETFILELOCK) {
> +		if (op == CEPH_MDS_OP_SETFILELOCK) {
>  			dout("mds locked, locking locally");
>  			err = posix_lock_file(file, fl, NULL);
> -			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
> +			if (err) {
>  				/* undo! This should only happen if
>  				 * the kernel detects local
>  				 * deadlock. */
> @@ -266,9 +282,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>  {
>  	struct inode *inode = file_inode(file);
> -	int err;
> -	u8 lock_cmd;
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	int err = 0;
>  	u8 wait = 0;
> +	u8 lock_cmd;
>  
>  	if (!(fl->fl_flags & FL_FLOCK))
>  		return -ENOLCK;
> @@ -278,9 +295,20 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>  
>  	dout("ceph_flock, fl_file: %p", fl->fl_file);
>  
> -	/* see comment in ceph_lock */
> -	fl->fl_ops = &ceph_fl_lock_ops;
> -	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +	spin_lock(&ci->i_ceph_lock);
> +	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
> +		err = -EIO;
> +	} else {
> +		/* see comment in ceph_lock */
> +		fl->fl_ops = &ceph_fl_lock_ops;
> +		atomic_inc(&ci->i_filelock_ref);
> +	}
> +	spin_unlock(&ci->i_ceph_lock);
> +	if (err < 0) {
> +		if (F_UNLCK == fl->fl_type)
> +			locks_lock_file_wait(file, fl);
> +		return err;
> +	}
>  
>  	if (IS_SETLKW(cmd))
>  		wait = 1;
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 817ea438aa19..26893cc1fbee 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1214,6 +1214,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  		}
>  		spin_unlock(&mdsc->cap_dirty_lock);
>  
> +		if (atomic_read(&ci->i_filelock_ref) > 0) {
> +			/* make further file lock syscall return -EIO */
> +			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
> +			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
> +					    inode, ceph_ino(inode));
> +		}
> +
>  		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
>  			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
>  			ci->i_prealloc_cap_flush = NULL;
> @@ -2828,7 +2835,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  		struct ceph_mds_cap_reconnect v2;
>  		struct ceph_mds_cap_reconnect_v1 v1;
>  	} rec;
> -	struct ceph_inode_info *ci;
> +	struct ceph_inode_info *ci = cap->ci;
>  	struct ceph_reconnect_state *recon_state = arg;
>  	struct ceph_pagelist *pagelist = recon_state->pagelist;
>  	char *path;
> @@ -2837,8 +2844,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	u64 snap_follows;
>  	struct dentry *dentry;
>  
> -	ci = cap->ci;
> -
>  	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
>  	     inode, ceph_vinop(inode), cap, cap->cap_id,
>  	     ceph_cap_string(cap->issued));
> @@ -2871,7 +2876,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  		rec.v2.issued = cpu_to_le32(cap->issued);
>  		rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
>  		rec.v2.pathbase = cpu_to_le64(pathbase);
> -		rec.v2.flock_len = 0;
> +		rec.v2.flock_len =
> +			(ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1;
>  	} else {
>  		rec.v1.cap_id = cpu_to_le64(cap->cap_id);
>  		rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
> @@ -2900,7 +2906,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  		u8 struct_v = 0;
>  
>  encode_again:
> -		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
> +		if (rec.v2.flock_len) {
> +			ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
> +		} else {
> +			num_fcntl_locks = 0;
> +			num_flock_locks = 0;
> +		}
>  		if (num_fcntl_locks + num_flock_locks > 0) {
>  			flocks = kmalloc((num_fcntl_locks + num_flock_locks) *
>  					 sizeof(struct ceph_filelock), GFP_NOFS);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 88cbc0981c23..6fe4a2c5fd24 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -487,6 +487,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
>  #define CEPH_I_KICK_FLUSH	(1 << 9)  /* kick flushing caps */
>  #define CEPH_I_FLUSH_SNAPS	(1 << 10) /* need flush snapss */
>  #define CEPH_I_ERROR_WRITE	(1 << 11) /* have seen write errors */
> +#define CEPH_I_ERROR_FILELOCK	(1 << 12) /* have seen file lock errors */
> +
>  
>  /*
>   * We set the ERROR_WRITE bit when we start seeing write errors on an inode

This is an ugly situation. We don't really have a good notification
mechanism for when this occurs. Older UNIXes would sometimes issue a
SIGLOST to the application when something like this happens.

We don't have anything like that in Linux at the moment, though I know
Anna Schumaker proposed a mechanism like that for NFS a few years ago.
Maybe we should consider resurrecting that effort? You can certainly hit
the same problem with CIFS as well...

In any case, this is as good a way to handle this as any for now. You
can add:

Acked-by: Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] ceph: avoid null pointer derefernece in case of utsname() return NULL
  2017-09-12  2:53 ` [PATCH 5/5] ceph: avoid null pointer derefernece in case of utsname() return NULL Yan, Zheng
@ 2017-09-12 13:15   ` Jeff Layton
  0 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2017-09-12 13:15 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel, idryomov

On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
> utsname() can return NULL while a process is exiting. The kernel releases
> file locks during process exit. We send a request to the mds when releasing
> a file lock. So it's possible that we open an mds session while a process
> is exiting. utsname() is called in create_session_open_msg().
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/mds_client.c | 6 ++++--
>  fs/ceph/mds_client.h | 3 +++
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 26893cc1fbee..ab6b998d3f44 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -884,8 +884,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
>  	void *p;
>  
>  	const char* metadata[][2] = {
> -		{"hostname", utsname()->nodename},
> -		{"kernel_version", utsname()->release},
> +		{"hostname", mdsc->nodename},
> +		{"kernel_version", init_utsname()->release},
>  		{"entity_id", opt->name ? : ""},
>  		{"root", fsopt->server_path ? : "/"},
>  		{NULL, NULL}
> @@ -3558,6 +3558,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>  	init_rwsem(&mdsc->pool_perm_rwsem);
>  	mdsc->pool_perm_tree = RB_ROOT;
>  
> +	strncpy(mdsc->nodename, utsname()->nodename,
> +		sizeof(mdsc->nodename) - 1);
>  	return 0;
>  }
>  
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index db57ae98ed34..636d6b2ec49c 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -8,6 +8,7 @@
>  #include <linux/rbtree.h>
>  #include <linux/spinlock.h>
>  #include <linux/refcount.h>
> +#include <linux/utsname.h>
>  
>  #include <linux/ceph/types.h>
>  #include <linux/ceph/messenger.h>
> @@ -368,6 +369,8 @@ struct ceph_mds_client {
>  
>  	struct rw_semaphore     pool_perm_rwsem;
>  	struct rb_root		pool_perm_tree;
> +
> +	char nodename[__NEW_UTS_LEN + 1];
>  };
>  
>  extern const char *ceph_mds_op_name(int op);

(nit: might want to add a link to the tracker bug in the description)

Reviewed-by: Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks
  2017-09-12  2:53 ` [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks Yan, Zheng
  2017-09-12 10:56   ` Jeff Layton
@ 2017-09-12 13:21   ` Jeff Layton
  2017-09-12 13:36     ` Yan, Zheng
  1 sibling, 1 reply; 15+ messages in thread
From: Jeff Layton @ 2017-09-12 13:21 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel, idryomov

On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
> file locks are tracked by inode's auth mds. releasing auth caps
> is equivalent to releasing all file locks.
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/inode.c      |  1 +
>  fs/ceph/locks.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++--------
>  fs/ceph/mds_client.c |  2 ++
>  fs/ceph/super.h      |  1 +
>  4 files changed, 57 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 373dab5173ca..c70b26d2bf8a 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -492,6 +492,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>  	ci->i_wb_ref = 0;
>  	ci->i_wrbuffer_ref = 0;
>  	ci->i_wrbuffer_ref_head = 0;
> +	atomic_set(&ci->i_filelock_ref, 0);
>  	ci->i_shared_gen = 0;
>  	ci->i_rdcache_gen = 0;
>  	ci->i_rdcache_revoking = 0;
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index 64ae74472046..69f731e75302 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -29,19 +29,46 @@ void __init ceph_flock_init(void)
>  	get_random_bytes(&lock_secret, sizeof(lock_secret));
>  }
>  
> +static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
> +{
> +	struct inode *inode = file_inode(src->fl_file);
> +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +}
> +
> +static void ceph_fl_release_lock(struct file_lock *fl)
> +{
> +	struct inode *inode = file_inode(fl->fl_file);
> +	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
> +}
> +
> +static const struct file_lock_operations ceph_fl_lock_ops = {
> +	.fl_copy_lock = ceph_fl_copy_lock,
> +	.fl_release_private = ceph_fl_release_lock,
> +};
> +
>  /**
>   * Implement fcntl and flock locking functions.
>   */
> -static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
> +static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
>  			     int cmd, u8 wait, struct file_lock *fl)
>  {
> -	struct inode *inode = file_inode(file);
>  	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>  	struct ceph_mds_request *req;
>  	int err;
>  	u64 length = 0;
>  	u64 owner;
>  
> +	if (operation == CEPH_MDS_OP_SETFILELOCK) {
> +		/*
> +		 * increasing i_filelock_ref closes race window between
> +		 * handling request reply and adding file_lock struct to
> +		 * inode. Otherwise, i_auth_cap may get trimmed in the
> +		 * window. Caller function will decrease the counter.
> +		 */
> +		fl->fl_ops = &ceph_fl_lock_ops;
> +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +	}
> +
>  	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
>  		wait = 0;
>  
> @@ -179,10 +206,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
>   */
>  int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  {
> -	u8 lock_cmd;
> +	struct inode *inode = file_inode(file);
>  	int err;
> -	u8 wait = 0;
>  	u16 op = CEPH_MDS_OP_SETFILELOCK;
> +	u8 lock_cmd;
> +	u8 wait = 0;
>  
>  	if (!(fl->fl_flags & FL_POSIX))
>  		return -ENOLCK;
> @@ -198,6 +226,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  	else if (IS_SETLKW(cmd))
>  		wait = 1;
>  
> +	if (op == CEPH_MDS_OP_SETFILELOCK) {
> +		/*
> +		 * increasing i_filelock_ref closes race window between
> +		 * handling request reply and adding file_lock struct to
> +		 * inode. Otherwise, i_auth_cap may get trimmed in the
> +		 * window. Caller function will decrease the counter.
> +		 */
> +		fl->fl_ops = &ceph_fl_lock_ops;
> +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +	}
> +
>  	if (F_RDLCK == fl->fl_type)
>  		lock_cmd = CEPH_LOCK_SHARED;
>  	else if (F_WRLCK == fl->fl_type)
> @@ -205,7 +244,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  	else
>  		lock_cmd = CEPH_LOCK_UNLOCK;
>  
> -	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
> +	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
>  	if (!err) {
>  		if (op != CEPH_MDS_OP_GETFILELOCK) {
>  			dout("mds locked, locking locally");
> @@ -214,7 +253,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  				/* undo! This should only happen if
>  				 * the kernel detects local
>  				 * deadlock. */
> -				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
> +				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
>  						  CEPH_LOCK_UNLOCK, 0, fl);
>  				dout("got %d on posix_lock_file, undid lock",
>  				     err);
> @@ -226,8 +265,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>  
>  int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>  {
> -	u8 lock_cmd;
> +	struct inode *inode = file_inode(file);
>  	int err;
> +	u8 lock_cmd;
>  	u8 wait = 0;
>  
>  	if (!(fl->fl_flags & FL_FLOCK))
> @@ -238,6 +278,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>  
>  	dout("ceph_flock, fl_file: %p", fl->fl_file);
>  
> +	/* see comment in ceph_lock */
> +	fl->fl_ops = &ceph_fl_lock_ops;
> +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +
>  	if (IS_SETLKW(cmd))
>  		wait = 1;
>  
> @@ -249,13 +293,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>  		lock_cmd = CEPH_LOCK_UNLOCK;
>  
>  	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
> -				file, lock_cmd, wait, fl);
> +				inode, lock_cmd, wait, fl);
>  	if (!err) {
>  		err = locks_lock_file_wait(file, fl);
>  		if (err) {
>  			ceph_lock_message(CEPH_LOCK_FLOCK,
>  					  CEPH_MDS_OP_SETFILELOCK,
> -					  file, CEPH_LOCK_UNLOCK, 0, fl);
> +					  inode, CEPH_LOCK_UNLOCK, 0, fl);
>  			dout("got %d on locks_lock_file_wait, undid lock", err);
>  		}
>  	}
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 9dd6b836ac9e..6146af4ed03c 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1461,6 +1461,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>  			goto out;
>  		if ((used | wanted) & CEPH_CAP_ANY_WR)
>  			goto out;
> +		if (atomic_read(&ci->i_filelock_ref) > 0)
> +			goto out;

Is there a potential race here? Could i_filelock_ref do a 0->1
transition just after you check it?

>  	}
>  	/* The inode has cached pages, but it's no longer used.
>  	 * we can safely drop it */
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 279a2f401cf5..9e0de8264257 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -351,6 +351,7 @@ struct ceph_inode_info {
>  	int i_pin_ref;
>  	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
>  	int i_wrbuffer_ref, i_wrbuffer_ref_head;
> +	atomic_t i_filelock_ref;
>  	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
>  	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
>  	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */

-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks
  2017-09-12 13:21   ` Jeff Layton
@ 2017-09-12 13:36     ` Yan, Zheng
  2017-09-18 15:06       ` Jeff Layton
  0 siblings, 1 reply; 15+ messages in thread
From: Yan, Zheng @ 2017-09-12 13:36 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Ceph Development, idryomov


> On 12 Sep 2017, at 21:21, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
>> file locks are tracked by inode's auth mds. releasing auth caps
>> is equivalent to releasing all file locks.
>> 
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>> fs/ceph/inode.c      |  1 +
>> fs/ceph/locks.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++--------
>> fs/ceph/mds_client.c |  2 ++
>> fs/ceph/super.h      |  1 +
>> 4 files changed, 57 insertions(+), 9 deletions(-)
>> 
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index 373dab5173ca..c70b26d2bf8a 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -492,6 +492,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>> 	ci->i_wb_ref = 0;
>> 	ci->i_wrbuffer_ref = 0;
>> 	ci->i_wrbuffer_ref_head = 0;
>> +	atomic_set(&ci->i_filelock_ref, 0);
>> 	ci->i_shared_gen = 0;
>> 	ci->i_rdcache_gen = 0;
>> 	ci->i_rdcache_revoking = 0;
>> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
>> index 64ae74472046..69f731e75302 100644
>> --- a/fs/ceph/locks.c
>> +++ b/fs/ceph/locks.c
>> @@ -29,19 +29,46 @@ void __init ceph_flock_init(void)
>> 	get_random_bytes(&lock_secret, sizeof(lock_secret));
>> }
>> 
>> +static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>> +{
>> +	struct inode *inode = file_inode(src->fl_file);
>> +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> +}
>> +
>> +static void ceph_fl_release_lock(struct file_lock *fl)
>> +{
>> +	struct inode *inode = file_inode(fl->fl_file);
>> +	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
>> +}
>> +
>> +static const struct file_lock_operations ceph_fl_lock_ops = {
>> +	.fl_copy_lock = ceph_fl_copy_lock,
>> +	.fl_release_private = ceph_fl_release_lock,
>> +};
>> +
>> /**
>>  * Implement fcntl and flock locking functions.
>>  */
>> -static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
>> +static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
>> 			     int cmd, u8 wait, struct file_lock *fl)
>> {
>> -	struct inode *inode = file_inode(file);
>> 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>> 	struct ceph_mds_request *req;
>> 	int err;
>> 	u64 length = 0;
>> 	u64 owner;
>> 
>> +	if (operation == CEPH_MDS_OP_SETFILELOCK) {
>> +		/*
>> +		 * increasing i_filelock_ref closes race window between
>> +		 * handling request reply and adding file_lock struct to
>> +		 * inode. Otherwise, i_auth_cap may get trimmed in the
>> +		 * window. Caller function will decrease the counter.
>> +		 */
>> +		fl->fl_ops = &ceph_fl_lock_ops;
>> +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> +	}
>> +
>> 	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
>> 		wait = 0;
>> 
>> @@ -179,10 +206,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
>>  */
>> int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> {
>> -	u8 lock_cmd;
>> +	struct inode *inode = file_inode(file);
>> 	int err;
>> -	u8 wait = 0;
>> 	u16 op = CEPH_MDS_OP_SETFILELOCK;
>> +	u8 lock_cmd;
>> +	u8 wait = 0;
>> 
>> 	if (!(fl->fl_flags & FL_POSIX))
>> 		return -ENOLCK;
>> @@ -198,6 +226,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> 	else if (IS_SETLKW(cmd))
>> 		wait = 1;
>> 
>> +	if (op == CEPH_MDS_OP_SETFILELOCK) {
>> +		/*
>> +		 * increasing i_filelock_ref closes race window between
>> +		 * handling request reply and adding file_lock struct to
>> +		 * inode. Otherwise, i_auth_cap may get trimmed in the
>> +		 * window. Caller function will decrease the counter.
>> +		 */
>> +		fl->fl_ops = &ceph_fl_lock_ops;
>> +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> +	}
>> +
>> 	if (F_RDLCK == fl->fl_type)
>> 		lock_cmd = CEPH_LOCK_SHARED;
>> 	else if (F_WRLCK == fl->fl_type)
>> @@ -205,7 +244,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> 	else
>> 		lock_cmd = CEPH_LOCK_UNLOCK;
>> 
>> -	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
>> +	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
>> 	if (!err) {
>> 		if (op != CEPH_MDS_OP_GETFILELOCK) {
>> 			dout("mds locked, locking locally");
>> @@ -214,7 +253,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> 				/* undo! This should only happen if
>> 				 * the kernel detects local
>> 				 * deadlock. */
>> -				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
>> +				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
>> 						  CEPH_LOCK_UNLOCK, 0, fl);
>> 				dout("got %d on posix_lock_file, undid lock",
>> 				     err);
>> @@ -226,8 +265,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
>> 
>> int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>> {
>> -	u8 lock_cmd;
>> +	struct inode *inode = file_inode(file);
>> 	int err;
>> +	u8 lock_cmd;
>> 	u8 wait = 0;
>> 
>> 	if (!(fl->fl_flags & FL_FLOCK))
>> @@ -238,6 +278,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>> 
>> 	dout("ceph_flock, fl_file: %p", fl->fl_file);
>> 
>> +	/* see comment in ceph_lock */
>> +	fl->fl_ops = &ceph_fl_lock_ops;
>> +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>> +
>> 	if (IS_SETLKW(cmd))
>> 		wait = 1;
>> 
>> @@ -249,13 +293,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
>> 		lock_cmd = CEPH_LOCK_UNLOCK;
>> 
>> 	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
>> -				file, lock_cmd, wait, fl);
>> +				inode, lock_cmd, wait, fl);
>> 	if (!err) {
>> 		err = locks_lock_file_wait(file, fl);
>> 		if (err) {
>> 			ceph_lock_message(CEPH_LOCK_FLOCK,
>> 					  CEPH_MDS_OP_SETFILELOCK,
>> -					  file, CEPH_LOCK_UNLOCK, 0, fl);
>> +					  inode, CEPH_LOCK_UNLOCK, 0, fl);
>> 			dout("got %d on locks_lock_file_wait, undid lock", err);
>> 		}
>> 	}
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 9dd6b836ac9e..6146af4ed03c 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1461,6 +1461,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>> 			goto out;
>> 		if ((used | wanted) & CEPH_CAP_ANY_WR)
>> 			goto out;
>> +		if (atomic_read(&ci->i_filelock_ref) > 0)
>> +			goto out;
> 
> Is there a potential race here? Could i_filelock_ref do a 0->1
> transition just after you check it?

It’s possible, but it does not hurt, because i_filelock_ref gets increased before sending the mds request.
The extra reference gets dropped after setting up the kernel file_lock data structure. The reply to the mds
request re-adds the auth cap if it was dropped.

Regards
Yan, Zheng

> 
>> 	}
>> 	/* The inode has cached pages, but it's no longer used.
>> 	 * we can safely drop it */
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 279a2f401cf5..9e0de8264257 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -351,6 +351,7 @@ struct ceph_inode_info {
>> 	int i_pin_ref;
>> 	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
>> 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
>> +	atomic_t i_filelock_ref;
>> 	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
>> 	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
>> 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
> 
> -- 
> Jeff Layton <jlayton@redhat.com>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks
  2017-09-12 13:36     ` Yan, Zheng
@ 2017-09-18 15:06       ` Jeff Layton
  0 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2017-09-18 15:06 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: Ceph Development, idryomov

On Tue, 2017-09-12 at 21:36 +0800, Yan, Zheng wrote:
> > On 12 Sep 2017, at 21:21, Jeff Layton <jlayton@redhat.com> wrote:
> > 
> > On Tue, 2017-09-12 at 10:53 +0800, Yan, Zheng wrote:
> > > file locks are tracked by inode's auth mds. releasing auth caps
> > > is equivalent to releasing all file locks.
> > > 
> > > Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> > > ---
> > > fs/ceph/inode.c      |  1 +
> > > fs/ceph/locks.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++--------
> > > fs/ceph/mds_client.c |  2 ++
> > > fs/ceph/super.h      |  1 +
> > > 4 files changed, 57 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > > index 373dab5173ca..c70b26d2bf8a 100644
> > > --- a/fs/ceph/inode.c
> > > +++ b/fs/ceph/inode.c
> > > @@ -492,6 +492,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
> > > 	ci->i_wb_ref = 0;
> > > 	ci->i_wrbuffer_ref = 0;
> > > 	ci->i_wrbuffer_ref_head = 0;
> > > +	atomic_set(&ci->i_filelock_ref, 0);
> > > 	ci->i_shared_gen = 0;
> > > 	ci->i_rdcache_gen = 0;
> > > 	ci->i_rdcache_revoking = 0;
> > > diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> > > index 64ae74472046..69f731e75302 100644
> > > --- a/fs/ceph/locks.c
> > > +++ b/fs/ceph/locks.c
> > > @@ -29,19 +29,46 @@ void __init ceph_flock_init(void)
> > > 	get_random_bytes(&lock_secret, sizeof(lock_secret));
> > > }
> > > 
> > > +static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
> > > +{
> > > +	struct inode *inode = file_inode(src->fl_file);
> > > +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> > > +}
> > > +
> > > +static void ceph_fl_release_lock(struct file_lock *fl)
> > > +{
> > > +	struct inode *inode = file_inode(fl->fl_file);
> > > +	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
> > > +}
> > > +
> > > +static const struct file_lock_operations ceph_fl_lock_ops = {
> > > +	.fl_copy_lock = ceph_fl_copy_lock,
> > > +	.fl_release_private = ceph_fl_release_lock,
> > > +};
> > > +
> > > /**
> > >  * Implement fcntl and flock locking functions.
> > >  */
> > > -static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
> > > +static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
> > > 			     int cmd, u8 wait, struct file_lock *fl)
> > > {
> > > -	struct inode *inode = file_inode(file);
> > > 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> > > 	struct ceph_mds_request *req;
> > > 	int err;
> > > 	u64 length = 0;
> > > 	u64 owner;
> > > 
> > > +	if (operation == CEPH_MDS_OP_SETFILELOCK) {
> > > +		/*
> > > +		 * increasing i_filelock_ref closes race window between
> > > +		 * handling request reply and adding file_lock struct to
> > > +		 * inode. Otherwise, i_auth_cap may get trimmed in the
> > > +		 * window. Caller function will decrease the counter.
> > > +		 */
> > > +		fl->fl_ops = &ceph_fl_lock_ops;
> > > +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> > > +	}
> > > +
> > > 	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
> > > 		wait = 0;
> > > 
> > > @@ -179,10 +206,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
> > >  */
> > > int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
> > > {
> > > -	u8 lock_cmd;
> > > +	struct inode *inode = file_inode(file);
> > > 	int err;
> > > -	u8 wait = 0;
> > > 	u16 op = CEPH_MDS_OP_SETFILELOCK;
> > > +	u8 lock_cmd;
> > > +	u8 wait = 0;
> > > 
> > > 	if (!(fl->fl_flags & FL_POSIX))
> > > 		return -ENOLCK;
> > > @@ -198,6 +226,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
> > > 	else if (IS_SETLKW(cmd))
> > > 		wait = 1;
> > > 
> > > +	if (op == CEPH_MDS_OP_SETFILELOCK) {
> > > +		/*
> > > +		 * increasing i_filelock_ref closes race window between
> > > +		 * handling request reply and adding file_lock struct to
> > > +		 * inode. Otherwise, i_auth_cap may get trimmed in the
> > > +		 * window. Caller function will decrease the counter.
> > > +		 */
> > > +		fl->fl_ops = &ceph_fl_lock_ops;
> > > +		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> > > +	}
> > > +
> > > 	if (F_RDLCK == fl->fl_type)
> > > 		lock_cmd = CEPH_LOCK_SHARED;
> > > 	else if (F_WRLCK == fl->fl_type)
> > > @@ -205,7 +244,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
> > > 	else
> > > 		lock_cmd = CEPH_LOCK_UNLOCK;
> > > 
> > > -	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
> > > +	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
> > > 	if (!err) {
> > > 		if (op != CEPH_MDS_OP_GETFILELOCK) {
> > > 			dout("mds locked, locking locally");
> > > @@ -214,7 +253,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
> > > 				/* undo! This should only happen if
> > > 				 * the kernel detects local
> > > 				 * deadlock. */
> > > -				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
> > > +				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
> > > 						  CEPH_LOCK_UNLOCK, 0, fl);
> > > 				dout("got %d on posix_lock_file, undid lock",
> > > 				     err);
> > > @@ -226,8 +265,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
> > > 
> > > int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
> > > {
> > > -	u8 lock_cmd;
> > > +	struct inode *inode = file_inode(file);
> > > 	int err;
> > > +	u8 lock_cmd;
> > > 	u8 wait = 0;
> > > 
> > > 	if (!(fl->fl_flags & FL_FLOCK))
> > > @@ -238,6 +278,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
> > > 
> > > 	dout("ceph_flock, fl_file: %p", fl->fl_file);
> > > 
> > > +	/* see comment in ceph_lock */
> > > +	fl->fl_ops = &ceph_fl_lock_ops;
> > > +	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> > > +
> > > 	if (IS_SETLKW(cmd))
> > > 		wait = 1;
> > > 
> > > @@ -249,13 +293,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
> > > 		lock_cmd = CEPH_LOCK_UNLOCK;
> > > 
> > > 	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
> > > -				file, lock_cmd, wait, fl);
> > > +				inode, lock_cmd, wait, fl);
> > > 	if (!err) {
> > > 		err = locks_lock_file_wait(file, fl);
> > > 		if (err) {
> > > 			ceph_lock_message(CEPH_LOCK_FLOCK,
> > > 					  CEPH_MDS_OP_SETFILELOCK,
> > > -					  file, CEPH_LOCK_UNLOCK, 0, fl);
> > > +					  inode, CEPH_LOCK_UNLOCK, 0, fl);
> > > 			dout("got %d on locks_lock_file_wait, undid lock", err);
> > > 		}
> > > 	}
> > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > index 9dd6b836ac9e..6146af4ed03c 100644
> > > --- a/fs/ceph/mds_client.c
> > > +++ b/fs/ceph/mds_client.c
> > > @@ -1461,6 +1461,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
> > > 			goto out;
> > > 		if ((used | wanted) & CEPH_CAP_ANY_WR)
> > > 			goto out;
> > > +		if (atomic_read(&ci->i_filelock_ref) > 0)
> > > +			goto out;
> > 
> > Is there a potential race here? Could i_filelock_ref do a 0->1
> > transition just after you check it?
> 
> It’s possible, but it does not hurt, because i_filelock_ref gets increased before sending the mds request.
> The extra reference gets dropped after setting up the kernel file_lock data structure. The reply to the mds
> request re-adds the auth cap if it was dropped.
> 
> Regards
> Yan, Zheng
> 

Hmm, ok. It'd be nice to flesh out the description a bit, and talk about
 what problem this fixes. I guess this is just a best effort thing to
avoid releasing auth caps if locks are held or are being requested? It's
not immediately obvious to me.

With that understood, the patch itself looks ok:

Acked-by: Jeff Layton <jlayton@redhat.com>

> > 
> > > 	}
> > > 	/* The inode has cached pages, but it's no longer used.
> > > 	 * we can safely drop it */
> > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > > index 279a2f401cf5..9e0de8264257 100644
> > > --- a/fs/ceph/super.h
> > > +++ b/fs/ceph/super.h
> > > @@ -351,6 +351,7 @@ struct ceph_inode_info {
> > > 	int i_pin_ref;
> > > 	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
> > > 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
> > > +	atomic_t i_filelock_ref;
> > > 	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
> > > 	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
> > > 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
> > 
> > -- 
> > Jeff Layton <jlayton@redhat.com>
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-09-18 15:06 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-12  2:53 [PATCH 0/5] ceph: file lock related fixes Yan, Zheng
2017-09-12  2:53 ` [PATCH 1/5] ceph: keep auth cap when inode has flocks or posix locks Yan, Zheng
2017-09-12 10:56   ` Jeff Layton
2017-09-12 12:57     ` Yan, Zheng
2017-09-12 13:21   ` Jeff Layton
2017-09-12 13:36     ` Yan, Zheng
2017-09-18 15:06       ` Jeff Layton
2017-09-12  2:53 ` [PATCH 2/5] ceph: make lock_to_ceph_filelock() 'static' Yan, Zheng
2017-09-12 12:48   ` Jeff Layton
2017-09-12  2:53 ` [PATCH 3/5] ceph: optimize flock encoding during reconnect Yan, Zheng
2017-09-12 13:03   ` Jeff Layton
2017-09-12  2:53 ` [PATCH 4/5] ceph: handle 'session get evicted while there are file locks' Yan, Zheng
2017-09-12 13:13   ` Jeff Layton
2017-09-12  2:53 ` [PATCH 5/5] ceph: avoid null pointer dereference in case of utsname() return NULL Yan, Zheng
2017-09-12 13:15   ` Jeff Layton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.