* [PATCH 0/9] fixes for kclient
@ 2013-06-04  3:19 Yan, Zheng
  2013-06-04  3:19 ` [PATCH 1/9] libceph: fix safe completion Yan, Zheng
                   ` (9 more replies)
  0 siblings, 10 replies; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

This patch series is also available at:
  git://github.com/ukernel/linux.git wip-ceph
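
For example, the branch can be fetched with:

  git fetch git://github.com/ukernel/linux.git wip-ceph
  git checkout FETCH_HEAD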

Regards
Yan, Zheng

* [PATCH 1/9] libceph: fix safe completion
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-11  4:04   ` Alex Elder
  2013-06-04  3:19 ` [PATCH 2/9] libceph: call r_unsafe_callback when unsafe reply is received Yan, Zheng
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

handle_reply() should call complete_request() whenever an OSD reply
carries the ONDISK flag, not only when the first reply does.
Previously, if the first reply was unsafe (no ONDISK flag) and the
safe reply arrived later, complete_request() was never called and
fsync waiters hung on r_safe_completion.
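
A minimal userspace sketch of the fixed flow (illustrative only:
ONDISK stands in for CEPH_OSD_FLAG_ONDISK, printf for the real
callbacks and completions, and the locking is omitted):

#include <stdbool.h>
#include <stdio.h>

#define ONDISK 0x1   /* stands in for CEPH_OSD_FLAG_ONDISK */

struct req {
	bool got_reply;                /* models req->r_got_reply */
};

static void model_handle_reply(struct req *req, int flags)
{
	bool already_completed = req->got_reply;

	req->got_reply = true;
	if (!already_completed)
		printf("first reply: r_callback / r_completion\n");
	if (flags & ONDISK)
		printf("safe reply: complete_request()\n");
}

int main(void)
{
	struct req r = { .got_reply = false };

	model_handle_reply(&r, 0);      /* unsafe ack arrives first */
	model_handle_reply(&r, ONDISK); /* safe commit must still fire */
	return 0;
}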

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 include/linux/ceph/osd_client.h |  1 -
 net/ceph/osd_client.c           | 16 ++++++++--------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 186db0b..ce6df39 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -145,7 +145,6 @@ struct ceph_osd_request {
 	s32               r_reply_op_result[CEPH_OSD_MAX_OP];
 	int               r_got_reply;
 	int		  r_linger;
-	int		  r_completed;
 
 	struct ceph_osd_client *r_osdc;
 	struct kref       r_kref;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a3395fd..536c0e5 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1525,6 +1525,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	for (i = 0; i < numops; i++)
 		req->r_reply_op_result[i] = ceph_decode_32(&p);
 
+	already_completed = req->r_got_reply;
+
 	if (!req->r_got_reply) {
 
 		req->r_result = result;
@@ -1555,16 +1557,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	    ((flags & CEPH_OSD_FLAG_WRITE) == 0))
 		__unregister_request(osdc, req);
 
-	already_completed = req->r_completed;
-	req->r_completed = 1;
 	mutex_unlock(&osdc->request_mutex);
-	if (already_completed)
-		goto done;
 
-	if (req->r_callback)
-		req->r_callback(req, msg);
-	else
-		complete_all(&req->r_completion);
+	if (!already_completed) {
+		if (req->r_callback)
+			req->r_callback(req, msg);
+		else
+			complete_all(&req->r_completion);
+	}
 
 	if (flags & CEPH_OSD_FLAG_ONDISK)
 		complete_request(req);
-- 
1.8.1.4


* [PATCH 2/9] libceph: call r_unsafe_callback when unsafe reply is received
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
  2013-06-04  3:19 ` [PATCH 1/9] libceph: fix safe completion Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-09  6:19   ` Sage Weil
  2013-06-04  3:19 ` [PATCH 3/9] libceph: fix truncate size calculation Yan, Zheng
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

We can't use !req->r_sent to check if an OSD request is being sent
for the first time, because __cancel_request() zeros req->r_sent
when the OSD map changes. Rather than adding a new field to
ceph_osd_request to track whether it has been sent before, call the
unsafe callback only when an unsafe OSD reply is received. If the
OSD's first reply is safe, just skip calling the unsafe callback.
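
The resulting pairing of r_unsafe_callback() calls can be sketched as
a userspace model (illustrative; not the kernel code):

#include <stdbool.h>
#include <stdio.h>

#define ONDISK 0x1

static void unsafe_callback(bool unsafe)
{
	/* true: account the write as unsafe (still in flight);
	 * false: drop that accounting once it is on disk */
	printf("r_unsafe_callback(%s)\n", unsafe ? "true" : "false");
}

static void model_handle_reply(bool *completed, int flags, int result)
{
	bool first = !*completed;

	*completed = true;
	if (first && result >= 0 && !(flags & ONDISK))
		unsafe_callback(true);  /* first reply is unsafe */
	if ((flags & ONDISK) && !first)
		unsafe_callback(false); /* pairs the earlier true call */
}

int main(void)
{
	bool completed = false;

	model_handle_reply(&completed, 0, 0);      /* unsafe ack */
	model_handle_reply(&completed, ONDISK, 0); /* later safe commit */
	return 0;
}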

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 net/ceph/osd_client.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 536c0e5..6972d17 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1338,10 +1338,6 @@ static void __send_request(struct ceph_osd_client *osdc,
 
 	ceph_msg_get(req->r_request); /* send consumes a ref */
 
-	/* Mark the request unsafe if this is the first timet's being sent. */
-
-	if (!req->r_sent && req->r_unsafe_callback)
-		req->r_unsafe_callback(req, true);
 	req->r_sent = req->r_osd->o_incarnation;
 
 	ceph_con_send(&req->r_osd->o_con, req->r_request);
@@ -1432,8 +1428,6 @@ static void handle_osds_timeout(struct work_struct *work)
 
 static void complete_request(struct ceph_osd_request *req)
 {
-	if (req->r_unsafe_callback)
-		req->r_unsafe_callback(req, false);
 	complete_all(&req->r_safe_completion);  /* fsync waiter */
 }
 
@@ -1560,14 +1554,20 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	mutex_unlock(&osdc->request_mutex);
 
 	if (!already_completed) {
+		if (req->r_unsafe_callback &&
+		    result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
+			req->r_unsafe_callback(req, true);
 		if (req->r_callback)
 			req->r_callback(req, msg);
 		else
 			complete_all(&req->r_completion);
 	}
 
-	if (flags & CEPH_OSD_FLAG_ONDISK)
+	if (flags & CEPH_OSD_FLAG_ONDISK) {
+		if (req->r_unsafe_callback && already_completed)
+			req->r_unsafe_callback(req, false);
 		complete_request(req);
+	}
 
 done:
 	dout("req=%p req->r_linger=%d\n", req, req->r_linger);
-- 
1.8.1.4


* [PATCH 3/9] libceph: fix truncate size calculation
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
  2013-06-04  3:19 ` [PATCH 1/9] libceph: fix safe completion Yan, Zheng
  2013-06-04  3:19 ` [PATCH 2/9] libceph: call r_unsafe_callback when unsafe reply is received Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-04  3:19 ` [PATCH 4/9] ceph: fix cap release race Yan, Zheng
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

Check the "not truncated yet" case: truncate_seq == 1 with
truncate_size == -1 means the file has never been truncated, so the
sentinel truncate_size must be passed through unchanged rather than
clipped to the object boundary.
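
The adjusted calculation, as a standalone userspace sketch (the
function name and the example values are made up for illustration):

#include <stdint.h>
#include <stdio.h>

static uint64_t clip_truncate_size(uint32_t truncate_seq,
				   uint64_t truncate_size,
				   uint64_t object_base,
				   uint64_t object_size)
{
	/* "not truncated yet": pass the sentinel through untouched */
	if (truncate_seq == 1 && truncate_size == UINT64_MAX)
		return truncate_size;

	if (truncate_size <= object_base)
		return 0;
	truncate_size -= object_base;
	if (truncate_size > object_size)
		truncate_size = object_size;
	return truncate_size;
}

int main(void)
{
	/* 4MB objects, file truncated at 6MB: the second object
	 * keeps its first 2MB */
	printf("%llu\n", (unsigned long long)
	       clip_truncate_size(2, 6ull << 20, 4ull << 20, 4ull << 20));
	return 0;
}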

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 net/ceph/osd_client.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6972d17..93efdfb 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -733,12 +733,14 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 
 	object_size = le32_to_cpu(layout->fl_object_size);
 	object_base = off - objoff;
-	if (truncate_size <= object_base) {
-		truncate_size = 0;
-	} else {
-		truncate_size -= object_base;
-		if (truncate_size > object_size)
-			truncate_size = object_size;
+	if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+		if (truncate_size <= object_base) {
+			truncate_size = 0;
+		} else {
+			truncate_size -= object_base;
+			if (truncate_size > object_size)
+				truncate_size = object_size;
+		}
 	}
 
 	osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
-- 
1.8.1.4


* [PATCH 4/9] ceph: fix cap release race
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
                   ` (2 preceding siblings ...)
  2013-06-04  3:19 ` [PATCH 3/9] libceph: fix truncate size calculation Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-04  3:19 ` [PATCH 5/9] ceph: reset iov_len when discarding cap release messages Yan, Zheng
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

ceph_encode_inode_release() can race with ceph_open() and release
caps that newly opened files want. Recompute the wanted caps with
__ceph_caps_wanted() instead of trusting the cached cap->mds_wanted.
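
The shape of the fix, in a simplified userspace model (the cap bits,
struct and helpers are stand-ins, not the real ceph definitions;
nodelay models CEPH_I_NODELAY):

#include <stdio.h>

#define CAP_RD 0x1
#define CAP_WR 0x2

struct inode_model {
	int nr_readers, nr_writers;  /* open files, as seen under lock */
	int mds_wanted;              /* cached value from the MDS */
	int nodelay;                 /* models CEPH_I_NODELAY */
};

/* models __ceph_caps_wanted(): derive wanted caps from open files */
static int caps_wanted(const struct inode_model *in)
{
	return (in->nr_readers ? CAP_RD : 0) |
	       (in->nr_writers ? CAP_WR : 0);
}

static void encode_release(struct inode_model *in)
{
	int wanted = caps_wanted(in);

	if (!in->nodelay)
		wanted |= in->mds_wanted;  /* keep what was already asked */
	in->mds_wanted = wanted;
}

int main(void)
{
	/* a reader opened the file before the release is encoded:
	 * CAP_RD must survive the release */
	struct inode_model in = { .nr_readers = 1, .mds_wanted = 0 };

	encode_release(&in);
	printf("mds_wanted=%#x\n", in.mds_wanted);  /* 0x1 */
	return 0;
}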

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index da0f9b8..54c290b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3042,21 +3042,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 		     (cap->issued & unless) == 0)) {
 			if ((cap->issued & drop) &&
 			    (cap->issued & unless) == 0) {
-				dout("encode_inode_release %p cap %p %s -> "
-				     "%s\n", inode, cap,
+				int wanted = __ceph_caps_wanted(ci);
+				if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0)
+					wanted |= cap->mds_wanted;
+				dout("encode_inode_release %p cap %p "
+				     "%s -> %s, wanted %s -> %s\n", inode, cap,
 				     ceph_cap_string(cap->issued),
-				     ceph_cap_string(cap->issued & ~drop));
+				     ceph_cap_string(cap->issued & ~drop),
+				     ceph_cap_string(cap->mds_wanted),
+				     ceph_cap_string(wanted));
+
 				cap->issued &= ~drop;
 				cap->implemented &= ~drop;
-				if (ci->i_ceph_flags & CEPH_I_NODELAY) {
-					int wanted = __ceph_caps_wanted(ci);
-					dout("  wanted %s -> %s (act %s)\n",
-					     ceph_cap_string(cap->mds_wanted),
-					     ceph_cap_string(cap->mds_wanted &
-							     ~wanted),
-					     ceph_cap_string(wanted));
-					cap->mds_wanted &= wanted;
-				}
+				cap->mds_wanted = wanted;
 			} else {
 				dout("encode_inode_release %p cap %p %s"
 				     " (force)\n", inode, cap,
-- 
1.8.1.4


* [PATCH 5/9] ceph: reset iov_len when discarding cap release messages
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
                   ` (3 preceding siblings ...)
  2013-06-04  3:19 ` [PATCH 4/9] ceph: fix cap release race Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-04  3:19 ` [PATCH 6/9] ceph: fix race between page writeback and truncate Yan, Zheng
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

discard_cap_releases() zeroes head->num, but msg->front.iov_len still
covered the discarded cap release entries. Reset iov_len back to the
header size so the stale entries are not sent to the MDS when the
message is reused.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/mds_client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4f22671..e2d7e56 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1391,6 +1391,7 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 	num = le32_to_cpu(head->num);
 	dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num);
 	head->num = cpu_to_le32(0);
+	msg->front.iov_len = sizeof(*head);
 	session->s_num_cap_releases += num;
 
 	/* requeue completed messages */
-- 
1.8.1.4


* [PATCH 6/9] ceph: fix race between page writeback and truncate
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
                   ` (4 preceding siblings ...)
  2013-06-04  3:19 ` [PATCH 5/9] ceph: reset iov_len when discarding cap release messages Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-04  3:19 ` [PATCH 7/9] ceph: check migrate seq before changing auth cap Yan, Zheng
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

The client can receive a truncate request from the MDS at any time,
so the page writeback code needs to sample i_size, truncate_seq and
truncate_size atomically, under i_ceph_lock.
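
The locking pattern the fix uses, as a minimal userspace sketch (a
pthread mutex stands in for i_ceph_lock; field names mirror, but are
not, the kernel's):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct inode_model {
	pthread_mutex_t lock;          /* models ci->i_ceph_lock */
	uint64_t size, truncate_size;
	uint32_t truncate_seq;
};

struct wb_snapshot {
	uint64_t size, truncate_size;
	uint32_t truncate_seq;
};

static struct wb_snapshot snapshot(struct inode_model *in)
{
	struct wb_snapshot s;

	pthread_mutex_lock(&in->lock);
	s.size = in->size;               /* i_size_read() equivalent */
	s.truncate_seq = in->truncate_seq;
	s.truncate_size = in->truncate_size;
	pthread_mutex_unlock(&in->lock);
	return s;  /* a truncate arriving later cannot tear this view */
}

int main(void)
{
	struct inode_model in = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.size = 1 << 20,
	};
	struct wb_snapshot s = snapshot(&in);

	printf("size=%llu seq=%u\n",
	       (unsigned long long)s.size, (unsigned)s.truncate_seq);
	return 0;
}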

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/addr.c | 84 ++++++++++++++++++++++++++++------------------------------
 1 file changed, 40 insertions(+), 44 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 3e68ac1..3500b74 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -438,13 +438,12 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	struct ceph_inode_info *ci;
 	struct ceph_fs_client *fsc;
 	struct ceph_osd_client *osdc;
-	loff_t page_off = page_offset(page);
-	int len = PAGE_CACHE_SIZE;
-	loff_t i_size;
-	int err = 0;
 	struct ceph_snap_context *snapc, *oldest;
-	u64 snap_size = 0;
+	loff_t page_off = page_offset(page);
 	long writeback_stat;
+	u64 truncate_size, snap_size = 0;
+	u32 truncate_seq;
+	int err = 0, len = PAGE_CACHE_SIZE;
 
 	dout("writepage %p idx %lu\n", page, page->index);
 
@@ -474,13 +473,20 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	}
 	ceph_put_snap_context(oldest);
 
+	spin_lock(&ci->i_ceph_lock);
+	truncate_seq = ci->i_truncate_seq;
+	truncate_size = ci->i_truncate_size;
+	if (!snap_size)
+		snap_size = i_size_read(inode);
+	spin_unlock(&ci->i_ceph_lock);
+
 	/* is this a partial page at end of file? */
-	if (snap_size)
-		i_size = snap_size;
-	else
-		i_size = i_size_read(inode);
-	if (i_size < page_off + len)
-		len = i_size - page_off;
+	if (page_off >= snap_size) {
+		dout("%p page eof %llu\n", page, snap_size);
+		goto out;
+	}
+	if (snap_size < page_off + len)
+		len = snap_size - page_off;
 
 	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
 	     inode, page, page->index, page_off, len, snapc);
@@ -494,7 +500,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	err = ceph_osdc_writepages(osdc, ceph_vino(inode),
 				   &ci->i_layout, snapc,
 				   page_off, len,
-				   ci->i_truncate_seq, ci->i_truncate_size,
+				   truncate_seq, truncate_size,
 				   &inode->i_mtime, &page, 1);
 	if (err < 0) {
 		dout("writepage setting page/mapping error %d %p\n", err, page);
@@ -631,25 +637,6 @@ static void writepages_finish(struct ceph_osd_request *req,
 	ceph_osdc_put_request(req);
 }
 
-static struct ceph_osd_request *
-ceph_writepages_osd_request(struct inode *inode, u64 offset, u64 *len,
-				struct ceph_snap_context *snapc, int num_ops)
-{
-	struct ceph_fs_client *fsc;
-	struct ceph_inode_info *ci;
-	struct ceph_vino vino;
-
-	fsc = ceph_inode_to_client(inode);
-	ci = ceph_inode(inode);
-	vino = ceph_vino(inode);
-	/* BUG_ON(vino.snap != CEPH_NOSNAP); */
-
-	return ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-			vino, offset, len, num_ops, CEPH_OSD_OP_WRITE,
-			CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK,
-			snapc, ci->i_truncate_seq, ci->i_truncate_size, true);
-}
-
 /*
  * initiate async writeback
  */
@@ -658,7 +645,8 @@ static int ceph_writepages_start(struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc;
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_vino vino = ceph_vino(inode);
 	pgoff_t index, start, end;
 	int range_whole = 0;
 	int should_loop = 1;
@@ -670,7 +658,8 @@ static int ceph_writepages_start(struct address_space *mapping,
 	unsigned wsize = 1 << inode->i_blkbits;
 	struct ceph_osd_request *req = NULL;
 	int do_sync;
-	u64 snap_size;
+	u64 truncate_size, snap_size;
+	u32 truncate_seq;
 
 	/*
 	 * Include a 'sync' in the OSD request if this is a data
@@ -685,7 +674,6 @@ static int ceph_writepages_start(struct address_space *mapping,
 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
 	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-	fsc = ceph_inode_to_client(inode);
 	if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
 		pr_warning("writepage_start %p on forced umount\n", inode);
 		return -EIO; /* we're in a forced umount, don't write! */
@@ -728,6 +716,14 @@ retry:
 		snap_size = i_size_read(inode);
 	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
 	     snapc, snapc->seq, snapc->num_snaps);
+
+	spin_lock(&ci->i_ceph_lock);
+	truncate_seq = ci->i_truncate_seq;
+	truncate_size = ci->i_truncate_size;
+	if (!snap_size)
+		snap_size = i_size_read(inode);
+	spin_unlock(&ci->i_ceph_lock);
+
 	if (last_snapc && snapc != last_snapc) {
 		/* if we switched to a newer snapc, restart our scan at the
 		 * start of the original file range. */
@@ -739,7 +735,6 @@ retry:
 
 	while (!done && index <= end) {
 		int num_ops = do_sync ? 2 : 1;
-		struct ceph_vino vino;
 		unsigned i;
 		int first;
 		pgoff_t next;
@@ -833,17 +828,18 @@ get_more_pages:
 			 * that it will use.
 			 */
 			if (locked_pages == 0) {
-				size_t size;
-
 				BUG_ON(pages);
-
 				/* prepare async write request */
 				offset = (u64)page_offset(page);
 				len = wsize;
-				req = ceph_writepages_osd_request(inode,
-							offset, &len, snapc,
-							num_ops);
-
+				req = ceph_osdc_new_request(&fsc->client->osdc,
+							&ci->i_layout, vino,
+							offset, &len, num_ops,
+							CEPH_OSD_OP_WRITE,
+							CEPH_OSD_FLAG_WRITE |
+							CEPH_OSD_FLAG_ONDISK,
+							snapc, truncate_seq,
+							truncate_size, true);
 				if (IS_ERR(req)) {
 					rc = PTR_ERR(req);
 					unlock_page(page);
@@ -854,8 +850,8 @@ get_more_pages:
 				req->r_inode = inode;
 
 				max_pages = calc_pages_for(0, (u64)len);
-				size = max_pages * sizeof (*pages);
-				pages = kmalloc(size, GFP_NOFS);
+				pages = kmalloc(max_pages * sizeof (*pages),
+						GFP_NOFS);
 				if (!pages) {
 					pool = fsc->wb_pagevec_pool;
 					pages = mempool_alloc(pool, GFP_NOFS);
-- 
1.8.1.4


* [PATCH 7/9] ceph: check migrate seq before changing auth cap
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
                   ` (5 preceding siblings ...)
  2013-06-04  3:19 ` [PATCH 6/9] ceph: fix race between page writeback and truncate Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-04  3:19 ` [PATCH 8/9] ceph: clear migrate seq when MDS restarts Yan, Zheng
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

We may receive an old request reply from the exporter MDS after
receiving the importer MDS's cap import message. Only switch the
auth cap when the incoming migrate_seq is newer, so a stale reply
cannot change the auth cap back.
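
A userspace sketch of a wraparound-safe comparison in the spirit of
the kernel's ceph_seq_cmp(), plus the auth-cap decision it gates (the
values are illustrative):

#include <stdint.h>
#include <stdio.h>

static inline int32_t seq_cmp(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b);   /* <0: a is older than b */
}

int main(void)
{
	uint32_t auth_mseq = 5;    /* current auth cap's migrate seq */
	uint32_t new_mseq = 7;     /* from the importer's cap message */

	if (seq_cmp(auth_mseq, new_mseq) < 0)
		printf("switch auth cap (mseq %u -> %u)\n",
		       auth_mseq, new_mseq);
	return 0;
}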

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 54c290b..790f88b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -612,9 +612,11 @@ retry:
 		__cap_delay_requeue(mdsc, ci);
 	}
 
-	if (flags & CEPH_CAP_FLAG_AUTH)
-		ci->i_auth_cap = cap;
-	else if (ci->i_auth_cap == cap) {
+	if (flags & CEPH_CAP_FLAG_AUTH) {
+		if (ci->i_auth_cap == NULL ||
+		    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0)
+			ci->i_auth_cap = cap;
+	} else if (ci->i_auth_cap == cap) {
 		ci->i_auth_cap = NULL;
 		spin_lock(&mdsc->cap_dirty_lock);
 		if (!list_empty(&ci->i_dirty_item)) {
-- 
1.8.1.4


* [PATCH 8/9] ceph: clear migrate seq when MDS restarts
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
                   ` (6 preceding siblings ...)
  2013-06-04  3:19 ` [PATCH 7/9] ceph: check migrate seq before changing auth cap Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-04  3:19 ` [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes Yan, Zheng
  2013-06-09  6:23 ` [PATCH 0/9] fixes for kclient Sage Weil
  9 siblings, 0 replies; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

When caps are re-established through the session reconnect message,
cap->seq and cap->issue_seq are already reset; reset cap->mseq as
well, so a stale migrate_seq from before the MDS restart cannot
confuse later auth cap decisions.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/mds_client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e2d7e56..ce7a789 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2455,6 +2455,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	spin_lock(&ci->i_ceph_lock);
 	cap->seq = 0;        /* reset cap seq */
 	cap->issue_seq = 0;  /* and issue_seq */
+	cap->mseq = 0;       /* and migrate_seq */
 
 	if (recon_state->flock) {
 		rec.v2.cap_id = cpu_to_le64(cap->cap_id);
-- 
1.8.1.4


* [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
                   ` (7 preceding siblings ...)
  2013-06-04  3:19 ` [PATCH 8/9] ceph: clear migrate seq when MDS restarts Yan, Zheng
@ 2013-06-04  3:19 ` Yan, Zheng
  2013-06-11  6:09   ` Sage Weil
  2013-06-09  6:23 ` [PATCH 0/9] fixes for kclient Sage Weil
  9 siblings, 1 reply; 18+ messages in thread
From: Yan, Zheng @ 2013-06-04  3:19 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, elder, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

When the auth MDS for an inode changes, the inode may still sit on
the old auth MDS session's s_cap_flushing list. Move it to the new
auth session's list before re-sending the cap flush.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 790f88b..458a66e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1982,8 +1982,14 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
 	cap = ci->i_auth_cap;
 	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
 	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
+
 	__ceph_flush_snaps(ci, &session, 1);
+
 	if (ci->i_flushing_caps) {
+		spin_lock(&mdsc->cap_dirty_lock);
+		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
+		spin_unlock(&mdsc->cap_dirty_lock);
+
 		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
 				     __ceph_caps_used(ci),
 				     __ceph_caps_wanted(ci),
-- 
1.8.1.4


* Re: [PATCH 2/9] libceph: call r_unsafe_callback when unsafe reply is received
  2013-06-04  3:19 ` [PATCH 2/9] libceph: call r_unsafe_callback when unsafe reply is received Yan, Zheng
@ 2013-06-09  6:19   ` Sage Weil
  0 siblings, 0 replies; 18+ messages in thread
From: Sage Weil @ 2013-06-09  6:19 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel, elder

This one I need to review more carefully, as I didn't fully grok the last 
change here.

On Tue, 4 Jun 2013, Yan, Zheng wrote:

> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> We can't use !req->r_sent to check if OSD request is sent for the
> first time, this is because __cancel_request() zeros req->r_sent
> when OSD map changes. Rather than adding a new variable to
> ceph_osd_request to indicate if it's sent for the first time, We
> can call the unsafe callback only when unsafe OSD reply is received.
> If OSD's first reply is safe, just skip calling the unsafe callback.
> 
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  net/ceph/osd_client.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 536c0e5..6972d17 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -1338,10 +1338,6 @@ static void __send_request(struct ceph_osd_client *osdc,
>  
>  	ceph_msg_get(req->r_request); /* send consumes a ref */
>  
> -	/* Mark the request unsafe if this is the first timet's being sent. */
> -
> -	if (!req->r_sent && req->r_unsafe_callback)
> -		req->r_unsafe_callback(req, true);
>  	req->r_sent = req->r_osd->o_incarnation;
>  
>  	ceph_con_send(&req->r_osd->o_con, req->r_request);
> @@ -1432,8 +1428,6 @@ static void handle_osds_timeout(struct work_struct *work)
>  
>  static void complete_request(struct ceph_osd_request *req)
>  {
> -	if (req->r_unsafe_callback)
> -		req->r_unsafe_callback(req, false);
>  	complete_all(&req->r_safe_completion);  /* fsync waiter */
>  }
>  
> @@ -1560,14 +1554,20 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
>  	mutex_unlock(&osdc->request_mutex);
>  
>  	if (!already_completed) {
> +		if (req->r_unsafe_callback &&
> +		    result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
> +			req->r_unsafe_callback(req, true);
>  		if (req->r_callback)
>  			req->r_callback(req, msg);
>  		else
>  			complete_all(&req->r_completion);
>  	}
>  
> -	if (flags & CEPH_OSD_FLAG_ONDISK)
> +	if (flags & CEPH_OSD_FLAG_ONDISK) {
> +		if (req->r_unsafe_callback && already_completed)
> +			req->r_unsafe_callback(req, false);
>  		complete_request(req);
> +	}
>  
>  done:
>  	dout("req=%p req->r_linger=%d\n", req, req->r_linger);
> -- 
> 1.8.1.4
> 
> 

* Re: [PATCH 0/9] fixes for kclient
  2013-06-04  3:19 [PATCH 0/9] fixes for kclient Yan, Zheng
                   ` (8 preceding siblings ...)
  2013-06-04  3:19 ` [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes Yan, Zheng
@ 2013-06-09  6:23 ` Sage Weil
  9 siblings, 0 replies; 18+ messages in thread
From: Sage Weil @ 2013-06-09  6:23 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel, elder

I pulled these into the testing branch, with the exception of #2, which 
needs a closer look.

We should really be checking the userspace client for these same issues 
(the logic ones at least) to avoid fixing the same or similar bugs later..

Thanks!
sage

On Tue, 4 Jun 2013, Yan, Zheng wrote:

> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> this patch series are also in:
>   git://github.com/ukernel/linux.git wip-ceph
> 
> Regards
> Yan, Zheng
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 

* Re: [PATCH 1/9] libceph: fix safe completion
  2013-06-04  3:19 ` [PATCH 1/9] libceph: fix safe completion Yan, Zheng
@ 2013-06-11  4:04   ` Alex Elder
  0 siblings, 0 replies; 18+ messages in thread
From: Alex Elder @ 2013-06-11  4:04 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel, sage, elder

On 06/03/2013 10:19 PM, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> handle_reply() calls complete_request() only if the first OSD reply
> has ONDISK flag.

I believe that you're trying to fix a simple problem here, but
you are changing the logic around in several ways at the same
time, which makes it very difficult to follow.

Let me see if I can explain what you've done:
- There's no reason to defer setting already_completed; it can be
  set earlier.
- req->r_completed will be 0 until the first time a reply for
  req is received, at which point it will be set to 1.  That
  is exactly the same as what happens for req->r_got_reply,
  so already_completed can be equivalently set from that.
- That makes req->r_completed unnecessary, so it can be
  removed.
- The test near the end can be inverted, and a block can
  be executed rather than jumping over it with "goto done;"

Now, given those changes...
- This leaves the call to complete_request() happening *only*
  when the request had not already been completed *and* the
  current completion supplied the ONDISK flag.

And therein lies the problem you're trying to solve--it's
possible that a completion for the request arrived before,
but did not have the ONDISK flag set, and because of that a
later reply with ONDISK set will not call complete_request()
as required.

The fix for that is to move the complete_request() call out
so it's called only when ONDISK is set, but regardless of
the value of already_completed.

Is that correct?

If my understanding is correct, I guess I'll say I've reviewed
this change, and in that case:

    Reviewed-by: Alex Elder <elder@kernel.org>

It would have been a lot easier to review this with a
better explanation, and with fewer logic changes rolled
into the patch.

					-Alex


> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  include/linux/ceph/osd_client.h |  1 -
>  net/ceph/osd_client.c           | 16 ++++++++--------
>  2 files changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index 186db0b..ce6df39 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -145,7 +145,6 @@ struct ceph_osd_request {
>  	s32               r_reply_op_result[CEPH_OSD_MAX_OP];
>  	int               r_got_reply;
>  	int		  r_linger;
> -	int		  r_completed;
>  
>  	struct ceph_osd_client *r_osdc;
>  	struct kref       r_kref;
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index a3395fd..536c0e5 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -1525,6 +1525,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
>  	for (i = 0; i < numops; i++)
>  		req->r_reply_op_result[i] = ceph_decode_32(&p);
>  
> +	already_completed = req->r_got_reply;
> +
>  	if (!req->r_got_reply) {
>  
>  		req->r_result = result;
> @@ -1555,16 +1557,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
>  	    ((flags & CEPH_OSD_FLAG_WRITE) == 0))
>  		__unregister_request(osdc, req);
>  
> -	already_completed = req->r_completed;
> -	req->r_completed = 1;
>  	mutex_unlock(&osdc->request_mutex);
> -	if (already_completed)
> -		goto done;
>  
> -	if (req->r_callback)
> -		req->r_callback(req, msg);
> -	else
> -		complete_all(&req->r_completion);
> +	if (!already_completed) {
> +		if (req->r_callback)
> +			req->r_callback(req, msg);
> +		else
> +			complete_all(&req->r_completion);
> +	}
>  
>  	if (flags & CEPH_OSD_FLAG_ONDISK)
>  		complete_request(req);
> 


* Re: [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes
  2013-06-04  3:19 ` [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes Yan, Zheng
@ 2013-06-11  6:09   ` Sage Weil
  2013-06-11  6:17     ` Sage Weil
  2013-06-17  2:45     ` Yan, Zheng
  0 siblings, 2 replies; 18+ messages in thread
From: Sage Weil @ 2013-06-11  6:09 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel, elder

Hi Yan-

On Tue, 4 Jun 2013, Yan, Zheng wrote:

> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  fs/ceph/caps.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 790f88b..458a66e 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1982,8 +1982,14 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
>  	cap = ci->i_auth_cap;
>  	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
>  	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
> +
>  	__ceph_flush_snaps(ci, &session, 1);

This function does funny things to the local session pointer... did you 
consider this when using it below?  It can change to the auth cap's MDS 
if that is different from the value passed in...

> +
>  	if (ci->i_flushing_caps) {
> +		spin_lock(&mdsc->cap_dirty_lock);
> +		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
> +		spin_unlock(&mdsc->cap_dirty_lock);
> +
>  		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
>  				     __ceph_caps_used(ci),
>  				     __ceph_caps_wanted(ci),
> -- 
> 1.8.1.4
> 
> 

* Re: [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes
  2013-06-11  6:09   ` Sage Weil
@ 2013-06-11  6:17     ` Sage Weil
  2013-06-11 10:37       ` Yan, Zheng
  2013-06-17  2:45     ` Yan, Zheng
  1 sibling, 1 reply; 18+ messages in thread
From: Sage Weil @ 2013-06-11  6:17 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel, elder

On Mon, 10 Jun 2013, Sage Weil wrote:
> Hi Yan-
> 
> On Tue, 4 Jun 2013, Yan, Zheng wrote:
> 
> > From: "Yan, Zheng" <zheng.z.yan@intel.com>
> > 
> > Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> > ---
> >  fs/ceph/caps.c | 6 ++++++
> >  1 file changed, 6 insertions(+)
> > 
> > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > index 790f88b..458a66e 100644
> > --- a/fs/ceph/caps.c
> > +++ b/fs/ceph/caps.c
> > @@ -1982,8 +1982,14 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
> >  	cap = ci->i_auth_cap;
> >  	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
> >  	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
> > +
> >  	__ceph_flush_snaps(ci, &session, 1);
> 
> This function does funny things to the local session pointer... did you 
> consider this when using it below?  It can change to the auth cap mds if 
> it is different than the value passed in...

I wonder if we screwed something up here, but I just got a crash inside 
remove_session_caps() that might be explained by a corrupt list.  I don't 
think I've seen this before..

0xffff880214aabf20      753        2  1    3   R  0xffff880214aac3a8 
*kworker/3:2
 ffff880224a33ae8 0000000000000018 ffffffffa0814d63 ffff880224f85800
 ffff88020b277790 ffff880224f85800 ffff88020c04e800 ffff880224a33c08
 ffffffffa081a1cf ffffffffffffffff ffff880224a33fd8 ffffffffffffffff
Call Trace:
 [<ffffffffa0814d63>] ? remove_session_caps+0x33/0x140 [ceph]
 [<ffffffffa081a1cf>] ? dispatch+0x7ff/0x1740 [ceph]
 [<ffffffff81510b66>] ? kernel_recvmsg+0x46/0x60
 [<ffffffffa07c4e38>] ? ceph_tcp_recvmsg+0x48/0x60 [libceph]
 [<ffffffff810a317d>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffffa07c81f8>] ? con_work+0x1948/0x2d50 [libceph]
 [<ffffffff81080c93>] ? idle_balance+0x133/0x180
 [<ffffffff81071c58>] ? finish_task_switch+0x48/0x110
 [<ffffffff81071c58>] ? finish_task_switch+0x48/0x110
 [<ffffffff8105f44f>] ? process_one_work+0x16f/0x540
 [<ffffffff8105f4ba>] ? process_one_work+0x1da/0x540
 [<ffffffff8105f44f>] ? process_one_work+0x16f/0x540
 [<ffffffff8106069c>] ? worker_thread+0x11c/0x370
 [<ffffffff81060580>] ? manage_workers.isra.20+0x2e0/0x2e0
 [<ffffffff8106735a>] ? kthread+0xea/0xf0



> 
> > +
> >  	if (ci->i_flushing_caps) {
> > +		spin_lock(&mdsc->cap_dirty_lock);
> > +		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
> > +		spin_unlock(&mdsc->cap_dirty_lock);
> > +
> >  		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
> >  				     __ceph_caps_used(ci),
> >  				     __ceph_caps_wanted(ci),
> > -- 
> > 1.8.1.4
> > 
> > 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 

* Re: [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes
  2013-06-11  6:17     ` Sage Weil
@ 2013-06-11 10:37       ` Yan, Zheng
  0 siblings, 0 replies; 18+ messages in thread
From: Yan, Zheng @ 2013-06-11 10:37 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel, elder

On 06/11/2013 02:17 PM, Sage Weil wrote:
> On Mon, 10 Jun 2013, Sage Weil wrote:
>> Hi Yan-
>>
>> On Tue, 4 Jun 2013, Yan, Zheng wrote:
>>
>>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>>
>>> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
>>> ---
>>>  fs/ceph/caps.c | 6 ++++++
>>>  1 file changed, 6 insertions(+)
>>>
>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>> index 790f88b..458a66e 100644
>>> --- a/fs/ceph/caps.c
>>> +++ b/fs/ceph/caps.c
>>> @@ -1982,8 +1982,14 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
>>>  	cap = ci->i_auth_cap;
>>>  	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
>>>  	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
>>> +
>>>  	__ceph_flush_snaps(ci, &session, 1);
>>
>> This function does funny things to the local session pointer... did you 
>> consider this when using it below?  It can change to the auth cap mds if 
>> it is different than the value passed in...
> 
I didn't realize that. But even taking that into consideration, I still
don't understand how the list gets corrupted. Did you use snapshots?
How many active MDSes?

> I wonder if we screwed something up here, but I just got a crash inside 
> remove_session_caps() that might be explained by a corrupt list.  I don't 
> think I've seen this before..

Which one was it: BUG_ON(session->s_nr_caps > 0) or
BUG_ON(!list_empty(&session->s_cap_flushing))? And why did the kclient
receive a CEPH_SESSION_CLOSE message?

Regards
Yan, Zheng
> 
> 0xffff880214aabf20      753        2  1    3   R  0xffff880214aac3a8 
> *kworker/3:2
>  ffff880224a33ae8 0000000000000018 ffffffffa0814d63 ffff880224f85800
>  ffff88020b277790 ffff880224f85800 ffff88020c04e800 ffff880224a33c08
>  ffffffffa081a1cf ffffffffffffffff ffff880224a33fd8 ffffffffffffffff
> Call Trace:
>  [<ffffffffa0814d63>] ? remove_session_caps+0x33/0x140 [ceph]
>  [<ffffffffa081a1cf>] ? dispatch+0x7ff/0x1740 [ceph]
>  [<ffffffff81510b66>] ? kernel_recvmsg+0x46/0x60
>  [<ffffffffa07c4e38>] ? ceph_tcp_recvmsg+0x48/0x60 [libceph]
>  [<ffffffff810a317d>] ? trace_hardirqs_on+0xd/0x10
>  [<ffffffffa07c81f8>] ? con_work+0x1948/0x2d50 [libceph]
>  [<ffffffff81080c93>] ? idle_balance+0x133/0x180
>  [<ffffffff81071c58>] ? finish_task_switch+0x48/0x110
>  [<ffffffff81071c58>] ? finish_task_switch+0x48/0x110
>  [<ffffffff8105f44f>] ? process_one_work+0x16f/0x540
>  [<ffffffff8105f4ba>] ? process_one_work+0x1da/0x540
>  [<ffffffff8105f44f>] ? process_one_work+0x16f/0x540
>  [<ffffffff8106069c>] ? worker_thread+0x11c/0x370
>  [<ffffffff81060580>] ? manage_workers.isra.20+0x2e0/0x2e0
>  [<ffffffff8106735a>] ? kthread+0xea/0xf0
> 
> 
> 
>>
>>> +
>>>  	if (ci->i_flushing_caps) {
>>> +		spin_lock(&mdsc->cap_dirty_lock);
>>> +		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
>>> +		spin_unlock(&mdsc->cap_dirty_lock);
>>> +
>>>  		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
>>>  				     __ceph_caps_used(ci),
>>>  				     __ceph_caps_wanted(ci),
>>> -- 
>>> 1.8.1.4
>>>
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>


* Re: [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes
  2013-06-11  6:09   ` Sage Weil
  2013-06-11  6:17     ` Sage Weil
@ 2013-06-17  2:45     ` Yan, Zheng
  2013-06-17  2:54       ` Sage Weil
  1 sibling, 1 reply; 18+ messages in thread
From: Yan, Zheng @ 2013-06-17  2:45 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel

Updated patch below; it uses cap->session instead of the local session
pointer, which __ceph_flush_snaps() may have changed.

---
From 2675b6cc7afe25c9b7e644396df3b54bec003ab2 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 31 May 2013 16:40:24 +0800
Subject: [PATCH 1/3] ceph: move inode to proper flushing list when auth MDS
 changes

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 790f88b..9a5ccc9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1982,8 +1982,15 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
 	cap = ci->i_auth_cap;
 	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
 	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
+
 	__ceph_flush_snaps(ci, &session, 1);
+
 	if (ci->i_flushing_caps) {
+		spin_lock(&mdsc->cap_dirty_lock);
+		list_move_tail(&ci->i_flushing_item,
+			       &cap->session->s_cap_flushing);
+		spin_unlock(&mdsc->cap_dirty_lock);
+
 		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
 				     __ceph_caps_used(ci),
 				     __ceph_caps_wanted(ci),
-- 
1.8.1.4


* Re: [PATCH 9/9] ceph: move inode to proper flushing list when auth MDS changes
  2013-06-17  2:45     ` Yan, Zheng
@ 2013-06-17  2:54       ` Sage Weil
  0 siblings, 0 replies; 18+ messages in thread
From: Sage Weil @ 2013-06-17  2:54 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel

Excellent, pushed.  I'll review the others tomorrow.

Thanks!
sage

On Mon, 17 Jun 2013, Yan, Zheng wrote:

> updated patch, use cap->session instead of local session pointer
> 
> ---
> From 2675b6cc7afe25c9b7e644396df3b54bec003ab2 Mon Sep 17 00:00:00 2001
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> Date: Fri, 31 May 2013 16:40:24 +0800
> Subject: [PATCH 1/3] ceph: move inode to proper flushing list when auth MDS
>  changes
> 
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  fs/ceph/caps.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 790f88b..9a5ccc9 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1982,8 +1982,15 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
>  	cap = ci->i_auth_cap;
>  	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
>  	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
> +
>  	__ceph_flush_snaps(ci, &session, 1);
> +
>  	if (ci->i_flushing_caps) {
> +		spin_lock(&mdsc->cap_dirty_lock);
> +		list_move_tail(&ci->i_flushing_item,
> +			       &cap->session->s_cap_flushing);
> +		spin_unlock(&mdsc->cap_dirty_lock);
> +
>  		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
>  				     __ceph_caps_used(ci),
>  				     __ceph_caps_wanted(ci),
> -- 
> 1.8.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
