All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 07/27] xfs: Introduce ordered log vector support
Date: Wed, 12 Jun 2013 20:22:27 +1000	[thread overview]
Message-ID: <1371032567-21772-8-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1371032567-21772-1-git-send-email-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

An "ordered log vector" is a log vector that is used for
tracking a log item through the CIL and into the AIL as part of the
log checkpointing. These ordered log vectors are special in that
they are not written to the journal in any way, and are not accounted
to the checkpoint being written.

The reason for this behaviour is to allow operations to attach items
to transactions and have them follow the normal transactional
lifecycle without actually having to write them to the journal. This
allows logging of items that track high-level logical changes and
writing them to the log, while the physical items being modified
pass through into the AIL and pin the tail of the log (and therefore
the logical item in the log) until all the modified items are
physically written to disk.

IOWs, it allows us to write metadata without physically logging
every individual change but still maintain the full transactional
integrity guarantees we currently have w.r.t. crash recovery.

This change modifies some of the CIL item insertion loops, as
ordered log vectors introduce some new constraints as they don't
track any data. One advantage of this change is that it combines
two log vector chain walks into a single pass, so there is less
overhead in the transaction commit pass as well. It also kills some
unused code in the log vector walk loop when committing the CIL.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_log.c     |   21 +++++++++++---
 fs/xfs/xfs_log.h     |    2 ++
 fs/xfs/xfs_log_cil.c |   75 ++++++++++++++++++++++++++++++++++----------------
 3 files changed, 70 insertions(+), 28 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b345a7c..db08d34 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1963,6 +1963,10 @@ xlog_write_calc_vec_length(
 		headers++;
 
 	for (lv = log_vector; lv; lv = lv->lv_next) {
+		/* we don't write ordered log vectors */
+		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
+			continue;
+
 		headers += lv->lv_niovecs;
 
 		for (i = 0; i < lv->lv_niovecs; i++) {
@@ -2216,7 +2220,7 @@ xlog_write(
 	index = 0;
 	lv = log_vector;
 	vecp = lv->lv_iovecp;
-	while (lv && index < lv->lv_niovecs) {
+	while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
 		void		*ptr;
 		int		log_offset;
 
@@ -2236,13 +2240,21 @@ xlog_write(
 		 * This loop writes out as many regions as can fit in the amount
 		 * of space which was allocated by xlog_state_get_iclog_space().
 		 */
-		while (lv && index < lv->lv_niovecs) {
-			struct xfs_log_iovec	*reg = &vecp[index];
+		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
+			struct xfs_log_iovec	*reg;
 			struct xlog_op_header	*ophdr;
 			int			start_rec_copy;
 			int			copy_len;
 			int			copy_off;
+			bool			ordered = false;
+
+			/* ordered log vectors have no regions to write */
+			if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
+				ordered = true;
+				goto next_lv;
+			}
 
+			reg = &vecp[index];
 			ASSERT(reg->i_len % sizeof(__int32_t) == 0);
 			ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
 
@@ -2302,12 +2314,13 @@ xlog_write(
 				break;
 
 			if (++index == lv->lv_niovecs) {
+next_lv:
 				lv = lv->lv_next;
 				index = 0;
 				if (lv)
 					vecp = lv->lv_iovecp;
 			}
-			if (record_cnt == 0) {
+			if (record_cnt == 0 && ordered == false) {
 				if (!lv)
 					return 0;
 				break;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 5caee96..b20918c 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -105,6 +105,8 @@ struct xfs_log_vec {
 	int			lv_buf_len;	/* size of formatted buffer */
 };
 
+#define XFS_LOG_VEC_ORDERED	(-1)
+
 /*
  * Structure used to pass callback function and the function's argument
  * to the log manager.
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index d0833b5..02b9cf3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -127,6 +127,7 @@ xlog_cil_prepare_log_vecs(
 		int	index;
 		int	len = 0;
 		uint	niovecs;
+		bool	ordered = false;
 
 		/* Skip items which aren't dirty in this transaction. */
 		if (!(lidp->lid_flags & XFS_LID_DIRTY))
@@ -137,14 +138,30 @@ xlog_cil_prepare_log_vecs(
 		if (!niovecs)
 			continue;
 
+		/*
+		 * Ordered items need to be tracked but we do not wish to write
+		 * them. We need a logvec to track the object, but we do not
+		 * need an iovec or buffer to be allocated for copying data.
+		 */
+		if (niovecs == XFS_LOG_VEC_ORDERED) {
+			ordered = true;
+			niovecs = 0;
+		}
+
 		new_lv = kmem_zalloc(sizeof(*new_lv) +
 				niovecs * sizeof(struct xfs_log_iovec),
 				KM_SLEEP|KM_NOFS);
 
+		new_lv->lv_item = lidp->lid_item;
+		new_lv->lv_niovecs = niovecs;
+		if (ordered) {
+			/* track as an ordered logvec */
+			new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+			goto next;
+		}
+
 		/* The allocated iovec region lies beyond the log vector. */
 		new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
-		new_lv->lv_niovecs = niovecs;
-		new_lv->lv_item = lidp->lid_item;
 
 		/* build the vector array and calculate it's length */
 		IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
@@ -165,6 +182,7 @@ xlog_cil_prepare_log_vecs(
 		}
 		ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
 
+next:
 		if (!ret_lv)
 			ret_lv = new_lv;
 		else
@@ -191,8 +209,18 @@ xfs_cil_prepare_item(
 
 	if (old) {
 		/* existing lv on log item, space used is a delta */
-		ASSERT(!list_empty(&lv->lv_item->li_cil));
-		ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
+		ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) ||
+			old->lv_buf_len == XFS_LOG_VEC_ORDERED);
+
+		/*
+		 * If the new item is ordered, keep the old one that is already
+		 * tracking dirty or ordered regions
+		 */
+		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
+			ASSERT(!lv->lv_buf);
+			kmem_free(lv);
+			return;
+		}
 
 		*len += lv->lv_buf_len - old->lv_buf_len;
 		*diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
@@ -201,10 +229,11 @@ xfs_cil_prepare_item(
 	} else {
 		/* new lv, must pin the log item */
 		ASSERT(!lv->lv_item->li_lv);
-		ASSERT(list_empty(&lv->lv_item->li_cil));
 
-		*len += lv->lv_buf_len;
-		*diff_iovecs += lv->lv_niovecs;
+		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
+			*len += lv->lv_buf_len;
+			*diff_iovecs += lv->lv_niovecs;
+		}
 		IOP_PIN(lv->lv_item);
 
 	}
@@ -259,18 +288,24 @@ xlog_cil_insert_items(
 	 * We can do this safely because the context can't checkpoint until we
 	 * are done so it doesn't matter exactly how we update the CIL.
 	 */
-	for (lv = log_vector; lv; lv = lv->lv_next)
-		xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
-
-	/* account for space used by new iovec headers  */
-	len += diff_iovecs * sizeof(xlog_op_header_t);
-
 	spin_lock(&cil->xc_cil_lock);
+	for (lv = log_vector; lv; ) {
+		struct xfs_log_vec *next = lv->lv_next;
 
-	/* move the items to the tail of the CIL */
-	for (lv = log_vector; lv; lv = lv->lv_next)
+		ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil));
+		lv->lv_next = NULL;
+
+		/*
+		 * xfs_cil_prepare_item() may free the lv, so move the item on
+		 * the CIL first.
+		 */
 		list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+		xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
+		lv = next;
+	}
 
+	/* account for space used by new iovec headers  */
+	len += diff_iovecs * sizeof(xlog_op_header_t);
 	ctx->nvecs += diff_iovecs;
 
 	/*
@@ -381,9 +416,7 @@ xlog_cil_push(
 	struct xfs_cil_ctx	*new_ctx;
 	struct xlog_in_core	*commit_iclog;
 	struct xlog_ticket	*tic;
-	int			num_lv;
 	int			num_iovecs;
-	int			len;
 	int			error = 0;
 	struct xfs_trans_header thdr;
 	struct xfs_log_iovec	lhdr;
@@ -428,12 +461,9 @@ xlog_cil_push(
 	 * side which is currently locked out by the flush lock.
 	 */
 	lv = NULL;
-	num_lv = 0;
 	num_iovecs = 0;
-	len = 0;
 	while (!list_empty(&cil->xc_cil)) {
 		struct xfs_log_item	*item;
-		int			i;
 
 		item = list_first_entry(&cil->xc_cil,
 					struct xfs_log_item, li_cil);
@@ -444,11 +474,7 @@ xlog_cil_push(
 			lv->lv_next = item->li_lv;
 		lv = item->li_lv;
 		item->li_lv = NULL;
-
-		num_lv++;
 		num_iovecs += lv->lv_niovecs;
-		for (i = 0; i < lv->lv_niovecs; i++)
-			len += lv->lv_iovecp[i].i_len;
 	}
 
 	/*
@@ -701,6 +727,7 @@ xfs_log_commit_cil(
 	if (commit_lsn)
 		*commit_lsn = log->l_cilp->xc_ctx->sequence;
 
+	/* xlog_cil_insert_items() destroys log_vector list */
 	xlog_cil_insert_items(log, log_vector, tp->t_ticket);
 
 	/* check we didn't blow the reservation */
-- 
1.7.10.4

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2013-06-12 10:23 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-12 10:22 [PATCH 00/27] xfs: current patch queue for 3.11 Dave Chinner
2013-06-12 10:22 ` [PATCH 01/27] xfs: update mount options documentation Dave Chinner
2013-06-13 13:34   ` Eric Sandeen
2013-06-14  0:40     ` Dave Chinner
2013-06-14  0:53       ` Eric Sandeen
2013-06-12 10:22 ` [PATCH 02/27] xfs: add pluging for bulkstat readahead Dave Chinner
2013-06-12 10:22 ` [PATCH 03/27] xfs: plug directory buffer readahead Dave Chinner
2013-06-12 10:22 ` [PATCH 04/27] xfs: don't use speculative prealloc for small files Dave Chinner
2013-06-12 16:10   ` Brian Foster
2013-06-13  0:50     ` Dave Chinner
2013-06-12 10:22 ` [PATCH 05/27] xfs: don't do IO when creating an new inode Dave Chinner
2013-06-12 10:22 ` [PATCH 06/27] xfs: xfs_ifree doesn't need to modify the inode buffer Dave Chinner
2013-06-12 10:22 ` Dave Chinner [this message]
2013-06-12 10:22 ` [PATCH 08/27] xfs: Introduce an ordered buffer item Dave Chinner
2013-06-12 10:22 ` [PATCH 09/27] xfs: Inode create log items Dave Chinner
2013-06-12 10:22 ` [PATCH 10/27] xfs: Inode create transaction reservations Dave Chinner
2013-06-12 10:22 ` [PATCH 11/27] xfs: Inode create item recovery Dave Chinner
2013-06-12 10:22 ` [PATCH 12/27] xfs: Use inode create transaction Dave Chinner
2013-06-12 10:22 ` [PATCH 13/27] xfs: remove local fork format handling from xfs_bmapi_write() Dave Chinner
2013-06-12 10:22 ` [PATCH 14/27] xfs: move getdents code into it's own file Dave Chinner
2013-06-12 10:22 ` [PATCH 15/27] xfs: reshuffle dir2 definitions around for userspace Dave Chinner
2013-06-17 16:05   ` Christoph Hellwig
2013-06-18 21:12     ` Dave Chinner
2013-06-18 21:35       ` Dave Chinner
2013-06-12 10:22 ` [PATCH 16/27] xfs: split out attribute listing code into separate file Dave Chinner
2013-06-12 10:22 ` [PATCH 17/27] xfs: split out attribute fork truncation " Dave Chinner
2013-06-12 10:22 ` [PATCH 18/27] xfs: split out xfs inode operations " Dave Chinner
2013-06-12 14:05   ` Christoph Hellwig
2013-06-13  1:14     ` Dave Chinner
2013-06-13  8:00       ` Dave Chinner
2013-06-17 15:56         ` Christoph Hellwig
2013-06-17 18:14           ` Ben Myers
2013-06-18 20:40             ` Dave Chinner
2013-06-18 21:37               ` Ben Myers
2013-06-18 22:02                 ` Dave Chinner
2013-06-12 10:22 ` [PATCH 19/27] xfs: consolidate xfs_vnodeops.c into xfs_inode_ops.c Dave Chinner
2013-06-12 13:59   ` Christoph Hellwig
2013-06-13  1:39     ` Dave Chinner
2013-06-17 16:02       ` Christoph Hellwig
2013-06-18 20:55         ` Dave Chinner
2013-06-12 10:22 ` [PATCH 20/27] xfs: move xfs_getbmap to xfs_extent_ops.c Dave Chinner
2013-06-12 10:22 ` [PATCH 21/27] xfs: introduce xfs_sb.c for sharing with libxfs Dave Chinner
2013-06-12 10:22 ` [PATCH 22/27] xfs: move xfs_trans_reservations to xfs_trans.h Dave Chinner
2013-06-12 10:22 ` [PATCH 23/27] xfs: sync minor header differences needed by userspace Dave Chinner
2013-06-12 10:22 ` [PATCH 24/27] xfs: move xfs_bmap_punch_delalloc() to xfs_aops.c Dave Chinner
2013-06-12 14:06   ` Christoph Hellwig
2013-06-13  1:39     ` Dave Chinner
2013-06-12 10:22 ` [PATCH 25/27] xfs: split out transaction reservation code Dave Chinner
2013-06-12 10:22 ` [PATCH 26/27] xfs: minor cleanups Dave Chinner
2013-06-12 10:22 ` [PATCH 27/27] xfs: fix issues that cause userspace warnings Dave Chinner
2013-06-17 19:32   ` Brian Foster
2013-06-18 21:42     ` Dave Chinner
2013-06-12 13:06 ` [PATCH 00/27] xfs: current patch queue for 3.11 Brian Foster
2013-06-13  1:40   ` Dave Chinner
2013-06-12 14:17 ` Ben Myers
2013-06-13  1:58   ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1371032567-21772-8-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.