* [PATCH] xfs: introduce object readahead to log recovery
From: zwu.kernel @ 2013-07-25  8:23 UTC
  To: xfs; +Cc: linux-fsdevel, linux-kernel, Zhi Yong Wu

From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

  Log recovery can take a long time because it is single threaded and
bound by read latency. Most of the recovery time is spent waiting for
read IO to complete, so introducing object readahead to log recovery
should noticeably reduce the recovery time.

  In the dirty log case below:
    data device: 0xfd10
    log device: 0xfd10 daddr: 20480032 length: 20480

    log tail: 7941 head: 11077 state: <DIRTY>

The dirty ratio is about 15%. I am planning to run tests at a larger
scale and on a dirtier filesystem.

Log recovery time stat:

            w/o this patch        w/ this patch
  real         0m1.051s             0m0.965s
  sys          0m0.033s             0m0.035s

  iowait       0m1.018s             0m0.930s

Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
---
 fs/xfs/xfs_log_recover.c | 131 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 127 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6fcc910a..f07e5e0 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3107,6 +3107,121 @@ xlog_recover_free_trans(
 	kmem_free(trans);
 }
 
+STATIC void
+xlog_recover_buffer_ra_pass2(
+	struct xlog                     *log,
+	struct xlog_recover_item        *item)
+{
+	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
+	xfs_mount_t		*mp = log->l_mp;
+
+	if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
+			buf_f->blf_len, buf_f->blf_flags)) {
+		return;
+	}
+
+	xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno,
+			buf_f->blf_len, NULL);
+}
+
+STATIC void
+xlog_recover_inode_ra_pass2(
+	struct xlog                     *log,
+	struct xlog_recover_item        *item)
+{
+	xfs_inode_log_format_t	*in_f;
+	xfs_mount_t		*mp = log->l_mp;
+	int			error;
+	int			need_free = 0;
+
+	if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
+		in_f = item->ri_buf[0].i_addr;
+	} else {
+		in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
+		need_free = 1;
+		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
+		if (error)
+			goto error;
+	}
+
+	if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
+					in_f->ilf_len, 0)) {
+		goto error;
+	}
+
+	xfs_buf_readahead(mp->m_ddev_targp, in_f->ilf_blkno,
+			in_f->ilf_len, &xfs_inode_buf_ops);
+
+error:
+	if (need_free)
+		kmem_free(in_f);
+}
+
+STATIC void
+xlog_recover_dquot_ra_pass2(
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
+{
+	xfs_mount_t		*mp = log->l_mp;
+	xfs_buf_t		*bp;
+	struct xfs_disk_dquot	*recddq;
+	int			error;
+	xfs_dq_logformat_t	*dq_f;
+	uint			type;
+
+
+	if (mp->m_qflags == 0)
+		return;
+
+	recddq = item->ri_buf[1].i_addr;
+	if (recddq == NULL)
+		return;
+	if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t))
+		return;
+
+	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
+	ASSERT(type);
+	if (log->l_quotaoffs_flag & type)
+		return;
+
+	dq_f = item->ri_buf[0].i_addr;
+	ASSERT(dq_f);
+	error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
+			   "xlog_recover_dquot_ra_pass2 (log copy)");
+	if (error)
+		return;
+	ASSERT(dq_f->qlf_len == 1);
+
+	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
+				   XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
+				   NULL);
+	if (!error)
+		xfs_buf_relse(bp);
+}
+
+STATIC void
+xlog_recover_ra_pass2(
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
+{
+	switch (ITEM_TYPE(item)) {
+	case XFS_LI_BUF:
+		xlog_recover_buffer_ra_pass2(log, item);
+		break;
+	case XFS_LI_INODE:
+		xlog_recover_inode_ra_pass2(log, item);
+		break;
+	case XFS_LI_DQUOT:
+		xlog_recover_dquot_ra_pass2(log, item);
+		break;
+	case XFS_LI_EFI:
+	case XFS_LI_EFD:
+	case XFS_LI_QUOTAOFF:
+	default:
+		break;
+	}
+}
+
 STATIC int
 xlog_recover_commit_pass1(
 	struct xlog			*log,
@@ -3140,10 +3255,14 @@ xlog_recover_commit_pass2(
 	struct xlog			*log,
 	struct xlog_recover		*trans,
 	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item)
+	struct xlog_recover_item	*item,
+	struct xlog_recover_item	*next_item)
 {
 	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
 
+	if (next_item)
+		xlog_recover_ra_pass2(log, next_item);
+
 	switch (ITEM_TYPE(item)) {
 	case XFS_LI_BUF:
 		return xlog_recover_buffer_pass2(log, buffer_list, item);
@@ -3181,7 +3300,7 @@ xlog_recover_commit_trans(
 	int			pass)
 {
 	int			error = 0, error2;
-	xlog_recover_item_t	*item;
+	xlog_recover_item_t	*item, *next_item, *temp_item;
 	LIST_HEAD		(buffer_list);
 
 	hlist_del(&trans->r_list);
@@ -3190,14 +3309,18 @@ xlog_recover_commit_trans(
 	if (error)
 		return error;
 
-	list_for_each_entry(item, &trans->r_itemq, ri_list) {
+	list_for_each_entry_safe(item, temp_item, &trans->r_itemq, ri_list) {
 		switch (pass) {
 		case XLOG_RECOVER_PASS1:
 			error = xlog_recover_commit_pass1(log, trans, item);
 			break;
 		case XLOG_RECOVER_PASS2:
+			if (&temp_item->ri_list != &trans->r_itemq)
+				next_item = temp_item;
+			else
+				next_item = NULL;
 			error = xlog_recover_commit_pass2(log, trans,
-							  &buffer_list, item);
+					  &buffer_list, item, next_item);
 			break;
 		default:
 			ASSERT(0);
-- 
1.7.11.7



* Re: [PATCH] xfs: introduce object readahead to log recovery
From: Dave Chinner @ 2013-07-26  2:50 UTC
  To: zwu.kernel; +Cc: xfs, linux-fsdevel, linux-kernel, Zhi Yong Wu

On Thu, Jul 25, 2013 at 04:23:39PM +0800, zwu.kernel@gmail.com wrote:
> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> 
>   It can take a long time to run log recovery operation because it is
> single threaded and is bound by read latency. We can find that it took
> most of the time to wait for the read IO to occur, so if one object
> readahead is introduced to log recovery, it will obviously reduce the
> log recovery time.
> 
>   In dirty log case as below:
>     data device: 0xfd10
>     log device: 0xfd10 daddr: 20480032 length: 20480
> 
>     log tail: 7941 head: 11077 state: <DIRTY>

That's only a small log (10MB). As I've said on irc, readahead won't
show any real difference on this and you need to be testing with
large logs.
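
(A larger log can be forced at mkfs time, e.g. "mkfs.xfs -f -l size=1g
<dev>", if the default log on the test device is too small.)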

> 
> This dirty ratio is about 15%. I am trying to do tests in larger scale
> and dirtier filesystem environment.
> 
> Log recovery time stat:
> 
>             w/o this patch        w/ this patch
>   real         0m1.051s             0m0.965s
>   sys          0m0.033s             0m0.035s
> 
>   iowait       0m1.018s             0m0.930s

Well, it's not made much of a difference there. That's probably
within the noise of repeated log recovery runs.

My simple test is:

$ cat recovery_time.sh
#!/bin/bash

cd /home/dave/src/compilebench-0.6/

mkfs.xfs -f /dev/vdc;
mount /dev/vdc /mnt/scratch
chmod 777 /mnt/scratch;
# dirty the log with ~55s of metadata-heavy load
./compilebench --no-sync -D /mnt/scratch &
sleep 55
# shut the filesystem down hard, leaving the log dirty
/home/dave/src/xfstests-dev/src/godown /mnt/scratch
umount /mnt/scratch
xfs_logprint -t /dev/vdc |head -20
# log recovery runs on mount; time it
time mount /dev/vdc /mnt/scratch
umount /mnt/scratch
$

And the recovery time from this is between 15-17s:

....
    log device: 0xfd20 daddr: 107374182032 length: 4173824
                                                   ^^^^^^^ almost 2GB
        log tail: 19288 head: 264809 state: <DIRTY>
....
real    0m17.913s
user    0m0.000s
sys     0m2.381s

And runs at 3000-4000 read IOPS for most of that time. It's largely IO
bound, even on SSDs.

With your patch:

log tail: 35871 head: 308393 state: <DIRTY>
real    0m12.715s
user    0m0.000s
sys     0m2.247s

And it peaked at ~5000 read IOPS.

It's definitely an improvement, but there's a lot of dead time still
spent waiting for IO that we should be able to remove. Let's have a
look at the code...


> +STATIC void
> +xlog_recover_inode_ra_pass2(
> +	struct xlog                     *log,
> +	struct xlog_recover_item        *item)
> +{
> +	xfs_inode_log_format_t	*in_f;
> +	xfs_mount_t		*mp = log->l_mp;
> +	int			error;
> +	int			need_free = 0;
> +
> +	if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
> +		in_f = item->ri_buf[0].i_addr;
> +	} else {
> +		in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
> +		need_free = 1;
> +		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
> +		if (error)
> +			goto error;
> +	}

I'd just put the conversion buffer on stack and avoid the need to
alloc/free memory here. There's plenty of stack space available
during recovery here, so let's use it to keep the overhead of
readahead down.
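
Something like this untested sketch (reusing the names from the patch
above) would avoid the allocation entirely:

	STATIC void
	xlog_recover_inode_ra_pass2(
		struct xlog			*log,
		struct xlog_recover_item	*item)
	{
		xfs_inode_log_format_t		ilf_buf;
		xfs_inode_log_format_t		*ilfp;
		xfs_mount_t			*mp = log->l_mp;
		int				error;

		if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
			ilfp = item->ri_buf[0].i_addr;
		} else {
			/* conversion buffer now lives on the stack */
			ilfp = &ilf_buf;
			memset(ilfp, 0, sizeof(*ilfp));
			error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp);
			if (error)
				return;
		}

		if (xlog_check_buffer_cancelled(log, ilfp->ilf_blkno,
						ilfp->ilf_len, 0))
			return;

		xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno,
				  ilfp->ilf_len, &xfs_inode_buf_ops);
	}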

> +STATIC void
> +xlog_recover_dquot_ra_pass2(
> +	struct xlog			*log,
> +	struct xlog_recover_item	*item)
> +{
> +	xfs_mount_t		*mp = log->l_mp;
> +	xfs_buf_t		*bp;
> +	struct xfs_disk_dquot	*recddq;
> +	int			error;
> +	xfs_dq_logformat_t	*dq_f;
> +	uint			type;
> +
> +
> +	if (mp->m_qflags == 0)
> +		return;
> +
> +	recddq = item->ri_buf[1].i_addr;
> +	if (recddq == NULL)
> +		return;
> +	if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t))
> +		return;
> +
> +	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
> +	ASSERT(type);
> +	if (log->l_quotaoffs_flag & type)
> +		return;
> +
> +	dq_f = item->ri_buf[0].i_addr;
> +	ASSERT(dq_f);
> +	error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
> +			   "xlog_recover_dquot_ra_pass2 (log copy)");
> +	if (error)
> +		return;
> +	ASSERT(dq_f->qlf_len == 1);
> +
> +	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
> +				   XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
> +				   NULL);
> +	if (!error)
> +		xfs_buf_relse(bp);
> +}

That's not doing readahead - that's an integrity check followed by a
blocking read.  Shouldn't it just be calling xfs_buf_readahead() on
dq_f->qlf_blkno/dq_f->qlf_len, after checking whether it had been
cancelled?
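
I.e. drop the xfs_trans_read_buf() call and end with something like
(untested; a NULL verifier is passed as in the buffer case, though a
dquot verifier could be used here if one is available):

	ASSERT(dq_f->qlf_len == 1);
	/* issue non-blocking readahead instead of a synchronous read */
	xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
			  XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);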

>  STATIC int
>  xlog_recover_commit_pass1(
>  	struct xlog			*log,
> @@ -3140,10 +3255,14 @@ xlog_recover_commit_pass2(
>  	struct xlog			*log,
>  	struct xlog_recover		*trans,
>  	struct list_head		*buffer_list,
> -	struct xlog_recover_item	*item)
> +	struct xlog_recover_item	*item,
> +	struct xlog_recover_item	*next_item)
>  {
>  	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
>  
> +	if (next_item)
> +		xlog_recover_ra_pass2(log, next_item);
> +

Ah, that explains the limited improvement - it's only doing
readahead of a single item at a time. IOWs, we are completing
recovery of the current object before IO completion of the next
object has occurred. This generally means the read-ahead queue is
not deep enough...

>  	switch (ITEM_TYPE(item)) {
>  	case XFS_LI_BUF:
>  		return xlog_recover_buffer_pass2(log, buffer_list, item);
> @@ -3181,7 +3300,7 @@ xlog_recover_commit_trans(
>  	int			pass)
>  {
>  	int			error = 0, error2;
> -	xlog_recover_item_t	*item;
> +	xlog_recover_item_t	*item, *next_item, *temp_item;
>  	LIST_HEAD		(buffer_list);
>  
>  	hlist_del(&trans->r_list);
> @@ -3190,14 +3309,18 @@ xlog_recover_commit_trans(
>  	if (error)
>  		return error;
>  
> -	list_for_each_entry(item, &trans->r_itemq, ri_list) {
> +	list_for_each_entry_safe(item, temp_item, &trans->r_itemq, ri_list) {
>  		switch (pass) {
>  		case XLOG_RECOVER_PASS1:
>  			error = xlog_recover_commit_pass1(log, trans, item);
>  			break;
>  		case XLOG_RECOVER_PASS2:
> +			if (&temp_item->ri_list != &trans->r_itemq)
> +				next_item = temp_item;
> +			else
> +				next_item = NULL;
>  			error = xlog_recover_commit_pass2(log, trans,
> -							  &buffer_list, item);
> +					  &buffer_list, item, next_item);

Ok, so you've based the readahead on the transaction item list
having a next pointer. What I think you should do is turn this into
a readahead queue by moving objects to a new list. i.e.

	list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {

		case XLOG_RECOVER_PASS2:
			if (ra_qdepth++ >= MAX_QDEPTH) {
				recover_items(log, trans, &buffer_list, &ra_item_list);
				ra_qdepth = 0;
			} else {
				xlog_recover_item_readahead(log, item);
				list_move_tail(&item->ri_list, &ra_item_list);
			}
			break;
		...
		}
	}
	if (!list_empty(&ra_item_list))
		recover_items(log, trans, &buffer_list, &ra_item_list);

I'd suggest that a queue depth somewhere between 10 and 100 will
be necessary to keep enough IO in flight to keep the pipeline full
and prevent recovery from having to wait on IO...
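
A minimal sketch of the recover_items() helper assumed above
(untested; assumes xlog_recover_commit_pass2() keeps its original
four-argument signature):

	STATIC int
	recover_items(
		struct xlog			*log,
		struct xlog_recover		*trans,
		struct list_head		*buffer_list,
		struct list_head		*item_list)
	{
		struct xlog_recover_item	*item;
		int				error = 0;

		/* readahead has already been issued for every item here */
		list_for_each_entry(item, item_list, ri_list) {
			error = xlog_recover_commit_pass2(log, trans,
							  buffer_list, item);
			if (error)
				break;
		}

		/* hand the items back so xlog_recover_free_trans() sees them */
		list_splice_tail_init(item_list, &trans->r_itemq);
		return error;
	}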

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH] xfs: introduce object readahead to log recovery
From: Zhi Yong Wu @ 2013-07-26  6:36 UTC
  To: Dave Chinner; +Cc: xfstests, linux-fsdevel, linux-kernel mlist, Zhi Yong Wu

Dave,

All your comments look good to me and will be applied to the next version, thanks a lot.

On Fri, Jul 26, 2013 at 10:50 AM, Dave Chinner <david@fromorbit.com> wrote:
> On Thu, Jul 25, 2013 at 04:23:39PM +0800, zwu.kernel@gmail.com wrote:
>> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>>
>>   It can take a long time to run log recovery operation because it is
>> single threaded and is bound by read latency. We can find that it took
>> most of the time to wait for the read IO to occur, so if one object
>> readahead is introduced to log recovery, it will obviously reduce the
>> log recovery time.
>>
>>   In dirty log case as below:
>>     data device: 0xfd10
>>     log device: 0xfd10 daddr: 20480032 length: 20480
>>
>>     log tail: 7941 head: 11077 state: <DIRTY>
>
> That's only a small log (10MB). As I've said on irc, readahead won't
Yeah, it is a 10MB log, but how do you calculate that from the above info?

> show any real difference on this and you need to be testing with
> large logs.
>
>>
>> This dirty ratio is about 15%. I am trying to do tests in larger scale
>> and dirtier filesystem environment.
>>
>> Log recovery time stat:
>>
>>             w/o this patch        w/ this patch
>>   real         0m1.051s             0m0.965s
>>   sys          0m0.033s             0m0.035s
>>
>>   iowait       0m1.018s             0m0.930s
>
> Well, it's not made much of a difference there. That's probably
> within the noise of repeated log recovery runs.
>
> My simple test is:
>
> $ cat recovery_time.sh
> #!/bin/bash
>
> cd /home/dave/src/compilebench-0.6/
>
> mkfs.xfs -f /dev/vdc;
> mount /dev/vdc /mnt/scratch
> chmod 777 /mnt/scratch;
> ./compilebench --no-sync -D /mnt/scratch &
> sleep 55
> /home/dave/src/xfstests-dev/src/godown /mnt/scratch
> umount /mnt/scratch
> xfs_logprint -t /dev/vdc |head -20
> time mount /dev/vdc /mnt/scratch
> umount /mnt/scratch
> $
>
> And the recovery time from this is between 15-17s:
>
> ....
>     log device: 0xfd20 daddr: 107374182032 length: 4173824
>                                                    ^^^^^^^ almost 2GB
>         log tail: 19288 head: 264809 state: <DIRTY>
> ....
> real    0m17.913s
> user    0m0.000s
> sys     0m2.381s
>
> And runs at 3-4000 read IOPs for most of that time. It's largely IO
> bound, even on SSDs.
>
> With your patch:
>
> log tail: 35871 head: 308393 state: <DIRTY>
> real    0m12.715s
> user    0m0.000s
> sys     0m2.247s
>
> And it peaked at ~5000 read IOPS.
How do you know the read IOPS rate is ~5000?

>
> It's definitely an improvement, but there's a lot of dead time still
> spent waiting for IO that we should be able to remove. Let's have a
> look at the code...
>
>
>> +STATIC void
>> +xlog_recover_inode_ra_pass2(
>> +     struct xlog                     *log,
>> +     struct xlog_recover_item        *item)
>> +{
>> +     xfs_inode_log_format_t  *in_f;
>> +     xfs_mount_t             *mp = log->l_mp;
>> +     int                     error;
>> +     int                     need_free = 0;
>> +
>> +     if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
>> +             in_f = item->ri_buf[0].i_addr;
>> +     } else {
>> +             in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
>> +             need_free = 1;
>> +             error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
>> +             if (error)
>> +                     goto error;
>> +     }
>
> I'd just put the conversion buffer on stack and avoid the need to
> alloc/free memory here. There's plenty of stack space available
> during recovery here, so let's use it to keep the overhead of
> readahead down.
OK, will use the buffer on stack.
>
>> +STATIC void
>> +xlog_recover_dquot_ra_pass2(
>> +     struct xlog                     *log,
>> +     struct xlog_recover_item        *item)
>> +{
>> +     xfs_mount_t             *mp = log->l_mp;
>> +     xfs_buf_t               *bp;
>> +     struct xfs_disk_dquot   *recddq;
>> +     int                     error;
>> +     xfs_dq_logformat_t      *dq_f;
>> +     uint                    type;
>> +
>> +
>> +     if (mp->m_qflags == 0)
>> +             return;
>> +
>> +     recddq = item->ri_buf[1].i_addr;
>> +     if (recddq == NULL)
>> +             return;
>> +     if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t))
>> +             return;
>> +
>> +     type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
>> +     ASSERT(type);
>> +     if (log->l_quotaoffs_flag & type)
>> +             return;
>> +
>> +     dq_f = item->ri_buf[0].i_addr;
>> +     ASSERT(dq_f);
>> +     error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
>> +                        "xlog_recover_dquot_ra_pass2 (log copy)");
>> +     if (error)
>> +             return;
>> +     ASSERT(dq_f->qlf_len == 1);
>> +
>> +     error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
>> +                                XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
>> +                                NULL);
>> +     if (!error)
>> +             xfs_buf_relse(bp);
>> +}
>
> That's not doing readahead - that's an integrity check followed by a
> blocking read.  Shouldn't it just be calling xfs_buf_readahead() on
> dq_f->qlf_blkno/dq_f->qlf_len, after checking whether it had been
> cancelled?
Ah, I missed that it is a blocking read; will fix it.
>
>>  STATIC int
>>  xlog_recover_commit_pass1(
>>       struct xlog                     *log,
>> @@ -3140,10 +3255,14 @@ xlog_recover_commit_pass2(
>>       struct xlog                     *log,
>>       struct xlog_recover             *trans,
>>       struct list_head                *buffer_list,
>> -     struct xlog_recover_item        *item)
>> +     struct xlog_recover_item        *item,
>> +     struct xlog_recover_item        *next_item)
>>  {
>>       trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
>>
>> +     if (next_item)
>> +             xlog_recover_ra_pass2(log, next_item);
>> +
>
> Ah, that explains the limited improvement - it's only doing
> readahead of a single item at a time. IOWs, we are completing
> recovery of the current object before IO completion of the next
> object has occurred. This generally means the read-ahead queue is
Yes.
> not deep enough...
>
>>       switch (ITEM_TYPE(item)) {
>>       case XFS_LI_BUF:
>>               return xlog_recover_buffer_pass2(log, buffer_list, item);
>> @@ -3181,7 +3300,7 @@ xlog_recover_commit_trans(
>>       int                     pass)
>>  {
>>       int                     error = 0, error2;
>> -     xlog_recover_item_t     *item;
>> +     xlog_recover_item_t     *item, *next_item, *temp_item;
>>       LIST_HEAD               (buffer_list);
>>
>>       hlist_del(&trans->r_list);
>> @@ -3190,14 +3309,18 @@ xlog_recover_commit_trans(
>>       if (error)
>>               return error;
>>
>> -     list_for_each_entry(item, &trans->r_itemq, ri_list) {
>> +     list_for_each_entry_safe(item, temp_item, &trans->r_itemq, ri_list) {
>>               switch (pass) {
>>               case XLOG_RECOVER_PASS1:
>>                       error = xlog_recover_commit_pass1(log, trans, item);
>>                       break;
>>               case XLOG_RECOVER_PASS2:
>> +                     if (&temp_item->ri_list != &trans->r_itemq)
>> +                             next_item = temp_item;
>> +                     else
>> +                             next_item = NULL;
>>                       error = xlog_recover_commit_pass2(log, trans,
>> -                                                       &buffer_list, item);
>> +                                       &buffer_list, item, next_item);
>
> Ok, so you've based the readahead on the transaction item list
> having a next pointer. What I think you should do is turn this into
> a readahead queue by moving objects to a new list. i.e.
>
>         list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
>
>                 case XLOG_RECOVER_PASS2:
>                         if (ra_qdepth++ >= MAX_QDEPTH) {
>                                 recover_items(log, trans, &buffer_list, &ra_item_list);
>                                 ra_qdepth = 0;
>                         } else {
>                                 xlog_recover_item_readahead(log, item);
>                                 list_move_tail(&item->ri_list, &ra_item_list);
>                         }
>                         break;
>                 ...
>                 }
>         }
>         if (!list_empty(&ra_item_list))
>                 recover_items(log, trans, &buffer_list, &ra_item_list);
>
> I'd suggest that a queue depth somewhere between 10 and 100 will
> be necessary to keep enough IO in flight to keep the pipeline full
> and prevent recovery from having to wait on IO...
Good suggestion, I will apply it to the next version, thanks.

>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com


-- 
Regards,

Zhi Yong Wu


* Re: [PATCH] xfs: introduce object readahead to log recovery
From: Dave Chinner @ 2013-07-26 11:35 UTC
  To: Zhi Yong Wu; +Cc: xfstests, linux-fsdevel, linux-kernel mlist, Zhi Yong Wu

On Fri, Jul 26, 2013 at 02:36:15PM +0800, Zhi Yong Wu wrote:
> Dave,
> 
> All comments are good to me, and will be applied to next version, thanks a lot.
> 
> On Fri, Jul 26, 2013 at 10:50 AM, Dave Chinner <david@fromorbit.com> wrote:
> > On Thu, Jul 25, 2013 at 04:23:39PM +0800, zwu.kernel@gmail.com wrote:
> >> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> >>
> >>   It can take a long time to run log recovery operation because it is
> >> single threaded and is bound by read latency. We can find that it took
> >> most of the time to wait for the read IO to occur, so if one object
> >> readahead is introduced to log recovery, it will obviously reduce the
> >> log recovery time.
> >>
> >>   In dirty log case as below:
> >>     data device: 0xfd10
> >>     log device: 0xfd10 daddr: 20480032 length: 20480
> >>
> >>     log tail: 7941 head: 11077 state: <DIRTY>
> >
> > That's only a small log (10MB). As I've said on irc, readahead won't
> Yeah, it is one 10MB log, but how do you calculate it based on the above info?

length = 20480 blocks. 20480 * 512 = 10MB....

> > And the recovery time from this is between 15-17s:
> >
> > ....
> >     log device: 0xfd20 daddr: 107374182032 length: 4173824
> >                                                    ^^^^^^^ almost 2GB
> >         log tail: 19288 head: 264809 state: <DIRTY>
> > ....
> > real    0m17.913s
> > user    0m0.000s
> > sys     0m2.381s
> >
> > And runs at 3-4000 read IOPs for most of that time. It's largely IO
> > bound, even on SSDs.
> >
> > With your patch:
> >
> > log tail: 35871 head: 308393 state: <DIRTY>
> > real    0m12.715s
> > user    0m0.000s
> > sys     0m2.247s
> >
> > And it peaked at ~5000 read IOPS.
> How do you know its READ IOPS is ~5000?

Other monitoring. iostat can tell you this, though I use PCP...
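(E.g. "iostat -dxm 1 <dev>" while recovery runs; the r/s column shows
the read IOPS.)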

> > Ok, so you've based the readahead on the transaction item list
> > having a next pointer. What I think you should do is turn this into
> > a readahead queue by moving objects to a new list. i.e.
> >
> >         list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
> >
> >                 case XLOG_RECOVER_PASS2:
> >                         if (ra_qdepth++ >= MAX_QDEPTH) {
> >                                 recover_items(log, trans, &buffer_list, &ra_item_list);
> >                                 ra_qdepth = 0;
> >                         } else {
> >                                 xlog_recover_item_readahead(log, item);
> >                                 list_move_tail(&item->ri_list, &ra_item_list);
> >                         }
> >                         break;
> >                 ...
> >                 }
> >         }
> >         if (!list_empty(&ra_item_list))
> >                 recover_items(log, trans, &buffer_list, &ra_item_list);
> >
> > I'd suggest that a queue depth somewhere between 10 and 100 will
> > be necessary to keep enough IO in flight to keep the pipeline full
> > and prevent recovery from having to wait on IO...
> Good suggestion, will apply it to next version, thanks.

FWIW, I hacked a quick test of this into your patch here and a depth
of 100 brought the recovery time down to under 8s. For other
workloads which have nothing but dirty inodes (like fsmark) a depth
of 100 drops the recovery time from ~100s to ~25s, with the IO rate
peaking at well over 15,000 read IOPS. So we definitely want to queue
up more than a single readahead...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH] xfs: introduce object readahead to log recovery
From: Zhi Yong Wu @ 2013-07-29  1:38 UTC
  To: Dave Chinner; +Cc: xfstests, linux-fsdevel, linux-kernel mlist, Zhi Yong Wu

On Fri, Jul 26, 2013 at 7:35 PM, Dave Chinner <david@fromorbit.com> wrote:
> On Fri, Jul 26, 2013 at 02:36:15PM +0800, Zhi Yong Wu wrote:
>> Dave,
>>
>> All comments are good to me, and will be applied to next version, thanks a lot.
>>
>> On Fri, Jul 26, 2013 at 10:50 AM, Dave Chinner <david@fromorbit.com> wrote:
>> > On Thu, Jul 25, 2013 at 04:23:39PM +0800, zwu.kernel@gmail.com wrote:
>> >> From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>> >>
>> >>   It can take a long time to run log recovery operation because it is
>> >> single threaded and is bound by read latency. We can find that it took
>> >> most of the time to wait for the read IO to occur, so if one object
>> >> readahead is introduced to log recovery, it will obviously reduce the
>> >> log recovery time.
>> >>
>> >>   In dirty log case as below:
>> >>     data device: 0xfd10
>> >>     log device: 0xfd10 daddr: 20480032 length: 20480
>> >>
>> >>     log tail: 7941 head: 11077 state: <DIRTY>
>> >
>> > That's only a small log (10MB). As I've said on irc, readahead won't
>> Yeah, it is one 10MB log, but how do you calculate it based on the above info?
>
> length = 20480 blocks. 20480 * 512 = 10MB....
Thanks.
>
>> > And the recovery time from this is between 15-17s:
>> >
>> > ....
>> >     log device: 0xfd20 daddr: 107374182032 length: 4173824
>> >                                                    ^^^^^^^ almost 2GB
>> >         log tail: 19288 head: 264809 state: <DIRTY>
>> > ....
>> > real    0m17.913s
>> > user    0m0.000s
>> > sys     0m2.381s
>> >
>> > And runs at 3-4000 read IOPs for most of that time. It's largely IO
>> > bound, even on SSDs.
>> >
>> > With your patch:
>> >
>> > log tail: 35871 head: 308393 state: <DIRTY>
>> > real    0m12.715s
>> > user    0m0.000s
>> > sys     0m2.247s
>> >
>> > And it peaked at ~5000 read IOPS.
>> How do you know its READ IOPS is ~5000?
>
> Other monitoring. iostat can tell you this, though I use PCP...
Thanks.
>
>> > Ok, so you've based the readahead on the transaction item list
>> > having a next pointer. What I think you should do is turn this into
>> > a readahead queue by moving objects to a new list. i.e.
>> >
>> >         list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
>> >
>> >                 case XLOG_RECOVER_PASS2:
>> >                         if (ra_qdepth++ >= MAX_QDEPTH) {
>> >                                 recover_items(log, trans, &buffer_list, &ra_item_list);
>> >                                 ra_qdepth = 0;
>> >                         } else {
>> >                                 xlog_recover_item_readahead(log, item);
>> >                                 list_move_tail(&item->ri_list, &ra_item_list);
>> >                         }
>> >                         break;
>> >                 ...
>> >                 }
>> >         }
>> >         if (!list_empty(&ra_item_list))
>> >                 recover_items(log, trans, &buffer_list, &ra_item_list);
>> >
>> > I'd suggest that a queue depth somewhere between 10 and 100 will
>> > be necessary to keep enough IO in flight to keep the pipeline full
>> > and prevent recovery from having to wait on IO...
>> Good suggestion, will apply it to next version, thanks.
>
> FWIW, I hacked a quick test of this into your patch here and a depth
> of 100 brought the reocvery time down to under 8s. For other
> workloads which have nothing but dirty inodes (like fsmark) a depth
> of 100 drops the recovery time from ~100s to ~25s, and the iop rate
> is peaking at well over 15,000 IOPS. So we definitely want to queue
> up more than a single readahead...
Great, I will try it.
By the way, how do you generate a workload that has nothing but dirty
dquot objects?
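
(Presumably something along the lines of Dave's recovery_time.sh
above, but with quota accounting enabled so that chowning files
dirties lots of dquots; an untested sketch:)

	mkfs.xfs -f /dev/vdc
	mount -o uquota /dev/vdc /mnt/scratch
	# every new uid dirties another user dquot
	for i in $(seq 1 10000); do
		touch /mnt/scratch/f$i
		chown $i /mnt/scratch/f$i
	done
	/home/dave/src/xfstests-dev/src/godown /mnt/scratch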

>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] xfs: introduce object readahead to log recovery
  2013-07-29  1:38         ` Zhi Yong Wu
@ 2013-07-29  2:45           ` Dave Chinner
  -1 siblings, 0 replies; 14+ messages in thread
From: Dave Chinner @ 2013-07-29  2:45 UTC (permalink / raw)
  To: Zhi Yong Wu; +Cc: xfstests, linux-fsdevel, linux-kernel mlist, Zhi Yong Wu

On Mon, Jul 29, 2013 at 09:38:11AM +0800, Zhi Yong Wu wrote:
> By the way, how do you create a workload that has nothing but dirty
> dquot objects?

Create quota limits for non-existent users. That will allocate the
dquots and dirty them.

Or if you already have a few hundred thousand dquots, just change
the limits on them all to get them logged and dirty...
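Something like this, for example (the uid range and mount point are made
up, and the filesystem needs to be mounted with user quota enabled):

	# allocate and dirty ~10000 user dquots by setting limits
	for id in $(seq 10000 20000); do
		xfs_quota -x -c "limit -u bsoft=100m bhard=200m $id" /mnt/scratch
	done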

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] xfs: introduce object readahead to log recovery
  2013-07-29  2:45           ` Dave Chinner
@ 2013-07-29  3:12             ` Zhi Yong Wu
  -1 siblings, 0 replies; 14+ messages in thread
From: Zhi Yong Wu @ 2013-07-29  3:12 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfstests, linux-fsdevel, linux-kernel mlist, Zhi Yong Wu

On Mon, Jul 29, 2013 at 10:45 AM, Dave Chinner <david@fromorbit.com> wrote:
> On Mon, Jul 29, 2013 at 09:38:11AM +0800, Zhi Yong Wu wrote:
>> By the way, how do you create a workload that has nothing but dirty
>> dquot objects?
>
> Create quota limits for non-existent users. That will allocate the
> dquots and dirty them.
>
> Or if you already have a few hundred thousand dquots, just change
> the limits on them all to get them logged and dirty...
OK, thanks.
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com



-- 
Regards,

Zhi Yong Wu

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2013-07-29  3:13 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-25  8:23 [PATCH] xfs: introduce object readahead to log recovery zwu.kernel
2013-07-25  8:23 ` zwu.kernel
2013-07-26  2:50 ` Dave Chinner
2013-07-26  2:50   ` Dave Chinner
2013-07-26  6:36   ` Zhi Yong Wu
2013-07-26  6:36     ` Zhi Yong Wu
2013-07-26 11:35     ` Dave Chinner
2013-07-26 11:35       ` Dave Chinner
2013-07-29  1:38       ` Zhi Yong Wu
2013-07-29  1:38         ` Zhi Yong Wu
2013-07-29  2:45         ` Dave Chinner
2013-07-29  2:45           ` Dave Chinner
2013-07-29  3:12           ` Zhi Yong Wu
2013-07-29  3:12             ` Zhi Yong Wu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.