All of lore.kernel.org
 help / color / mirror / Atom feed
* xfs: add FITRIM support
@ 2010-11-25 11:23 Christoph Hellwig
  2010-12-22 21:41 ` Alex Elder
  2010-12-23  1:44 ` Dave Chinner
  0 siblings, 2 replies; 19+ messages in thread
From: Christoph Hellwig @ 2010-11-25 11:23 UTC (permalink / raw)
  To: xfs

Allow manual discards from userspace using the FITRIM ioctl.  This is not
intended to be run during normal workloads, as the freespace btree walks
can cause large performance degradation.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: linux-2.6/fs/xfs/xfs_alloc.c
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_alloc.c	2010-11-25 11:30:29.425010758 +0100
+++ linux-2.6/fs/xfs/xfs_alloc.c	2010-11-25 11:30:36.408013342 +0100
@@ -41,10 +41,6 @@
 #define	XFSA_FIXUP_BNO_OK	1
 #define	XFSA_FIXUP_CNT_OK	2
 
-static int
-xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
-		    xfs_agblock_t bno, xfs_extlen_t len);
-
 /*
  * Prototypes for per-ag allocation routines
  */
@@ -94,7 +90,7 @@ xfs_alloc_lookup_ge(
  * Lookup the first record less than or equal to [bno, len]
  * in the btree given by cur.
  */
-STATIC int				/* error */
+int					/* error */
 xfs_alloc_lookup_le(
 	struct xfs_btree_cur	*cur,	/* btree cursor */
 	xfs_agblock_t		bno,	/* starting block of extent */
@@ -127,7 +123,7 @@ xfs_alloc_update(
 /*
  * Get the data from the pointed-to record.
  */
-STATIC int				/* error */
+int					/* error */
 xfs_alloc_get_rec(
 	struct xfs_btree_cur	*cur,	/* btree cursor */
 	xfs_agblock_t		*bno,	/* output: starting block of extent */
@@ -2676,7 +2672,7 @@ restart:
  * will require a synchronous transaction, but it can still be
  * used to distinguish between a partial or exact match.
  */
-static int
+int
 xfs_alloc_busy_search(
 	struct xfs_mount	*mp,
 	xfs_agnumber_t		agno,
Index: linux-2.6/fs/xfs/xfs_alloc.h
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_alloc.h	2010-11-25 11:30:29.431003844 +0100
+++ linux-2.6/fs/xfs/xfs_alloc.h	2010-11-25 11:30:36.408013342 +0100
@@ -19,6 +19,7 @@
 #define	__XFS_ALLOC_H__
 
 struct xfs_buf;
+struct xfs_btree_cur;
 struct xfs_mount;
 struct xfs_perag;
 struct xfs_trans;
@@ -118,16 +119,16 @@ xfs_alloc_longest_free_extent(struct xfs
 		struct xfs_perag *pag);
 
 #ifdef __KERNEL__
-
 void
-xfs_alloc_busy_insert(xfs_trans_t *tp,
-		xfs_agnumber_t agno,
-		xfs_agblock_t bno,
-		xfs_extlen_t len);
+xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
+	xfs_agblock_t bno, xfs_extlen_t len);
 
 void
 xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
 
+int
+xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
+	xfs_agblock_t bno, xfs_extlen_t len);
 #endif	/* __KERNEL__ */
 
 /*
@@ -205,4 +206,18 @@ xfs_free_extent(
 	xfs_fsblock_t	bno,	/* starting block number of extent */
 	xfs_extlen_t	len);	/* length of extent */
 
+int					/* error */
+xfs_alloc_lookup_le(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	int			*stat);	/* success/failure */
+
+int					/* error */
+xfs_alloc_get_rec(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		*bno,	/* output: starting block of extent */
+	xfs_extlen_t		*len,	/* output: length of extent */
+	int			*stat);	/* output: success/failure */
+
 #endif	/* __XFS_ALLOC_H__ */
Index: linux-2.6/fs/xfs/Makefile
===================================================================
--- linux-2.6.orig/fs/xfs/Makefile	2010-11-25 11:30:29.437012364 +0100
+++ linux-2.6/fs/xfs/Makefile	2010-11-25 11:30:36.409023608 +0100
@@ -98,6 +98,7 @@ xfs-y				+= $(addprefix $(XFS_LINUX)/, \
 				   kmem.o \
 				   xfs_aops.o \
 				   xfs_buf.o \
+				   xfs_discard.o \
 				   xfs_export.o \
 				   xfs_file.o \
 				   xfs_fs_subr.o \
Index: linux-2.6/fs/xfs/linux-2.6/xfs_discard.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/fs/xfs/linux-2.6/xfs_discard.c	2010-11-25 12:14:43.270005863 +0100
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_discard.h"
+#include "xfs_trace.h"
+
+STATIC int
+xfs_trim_extents(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_fsblock_t		start,
+	xfs_fsblock_t		len,
+	xfs_fsblock_t		minlen)
+{
+	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
+	struct xfs_btree_cur	*cur;
+	struct xfs_buf		*agbp;
+	struct xfs_perag	*pag;
+	int			error;
+	int			i;
+
+	pag = xfs_perag_get(mp, agno);
+
+	error = xfs_alloc_read_agf(mp, NULL, agno,
+				   XFS_ALLOC_FLAG_TRYLOCK, &agbp);
+	if (error || !agbp) {
+		if (error == EAGAIN)
+			error = 0;
+		goto out_put_perag;
+	}
+
+	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
+
+	/*
+	 * Force out the log.  This means any transactions that might have freed
+	 * space before we took the AGF buffer lock are now on disk, and the
+	 * volatile disk cache is flushed.
+	 */
+	xfs_log_force(mp, XFS_LOG_SYNC);
+
+	/*
+	 * Look up the longest btree in the AGF and start with it.
+	 */
+	error = xfs_alloc_lookup_le(cur, 0,
+				    XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+	if (error)
+		goto out_del_cursor;
+
+	/*
+	 * Loop until we are done with all extents that are large
+	 * enough to be worth discarding.
+	 */
+	while (i) {
+		xfs_agblock_t fbno;
+		xfs_extlen_t flen;
+
+		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
+		if (error)
+			goto out_del_cursor;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
+		ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+
+		/*
+		 * Too small?  Give up.
+		 */
+		if (flen < minlen) {
+			trace_xfs_discard_toosmall(mp, agno, fbno, flen);
+			goto out_del_cursor;
+		}
+
+		/*
+		 * If the extent is entirely outside of the range we are
+		 * supposed to discard skip it.  Do not bother to trim
+		 * down partially overlapping ranges for now.
+		 */
+		if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
+		    XFS_AGB_TO_FSB(mp, agno, fbno) > start + len) {
+			trace_xfs_discard_exclude(mp, agno, fbno, flen);
+			goto next_extent;
+		}
+
+		/*
+		 * If any blocks in the range are still busy, skip the
+		 * discard and try again the next time.
+		 */
+		if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
+			trace_xfs_discard_busy(mp, agno, fbno, flen);
+			goto next_extent;
+		}
+
+		trace_xfs_discard_extent(mp, agno, fbno, flen);
+		error = -blkdev_issue_discard(bdev,
+				XFS_AGB_TO_DADDR(mp, agno, fbno),
+				XFS_FSB_TO_BB(mp, flen),
+				GFP_NOFS, 0);
+		if (error)
+			goto out_del_cursor;
+
+next_extent:
+		error = xfs_btree_decrement(cur, 0, &i);
+		if (error)
+			goto out_del_cursor;
+	}
+
+out_del_cursor:
+	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_buf_relse(agbp);
+out_put_perag:
+	xfs_perag_put(pag);
+	return error;
+}
+
+int
+xfs_ioc_trim(
+	struct xfs_mount	*mp,
+	struct fstrim_range	*urange)
+{
+	struct request_queue	*q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
+	unsigned int		granularity = q->limits.discard_granularity;
+	struct fstrim_range	range;
+	xfs_fsblock_t		start, len, minlen;
+	xfs_agnumber_t		start_agno, end_agno, agno;
+	int			error, last_error = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+	if (copy_from_user(&range, urange, sizeof(range)))
+		return -XFS_ERROR(EFAULT);
+
+	/*
+	 * Truncating down the len isn't actually quite correct, but using
+	 * XFS_B_TO_FSB would mean we trivially get overflows for values
+	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
+	 * used by the fstrim application.  In the end it really doesn't
+	 * matter as trimming blocks is an advisory interface.
+	 */
+	start = XFS_B_TO_FSBT(mp, range.start);
+	len = XFS_B_TO_FSBT(mp, range.len);
+	minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
+
+	start_agno = XFS_FSB_TO_AGNO(mp, start);
+	if (start_agno >= mp->m_sb.sb_agcount)
+		return -XFS_ERROR(EINVAL);
+
+	end_agno = XFS_FSB_TO_AGNO(mp, start + len);
+	if (end_agno >= mp->m_sb.sb_agcount)
+		end_agno = mp->m_sb.sb_agcount - 1;
+
+	for (agno = start_agno; agno <= end_agno; agno++) {
+		error = -xfs_trim_extents(mp, agno, start, len, minlen);
+		if (error)
+			last_error = error;
+	}
+
+	if (copy_to_user(urange, &range, sizeof(range)))
+		return -XFS_ERROR(EFAULT);
+	return last_error;
+}
Index: linux-2.6/fs/xfs/linux-2.6/xfs_discard.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/fs/xfs/linux-2.6/xfs_discard.h	2010-11-25 11:30:36.419276716 +0100
@@ -0,0 +1,8 @@
+#ifndef XFS_DISCARD_H
+#define XFS_DISCARD_H 1
+
+struct fstrim_range;
+
+extern int	xfs_ioc_trim(struct xfs_mount *, struct fstrim_range *);
+
+#endif /* XFS_DISCARD_H */
Index: linux-2.6/fs/xfs/linux-2.6/xfs_trace.h
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_trace.h	2010-11-25 11:30:29.461011526 +0100
+++ linux-2.6/fs/xfs/linux-2.6/xfs_trace.h	2010-11-25 11:30:36.427255554 +0100
@@ -1752,6 +1752,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_reco
 DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
 DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
 
+DECLARE_EVENT_CLASS(xfs_discard_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len),
+	TP_ARGS(mp, agno, agbno, len),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u\n",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len)
+)
+
+#define DEFINE_DISCARD_EVENT(name) \
+DEFINE_EVENT(xfs_discard_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_agblock_t agbno, xfs_extlen_t len), \
+	TP_ARGS(mp, agno, agbno, len))
+DEFINE_DISCARD_EVENT(xfs_discard_extent);
+DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
+DEFINE_DISCARD_EVENT(xfs_discard_exclude);
+DEFINE_DISCARD_EVENT(xfs_discard_busy);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
Index: linux-2.6/fs/xfs/linux-2.6/xfs_ioctl.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_ioctl.c	2010-11-25 11:30:29.468004752 +0100
+++ linux-2.6/fs/xfs/linux-2.6/xfs_ioctl.c	2010-11-25 11:30:36.434255275 +0100
@@ -39,6 +39,7 @@
 #include "xfs_dfrag.h"
 #include "xfs_fsops.h"
 #include "xfs_vnodeops.h"
+#include "xfs_discard.h"
 #include "xfs_quota.h"
 #include "xfs_inode_item.h"
 #include "xfs_export.h"
@@ -1294,6 +1295,8 @@ xfs_file_ioctl(
 	trace_xfs_file_ioctl(ip);
 
 	switch (cmd) {
+	case FITRIM:
+		return xfs_ioc_trim(mp, arg);
 	case XFS_IOC_ALLOCSP:
 	case XFS_IOC_FREESP:
 	case XFS_IOC_RESVSP:

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2010-11-25 11:23 xfs: add FITRIM support Christoph Hellwig
@ 2010-12-22 21:41 ` Alex Elder
  2010-12-28 16:09   ` Christoph Hellwig
  2010-12-23  1:44 ` Dave Chinner
  1 sibling, 1 reply; 19+ messages in thread
From: Alex Elder @ 2010-12-22 21:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

(I actually wrote most of this last week and
finally decided it'd be better to send it than
to sit on it.)

On Thu, 2010-11-25 at 06:23 -0500, Christoph Hellwig wrote: 
> Allow manual discards from userspace using the FITRIM ioctl.  This is not
> intended to be run during normal workloads, as the freespace btree walks
> can cause large performance degradation.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

I missed this when it first came through, sorry.

A few comments and questions, below.  Mostly
driven by my not knowing where to find a
reference on what (precisely) FITRIM is supposed
to do.

					-Alex

. . .

> Index: linux-2.6/fs/xfs/linux-2.6/xfs_discard.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6/fs/xfs/linux-2.6/xfs_discard.c	2010-11-25 12:14:43.270005863 +0100
> @@ -0,0 +1,187 @@

. . .

> +
> +STATIC int
> +xfs_trim_extents(
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	xfs_fsblock_t		start,
> +	xfs_fsblock_t		len,
> +	xfs_fsblock_t		minlen)
> +{
> +	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
> +	struct xfs_btree_cur	*cur;
> +	struct xfs_buf		*agbp;
> +	struct xfs_perag	*pag;
> +	int			error;
> +	int			i;
> +
> +	pag = xfs_perag_get(mp, agno);
> +
> +	error = xfs_alloc_read_agf(mp, NULL, agno,
> +				   XFS_ALLOC_FLAG_TRYLOCK, &agbp);
> +	if (error || !agbp) {
> +		if (error == EAGAIN)
> +			error = 0;

EAGAIN is ignored because it's an advisory interface, right?
How hard are we expected to try?  What I really mean is,
is the benefit of FITRIM enough that we should try again
later when we can get a buffer or lock on it?

> +		goto out_put_perag;
> +	}
> +
> +	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
> +
> +	/*
> +	 * Force out the log.  This means any transactions that might have freed
> +	 * space before we took the AGF buffer lock are now on disk, and the
> +	 * volatile disk cache is flushed.
> +	 */
> +	xfs_log_force(mp, XFS_LOG_SYNC);
> +
> +	/*
> +	 * Look up the longest btree in the AGF and start with it.
> +	 */
> +	error = xfs_alloc_lookup_le(cur, 0,
> +				    XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
> +	if (error)
> +		goto out_del_cursor;
> +
> +	/*
> +	 * Loop until we are done with all extents that are large
> +	 * enough to be worth discarding.
> +	 */
> +	while (i) {
> +		xfs_agblock_t fbno;
> +		xfs_extlen_t flen;
> +
> +		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
> +		if (error)
> +			goto out_del_cursor;
> +		XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
> +		ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
> +
> +		/*
> +		 * Too small?  Give up.
> +		 */
> +		if (flen < minlen) {
> +			trace_xfs_discard_toosmall(mp, agno, fbno, flen);
> +			goto out_del_cursor;
> +		}
> +
> +		/*
> +		 * If the extent is entirely outside of the range we are
> +		 * supposed to discard skip it.  Do not bother to trim
> +		 * down partially overlapping ranges for now.
> +		 */
> +		if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
> +		    XFS_AGB_TO_FSB(mp, agno, fbno) > start + len) {
> +			trace_xfs_discard_exclude(mp, agno, fbno, flen);
> +			goto next_extent;
> +		}
> +
> +		/*
> +		 * If any blocks in the range are still busy, skip the
> +		 * discard and try again the next time.
> +		 */
> +		if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
> +			trace_xfs_discard_busy(mp, agno, fbno, flen);
> +			goto next_extent;
> +		}
> +
> +		trace_xfs_discard_extent(mp, agno, fbno, flen);
> +		error = -blkdev_issue_discard(bdev,
> +				XFS_AGB_TO_DADDR(mp, agno, fbno),
> +				XFS_FSB_TO_BB(mp, flen),
> +				GFP_NOFS, 0);
> +		if (error)
> +			goto out_del_cursor;
> +
> +next_extent:
> +		error = xfs_btree_decrement(cur, 0, &i);
> +		if (error)
> +			goto out_del_cursor;
> +	}
> +
> +out_del_cursor:
> +	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
> +	xfs_buf_relse(agbp);
> +out_put_perag:
> +	xfs_perag_put(pag);
> +	return error;
> +}
> +
> +int
> +xfs_ioc_trim(
> +	struct xfs_mount	*mp,
> +	struct fstrim_range	*urange)

        struct fstrim_range __user *urange)

> +{
> +	struct request_queue	*q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
> +	unsigned int		granularity = q->limits.discard_granularity;
> +	struct fstrim_range	range;
> +	xfs_fsblock_t		start, len, minlen;
> +	xfs_agnumber_t		start_agno, end_agno, agno;
> +	int			error, last_error = 0;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -XFS_ERROR(EPERM);
> +	if (copy_from_user(&range, urange, sizeof(range)))
> +		return -XFS_ERROR(EFAULT);
> +
> +	/*
> +	 * Truncating down the len isn't actually quite correct, but using
> +	 * XFS_B_TO_FSB would mean we trivially get overflows for values
> +	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
> +	 * used by the fstrim application.  In the end it really doesn't
> +	 * matter as trimming blocks is an advisory interface.

I don't know where (or if) FITRIM is precisely documented.
But I question whether truncating down the start offset is
the correct thing to do.  If the starting byte offset given
were not block-aligned, it seems like you should not assume
that the caller wanted the bytes below unmapped.  (This is
a broader question, not related directly to your change.)

Similarly, on the length it is probably best to truncate
it, because it avoids any bytes beyond the specified range
getting unmapped.  (I.e., in my mind what you did is the
right way to do it.)  But these interpretations are
dependent on the specific interpretation of FITRIM...

> +	 */
> +	start = XFS_B_TO_FSBT(mp, range.start);
> +	len = XFS_B_TO_FSBT(mp, range.len);
> +	minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
> +
> +	start_agno = XFS_FSB_TO_AGNO(mp, start);
> +	if (start_agno >= mp->m_sb.sb_agcount)
> +		return -XFS_ERROR(EINVAL);
> +
> +	end_agno = XFS_FSB_TO_AGNO(mp, start + len);
> +	if (end_agno >= mp->m_sb.sb_agcount)
> +		end_agno = mp->m_sb.sb_agcount - 1;
> +
> +	for (agno = start_agno; agno <= end_agno; agno++) {
> +		error = -xfs_trim_extents(mp, agno, start, len, minlen);
> +		if (error)
> +			last_error = error;
> +	}
> +

You don't update range anywhere, so the copyout below
is not really doing anything useful.  However I think
it should stay, and the number of bytes actually
trimmed should be updated and returned to the user.
That seems to be what ext4 does (the only reference
I found at the moment for what FITRIM is supposed
to return).

> +	if (copy_to_user(urange, &range, sizeof(range)))
> +		return -XFS_ERROR(EFAULT);
> +	return last_error;
> +}

. . . 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2010-11-25 11:23 xfs: add FITRIM support Christoph Hellwig
  2010-12-22 21:41 ` Alex Elder
@ 2010-12-23  1:44 ` Dave Chinner
  2010-12-30 11:41   ` Christoph Hellwig
  1 sibling, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2010-12-23  1:44 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Thu, Nov 25, 2010 at 06:23:04AM -0500, Christoph Hellwig wrote:
> Allow manual discards from userspace using the FITRIM ioctl.  This is not
> intended to be run during normal workloads, as the freespace btree walks
> can cause large performance degradation.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
.....

> +STATIC int
> +xfs_trim_extents(
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	xfs_fsblock_t		start,
> +	xfs_fsblock_t		len,
> +	xfs_fsblock_t		minlen)
> +{
> +	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
> +	struct xfs_btree_cur	*cur;
> +	struct xfs_buf		*agbp;
> +	struct xfs_perag	*pag;
> +	int			error;
> +	int			i;
> +
> +	pag = xfs_perag_get(mp, agno);
> +
> +	error = xfs_alloc_read_agf(mp, NULL, agno,
> +				   XFS_ALLOC_FLAG_TRYLOCK, &agbp);
> +	if (error || !agbp) {
> +		if (error == EAGAIN)
> +			error = 0;
> +		goto out_put_perag;
> +	}
> +
> +	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
> +
> +	/*
> +	 * Force out the log.  This means any transactions that might have freed
> +	 * space before we took the AGF buffer lock are now on disk, and the
> +	 * volatile disk cache is flushed.
> +	 */
> +	xfs_log_force(mp, XFS_LOG_SYNC);
> +
> +	/*
> +	 * Look up the longest btree in the AGF and start with it.
> +	 */
> +	error = xfs_alloc_lookup_le(cur, 0,
> +				    XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
> +	if (error)
> +		goto out_del_cursor;
> +
> +	/*
> +	 * Loop until we are done with all extents that are large
> +	 * enough to be worth discarding.
> +	 */
> +	while (i) {
> +		xfs_agblock_t fbno;
> +		xfs_extlen_t flen;
> +
> +		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
> +		if (error)
> +			goto out_del_cursor;
> +		XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
> +		ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
> +
> +		/*
> +		 * Too small?  Give up.
> +		 */
> +		if (flen < minlen) {
> +			trace_xfs_discard_toosmall(mp, agno, fbno, flen);
> +			goto out_del_cursor;
> +		}
> +
> +		/*
> +		 * If the extent is entirely outside of the range we are
> +		 * supposed to discard skip it.  Do not bother to trim
> +		 * down partially overlapping ranges for now.
> +		 */
> +		if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
> +		    XFS_AGB_TO_FSB(mp, agno, fbno) > start + len) {
> +			trace_xfs_discard_exclude(mp, agno, fbno, flen);
> +			goto next_extent;
> +		}

Hmmmm - if we are given a range to trim, wouldn't we do better to
walk the by-bno btree instead?  i.e, we have two different cases
here - trim an entire AG, and trim part of an AG given by {start, end}. 

We only need these range checks on the AGs that are only partially
trimmed, and it would seem more efficient to me to walk the by-bno
tree for those rather than walk the by-size tree trying to find
range matches.

> +
> +		/*
> +		 * If any blocks in the range are still busy, skip the
> +		 * discard and try again the next time.
> +		 */
> +		if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
> +			trace_xfs_discard_busy(mp, agno, fbno, flen);
> +			goto next_extent;
> +		}
> +
> +		trace_xfs_discard_extent(mp, agno, fbno, flen);
> +		error = -blkdev_issue_discard(bdev,
> +				XFS_AGB_TO_DADDR(mp, agno, fbno),
> +				XFS_FSB_TO_BB(mp, flen),
> +				GFP_NOFS, 0);
> +		if (error)
> +			goto out_del_cursor;
> +
> +next_extent:
> +		error = xfs_btree_decrement(cur, 0, &i);
> +		if (error)
> +			goto out_del_cursor;
> +	}

Hmmm - so we hold the agf locked for the entire trim? That's a bit
ugly. Given this is best effort, we could avoid this by changing it
to something like:

	longest = 0;
	do {
		lock agf
		force log
		if (!longest)
			longest = agf->longest
		init cursor
		do {
			xfs_alloc_lookup_le(longest)
			alloc_get_rec(&fbno, &flen)
			check flen
			busy search
			discard
			decrement cursor
		} while (flen == longest)
		destroy cursor
		unlock agf
		longest = flen;
	} while(1)

This way we walk the tree in a manner that does not hold the agf for
extended periods of time, but still catches all the extents of the
same size and, when idle, will catch all the extents longer than the
given length to discard. If the filesystem is busy, it will have
much less impact on it due to the much shorter AGF hold times.

And perhaps the inner loop could simply be terminated on the number
of discards issued or a timer to keep the number of log forces down
to a sane number (e.g. one every 50ms).

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2010-12-22 21:41 ` Alex Elder
@ 2010-12-28 16:09   ` Christoph Hellwig
  2011-01-03 10:49     ` Lukas Czerner
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2010-12-28 16:09 UTC (permalink / raw)
  To: Alex Elder; +Cc: lczerner, xfs

On Wed, Dec 22, 2010 at 03:41:13PM -0600, Alex Elder wrote:
> > +	error = xfs_alloc_read_agf(mp, NULL, agno,
> > +				   XFS_ALLOC_FLAG_TRYLOCK, &agbp);
> > +	if (error || !agbp) {
> > +		if (error == EAGAIN)
> > +			error = 0;
> 
> EAGAIN is ignored because it's an advisory interface, right?
> How hard are we expected to try?  What I really mean is,
> is the benefit of FITRIM enough that we should try again
> later when we can get a buffer or lock on it?

That was the idea when I wrote this code.  But back then we called it
regularly from a kernel thread.  For FITRIM it makes more sense to just
remove the trylock.

> I don't know where (or if) FITRIM is precisely documented.
> But I question whether truncating down the start offset is
> the correct thing to do.  If the starting byte offset given
> were not block-aligned, it seems like you should not assume
> that the caller wanted the bytes below unmapped.  (This is
> a broader question, not related directly to your change.)
> 
> Similarly, on the length it is probably best to truncate
> it, because it avoids any bytes beyond the specified range
> getting unmapped.  (I.e., in my mind what you did is the
> right way to do it.)  But these interpretations are
> dependent on the specific interpretation of FITRIM...

Good question.  Adding Lukas to the Cc.  I tried to talk him into
writing a manpage to document the interface better, but that's only
been a few days before the holidays.  This is something we should have
documented.  I don't quite understand the need for the range interface
anyway.

> You don't update range anywhere, so the copyout below
> is not really doing anything useful.  However I think
> it should stay, and the number of bytes actually
> trimmed should be updated and returned to the user.
> That seems to be what ext4 does (the only reference
> I found at the moment for what FITRIM is supposed
> to return).

Yes, I guess I should update the range.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2010-12-23  1:44 ` Dave Chinner
@ 2010-12-30 11:41   ` Christoph Hellwig
  2011-01-03 10:57     ` Lukas Czerner
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2010-12-30 11:41 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Lukas Czerner, xfs

On Thu, Dec 23, 2010 at 12:44:09PM +1100, Dave Chinner wrote:
> Hmmmm - if we are given a range to trim, wouldn't we do better to
> walk the by-bno btree instead?  i.e, we have two different cases
> here - trim an entire AG, and trim part of an AG given by {start, end}. 
> 
> We only need these range checks on the AGs that are only partially
> trimmed, and it would seem more efficient to me to walk the by-bno
> tree for those rather than walk the by-size tree trying to find
> range matches.

It might be, but I'm not sure it's really worth the complexity.  I can't
really find any good use case for a partial trim anyway.

Ccing Lukas to figure out what his intent with this was.

> Hmmm - so we hold the agf locked for the entire trim? That's a bit
> ugly. Given this is best effort, we could avoid this by changing it
> to something like:
> 
> 	longest = 0;
> 	do {
> 		lock agf
> 		force log
> 		if (!longest)
> 			longest = agf->longest
> 		init cursor
> 		do {
> 			xfs_alloc_lookup_le(longest)
> 			alloc_get_rec(&fbno, &flen)
> 			check flen
> 			busy search
> 			discard
> 			decrement cursor
> 		} while (flen == longest)
> 		destroy cursor
> 		unlock agf
> 		longest = flen;
> 	} while(1)

This doesn't seem overly efficient.  Unless we have lots of extents
with same size we keep having to allocate new cursors all the time.

I'm not too worried about busy systems - FITRIM is explicitly called and
we should expect admins not to call it during the most busy time of the
day.  And even in it's current form it's already much better than
wiper.sh in that respect.

I think adding a periodical break using a modified scheme is fine, but
I'd really like to get the code out into some more testers hands for
now.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2010-12-28 16:09   ` Christoph Hellwig
@ 2011-01-03 10:49     ` Lukas Czerner
  0 siblings, 0 replies; 19+ messages in thread
From: Lukas Czerner @ 2011-01-03 10:49 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: lczerner, xfs, Alex Elder

On Tue, 28 Dec 2010, Christoph Hellwig wrote:

> On Wed, Dec 22, 2010 at 03:41:13PM -0600, Alex Elder wrote:
> > > +	error = xfs_alloc_read_agf(mp, NULL, agno,
> > > +				   XFS_ALLOC_FLAG_TRYLOCK, &agbp);
> > > +	if (error || !agbp) {
> > > +		if (error == EAGAIN)
> > > +			error = 0;
> > 
> > EAGAIN is ignored because it's an advisory interface, right?
> > How hard are we expected to try?  What I really mean is,
> > is the benefit of FITRIM enough that we should try again
> > later when we can get a buffer or lock on it?
> 
> That was the idea when I wrote this code.  But back then we called it
> regularly from a kernel thread.  For FITRIM it makes more sense to just
> remove the trylock.
> 
> > I don't know where (or if) FITRIM is precisely documented.
> > But I question whether truncating down the start offset is
> > the correct thing to do.  If the starting byte offset given
> > were not block-aligned, it seems like you should not assume
> > that the caller wanted the bytes below unmapped.  (This is
> > a broader question, not related directly to your change.)
> > 
> > Similarly, on the length it is probably best to truncate
> > it, because it avoids any bytes beyond the specified range
> > getting unmapped.  (I.e., in my mind what you did is the
> > right way to do it.)  But these interpretations are
> > dependent on the specific interpretation of FITRIM...
> 
> Good question.  Adding Lukas to the Cc.  I tried to talk him into
> writing a manpage to document the interface better, but that's only
> been a few days before the holidays.  This is something we should
> documented.  I don't quite understand the need for the range interface
> anyway.

First of all, sorry for not having proper documentation just yet, I'll
try to work something out.

Regarding truncation of starting offset and length (also minlen) the
proper way is to truncate everything down to align with block size. For
example this is the way I am doing it in ext4:

	start = range->start >> sb->s_blocksize_bits;

It is not really a big deal to trim something that was not originally
intended to, not mentioning that there probably was not any intention at
all when it is not aligned to block size. We just trim slightly more, or
slightly less and it does not affect filesystem nor user of the
filesystem, since it trims just not used space.

But what we want to do (and what I missed in ext4) is to align start+len
not just len alone, because we might miss some blocks, when the FITRIM is
invoked in a sequential manner. Then, truncating start down and truncating
start+len down is the right thing to do.

Regarding the need to have range interface I had two reasons to do this
as it is, but only one is really worth it. Since we want to run FITRIM
from the userspace on the background, we want to disturb other IO as
little as possible and whole filesystem trim can take minutes on some
devices (not talking about LUNs which is even more painful). So you'll
probably agree that we do not want to have possibly minute long stalls
when doing FITRIM.

But it is optional, so if you have fast device with small, not very
fragmented filesystem you can end up doing FITRIM on the whole filesystem
at once and it will be the right thing to do. Also, some might want to have
nice-n-shiny progress bars:).

Thanks!
-Lukas

> 
> > You don't update range anywhere, so the copyout below
> > is not really doing anything useful.  However I think
> > it should stay, and the number of bytes actually
> > trimmed should be updated and returned to the user.
> > That seems to be what ext4 does (the only reference
> > I found at the moment for what FITRIM is supposed
> > to return).
> 
> Yes, I guess I should update the range.
> 
> 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2010-12-30 11:41   ` Christoph Hellwig
@ 2011-01-03 10:57     ` Lukas Czerner
  2011-01-03 23:25       ` Dave Chinner
  0 siblings, 1 reply; 19+ messages in thread
From: Lukas Czerner @ 2011-01-03 10:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Lukas Czerner, xfs

On Thu, 30 Dec 2010, Christoph Hellwig wrote:

> On Thu, Dec 23, 2010 at 12:44:09PM +1100, Dave Chinner wrote:
> > Hmmmm - if we are given a range to trim, wouldn't we do better to
> > walk the by-bno btree instead?  i.e, we have two different cases
> > here - trim an entire AG, and trim part of an AG given by {start, end}. 
> > 
> > We only need these range checks on the AGs that are only partially
> > trimmed, and it would seem more efficient to me to walk the by-bno
> > tree for those rather than walk the by-size tree trying to find
> > range matches.
> 
> It might be, but I'm not sure it's really worth the complexity.  I can't
> really find any good use case for a partially trim anyway.
> 
> Ccing Lukas to figure out what his intent with this was.

Hi, I assume that you're talking about situation, when you call FITRIM
with start and len not covering the whole filesystem possibly resulting
in trimming just a part of the AG ? In this case I just copy my answer
from previous mail...

I had two reasons to do this as it is, but only one is really worth it.
Since we want to run FITRIM from the userspace on the background, we want
to disturb other IO as little as possible and whole filesystem trim can
take minutes on some devices (not talking about LUNs which is even more
painful). So you'll probably agree that we do not want to have possibly
minute long stalls when doing FITRIM. And presumably we do not want the
users to care about the size of AG, nor the blocksize (preferably).

But it is optional, so if you have fast device with small, not very
fragmented filesystem you can end up doing FITRIM on the whole filesystem
at once and it will be the right thing to do. Also, some might want to
have nice-n-shiny progress bars:).

Thanks!
-Lukas

> 
> > Hmmm - so we hold the agf locked for the entire trim? That's a bit
> > ugly. Given this is best effort, we could avoid this by changing it
> > to something like:
> > 
> > 	longest = 0;
> > 	do {
> > 		lock agf
> > 		force log
> > 		if (!longest)
> > 			longest = agf->longest
> > 		init cursor
> > 		do {
> > 			xfs_alloc_lookup_le(longest)
> > 			alloc_get_rec(&fbno, &flen)
> > 			check flen
> > 			busy search
> > 			discard
> > 			decrement cursor
> > 		} while (flen == longest)
> > 		destroy cursor
> > 		unlock agf
> > 		longest = flen;
> > 	} while(1)
> 
> This doesn't seem overly efficient.  Unless we have lots of extents
> with same size we keep having to allocate new cursors all the time.
> 
> I'm not too worried about busy systems - FITRIM is explicitly called and
> we should expect admins not to call it during the most busy time of the
> day.  And even in it's current form it's already much better than
> wiper.sh in that respect.
> 
> I think adding a periodical break using a modified scheme is fine, but
> I'd really like to get the code out into some more testers hands for
> now.
> 
> 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-03 10:57     ` Lukas Czerner
@ 2011-01-03 23:25       ` Dave Chinner
  2011-01-05 10:21         ` Lukas Czerner
  0 siblings, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2011-01-03 23:25 UTC (permalink / raw)
  To: Lukas Czerner; +Cc: Christoph Hellwig, xfs

On Mon, Jan 03, 2011 at 11:57:23AM +0100, Lukas Czerner wrote:
> On Thu, 30 Dec 2010, Christoph Hellwig wrote:
> 
> > On Thu, Dec 23, 2010 at 12:44:09PM +1100, Dave Chinner wrote:
> > > Hmmmm - if we are given a range to trim, wouldn't we do better to
> > > walk the by-bno btree instead?  i.e, we have two different cases
> > > here - trim an entire AG, and trim part of an AG given by {start, end}. 
> > > 
> > > We only need these range checks on the AGs that are only partially
> > > trimmed, and it would seem more efficient to me to walk the by-bno
> > > tree for those rather than walk the by-size tree trying to find
> > > range matches.
> > 
> > It might be, but I'm not sure it's really worth the complexity.  I can't
> > really find any good use case for a partially trim anyway.
> > 
> > Ccing Lukas to figure out what his intent with this was.
> 
> Hi, I assume that you're talking about situation, when you call FITRIM
> with start and len not covering the whole filesystem possibly resulting
> in trimming just a part of the AG ? In this case I just copy my answer
> from previous mail...

Yes.

> I had two reasons to do this as it is, but only one is really worth it.
> Since we want to run FITRIM from the userspace on the background, we want
> to disturb other IO as little as possible and whole filesystem trim can
> take minutes on some devices (not talking about LUNs which is even more
> painful).

Right - it's the high end we have to worry about for XFS: how long do you
expect a 100TB filesystem to take to TRIM? ;)

>
> So you'll probably agree that we do not want to have possibly
> minute long stalls when doing FITRIM. And presumably we do not want the
> users to care about the size of AG, nor the blocksize (preferably).

The issue is that an AG can cover 1TB of disk space, and locking it
for the entire time it takes to trim the free space will cause
IO disturbances. Even holding the AGF locked for a few seconds
can cause problems.

So I guess the question is what sort of ranges would we be expecting
to see a userspace background trim daemon be using?

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-03 23:25       ` Dave Chinner
@ 2011-01-05 10:21         ` Lukas Czerner
  2011-01-05 22:07           ` Michael Monnerie
  0 siblings, 1 reply; 19+ messages in thread
From: Lukas Czerner @ 2011-01-05 10:21 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, Lukas Czerner, xfs

On Tue, 4 Jan 2011, Dave Chinner wrote:

> On Mon, Jan 03, 2011 at 11:57:23AM +0100, Lukas Czerner wrote:
> > On Thu, 30 Dec 2010, Christoph Hellwig wrote:
> > 
> > > On Thu, Dec 23, 2010 at 12:44:09PM +1100, Dave Chinner wrote:
> > > > Hmmmm - if we are given a range to trim, wouldn't we do better to
> > > > walk the by-bno btree instead?  i.e, we have two different cases
> > > > here - trim an entire AG, and trim part of an AG given by {start, end}. 
> > > > 
> > > > We only need these range checks on the AGs that are only partially
> > > > trimmed, and it would seem more efficient to me to walk the by-bno
> > > > tree for those rather than walk the by-size tree trying to find
> > > > range matches.
> > > 
> > > It might be, but I'm not sure it's really worth the complexity.  I can't
> > > really find any good use case for a partially trim anyway.
> > > 
> > > Ccing Lukas to figure out what his intent with this was.
> > 
> > Hi, I assume that you're talking about situation, when you call FITRIM
> > with start and len not covering the whole filesystem possibly resulting
> > in trimming just a part of the AG ? In this case I just copy my answer
> > from previous mail...
> 
> Yes.
> 
> > I had two reasons to do this as it is, but only one is really worth it.
> > Since we want to run FITRIM from the userspace on the background, we want
> > to disturb other IO as little as possible and whole filesystem trim can
> > take minutes on some devices (not talking about LUNs which is even more
> > painful).
> 
> Right - it's the high end we have to worry about for XFS: how long do you
> expect a 100TB filesystem to take to TRIM? ;)

Presumably a really long time, but it really differs from device to
device.

> 
> >
> > So you'll probably agree that we do not want to have possibly
> > minute long stalls when doing FITRIM. And presumably we do not want the
> > users to care about the size of AG, nor the blocksize (preferably).
> 
> The issue is that an AG can cover 1TB of disk space, and locking it
> for the entire time it takes to trim the free space will cause
> IO disturbances. Even holding the AGF locked for a few seconds
> can cause problems.
> 
> So I guess the question is what sort of ranges would we be expecting
> to see a userspace background trim daemon be using?

Well, I think that doing 1TB trim is not very good idea even if AG is
not 1TB big. So doing smaller chunks is probably what userspace daemon
need to do.

Also note that we do not exactly need to do trim all the time. If we
notice that we are running out of space in advance (how much in advance?),
we can start trimming smaller chunks, until we reach a reasonable
pool of reclaimed space, or until we trim the whole device.

OR, the daemon can watch IO load and when it is low (presumably
at night) it can trim the device (possibly with very small cadence) as
some kind of precaution measure.

The fact is, I am not very familiar with various server IO loads and
typical usage of huge storages, so someone who is, can help us to create
heuristic for trim daemon.

Also I think it is a good idea to do something like:

	if (need_resched()) {
		unlock()
		cond_resched();
		lock()
	}

while trimming free chunks in the AG.

-Lukas


> 
> Cheers,
> 
> Dave.
> 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-05 10:21         ` Lukas Czerner
@ 2011-01-05 22:07           ` Michael Monnerie
  2011-01-05 22:50             ` Dave Chinner
  2011-01-06 18:06             ` Christoph Hellwig
  0 siblings, 2 replies; 19+ messages in thread
From: Michael Monnerie @ 2011-01-05 22:07 UTC (permalink / raw)
  To: xfs; +Cc: Christoph Hellwig, Lukas Czerner


[-- Attachment #1.1: Type: Text/Plain, Size: 1766 bytes --]

On Mittwoch, 5. Januar 2011 Lukas Czerner wrote:
> If we
> notice that we are running out of space in advance (how much in
> advance?), we can start trimming smaller chunks, until we reach
> reasonable a reasonable pool of reclaimed space, or until we trim
> the whole device.

Would it be possible that all blocks that have been in use since the 
last FITRIM run can be logged? Like this, we would only need to clean 
those. If you have a 2TB volume, probably only 25% of it have been 
rewritten (=500GB) since the last run, and of that maybe 80% are still 
in use at the time we run FITRIM, so only 100GB would need the cleanup.
Maybe each AG could store a bitmap of written blocks, that are reset by 
a FITRIM run. That could be an asynchronous written bitmap and shouldn't 
disturb performance too much. Maybe it's even only needed to store a bit 
per sunit*swidth blocks, to keep that table small. A mount option could 
be used to enable that feature, so only those which use thin 
provisioning or SSDs or similar devices enable it at wish.

Especially for 100TB size devices that seems like something that should 
be thought of, as maybe if you run FITRIM once a week there, only <10TB 
have been rewritten, if at all, and such a table would boost a FITRIM 
run a lot.

But maybe this is just bullshit of my tired brain, and I'm not a dev so 
I have no idea how hard it would be to implement that.

-- 
mit freundlichen Grüssen,
Michael Monnerie, Ing. BSc

it-management Internet Services: Protéger
http://proteger.at [gesprochen: Prot-e-schee]
Tel: +43 660 / 415 6531

// ****** Radiointerview zum Thema Spam ******
// http://www.it-podcast.at/archiv.html#podcast-100716
// 
// Haus zu verkaufen: http://zmi.at/langegg/

[-- Attachment #1.2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 121 bytes --]

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-05 22:07           ` Michael Monnerie
@ 2011-01-05 22:50             ` Dave Chinner
  2011-01-06  8:10               ` Michael Monnerie
  2011-01-06 18:08               ` Christoph Hellwig
  2011-01-06 18:06             ` Christoph Hellwig
  1 sibling, 2 replies; 19+ messages in thread
From: Dave Chinner @ 2011-01-05 22:50 UTC (permalink / raw)
  To: Michael Monnerie; +Cc: Christoph Hellwig, Lukas Czerner, xfs

On Wed, Jan 05, 2011 at 11:07:35PM +0100, Michael Monnerie wrote:
> On Mittwoch, 5. Januar 2011 Lukas Czerner wrote:
> > If we
> > notice that we are running out of space in advance (how much in
> > advance?), we can start trimming smaller chunks, until we reach
> > reasonable a reasonable pool of reclaimed space, or until we trim
> > the whole device.
> 
> Would it be possible that all blocks that have been in use since the 
> last FITRIM run can be logged? Like this, we would only need to clean 
> those. If you have a 2TB volume, probably only 25% of it have been 
> rewritten (=500GB) since the last run, and of that maybe 80% are still 
> in use at the time we run FITRIM, so only 100GB would need the cleanup.
> Maybe each AG could store a bitmap of written blocks, that are reset by 
> a FITRIM run. That could be an asynchronous written bitmap and shouldn't 
> disturb performance too much. Maybe it's even only needed to store a bit 
> per sunit*swidth blocks, to keep that table small. A mount option could 
> be used to enable that feature, so only those which use thin 
> provisioning or SSDs or similar devices enable it at wish.

Not easily. It would need a second set of free space btrees for
tracking freed but untrimmed extents. The idea of the background
trim is that it doesn't need all that complexity because all the
status information on where the trim process is up to can be kept
in userspace.

This is basically the same mode of functioning as the periodic
background xfs_fsr defragmentation mode - run it for an hour every
couple of nights, and it will slowly work its way through the entire
filesystem over a period of weeks. No state or additional on-disk
structures are needed for xfs_fsr to do its work....

The background trim is intended to enable even the slowest of
devices to be trimmed over time, while introducing as little runtime
overhead and complexity as possible. Hence adding complexity and
runtime overhead to optimise background trimming tends to defeat the
primary design goal....

> Especially for 100TB size devices that seems like something that should 
> be thought of, as maybe if you run FITRIM once a week there, only <10TB 
> have been rewritten, if at all, and such a table would boost a FITRIM 
> run a lot.

If we want optimised, only-trim-what-we-free behaviour, we need to
hook into the transaction subsystem and issue TRIM commands at the
time extents are actually freed. That is much more complex to
implement but much easier to optimise because it doesn't require
persistent state on disk. However, most devices are simply not ready
to handle the flood of TRIM commands this generates, with
performance degrading by ~10-20% for the best of devices and
_10-100x_ for the worst...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-05 22:50             ` Dave Chinner
@ 2011-01-06  8:10               ` Michael Monnerie
  2011-01-06  8:33                 ` Lukas Czerner
  2011-01-06  9:17                 ` Dave Chinner
  2011-01-06 18:08               ` Christoph Hellwig
  1 sibling, 2 replies; 19+ messages in thread
From: Michael Monnerie @ 2011-01-06  8:10 UTC (permalink / raw)
  To: xfs; +Cc: Christoph Hellwig, Lukas Czerner


[-- Attachment #1.1: Type: Text/Plain, Size: 1762 bytes --]

On Mittwoch, 5. Januar 2011 Dave Chinner wrote:
> No state or additional on-disk
> structures are needed for xfs_fsr to do it's work....

That's not exactly the same - once you defragged a file, you know it's 
done, and can skip it next time. But you don't know if the (free) space 
between block 0 and 20 on disk has been rewritten since the last trim 
run or not used at all, so you'd have to do it all again.
 
> The background trim is intended to enable even the slowest of
> devices to be trimmed over time, while introducing as little runtime
> overhead and complexity as possible. Hence adding complexity and
> runtime overhead to optimise background trimming tends to defeat the
> primary design goal....

It would be interesting to have real world numbers to see what's "best". 
I'd imagine a normal file or web server to store tons of files that are 
mostly read-only, while 5% of it is used a lot, as well as lots of temp 
files. For this, knowing what's been used would be great.

Also, I'm thinking of a NetApp storage, that has been setup to run 
deduplication on Sunday. It's best to run trim on Saturday and it should 
be finished before Sunday. For big storages that might be not easy to 
finish, if all disk space has to be freed explicitly.

And wouldn't it still be cheaper to keep a "written bmap" than to run 
over the full space of a (big) disk? I'd say depends on the workload.

-- 
mit freundlichen Grüssen,
Michael Monnerie, Ing. BSc

it-management Internet Services: Protéger
http://proteger.at [gesprochen: Prot-e-schee]
Tel: +43 660 / 415 6531

// ****** Radiointerview zum Thema Spam ******
// http://www.it-podcast.at/archiv.html#podcast-100716
// 
// Haus zu verkaufen: http://zmi.at/langegg/

[-- Attachment #1.2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 121 bytes --]

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-06  8:10               ` Michael Monnerie
@ 2011-01-06  8:33                 ` Lukas Czerner
  2011-01-06  8:40                   ` Lukas Czerner
  2011-01-06  9:17                 ` Dave Chinner
  1 sibling, 1 reply; 19+ messages in thread
From: Lukas Czerner @ 2011-01-06  8:33 UTC (permalink / raw)
  To: Michael Monnerie; +Cc: Christoph Hellwig, Lukas Czerner, xfs

On Thu, 6 Jan 2011, Michael Monnerie wrote:

> On Mittwoch, 5. Januar 2011 Dave Chinner wrote:
> > No state or additional on-disk
> > structures are needed for xfs_fsr to do it's work....
> 
> That's not exactly the same - once you defraged a file, you know it's 
> done, and can skip it next time. But you dont know if the (free) space 
> between block 0 and 20 on disk has been rewritten since the last trim 
> run or not used at all, so you'd have to do it all again.
>  
> > The background trim is intended to enable even the slowest of
> > devices to be trimmed over time, while introducing as little runtime
> > overhead and complexity as possible. Hence adding complexity and
> > runtime overhead to optimise background trimming tends to defeat the
> > primary design goal....
> 
> It would be interesting to have real world numbers to see what's "best". 
> I'd imagine a normal file or web server to store tons of files that are 
> mostly read-only, while 5% of it a used a lot, as well as lots of temp 
> files. For this, knowing what's been used would be great.
> 
> Also, I'm thinking of a NetApp storage, that has been setup to run 
> deduplication on Sunday. It's best to run trim on Saturday and it should 
> be finished before Sunday. For big storages that might be not easy to 
> finish, if all disk space has to be freed explicitly.
> 
> And wouldn't it still be cheaper to keep a "written bmap" than to run 
> over the full space of a (big) disk? I'd say depends on the workload.
> 

I have already investigated approach with storing the information about
blocks freed since last trim. However I found it not that useful for
several reasons.

1. Bitmaps are big, especially on huge filesystems you are talking about
it will significantly increase the memory utilization.

2. Rbtree might be better, however there is some threshold we need to
watch, because when it gets really fragmented it can be bigger than
bitmap. Moreover it adds significant complexity and of course CPU
utilization.

3. As I said several times, we do not need to trim when there were not
enough writes from the last trim, because when we have enough space for
example for wear leveling in SSD, we do not need to reclaim more, OR we
can do it really slowly as a precautionary measure.

All that said, we have much more flexibility in user space and we can
think of a lots of different heuristic to determine whether or not to do
the trim and how.

Thanks!
-Lukas

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-06  8:33                 ` Lukas Czerner
@ 2011-01-06  8:40                   ` Lukas Czerner
  0 siblings, 0 replies; 19+ messages in thread
From: Lukas Czerner @ 2011-01-06  8:40 UTC (permalink / raw)
  To: Lukas Czerner; +Cc: Michael Monnerie, Christoph Hellwig, xfs

On Thu, 6 Jan 2011, Lukas Czerner wrote:

> On Thu, 6 Jan 2011, Michael Monnerie wrote:
> 
> > On Mittwoch, 5. Januar 2011 Dave Chinner wrote:
> > > No state or additional on-disk
> > > structures are needed for xfs_fsr to do it's work....
> > 
> > That's not exactly the same - once you defraged a file, you know it's 
> > done, and can skip it next time. But you dont know if the (free) space 
> > between block 0 and 20 on disk has been rewritten since the last trim 
> > run or not used at all, so you'd have to do it all again.
> >  
> > > The background trim is intended to enable even the slowest of
> > > devices to be trimmed over time, while introducing as little runtime
> > > overhead and complexity as possible. Hence adding complexity and
> > > runtime overhead to optimise background trimming tends to defeat the
> > > primary design goal....
> > 
> > It would be interesting to have real world numbers to see what's "best". 
> > I'd imagine a normal file or web server to store tons of files that are 
> > mostly read-only, while 5% of it a used a lot, as well as lots of temp 
> > files. For this, knowing what's been used would be great.
> > 
> > Also, I'm thinking of a NetApp storage, that has been setup to run 
> > deduplication on Sunday. It's best to run trim on Saturday and it should 
> > be finished before Sunday. For big storages that might be not easy to 
> > finish, if all disk space has to be freed explicitly.
> > 
> > And wouldn't it still be cheaper to keep a "written bmap" than to run 
> > over the full space of a (big) disk? I'd say depends on the workload.
> > 
> 
> I have already investigated approach with storing the information about
> blocks freed since last trim. However I found it not that useful for
> several reasons.
> 
> 1. Bitmaps are big, especially on huge filesystems you are talking about
> it will significantly increase the memory utilization.
> 
> 2. Rbtree might be better, however there is some threshold we need to
> watch, because when it gets really fragmented it can be bigger than
> bitmap. Moreover it adds significant complexity and of course CPU
> utilization.

Not talking about the fact that neither bitmaps nor rbtrees can survive
umount.

> 
> 3. As I said several times, we do not need to trim when there was not
> enough writes from the last trim, because when we have enough space for
> example for wear leveling in SSD, we do not need to reclaim more, OR we
> can do is really slowly as a precaution measure.
> 
> All that said, we have much more flexibility in user space and we can
> think of a lots of different heuristic to determine whether or not to do
> the trim and how.
> 
> Thanks!
> -Lukas
> 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-06  8:10               ` Michael Monnerie
  2011-01-06  8:33                 ` Lukas Czerner
@ 2011-01-06  9:17                 ` Dave Chinner
  2011-01-06 16:50                   ` Michael Monnerie
  1 sibling, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2011-01-06  9:17 UTC (permalink / raw)
  To: Michael Monnerie; +Cc: Christoph Hellwig, Lukas Czerner, xfs

On Thu, Jan 06, 2011 at 09:10:29AM +0100, Michael Monnerie wrote:
> On Mittwoch, 5. Januar 2011 Dave Chinner wrote:
> > No state or additional on-disk
> > structures are needed for xfs_fsr to do it's work....
> 
> That's not exactly the same - once you defraged a file, you know it's 
> done, and can skip it next time.

Sure, but the way xfs_fsr skips it is by physically checking the
inode on the next filesystem pass. It does that efficiently because
the necessary information is cheap to read (via bulkstat), not
because we track what needs defrag in the filesystem on every
operation.

> But you dont know if the (free) space 
> between block 0 and 20 on disk has been rewritten since the last trim 
> run or not used at all, so you'd have to do it all again.

Sure, but the block device should, and therefore a TRIM to an area
with nothing to trim should be fast. The current generation drives
still have problems with this, but once device implementations are
better optimised there should be little penalty for trying to trim a
region that currently holds no data on the device.

basically we need to design for the future, not for the limitations
the current generation of devices have....

> > The background trim is intended to enable even the slowest of
> > devices to be trimmed over time, while introducing as little runtime
> > overhead and complexity as possible. Hence adding complexity and
> > runtime overhead to optimise background trimming tends to defeat the
> > primary design goal....
> 
> It would be interesting to have real world numbers to see what's "best". 
> I'd imagine a normal file or web server to store tons of files that are 
> mostly read-only, while 5% of it a used a lot, as well as lots of temp 
> files. For this, knowing what's been used would be great.

A filesystem does not necessarily reuse the same blocks for
temporary data. That "5%" of data that is written and erased all the
time could end up spanning 50% of the filesystem free space over the
period of a week....

> Also, I'm thinking of a NetApp storage, that has been setup to run 
> deduplication on Sunday. It's best to run trim on Saturday and it should 
> be finished before Sunday. For big storages that might be not easy to 
> finish, if all disk space has to be freed explicitly.
> 
> And wouldn't it still be cheaper to keep a "written bmap" than to run 
> over the full space of a (big) disk? I'd say depends on the workload.

So, lets keep a "used free space" tree in the filesystem for this
purpose. I'll spell out what it means in terms of runtime overhead
for you.

Firstly, every extent that is freed now needs to be inserted into
the new used free space tree.  That means transactions reservations
all increase in size by 30%, log traffic increases by 30%, cpu
overhead increases by ~30%, buffer cache footprint increases by 30%
and we've got 30% more metadata to write to disk. (30% because there
are already 2 free space btrees that are updated on every extent
free.)

Secondly, when we allocate an extent, we now have to check whether the
extent is in the used free space btree and remove it from there if
it is. That adds another btree lookup and modification to the
allocation code, which adds roughly 30% overhead there as well.

That's a lot of additional runtime overhead.

And then we have to consider the userspace utilities - we need to
add code to mkfs, xfs_repair, xfs_db, etc to enable checking and
repairing of the new btree, cross checking that every extent in used
free space tree is in the free space tree, etc. That's a lot of work
on top of just the kernel allocation code changes to keep the new
tree up to date.

IMO, tracking used free space to optimise background trim is
premature optimisation - it might be needed for a year or two, but
it will take at least that long to get such an optimisation stable
enough to consider for enterprise distros. And at which point, it
probably isn't going to be needed anymore.  Realistically, we have
to design for how we expect devices to behave in 2-3 years time, not
waste time trying to optimise for fundamentally broken devices that
nobody will be using in 2-3 years time...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-06  9:17                 ` Dave Chinner
@ 2011-01-06 16:50                   ` Michael Monnerie
  2011-01-06 18:10                     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Michael Monnerie @ 2011-01-06 16:50 UTC (permalink / raw)
  To: xfs; +Cc: Christoph Hellwig, Lukas Czerner


[-- Attachment #1.1: Type: Text/Plain, Size: 861 bytes --]

On Donnerstag, 6. Januar 2011 Dave Chinner wrote:
> Realistically, we have
> to design for how we expect devices to behave in 2-3 years time, not
> waste time trying to optimise for fundamentally broken devices that
> nobody will be using in 2-3 years time...

I see, thanks for the explanation. I hope that there will be a solution 
that allows us to FITRIM on Saturday "in the background", so the thin 
provisioned VMs are smaller and faster to back up. That's my immediate 
wish, as we currently don't use SSDs.

-- 
mit freundlichen Grüssen,
Michael Monnerie, Ing. BSc

it-management Internet Services: Protéger
http://proteger.at [gesprochen: Prot-e-schee]
Tel: +43 660 / 415 6531

// ****** Radiointerview zum Thema Spam ******
// http://www.it-podcast.at/archiv.html#podcast-100716
// 
// Haus zu verkaufen: http://zmi.at/langegg/

[-- Attachment #1.2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 121 bytes --]

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-05 22:07           ` Michael Monnerie
  2011-01-05 22:50             ` Dave Chinner
@ 2011-01-06 18:06             ` Christoph Hellwig
  1 sibling, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2011-01-06 18:06 UTC (permalink / raw)
  To: Michael Monnerie; +Cc: Christoph Hellwig, Lukas Czerner, xfs

On Wed, Jan 05, 2011 at 11:07:35PM +0100, Michael Monnerie wrote:
> On Mittwoch, 5. Januar 2011 Lukas Czerner wrote:
> > If we
> > notice that we are running out of space in advance (how much in
> > advance?), we can start trimming smaller chunks, until we reach
> > a reasonable pool of reclaimed space, or until we trim
> > the whole device.
> 
> Would it be possible that all blocks that have been in use since the 
> last FITRIM run can be logged?

It's a lot of overhead for the big hammer approach the FITRIM is.  If
you're interested look for some of my earlier discard support patches
as they implemented it using an rbtree.  But getting this fully right
just was a lot of overhead.  My strategy is to instead have the simple
big hammer FITRIM for e.g. people that want to reclaim storage from a
VM or thin provisioned array once in a while, while implementing an
optimized discard on commit for SSDs.  I'm planning to have the latter
ready for 2.6.39.  The biggest part to get it is to avoid allocating
extents from busy ranges, and I've finally got an older patch from Dave
to fully work for that.  The next step is to have proper asynchronous
discard requests to avoid the waiting overhead the other filesystems
have right now.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-05 22:50             ` Dave Chinner
  2011-01-06  8:10               ` Michael Monnerie
@ 2011-01-06 18:08               ` Christoph Hellwig
  1 sibling, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2011-01-06 18:08 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Michael Monnerie, Lukas Czerner, Christoph Hellwig, xfs

On Thu, Jan 06, 2011 at 09:50:39AM +1100, Dave Chinner wrote:
> If we want optimised, only-trim-what-we-free behaviour, we need to
> hook into the transaction subsystem and issue TRIM commands at the
> time extents are actually freed. That is much more complex to
> implement but much easier to optimise because it doesn't require
> persistent state on disk. However, most devices are simply not ready
> to handle the flood of TRIM commands this generates, with
> performance degrading by ~10-20% for the best of devices and
> _10-100x_ for the worst...

I tested a few devices that have zero to 1% degradation, but those
were arrays, which allow for queued WRITE SAME/UNMAP commands instead
of the unqueued TRIM in ATA.  But I'm getting close to finishing the
online discard and will start another benchmark session soon.  With
PCI-E flash devices that aren't limited by the ATA protocol, and the
proposed queueable TRIM command this looks like what we need for the
future.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: xfs: add FITRIM support
  2011-01-06 16:50                   ` Michael Monnerie
@ 2011-01-06 18:10                     ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2011-01-06 18:10 UTC (permalink / raw)
  To: Michael Monnerie; +Cc: Christoph Hellwig, Lukas Czerner, xfs

On Thu, Jan 06, 2011 at 05:50:42PM +0100, Michael Monnerie wrote:
> On Donnerstag, 6. Januar 2011 Dave Chinner wrote:
> > Realistically, we have
> > to design for how we expect devices to behave in 2-3 years time, not
> > waste time trying to optimise for fundamentally broken devices that
> > nobody will be using in 2-3 years time...
> 
> I see, thanks for the explanation. I hope that there will be a solution 
> that allows us to FITRIM on Saturday "in the background", so the thin 
> provisioned VMs are smaller and faster to back up. That's my immediate 
> wish, as we currently don't use SSDs.

That's what you can do with my FITRIM patches.  Right now the only
discard support known to me is my implementation that requires XFS as
backing store for the images.  And a hole punch of an existing hole
is very cheap in XFS, so trimming things multiple times is not an issue.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2011-01-06 18:08 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-25 11:23 xfs: add FITRIM support Christoph Hellwig
2010-12-22 21:41 ` Alex Elder
2010-12-28 16:09   ` Christoph Hellwig
2011-01-03 10:49     ` Lukas Czerner
2010-12-23  1:44 ` Dave Chinner
2010-12-30 11:41   ` Christoph Hellwig
2011-01-03 10:57     ` Lukas Czerner
2011-01-03 23:25       ` Dave Chinner
2011-01-05 10:21         ` Lukas Czerner
2011-01-05 22:07           ` Michael Monnerie
2011-01-05 22:50             ` Dave Chinner
2011-01-06  8:10               ` Michael Monnerie
2011-01-06  8:33                 ` Lukas Czerner
2011-01-06  8:40                   ` Lukas Czerner
2011-01-06  9:17                 ` Dave Chinner
2011-01-06 16:50                   ` Michael Monnerie
2011-01-06 18:10                     ` Christoph Hellwig
2011-01-06 18:08               ` Christoph Hellwig
2011-01-06 18:06             ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.