All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/7] better dquot caching
@ 2012-02-01 13:57 Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist Christoph Hellwig
                   ` (8 more replies)
  0 siblings, 9 replies; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-01 13:57 UTC (permalink / raw)
  To: xfs

This series improves handling of large numbers of dquots.  It replaces
the direct recycling of dquots from the freelist with a shrinker, removes
the upper bound of dquots, and uses per-filesystem structures for all
quota state, including switching from a hash to a radix-tree for lookups.

For repeated lookups of dquots out of a large pool I see improvements
between 50% and 500% compared to the previous code.  All these tests
have been performed with Q_XQUOTASYNC already disabled as it would
change the result too much for both the old and new code.

Note that the first patch probably is a candidate for Linux 3.3, as
the previous quota updates caused a lock order reversal in the old
quota reclaim code.  See the actual patch for more details.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
@ 2012-02-01 13:57 ` Christoph Hellwig
  2012-02-09 22:03   ` Ben Myers
  2012-02-01 13:57 ` [PATCH 2/7] xfs: per-filesystem dquot LRU lists Christoph Hellwig
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-01 13:57 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-quota-fix-shrinker --]
[-- Type: text/plain, Size: 17212 bytes --]

Stop reusing dquots from the freelist when allocating new ones directly, and
implement a shrinker that actually follows the specifications for the
interface.  The shrinker implementation is still highly suboptimal at this
point, but we can gradually work on it.

This also fixes a bug in the previous lock ordering, where we would take
the hash and dqlist locks inside of the freelist lock against the normal
lock ordering.  This is only solvable by introducing the dispose list,
and thus not when using direct reclaim of unused dquots for new allocations.

As a side-effect the quota upper bound and used to free ratio values in
/proc/fs/xfs/xqm are set to 0 as these values don't make any sense in the
new world order.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/kmem.h         |    6 -
 fs/xfs/xfs_dquot.c    |  103 ++++-------------
 fs/xfs/xfs_qm.c       |  293 +++++++++++++++++++-------------------------------
 fs/xfs/xfs_qm.h       |   14 --
 fs/xfs/xfs_qm_stats.c |    4 
 fs/xfs/xfs_trace.h    |    5 
 6 files changed, 142 insertions(+), 283 deletions(-)

Index: xfs/fs/xfs/kmem.h
===================================================================
--- xfs.orig/fs/xfs/kmem.h	2012-02-01 12:05:12.530712997 +0100
+++ xfs/fs/xfs/kmem.h	2012-02-01 12:06:55.620154512 +0100
@@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone)
 extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 
-static inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
-	return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
-}
-
 #endif /* __XFS_SUPPORT_KMEM_H__ */
Index: xfs/fs/xfs/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.c	2012-02-01 12:05:12.540712942 +0100
+++ xfs/fs/xfs/xfs_qm.c	2012-02-01 12:22:08.051878113 +0100
@@ -50,7 +50,6 @@
  */
 struct mutex	xfs_Gqm_lock;
 struct xfs_qm	*xfs_Gqm;
-uint		ndquot;
 
 kmem_zone_t	*qm_dqzone;
 kmem_zone_t	*qm_dqtrxzone;
@@ -93,7 +92,6 @@ xfs_Gqm_init(void)
 		goto out_free_udqhash;
 
 	hsize /= sizeof(xfs_dqhash_t);
-	ndquot = hsize << 8;
 
 	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
 	xqm->qm_dqhashmask = hsize - 1;
@@ -137,7 +135,6 @@ xfs_Gqm_init(void)
 		xqm->qm_dqtrxzone = qm_dqtrxzone;
 
 	atomic_set(&xqm->qm_totaldquots, 0);
-	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
 	xqm->qm_nrefs = 0;
 	return xqm;
 
@@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos(
 	return 0;
 }
 
+STATIC void
+xfs_qm_dqfree_one(
+	struct xfs_dquot	*dqp)
+{
+	struct xfs_mount	*mp = dqp->q_mount;
+	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 
+	mutex_lock(&dqp->q_hash->qh_lock);
+	list_del_init(&dqp->q_hashlist);
+	dqp->q_hash->qh_version++;
+	mutex_unlock(&dqp->q_hash->qh_lock);
+
+	mutex_lock(&qi->qi_dqlist_lock);
+	list_del_init(&dqp->q_mplist);
+	qi->qi_dquots--;
+	qi->qi_dqreclaims++;
+	mutex_unlock(&qi->qi_dqlist_lock);
 
-/*
- * Pop the least recently used dquot off the freelist and recycle it.
- */
-STATIC struct xfs_dquot *
-xfs_qm_dqreclaim_one(void)
+	xfs_qm_dqdestroy(dqp);
+}
+
+STATIC void
+xfs_qm_dqreclaim_one(
+	struct xfs_dquot	*dqp,
+	struct list_head	*dispose_list)
 {
-	struct xfs_dquot	*dqp;
-	int			restarts = 0;
+	struct xfs_mount	*mp = dqp->q_mount;
+	int			error;
 
-	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-restart:
-	list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-		struct xfs_mount *mp = dqp->q_mount;
+	if (!xfs_dqlock_nowait(dqp))
+		goto out_busy;
 
-		if (!xfs_dqlock_nowait(dqp))
-			continue;
+	/*
+	 * This dquot has acquired a reference in the meantime remove it from
+	 * the freelist and try again.
+	 */
+	if (dqp->q_nrefs) {
+		xfs_dqunlock(dqp);
 
-		/*
-		 * This dquot has already been grabbed by dqlookup.
-		 * Remove it from the freelist and try again.
-		 */
-		if (dqp->q_nrefs) {
-			trace_xfs_dqreclaim_want(dqp);
-			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-
-			list_del_init(&dqp->q_freelist);
-			xfs_Gqm->qm_dqfrlist_cnt--;
-			restarts++;
-			goto dqunlock;
-		}
+		trace_xfs_dqreclaim_want(dqp);
+		XQM_STATS_INC(xqmstats.xs_qm_dqwants);
 
-		ASSERT(dqp->q_hash);
-		ASSERT(!list_empty(&dqp->q_mplist));
+		list_del_init(&dqp->q_freelist);
+		xfs_Gqm->qm_dqfrlist_cnt--;
+		return;
+	}
 
-		/*
-		 * Try to grab the flush lock. If this dquot is in the process
-		 * of getting flushed to disk, we don't want to reclaim it.
-		 */
-		if (!xfs_dqflock_nowait(dqp))
-			goto dqunlock;
+	ASSERT(dqp->q_hash);
+	ASSERT(!list_empty(&dqp->q_mplist));
 
-		/*
-		 * We have the flush lock so we know that this is not in the
-		 * process of being flushed. So, if this is dirty, flush it
-		 * DELWRI so that we don't get a freelist infested with
-		 * dirty dquots.
-		 */
-		if (XFS_DQ_IS_DIRTY(dqp)) {
-			int	error;
+	/*
+	 * Try to grab the flush lock. If this dquot is in the process of
+	 * getting flushed to disk, we don't want to reclaim it.
+	 */
+	if (!xfs_dqflock_nowait(dqp))
+		goto out_busy;
 
-			trace_xfs_dqreclaim_dirty(dqp);
+	/*
+	 * We have the flush lock so we know that this is not in the
+	 * process of being flushed. So, if this is dirty, flush it
+	 * DELWRI so that we don't get a freelist infested with
+	 * dirty dquots.
+	 */
+	if (XFS_DQ_IS_DIRTY(dqp)) {
+		trace_xfs_dqreclaim_dirty(dqp);
 
-			/*
-			 * We flush it delayed write, so don't bother
-			 * releasing the freelist lock.
-			 */
-			error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
-			if (error) {
-				xfs_warn(mp, "%s: dquot %p flush failed",
-					__func__, dqp);
-			}
-			goto dqunlock;
+		/*
+		 * We flush it delayed write, so don't bother releasing the
+		 * freelist lock.
+		 */
+		error = xfs_qm_dqflush(dqp, 0);
+		if (error) {
+			xfs_warn(mp, "%s: dquot %p flush failed",
+				 __func__, dqp);
 		}
-		xfs_dqfunlock(dqp);
 
 		/*
-		 * Prevent lookup now that we are going to reclaim the dquot.
-		 * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
-		 * thus we can drop the lock now.
+		 * Give the dquot another try on the freelist, as the
+		 * flushing will take some time.
 		 */
-		dqp->dq_flags |= XFS_DQ_FREEING;
-		xfs_dqunlock(dqp);
-
-		mutex_lock(&dqp->q_hash->qh_lock);
-		list_del_init(&dqp->q_hashlist);
-		dqp->q_hash->qh_version++;
-		mutex_unlock(&dqp->q_hash->qh_lock);
-
-		mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-		list_del_init(&dqp->q_mplist);
-		mp->m_quotainfo->qi_dquots--;
-		mp->m_quotainfo->qi_dqreclaims++;
-		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-
-		ASSERT(dqp->q_nrefs == 0);
-		list_del_init(&dqp->q_freelist);
-		xfs_Gqm->qm_dqfrlist_cnt--;
-
-		mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-		return dqp;
-dqunlock:
-		xfs_dqunlock(dqp);
-		if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-			break;
-		goto restart;
+		goto out_busy;
 	}
+	xfs_dqfunlock(dqp);
 
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-	return NULL;
-}
+	/*
+	 * Prevent lookups now that we are past the point of no return.
+	 */
+	dqp->dq_flags |= XFS_DQ_FREEING;
+	xfs_dqunlock(dqp);
 
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- */
-STATIC int
-xfs_qm_shake_freelist(
-	int	howmany)
-{
-	int		nreclaimed = 0;
-	xfs_dquot_t	*dqp;
+	ASSERT(dqp->q_nrefs == 0);
+	list_move_tail(&dqp->q_freelist, dispose_list);
+	xfs_Gqm->qm_dqfrlist_cnt--;
+
+	trace_xfs_dqreclaim_done(dqp);
+	XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+	return;
 
-	if (howmany <= 0)
-		return 0;
+out_busy:
+	xfs_dqunlock(dqp);
 
-	while (nreclaimed < howmany) {
-		dqp = xfs_qm_dqreclaim_one();
-		if (!dqp)
-			return nreclaimed;
-		xfs_qm_dqdestroy(dqp);
-		nreclaimed++;
-	}
-	return nreclaimed;
+	/*
+	 * Move the dquot to the tail of the list so that we don't spin on it.
+	 */
+	list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+
+	trace_xfs_dqreclaim_busy(dqp);
+	XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
 }
 
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
 STATIC int
 xfs_qm_shake(
-	struct shrinker	*shrink,
-	struct shrink_control *sc)
+	struct shrinker		*shrink,
+	struct shrink_control	*sc)
 {
-	int	ndqused, nfree, n;
-	gfp_t gfp_mask = sc->gfp_mask;
-
-	if (!kmem_shake_allow(gfp_mask))
-		return 0;
-	if (!xfs_Gqm)
-		return 0;
-
-	nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
-	/* incore dquots in all f/s's */
-	ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
-	ASSERT(ndqused >= 0);
+	int			nr_to_scan = sc->nr_to_scan;
+	LIST_HEAD		(dispose_list);
+	struct xfs_dquot	*dqp;
 
-	if (nfree <= ndqused && nfree < ndquot)
+	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
 		return 0;
+	if (!nr_to_scan)
+		goto out;
 
-	ndqused *= xfs_Gqm->qm_dqfree_ratio;	/* target # of free dquots */
-	n = nfree - ndqused - ndquot;		/* # over target */
-
-	return xfs_qm_shake_freelist(MAX(nfree, n));
-}
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
-	xfs_dquot_t **O_dqpp)
-{
-	xfs_dquot_t	*dqp;
-
-	/*
-	 * Check against high water mark to see if we want to pop
-	 * a nincompoop dquot off the freelist.
-	 */
-	if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
-		/*
-		 * Try to recycle a dquot from the freelist.
-		 */
-		if ((dqp = xfs_qm_dqreclaim_one())) {
-			XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
-			/*
-			 * Just zero the core here. The rest will get
-			 * reinitialized by caller. XXX we shouldn't even
-			 * do this zero ...
-			 */
-			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-			*O_dqpp = dqp;
-			return B_FALSE;
-		}
-		XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+	while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
+		if (nr_to_scan-- <= 0)
+			break;
+		dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
+				       q_freelist);
+		xfs_qm_dqreclaim_one(dqp, &dispose_list);
 	}
+	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
 
-	/*
-	 * Allocate a brand new dquot on the kernel heap and return it
-	 * to the caller to initialize.
-	 */
-	ASSERT(xfs_Gqm->qm_dqzone != NULL);
-	*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
-	atomic_inc(&xfs_Gqm->qm_totaldquots);
-
-	return B_TRUE;
+	while (!list_empty(&dispose_list)) {
+		dqp = list_first_entry(&dispose_list, struct xfs_dquot,
+				       q_freelist);
+		list_del_init(&dqp->q_freelist);
+		xfs_qm_dqfree_one(dqp);
+	}
+out:
+	return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
 }
 
-
 /*
  * Start a transaction and write the incore superblock changes to
  * disk. flags parameter indicates which fields have changed.
Index: xfs/fs/xfs/xfs_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.c	2012-02-01 12:05:12.554046204 +0100
+++ xfs/fs/xfs/xfs_dquot.c	2012-02-01 12:22:02.135243499 +0100
@@ -63,82 +63,6 @@ int xfs_dqerror_mod = 33;
 static struct lock_class_key xfs_dquot_other_class;
 
 /*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
-	xfs_mount_t  *mp,
-	xfs_dqid_t   id,
-	uint	     type)
-{
-	xfs_dquot_t	*dqp;
-	boolean_t	brandnewdquot;
-
-	brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
-	dqp->dq_flags = type;
-	dqp->q_core.d_id = cpu_to_be32(id);
-	dqp->q_mount = mp;
-
-	/*
-	 * No need to re-initialize these if this is a reclaimed dquot.
-	 */
-	if (brandnewdquot) {
-		INIT_LIST_HEAD(&dqp->q_freelist);
-		mutex_init(&dqp->q_qlock);
-		init_waitqueue_head(&dqp->q_pinwait);
-
-		/*
-		 * Because we want to use a counting completion, complete
-		 * the flush completion once to allow a single access to
-		 * the flush completion without blocking.
-		 */
-		init_completion(&dqp->q_flush);
-		complete(&dqp->q_flush);
-
-		trace_xfs_dqinit(dqp);
-	} else {
-		/*
-		 * Only the q_core portion was zeroed in dqreclaim_one().
-		 * So, we need to reset others.
-		 */
-		dqp->q_nrefs = 0;
-		dqp->q_blkno = 0;
-		INIT_LIST_HEAD(&dqp->q_mplist);
-		INIT_LIST_HEAD(&dqp->q_hashlist);
-		dqp->q_bufoffset = 0;
-		dqp->q_fileoffset = 0;
-		dqp->q_transp = NULL;
-		dqp->q_gdquot = NULL;
-		dqp->q_res_bcount = 0;
-		dqp->q_res_icount = 0;
-		dqp->q_res_rtbcount = 0;
-		atomic_set(&dqp->q_pincount, 0);
-		dqp->q_hash = NULL;
-		ASSERT(list_empty(&dqp->q_freelist));
-
-		trace_xfs_dqreuse(dqp);
-	}
-
-	/*
-	 * In either case we need to make sure group quotas have a different
-	 * lock class than user quotas, to make sure lockdep knows we can
-	 * locks of one of each at the same time.
-	 */
-	if (!(type & XFS_DQ_USER))
-		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-
-	/*
-	 * log item gets initialized later
-	 */
-	return (dqp);
-}
-
-/*
  * This is called to free all the memory associated with a dquot
  */
 void
@@ -567,7 +491,32 @@ xfs_qm_dqread(
 	int			error;
 	int			cancelflags = 0;
 
-	dqp = xfs_qm_dqinit(mp, id, type);
+
+	dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+
+	dqp->dq_flags = type;
+	dqp->q_core.d_id = cpu_to_be32(id);
+	dqp->q_mount = mp;
+	INIT_LIST_HEAD(&dqp->q_freelist);
+	mutex_init(&dqp->q_qlock);
+	init_waitqueue_head(&dqp->q_pinwait);
+
+	/*
+	 * Because we want to use a counting completion, complete
+	 * the flush completion once to allow a single access to
+	 * the flush completion without blocking.
+	 */
+	init_completion(&dqp->q_flush);
+	complete(&dqp->q_flush);
+
+	/*
+	 * Make sure group quotas have a different lock class than user
+	 * quotas.
+	 */
+	if (!(type & XFS_DQ_USER))
+		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+
+	atomic_inc(&xfs_Gqm->qm_totaldquots);
 
 	trace_xfs_dqread(dqp);
 
Index: xfs/fs/xfs/xfs_qm.h
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.h	2012-02-01 12:05:12.564046150 +0100
+++ xfs/fs/xfs/xfs_qm.h	2012-02-01 12:22:02.171909967 +0100
@@ -26,24 +26,12 @@
 struct xfs_qm;
 struct xfs_inode;
 
-extern uint		ndquot;
 extern struct mutex	xfs_Gqm_lock;
 extern struct xfs_qm	*xfs_Gqm;
 extern kmem_zone_t	*qm_dqzone;
 extern kmem_zone_t	*qm_dqtrxzone;
 
 /*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS	4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO		2
-
-/*
  * Dquot hashtable constants/threshold values.
  */
 #define XFS_QM_HASHSIZE_LOW		(PAGE_SIZE / sizeof(xfs_dqhash_t))
@@ -74,7 +62,6 @@ typedef struct xfs_qm {
 	int		 qm_dqfrlist_cnt;
 	atomic_t	 qm_totaldquots; /* total incore dquots */
 	uint		 qm_nrefs;	 /* file systems with quota on */
-	int		 qm_dqfree_ratio;/* ratio of free to inuse dquots */
 	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
 	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */
 } xfs_qm_t;
@@ -143,7 +130,6 @@ extern int		xfs_qm_quotacheck(xfs_mount_
 extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
 
 /* dquot stuff */
-extern boolean_t	xfs_qm_dqalloc_incore(xfs_dquot_t **);
 extern int		xfs_qm_dqpurge_all(xfs_mount_t *, uint);
 extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
 
Index: xfs/fs/xfs/xfs_trace.h
===================================================================
--- xfs.orig/fs/xfs/xfs_trace.h	2012-02-01 12:05:12.577379410 +0100
+++ xfs/fs/xfs/xfs_trace.h	2012-02-01 12:06:55.623487828 +0100
@@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \
 DEFINE_DQUOT_EVENT(xfs_dqadjust);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
 DEFINE_DQUOT_EVENT(xfs_dqattach_found);
 DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
 DEFINE_DQUOT_EVENT(xfs_dqalloc);
 DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
 DEFINE_DQUOT_EVENT(xfs_dqread);
Index: xfs/fs/xfs/xfs_qm_stats.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm_stats.c	2012-02-01 12:05:12.590712672 +0100
+++ xfs/fs/xfs/xfs_qm_stats.c	2012-02-01 12:22:02.185243229 +0100
@@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file
 {
 	/* maximum; incore; ratio free to inuse; freelist */
 	seq_printf(m, "%d\t%d\t%d\t%u\n",
-			ndquot,
+			0,
 			xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
-			xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
+			0,
 			xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
 	return 0;
 }

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 2/7] xfs: per-filesystem dquot LRU lists
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist Christoph Hellwig
@ 2012-02-01 13:57 ` Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 3/7] xfs: use per-filesystem radix trees for dquot lookup Christoph Hellwig
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-01 13:57 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-quota-per-mount-lru-2 --]
[-- Type: text/plain, Size: 11237 bytes --]

Replace the global dquot lru lists with a per-filesystem one.

Note that the shrinker isn't wired up to the per-superblock VFS shrinker
infrastructure as it would have problems summing up and splitting the counts
for inodes and dquots.  I don't think this is a major problem as the quota
cache isn't as intertwined with the inode cache as the dentry cache is,
because an inode that is dropped from the cache will generally release
a dquot reference, but most of the time it won't be the last one.

This patch temporarily stops tracking the system-wide counting of dquots
on the LRU lists for /proc/fs/xfs/xqm, which will be added back later in
the series.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_dquot.c    |   37 +++++++++++++++--------------
 fs/xfs/xfs_dquot.h    |    2 -
 fs/xfs/xfs_qm.c       |   62 +++++++++++++++++++++-----------------------------
 fs/xfs/xfs_qm.h       |    7 +++--
 fs/xfs/xfs_qm_stats.c |    2 -
 5 files changed, 52 insertions(+), 58 deletions(-)

Index: xfs/fs/xfs/xfs_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.c	2012-02-01 12:22:02.135243499 +0100
+++ xfs/fs/xfs/xfs_dquot.c	2012-02-01 12:22:12.341854872 +0100
@@ -47,7 +47,7 @@
  *     qi->qi_dqlist_lock
  *       dquot->q_qlock (xfs_dqlock() and friends)
  *         dquot->q_flush (xfs_dqflock() and friends)
- *         xfs_Gqm->qm_dqfrlist_lock
+ *         qi->qi_lru_lock
  *
  * If two dquots need to be locked the order is user before group/project,
  * otherwise by the lowest id first, see xfs_dqlock2.
@@ -69,7 +69,7 @@ void
 xfs_qm_dqdestroy(
 	xfs_dquot_t	*dqp)
 {
-	ASSERT(list_empty(&dqp->q_freelist));
+	ASSERT(list_empty(&dqp->q_lru));
 
 	mutex_destroy(&dqp->q_qlock);
 	kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
@@ -497,7 +497,7 @@ xfs_qm_dqread(
 	dqp->dq_flags = type;
 	dqp->q_core.d_id = cpu_to_be32(id);
 	dqp->q_mount = mp;
-	INIT_LIST_HEAD(&dqp->q_freelist);
+	INIT_LIST_HEAD(&dqp->q_lru);
 	mutex_init(&dqp->q_qlock);
 	init_waitqueue_head(&dqp->q_pinwait);
 
@@ -858,7 +858,6 @@ restart:
 	return (0);
 }
 
-
 /*
  * Release a reference to the dquot (decrement ref-count)
  * and unlock it. If there is a group quota attached to this
@@ -884,12 +883,13 @@ recurse:
 
 	trace_xfs_dqput_free(dqp);
 
-	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-	if (list_empty(&dqp->q_freelist)) {
-		list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-		xfs_Gqm->qm_dqfrlist_cnt++;
+	mutex_lock(&dqp->q_mount->m_quotainfo->qi_lru_lock);
+	if (list_empty(&dqp->q_lru)) {
+		list_add_tail(&dqp->q_lru,
+			      &dqp->q_mount->m_quotainfo->qi_lru_list);
+		dqp->q_mount->m_quotainfo->qi_lru_count++;
 	}
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+	mutex_unlock(&dqp->q_mount->m_quotainfo->qi_lru_lock);
 
 	/*
 	 * If we just added a udquot to the freelist, then we want to release
@@ -1140,6 +1140,7 @@ xfs_qm_dqpurge(
 {
 	struct xfs_mount	*mp = dqp->q_mount;
 	struct xfs_dqhash	*qh = dqp->q_hash;
+	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 
 	xfs_dqlock(dqp);
 
@@ -1190,21 +1191,21 @@ xfs_qm_dqpurge(
 	qh->qh_version++;
 	mutex_unlock(&qh->qh_lock);
 
-	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
+	mutex_lock(&qi->qi_dqlist_lock);
 	list_del_init(&dqp->q_mplist);
-	mp->m_quotainfo->qi_dqreclaims++;
-	mp->m_quotainfo->qi_dquots--;
-	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+	qi->qi_dqreclaims++;
+	qi->qi_dquots--;
+	mutex_unlock(&qi->qi_dqlist_lock);
 
 	/*
 	 * We move dquots to the freelist as soon as their reference count
 	 * hits zero, so it really should be on the freelist here.
 	 */
-	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-	ASSERT(!list_empty(&dqp->q_freelist));
-	list_del_init(&dqp->q_freelist);
-	xfs_Gqm->qm_dqfrlist_cnt--;
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+	mutex_lock(&qi->qi_lru_lock);
+	ASSERT(!list_empty(&dqp->q_lru));
+	list_del_init(&dqp->q_lru);
+	qi->qi_lru_count--;
+	mutex_unlock(&qi->qi_lru_lock);
 
 	xfs_qm_dqdestroy(dqp);
 }
Index: xfs/fs/xfs/xfs_dquot.h
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.h	2012-02-01 12:22:02.000000000 +0100
+++ xfs/fs/xfs/xfs_dquot.h	2012-02-01 12:22:12.341854872 +0100
@@ -47,7 +47,7 @@ struct xfs_trans;
  */
 typedef struct xfs_dquot {
 	uint		 dq_flags;	/* various flags (XFS_DQ_*) */
-	struct list_head q_freelist;	/* global free list of dquots */
+	struct list_head q_lru;		/* global free list of dquots */
 	struct list_head q_mplist;	/* mount's list of dquots */
 	struct list_head q_hashlist;	/* gloabl hash list of dquots */
 	xfs_dqhash_t	*q_hash;	/* the hashchain header */
Index: xfs/fs/xfs/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.c	2012-02-01 12:22:08.051878113 +0100
+++ xfs/fs/xfs/xfs_qm.c	2012-02-01 12:24:27.234457429 +0100
@@ -61,11 +61,6 @@ STATIC int	xfs_qm_init_quotainos(xfs_mou
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
 STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
 
-static struct shrinker xfs_qm_shaker = {
-	.shrink = xfs_qm_shake,
-	.seeks = DEFAULT_SEEKS,
-};
-
 /*
  * Initialize the XQM structure.
  * Note that there is not one quota manager per file system.
@@ -106,13 +101,6 @@ xfs_Gqm_init(void)
 	}
 
 	/*
-	 * Freelist of all dquots of all file systems
-	 */
-	INIT_LIST_HEAD(&xqm->qm_dqfrlist);
-	xqm->qm_dqfrlist_cnt = 0;
-	mutex_init(&xqm->qm_dqfrlist_lock);
-
-	/*
 	 * dquot zone. we register our own low-memory callback.
 	 */
 	if (!qm_dqzone) {
@@ -122,8 +110,6 @@ xfs_Gqm_init(void)
 	} else
 		xqm->qm_dqzone = qm_dqzone;
 
-	register_shrinker(&xfs_qm_shaker);
-
 	/*
 	 * The t_dqinfo portion of transactions.
 	 */
@@ -156,12 +142,6 @@ xfs_qm_destroy(
 	ASSERT(xqm != NULL);
 	ASSERT(xqm->qm_nrefs == 0);
 
-	unregister_shrinker(&xfs_qm_shaker);
-
-	mutex_lock(&xqm->qm_dqfrlist_lock);
-	ASSERT(list_empty(&xqm->qm_dqfrlist));
-	mutex_unlock(&xqm->qm_dqfrlist_lock);
-
 	hsize = xqm->qm_dqhashmask + 1;
 	for (i = 0; i < hsize; i++) {
 		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -827,6 +807,10 @@ xfs_qm_init_quotainfo(
 	mutex_init(&qinf->qi_dqlist_lock);
 	lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
 
+	INIT_LIST_HEAD(&qinf->qi_lru_list);
+	qinf->qi_lru_count = 0;
+	mutex_init(&qinf->qi_lru_lock);
+
 	qinf->qi_dqreclaims = 0;
 
 	/* mutex used to serialize quotaoffs */
@@ -894,6 +878,9 @@ xfs_qm_init_quotainfo(
 		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
 	}
 
+	qinf->qi_shrinker.shrink = xfs_qm_shake;
+	qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
+	register_shrinker(&qinf->qi_shrinker);
 	return 0;
 }
 
@@ -913,6 +900,8 @@ xfs_qm_destroy_quotainfo(
 	ASSERT(qi != NULL);
 	ASSERT(xfs_Gqm != NULL);
 
+	unregister_shrinker(&qi->qi_shrinker);
+
 	/*
 	 * Release the reference that XQM kept, so that we know
 	 * when the XQM structure should be freed. We cannot assume
@@ -1624,6 +1613,7 @@ xfs_qm_dqreclaim_one(
 	struct list_head	*dispose_list)
 {
 	struct xfs_mount	*mp = dqp->q_mount;
+	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 	int			error;
 
 	if (!xfs_dqlock_nowait(dqp))
@@ -1639,8 +1629,8 @@ xfs_qm_dqreclaim_one(
 		trace_xfs_dqreclaim_want(dqp);
 		XQM_STATS_INC(xqmstats.xs_qm_dqwants);
 
-		list_del_init(&dqp->q_freelist);
-		xfs_Gqm->qm_dqfrlist_cnt--;
+		list_del_init(&dqp->q_lru);
+		qi->qi_lru_count--;
 		return;
 	}
 
@@ -1688,8 +1678,8 @@ xfs_qm_dqreclaim_one(
 	xfs_dqunlock(dqp);
 
 	ASSERT(dqp->q_nrefs == 0);
-	list_move_tail(&dqp->q_freelist, dispose_list);
-	xfs_Gqm->qm_dqfrlist_cnt--;
+	list_move_tail(&dqp->q_lru, dispose_list);
+	qi->qi_lru_count--;
 
 	trace_xfs_dqreclaim_done(dqp);
 	XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
@@ -1701,7 +1691,7 @@ out_busy:
 	/*
 	 * Move the dquot to the tail of the list so that we don't spin on it.
 	 */
-	list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+	list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
 
 	trace_xfs_dqreclaim_busy(dqp);
 	XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
@@ -1712,6 +1702,8 @@ xfs_qm_shake(
 	struct shrinker		*shrink,
 	struct shrink_control	*sc)
 {
+	struct xfs_quotainfo	*qi =
+		container_of(shrink, struct xfs_quotainfo, qi_shrinker);
 	int			nr_to_scan = sc->nr_to_scan;
 	LIST_HEAD		(dispose_list);
 	struct xfs_dquot	*dqp;
@@ -1721,24 +1713,23 @@ xfs_qm_shake(
 	if (!nr_to_scan)
 		goto out;
 
-	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-	while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
+	mutex_lock(&qi->qi_lru_lock);
+	while (!list_empty(&qi->qi_lru_list)) {
 		if (nr_to_scan-- <= 0)
 			break;
-		dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
-				       q_freelist);
+		dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
+				       q_lru);
 		xfs_qm_dqreclaim_one(dqp, &dispose_list);
 	}
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+	mutex_unlock(&qi->qi_lru_lock);
 
 	while (!list_empty(&dispose_list)) {
-		dqp = list_first_entry(&dispose_list, struct xfs_dquot,
-				       q_freelist);
-		list_del_init(&dqp->q_freelist);
+		dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
+		list_del_init(&dqp->q_lru);
 		xfs_qm_dqfree_one(dqp);
 	}
 out:
-	return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
+	return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
 }
 
 /*
Index: xfs/fs/xfs/xfs_qm.h
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.h	2012-02-01 12:22:02.000000000 +0100
+++ xfs/fs/xfs/xfs_qm.h	2012-02-01 12:22:12.345188187 +0100
@@ -57,9 +57,6 @@ typedef struct xfs_qm {
 	xfs_dqlist_t	*qm_usr_dqhtable;/* udquot hash table */
 	xfs_dqlist_t	*qm_grp_dqhtable;/* gdquot hash table */
 	uint		 qm_dqhashmask;	 /* # buckets in dq hashtab - 1 */
-	struct list_head qm_dqfrlist;	 /* freelist of dquots */
-	struct mutex	 qm_dqfrlist_lock;
-	int		 qm_dqfrlist_cnt;
 	atomic_t	 qm_totaldquots; /* total incore dquots */
 	uint		 qm_nrefs;	 /* file systems with quota on */
 	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
@@ -73,6 +70,9 @@ typedef struct xfs_qm {
 typedef struct xfs_quotainfo {
 	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */
 	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */
+	struct list_head qi_lru_list;
+	struct mutex	 qi_lru_lock;
+	int		 qi_lru_count;
 	struct list_head qi_dqlist;	 /* all dquots in filesys */
 	struct mutex	 qi_dqlist_lock;
 	int		 qi_dquots;
@@ -93,6 +93,7 @@ typedef struct xfs_quotainfo {
 	xfs_qcnt_t	 qi_isoftlimit;	 /* default inode count soft limit */
 	xfs_qcnt_t	 qi_rtbhardlimit;/* default realtime blk hard limit */
 	xfs_qcnt_t	 qi_rtbsoftlimit;/* default realtime blk soft limit */
+	struct shrinker  qi_shrinker;
 } xfs_quotainfo_t;
 
 
Index: xfs/fs/xfs/xfs_qm_stats.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm_stats.c	2012-02-01 12:22:02.000000000 +0100
+++ xfs/fs/xfs/xfs_qm_stats.c	2012-02-01 12:22:12.345188187 +0100
@@ -45,7 +45,7 @@ static int xqm_proc_show(struct seq_file
 			0,
 			xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
 			0,
-			xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
+			0);
 	return 0;
 }
 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 3/7] xfs: use per-filesystem radix trees for dquot lookup
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 2/7] xfs: per-filesystem dquot LRU lists Christoph Hellwig
@ 2012-02-01 13:57 ` Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 4/7] xfs: remove the per-filesystem list of dquots Christoph Hellwig
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-01 13:57 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-quota-radix-tree --]
[-- Type: text/plain, Size: 16898 bytes --]

Replace the global hash tables for looking up in-memory dquot structures
with per-filesystem radix trees to allow scaling to a large number of
in-memory dquot structures.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_dquot.c      |  175 ++++++++++--------------------------------------
 fs/xfs/xfs_dquot.h      |   12 ---
 fs/xfs/xfs_qm.c         |   95 ++------------------------
 fs/xfs/xfs_qm.h         |   19 ++---
 fs/xfs/xfs_quota_priv.h |   11 ---
 fs/xfs/xfs_trace.h      |    4 -
 6 files changed, 58 insertions(+), 258 deletions(-)

Index: xfs/fs/xfs/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.c	2012-02-01 12:24:27.234457429 +0100
+++ xfs/fs/xfs/xfs_qm.c	2012-02-01 12:25:23.134154595 +0100
@@ -54,9 +54,6 @@ struct xfs_qm	*xfs_Gqm;
 kmem_zone_t	*qm_dqzone;
 kmem_zone_t	*qm_dqtrxzone;
 
-STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
-
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
 STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
@@ -68,37 +65,9 @@ STATIC int	xfs_qm_shake(struct shrinker
 STATIC struct xfs_qm *
 xfs_Gqm_init(void)
 {
-	xfs_dqhash_t	*udqhash, *gdqhash;
 	xfs_qm_t	*xqm;
-	size_t		hsize;
-	uint		i;
-
-	/*
-	 * Initialize the dquot hash tables.
-	 */
-	udqhash = kmem_zalloc_greedy(&hsize,
-				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
-				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
-	if (!udqhash)
-		goto out;
-
-	gdqhash = kmem_zalloc_large(hsize);
-	if (!gdqhash)
-		goto out_free_udqhash;
-
-	hsize /= sizeof(xfs_dqhash_t);
 
 	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-	xqm->qm_dqhashmask = hsize - 1;
-	xqm->qm_usr_dqhtable = udqhash;
-	xqm->qm_grp_dqhtable = gdqhash;
-	ASSERT(xqm->qm_usr_dqhtable != NULL);
-	ASSERT(xqm->qm_grp_dqhtable != NULL);
-
-	for (i = 0; i < hsize; i++) {
-		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
-		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
-	}
 
 	/*
 	 * dquot zone. we register our own low-memory callback.
@@ -123,11 +92,6 @@ xfs_Gqm_init(void)
 	atomic_set(&xqm->qm_totaldquots, 0);
 	xqm->qm_nrefs = 0;
 	return xqm;
-
- out_free_udqhash:
-	kmem_free_large(udqhash);
- out:
-	return NULL;
 }
 
 /*
@@ -137,22 +101,9 @@ STATIC void
 xfs_qm_destroy(
 	struct xfs_qm	*xqm)
 {
-	int		hsize, i;
-
 	ASSERT(xqm != NULL);
 	ASSERT(xqm->qm_nrefs == 0);
 
-	hsize = xqm->qm_dqhashmask + 1;
-	for (i = 0; i < hsize; i++) {
-		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
-		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
-	}
-	kmem_free_large(xqm->qm_usr_dqhtable);
-	kmem_free_large(xqm->qm_grp_dqhtable);
-	xqm->qm_usr_dqhtable = NULL;
-	xqm->qm_grp_dqhtable = NULL;
-	xqm->qm_dqhashmask = 0;
-
 	kmem_free(xqm);
 }
 
@@ -763,14 +714,6 @@ xfs_qm_dqdetach(
 }
 
 /*
- * The hash chains and the mplist use the same xfs_dqhash structure as
- * their list head, but we can take the mplist qh_lock and one of the
- * hash qh_locks at the same time without any problem as they aren't
- * related.
- */
-static struct lock_class_key xfs_quota_mplist_class;
-
-/*
  * This initializes all the quota information that's kept in the
  * mount structure
  */
@@ -803,9 +746,12 @@ xfs_qm_init_quotainfo(
 		return error;
 	}
 
+	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
+	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
+	mutex_init(&qinf->qi_tree_lock);
+
 	INIT_LIST_HEAD(&qinf->qi_dqlist);
 	mutex_init(&qinf->qi_dqlist_lock);
-	lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
 
 	INIT_LIST_HEAD(&qinf->qi_lru_list);
 	qinf->qi_lru_count = 0;
@@ -925,30 +871,6 @@ xfs_qm_destroy_quotainfo(
 	mp->m_quotainfo = NULL;
 }
 
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
-	xfs_dqlist_t	*list,
-	char		*str,
-	int		n)
-{
-	mutex_init(&list->qh_lock);
-	INIT_LIST_HEAD(&list->qh_list);
-	list->qh_version = 0;
-	list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
-	xfs_dqlist_t	*list)
-{
-	mutex_destroy(&(list->qh_lock));
-}
-
 /*
  * Create an inode and return with a reference already taken, but unlocked
  * This is how we create quota inodes
@@ -1593,10 +1515,10 @@ xfs_qm_dqfree_one(
 	struct xfs_mount	*mp = dqp->q_mount;
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 
-	mutex_lock(&dqp->q_hash->qh_lock);
-	list_del_init(&dqp->q_hashlist);
-	dqp->q_hash->qh_version++;
-	mutex_unlock(&dqp->q_hash->qh_lock);
+	mutex_lock(&mp->m_quotainfo->qi_tree_lock);
+	radix_tree_delete(XFS_DQUOT_TREE(mp, dqp->q_core.d_flags),
+			  be32_to_cpu(dqp->q_core.d_id));
+	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
 
 	mutex_lock(&qi->qi_dqlist_lock);
 	list_del_init(&dqp->q_mplist);
@@ -1634,7 +1556,6 @@ xfs_qm_dqreclaim_one(
 		return;
 	}
 
-	ASSERT(dqp->q_hash);
 	ASSERT(!list_empty(&dqp->q_mplist));
 
 	/*
Index: xfs/fs/xfs/xfs_qm.h
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.h	2012-02-01 12:22:12.345188187 +0100
+++ xfs/fs/xfs/xfs_qm.h	2012-02-01 12:24:50.577664301 +0100
@@ -32,12 +32,6 @@ extern kmem_zone_t	*qm_dqzone;
 extern kmem_zone_t	*qm_dqtrxzone;
 
 /*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_HASHSIZE_LOW		(PAGE_SIZE / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH		((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
-
-/*
  * This defines the unit of allocation of dquots.
  * Currently, it is just one file system block, and a 4K blk contains 30
  * (136 * 30 = 4080) dquots. It's probably not worth trying to make
@@ -48,15 +42,10 @@ extern kmem_zone_t	*qm_dqtrxzone;
  */
 #define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1
 
-typedef xfs_dqhash_t	xfs_dqlist_t;
-
 /*
  * Quota Manager (global) structure. Lives only in core.
  */
 typedef struct xfs_qm {
-	xfs_dqlist_t	*qm_usr_dqhtable;/* udquot hash table */
-	xfs_dqlist_t	*qm_grp_dqhtable;/* gdquot hash table */
-	uint		 qm_dqhashmask;	 /* # buckets in dq hashtab - 1 */
 	atomic_t	 qm_totaldquots; /* total incore dquots */
 	uint		 qm_nrefs;	 /* file systems with quota on */
 	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
@@ -68,6 +57,9 @@ typedef struct xfs_qm {
  * The mount structure keeps a pointer to this.
  */
 typedef struct xfs_quotainfo {
+	struct radix_tree_root qi_uquota_tree;
+	struct radix_tree_root qi_gquota_tree;
+	struct mutex qi_tree_lock;
 	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */
 	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */
 	struct list_head qi_lru_list;
@@ -96,6 +88,11 @@ typedef struct xfs_quotainfo {
 	struct shrinker  qi_shrinker;
 } xfs_quotainfo_t;
 
+#define XFS_DQUOT_TREE(mp, type) \
+	((type & XFS_DQ_USER) ? \
+	 &((mp)->m_quotainfo->qi_uquota_tree) : \
+	 &((mp)->m_quotainfo->qi_gquota_tree))
+
 
 extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
 extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
Index: xfs/fs/xfs/xfs_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.c	2012-02-01 12:22:12.341854872 +0100
+++ xfs/fs/xfs/xfs_dquot.c	2012-02-01 12:24:50.577664301 +0100
@@ -43,7 +43,7 @@
  * Lock order:
  *
  * ip->i_lock
- *   qh->qh_lock
+ *   qi->qi_tree_lock
  *     qi->qi_dqlist_lock
  *       dquot->q_qlock (xfs_dqlock() and friends)
  *         dquot->q_flush (xfs_dqflock() and friends)
@@ -602,60 +602,6 @@ error0:
 }
 
 /*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; and, these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
-	xfs_mount_t		*mp,
-	xfs_dqid_t		id,
-	xfs_dqhash_t		*qh,
-	xfs_dquot_t		**O_dqpp)
-{
-	xfs_dquot_t		*dqp;
-
-	ASSERT(mutex_is_locked(&qh->qh_lock));
-
-	/*
-	 * Traverse the hashchain looking for a match
-	 */
-	list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
-		/*
-		 * We already have the hashlock. We don't need the
-		 * dqlock to look at the id field of the dquot, since the
-		 * id can't be modified without the hashlock anyway.
-		 */
-		if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
-			continue;
-
-		trace_xfs_dqlookup_found(dqp);
-
-		xfs_dqlock(dqp);
-		if (dqp->dq_flags & XFS_DQ_FREEING) {
-			*O_dqpp = NULL;
-			xfs_dqunlock(dqp);
-			return -1;
-		}
-
-		dqp->q_nrefs++;
-
-		/*
-		 * move the dquot to the front of the hashchain
-		 */
-		list_move(&dqp->q_hashlist, &qh->qh_list);
-		trace_xfs_dqlookup_done(dqp);
-		*O_dqpp = dqp;
-		return 0;
-	}
-
-	*O_dqpp = NULL;
-	return 1;
-}
-
-/*
  * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
  * a locked dquot, doing an allocation (if requested) as needed.
  * When both an inode and an id are given, the inode's id takes precedence.
@@ -672,9 +618,8 @@ xfs_qm_dqget(
 	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
 	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
 {
+	struct radix_tree_root *tree = XFS_DQUOT_TREE(mp, type);
 	xfs_dquot_t	*dqp;
-	xfs_dqhash_t	*h;
-	uint		version;
 	int		error;
 
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -683,7 +628,6 @@ xfs_qm_dqget(
 	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
 		return (ESRCH);
 	}
-	h = XFS_DQ_HASH(mp, id, type);
 
 #ifdef DEBUG
 	if (xfs_do_dqerror) {
@@ -707,34 +651,29 @@ xfs_qm_dqget(
 #endif
 
 restart:
-	mutex_lock(&h->qh_lock);
+	mutex_lock(&mp->m_quotainfo->qi_tree_lock);
+	dqp = radix_tree_lookup(tree, id);
+	if (dqp) {
+		xfs_dqlock(dqp);
+		if (dqp->dq_flags & XFS_DQ_FREEING) {
+			xfs_dqunlock(dqp);
+			mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
+			trace_xfs_dqget_freeing(dqp);
+			delay(1);
+			goto restart;
+		}
 
-	/*
-	 * Look in the cache (hashtable).
-	 * The chain is kept locked during lookup.
-	 */
-	switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
-	case -1:
-		XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-		mutex_unlock(&h->qh_lock);
-		delay(1);
-		goto restart;
-	case 0:
+		dqp->q_nrefs++;
+		mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
+
+		trace_xfs_dqget_hit(dqp);
 		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
-		/*
-		 * The dquot was found, moved to the front of the chain,
-		 * taken off the freelist if it was on it, and locked
-		 * at this point. Just unlock the hashchain and return.
-		 */
-		ASSERT(*O_dqpp);
-		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
-		mutex_unlock(&h->qh_lock);
-		trace_xfs_dqget_hit(*O_dqpp);
-		return 0;	/* success */
-	default:
-		XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
-		break;
+		*O_dqpp = dqp;
+		return 0;
 	}
+	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
+
+	XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
 
 	/*
 	 * Dquot cache miss. We don't want to keep the inode lock across
@@ -745,12 +684,6 @@ restart:
 	 */
 	if (ip)
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	/*
-	 * Save the hashchain version stamp, and unlock the chain, so that
-	 * we don't keep the lock across a disk read
-	 */
-	version = h->qh_version;
-	mutex_unlock(&h->qh_lock);
 
 	error = xfs_qm_dqread(mp, id, type, flags, &dqp);
 
@@ -760,9 +693,6 @@ restart:
 	if (error)
 		return error;
 
-	/*
-	 * Dquot lock comes after hashlock in the lock ordering
-	 */
 	if (ip) {
 		/*
 		 * A dquot could be attached to this inode by now, since
@@ -795,46 +725,21 @@ restart:
 		}
 	}
 
-	/*
-	 * Hashlock comes after ilock in lock order
-	 */
-	mutex_lock(&h->qh_lock);
-	if (version != h->qh_version) {
-		xfs_dquot_t *tmpdqp;
+	mutex_lock(&mp->m_quotainfo->qi_tree_lock);
+	error = -radix_tree_insert(tree, id, dqp);
+	if (unlikely(error)) {
+		WARN_ON(error != EEXIST);
+
 		/*
-		 * Now, see if somebody else put the dquot in the
-		 * hashtable before us. This can happen because we didn't
-		 * keep the hashchain lock. We don't have to worry about
-		 * lock order between the two dquots here since dqp isn't
-		 * on any findable lists yet.
+		 * Duplicate found. Just throw away the new dquot and start
+		 * over.
 		 */
-		switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
-		case 0:
-		case -1:
-			/*
-			 * Duplicate found, either in cache or on its way out.
-			 * Just throw away the new dquot and start over.
-			 */
-			if (tmpdqp)
-				xfs_qm_dqput(tmpdqp);
-			mutex_unlock(&h->qh_lock);
-			xfs_qm_dqdestroy(dqp);
-			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-			goto restart;
-		default:
-			break;
-		}
+		mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
+		trace_xfs_dqget_dup(dqp);
+		xfs_qm_dqdestroy(dqp);
+		XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+		goto restart;
 	}
-
-	/*
-	 * Put the dquot at the beginning of the hash-chain and mp's list
-	 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
-	 */
-	ASSERT(mutex_is_locked(&h->qh_lock));
-	dqp->q_hash = h;
-	list_add(&dqp->q_hashlist, &h->qh_list);
-	h->qh_version++;
-
 	/*
 	 * Attach this dquot to this filesystem's list of all dquots,
 	 * kept inside the mount structure in m_quotainfo field
@@ -850,7 +755,8 @@ restart:
 	list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
 	mp->m_quotainfo->qi_dquots++;
 	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-	mutex_unlock(&h->qh_lock);
+	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
+
  dqret:
 	ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	trace_xfs_dqget_miss(dqp);
@@ -1139,7 +1045,6 @@ xfs_qm_dqpurge(
 	struct xfs_dquot	*dqp)
 {
 	struct xfs_mount	*mp = dqp->q_mount;
-	struct xfs_dqhash	*qh = dqp->q_hash;
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 
 	xfs_dqlock(dqp);
@@ -1186,10 +1091,10 @@ xfs_qm_dqpurge(
 	xfs_dqfunlock(dqp);
 	xfs_dqunlock(dqp);
 
-	mutex_lock(&qh->qh_lock);
-	list_del_init(&dqp->q_hashlist);
-	qh->qh_version++;
-	mutex_unlock(&qh->qh_lock);
+	mutex_lock(&mp->m_quotainfo->qi_tree_lock);
+	radix_tree_delete(XFS_DQUOT_TREE(mp, dqp->q_core.d_flags),
+			  be32_to_cpu(dqp->q_core.d_id));
+	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
 
 	mutex_lock(&qi->qi_dqlist_lock);
 	list_del_init(&dqp->q_mplist);
Index: xfs/fs/xfs/xfs_dquot.h
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.h	2012-02-01 12:22:12.341854872 +0100
+++ xfs/fs/xfs/xfs_dquot.h	2012-02-01 12:24:50.577664301 +0100
@@ -29,16 +29,6 @@
  * when quotas are off.
  */
 
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
-	struct list_head  qh_list;
-	struct mutex	  qh_lock;
-	uint		  qh_version;	/* ever increasing version */
-	uint		  qh_nelems;	/* number of dquots on the list */
-} xfs_dqhash_t;
-
 struct xfs_mount;
 struct xfs_trans;
 
@@ -49,8 +39,6 @@ typedef struct xfs_dquot {
 	uint		 dq_flags;	/* various flags (XFS_DQ_*) */
 	struct list_head q_lru;		/* global free list of dquots */
 	struct list_head q_mplist;	/* mount's list of dquots */
-	struct list_head q_hashlist;	/* gloabl hash list of dquots */
-	xfs_dqhash_t	*q_hash;	/* the hashchain header */
 	struct xfs_mount*q_mount;	/* filesystem this relates to */
 	struct xfs_trans*q_transp;	/* trans this belongs to currently */
 	uint		 q_nrefs;	/* # active refs from inodes */
Index: xfs/fs/xfs/xfs_quota_priv.h
===================================================================
--- xfs.orig/fs/xfs/xfs_quota_priv.h	2012-02-01 12:05:07.000000000 +0100
+++ xfs/fs/xfs/xfs_quota_priv.h	2012-02-01 12:24:50.577664301 +0100
@@ -24,17 +24,6 @@
  */
 #define XFS_DQITER_MAP_SIZE	10
 
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
-				 (__psunsigned_t)(id)) & \
-				(xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
-				     (xfs_Gqm->qm_usr_dqhtable + \
-				      XFS_DQ_HASHVAL(mp, id)) : \
-				     (xfs_Gqm->qm_grp_dqhtable + \
-				      XFS_DQ_HASHVAL(mp, id)))
 #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
 	!dqp->q_core.d_blk_hardlimit && \
 	!dqp->q_core.d_blk_softlimit && \
Index: xfs/fs/xfs/xfs_trace.h
===================================================================
--- xfs.orig/fs/xfs/xfs_trace.h	2012-02-01 12:06:55.000000000 +0100
+++ xfs/fs/xfs/xfs_trace.h	2012-02-01 12:24:50.580997617 +0100
@@ -741,10 +741,10 @@ DEFINE_DQUOT_EVENT(xfs_dqalloc);
 DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
 DEFINE_DQUOT_EVENT(xfs_dqread);
 DEFINE_DQUOT_EVENT(xfs_dqread_fail);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
 DEFINE_DQUOT_EVENT(xfs_dqget_hit);
 DEFINE_DQUOT_EVENT(xfs_dqget_miss);
+DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
+DEFINE_DQUOT_EVENT(xfs_dqget_dup);
 DEFINE_DQUOT_EVENT(xfs_dqput);
 DEFINE_DQUOT_EVENT(xfs_dqput_wait);
 DEFINE_DQUOT_EVENT(xfs_dqput_free);

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 4/7] xfs: remove the per-filesystem list of dquots
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
                   ` (2 preceding siblings ...)
  2012-02-01 13:57 ` [PATCH 3/7] xfs: use per-filesystem radix trees for dquot lookup Christoph Hellwig
@ 2012-02-01 13:57 ` Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 5/7] xfs: use per-cpu data for the quota statistics Christoph Hellwig
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-01 13:57 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-quota-remove-mplist --]
[-- Type: text/plain, Size: 14155 bytes --]

Instead of keeping a separate per-filesystem list of dquots we can walk
the radix tree for the two places where we need to iterate all quota
structures.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_dquot.c |   35 ++----
 fs/xfs/xfs_dquot.h |    2 
 fs/xfs/xfs_qm.c    |  287 +++++++++++++++++++++++------------------------------
 fs/xfs/xfs_qm.h    |    4 
 4 files changed, 143 insertions(+), 185 deletions(-)

Index: xfs/fs/xfs/xfs_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.c	2012-02-01 12:24:50.577664301 +0100
+++ xfs/fs/xfs/xfs_dquot.c	2012-02-01 12:26:36.210425373 +0100
@@ -44,10 +44,9 @@
  *
  * ip->i_lock
  *   qi->qi_tree_lock
- *     qi->qi_dqlist_lock
- *       dquot->q_qlock (xfs_dqlock() and friends)
- *         dquot->q_flush (xfs_dqflock() and friends)
- *         qi->qi_lru_lock
+ *     dquot->q_qlock (xfs_dqlock() and friends)
+ *       dquot->q_flush (xfs_dqflock() and friends)
+ *       qi->qi_lru_lock
  *
  * If two dquots need to be locked the order is user before group/project,
  * otherwise by the lowest id first, see xfs_dqlock2.
@@ -740,11 +739,6 @@ restart:
 		XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
 		goto restart;
 	}
-	/*
-	 * Attach this dquot to this filesystem's list of all dquots,
-	 * kept inside the mount structure in m_quotainfo field
-	 */
-	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
 
 	/*
 	 * We return a locked dquot to the caller, with a reference taken
@@ -752,9 +746,7 @@ restart:
 	xfs_dqlock(dqp);
 	dqp->q_nrefs = 1;
 
-	list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
 	mp->m_quotainfo->qi_dquots++;
-	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
 	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
 
  dqret:
@@ -1038,16 +1030,23 @@ xfs_dqlock2(
 
 /*
  * Take a dquot out of the mount's dqlist as well as the hashlist.  This is
- * called via unmount as well as quotaoff, and the purge will always succeed.
+ * called via unmount as well as quotaoff.
  */
-void
+int
 xfs_qm_dqpurge(
-	struct xfs_dquot	*dqp)
+	struct xfs_dquot	*dqp,
+	int			flags)
 {
 	struct xfs_mount	*mp = dqp->q_mount;
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 
 	xfs_dqlock(dqp);
+	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
+		xfs_dqunlock(dqp);
+		return EAGAIN;
+	}
+
+	dqp->dq_flags |= XFS_DQ_FREEING;
 
 	/*
 	 * If we're turning off quotas, we have to make sure that, for
@@ -1091,16 +1090,9 @@ xfs_qm_dqpurge(
 	xfs_dqfunlock(dqp);
 	xfs_dqunlock(dqp);
 
-	mutex_lock(&mp->m_quotainfo->qi_tree_lock);
 	radix_tree_delete(XFS_DQUOT_TREE(mp, dqp->q_core.d_flags),
 			  be32_to_cpu(dqp->q_core.d_id));
-	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
-
-	mutex_lock(&qi->qi_dqlist_lock);
-	list_del_init(&dqp->q_mplist);
-	qi->qi_dqreclaims++;
 	qi->qi_dquots--;
-	mutex_unlock(&qi->qi_dqlist_lock);
 
 	/*
 	 * We move dquots to the freelist as soon as their reference count
@@ -1113,6 +1105,7 @@ xfs_qm_dqpurge(
 	mutex_unlock(&qi->qi_lru_lock);
 
 	xfs_qm_dqdestroy(dqp);
+	return 0;
 }
 
 /*
Index: xfs/fs/xfs/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.c	2012-02-01 12:25:23.134154595 +0100
+++ xfs/fs/xfs/xfs_qm.c	2012-02-01 12:26:36.210425373 +0100
@@ -308,172 +308,157 @@ xfs_qm_unmount_quotas(
 }
 
 /*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
+ * The quota lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage.
  */
+#define XFS_DQ_LOOKUP_BATCH	32
+
 STATIC int
-xfs_qm_dqflush_all(
-	struct xfs_mount	*mp)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	int			recl;
-	struct xfs_dquot	*dqp;
-	int			error;
+xfs_qm_dquot_walk(
+	struct xfs_mount	*mp,
+	int			type,
+	int			(*execute)(struct xfs_dquot *dqp, int flags),
+	int			flags)
+{
+	struct radix_tree_root	*tree = XFS_DQUOT_TREE(mp, type);
+	uint32_t		first_index;
+	int			last_error = 0;
+	int			skipped;
+	int			nr_found;
+
+restart:
+	skipped = 0;
+	first_index = 0;
+	nr_found = 0;
 
-	if (!q)
-		return 0;
-again:
-	mutex_lock(&q->qi_dqlist_lock);
-	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-		xfs_dqlock(dqp);
-		if ((dqp->dq_flags & XFS_DQ_FREEING) ||
-		    !XFS_DQ_IS_DIRTY(dqp)) {
-			xfs_dqunlock(dqp);
-			continue;
-		}
+	mutex_lock(&mp->m_quotainfo->qi_tree_lock);
+	do {
+		struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
+		int		error = 0;
+		int		i;
+
+		nr_found = radix_tree_gang_lookup(tree, (void **)batch,
+					first_index, XFS_DQ_LOOKUP_BATCH);
+		if (!nr_found)
+			break;
 
-		/* XXX a sentinel would be better */
-		recl = q->qi_dqreclaims;
-		if (!xfs_dqflock_nowait(dqp)) {
-			/*
-			 * If we can't grab the flush lock then check
-			 * to see if the dquot has been flushed delayed
-			 * write.  If so, grab its buffer and send it
-			 * out immediately.  We'll be able to acquire
-			 * the flush lock when the I/O completes.
-			 */
-			xfs_dqflock_pushbuf_wait(dqp);
+		for (i = 0; i < nr_found; i++) {
+			struct xfs_dquot *dqp = batch[i];
+
+			first_index = be32_to_cpu(dqp->q_core.d_id) + 1;
+
+			error = execute(batch[i], flags);
+			if (error == EAGAIN) {
+				skipped++;
+				continue;
+			}
+			if (error && last_error != EFSCORRUPTED)
+				last_error = error;
+		}
+		/* bail out if the filesystem is corrupted.  */
+		if (error == EFSCORRUPTED) {
+			skipped = 0;
+			break;
 		}
-		/*
-		 * Let go of the mplist lock. We don't want to hold it
-		 * across a disk write.
-		 */
-		mutex_unlock(&q->qi_dqlist_lock);
-		error = xfs_qm_dqflush(dqp, 0);
-		xfs_dqunlock(dqp);
-		if (error)
-			return error;
 
-		mutex_lock(&q->qi_dqlist_lock);
-		if (recl != q->qi_dqreclaims) {
-			mutex_unlock(&q->qi_dqlist_lock);
-			/* XXX restart limit */
-			goto again;
+		if (need_resched()) {
+			mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
+			cond_resched();
+			mutex_lock(&mp->m_quotainfo->qi_tree_lock);
 		}
+	} while (nr_found);
+	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
+
+	if (skipped) {
+		delay(1);
+		goto restart;
 	}
 
-	mutex_unlock(&q->qi_dqlist_lock);
-	/* return ! busy */
-	return 0;
+	return last_error;
 }
 
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
-	struct xfs_mount	*mp)
+STATIC int
+xfs_qm_flush_one(
+	struct xfs_dquot	*dqp,
+	int			flags)
 {
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	struct xfs_dquot	*dqp, *gdqp;
+	int			error = 0;
 
- again:
-	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-		xfs_dqlock(dqp);
-		if (dqp->dq_flags & XFS_DQ_FREEING) {
-			xfs_dqunlock(dqp);
-			mutex_unlock(&q->qi_dqlist_lock);
-			delay(1);
-			mutex_lock(&q->qi_dqlist_lock);
-			goto again;
-		}
+	xfs_dqlock(dqp);
+	if (dqp->dq_flags & XFS_DQ_FREEING)
+		goto out_unlock;
+	if (!XFS_DQ_IS_DIRTY(dqp))
+		goto out_unlock;
 
-		gdqp = dqp->q_gdquot;
-		if (gdqp)
-			dqp->q_gdquot = NULL;
-		xfs_dqunlock(dqp);
+	if (!xfs_dqflock_nowait(dqp))
+		xfs_dqflock_pushbuf_wait(dqp);
 
-		if (gdqp)
-			xfs_qm_dqrele(gdqp);
-	}
+	error = xfs_qm_dqflush(dqp, flags);
+
+out_unlock:
+	xfs_dqunlock(dqp);
+	return error;
 }
 
 /*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
+ * Release the group dquot pointer the user dquot may be carrying around
+ * as a hint.
  */
 STATIC int
-xfs_qm_dqpurge_int(
+xfs_qm_detach_gdquot(
+	struct xfs_dquot	*dqp,
+	int			flags)
+{
+	struct xfs_dquot	*gdqp;
+
+	xfs_dqlock(dqp);
+	/* XXX(hch): should we bother with freeing dquots here? */
+	if (dqp->dq_flags & XFS_DQ_FREEING) {
+		xfs_dqunlock(dqp);
+		return 0;
+	}
+	gdqp = dqp->q_gdquot;
+	if (gdqp) {
+		xfs_dqlock(gdqp);
+		dqp->q_gdquot = NULL;
+	}
+	xfs_dqunlock(dqp);
+	if (gdqp)
+		xfs_qm_dqput(gdqp);
+	return 0;
+}
+
+/*
+ * Purge the dquot cache.
+ *
+ * None of the dquots should really be busy at this point.
+ */
+int
+xfs_qm_dqpurge_all(
 	struct xfs_mount	*mp,
 	uint			flags)
 {
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	struct xfs_dquot	*dqp, *n;
-	uint			dqtype;
-	int			nmisses = 0;
-	LIST_HEAD		(dispose_list);
+	int			error = 0;
 
-	if (!q)
+	if (!mp->m_quotainfo)
 		return 0;
 
-	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
-	dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
-	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
-	mutex_lock(&q->qi_dqlist_lock);
-
 	/*
 	 * In the first pass through all incore dquots of this filesystem,
 	 * we release the group dquot pointers the user dquots may be
 	 * carrying around as a hint. We need to do this irrespective of
 	 * what's being turned off.
 	 */
-	xfs_qm_detach_gdquots(mp);
-
-	/*
-	 * Try to get rid of all of the unwanted dquots.
-	 */
-	list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
-		xfs_dqlock(dqp);
-		if ((dqp->dq_flags & dqtype) != 0 &&
-		    !(dqp->dq_flags & XFS_DQ_FREEING)) {
-			if (dqp->q_nrefs == 0) {
-				dqp->dq_flags |= XFS_DQ_FREEING;
-				list_move_tail(&dqp->q_mplist, &dispose_list);
-			} else
-				nmisses++;
-		}
-		xfs_dqunlock(dqp);
-	}
-	mutex_unlock(&q->qi_dqlist_lock);
-
-	list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
-		xfs_qm_dqpurge(dqp);
-
-	return nmisses;
-}
-
-int
-xfs_qm_dqpurge_all(
-	xfs_mount_t	*mp,
-	uint		flags)
-{
-	int		ndquots;
+	xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_detach_gdquot, 0);
 
-	/*
-	 * Purge the dquot cache.
-	 * None of the dquots should really be busy at this point.
-	 */
-	if (mp->m_quotainfo) {
-		while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
-			delay(ndquots * 10);
-		}
-	}
-	return 0;
+	if (!error && (flags & XFS_QMOPT_UQUOTA))
+		error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, 0);
+	if (!error && (flags & XFS_QMOPT_GQUOTA))
+		error = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, 0);
+	if (!error && (flags & XFS_QMOPT_PQUOTA))
+		error = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, 0);
+	return error;
 }
 
 STATIC int
@@ -750,15 +735,10 @@ xfs_qm_init_quotainfo(
 	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
 	mutex_init(&qinf->qi_tree_lock);
 
-	INIT_LIST_HEAD(&qinf->qi_dqlist);
-	mutex_init(&qinf->qi_dqlist_lock);
-
 	INIT_LIST_HEAD(&qinf->qi_lru_list);
 	qinf->qi_lru_count = 0;
 	mutex_init(&qinf->qi_lru_lock);
 
-	qinf->qi_dqreclaims = 0;
-
 	/* mutex used to serialize quotaoffs */
 	mutex_init(&qinf->qi_quotaofflock);
 
@@ -855,9 +835,6 @@ xfs_qm_destroy_quotainfo(
 	 */
 	xfs_qm_rele_quotafs_ref(mp);
 
-	ASSERT(list_empty(&qi->qi_dqlist));
-	mutex_destroy(&qi->qi_dqlist_lock);
-
 	if (qi->qi_uquotaip) {
 		IRELE(qi->qi_uquotaip);
 		qi->qi_uquotaip = NULL; /* paranoia */
@@ -1330,12 +1307,6 @@ xfs_qm_quotacheck(
 	ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
-	/*
-	 * There should be no cached dquots. The (simplistic) quotacheck
-	 * algorithm doesn't like that.
-	 */
-	ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
-
 	xfs_notice(mp, "Quotacheck needed: Please wait.");
 
 	/*
@@ -1374,12 +1345,15 @@ xfs_qm_quotacheck(
 	} while (!done);
 
 	/*
-	 * We've made all the changes that we need to make incore.
-	 * Flush them down to disk buffers if everything was updated
-	 * successfully.
+	 * We've made all the changes that we need to make incore.  Flush them
+	 * down to disk buffers if everything was updated successfully.
 	 */
-	if (!error)
-		error = xfs_qm_dqflush_all(mp);
+	if (!error && XFS_IS_UQUOTA_ON(mp))
+		error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one, 0);
+	if (!error && XFS_IS_GQUOTA_ON(mp))
+		error = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one, 0);
+	if (!error && XFS_IS_PQUOTA_ON(mp))
+		error = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one, 0);
 
 	/*
 	 * We can get this error if we couldn't do a dquot allocation inside
@@ -1518,13 +1492,8 @@ xfs_qm_dqfree_one(
 	mutex_lock(&mp->m_quotainfo->qi_tree_lock);
 	radix_tree_delete(XFS_DQUOT_TREE(mp, dqp->q_core.d_flags),
 			  be32_to_cpu(dqp->q_core.d_id));
-	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
-
-	mutex_lock(&qi->qi_dqlist_lock);
-	list_del_init(&dqp->q_mplist);
 	qi->qi_dquots--;
-	qi->qi_dqreclaims++;
-	mutex_unlock(&qi->qi_dqlist_lock);
+	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
 
 	xfs_qm_dqdestroy(dqp);
 }
Index: xfs/fs/xfs/xfs_qm.h
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.h	2012-02-01 12:24:50.577664301 +0100
+++ xfs/fs/xfs/xfs_qm.h	2012-02-01 12:26:36.210425373 +0100
@@ -65,11 +65,7 @@ typedef struct xfs_quotainfo {
 	struct list_head qi_lru_list;
 	struct mutex	 qi_lru_lock;
 	int		 qi_lru_count;
-	struct list_head qi_dqlist;	 /* all dquots in filesys */
-	struct mutex	 qi_dqlist_lock;
 	int		 qi_dquots;
-	int		 qi_dqreclaims;	 /* a change here indicates
-					    a removal in the dqlist */
 	time_t		 qi_btimelimit;	 /* limit for blks timer */
 	time_t		 qi_itimelimit;	 /* limit for inodes timer */
 	time_t		 qi_rtbtimelimit;/* limit for rt blks timer */
Index: xfs/fs/xfs/xfs_dquot.h
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.h	2012-02-01 12:24:50.577664301 +0100
+++ xfs/fs/xfs/xfs_dquot.h	2012-02-01 12:26:36.210425373 +0100
@@ -121,7 +121,7 @@ extern int		xfs_qm_dqread(struct xfs_mou
 					uint, struct xfs_dquot	**);
 extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern void		xfs_qm_dqpurge(xfs_dquot_t *);
+extern int		xfs_qm_dqpurge(xfs_dquot_t *, int);
 extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
 extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
 					xfs_disk_dquot_t *);

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 5/7] xfs: use per-cpu data for the quota statistics
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
                   ` (3 preceding siblings ...)
  2012-02-01 13:57 ` [PATCH 4/7] xfs: remove the per-filesystem list of dquots Christoph Hellwig
@ 2012-02-01 13:57 ` Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 6/7] xfs: user per-cpu stats for the total dquot numbers Christoph Hellwig
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-01 13:57 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-quota-percpu-stats --]
[-- Type: text/plain, Size: 4598 bytes --]

Use the same per-cpu scheme used in the main XFS statistics, as well as
the VFS inode and dcache statistics for the quota code.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_dquot.c    |    6 +++---
 fs/xfs/xfs_qm.c       |    6 +++---
 fs/xfs/xfs_qm_stats.c |   28 +++++++++++++++++-----------
 fs/xfs/xfs_qm_stats.h |   11 ++++++++---
 4 files changed, 31 insertions(+), 20 deletions(-)

Index: xfs/fs/xfs/xfs_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.c	2012-02-01 12:29:56.000000000 +0100
+++ xfs/fs/xfs/xfs_dquot.c	2012-02-01 12:30:02.839305968 +0100
@@ -666,13 +666,13 @@ restart:
 		mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
 
 		trace_xfs_dqget_hit(dqp);
-		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
+		XQM_STATS_INC(xs_qm_dqcachehits);
 		*O_dqpp = dqp;
 		return 0;
 	}
 	mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
 
-	XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+	XQM_STATS_INC(xs_qm_dqcachemisses);
 
 	/*
 	 * Dquot cache miss. We don't want to keep the inode lock across
@@ -736,7 +736,7 @@ restart:
 		mutex_unlock(&mp->m_quotainfo->qi_tree_lock);
 		trace_xfs_dqget_dup(dqp);
 		xfs_qm_dqdestroy(dqp);
-		XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+		XQM_STATS_INC(xs_qm_dquot_dups);
 		goto restart;
 	}
 
Index: xfs/fs/xfs/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.c	2012-02-01 12:29:56.000000000 +0100
+++ xfs/fs/xfs/xfs_qm.c	2012-02-01 12:30:26.685843449 +0100
@@ -1518,7 +1518,7 @@ xfs_qm_dqreclaim_one(
 		xfs_dqunlock(dqp);
 
 		trace_xfs_dqreclaim_want(dqp);
-		XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+		XQM_STATS_INC(xs_qm_dqwants);
 
 		list_del_init(&dqp->q_lru);
 		qi->qi_lru_count--;
@@ -1572,7 +1572,7 @@ xfs_qm_dqreclaim_one(
 	qi->qi_lru_count--;
 
 	trace_xfs_dqreclaim_done(dqp);
-	XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+	XQM_STATS_INC(xs_qm_dqreclaims);
 	return;
 
 out_busy:
@@ -1584,7 +1584,7 @@ out_busy:
 	list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
 
 	trace_xfs_dqreclaim_busy(dqp);
-	XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+	XQM_STATS_INC(xs_qm_dqreclaim_misses);
 }
 
 STATIC int
Index: xfs/fs/xfs/xfs_qm_stats.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm_stats.c	2012-02-01 12:22:12.000000000 +0100
+++ xfs/fs/xfs/xfs_qm_stats.c	2012-02-01 12:30:02.842639283 +0100
@@ -36,7 +36,16 @@
 #include "xfs_buf_item.h"
 #include "xfs_qm.h"
 
-struct xqmstats xqmstats;
+DEFINE_PER_CPU(struct xqmstats, xqmstats);
+
+static int xqmstats_sum(int idx)
+{
+	int val = 0, cpu;
+
+	for_each_possible_cpu(cpu)
+		val += *(((__u32 *)&per_cpu(xqmstats, cpu) + idx));
+	return max(val, 0);
+}
 
 static int xqm_proc_show(struct seq_file *m, void *v)
 {
@@ -64,16 +73,13 @@ static const struct file_operations xqm_
 
 static int xqmstat_proc_show(struct seq_file *m, void *v)
 {
-	/* quota performance statistics */
-	seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
-			xqmstats.xs_qm_dqreclaims,
-			xqmstats.xs_qm_dqreclaim_misses,
-			xqmstats.xs_qm_dquot_dups,
-			xqmstats.xs_qm_dqcachemisses,
-			xqmstats.xs_qm_dqcachehits,
-			xqmstats.xs_qm_dqwants,
-			xqmstats.xs_qm_dqshake_reclaims,
-			xqmstats.xs_qm_dqinact_reclaims);
+	int j;
+
+	seq_printf(m, "qm");
+	for (j = 0; j < XQMSTAT_END_XQMSTAT; j++)
+		seq_printf(m, " %u", xqmstats_sum(j));
+	seq_putc(m, '\n');
+
 	return 0;
 }
 
Index: xfs/fs/xfs/xfs_qm_stats.h
===================================================================
--- xfs.orig/fs/xfs/xfs_qm_stats.h	2012-02-01 12:05:06.000000000 +0100
+++ xfs/fs/xfs/xfs_qm_stats.h	2012-02-01 12:30:02.842639283 +0100
@@ -32,18 +32,23 @@ struct xqmstats {
 	__uint32_t		xs_qm_dqwants;
 	__uint32_t		xs_qm_dqshake_reclaims;
 	__uint32_t		xs_qm_dqinact_reclaims;
+#define XQMSTAT_END_XQMSTAT	8
 };
 
-extern struct xqmstats xqmstats;
+DECLARE_PER_CPU(struct xqmstats, xqmstats);
 
-# define XQM_STATS_INC(count)	( (count)++ )
+/*
+ * We don't disable preempt, not too worried about poking the
+ * wrong CPU's stat for now (also aggregated before reporting).
+ */
+# define XQM_STATS_INC(v)	(per_cpu(xqmstats, current_cpu()).v++)
 
 extern void xfs_qm_init_procfs(void);
 extern void xfs_qm_cleanup_procfs(void);
 
 #else
 
-# define XQM_STATS_INC(count)	do { } while (0)
+# define XQM_STATS_INC(v)	do { } while (0)
 
 static inline void xfs_qm_init_procfs(void) { };
 static inline void xfs_qm_cleanup_procfs(void) { };

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 6/7] xfs: user per-cpu stats for the total dquot numbers
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
                   ` (4 preceding siblings ...)
  2012-02-01 13:57 ` [PATCH 5/7] xfs: use per-cpu data for the quota statistics Christoph Hellwig
@ 2012-02-01 13:57 ` Christoph Hellwig
  2012-02-01 13:57 ` [PATCH 7/7] xfs: remove the globalk xfs_Gqm structure Christoph Hellwig
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-01 13:57 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-quota-percpu-stats-2 --]
[-- Type: text/plain, Size: 4901 bytes --]

Switch the total number of dquots counter over to use the per-cpu stats
implementation, and reintroduce the number of unused dquots counter
dropped earlier in the series.

Btw, I wonder if we should simply add these counters to /proc/fs/xfs/xqmstat
instead of keeping the odd format and mostly superfluous /proc/fs/xfs/xqm
around.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_dquot.c    |    7 ++++---
 fs/xfs/xfs_qm.c       |    3 ++-
 fs/xfs/xfs_qm.h       |    1 -
 fs/xfs/xfs_qm_stats.c |    4 ++--
 fs/xfs/xfs_qm_stats.h |    5 +++++
 5 files changed, 13 insertions(+), 7 deletions(-)

Index: xfs/fs/xfs/xfs_qm_stats.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm_stats.c	2012-02-01 12:30:02.000000000 +0100
+++ xfs/fs/xfs/xfs_qm_stats.c	2012-02-01 12:30:48.359059367 +0100
@@ -52,9 +52,9 @@ static int xqm_proc_show(struct seq_file
 	/* maximum; incore; ratio free to inuse; freelist */
 	seq_printf(m, "%d\t%d\t%d\t%u\n",
 			0,
-			xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
+			xqmstats_sum(XQMSTAT_END_XQMSTAT),
 			0,
-			0);
+			xqmstats_sum(XQMSTAT_END_XQMSTAT + 1));
 	return 0;
 }
 
Index: xfs/fs/xfs/xfs_qm_stats.h
===================================================================
--- xfs.orig/fs/xfs/xfs_qm_stats.h	2012-02-01 12:30:02.000000000 +0100
+++ xfs/fs/xfs/xfs_qm_stats.h	2012-02-01 12:30:48.362392682 +0100
@@ -33,6 +33,9 @@ struct xqmstats {
 	__uint32_t		xs_qm_dqshake_reclaims;
 	__uint32_t		xs_qm_dqinact_reclaims;
 #define XQMSTAT_END_XQMSTAT	8
+	__uint32_t		xs_qm_dquots;
+	__uint32_t		xs_qm_dquots_unused;
+#define XQMSTAT_END_XQM		(XQMSTAT_END_XQMSTAT + 4)
 };
 
 DECLARE_PER_CPU(struct xqmstats, xqmstats);
@@ -42,6 +45,7 @@ DECLARE_PER_CPU(struct xqmstats, xqmstat
  * wrong CPU's stat for now (also aggregated before reporting).
  */
 # define XQM_STATS_INC(v)	(per_cpu(xqmstats, current_cpu()).v++)
+# define XQM_STATS_DEC(v)	(per_cpu(xqmstats, current_cpu()).v--)
 
 extern void xfs_qm_init_procfs(void);
 extern void xfs_qm_cleanup_procfs(void);
@@ -49,6 +53,7 @@ extern void xfs_qm_cleanup_procfs(void);
 #else
 
 # define XQM_STATS_INC(v)	do { } while (0)
+# define XQM_STATS_DEC(v)	do { } while (0)
 
 static inline void xfs_qm_init_procfs(void) { };
 static inline void xfs_qm_cleanup_procfs(void) { };
Index: xfs/fs/xfs/xfs_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.c	2012-02-01 12:30:02.000000000 +0100
+++ xfs/fs/xfs/xfs_dquot.c	2012-02-01 12:30:48.362392682 +0100
@@ -72,8 +72,7 @@ xfs_qm_dqdestroy(
 
 	mutex_destroy(&dqp->q_qlock);
 	kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
-
-	atomic_dec(&xfs_Gqm->qm_totaldquots);
+	XQM_STATS_DEC(xs_qm_dquots);
 }
 
 /*
@@ -515,7 +514,7 @@ xfs_qm_dqread(
 	if (!(type & XFS_DQ_USER))
 		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
 
-	atomic_inc(&xfs_Gqm->qm_totaldquots);
+	XQM_STATS_INC(xs_qm_dquots);
 
 	trace_xfs_dqread(dqp);
 
@@ -786,6 +785,7 @@ recurse:
 		list_add_tail(&dqp->q_lru,
 			      &dqp->q_mount->m_quotainfo->qi_lru_list);
 		dqp->q_mount->m_quotainfo->qi_lru_count++;
+		XQM_STATS_INC(xs_qm_dquots_unused);
 	}
 	mutex_unlock(&dqp->q_mount->m_quotainfo->qi_lru_lock);
 
@@ -1104,6 +1104,7 @@ xfs_qm_dqpurge(
 	qi->qi_lru_count--;
 	mutex_unlock(&qi->qi_lru_lock);
 
+	XQM_STATS_DEC(xs_qm_dquots_unused);
 	xfs_qm_dqdestroy(dqp);
 	return 0;
 }
Index: xfs/fs/xfs/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.c	2012-02-01 12:30:26.000000000 +0100
+++ xfs/fs/xfs/xfs_qm.c	2012-02-01 12:31:11.988931351 +0100
@@ -89,7 +89,6 @@ xfs_Gqm_init(void)
 	} else
 		xqm->qm_dqtrxzone = qm_dqtrxzone;
 
-	atomic_set(&xqm->qm_totaldquots, 0);
 	xqm->qm_nrefs = 0;
 	return xqm;
 }
@@ -1522,6 +1521,7 @@ xfs_qm_dqreclaim_one(
 
 		list_del_init(&dqp->q_lru);
 		qi->qi_lru_count--;
+		XQM_STATS_DEC(xs_qm_dquots_unused);
 		return;
 	}
 
@@ -1570,6 +1570,7 @@ xfs_qm_dqreclaim_one(
 	ASSERT(dqp->q_nrefs == 0);
 	list_move_tail(&dqp->q_lru, dispose_list);
 	qi->qi_lru_count--;
+	XQM_STATS_DEC(xs_qm_dquots_unused);
 
 	trace_xfs_dqreclaim_done(dqp);
 	XQM_STATS_INC(xs_qm_dqreclaims);
Index: xfs/fs/xfs/xfs_qm.h
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.h	2012-02-01 12:26:36.000000000 +0100
+++ xfs/fs/xfs/xfs_qm.h	2012-02-01 12:30:48.365725997 +0100
@@ -46,7 +46,6 @@ extern kmem_zone_t	*qm_dqtrxzone;
  * Quota Manager (global) structure. Lives only in core.
  */
 typedef struct xfs_qm {
-	atomic_t	 qm_totaldquots; /* total incore dquots */
 	uint		 qm_nrefs;	 /* file systems with quota on */
 	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
 	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 7/7] xfs: remove the globalk xfs_Gqm structure
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
                   ` (5 preceding siblings ...)
  2012-02-01 13:57 ` [PATCH 6/7] xfs: user per-cpu stats for the total dquot numbers Christoph Hellwig
@ 2012-02-01 13:57 ` Christoph Hellwig
  2012-02-07  7:30 ` [PATCH 0/7] better dquot caching Ben Myers
  2012-02-11 20:13 ` Arkadiusz Miśkiewicz
  8 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-01 13:57 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-quota-kill-xfs-qm --]
[-- Type: text/plain, Size: 10502 bytes --]

If we initialize the slab caches for the quota code when XFS is loaded there
is no need for a global and reference counted quota manager structure.  Drop
all this overhead and also fix the error handling during quota initialization.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_dquot.c       |   38 ++++++++++++
 fs/xfs/xfs_qm.c          |  138 -----------------------------------------------
 fs/xfs/xfs_qm.h          |   15 -----
 fs/xfs/xfs_qm_bhv.c      |   18 ------
 fs/xfs/xfs_super.c       |   10 ++-
 fs/xfs/xfs_super.h       |    8 +-
 fs/xfs/xfs_trans_dquot.c |    4 -
 7 files changed, 49 insertions(+), 182 deletions(-)

Index: xfs/fs/xfs/xfs_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.c	2012-02-01 12:37:45.280134045 +0100
+++ xfs/fs/xfs/xfs_dquot.c	2012-02-01 12:37:45.296800621 +0100
@@ -59,6 +59,9 @@ int xfs_dqreq_num;
 int xfs_dqerror_mod = 33;
 #endif
 
+struct kmem_zone		*xfs_qm_dqtrxzone;
+static struct kmem_zone		*xfs_qm_dqzone;
+
 static struct lock_class_key xfs_dquot_other_class;
 
 /*
@@ -71,7 +74,7 @@ xfs_qm_dqdestroy(
 	ASSERT(list_empty(&dqp->q_lru));
 
 	mutex_destroy(&dqp->q_qlock);
-	kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+	kmem_zone_free(xfs_qm_dqzone, dqp);
 	XQM_STATS_DEC(xs_qm_dquots);
 }
 
@@ -490,7 +493,7 @@ xfs_qm_dqread(
 	int			cancelflags = 0;
 
 
-	dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+	dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
 
 	dqp->dq_flags = type;
 	dqp->q_core.d_id = cpu_to_be32(id);
@@ -1141,3 +1144,34 @@ xfs_dqflock_pushbuf_wait(
 out_lock:
 	xfs_dqflock(dqp);
 }
+
+int __init
+xfs_qm_init(void)
+{
+	xfs_qm_dqzone =
+		kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
+	if (!xfs_qm_dqzone)
+		goto out;
+
+	xfs_qm_dqtrxzone =
+		kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
+	if (!xfs_qm_dqtrxzone)
+		goto out_free_dqzone;
+
+	xfs_qm_init_procfs();
+	return 0;
+
+out_free_dqzone:
+	kmem_zone_destroy(xfs_qm_dqzone);
+out:
+	return -ENOMEM;
+}
+
+void __exit
+xfs_qm_exit(void)
+{
+	xfs_qm_cleanup_procfs();
+
+	kmem_zone_destroy(xfs_qm_dqtrxzone);
+	kmem_zone_destroy(xfs_qm_dqzone);
+}
Index: xfs/fs/xfs/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.c	2012-02-01 12:37:45.280134045 +0100
+++ xfs/fs/xfs/xfs_qm.c	2012-02-01 12:37:45.000000000 +0100
@@ -42,133 +42,11 @@
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 
-/*
- * The global quota manager. There is only one of these for the entire
- * system, _not_ one per file system. XQM keeps track of the overall
- * quota functionality, including maintaining the freelist and hash
- * tables of dquots.
- */
-struct mutex	xfs_Gqm_lock;
-struct xfs_qm	*xfs_Gqm;
-
-kmem_zone_t	*qm_dqzone;
-kmem_zone_t	*qm_dqtrxzone;
-
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
 STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
 
 /*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
- */
-STATIC struct xfs_qm *
-xfs_Gqm_init(void)
-{
-	xfs_qm_t	*xqm;
-
-	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-
-	/*
-	 * dquot zone. we register our own low-memory callback.
-	 */
-	if (!qm_dqzone) {
-		xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
-						"xfs_dquots");
-		qm_dqzone = xqm->qm_dqzone;
-	} else
-		xqm->qm_dqzone = qm_dqzone;
-
-	/*
-	 * The t_dqinfo portion of transactions.
-	 */
-	if (!qm_dqtrxzone) {
-		xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
-						   "xfs_dqtrx");
-		qm_dqtrxzone = xqm->qm_dqtrxzone;
-	} else
-		xqm->qm_dqtrxzone = qm_dqtrxzone;
-
-	xqm->qm_nrefs = 0;
-	return xqm;
-}
-
-/*
- * Destroy the global quota manager when its reference count goes to zero.
- */
-STATIC void
-xfs_qm_destroy(
-	struct xfs_qm	*xqm)
-{
-	ASSERT(xqm != NULL);
-	ASSERT(xqm->qm_nrefs == 0);
-
-	kmem_free(xqm);
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
-	struct xfs_mount *mp)
-{
-	/*
-	 * Need to lock the xfs_Gqm structure for things like this. For example,
-	 * the structure could disappear between the entry to this routine and
-	 * a HOLD operation if not locked.
-	 */
-	mutex_lock(&xfs_Gqm_lock);
-
-	if (!xfs_Gqm) {
-		xfs_Gqm = xfs_Gqm_init();
-		if (!xfs_Gqm) {
-			mutex_unlock(&xfs_Gqm_lock);
-			return ENOMEM;
-		}
-	}
-
-	/*
-	 * We can keep a list of all filesystems with quotas mounted for
-	 * debugging and statistical purposes, but ...
-	 * Just take a reference and get out.
-	 */
-	xfs_Gqm->qm_nrefs++;
-	mutex_unlock(&xfs_Gqm_lock);
-
-	return 0;
-}
-
-
-/*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
-	struct xfs_mount *mp)
-{
-	ASSERT(xfs_Gqm);
-	ASSERT(xfs_Gqm->qm_nrefs > 0);
-
-	/*
-	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
-	 * be restarted.
-	 */
-	mutex_lock(&xfs_Gqm_lock);
-	if (--xfs_Gqm->qm_nrefs == 0) {
-		xfs_qm_destroy(xfs_Gqm);
-		xfs_Gqm = NULL;
-	}
-	mutex_unlock(&xfs_Gqm_lock);
-}
-
-/*
  * Just destroy the quotainfo structure.
  */
 void
@@ -711,13 +589,6 @@ xfs_qm_init_quotainfo(
 
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
-	/*
-	 * Tell XQM that we exist as soon as possible.
-	 */
-	if ((error = xfs_qm_hold_quotafs_ref(mp))) {
-		return error;
-	}
-
 	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
 
 	/*
@@ -823,17 +694,9 @@ xfs_qm_destroy_quotainfo(
 
 	qi = mp->m_quotainfo;
 	ASSERT(qi != NULL);
-	ASSERT(xfs_Gqm != NULL);
 
 	unregister_shrinker(&qi->qi_shrinker);
 
-	/*
-	 * Release the reference that XQM kept, so that we know
-	 * when the XQM structure should be freed. We cannot assume
-	 * that xfs_Gqm is non-null after this point.
-	 */
-	xfs_qm_rele_quotafs_ref(mp);
-
 	if (qi->qi_uquotaip) {
 		IRELE(qi->qi_uquotaip);
 		qi->qi_uquotaip = NULL; /* paranoia */
@@ -1392,7 +1255,6 @@ xfs_qm_quotacheck(
 		 * We must turn off quotas.
 		 */
 		ASSERT(mp->m_quotainfo != NULL);
-		ASSERT(xfs_Gqm != NULL);
 		xfs_qm_destroy_quotainfo(mp);
 		if (xfs_mount_reset_sbqflags(mp)) {
 			xfs_warn(mp,
Index: xfs/fs/xfs/xfs_qm.h
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.h	2012-02-01 12:37:45.280134045 +0100
+++ xfs/fs/xfs/xfs_qm.h	2012-02-01 12:37:45.300133937 +0100
@@ -23,13 +23,9 @@
 #include "xfs_quota_priv.h"
 #include "xfs_qm_stats.h"
 
-struct xfs_qm;
 struct xfs_inode;
 
-extern struct mutex	xfs_Gqm_lock;
-extern struct xfs_qm	*xfs_Gqm;
-extern kmem_zone_t	*qm_dqzone;
-extern kmem_zone_t	*qm_dqtrxzone;
+extern struct kmem_zone	*xfs_qm_dqtrxzone;
 
 /*
  * This defines the unit of allocation of dquots.
@@ -43,15 +39,6 @@ extern kmem_zone_t	*qm_dqtrxzone;
 #define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1
 
 /*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
-	uint		 qm_nrefs;	 /* file systems with quota on */
-	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
-	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */
-} xfs_qm_t;
-
-/*
  * Various quota information for individual filesystems.
  * The mount structure keeps a pointer to this.
  */
Index: xfs/fs/xfs/xfs_qm_bhv.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm_bhv.c	2012-02-01 12:34:07.194648849 +0100
+++ xfs/fs/xfs/xfs_qm_bhv.c	2012-02-01 12:37:45.300133937 +0100
@@ -86,21 +86,3 @@ xfs_qm_statvfs(
 		xfs_qm_dqput(dqp);
 	}
 }
-
-void __init
-xfs_qm_init(void)
-{
-	printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
-	mutex_init(&xfs_Gqm_lock);
-	xfs_qm_init_procfs();
-}
-
-void __exit
-xfs_qm_exit(void)
-{
-	xfs_qm_cleanup_procfs();
-	if (qm_dqzone)
-		kmem_zone_destroy(qm_dqzone);
-	if (qm_dqtrxzone)
-		kmem_zone_destroy(qm_dqtrxzone);
-}
Index: xfs/fs/xfs/xfs_trans_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_trans_dquot.c	2012-02-01 12:34:07.204648795 +0100
+++ xfs/fs/xfs/xfs_trans_dquot.c	2012-02-01 12:37:45.300133937 +0100
@@ -876,7 +876,7 @@ STATIC void
 xfs_trans_alloc_dqinfo(
 	xfs_trans_t	*tp)
 {
-	tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+	tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP);
 }
 
 void
@@ -885,6 +885,6 @@ xfs_trans_free_dqinfo(
 {
 	if (!tp->t_dqinfo)
 		return;
-	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+	kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo);
 	tp->t_dqinfo = NULL;
 }
Index: xfs/fs/xfs/xfs_super.c
===================================================================
--- xfs.orig/fs/xfs/xfs_super.c	2012-02-01 12:34:07.214648740 +0100
+++ xfs/fs/xfs/xfs_super.c	2012-02-01 12:37:45.303467252 +0100
@@ -1652,13 +1652,17 @@ init_xfs_fs(void)
 	if (error)
 		goto out_cleanup_procfs;
 
-	vfs_initquota();
+	error = xfs_qm_init();
+	if (error)
+		goto out_sysctl_unregister;
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
-		goto out_sysctl_unregister;
+		goto out_qm_exit;
 	return 0;
 
+ out_qm_exit:
+	xfs_qm_exit();
  out_sysctl_unregister:
 	xfs_sysctl_unregister();
  out_cleanup_procfs:
@@ -1680,7 +1684,7 @@ init_xfs_fs(void)
 STATIC void __exit
 exit_xfs_fs(void)
 {
-	vfs_exitquota();
+	xfs_qm_exit();
 	unregister_filesystem(&xfs_fs_type);
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
Index: xfs/fs/xfs/xfs_super.h
===================================================================
--- xfs.orig/fs/xfs/xfs_super.h	2012-02-01 12:34:07.231315317 +0100
+++ xfs/fs/xfs/xfs_super.h	2012-02-01 12:37:45.303467252 +0100
@@ -21,13 +21,11 @@
 #include <linux/exportfs.h>
 
 #ifdef CONFIG_XFS_QUOTA
-extern void xfs_qm_init(void);
+extern int xfs_qm_init(void);
 extern void xfs_qm_exit(void);
-# define vfs_initquota()	xfs_qm_init()
-# define vfs_exitquota()	xfs_qm_exit()
 #else
-# define vfs_initquota()	do { } while (0)
-# define vfs_exitquota()	do { } while (0)
+# define xfs_qm_init()	(0)
+# define xfs_qm_exit()	do { } while (0)
 #endif
 
 #ifdef CONFIG_XFS_POSIX_ACL

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 0/7] better dquot caching
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
                   ` (6 preceding siblings ...)
  2012-02-01 13:57 ` [PATCH 7/7] xfs: remove the globalk xfs_Gqm structure Christoph Hellwig
@ 2012-02-07  7:30 ` Ben Myers
  2012-02-07 13:24   ` Christoph Hellwig
  2012-02-11 20:13 ` Arkadiusz Miśkiewicz
  8 siblings, 1 reply; 19+ messages in thread
From: Ben Myers @ 2012-02-07  7:30 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

Hey Christoph,

On Wed, Feb 01, 2012 at 08:57:19AM -0500, Christoph Hellwig wrote:
> This series improves handling of large number of dquots.  It replaced
> the direct recycling of dquots from the freelist with a shrinker, removes
> the upper bound of dquots, and uses per-filesystem structures for all
> quota state, including switching from a hash to a radix-tree for lookups.
> 
> For repeated lookups of dquots out of a large pool I see improvements
> betwen 50% and 500% compared to the previous code.  All these tests
> have been performed with Q_XQUOTASYNC already disabled as it would
> change the result to much for both the old and new code.
> 
> Note that the first patch probably is a candidate for Linux 3.3, as
> the previous quota updates caused a lock order reversal in the old
> quota reclaim code.  See the actual patch for more details.

These conflict with Chandra's project quota patches.  I've started
looking at them again, but I am a little uncomfortable with the change
to the superblock...  could I get your opinion of that while I take a
look at these?

Thanks,
	Ben

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 0/7] better dquot caching
  2012-02-07  7:30 ` [PATCH 0/7] better dquot caching Ben Myers
@ 2012-02-07 13:24   ` Christoph Hellwig
  2012-02-07 15:23     ` Ben Myers
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-07 13:24 UTC (permalink / raw)
  To: Ben Myers; +Cc: Christoph Hellwig, xfs

On Tue, Feb 07, 2012 at 01:30:09AM -0600, Ben Myers wrote:
> These conflict with Chandra's project quota patches.  I've started
> looking at them again, but I am a little uncomfortable with the change
> to the superblock...  could I get your opinion of that while I take a
> look at these?

Can you review patch 1 for now as that's a 3.3 candidate.  I'll rebase
the rest on top his changes later.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 0/7] better dquot caching
  2012-02-07 13:24   ` Christoph Hellwig
@ 2012-02-07 15:23     ` Ben Myers
  0 siblings, 0 replies; 19+ messages in thread
From: Ben Myers @ 2012-02-07 15:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Tue, Feb 07, 2012 at 08:24:52AM -0500, Christoph Hellwig wrote:
> On Tue, Feb 07, 2012 at 01:30:09AM -0600, Ben Myers wrote:
> > These conflict with Chandra's project quota patches.  I've started
> > looking at them again, but I am a little uncomfortable with the change
> > to the superblock...  could I get your opinion of that while I take a
> > look at these?
> 
> Can you review patch 1 for now as that's a 3.3 candidate.  I'll rebase
> the rest on top his changes later.

Sounds reasonable.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist
  2012-02-01 13:57 ` [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist Christoph Hellwig
@ 2012-02-09 22:03   ` Ben Myers
  2012-02-09 22:56     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Ben Myers @ 2012-02-09 22:03 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

Hey Christoph,

On Wed, Feb 01, 2012 at 08:57:20AM -0500, Christoph Hellwig wrote:
> Stop reusing dquots from the freelist when allocating new ones
> directly, and implement a shrinker that actually follows the
> specifications for the interface.  The shrinker implementation is
> still highly suboptimal at this point, but we can gradually work on
> it.

I've been messing with this and haven't gotten it to call us with
nr_to_scan other than 0 or -1 yet.  Maybe I need more dquots.
(time passes)  Ok, I have it going now.  Comments below.

> This also fixes an bug in the previous lock ordering, where we would take
> the hash and dqlist locks inside of the freelist lock against the normal
> lock ordering.  This is only solvable by introducing the dispose list,
> and thus not when using direct reclaim of unused dquots for new allocations.

FWICS this fixes a possible deadlock, xfs_qm_dqget vs xfs_qm_dqreclaim
one.
 
> As a side-effect the quota upper bound and used to free ratio values
> in /proc/fs/xfs/xqm are set to 0 as these values don't make any sense
> in the new world order.

If you don't mind I'd like to make the two changes to xfs_qm_shake that
I mention below.  If you do... no problem.  ;)

Reviewed-by: Ben Myers <bpm@sgi.com>

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/kmem.h         |    6 -
>  fs/xfs/xfs_dquot.c    |  103 ++++-------------
>  fs/xfs/xfs_qm.c       |  293 +++++++++++++++++++-------------------------------
>  fs/xfs/xfs_qm.h       |   14 --
>  fs/xfs/xfs_qm_stats.c |    4 
>  fs/xfs/xfs_trace.h    |    5 
>  6 files changed, 142 insertions(+), 283 deletions(-)
> 
> Index: xfs/fs/xfs/kmem.h
> ===================================================================
> --- xfs.orig/fs/xfs/kmem.h	2012-02-01 12:05:12.530712997 +0100
> +++ xfs/fs/xfs/kmem.h	2012-02-01 12:06:55.620154512 +0100
> @@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone)
>  extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
>  extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
>  
> -static inline int
> -kmem_shake_allow(gfp_t gfp_mask)
> -{
> -	return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
> -}
> -
>  #endif /* __XFS_SUPPORT_KMEM_H__ */
> Index: xfs/fs/xfs/xfs_qm.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_qm.c	2012-02-01 12:05:12.540712942 +0100
> +++ xfs/fs/xfs/xfs_qm.c	2012-02-01 12:22:08.051878113 +0100
> @@ -50,7 +50,6 @@
>   */
>  struct mutex	xfs_Gqm_lock;
>  struct xfs_qm	*xfs_Gqm;
> -uint		ndquot;
>  
>  kmem_zone_t	*qm_dqzone;
>  kmem_zone_t	*qm_dqtrxzone;
> @@ -93,7 +92,6 @@ xfs_Gqm_init(void)
>  		goto out_free_udqhash;
>  
>  	hsize /= sizeof(xfs_dqhash_t);
> -	ndquot = hsize << 8;
>  
>  	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
>  	xqm->qm_dqhashmask = hsize - 1;
> @@ -137,7 +135,6 @@ xfs_Gqm_init(void)
>  		xqm->qm_dqtrxzone = qm_dqtrxzone;
>  
>  	atomic_set(&xqm->qm_totaldquots, 0);
> -	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
>  	xqm->qm_nrefs = 0;
>  	return xqm;
>  
> @@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos(
>  	return 0;
>  }
>  
> +STATIC void
> +xfs_qm_dqfree_one(
> +	struct xfs_dquot	*dqp)
> +{
> +	struct xfs_mount	*mp = dqp->q_mount;
> +	struct xfs_quotainfo	*qi = mp->m_quotainfo;
>  
> +	mutex_lock(&dqp->q_hash->qh_lock);
> +	list_del_init(&dqp->q_hashlist);
> +	dqp->q_hash->qh_version++;
> +	mutex_unlock(&dqp->q_hash->qh_lock);
> +
> +	mutex_lock(&qi->qi_dqlist_lock);
> +	list_del_init(&dqp->q_mplist);
> +	qi->qi_dquots--;
> +	qi->qi_dqreclaims++;
> +	mutex_unlock(&qi->qi_dqlist_lock);
>  
> -/*
> - * Pop the least recently used dquot off the freelist and recycle it.
> - */
> -STATIC struct xfs_dquot *
> -xfs_qm_dqreclaim_one(void)
> +	xfs_qm_dqdestroy(dqp);
> +}
> +
> +STATIC void
> +xfs_qm_dqreclaim_one(
> +	struct xfs_dquot	*dqp,
> +	struct list_head	*dispose_list)
>  {
> -	struct xfs_dquot	*dqp;
> -	int			restarts = 0;
> +	struct xfs_mount	*mp = dqp->q_mount;
> +	int			error;
>  
> -	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
> -restart:
> -	list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
> -		struct xfs_mount *mp = dqp->q_mount;
> +	if (!xfs_dqlock_nowait(dqp))
> +		goto out_busy;
>  
> -		if (!xfs_dqlock_nowait(dqp))
> -			continue;
> +	/*
> +	 * This dquot has acquired a reference in the meantime remove it from
> +	 * the freelist and try again.
> +	 */
> +	if (dqp->q_nrefs) {
> +		xfs_dqunlock(dqp);
>  
> -		/*
> -		 * This dquot has already been grabbed by dqlookup.
> -		 * Remove it from the freelist and try again.
> -		 */
> -		if (dqp->q_nrefs) {
> -			trace_xfs_dqreclaim_want(dqp);
> -			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
> -
> -			list_del_init(&dqp->q_freelist);
> -			xfs_Gqm->qm_dqfrlist_cnt--;
> -			restarts++;
> -			goto dqunlock;
> -		}
> +		trace_xfs_dqreclaim_want(dqp);
> +		XQM_STATS_INC(xqmstats.xs_qm_dqwants);
>  
> -		ASSERT(dqp->q_hash);
> -		ASSERT(!list_empty(&dqp->q_mplist));
> +		list_del_init(&dqp->q_freelist);
> +		xfs_Gqm->qm_dqfrlist_cnt--;
> +		return;
> +	}
>  
> -		/*
> -		 * Try to grab the flush lock. If this dquot is in the process
> -		 * of getting flushed to disk, we don't want to reclaim it.
> -		 */
> -		if (!xfs_dqflock_nowait(dqp))
> -			goto dqunlock;
> +	ASSERT(dqp->q_hash);

q_hash is never cleared anymore... maybe this ASSERT is no longer
valuable.  You probably delete it later in your patchset.

> +	ASSERT(!list_empty(&dqp->q_mplist));
>  
> -		/*
> -		 * We have the flush lock so we know that this is not in the
> -		 * process of being flushed. So, if this is dirty, flush it
> -		 * DELWRI so that we don't get a freelist infested with
> -		 * dirty dquots.
> -		 */
> -		if (XFS_DQ_IS_DIRTY(dqp)) {
> -			int	error;
> +	/*
> +	 * Try to grab the flush lock. If this dquot is in the process of
> +	 * getting flushed to disk, we don't want to reclaim it.
> +	 */
> +	if (!xfs_dqflock_nowait(dqp))
> +		goto out_busy;
>  
> -			trace_xfs_dqreclaim_dirty(dqp);
> +	/*
> +	 * We have the flush lock so we know that this is not in the
> +	 * process of being flushed. So, if this is dirty, flush it
> +	 * DELWRI so that we don't get a freelist infested with
> +	 * dirty dquots.
> +	 */
> +	if (XFS_DQ_IS_DIRTY(dqp)) {
> +		trace_xfs_dqreclaim_dirty(dqp);
>  
> -			/*
> -			 * We flush it delayed write, so don't bother
> -			 * releasing the freelist lock.
> -			 */
> -			error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
> -			if (error) {
> -				xfs_warn(mp, "%s: dquot %p flush failed",
> -					__func__, dqp);
> -			}
> -			goto dqunlock;
> +		/*
> +		 * We flush it delayed write, so don't bother releasing the
> +		 * freelist lock.
> +		 */
> +		error = xfs_qm_dqflush(dqp, 0);
> +		if (error) {
> +			xfs_warn(mp, "%s: dquot %p flush failed",
> +				 __func__, dqp);
>  		}
> -		xfs_dqfunlock(dqp);
>  
>  		/*
> -		 * Prevent lookup now that we are going to reclaim the dquot.
> -		 * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
> -		 * thus we can drop the lock now.
> +		 * Give the dquot another try on the freelist, as the
> +		 * flushing will take some time.
>  		 */
> -		dqp->dq_flags |= XFS_DQ_FREEING;
> -		xfs_dqunlock(dqp);
> -
> -		mutex_lock(&dqp->q_hash->qh_lock);
> -		list_del_init(&dqp->q_hashlist);
> -		dqp->q_hash->qh_version++;
> -		mutex_unlock(&dqp->q_hash->qh_lock);
> -
> -		mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
> -		list_del_init(&dqp->q_mplist);
> -		mp->m_quotainfo->qi_dquots--;
> -		mp->m_quotainfo->qi_dqreclaims++;
> -		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
> -
> -		ASSERT(dqp->q_nrefs == 0);
> -		list_del_init(&dqp->q_freelist);
> -		xfs_Gqm->qm_dqfrlist_cnt--;
> -
> -		mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
> -		return dqp;
> -dqunlock:
> -		xfs_dqunlock(dqp);
> -		if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
> -			break;
> -		goto restart;
> +		goto out_busy;
>  	}
> +	xfs_dqfunlock(dqp);
>  
> -	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
> -	return NULL;
> -}
> +	/*
> +	 * Prevent lookups now that we are past the point of no return.
> +	 */
> +	dqp->dq_flags |= XFS_DQ_FREEING;
> +	xfs_dqunlock(dqp);
>  
> -/*
> - * Traverse the freelist of dquots and attempt to reclaim a maximum of
> - * 'howmany' dquots. This operation races with dqlookup(), and attempts to
> - * favor the lookup function ...
> - */
> -STATIC int
> -xfs_qm_shake_freelist(
> -	int	howmany)
> -{
> -	int		nreclaimed = 0;
> -	xfs_dquot_t	*dqp;
> +	ASSERT(dqp->q_nrefs == 0);
> +	list_move_tail(&dqp->q_freelist, dispose_list);
> +	xfs_Gqm->qm_dqfrlist_cnt--;
> +
> +	trace_xfs_dqreclaim_done(dqp);
> +	XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
> +	return;
>  
> -	if (howmany <= 0)
> -		return 0;
> +out_busy:
> +	xfs_dqunlock(dqp);
>  
> -	while (nreclaimed < howmany) {
> -		dqp = xfs_qm_dqreclaim_one();
> -		if (!dqp)
> -			return nreclaimed;
> -		xfs_qm_dqdestroy(dqp);
> -		nreclaimed++;
> -	}
> -	return nreclaimed;
> +	/*
> +	 * Move the dquot to the tail of the list so that we don't spin on it.
> +	 */
> +	list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
> +
> +	trace_xfs_dqreclaim_busy(dqp);
> +	XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
>  }
>  
> -/*
> - * The kmem_shake interface is invoked when memory is running low.
> - */
> -/* ARGSUSED */
>  STATIC int
>  xfs_qm_shake(
> -	struct shrinker	*shrink,
> -	struct shrink_control *sc)
> +	struct shrinker		*shrink,
> +	struct shrink_control	*sc)
>  {
> -	int	ndqused, nfree, n;
> -	gfp_t gfp_mask = sc->gfp_mask;
> -
> -	if (!kmem_shake_allow(gfp_mask))
> -		return 0;
> -	if (!xfs_Gqm)
> -		return 0;
> -
> -	nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
> -	/* incore dquots in all f/s's */
> -	ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
> -
> -	ASSERT(ndqused >= 0);
> +	int			nr_to_scan = sc->nr_to_scan;
> +	LIST_HEAD		(dispose_list);
> +	struct xfs_dquot	*dqp;
>  
> -	if (nfree <= ndqused && nfree < ndquot)
> +	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
>  		return 0;
> +	if (!nr_to_scan)
> +		goto out;

I suggest something more like:

	if (!nr_to_scan)
		goto out;
        if ((sc->gfp_mask...
		return -1;

>  
> -	ndqused *= xfs_Gqm->qm_dqfree_ratio;	/* target # of free dquots */
> -	n = nfree - ndqused - ndquot;		/* # over target */
> -
> -	return xfs_qm_shake_freelist(MAX(nfree, n));
> -}
> -
> -
> -/*------------------------------------------------------------------*/
> -
> -/*
> - * Return a new incore dquot. Depending on the number of
> - * dquots in the system, we either allocate a new one on the kernel heap,
> - * or reclaim a free one.
> - * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
> - * to reclaim an existing one from the freelist.
> - */
> -boolean_t
> -xfs_qm_dqalloc_incore(
> -	xfs_dquot_t **O_dqpp)
> -{
> -	xfs_dquot_t	*dqp;
> -
> -	/*
> -	 * Check against high water mark to see if we want to pop
> -	 * a nincompoop dquot off the freelist.
> -	 */
> -	if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
> -		/*
> -		 * Try to recycle a dquot from the freelist.
> -		 */
> -		if ((dqp = xfs_qm_dqreclaim_one())) {
> -			XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
> -			/*
> -			 * Just zero the core here. The rest will get
> -			 * reinitialized by caller. XXX we shouldn't even
> -			 * do this zero ...
> -			 */
> -			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
> -			*O_dqpp = dqp;
> -			return B_FALSE;
> -		}
> -		XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
> +	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
> +	while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
> +		if (nr_to_scan-- <= 0)
> +			break;
> +		dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
> +				       q_freelist);
> +		xfs_qm_dqreclaim_one(dqp, &dispose_list);
>  	}
> +	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
>  
> -	/*
> -	 * Allocate a brand new dquot on the kernel heap and return it
> -	 * to the caller to initialize.
> -	 */
> -	ASSERT(xfs_Gqm->qm_dqzone != NULL);
> -	*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
> -	atomic_inc(&xfs_Gqm->qm_totaldquots);
> -
> -	return B_TRUE;
> +	while (!list_empty(&dispose_list)) {
> +		dqp = list_first_entry(&dispose_list, struct xfs_dquot,
> +				       q_freelist);
> +		list_del_init(&dqp->q_freelist);
> +		xfs_qm_dqfree_one(dqp);
> +	}
> +out:
> +	return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;

return atomic_read(&xfs_Gqm->qm_totaldquots);

This works well for me and seems to be closer to the shrinker interface
as documented:

/*
 * A callback you can register to apply pressure to ageable caches.
 *
 * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
 * and a 'gfpmask'.  It should look through the least-recently-used
 * 'nr_to_scan' entries and attempt to free them up.  It should return
 * the number of objects which remain in the cache.  If it returns -1,
 * it means
 * it cannot do any scanning at this time (eg. there is a risk of
 * deadlock).
 * The callback must not return -1 if nr_to_scan is zero.
 *
 * The 'gfpmask' refers to the allocation we are currently trying to
 * fulfil.
 *
 * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
 * querying the cache size, so a fastpath for that case is appropriate.
 */
struct shrinker {
...


>  }
>  
> -
>  /*
>   * Start a transaction and write the incore superblock changes to
>   * disk. flags parameter indicates which fields have changed.
> Index: xfs/fs/xfs/xfs_dquot.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_dquot.c	2012-02-01 12:05:12.554046204 +0100
> +++ xfs/fs/xfs/xfs_dquot.c	2012-02-01 12:22:02.135243499 +0100
> @@ -63,82 +63,6 @@ int xfs_dqerror_mod = 33;
>  static struct lock_class_key xfs_dquot_other_class;
>  
>  /*
> - * Allocate and initialize a dquot. We don't always allocate fresh memory;
> - * we try to reclaim a free dquot if the number of incore dquots are above
> - * a threshold.
> - * The only field inside the core that gets initialized at this point
> - * is the d_id field. The idea is to fill in the entire q_core
> - * when we read in the on disk dquot.
> - */
> -STATIC xfs_dquot_t *
> -xfs_qm_dqinit(
> -	xfs_mount_t  *mp,
> -	xfs_dqid_t   id,
> -	uint	     type)
> -{
> -	xfs_dquot_t	*dqp;
> -	boolean_t	brandnewdquot;
> -
> -	brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
> -	dqp->dq_flags = type;
> -	dqp->q_core.d_id = cpu_to_be32(id);
> -	dqp->q_mount = mp;
> -
> -	/*
> -	 * No need to re-initialize these if this is a reclaimed dquot.
> -	 */
> -	if (brandnewdquot) {
> -		INIT_LIST_HEAD(&dqp->q_freelist);
> -		mutex_init(&dqp->q_qlock);
> -		init_waitqueue_head(&dqp->q_pinwait);
> -
> -		/*
> -		 * Because we want to use a counting completion, complete
> -		 * the flush completion once to allow a single access to
> -		 * the flush completion without blocking.
> -		 */
> -		init_completion(&dqp->q_flush);
> -		complete(&dqp->q_flush);
> -
> -		trace_xfs_dqinit(dqp);
> -	} else {
> -		/*
> -		 * Only the q_core portion was zeroed in dqreclaim_one().
> -		 * So, we need to reset others.
> -		 */
> -		dqp->q_nrefs = 0;
> -		dqp->q_blkno = 0;
> -		INIT_LIST_HEAD(&dqp->q_mplist);
> -		INIT_LIST_HEAD(&dqp->q_hashlist);
> -		dqp->q_bufoffset = 0;
> -		dqp->q_fileoffset = 0;
> -		dqp->q_transp = NULL;
> -		dqp->q_gdquot = NULL;
> -		dqp->q_res_bcount = 0;
> -		dqp->q_res_icount = 0;
> -		dqp->q_res_rtbcount = 0;
> -		atomic_set(&dqp->q_pincount, 0);
> -		dqp->q_hash = NULL;
> -		ASSERT(list_empty(&dqp->q_freelist));
> -
> -		trace_xfs_dqreuse(dqp);
> -	}
> -
> -	/*
> -	 * In either case we need to make sure group quotas have a different
> -	 * lock class than user quotas, to make sure lockdep knows we can
> -	 * locks of one of each at the same time.
> -	 */
> -	if (!(type & XFS_DQ_USER))
> -		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
> -
> -	/*
> -	 * log item gets initialized later
> -	 */
> -	return (dqp);
> -}
> -
> -/*
>   * This is called to free all the memory associated with a dquot
>   */
>  void
> @@ -567,7 +491,32 @@ xfs_qm_dqread(
>  	int			error;
>  	int			cancelflags = 0;
>  
> -	dqp = xfs_qm_dqinit(mp, id, type);
> +
> +	dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
> +
> +	dqp->dq_flags = type;
> +	dqp->q_core.d_id = cpu_to_be32(id);
> +	dqp->q_mount = mp;
> +	INIT_LIST_HEAD(&dqp->q_freelist);
> +	mutex_init(&dqp->q_qlock);
> +	init_waitqueue_head(&dqp->q_pinwait);
> +
> +	/*
> +	 * Because we want to use a counting completion, complete
> +	 * the flush completion once to allow a single access to
> +	 * the flush completion without blocking.
> +	 */
> +	init_completion(&dqp->q_flush);
> +	complete(&dqp->q_flush);
> +
> +	/*
> +	 * Make sure group quotas have a different lock class than user
> +	 * quotas.
> +	 */
> +	if (!(type & XFS_DQ_USER))
> +		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
> +
> +	atomic_inc(&xfs_Gqm->qm_totaldquots);
>  
>  	trace_xfs_dqread(dqp);
>  
> Index: xfs/fs/xfs/xfs_qm.h
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_qm.h	2012-02-01 12:05:12.564046150 +0100
> +++ xfs/fs/xfs/xfs_qm.h	2012-02-01 12:22:02.171909967 +0100
> @@ -26,24 +26,12 @@
>  struct xfs_qm;
>  struct xfs_inode;
>  
> -extern uint		ndquot;
>  extern struct mutex	xfs_Gqm_lock;
>  extern struct xfs_qm	*xfs_Gqm;
>  extern kmem_zone_t	*qm_dqzone;
>  extern kmem_zone_t	*qm_dqtrxzone;
>  
>  /*
> - * Ditto, for xfs_qm_dqreclaim_one.
> - */
> -#define XFS_QM_RECLAIM_MAX_RESTARTS	4
> -
> -/*
> - * Ideal ratio of free to in use dquots. Quota manager makes an attempt
> - * to keep this balance.
> - */
> -#define XFS_QM_DQFREE_RATIO		2
> -
> -/*
>   * Dquot hashtable constants/threshold values.
>   */
>  #define XFS_QM_HASHSIZE_LOW		(PAGE_SIZE / sizeof(xfs_dqhash_t))
> @@ -74,7 +62,6 @@ typedef struct xfs_qm {
>  	int		 qm_dqfrlist_cnt;
>  	atomic_t	 qm_totaldquots; /* total incore dquots */
>  	uint		 qm_nrefs;	 /* file systems with quota on */
> -	int		 qm_dqfree_ratio;/* ratio of free to inuse dquots */
>  	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
>  	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */
>  } xfs_qm_t;
> @@ -143,7 +130,6 @@ extern int		xfs_qm_quotacheck(xfs_mount_
>  extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
>  
>  /* dquot stuff */
> -extern boolean_t	xfs_qm_dqalloc_incore(xfs_dquot_t **);
>  extern int		xfs_qm_dqpurge_all(xfs_mount_t *, uint);
>  extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
>  
> Index: xfs/fs/xfs/xfs_trace.h
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_trace.h	2012-02-01 12:05:12.577379410 +0100
> +++ xfs/fs/xfs/xfs_trace.h	2012-02-01 12:06:55.623487828 +0100
> @@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \
>  DEFINE_DQUOT_EVENT(xfs_dqadjust);
>  DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
>  DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
> -DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
> +DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
> +DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
>  DEFINE_DQUOT_EVENT(xfs_dqattach_found);
>  DEFINE_DQUOT_EVENT(xfs_dqattach_get);
> -DEFINE_DQUOT_EVENT(xfs_dqinit);
> -DEFINE_DQUOT_EVENT(xfs_dqreuse);
>  DEFINE_DQUOT_EVENT(xfs_dqalloc);
>  DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
>  DEFINE_DQUOT_EVENT(xfs_dqread);
> Index: xfs/fs/xfs/xfs_qm_stats.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_qm_stats.c	2012-02-01 12:05:12.590712672 +0100
> +++ xfs/fs/xfs/xfs_qm_stats.c	2012-02-01 12:22:02.185243229 +0100
> @@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file
>  {
>  	/* maximum; incore; ratio free to inuse; freelist */
>  	seq_printf(m, "%d\t%d\t%d\t%u\n",
> -			ndquot,
> +			0,
>  			xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
> -			xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
> +			0,
>  			xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
>  	return 0;
>  }
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist
  2012-02-09 22:03   ` Ben Myers
@ 2012-02-09 22:56     ` Christoph Hellwig
  2012-02-09 23:13       ` Ben Myers
  2012-02-10  1:49       ` Dave Chinner
  0 siblings, 2 replies; 19+ messages in thread
From: Christoph Hellwig @ 2012-02-09 22:56 UTC (permalink / raw)
  To: Ben Myers; +Cc: Christoph Hellwig, xfs

On Thu, Feb 09, 2012 at 04:03:20PM -0600, Ben Myers wrote:
> I've been messing with this and haven't gotten it to call us with
> nr_to_scan other than 0 or -1 yet.  Maybe I need more dquots.
> (time passes)  Ok, I have it going now.  Comments below.

To actually hit this I had to use a VM with very little memory assigned
to it, and then create lots of dquots and cause memory pressure.

I have about 20.000 users on it, and I did a quota report for all of
them while catting one block device into another using buffered I/O.

> 
> > This also fixes an bug in the previous lock ordering, where we would take
> > the hash and dqlist locks inside of the freelist lock against the normal
> > lock ordering.  This is only solvable by introducing the dispose list,
> > and thus not when using direct reclaim of unused dquots for new allocations.
> 
> FWICS this fixes a possible deadlock, xfs_qm_dqget vs xfs_qm_dqreclaim
> one.

Yes.

> > +	LIST_HEAD		(dispose_list);
> > +	struct xfs_dquot	*dqp;
> >  
> > -	if (nfree <= ndqused && nfree < ndquot)
> > +	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
> >  		return 0;
> > +	if (!nr_to_scan)
> > +		goto out;
> 
> I suggest something more like:
> 
> 	if (!nr_to_scan)
> 		goto out;
>         if ((sc->gfp_mask...
> 		return -1;

Why?  Counting the number of objects when we can't actually do anything
is just a waste of time, and -1 vs 0 for the sizing pass seem to be
treated the same in the calling code.

> > -
> > -	return B_TRUE;
> > +	while (!list_empty(&dispose_list)) {
> > +		dqp = list_first_entry(&dispose_list, struct xfs_dquot,
> > +				       q_freelist);
> > +		list_del_init(&dqp->q_freelist);
> > +		xfs_qm_dqfree_one(dqp);
> > +	}
> > +out:
> > +	return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
> 
> return atomic_read(&xfs_Gqm->qm_totaldquots);
> 
> This works well for me and seems to be closer to the shrinker interface
> as documented:

It's pointless - we can only apply pressure to dquots that are on the
freelist.  No amount of shaking will allow us to reclaim a referenced
dquot.

>  * The callback must not return -1 if nr_to_scan is zero.

this is against your suggestion of using -1 for the estimation pass
above, btw.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist
  2012-02-09 22:56     ` Christoph Hellwig
@ 2012-02-09 23:13       ` Ben Myers
  2012-02-10  1:52         ` Dave Chinner
  2012-02-10  1:49       ` Dave Chinner
  1 sibling, 1 reply; 19+ messages in thread
From: Ben Myers @ 2012-02-09 23:13 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

Hey Christoph,

On Thu, Feb 09, 2012 at 05:56:26PM -0500, Christoph Hellwig wrote:
> On Thu, Feb 09, 2012 at 04:03:20PM -0600, Ben Myers wrote:
> > I've been messing with this and haven't gotten it to call us with
> > nr_to_scan other than 0 or -1 yet.  Maybe I need more dquots.
> > (time passes)  Ok, I have it going now.  Comments below.
> 
> To actually hit this I hade to use a VM with very little memory assigned
> to it, and then creat lots of dquots and causes memory pressure.
> 
> I have about 20.000 users on it, and I did a quota report for all of
> them while catting one block device into another using buffered I/O.

Ah, I see.

> > > +	LIST_HEAD		(dispose_list);
> > > +	struct xfs_dquot	*dqp;
> > >  
> > > -	if (nfree <= ndqused && nfree < ndquot)
> > > +	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
> > >  		return 0;
> > > +	if (!nr_to_scan)
> > > +		goto out;
> > 
> > I suggest something more like:
> > 
> > 	if (!nr_to_scan)
> > 		goto out;
> >         if ((sc->gfp_mask...
> > 		return -1;
> 
> Why?  Counting the number of objects when we can't actually do anything
> is just a waste of time,
> and -1 vs 0 for the sizing pass seem to be
> treateds the same in the calling code.

That's a good point, but the shrinker interface has documented that
you're supposed to return -1 in this situation... and that you aren't
allowed to return -1 when nr_to_scan == 0.

> > > -
> > > -	return B_TRUE;
> > > +	while (!list_empty(&dispose_list)) {
> > > +		dqp = list_first_entry(&dispose_list, struct xfs_dquot,
> > > +				       q_freelist);
> > > +		list_del_init(&dqp->q_freelist);
> > > +		xfs_qm_dqfree_one(dqp);
> > > +	}
> > > +out:
> > > +	return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
> > 
> > return atomic_read(&xfs_Gqm->qm_totaldquots);
> > 
> > This works well for me and seems to be closer to the shrinker interface
> > as documented:
> 
> It's pointless - we can only apply pressure to dquots that are on the
> freelist.  No amount of shaking will allow us to reclaim a referenced
> dquot.

Sure... then it should be:

return atomic_read(&xfs_Gqm->qm_frlist_cnt);

What is the value of the additional calculation?

> >  * The callback must not return -1 if nr_to_scan is zero.
> 
> this is against your suggestion of using -1 for the estimation pass
> above, btw.

No it isn't... if nr_to_scan == 0 we would have jumped to 'out' and
returned the count.

Thanks,
	Ben

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist
  2012-02-09 22:56     ` Christoph Hellwig
  2012-02-09 23:13       ` Ben Myers
@ 2012-02-10  1:49       ` Dave Chinner
  2012-02-10 16:56         ` Ben Myers
  1 sibling, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2012-02-10  1:49 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Ben Myers, xfs

On Thu, Feb 09, 2012 at 05:56:26PM -0500, Christoph Hellwig wrote:
> On Thu, Feb 09, 2012 at 04:03:20PM -0600, Ben Myers wrote:
> > > +	LIST_HEAD		(dispose_list);
> > > +	struct xfs_dquot	*dqp;
> > >  
> > > -	if (nfree <= ndqused && nfree < ndquot)
> > > +	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
> > >  		return 0;
> > > +	if (!nr_to_scan)
> > > +		goto out;
> > 
> > I suggest something more like:
> > 
> > 	if (!nr_to_scan)
> > 		goto out;
> >         if ((sc->gfp_mask...
> > 		return -1;
> 
> Why?  Counting the number of objects when we can't actually do anything
> is just a waste of time, and -1 vs 0 for the sizing pass seem to be
> treateds the same in the calling code.

.....

> >  * The callback must not return -1 if nr_to_scan is zero.
> 
> this is against your suggestion of using -1 for the estimation pass
> above, btw.

Technically, if the shrinker cannot make progress or the gfp mask
means it cannot enter the filesystem code, then it should return -1,
not zero. Yes, the calc code treats 0 and -1 the same because it is
defensive - for the calculation a shrinker can validly return 0 to
mean "I have no work to do" rather than "I cannot do any work in
this context", but both mean the same thing - don't try to run the
shrinker here.

However, the later shrinker callout to do work (i.e. nr_to_scan !=
0) relies on this distinction to break out of the shrink loop early
when the shrinker says "can't do any work". If you just keep
returning zero there then it will just loop uselessly until the
scan count runs out.

The interface is a piece of shit, and I need to get back to my patch
series that fixes this all up by separating the calculation callback
from the work callback...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist
  2012-02-09 23:13       ` Ben Myers
@ 2012-02-10  1:52         ` Dave Chinner
  2012-02-10 16:48           ` Ben Myers
  0 siblings, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2012-02-10  1:52 UTC (permalink / raw)
  To: Ben Myers; +Cc: Christoph Hellwig, xfs

On Thu, Feb 09, 2012 at 05:13:46PM -0600, Ben Myers wrote:
> On Thu, Feb 09, 2012 at 05:56:26PM -0500, Christoph Hellwig wrote:
> > On Thu, Feb 09, 2012 at 04:03:20PM -0600, Ben Myers wrote:
> > > > -
> > > > -	return B_TRUE;
> > > > +	while (!list_empty(&dispose_list)) {
> > > > +		dqp = list_first_entry(&dispose_list, struct xfs_dquot,
> > > > +				       q_freelist);
> > > > +		list_del_init(&dqp->q_freelist);
> > > > +		xfs_qm_dqfree_one(dqp);
> > > > +	}
> > > > +out:
> > > > +	return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
> > > 
> > > return atomic_read(&xfs_Gqm->qm_totaldquots);
> > > 
> > > This works well for me and seems to be closer to the shrinker interface
> > > as documented:
> > 
> > It's pointless - we can only apply pressure to dquots that are on the
> > freelist.  No amount of shaking will allow us to reclaim a referenced
> > dquot.
> 
> Sure... then it should be:
> 
> return atomic_read(&xfs_Gqm->qm_frlist_cnt);
> 
> What is the value of the additional calculation?

It's applying the user controllable vfs_cache_pressure setting to
the reclaim weight. That is, if the user wants to reclaim
inode/dentry/dquot slab caches faster than the page cache (i.e.
prefer data caching over metadata caching) or vice versa, then they
change the sysctl value and shrinkers should then take that into
account....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist
  2012-02-10  1:52         ` Dave Chinner
@ 2012-02-10 16:48           ` Ben Myers
  0 siblings, 0 replies; 19+ messages in thread
From: Ben Myers @ 2012-02-10 16:48 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs

On Fri, Feb 10, 2012 at 12:52:33PM +1100, Dave Chinner wrote:
> On Thu, Feb 09, 2012 at 05:13:46PM -0600, Ben Myers wrote:
> > On Thu, Feb 09, 2012 at 05:56:26PM -0500, Christoph Hellwig wrote:
> > > On Thu, Feb 09, 2012 at 04:03:20PM -0600, Ben Myers wrote:
> > > > > -
> > > > > -	return B_TRUE;
> > > > > +	while (!list_empty(&dispose_list)) {
> > > > > +		dqp = list_first_entry(&dispose_list, struct xfs_dquot,
> > > > > +				       q_freelist);
> > > > > +		list_del_init(&dqp->q_freelist);
> > > > > +		xfs_qm_dqfree_one(dqp);
> > > > > +	}
> > > > > +out:
> > > > > +	return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
> > > > 
> > > > return atomic_read(&xfs_Gqm->qm_totaldquots);
> > > > 
> > > > This works well for me and seems to be closer to the shrinker interface
> > > > as documented:
> > > 
> > > It's pointless - we can only apply pressure to dquots that are on the
> > > freelist.  No amount of shaking will allow us to reclaim a referenced
> > > dquot.
> > 
> > Sure... then it should be:
> > 
> > return atomic_read(&xfs_Gqm->qm_frlist_cnt);
> > 
> > What is the value of the additional calculation?
> 
> It's applying the user controllable vfs_cache_pressure setting to
> the reclaim weight. That is, if the user wants to reclaim
> inode/dentry/dquot slab caches faster than the page cache (i.e.
> perfer data caching over metadata caching) or vice cersa, then the
> change the sysctl value and shrinkers should then take that into
> account....

Aha.  Thanks for the explanation.  It sounds like including
sysctl_vfs_cache_pressure in this calculation is a good thing.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist
  2012-02-10  1:49       ` Dave Chinner
@ 2012-02-10 16:56         ` Ben Myers
  0 siblings, 0 replies; 19+ messages in thread
From: Ben Myers @ 2012-02-10 16:56 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs

On Fri, Feb 10, 2012 at 12:49:47PM +1100, Dave Chinner wrote:
> On Thu, Feb 09, 2012 at 05:56:26PM -0500, Christoph Hellwig wrote:
> > On Thu, Feb 09, 2012 at 04:03:20PM -0600, Ben Myers wrote:
> > > > +	LIST_HEAD		(dispose_list);
> > > > +	struct xfs_dquot	*dqp;
> > > >  
> > > > -	if (nfree <= ndqused && nfree < ndquot)
> > > > +	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
> > > >  		return 0;
> > > > +	if (!nr_to_scan)
> > > > +		goto out;
> > > 
> > > I suggest something more like:
> > > 
> > > 	if (!nr_to_scan)
> > > 		goto out;
> > >         if ((sc->gfp_mask...
> > > 		return -1;
> > 
> > Why?  Counting the number of objects when we can't actually do anything
> > is just a waste of time, and -1 vs 0 for the sizing pass seem to be
> > treateds the same in the calling code.
> 
> .....
> 
> > >  * The callback must not return -1 if nr_to_scan is zero.
> > 
> > this is against your suggestion of using -1 for the estimation pass
> > above, btw.
> 
> Technically, if the shrinker cannot make progress or the gfp mask
> means it cannot enter the filesystem code, then it should return -1,
> not zero. Yes, the calc code treats 0 and -1 the same because it is
> defensive - for the calculation a shrinker can validly return 0 to
> mean "I have no work to do" rather than "I cannot do any work in
> this context", but both mean the same thing - don't try to run the
> shrinker here.
> 
> However, the later shrinker callout to do work (i.e. nr_to_scan !=
> 0) relies on this distinction to break out of the shrink loop early
> whenteh shrinker says "can't do any work". If you just keep
> returning zero there then it will just looping uselessly until the
> scan count runs out.
> 
> The interface is a piece of shit, and I need to get back to my patch
> series that fixes this all up by separating the calculation callback
> from the work callback...

Ok... so it'll be sorted out in a different patch.  ;)

-Ben

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 0/7] better dquot caching
  2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
                   ` (7 preceding siblings ...)
  2012-02-07  7:30 ` [PATCH 0/7] better dquot caching Ben Myers
@ 2012-02-11 20:13 ` Arkadiusz Miśkiewicz
  8 siblings, 0 replies; 19+ messages in thread
From: Arkadiusz Miśkiewicz @ 2012-02-11 20:13 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Wednesday 01 of February 2012, Christoph Hellwig wrote:
> This series improves handling of large number of dquots.  It replaces
> the direct recycling of dquots from the freelist with a shrinker, removes
> the upper bound of dquots, and uses per-filesystem structures for all
> quota state, including switching from a hash to a radix-tree for lookups.
> 
> For repeated lookups of dquots out of a large pool I see improvements
> between 50% and 500% compared to the previous code.  All these tests
> have been performed with Q_XQUOTASYNC already disabled as it would
> change the result too much for both the old and new code.

Thanks for this improvement!

On 3.0.17 my script that executes ~130k of Q_XQUOTAGET lookups does its job 
in 1m35s-1m45s and most of that is sys time.

3.3.0git + quota patchsets does the same thing in 23s and when cache gets hot 
it goes down to 9s-10s but sys time is only ~1s.

Note that I'm using xfs_quota patched to avoid doing XQUOTASYNC, so that time 
is mostly XQUOTAGET also on 3.0.17 kernel.

Of course the best test would be to test 3.3 without and with quota patchsets 
but I don't think there were any other improvements between 3.0 and 3.3 that 
could get such speed improvement for tons of XQUOTAGET calls.

ps. my 3.3.0git kernel had this patchset and "[PATCH 0/3] include reservations 
in quota reporting" included


3.0.17
real    1m34.970s
user    0m4.026s
sys     1m25.081s

real    1m45.365s
user    0m4.106s
sys     1m35.380s

real    1m45.835s
user    0m3.936s
sys     1m35.854s

real    1m45.608s
user    0m4.023s
sys     1m34.181s

real    1m36.207s
user    0m3.986s
sys     1m26.068s

real    1m45.969s
user    0m4.016s
sys     1m35.827s

real    1m35.608s
user    0m3.836s
sys     1m25.534s

real    1m45.882s
user    0m3.986s
sys     1m35.700s

real    1m45.913s
user    0m3.876s
sys     1m36.064s

real    1m45.702s
user    0m4.010s
sys     1m35.937s


3.3git + quota patchsets
real    0m23.811s
user    0m3.760s
sys     0m1.373s

real    0m9.712s
user    0m3.740s
sys     0m1.047s

real    0m9.276s
user    0m3.713s
sys     0m1.113s

real    0m9.880s
user    0m3.870s
sys     0m0.937s

real    0m11.841s
user    0m3.740s
sys     0m1.077s

real    0m10.086s
user    0m3.760s
sys     0m1.057s

real    0m10.170s
user    0m3.790s
sys     0m1.063s

real    0m9.457s
user    0m3.686s
sys     0m1.100s

real    0m10.150s
user    0m3.756s
sys     0m1.073s

real    0m9.331s
user    0m3.810s
sys     0m1.007s
-- 
Arkadiusz Miśkiewicz        PLD/Linux Team
arekm / maven.pl            http://ftp.pld-linux.org/

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2012-02-11 20:13 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-01 13:57 [PATCH 0/7] better dquot caching Christoph Hellwig
2012-02-01 13:57 ` [PATCH 1/7] xfs: use a normal shrinker for the dquot freelist Christoph Hellwig
2012-02-09 22:03   ` Ben Myers
2012-02-09 22:56     ` Christoph Hellwig
2012-02-09 23:13       ` Ben Myers
2012-02-10  1:52         ` Dave Chinner
2012-02-10 16:48           ` Ben Myers
2012-02-10  1:49       ` Dave Chinner
2012-02-10 16:56         ` Ben Myers
2012-02-01 13:57 ` [PATCH 2/7] xfs: per-filesystem dquot LRU lists Christoph Hellwig
2012-02-01 13:57 ` [PATCH 3/7] xfs: use per-filesystem radix trees for dquot lookup Christoph Hellwig
2012-02-01 13:57 ` [PATCH 4/7] xfs: remove the per-filesystem list of dquots Christoph Hellwig
2012-02-01 13:57 ` [PATCH 5/7] xfs: use per-cpu data for the quota statistics Christoph Hellwig
2012-02-01 13:57 ` [PATCH 6/7] xfs: user per-cpu stats for the total dquot numbers Christoph Hellwig
2012-02-01 13:57 ` [PATCH 7/7] xfs: remove the globalk xfs_Gqm structure Christoph Hellwig
2012-02-07  7:30 ` [PATCH 0/7] better dquot caching Ben Myers
2012-02-07 13:24   ` Christoph Hellwig
2012-02-07 15:23     ` Ben Myers
2012-02-11 20:13 ` Arkadiusz Miśkiewicz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.