* [PATCH] quota: Make quota stat accounting lockless.
@ 2010-04-23 5:31 Dmitry Monakhov
2010-04-26 15:16 ` Jan Kara
0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Monakhov @ 2010-04-23 5:31 UTC (permalink / raw)
To: linux-fsdevel; +Cc: Jan Kara
[-- Attachment #1: Type: text/plain, Size: 128 bytes --]
I've written this patch long time ago, it is pretty simple, and
allow more non obvious locking optimization to be done later.
[-- Attachment #2: 0001-quota-Make-quota-stat-accounting-lockless.patch --]
[-- Type: text/plain, Size: 10841 bytes --]
>From 234b13091fcb16966c2abcd936c8a0ed99822952 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 23 Apr 2010 09:06:41 +0400
Subject: [PATCH] quota: Make quota stat accounting lockless.
Quota stats it mostly writable data structure. Let's alloc percpu
bucket for each value.
NOTE: dqstats_read() function is racy against dqstats_{inc,dec}
and may return inconsistent value. But this is ok since absolute
accuracy is not required.
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
fs/quota/dquot.c | 122 +++++++++++++++++++++++++++++++++++--------------
fs/quota/quota_tree.c | 4 +-
fs/quota/quota_v1.c | 4 +-
include/linux/quota.h | 26 ++++++----
4 files changed, 106 insertions(+), 50 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 05c590e..aa78e26 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,7 +82,7 @@
/*
* There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats, dqstats structure containing statistics about the lists
+ * and quota formats.
* dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
* also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
* i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
@@ -132,6 +132,11 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
+#ifdef CONFIG_SMP
+struct dqstats *dqstats_pcpu;
+#endif
+struct dqstats dqstats;
+
static char *quotatypes[] = INITQFNAMES;
static struct quota_format_type *quota_formats; /* List of registered formats */
static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
@@ -273,7 +278,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
static inline void put_dquot_last(struct dquot *dquot)
{
list_add_tail(&dquot->dq_free, &free_dquots);
- dqstats.free_dquots++;
+ dqstats_inc(DQST_FREE_DQUOTS);
}
static inline void remove_free_dquot(struct dquot *dquot)
@@ -281,7 +286,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
if (list_empty(&dquot->dq_free))
return;
list_del_init(&dquot->dq_free);
- dqstats.free_dquots--;
+ dqstats_dec(DQST_FREE_DQUOTS);
}
static inline void put_inuse(struct dquot *dquot)
@@ -289,12 +294,12 @@ static inline void put_inuse(struct dquot *dquot)
/* We add to the back of inuse list so we don't have to restart
* when traversing this list and we block */
list_add_tail(&dquot->dq_inuse, &inuse_list);
- dqstats.allocated_dquots++;
+ dqstats_inc(DQST_ALLOC_DQUOTS);
}
static inline void remove_inuse(struct dquot *dquot)
{
- dqstats.allocated_dquots--;
+ dqstats_dec(DQST_ALLOC_DQUOTS);
list_del(&dquot->dq_inuse);
}
/*
@@ -559,8 +564,8 @@ int dquot_scan_active(struct super_block *sb,
continue;
/* Now we have active dquot so we can just increase use count */
atomic_inc(&dquot->dq_count);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
ret = fn(dquot, priv);
@@ -605,8 +610,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
* holding reference so we can safely just increase
* use count */
atomic_inc(&dquot->dq_count);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
sb->dq_op->write_dquot(dquot);
dqput(dquot);
spin_lock(&dq_list_lock);
@@ -618,9 +623,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
&& info_dirty(&dqopt->info[cnt]))
sb->dq_op->write_info(sb, cnt);
- spin_lock(&dq_list_lock);
- dqstats.syncs++;
- spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_SYNCS);
mutex_unlock(&dqopt->dqonoff_mutex);
if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@ -684,7 +687,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
prune_dqcache(nr);
spin_unlock(&dq_list_lock);
}
- return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
+ return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dqcache_shrinker = {
@@ -712,10 +715,7 @@ void dqput(struct dquot *dquot)
BUG();
}
#endif
-
- spin_lock(&dq_list_lock);
- dqstats.drops++;
- spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_DROPS);
we_slept:
spin_lock(&dq_list_lock);
if (atomic_read(&dquot->dq_count) > 1) {
@@ -832,15 +832,16 @@ we_slept:
put_inuse(dquot);
/* hash it first so it can be found */
insert_dquot_hash(dquot);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
+
} else {
if (!atomic_read(&dquot->dq_count))
remove_free_dquot(dquot);
atomic_inc(&dquot->dq_count);
- dqstats.cache_hits++;
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_CACHE_HITS);
+ dqstats_inc(DQST_LOOKUPS);
}
/* Wait for dq_lock - after this we know that either dquot_release() is
* already finished or it will be canceled due to dq_count > 1 test */
@@ -2474,68 +2475,112 @@ const struct quotactl_ops vfs_quotactl_ops = {
.set_dqblk = vfs_set_dqblk
};
+void dqstats_inc(unsigned int type)
+{
+#ifdef CONFIG_SMP
+ per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
+#else
+ dqstats[type]++;
+#endif
+}
+
+void dqstats_dec(unsigned int type)
+{
+#ifdef CONFIG_SMP
+ per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
+#else
+ dqstats[type]--;
+#endif
+}
+
+int dqstats_read(unsigned int type)
+{
+ int count = 0;
+ int cpu;
+#ifdef CONFIG_SMP
+ for_each_possible_cpu(cpu)
+ count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
+ /* Statistics reading is racy, but absolute accuracy isn't required */
+ if (count < 0)
+ count = 0;
+#else
+ count = dqstats[type];
+#endif
+ return count;
+}
+
+static int do_proc_dqstats(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+#ifdef CONFIG_SMP
+ /* Update global table */
+ unsigned int type = (int *)table->data - dqstats.stat;
+ dqstats.stat[type] = dqstats_read(type);
+#endif
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+
static ctl_table fs_dqstats_table[] = {
{
.procname = "lookups",
- .data = &dqstats.lookups,
+ .data = &dqstats.stat[DQST_LOOKUPS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "drops",
- .data = &dqstats.drops,
+ .data = &dqstats.stat[DQST_DROPS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "reads",
- .data = &dqstats.reads,
+ .data = &dqstats.stat[DQST_READS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "writes",
- .data = &dqstats.writes,
+ .data = &dqstats.stat[DQST_WRITES],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "cache_hits",
- .data = &dqstats.cache_hits,
+ .data = &dqstats.stat[DQST_CACHE_HITS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "allocated_dquots",
- .data = &dqstats.allocated_dquots,
+ .data = &dqstats.stat[DQST_ALLOC_DQUOTS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "free_dquots",
- .data = &dqstats.free_dquots,
+ .data = &dqstats.stat[DQST_FREE_DQUOTS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "syncs",
- .data = &dqstats.syncs,
+ .data = &dqstats.stat[DQST_SYNCS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
#ifdef CONFIG_PRINT_QUOTA_WARNING
{
.procname = "warnings",
.data = &flag_print_warnings,
- .maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
@@ -2581,6 +2626,13 @@ static int __init dquot_init(void)
if (!dquot_hash)
panic("Cannot create dquot hash table");
+#ifdef CONFIG_SMP
+ dqstats_pcpu = alloc_percpu(struct dqstats);
+ if (!dqstats_pcpu)
+ panic("Cannot create dquot stats table");
+#endif
+ memset(&dqstats, 0, sizeof(struct dqstats));
+
/* Find power-of-two hlist_heads which can fit into allocation */
nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
dq_hash_bits = 0;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index f81f4bc..5b7f741 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
} else {
ret = 0;
}
- dqstats.writes++;
+ dqstats_inc(DQST_WRITES);
kfree(ddquot);
return ret;
@@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
spin_unlock(&dq_data_lock);
kfree(ddquot);
out:
- dqstats.reads++;
+ dqstats_inc(DQST_READS);
return ret;
}
EXPORT_SYMBOL(qtree_read_dquot);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 2ae757e..4af344c 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
dquot->dq_dqb.dqb_ihardlimit == 0 &&
dquot->dq_dqb.dqb_isoftlimit == 0)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
- dqstats.reads++;
+ dqstats_inc(DQST_READS);
return 0;
}
@@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
ret = 0;
out:
- dqstats.writes++;
+ dqstats_inc(DQST_WRITES);
return ret;
}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index b462916..40e0157 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -237,19 +237,23 @@ static inline int info_dirty(struct mem_dqinfo *info)
{
return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
}
-
+enum {
+ DQST_LOOKUPS,
+ DQST_DROPS,
+ DQST_READS,
+ DQST_WRITES,
+ DQST_CACHE_HITS,
+ DQST_ALLOC_DQUOTS,
+ DQST_FREE_DQUOTS,
+ DQST_SYNCS,
+ _DQST_DQSTAT_LAST
+};
struct dqstats {
- int lookups;
- int drops;
- int reads;
- int writes;
- int cache_hits;
- int allocated_dquots;
- int free_dquots;
- int syncs;
+ int stat[_DQST_DQSTAT_LAST];
};
-
-extern struct dqstats dqstats;
+extern void dqstats_inc(unsigned int type);
+extern void dqstats_dec(unsigned int type);
+extern int dqstats_read(unsigned int type);
#define DQ_MOD_B 0 /* dquot modified since read */
#define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */
--
1.6.6.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] quota: Make quota stat accounting lockless.
2010-04-23 5:31 [PATCH] quota: Make quota stat accounting lockless Dmitry Monakhov
@ 2010-04-26 15:16 ` Jan Kara
2010-04-26 15:24 ` Jan Kara
0 siblings, 1 reply; 7+ messages in thread
From: Jan Kara @ 2010-04-26 15:16 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, Jan Kara
On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
>
> I've written this patch long time ago, it is pretty simple, and
> allow more non obvious locking optimization to be done later.
The patch looks OK to me. Will merge it.
Honza
> From 234b13091fcb16966c2abcd936c8a0ed99822952 Mon Sep 17 00:00:00 2001
> From: Dmitry Monakhov <dmonakhov@openvz.org>
> Date: Fri, 23 Apr 2010 09:06:41 +0400
> Subject: [PATCH] quota: Make quota stat accounting lockless.
>
> Quota stats it mostly writable data structure. Let's alloc percpu
> bucket for each value.
>
> NOTE: dqstats_read() function is racy against dqstats_{inc,dec}
> and may return inconsistent value. But this is ok since absolute
> accuracy is not required.
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
> ---
> fs/quota/dquot.c | 122 +++++++++++++++++++++++++++++++++++--------------
> fs/quota/quota_tree.c | 4 +-
> fs/quota/quota_v1.c | 4 +-
> include/linux/quota.h | 26 ++++++----
> 4 files changed, 106 insertions(+), 50 deletions(-)
>
> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
> index 05c590e..aa78e26 100644
> --- a/fs/quota/dquot.c
> +++ b/fs/quota/dquot.c
> @@ -82,7 +82,7 @@
>
> /*
> * There are three quota SMP locks. dq_list_lock protects all lists with quotas
> - * and quota formats, dqstats structure containing statistics about the lists
> + * and quota formats.
> * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
> * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
> * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
> @@ -132,6 +132,11 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
> __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
> EXPORT_SYMBOL(dq_data_lock);
>
> +#ifdef CONFIG_SMP
> +struct dqstats *dqstats_pcpu;
> +#endif
> +struct dqstats dqstats;
> +
> static char *quotatypes[] = INITQFNAMES;
> static struct quota_format_type *quota_formats; /* List of registered formats */
> static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
> @@ -273,7 +278,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
> static inline void put_dquot_last(struct dquot *dquot)
> {
> list_add_tail(&dquot->dq_free, &free_dquots);
> - dqstats.free_dquots++;
> + dqstats_inc(DQST_FREE_DQUOTS);
> }
>
> static inline void remove_free_dquot(struct dquot *dquot)
> @@ -281,7 +286,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
> if (list_empty(&dquot->dq_free))
> return;
> list_del_init(&dquot->dq_free);
> - dqstats.free_dquots--;
> + dqstats_dec(DQST_FREE_DQUOTS);
> }
>
> static inline void put_inuse(struct dquot *dquot)
> @@ -289,12 +294,12 @@ static inline void put_inuse(struct dquot *dquot)
> /* We add to the back of inuse list so we don't have to restart
> * when traversing this list and we block */
> list_add_tail(&dquot->dq_inuse, &inuse_list);
> - dqstats.allocated_dquots++;
> + dqstats_inc(DQST_ALLOC_DQUOTS);
> }
>
> static inline void remove_inuse(struct dquot *dquot)
> {
> - dqstats.allocated_dquots--;
> + dqstats_dec(DQST_ALLOC_DQUOTS);
> list_del(&dquot->dq_inuse);
> }
> /*
> @@ -559,8 +564,8 @@ int dquot_scan_active(struct super_block *sb,
> continue;
> /* Now we have active dquot so we can just increase use count */
> atomic_inc(&dquot->dq_count);
> - dqstats.lookups++;
> spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_LOOKUPS);
> dqput(old_dquot);
> old_dquot = dquot;
> ret = fn(dquot, priv);
> @@ -605,8 +610,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
> * holding reference so we can safely just increase
> * use count */
> atomic_inc(&dquot->dq_count);
> - dqstats.lookups++;
> spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_LOOKUPS);
> sb->dq_op->write_dquot(dquot);
> dqput(dquot);
> spin_lock(&dq_list_lock);
> @@ -618,9 +623,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
> if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
> && info_dirty(&dqopt->info[cnt]))
> sb->dq_op->write_info(sb, cnt);
> - spin_lock(&dq_list_lock);
> - dqstats.syncs++;
> - spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_SYNCS);
> mutex_unlock(&dqopt->dqonoff_mutex);
>
> if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
> @@ -684,7 +687,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
> prune_dqcache(nr);
> spin_unlock(&dq_list_lock);
> }
> - return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
> + return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
> }
>
> static struct shrinker dqcache_shrinker = {
> @@ -712,10 +715,7 @@ void dqput(struct dquot *dquot)
> BUG();
> }
> #endif
> -
> - spin_lock(&dq_list_lock);
> - dqstats.drops++;
> - spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_DROPS);
> we_slept:
> spin_lock(&dq_list_lock);
> if (atomic_read(&dquot->dq_count) > 1) {
> @@ -832,15 +832,16 @@ we_slept:
> put_inuse(dquot);
> /* hash it first so it can be found */
> insert_dquot_hash(dquot);
> - dqstats.lookups++;
> spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_LOOKUPS);
> +
> } else {
> if (!atomic_read(&dquot->dq_count))
> remove_free_dquot(dquot);
> atomic_inc(&dquot->dq_count);
> - dqstats.cache_hits++;
> - dqstats.lookups++;
> spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_CACHE_HITS);
> + dqstats_inc(DQST_LOOKUPS);
> }
> /* Wait for dq_lock - after this we know that either dquot_release() is
> * already finished or it will be canceled due to dq_count > 1 test */
> @@ -2474,68 +2475,112 @@ const struct quotactl_ops vfs_quotactl_ops = {
> .set_dqblk = vfs_set_dqblk
> };
>
> +void dqstats_inc(unsigned int type)
> +{
> +#ifdef CONFIG_SMP
> + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
> +#else
> + dqstats[type]++;
> +#endif
> +}
> +
> +void dqstats_dec(unsigned int type)
> +{
> +#ifdef CONFIG_SMP
> + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
> +#else
> + dqstats[type]--;
> +#endif
> +}
> +
> +int dqstats_read(unsigned int type)
> +{
> + int count = 0;
> + int cpu;
> +#ifdef CONFIG_SMP
> + for_each_possible_cpu(cpu)
> + count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
> + /* Statistics reading is racy, but absolute accuracy isn't required */
> + if (count < 0)
> + count = 0;
> +#else
> + count = dqstats[type];
> +#endif
> + return count;
> +}
> +
> +static int do_proc_dqstats(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +#ifdef CONFIG_SMP
> + /* Update global table */
> + unsigned int type = (int *)table->data - dqstats.stat;
> + dqstats.stat[type] = dqstats_read(type);
> +#endif
> + return proc_dointvec(table, write, buffer, lenp, ppos);
> +}
> +
> static ctl_table fs_dqstats_table[] = {
> {
> .procname = "lookups",
> - .data = &dqstats.lookups,
> + .data = &dqstats.stat[DQST_LOOKUPS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "drops",
> - .data = &dqstats.drops,
> + .data = &dqstats.stat[DQST_DROPS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "reads",
> - .data = &dqstats.reads,
> + .data = &dqstats.stat[DQST_READS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "writes",
> - .data = &dqstats.writes,
> + .data = &dqstats.stat[DQST_WRITES],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "cache_hits",
> - .data = &dqstats.cache_hits,
> + .data = &dqstats.stat[DQST_CACHE_HITS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "allocated_dquots",
> - .data = &dqstats.allocated_dquots,
> + .data = &dqstats.stat[DQST_ALLOC_DQUOTS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "free_dquots",
> - .data = &dqstats.free_dquots,
> + .data = &dqstats.stat[DQST_FREE_DQUOTS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "syncs",
> - .data = &dqstats.syncs,
> + .data = &dqstats.stat[DQST_SYNCS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> #ifdef CONFIG_PRINT_QUOTA_WARNING
> {
> .procname = "warnings",
> .data = &flag_print_warnings,
> - .maxlen = sizeof(int),
> .mode = 0644,
> .proc_handler = proc_dointvec,
> },
> @@ -2581,6 +2626,13 @@ static int __init dquot_init(void)
> if (!dquot_hash)
> panic("Cannot create dquot hash table");
>
> +#ifdef CONFIG_SMP
> + dqstats_pcpu = alloc_percpu(struct dqstats);
> + if (!dqstats_pcpu)
> + panic("Cannot create dquot stats table");
> +#endif
> + memset(&dqstats, 0, sizeof(struct dqstats));
> +
> /* Find power-of-two hlist_heads which can fit into allocation */
> nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
> dq_hash_bits = 0;
> diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
> index f81f4bc..5b7f741 100644
> --- a/fs/quota/quota_tree.c
> +++ b/fs/quota/quota_tree.c
> @@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> } else {
> ret = 0;
> }
> - dqstats.writes++;
> + dqstats_inc(DQST_WRITES);
> kfree(ddquot);
>
> return ret;
> @@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> spin_unlock(&dq_data_lock);
> kfree(ddquot);
> out:
> - dqstats.reads++;
> + dqstats_inc(DQST_READS);
> return ret;
> }
> EXPORT_SYMBOL(qtree_read_dquot);
> diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
> index 2ae757e..4af344c 100644
> --- a/fs/quota/quota_v1.c
> +++ b/fs/quota/quota_v1.c
> @@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
> dquot->dq_dqb.dqb_ihardlimit == 0 &&
> dquot->dq_dqb.dqb_isoftlimit == 0)
> set_bit(DQ_FAKE_B, &dquot->dq_flags);
> - dqstats.reads++;
> + dqstats_inc(DQST_READS);
>
> return 0;
> }
> @@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
> ret = 0;
>
> out:
> - dqstats.writes++;
> + dqstats_inc(DQST_WRITES);
>
> return ret;
> }
> diff --git a/include/linux/quota.h b/include/linux/quota.h
> index b462916..40e0157 100644
> --- a/include/linux/quota.h
> +++ b/include/linux/quota.h
> @@ -237,19 +237,23 @@ static inline int info_dirty(struct mem_dqinfo *info)
> {
> return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
> }
> -
> +enum {
> + DQST_LOOKUPS,
> + DQST_DROPS,
> + DQST_READS,
> + DQST_WRITES,
> + DQST_CACHE_HITS,
> + DQST_ALLOC_DQUOTS,
> + DQST_FREE_DQUOTS,
> + DQST_SYNCS,
> + _DQST_DQSTAT_LAST
> +};
> struct dqstats {
> - int lookups;
> - int drops;
> - int reads;
> - int writes;
> - int cache_hits;
> - int allocated_dquots;
> - int free_dquots;
> - int syncs;
> + int stat[_DQST_DQSTAT_LAST];
> };
> -
> -extern struct dqstats dqstats;
> +extern void dqstats_inc(unsigned int type);
> +extern void dqstats_dec(unsigned int type);
> +extern int dqstats_read(unsigned int type);
>
> #define DQ_MOD_B 0 /* dquot modified since read */
> #define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */
> --
> 1.6.6.1
>
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] quota: Make quota stat accounting lockless.
2010-04-26 15:16 ` Jan Kara
@ 2010-04-26 15:24 ` Jan Kara
2010-04-26 16:14 ` Dmitry Monakhov
0 siblings, 1 reply; 7+ messages in thread
From: Jan Kara @ 2010-04-26 15:24 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, Jan Kara
On Mon 26-04-10 17:16:23, Jan Kara wrote:
> On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
> >
> > I've written this patch long time ago, it is pretty simple, and
> > allow more non obvious locking optimization to be done later.
> The patch looks OK to me. Will merge it.
Hmm, maybe I spoke to soon. One question:
> > +void dqstats_inc(unsigned int type)
> > +{
> > +#ifdef CONFIG_SMP
> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
> > +#else
> > + dqstats[type]++;
> > +#endif
> > +}
> > +
> > +void dqstats_dec(unsigned int type)
> > +{
> > +#ifdef CONFIG_SMP
> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
> > +#else
> > + dqstats[type]--;
> > +#endif
> > +}
Why not make these two functions inline? Since they are used
also from quota_tree.c and quota_v1.c, we'd need to move them to a
quota.h header in that case.
Honza
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] quota: Make quota stat accounting lockless.
2010-04-26 15:24 ` Jan Kara
@ 2010-04-26 16:14 ` Dmitry Monakhov
2010-04-26 17:37 ` Jan Kara
0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Monakhov @ 2010-04-26 16:14 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-fsdevel
[-- Attachment #1: Type: text/plain, Size: 1100 bytes --]
Jan Kara <jack@suse.cz> writes:
> On Mon 26-04-10 17:16:23, Jan Kara wrote:
>> On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
>> >
>> > I've written this patch long time ago, it is pretty simple, and
>> > allow more non obvious locking optimization to be done later.
>> The patch looks OK to me. Will merge it.
> Hmm, maybe I spoke to soon. One question:
>
>> > +void dqstats_inc(unsigned int type)
>> > +{
>> > +#ifdef CONFIG_SMP
>> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
>> > +#else
>> > + dqstats[type]++;
>> > +#endif
>> > +}
>> > +
>> > +void dqstats_dec(unsigned int type)
>> > +{
>> > +#ifdef CONFIG_SMP
>> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
>> > +#else
>> > + dqstats[type]--;
>> > +#endif
>> > +}
> Why not make these two functions inline? Since they are used
> also from quota_tree.c and quota_v1.c, we'd need to move them to a
> quota.h header in that case.
My initial goal was to keep inc/dec/read helpers together.
We still need to export dqstats_pcpu. But if you prefer this style
i'm ok with it. New version attached.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-quota-Make-quota-stat-accounting-lockless.patch --]
[-- Type: text/x-diff, Size: 11243 bytes --]
>From 6d3b422f18882896df1a862d081209c1938bc7a4 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 26 Apr 2010 20:03:33 +0400
Subject: [PATCH] quota: Make quota stat accounting lockless.
Quota stats it mostly writable data structure. Let's alloc percpu
bucket for each value.
NOTE: dqstats_read() function is racy against dqstats_{inc,dec}
and may return inconsistent value. But this is ok since absolute
accuracy is not required.
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
fs/quota/dquot.c | 106 ++++++++++++++++++++++++++++++++-----------------
fs/quota/quota_tree.c | 4 +-
fs/quota/quota_v1.c | 4 +-
include/linux/quota.h | 41 ++++++++++++++-----
4 files changed, 105 insertions(+), 50 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 05c590e..244e198 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,7 +82,7 @@
/*
* There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats, dqstats structure containing statistics about the lists
+ * and quota formats.
* dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
* also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
* i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
@@ -131,7 +131,11 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
-
+#ifdef CONFIG_SMP
+struct dqstats *dqstats_pcpu;
+EXPORT_SYMBOL(dqstats_pcpu);
+#endif
+struct dqstats dqstats;
static char *quotatypes[] = INITQFNAMES;
static struct quota_format_type *quota_formats; /* List of registered formats */
static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
@@ -273,7 +277,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
static inline void put_dquot_last(struct dquot *dquot)
{
list_add_tail(&dquot->dq_free, &free_dquots);
- dqstats.free_dquots++;
+ dqstats_inc(DQST_FREE_DQUOTS);
}
static inline void remove_free_dquot(struct dquot *dquot)
@@ -281,7 +285,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
if (list_empty(&dquot->dq_free))
return;
list_del_init(&dquot->dq_free);
- dqstats.free_dquots--;
+ dqstats_dec(DQST_FREE_DQUOTS);
}
static inline void put_inuse(struct dquot *dquot)
@@ -289,12 +293,12 @@ static inline void put_inuse(struct dquot *dquot)
/* We add to the back of inuse list so we don't have to restart
* when traversing this list and we block */
list_add_tail(&dquot->dq_inuse, &inuse_list);
- dqstats.allocated_dquots++;
+ dqstats_inc(DQST_ALLOC_DQUOTS);
}
static inline void remove_inuse(struct dquot *dquot)
{
- dqstats.allocated_dquots--;
+ dqstats_dec(DQST_ALLOC_DQUOTS);
list_del(&dquot->dq_inuse);
}
/*
@@ -559,8 +563,8 @@ int dquot_scan_active(struct super_block *sb,
continue;
/* Now we have active dquot so we can just increase use count */
atomic_inc(&dquot->dq_count);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
ret = fn(dquot, priv);
@@ -605,8 +609,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
* holding reference so we can safely just increase
* use count */
atomic_inc(&dquot->dq_count);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
sb->dq_op->write_dquot(dquot);
dqput(dquot);
spin_lock(&dq_list_lock);
@@ -618,9 +622,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
&& info_dirty(&dqopt->info[cnt]))
sb->dq_op->write_info(sb, cnt);
- spin_lock(&dq_list_lock);
- dqstats.syncs++;
- spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_SYNCS);
mutex_unlock(&dqopt->dqonoff_mutex);
if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@ -672,6 +674,22 @@ static void prune_dqcache(int count)
}
}
+static int dqstats_read(unsigned int type)
+{
+ int count = 0;
+ int cpu;
+#ifdef CONFIG_SMP
+ for_each_possible_cpu(cpu)
+ count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
+ /* Statistics reading is racy, but absolute accuracy isn't required */
+ if (count < 0)
+ count = 0;
+#else
+ count = dqstats[type];
+#endif
+ return count;
+}
+
/*
* This is called from kswapd when we think we need some
* more memory
@@ -684,7 +702,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
prune_dqcache(nr);
spin_unlock(&dq_list_lock);
}
- return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
+ return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dqcache_shrinker = {
@@ -712,10 +730,7 @@ void dqput(struct dquot *dquot)
BUG();
}
#endif
-
- spin_lock(&dq_list_lock);
- dqstats.drops++;
- spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_DROPS);
we_slept:
spin_lock(&dq_list_lock);
if (atomic_read(&dquot->dq_count) > 1) {
@@ -832,15 +847,16 @@ we_slept:
put_inuse(dquot);
/* hash it first so it can be found */
insert_dquot_hash(dquot);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
+
} else {
if (!atomic_read(&dquot->dq_count))
remove_free_dquot(dquot);
atomic_inc(&dquot->dq_count);
- dqstats.cache_hits++;
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_CACHE_HITS);
+ dqstats_inc(DQST_LOOKUPS);
}
/* Wait for dq_lock - after this we know that either dquot_release() is
* already finished or it will be canceled due to dq_count > 1 test */
@@ -2474,68 +2490,79 @@ const struct quotactl_ops vfs_quotactl_ops = {
.set_dqblk = vfs_set_dqblk
};
+
+static int do_proc_dqstats(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+#ifdef CONFIG_SMP
+ /* Update global table */
+ unsigned int type = (int *)table->data - dqstats.stat;
+ dqstats.stat[type] = dqstats_read(type);
+#endif
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+
static ctl_table fs_dqstats_table[] = {
{
.procname = "lookups",
- .data = &dqstats.lookups,
+ .data = &dqstats.stat[DQST_LOOKUPS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "drops",
- .data = &dqstats.drops,
+ .data = &dqstats.stat[DQST_DROPS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "reads",
- .data = &dqstats.reads,
+ .data = &dqstats.stat[DQST_READS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "writes",
- .data = &dqstats.writes,
+ .data = &dqstats.stat[DQST_WRITES],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "cache_hits",
- .data = &dqstats.cache_hits,
+ .data = &dqstats.stat[DQST_CACHE_HITS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "allocated_dquots",
- .data = &dqstats.allocated_dquots,
+ .data = &dqstats.stat[DQST_ALLOC_DQUOTS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "free_dquots",
- .data = &dqstats.free_dquots,
+ .data = &dqstats.stat[DQST_FREE_DQUOTS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "syncs",
- .data = &dqstats.syncs,
+ .data = &dqstats.stat[DQST_SYNCS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
#ifdef CONFIG_PRINT_QUOTA_WARNING
{
.procname = "warnings",
.data = &flag_print_warnings,
- .maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
@@ -2581,6 +2608,13 @@ static int __init dquot_init(void)
if (!dquot_hash)
panic("Cannot create dquot hash table");
+#ifdef CONFIG_SMP
+ dqstats_pcpu = alloc_percpu(struct dqstats);
+ if (!dqstats_pcpu)
+ panic("Cannot create dquot stats table");
+#endif
+ memset(&dqstats, 0, sizeof(struct dqstats));
+
/* Find power-of-two hlist_heads which can fit into allocation */
nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
dq_hash_bits = 0;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index f81f4bc..5b7f741 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
} else {
ret = 0;
}
- dqstats.writes++;
+ dqstats_inc(DQST_WRITES);
kfree(ddquot);
return ret;
@@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
spin_unlock(&dq_data_lock);
kfree(ddquot);
out:
- dqstats.reads++;
+ dqstats_inc(DQST_READS);
return ret;
}
EXPORT_SYMBOL(qtree_read_dquot);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 2ae757e..4af344c 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
dquot->dq_dqb.dqb_ihardlimit == 0 &&
dquot->dq_dqb.dqb_isoftlimit == 0)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
- dqstats.reads++;
+ dqstats_inc(DQST_READS);
return 0;
}
@@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
ret = 0;
out:
- dqstats.writes++;
+ dqstats_inc(DQST_WRITES);
return ret;
}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index b462916..0f5b4a7 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -174,6 +174,8 @@ enum {
#include <linux/rwsem.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
#include <linux/dqblk_xfs.h>
#include <linux/dqblk_v1.h>
@@ -237,19 +239,38 @@ static inline int info_dirty(struct mem_dqinfo *info)
{
return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
}
-
+enum {
+ DQST_LOOKUPS,
+ DQST_DROPS,
+ DQST_READS,
+ DQST_WRITES,
+ DQST_CACHE_HITS,
+ DQST_ALLOC_DQUOTS,
+ DQST_FREE_DQUOTS,
+ DQST_SYNCS,
+ _DQST_DQSTAT_LAST
+};
struct dqstats {
- int lookups;
- int drops;
- int reads;
- int writes;
- int cache_hits;
- int allocated_dquots;
- int free_dquots;
- int syncs;
+ int stat[_DQST_DQSTAT_LAST];
};
-extern struct dqstats dqstats;
+extern struct dqstats *dqstats_pcpu;
+static inline void dqstats_inc(unsigned int type)
+{
+#ifdef CONFIG_SMP
+ per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
+#else
+ dqstats[type]++;
+#endif
+}
+static inline void dqstats_dec(unsigned int type)
+{
+#ifdef CONFIG_SMP
+ per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
+#else
+ dqstats[type]--;
+#endif
+}
#define DQ_MOD_B 0 /* dquot modified since read */
#define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */
--
1.6.6.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] quota: Make quota stat accounting lockless.
2010-04-26 16:14 ` Dmitry Monakhov
@ 2010-04-26 17:37 ` Jan Kara
2010-04-26 20:14 ` Jan Kara
0 siblings, 1 reply; 7+ messages in thread
From: Jan Kara @ 2010-04-26 17:37 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: Jan Kara, linux-fsdevel
On Mon 26-04-10 20:14:24, Dmitry Monakhov wrote:
> Jan Kara <jack@suse.cz> writes:
>
> > On Mon 26-04-10 17:16:23, Jan Kara wrote:
> >> On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
> >> >
> >> > I've written this patch long time ago, it is pretty simple, and
> >> > allow more non obvious locking optimization to be done later.
> >> The patch looks OK to me. Will merge it.
> > Hmm, maybe I spoke to soon. One question:
> >
> >> > +void dqstats_inc(unsigned int type)
> >> > +{
> >> > +#ifdef CONFIG_SMP
> >> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
> >> > +#else
> >> > + dqstats[type]++;
> >> > +#endif
> >> > +}
> >> > +
> >> > +void dqstats_dec(unsigned int type)
> >> > +{
> >> > +#ifdef CONFIG_SMP
> >> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
> >> > +#else
> >> > + dqstats[type]--;
> >> > +#endif
> >> > +}
> > Why not make these two functions inline? Since they are used
> > also from quota_tree.c and quota_v1.c, we'd need to move them to a
> > quota.h header in that case.
> My initial goal was to keep inc/dec/read helpers together.
> We still need to export dqstats_pcpu. But if you prefer this style
> i'm ok with it. New version attached.
OK, I've merged the new version (with a few whitespace fixes). Thanks.
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] quota: Make quota stat accounting lockless.
2010-04-26 17:37 ` Jan Kara
@ 2010-04-26 20:14 ` Jan Kara
2010-04-27 3:54 ` Dmitry Monakhov
0 siblings, 1 reply; 7+ messages in thread
From: Jan Kara @ 2010-04-26 20:14 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: Jan Kara, linux-fsdevel
[-- Attachment #1: Type: text/plain, Size: 1716 bytes --]
On Mon 26-04-10 19:37:29, Jan Kara wrote:
> On Mon 26-04-10 20:14:24, Dmitry Monakhov wrote:
> > Jan Kara <jack@suse.cz> writes:
> >
> > > On Mon 26-04-10 17:16:23, Jan Kara wrote:
> > >> On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
> > >> >
> > >> > I've written this patch long time ago, it is pretty simple, and
> > >> > allow more non obvious locking optimization to be done later.
> > >> The patch looks OK to me. Will merge it.
> > > Hmm, maybe I spoke to soon. One question:
> > >
> > >> > +void dqstats_inc(unsigned int type)
> > >> > +{
> > >> > +#ifdef CONFIG_SMP
> > >> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
> > >> > +#else
> > >> > + dqstats[type]++;
> > >> > +#endif
> > >> > +}
> > >> > +
> > >> > +void dqstats_dec(unsigned int type)
> > >> > +{
> > >> > +#ifdef CONFIG_SMP
> > >> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
> > >> > +#else
> > >> > + dqstats[type]--;
> > >> > +#endif
> > >> > +}
> > > Why not make these two functions inline? Since they are used
> > > also from quota_tree.c and quota_v1.c, we'd need to move them to a
> > > quota.h header in that case.
> > My initial goal was to keep inc/dec/read helpers together.
> > We still need to export dqstats_pcpu. But if you prefer this style
> > i'm ok with it. New version attached.
> OK, I've merged the new version (with a few whitespace fixes). Thanks.
The patch didn't compile for !CONFIG_SMP systems and also broke
/proc/sys/fs/quota/warning file. Dmitry, could you be more careful next
time, please? I've fixed that up and tested that it actually works.
Attached is a resulting patch that I currently carry.
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
[-- Attachment #2: 0001-quota-Make-quota-stat-accounting-lockless.patch --]
[-- Type: text/x-patch, Size: 10990 bytes --]
>From 007b64d2e84c0aefdb4ffac507e013123e79b14b Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 26 Apr 2010 20:03:33 +0400
Subject: [PATCH] quota: Make quota stat accounting lockless.
Quota stats is mostly writable data structure. Let's alloc percpu
bucket for each value.
NOTE: dqstats_read() function is racy against dqstats_{inc,dec}
and may return inconsistent value. But this is ok since absolute
accuracy is not required.
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/quota/dquot.c | 102 ++++++++++++++++++++++++++++++++----------------
fs/quota/quota_tree.c | 4 +-
fs/quota/quota_v1.c | 4 +-
include/linux/quota.h | 42 ++++++++++++++++----
4 files changed, 106 insertions(+), 46 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ae76605..acbacc0 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,7 +82,7 @@
/*
* There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats, dqstats structure containing statistics about the lists
+ * and quota formats.
* dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
* also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
* i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
@@ -228,6 +228,10 @@ static struct hlist_head *dquot_hash;
struct dqstats dqstats;
EXPORT_SYMBOL(dqstats);
+#ifdef CONFIG_SMP
+struct dqstats *dqstats_pcpu;
+EXPORT_SYMBOL(dqstats_pcpu);
+#endif
static qsize_t inode_get_rsv_space(struct inode *inode);
static void __dquot_initialize(struct inode *inode, int type);
@@ -275,7 +279,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
static inline void put_dquot_last(struct dquot *dquot)
{
list_add_tail(&dquot->dq_free, &free_dquots);
- dqstats.free_dquots++;
+ dqstats_inc(DQST_FREE_DQUOTS);
}
static inline void remove_free_dquot(struct dquot *dquot)
@@ -283,7 +287,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
if (list_empty(&dquot->dq_free))
return;
list_del_init(&dquot->dq_free);
- dqstats.free_dquots--;
+ dqstats_dec(DQST_FREE_DQUOTS);
}
static inline void put_inuse(struct dquot *dquot)
@@ -291,12 +295,12 @@ static inline void put_inuse(struct dquot *dquot)
/* We add to the back of inuse list so we don't have to restart
* when traversing this list and we block */
list_add_tail(&dquot->dq_inuse, &inuse_list);
- dqstats.allocated_dquots++;
+ dqstats_inc(DQST_ALLOC_DQUOTS);
}
static inline void remove_inuse(struct dquot *dquot)
{
- dqstats.allocated_dquots--;
+ dqstats_dec(DQST_ALLOC_DQUOTS);
list_del(&dquot->dq_inuse);
}
/*
@@ -561,8 +565,8 @@ int dquot_scan_active(struct super_block *sb,
continue;
/* Now we have active dquot so we can just increase use count */
atomic_inc(&dquot->dq_count);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
ret = fn(dquot, priv);
@@ -607,8 +611,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
* holding reference so we can safely just increase
* use count */
atomic_inc(&dquot->dq_count);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
sb->dq_op->write_dquot(dquot);
dqput(dquot);
spin_lock(&dq_list_lock);
@@ -620,9 +624,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
&& info_dirty(&dqopt->info[cnt]))
sb->dq_op->write_info(sb, cnt);
- spin_lock(&dq_list_lock);
- dqstats.syncs++;
- spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_SYNCS);
mutex_unlock(&dqopt->dqonoff_mutex);
if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@ -674,6 +676,22 @@ static void prune_dqcache(int count)
}
}
+static int dqstats_read(unsigned int type)
+{
+ int count = 0;
+ int cpu;
+#ifdef CONFIG_SMP
+ for_each_possible_cpu(cpu)
+ count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
+ /* Statistics reading is racy, but absolute accuracy isn't required */
+ if (count < 0)
+ count = 0;
+#else
+ count = dqstats.stat[type];
+#endif
+ return count;
+}
+
/*
* This is called from kswapd when we think we need some
* more memory
@@ -686,7 +704,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
prune_dqcache(nr);
spin_unlock(&dq_list_lock);
}
- return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
+ return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dqcache_shrinker = {
@@ -714,10 +732,7 @@ void dqput(struct dquot *dquot)
BUG();
}
#endif
-
- spin_lock(&dq_list_lock);
- dqstats.drops++;
- spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_DROPS);
we_slept:
spin_lock(&dq_list_lock);
if (atomic_read(&dquot->dq_count) > 1) {
@@ -834,15 +849,15 @@ we_slept:
put_inuse(dquot);
/* hash it first so it can be found */
insert_dquot_hash(dquot);
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_LOOKUPS);
} else {
if (!atomic_read(&dquot->dq_count))
remove_free_dquot(dquot);
atomic_inc(&dquot->dq_count);
- dqstats.cache_hits++;
- dqstats.lookups++;
spin_unlock(&dq_list_lock);
+ dqstats_inc(DQST_CACHE_HITS);
+ dqstats_inc(DQST_LOOKUPS);
}
/* Wait for dq_lock - after this we know that either dquot_release() is
* already finished or it will be canceled due to dq_count > 1 test */
@@ -2476,62 +2491,74 @@ const struct quotactl_ops vfs_quotactl_ops = {
.set_dqblk = vfs_set_dqblk
};
+
+static int do_proc_dqstats(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+#ifdef CONFIG_SMP
+ /* Update global table */
+ unsigned int type = (int *)table->data - dqstats.stat;
+ dqstats.stat[type] = dqstats_read(type);
+#endif
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+
static ctl_table fs_dqstats_table[] = {
{
.procname = "lookups",
- .data = &dqstats.lookups,
+ .data = &dqstats.stat[DQST_LOOKUPS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "drops",
- .data = &dqstats.drops,
+ .data = &dqstats.stat[DQST_DROPS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "reads",
- .data = &dqstats.reads,
+ .data = &dqstats.stat[DQST_READS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "writes",
- .data = &dqstats.writes,
+ .data = &dqstats.stat[DQST_WRITES],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "cache_hits",
- .data = &dqstats.cache_hits,
+ .data = &dqstats.stat[DQST_CACHE_HITS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "allocated_dquots",
- .data = &dqstats.allocated_dquots,
+ .data = &dqstats.stat[DQST_ALLOC_DQUOTS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "free_dquots",
- .data = &dqstats.free_dquots,
+ .data = &dqstats.stat[DQST_FREE_DQUOTS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
{
.procname = "syncs",
- .data = &dqstats.syncs,
+ .data = &dqstats.stat[DQST_SYNCS],
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = do_proc_dqstats,
},
#ifdef CONFIG_PRINT_QUOTA_WARNING
{
@@ -2583,6 +2610,13 @@ static int __init dquot_init(void)
if (!dquot_hash)
panic("Cannot create dquot hash table");
+#ifdef CONFIG_SMP
+ dqstats_pcpu = alloc_percpu(struct dqstats);
+ if (!dqstats_pcpu)
+ panic("Cannot create dquot stats table");
+#endif
+ memset(&dqstats, 0, sizeof(struct dqstats));
+
/* Find power-of-two hlist_heads which can fit into allocation */
nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
dq_hash_bits = 0;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index f81f4bc..5b7f741 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
} else {
ret = 0;
}
- dqstats.writes++;
+ dqstats_inc(DQST_WRITES);
kfree(ddquot);
return ret;
@@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
spin_unlock(&dq_data_lock);
kfree(ddquot);
out:
- dqstats.reads++;
+ dqstats_inc(DQST_READS);
return ret;
}
EXPORT_SYMBOL(qtree_read_dquot);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 2ae757e..4af344c 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
dquot->dq_dqb.dqb_ihardlimit == 0 &&
dquot->dq_dqb.dqb_isoftlimit == 0)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
- dqstats.reads++;
+ dqstats_inc(DQST_READS);
return 0;
}
@@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
ret = 0;
out:
- dqstats.writes++;
+ dqstats_inc(DQST_WRITES);
return ret;
}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index b462916..cdfde10 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -174,6 +174,8 @@ enum {
#include <linux/rwsem.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
#include <linux/dqblk_xfs.h>
#include <linux/dqblk_v1.h>
@@ -238,19 +240,43 @@ static inline int info_dirty(struct mem_dqinfo *info)
return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
}
+enum {
+ DQST_LOOKUPS,
+ DQST_DROPS,
+ DQST_READS,
+ DQST_WRITES,
+ DQST_CACHE_HITS,
+ DQST_ALLOC_DQUOTS,
+ DQST_FREE_DQUOTS,
+ DQST_SYNCS,
+ _DQST_DQSTAT_LAST
+};
+
struct dqstats {
- int lookups;
- int drops;
- int reads;
- int writes;
- int cache_hits;
- int allocated_dquots;
- int free_dquots;
- int syncs;
+ int stat[_DQST_DQSTAT_LAST];
};
+extern struct dqstats *dqstats_pcpu;
extern struct dqstats dqstats;
+static inline void dqstats_inc(unsigned int type)
+{
+#ifdef CONFIG_SMP
+ per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
+#else
+ dqstats.stat[type]++;
+#endif
+}
+
+static inline void dqstats_dec(unsigned int type)
+{
+#ifdef CONFIG_SMP
+ per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
+#else
+ dqstats.stat[type]--;
+#endif
+}
+
#define DQ_MOD_B 0 /* dquot modified since read */
#define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */
#define DQ_INODES_B 2 /* uid/gid has been warned about inode limit */
--
1.6.4.2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] quota: Make quota stat accounting lockless.
2010-04-26 20:14 ` Jan Kara
@ 2010-04-27 3:54 ` Dmitry Monakhov
0 siblings, 0 replies; 7+ messages in thread
From: Dmitry Monakhov @ 2010-04-27 3:54 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-fsdevel
Jan Kara <jack@suse.cz> writes:
> On Mon 26-04-10 19:37:29, Jan Kara wrote:
>> On Mon 26-04-10 20:14:24, Dmitry Monakhov wrote:
>> > Jan Kara <jack@suse.cz> writes:
>> >
>> > > On Mon 26-04-10 17:16:23, Jan Kara wrote:
>> > >> On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
>> > >> >
>> > >> > I've written this patch long time ago, it is pretty simple, and
>> > >> > allow more non obvious locking optimization to be done later.
>> > >> The patch looks OK to me. Will merge it.
>> > > Hmm, maybe I spoke to soon. One question:
>> > >
>> > >> > +void dqstats_inc(unsigned int type)
>> > >> > +{
>> > >> > +#ifdef CONFIG_SMP
>> > >> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
>> > >> > +#else
>> > >> > + dqstats[type]++;
>> > >> > +#endif
>> > >> > +}
>> > >> > +
>> > >> > +void dqstats_dec(unsigned int type)
>> > >> > +{
>> > >> > +#ifdef CONFIG_SMP
>> > >> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
>> > >> > +#else
>> > >> > + dqstats[type]--;
>> > >> > +#endif
>> > >> > +}
>> > > Why not make these two functions inline? Since they are used
>> > > also from quota_tree.c and quota_v1.c, we'd need to move them to a
>> > > quota.h header in that case.
>> > My initial goal was to keep inc/dec/read helpers together.
>> > We still need to export dqstats_pcpu. But if you prefer this style
>> > i'm ok with it. New version attached.
>> OK, I've merged the new version (with a few whitespace fixes). Thanks.
> The patch didn't compile for !CONFIG_SMP systems and also broke
> /proc/sys/fs/quota/warning file. Dmitry, could you be more careful next
> time, please? I've fixed that up and tested that it actually works.
> Attached is a resulting patch that I currently carry.
Hmm.. i dont wat to say... it was my unpredictable laziness effect.
Really sorry for wasting your time in that meaner. Will take more
efforts to avoid this in future.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2010-04-27 3:54 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-23 5:31 [PATCH] quota: Make quota stat accounting lockless Dmitry Monakhov
2010-04-26 15:16 ` Jan Kara
2010-04-26 15:24 ` Jan Kara
2010-04-26 16:14 ` Dmitry Monakhov
2010-04-26 17:37 ` Jan Kara
2010-04-26 20:14 ` Jan Kara
2010-04-27 3:54 ` Dmitry Monakhov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.