diff --git a/block/partition-generic.c b/block/partition-generic.c index c5ec8246e25e..bd6644bf9643 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -140,8 +140,9 @@ ssize_t part_inflight_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]), - atomic_read(&p->in_flight[1])); + return sprintf(buf, "%8u %8u\n", + pnode_counter_read_all(&p->in_flight[0]), + pnode_counter_read_all(&p->in_flight[1])); } #ifdef CONFIG_FAIL_MAKE_REQUEST diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 96bd13e581cd..aac0b2235410 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -521,7 +521,7 @@ static void start_io_acct(struct dm_io *io) cpu = part_stat_lock(); part_round_stats(cpu, &dm_disk(md)->part0); part_stat_unlock(); - atomic_set(&dm_disk(md)->part0.in_flight[rw], + pnode_counter_set(&dm_disk(md)->part0.in_flight[rw], atomic_inc_return(&md->pending[rw])); if (unlikely(dm_stats_used(&md->stats))) @@ -550,7 +550,7 @@ static void end_io_acct(struct dm_io *io) * a flush. */ pending = atomic_dec_return(&md->pending[rw]); - atomic_set(&dm_disk(md)->part0.in_flight[rw], pending); + pnode_counter_set(&dm_disk(md)->part0.in_flight[rw], pending); pending += atomic_read(&md->pending[rw^0x1]); /* nudge anyone waiting on suspend queue */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e619fae2f037..40c9bc74a120 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK @@ -120,7 +121,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - atomic_t in_flight[2]; + struct pnode_counter in_flight[2]; #ifdef CONFIG_SMP struct disk_stats __percpu *dkstats; #else @@ -364,21 +365,22 @@ static inline void free_part_stats(struct hd_struct *part) static inline void part_inc_in_flight(struct hd_struct *part, int rw) { - atomic_inc(&part->in_flight[rw]); + pnode_counter_inc(&part->in_flight[rw]); if (part->partno) - atomic_inc(&part_to_disk(part)->part0.in_flight[rw]); + pnode_counter_inc(&part_to_disk(part)->part0.in_flight[rw]); } static inline void part_dec_in_flight(struct hd_struct *part, int rw) { - atomic_dec(&part->in_flight[rw]); + pnode_counter_dec(&part->in_flight[rw]); if (part->partno) - atomic_dec(&part_to_disk(part)->part0.in_flight[rw]); + pnode_counter_dec(&part_to_disk(part)->part0.in_flight[rw]); } static inline int part_in_flight(struct hd_struct *part) { - return atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]); + return pnode_counter_read_all(&part->in_flight[0]) + \ + pnode_counter_read_all(&part->in_flight[1]); } static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) @@ -627,11 +629,34 @@ extern ssize_t part_fail_store(struct device *dev, const char *buf, size_t count); #endif /* CONFIG_FAIL_MAKE_REQUEST */ +static inline int hd_counter_init(struct hd_struct *part) +{ + if (pnode_counter_init(&part->in_flight[0], GFP_KERNEL)) + return -ENOMEM; + if (pnode_counter_init(&part->in_flight[1], GFP_KERNEL)) { + pnode_counter_deinit(&part->in_flight[0]); + return -ENOMEM; + } + + return 0; +} + +static inline void hd_counter_deinit(struct hd_struct *part) +{ + pnode_counter_deinit(&part->in_flight[0]); + pnode_counter_deinit(&part->in_flight[1]); +} + static inline int hd_ref_init(struct hd_struct *part) { + if (hd_counter_init(part)) + return -ENOMEM; + if (percpu_ref_init(&part->ref, __delete_partition, 0, - GFP_KERNEL)) + GFP_KERNEL)) { + hd_counter_deinit(part); return -ENOMEM; + } return 0; } @@ -659,6 +684,7 @@ static inline void hd_free_part(struct hd_struct *part) { free_part_stats(part); free_part_info(part); + hd_counter_deinit(part); percpu_ref_exit(&part->ref); } diff --git a/include/linux/pernode_counter.h b/include/linux/pernode_counter.h new file mode 100644 index 000000000000..127639fbc25f --- /dev/null +++ b/include/linux/pernode_counter.h @@ -0,0 +1,118 @@ +#ifndef _LINUX_PERNODE_COUNTER_H +#define _LINUX_PERNODE_COUNTER_H +/* + * A simple per node atomic counter for use in block io accounting. + */ + +#include +#include +#include +#include + +struct node_counter { + atomic_t counter_in_node; +}; + +struct pnode_counter { + struct node_counter * __percpu *counter; + struct node_counter **nodes; +}; + +static inline int pnode_counter_init(struct pnode_counter *pnc, gfp_t gfp) +{ + struct node_counter **nodes; + int i, j, cpu; + + nodes = kzalloc(nr_node_ids * sizeof(struct node_counter *), gfp); + if (!nodes) + goto err_nodes; + + for_each_node(i) { + nodes[i] = kzalloc_node(sizeof(struct node_counter), gfp, i); + if (!nodes[i]) + goto err_node_counter; + } + + pnc->counter = alloc_percpu_gfp(struct node_counter *, gfp); + if (!pnc->counter) + goto err_node_counter; + + for_each_possible_cpu(cpu) + *per_cpu_ptr(pnc->counter, cpu) = nodes[cpu_to_node(cpu)]; + + pnc->nodes = nodes; + + return 0; + + err_node_counter: + for (j = 0; j < i; j++) + kfree(nodes[j]); + kfree(nodes); + err_nodes: + return -ENOMEM; +} + +static inline void pnode_counter_deinit(struct pnode_counter *pnc) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct node_counter *node = *per_cpu_ptr(pnc->counter, cpu); + + kfree(node); + *per_cpu_ptr(pnc->counter, cpu) = NULL; + } + free_percpu(pnc->counter); + kfree(pnc->nodes); +} + +static inline void pnode_counter_inc(struct pnode_counter *pnc) +{ + struct node_counter *node = this_cpu_read(*pnc->counter); + + atomic_inc(&node->counter_in_node); +} + +static inline void pnode_counter_inc_cpu(struct pnode_counter *pnc, int cpu) +{ + struct node_counter *node = *per_cpu_ptr(pnc->counter, cpu); + + atomic_inc(&node->counter_in_node); +} + +static inline void pnode_counter_dec(struct pnode_counter *pnc) +{ + struct node_counter *node = this_cpu_read(*pnc->counter); + + atomic_dec(&node->counter_in_node); +} + +static inline void pnode_counter_dec_cpu(struct pnode_counter *pnc, int cpu) +{ + struct node_counter *node = *per_cpu_ptr(pnc->counter, cpu); + + atomic_dec(&node->counter_in_node); +} + +static inline void pnode_counter_set(struct pnode_counter *pnc, int val) +{ + int i; + struct node_counter *node = this_cpu_read(*pnc->counter); + + for_each_node(i) + atomic_set(&pnc->nodes[i]->counter_in_node, 0); + atomic_set(&node->counter_in_node, val); +} + +static inline long pnode_counter_read_all(struct pnode_counter *pnc) +{ + int i; + long val = 0; + + for_each_node(i) + val += atomic_read(&pnc->nodes[i]->counter_in_node); + + return val; +} + +#endif /* _LINUX_PERNODE_COUNTER_H */