From mboxrd@z Thu Jan 1 00:00:00 1970 From: Daniel Stodden Subject: [PATCH 1/5] blktap: Manage segment buffers in mempools. Date: Fri, 12 Nov 2010 15:31:43 -0800 Message-ID: <1289604707-13378-2-git-send-email-daniel.stodden@citrix.com> References: <1289604707-13378-1-git-send-email-daniel.stodden@citrix.com> Return-path: In-Reply-To: <1289604707-13378-1-git-send-email-daniel.stodden@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Xen Cc: Jeremy Fitzhardinge , Daniel Stodden List-Id: xen-devel@lists.xenproject.org - Replaces the request free list with a (mempooled) slab. - Replaces request buckets with a mempool. No buckets, because we're doing full s/g on page granularity anyway, so can gfp() independent pages everywhere. Allocations are 1-11 page-sized segments. - Adds support for multiple page pools. - Adds pools to sysfs. Linked as a 'pools' kset to blktap-control. - Makes the per-tap pool selectable. Attribute 'pool' on the tap device. - Make pools online-resizeable. Attributes free/size on the pool kobj. Signed-off-by: Daniel Stodden --- drivers/xen/blktap/blktap.h | 35 ++- drivers/xen/blktap/control.c | 80 ++++++-- drivers/xen/blktap/device.c | 2 +- drivers/xen/blktap/request.c | 509 +++++++++++++++++++++++++----------------- drivers/xen/blktap/ring.c | 10 +- drivers/xen/blktap/sysfs.c | 36 +++ 6 files changed, 433 insertions(+), 239 deletions(-) diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h index a29b509..ad79c15 100644 --- a/drivers/xen/blktap/blktap.h +++ b/drivers/xen/blktap/blktap.h @@ -121,17 +121,19 @@ struct blktap_statistics { }; struct blktap_request { + struct blktap *tap; struct request *rq; uint16_t usr_idx; uint8_t status; atomic_t pendcnt; - uint8_t nr_pages; unsigned short operation; struct timeval time; struct grant_handle_pair handles[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct list_head free_list; + + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int nr_pages; }; struct blktap { @@ -140,6 +142,7 @@ struct blktap { struct blktap_ring ring; struct blktap_device device; + struct blktap_page_pool *pool; int pending_cnt; struct blktap_request *pending_requests[MAX_PENDING_REQS]; @@ -152,6 +155,13 @@ struct blktap { struct blktap_statistics stats; }; +struct blktap_page_pool { + struct mempool_s *bufs; + spinlock_t lock; + struct kobject kobj; + wait_queue_head_t wait; +}; + extern struct mutex blktap_lock; extern struct blktap **blktaps; extern int blktap_max_minor; @@ -165,7 +175,6 @@ size_t blktap_ring_debug(struct blktap *, char *, size_t); int blktap_ring_create(struct blktap *); int blktap_ring_destroy(struct blktap *); void blktap_ring_kick_user(struct blktap *); -void blktap_ring_kick_all(void); int blktap_sysfs_init(void); void blktap_sysfs_exit(void); @@ -181,19 +190,23 @@ void blktap_device_destroy_sync(struct blktap *); int blktap_device_run_queue(struct blktap *); void blktap_device_end_request(struct blktap *, struct blktap_request *, int); -int blktap_request_pool_init(void); -void blktap_request_pool_free(void); -int blktap_request_pool_grow(void); -int blktap_request_pool_shrink(void); -struct blktap_request *blktap_request_allocate(struct blktap *); +int blktap_page_pool_init(struct kobject *); +void blktap_page_pool_exit(void); +struct blktap_page_pool *blktap_page_pool_get(const char *); + +size_t blktap_request_debug(struct blktap *, char *, size_t); +struct blktap_request 
*blktap_request_alloc(struct blktap *); +int blktap_request_get_pages(struct blktap *, struct blktap_request *, int); void blktap_request_free(struct blktap *, struct blktap_request *); -struct page *request_to_page(struct blktap_request *, int); +void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int); static inline unsigned long request_to_kaddr(struct blktap_request *req, int seg) { - unsigned long pfn = page_to_pfn(request_to_page(req, seg)); - return (unsigned long)pfn_to_kaddr(pfn); + return (unsigned long)page_address(req->pages[seg]); } +#define request_to_page(_request, _seg) ((_request)->pages[_seg]) + + #endif diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c index ef54fa1..8652e07 100644 --- a/drivers/xen/blktap/control.c +++ b/drivers/xen/blktap/control.c @@ -1,7 +1,7 @@ #include #include #include - +#include #include #include "blktap.h" @@ -10,6 +10,7 @@ DEFINE_MUTEX(blktap_lock); struct blktap **blktaps; int blktap_max_minor; +static struct blktap_page_pool *default_pool; static struct blktap * blktap_control_get_minor(void) @@ -83,6 +84,9 @@ blktap_control_create_tap(void) if (!tap) return NULL; + kobject_get(&default_pool->kobj); + tap->pool = default_pool; + err = blktap_ring_create(tap); if (err) goto fail_tap; @@ -110,6 +114,8 @@ blktap_control_destroy_tap(struct blktap *tap) if (err) return err; + kobject_put(&tap->pool->kobj); + blktap_sysfs_destroy(tap); blktap_control_put_minor(tap); @@ -166,12 +172,43 @@ static struct file_operations blktap_control_file_operations = { .ioctl = blktap_control_ioctl, }; -static struct miscdevice blktap_misc = { +static struct miscdevice blktap_control = { .minor = MISC_DYNAMIC_MINOR, .name = "blktap-control", .fops = &blktap_control_file_operations, }; +static struct device *control_device; + +static ssize_t +blktap_control_show_default_pool(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s", kobject_name(&default_pool->kobj)); +} + +static ssize_t +blktap_control_store_default_pool(struct device *device, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct blktap_page_pool *pool, *tmp = default_pool; + + pool = blktap_page_pool_get(buf); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + default_pool = pool; + kobject_put(&tmp->kobj); + + return size; +} + +static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, + blktap_control_show_default_pool, + blktap_control_store_default_pool); + size_t blktap_control_debug(struct blktap *tap, char *buf, size_t size) { @@ -190,12 +227,11 @@ blktap_control_init(void) { int err; - err = misc_register(&blktap_misc); - if (err) { - blktap_misc.minor = MISC_DYNAMIC_MINOR; - BTERR("misc_register failed for control device"); + err = misc_register(&blktap_control); + if (err) return err; - } + + control_device = blktap_control.this_device; blktap_max_minor = min(64, MAX_BLKTAP_DEVICE); blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL); @@ -204,20 +240,39 @@ blktap_control_init(void) return -ENOMEM; } + err = blktap_page_pool_init(&control_device->kobj); + if (err) + return err; + + default_pool = blktap_page_pool_get("default"); + if (!default_pool) + return -ENOMEM; + + err = device_create_file(control_device, &dev_attr_default_pool); + if (err) + return err; + return 0; } static void blktap_control_exit(void) { + if (default_pool) { + kobject_put(&default_pool->kobj); + default_pool = NULL; + } + + blktap_page_pool_exit(); + if (blktaps) { kfree(blktaps); 
blktaps = NULL; } - if (blktap_misc.minor != MISC_DYNAMIC_MINOR) { - misc_deregister(&blktap_misc); - blktap_misc.minor = MISC_DYNAMIC_MINOR; + if (control_device) { + misc_deregister(&blktap_control); + control_device = NULL; } } @@ -228,7 +283,6 @@ blktap_exit(void) blktap_ring_exit(); blktap_sysfs_exit(); blktap_device_exit(); - blktap_request_pool_free(); } static int __init @@ -239,10 +293,6 @@ blktap_init(void) if (!xen_pv_domain()) return -ENODEV; - err = blktap_request_pool_init(); - if (err) - return err; - err = blktap_device_init(); if (err) goto fail; diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c index 3acb8fa..ed95548 100644 --- a/drivers/xen/blktap/device.c +++ b/drivers/xen/blktap/device.c @@ -605,7 +605,7 @@ blktap_device_run_queue(struct blktap *tap) break; } - request = blktap_request_allocate(tap); + request = blktap_request_alloc(tap); if (!request) { tap->stats.st_oo_req++; goto wait; diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c index eee7100..ca12442 100644 --- a/drivers/xen/blktap/request.c +++ b/drivers/xen/blktap/request.c @@ -1,297 +1,400 @@ +#include #include -#include +#include #include +#include +#include #include "blktap.h" -#define MAX_BUCKETS 8 -#define BUCKET_SIZE MAX_PENDING_REQS +/* max pages per shared pool. just to prevent accidental dos. */ +#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST) -#define BLKTAP_POOL_CLOSING 1 +/* default page pool size. when considering to shrink a shared pool, + * note that paused tapdisks may grab a whole lot of pages for a long + * time. */ +#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES) -struct blktap_request_bucket; +/* max number of pages allocatable per request. */ +#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST -struct blktap_request_handle { - int slot; - uint8_t inuse; - struct blktap_request request; - struct blktap_request_bucket *bucket; -}; +/* min request structs per pool. These grow dynamically. */ +#define POOL_MIN_REQS BLK_RING_SIZE -struct blktap_request_bucket { - atomic_t reqs_in_use; - struct blktap_request_handle handles[BUCKET_SIZE]; - struct page **foreign_pages; -}; +static struct kset *pool_set; -struct blktap_request_pool { - spinlock_t lock; - uint8_t status; - struct list_head free_list; - atomic_t reqs_in_use; - wait_queue_head_t wait_queue; - struct blktap_request_bucket *buckets[MAX_BUCKETS]; -}; +#define kobj_to_pool(_kobj) \ + container_of(_kobj, struct blktap_page_pool, kobj) -static struct blktap_request_pool pool; - -static inline struct blktap_request_handle * -blktap_request_to_handle(struct blktap_request *req) -{ - return container_of(req, struct blktap_request_handle, request); -} +static struct kmem_cache *request_cache; +static mempool_t *request_pool; static void -blktap_request_pool_init_request(struct blktap_request *request) +__page_pool_wake(struct blktap_page_pool *pool) { - int i; - - request->usr_idx = -1; - request->nr_pages = 0; - request->status = BLKTAP_REQUEST_FREE; - INIT_LIST_HEAD(&request->free_list); - for (i = 0; i < ARRAY_SIZE(request->handles); i++) { - request->handles[i].user = INVALID_GRANT_HANDLE; - request->handles[i].kernel = INVALID_GRANT_HANDLE; - } + mempool_t *mem = pool->bufs; + + /* + NB. slightly wasteful to always wait for a full segment + set. but this ensures the next disk makes + progress. presently, the repeated request struct + alloc/release cycles would otherwise keep everyone spinning. 
+ */ + + if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES) + wake_up(&pool->wait); } -static int -blktap_request_pool_allocate_bucket(void) +int +blktap_request_get_pages(struct blktap *tap, + struct blktap_request *request, int nr_pages) { - int i, idx; - unsigned long flags; - struct blktap_request *request; - struct blktap_request_handle *handle; - struct blktap_request_bucket *bucket; + struct blktap_page_pool *pool = tap->pool; + mempool_t *mem = pool->bufs; + struct page *page; - bucket = kzalloc(sizeof(struct blktap_request_bucket), GFP_KERNEL); - if (!bucket) - goto fail; + BUG_ON(request->nr_pages != 0); + BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES); - bucket->foreign_pages = alloc_empty_pages_and_pagevec(MMAP_PAGES); - if (!bucket->foreign_pages) - goto fail; + if (mem->curr_nr < nr_pages) + return -ENOMEM; - spin_lock_irqsave(&pool.lock, flags); + /* NB. avoid thundering herds of tapdisks colliding. */ + spin_lock(&pool->lock); - idx = -1; - for (i = 0; i < MAX_BUCKETS; i++) { - if (!pool.buckets[i]) { - idx = i; - pool.buckets[idx] = bucket; - break; - } + if (mem->curr_nr < nr_pages) { + spin_unlock(&pool->lock); + return -ENOMEM; } - if (idx == -1) { - spin_unlock_irqrestore(&pool.lock, flags); - goto fail; + while (request->nr_pages < nr_pages) { + page = mempool_alloc(mem, GFP_NOWAIT); + BUG_ON(!page); + request->pages[request->nr_pages++] = page; } - for (i = 0; i < BUCKET_SIZE; i++) { - handle = bucket->handles + i; - request = &handle->request; + spin_unlock(&pool->lock); - handle->slot = i; - handle->inuse = 0; - handle->bucket = bucket; + return 0; +} + +static void +blktap_request_put_pages(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_page_pool *pool = tap->pool; + struct page *page; - blktap_request_pool_init_request(request); - list_add_tail(&request->free_list, &pool.free_list); + while (request->nr_pages) { + page = request->pages[--request->nr_pages]; + mempool_free(page, pool->bufs); } +} - spin_unlock_irqrestore(&pool.lock, flags); +size_t +blktap_request_debug(struct blktap *tap, char *buf, size_t size) +{ + struct blktap_page_pool *pool = tap->pool; + mempool_t *mem = pool->bufs; + char *s = buf, *end = buf + size; - return 0; + s += snprintf(buf, end - s, + "pool:%s pages:%d free:%d\n", + kobject_name(&pool->kobj), + mem->min_nr, mem->curr_nr); -fail: - if (bucket && bucket->foreign_pages) - free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES); - kfree(bucket); - return -ENOMEM; + return s - buf; } -static void -blktap_request_pool_free_bucket(struct blktap_request_bucket *bucket) +struct blktap_request* +blktap_request_alloc(struct blktap *tap) { - if (!bucket) - return; + struct blktap_request *request; - BTDBG("freeing bucket %p\n", bucket); + request = mempool_alloc(request_pool, GFP_NOWAIT); + if (request) + request->tap = tap; - free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES); - kfree(bucket); + return request; } -struct page * -request_to_page(struct blktap_request *req, int seg) +void +blktap_request_free(struct blktap *tap, + struct blktap_request *request) { - struct blktap_request_handle *handle = blktap_request_to_handle(req); - int idx = handle->slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; - return handle->bucket->foreign_pages[idx]; + blktap_request_put_pages(tap, request); + + mempool_free(request, request_pool); + + __page_pool_wake(tap->pool); } -int -blktap_request_pool_shrink(void) +static void +blktap_request_ctor(void *obj) +{ + struct blktap_request *request = obj; + + memset(request, 0, 
sizeof(*request)); + sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table)); +} + +static int +blktap_page_pool_resize(struct blktap_page_pool *pool, int target) { - int i, err; - unsigned long flags; - struct blktap_request_bucket *bucket; + mempool_t *bufs = pool->bufs; + int err; + + /* NB. mempool asserts min_nr >= 1 */ + target = max(1, target); + + err = mempool_resize(bufs, target, GFP_KERNEL); + if (err) + return err; - err = -EAGAIN; + __page_pool_wake(pool); - spin_lock_irqsave(&pool.lock, flags); + return 0; +} - /* always keep at least one bucket */ - for (i = 1; i < MAX_BUCKETS; i++) { - bucket = pool.buckets[i]; - if (!bucket) - continue; +struct pool_attribute { + struct attribute attr; - if (atomic_read(&bucket->reqs_in_use)) - continue; + ssize_t (*show)(struct blktap_page_pool *pool, + char *buf); - blktap_request_pool_free_bucket(bucket); - pool.buckets[i] = NULL; - err = 0; - break; - } + ssize_t (*store)(struct blktap_page_pool *pool, + const char *buf, size_t count); +}; - spin_unlock_irqrestore(&pool.lock, flags); +#define kattr_to_pool_attr(_kattr) \ + container_of(_kattr, struct pool_attribute, attr) - return err; +static ssize_t +blktap_page_pool_show_size(struct blktap_page_pool *pool, + char *buf) +{ + mempool_t *mem = pool->bufs; + return sprintf(buf, "%d", mem->min_nr); } -int -blktap_request_pool_grow(void) +static ssize_t +blktap_page_pool_store_size(struct blktap_page_pool *pool, + const char *buf, size_t size) { - return blktap_request_pool_allocate_bucket(); + int target; + + /* + * NB. target fixup to avoid undesired results. less than a + * full segment set can wedge the disk. much more than a + * couple times the physical queue depth is rarely useful. + */ + + target = simple_strtoul(buf, NULL, 0); + target = max(POOL_MAX_REQUEST_PAGES, target); + target = min(target, POOL_MAX_PAGES); + + return blktap_page_pool_resize(pool, target) ? 
: size; } -struct blktap_request * -blktap_request_allocate(struct blktap *tap) +static struct pool_attribute blktap_page_pool_attr_size = + __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, + blktap_page_pool_show_size, + blktap_page_pool_store_size); + +static ssize_t +blktap_page_pool_show_free(struct blktap_page_pool *pool, + char *buf) { - int i; - uint16_t usr_idx; - unsigned long flags; - struct blktap_request *request; + mempool_t *mem = pool->bufs; + return sprintf(buf, "%d", mem->curr_nr); +} - usr_idx = -1; - request = NULL; +static struct pool_attribute blktap_page_pool_attr_free = + __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH, + blktap_page_pool_show_free, + NULL); - spin_lock_irqsave(&pool.lock, flags); +static struct attribute *blktap_page_pool_attrs[] = { + &blktap_page_pool_attr_size.attr, + &blktap_page_pool_attr_free.attr, + NULL, +}; - if (pool.status == BLKTAP_POOL_CLOSING) - goto out; +static inline struct kobject* +__blktap_kset_find_obj(struct kset *kset, const char *name) +{ + struct kobject *k; + struct kobject *ret = NULL; - for (i = 0; i < ARRAY_SIZE(tap->pending_requests); i++) - if (!tap->pending_requests[i]) { - usr_idx = i; + spin_lock(&kset->list_lock); + list_for_each_entry(k, &kset->list, entry) { + if (kobject_name(k) && !strcmp(kobject_name(k), name)) { + ret = kobject_get(k); break; } - - if (usr_idx == (uint16_t)-1) - goto out; - - if (!list_empty(&pool.free_list)) { - request = list_entry(pool.free_list.next, - struct blktap_request, free_list); - list_del(&request->free_list); } + spin_unlock(&kset->list_lock); + return ret; +} - if (request) { - struct blktap_request_handle *handle; +static ssize_t +blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr, + char *buf) +{ + struct blktap_page_pool *pool = kobj_to_pool(kobj); + struct pool_attribute *attr = kattr_to_pool_attr(kattr); - atomic_inc(&pool.reqs_in_use); + if (attr->show) + return attr->show(pool, buf); - handle = blktap_request_to_handle(request); - atomic_inc(&handle->bucket->reqs_in_use); - handle->inuse = 1; + return -EIO; +} - request->usr_idx = usr_idx; +static ssize_t +blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr, + const char *buf, size_t size) +{ + struct blktap_page_pool *pool = kobj_to_pool(kobj); + struct pool_attribute *attr = kattr_to_pool_attr(kattr); - tap->pending_requests[usr_idx] = request; - tap->pending_cnt++; - } + if (attr->show) + return attr->store(pool, buf, size); -out: - spin_unlock_irqrestore(&pool.lock, flags); - return request; + return -EIO; } -void -blktap_request_free(struct blktap *tap, struct blktap_request *request) +static struct sysfs_ops blktap_page_pool_sysfs_ops = { + .show = blktap_page_pool_show_attr, + .store = blktap_page_pool_store_attr, +}; + +static void +blktap_page_pool_release(struct kobject *kobj) { - int free; - unsigned long flags; - struct blktap_request_handle *handle; + struct blktap_page_pool *pool = kobj_to_pool(kobj); + mempool_destroy(pool->bufs); + kfree(pool); +} - BUG_ON(request->usr_idx >= ARRAY_SIZE(tap->pending_requests)); - handle = blktap_request_to_handle(request); +struct kobj_type blktap_page_pool_ktype = { + .release = blktap_page_pool_release, + .sysfs_ops = &blktap_page_pool_sysfs_ops, + .default_attrs = blktap_page_pool_attrs, +}; + +static void* +__mempool_page_alloc(gfp_t gfp_mask, void *pool_data) +{ + struct page *page; - spin_lock_irqsave(&pool.lock, flags); + if (!(gfp_mask & __GFP_WAIT)) + return NULL; - handle->inuse = 0; - tap->pending_requests[request->usr_idx] = NULL; 
- blktap_request_pool_init_request(request); - list_add(&request->free_list, &pool.free_list); - atomic_dec(&handle->bucket->reqs_in_use); - free = atomic_dec_and_test(&pool.reqs_in_use); - tap->pending_cnt--; + page = alloc_page(gfp_mask); + if (page) + SetPageReserved(page); - spin_unlock_irqrestore(&pool.lock, flags); + return page; +} - if (free) - wake_up(&pool.wait_queue); +static void +__mempool_page_free(void *element, void *pool_data) +{ + struct page *page = element; - blktap_ring_kick_all(); + ClearPageReserved(page); + put_page(page); } -void -blktap_request_pool_free(void) +static struct kobject* +blktap_page_pool_create(const char *name, int nr_pages) { - int i; - unsigned long flags; + struct blktap_page_pool *pool; + int err; - spin_lock_irqsave(&pool.lock, flags); + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + goto fail; - pool.status = BLKTAP_POOL_CLOSING; - while (atomic_read(&pool.reqs_in_use)) { - spin_unlock_irqrestore(&pool.lock, flags); - wait_event(pool.wait_queue, !atomic_read(&pool.reqs_in_use)); - spin_lock_irqsave(&pool.lock, flags); - } + spin_lock_init(&pool->lock); + init_waitqueue_head(&pool->wait); - for (i = 0; i < MAX_BUCKETS; i++) { - blktap_request_pool_free_bucket(pool.buckets[i]); - pool.buckets[i] = NULL; - } + pool->bufs = mempool_create(nr_pages, + __mempool_page_alloc, __mempool_page_free, + pool); + if (!pool->bufs) + goto fail_pool; + + kobject_init(&pool->kobj, &blktap_page_pool_ktype); + pool->kobj.kset = pool_set; + err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name); + if (err) + goto fail_bufs; + + return &pool->kobj; - spin_unlock_irqrestore(&pool.lock, flags); + kobject_del(&pool->kobj); +fail_bufs: + mempool_destroy(pool->bufs); +fail_pool: + kfree(pool); +fail: + return NULL; } -int __init -blktap_request_pool_init(void) +struct blktap_page_pool* +blktap_page_pool_get(const char *name) { - int i, err; + struct kobject *kobj; + + kobj = __blktap_kset_find_obj(pool_set, name); + if (!kobj) + kobj = blktap_page_pool_create(name, + POOL_DEFAULT_PAGES); + if (!kobj) + return ERR_PTR(-ENOMEM); - memset(&pool, 0, sizeof(pool)); + return kobj_to_pool(kobj); +} + +int __init +blktap_page_pool_init(struct kobject *parent) +{ + request_cache = + kmem_cache_create("blktap-request", + sizeof(struct blktap_request), 0, + 0, blktap_request_ctor); + if (!request_cache) + return -ENOMEM; + + request_pool = + mempool_create_slab_pool(POOL_MIN_REQS, request_cache); + if (!request_pool) + return -ENOMEM; + + pool_set = kset_create_and_add("pools", NULL, parent); + if (!pool_set) + return -ENOMEM; - spin_lock_init(&pool.lock); - INIT_LIST_HEAD(&pool.free_list); - atomic_set(&pool.reqs_in_use, 0); - init_waitqueue_head(&pool.wait_queue); + return 0; +} - for (i = 0; i < 2; i++) { - err = blktap_request_pool_allocate_bucket(); - if (err) - goto fail; +void +blktap_page_pool_exit(void) +{ + if (pool_set) { + BUG_ON(!list_empty(&pool_set->list)); + kset_unregister(pool_set); + pool_set = NULL; } - return 0; + if (request_pool) { + mempool_destroy(request_pool); + request_pool = NULL; + } -fail: - blktap_request_pool_free(); - return err; + if (request_cache) { + kmem_cache_destroy(request_cache); + request_cache = NULL; + } } diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c index 057e97f..a72a1b3 100644 --- a/drivers/xen/blktap/ring.c +++ b/drivers/xen/blktap/ring.c @@ -17,8 +17,6 @@ int blktap_ring_major; static struct cdev blktap_ring_cdev; -static DECLARE_WAIT_QUEUE_HEAD(blktap_poll_wait); - static inline struct blktap 
* vma_to_blktap(struct vm_area_struct *vma) { @@ -409,7 +407,7 @@ static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait) struct blktap_ring *ring = &tap->ring; int work = 0; - poll_wait(filp, &blktap_poll_wait, wait); + poll_wait(filp, &tap->pool->wait, wait); poll_wait(filp, &ring->poll_wait, wait); down_read(¤t->mm->mmap_sem); @@ -440,12 +438,6 @@ blktap_ring_kick_user(struct blktap *tap) wake_up(&tap->ring.poll_wait); } -void -blktap_ring_kick_all(void) -{ - wake_up(&blktap_poll_wait); -} - int blktap_ring_destroy(struct blktap *tap) { diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c index e573549..7bbfea8 100644 --- a/drivers/xen/blktap/sysfs.c +++ b/drivers/xen/blktap/sysfs.c @@ -104,6 +104,8 @@ blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, cha s += blktap_control_debug(tap, s, end - s); + s += blktap_request_debug(tap, s, end - s); + s += blktap_device_debug(tap, s, end - s); s += blktap_ring_debug(tap, s, end - s); @@ -129,6 +131,38 @@ blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char * } static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL); +static ssize_t +blktap_sysfs_show_pool(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct blktap *tap = dev_get_drvdata(dev); + return sprintf(buf, "%s", kobject_name(&tap->pool->kobj)); +} + +static ssize_t +blktap_sysfs_store_pool(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct blktap *tap = dev_get_drvdata(dev); + struct blktap_page_pool *pool, *tmp = tap->pool; + + if (tap->device.gd) + return -EBUSY; + + pool = blktap_page_pool_get(buf); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + tap->pool = pool; + kobject_put(&tmp->kobj); + + return size; +} +DEVICE_ATTR(pool, S_IRUSR|S_IWUSR, + blktap_sysfs_show_pool, blktap_sysfs_store_pool); + int blktap_sysfs_create(struct blktap *tap) { @@ -151,6 +185,8 @@ blktap_sysfs_create(struct blktap *tap) if (!err) err = device_create_file(dev, &dev_attr_task); if (!err) + err = device_create_file(dev, &dev_attr_pool); + if (!err) ring->dev = dev; else device_unregister(dev); -- 1.7.0.4
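
For reviewers less familiar with the mempool interface, here is a stripped-down,
standalone sketch of the page-pool pattern request.c adopts above. The demo_*
names are illustrative only and not part of the patch. The idea: the custom
refill callback refuses non-waiting allocations, so GFP_NOWAIT requests are
served strictly from the pool's preallocated elements and a tapdisk can never
over-commit pages beyond the configured pool size; releasing pages wakes
waiters only once a full segment set is available again.

	#include <linux/mempool.h>
	#include <linux/gfp.h>
	#include <linux/mm.h>
	#include <linux/spinlock.h>
	#include <linux/wait.h>

	#define DEMO_SEGS 11	/* i.e. BLKIF_MAX_SEGMENTS_PER_REQUEST */

	struct demo_page_pool {
		mempool_t		*bufs;
		spinlock_t		lock;
		wait_queue_head_t	wait;
	};

	/* Refill callback: refuse atomic requests, so GFP_NOWAIT callers
	 * draw from the preallocated elements only and the pool stays
	 * bounded. (The patch additionally marks pages reserved.) */
	static void *demo_page_alloc(gfp_t gfp, void *data)
	{
		if (!(gfp & __GFP_WAIT))
			return NULL;
		return alloc_page(gfp);
	}

	static void demo_page_free(void *element, void *data)
	{
		__free_page(element);
	}

	static int demo_pool_init(struct demo_page_pool *pool, int nr_pages)
	{
		spin_lock_init(&pool->lock);
		init_waitqueue_head(&pool->wait);
		pool->bufs = mempool_create(nr_pages, demo_page_alloc,
					    demo_page_free, pool);
		return pool->bufs ? 0 : -ENOMEM;
	}

	/* Grab a whole segment set atomically, or fail so the caller can
	 * back off and wait for the pool to refill. */
	static int demo_get_pages(struct demo_page_pool *pool,
				  struct page **pages, int nr)
	{
		int i;

		spin_lock(&pool->lock);
		if (pool->bufs->curr_nr < nr) {
			spin_unlock(&pool->lock);
			return -ENOMEM;
		}
		for (i = 0; i < nr; i++)
			pages[i] = mempool_alloc(pool->bufs, GFP_NOWAIT);
		spin_unlock(&pool->lock);

		return 0;
	}

	static void demo_put_pages(struct demo_page_pool *pool,
				   struct page **pages, int nr)
	{
		while (nr--)
			mempool_free(pages[nr], pool->bufs);
		/* wake waiters only once a full segment set is back */
		if (pool->bufs->curr_nr >= DEMO_SEGS)
			wake_up(&pool->wait);
	}

A caller seeing -ENOMEM from demo_get_pages() would typically
wait_event() on pool->wait and retry; the patch instead feeds the
pool's waitqueue into blktap_ring_poll(), so tapdisk simply backs off
until enough pages have been returned.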