linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] md: add new interface 'mk_rq' in target_type
@ 2012-09-03  2:12 Robin Dong
  2012-09-03  2:12 ` [PATCH 2/2] md: modify dm_io() so it could return bios instead of submitting it Robin Dong
  0 siblings, 1 reply; 4+ messages in thread
From: Robin Dong @ 2012-09-03  2:12 UTC (permalink / raw)
  To: dm-devel, linux-kernel; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

We are now trying to modify flashcache(https://github.com/facebook/flashcache)
to make it request based so that
we can let cfq io-controller control the bandwidth between different
io cgroups.

A search in the dm directory tells me that only multipath is a request
based dm target and its functionality
is very simple and map_rq() is used to map the request to different underlying devices.
We can't work in this way because:

1. the request which is processed by map_rq() needs to be issued to
	different lower devices (disk device and cache device, in flashcache), therefore the request
	can't be totally remapped by simply changing its queue and returning DM_MAPIO_REMAPPED in map_rq() like multipath_map()
2. to submit bios directly in map_rq() (by returning DM_MAPIO_SUBMITTED) will cause BUG_ON(!irqs_disabled())
	in dm_request_fn() because the submit_bio()->generic_make_request()->blk_queue_bio() will definitely call spin_unlock_irq to enable the irqs


As above, the interface map_rq() provided by the device-mapper framework
is not enough for an autonomous target, like flashcache.

We propose to add a new
mk_rq interface so that we can make the requests
by ourselves.

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 drivers/md/dm.c               |   10 ++++++++++
 include/linux/device-mapper.h |    3 +++
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4e09b6f..3ae67de 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1459,11 +1459,21 @@ static int dm_request_based(struct mapped_device *md)
 static void dm_request(struct request_queue *q, struct bio *bio)
 {
 	struct mapped_device *md = q->queuedata;
+	struct dm_table *map = dm_get_live_table(md);
+	struct dm_target *ti = dm_table_find_target(map, bio->bi_sector);
+
+	if (ti->type->mk_rq) {
+		ti->type->mk_rq(ti, q, bio);
+		goto out;
+	}
 
 	if (dm_request_based(md))
 		blk_queue_bio(q, bio);
 	else
 		_dm_request(q, bio);
+
+out:
+	dm_table_put(map);
 }
 
 void dm_dispatch_request(struct request *rq)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 38d27a1..2386389 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -50,6 +50,8 @@ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
 			  union map_info *map_context);
 typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
 				  union map_info *map_context);
+typedef int (*dm_make_request_fn) (struct dm_target *ti,
+				struct request_queue *q, struct bio *bio);
 
 /*
  * Returns:
@@ -136,6 +138,7 @@ struct target_type {
 	dm_dtr_fn dtr;
 	dm_map_fn map;
 	dm_map_request_fn map_rq;
+	dm_make_request_fn mk_rq;
 	dm_endio_fn end_io;
 	dm_request_endio_fn rq_end_io;
 	dm_presuspend_fn presuspend;
-- 
1.7.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/2] md: modify dm_io() so it could return bios instead of submitting it
  2012-09-03  2:12 [PATCH 1/2] md: add new interface 'mk_rq' in target_type Robin Dong
@ 2012-09-03  2:12 ` Robin Dong
  0 siblings, 0 replies; 4+ messages in thread
From: Robin Dong @ 2012-09-03  2:12 UTC (permalink / raw)
  To: dm-devel, linux-kernel; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

When trying to modify flashcache to be request based (currently it's bio based), we need
to make requests from bios by ourselves, but dm_io() will submit these bios directly,
so we propose to modify dm_io() to return the bios instead of submitting them.

This could also improve the flexibility of dm_io().

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 drivers/md/dm-bufio.c           |    2 +
 drivers/md/dm-io.c              |   58 +++++++++++++++++++++++----------------
 drivers/md/dm-kcopyd.c          |    1 +
 drivers/md/dm-log.c             |    1 +
 drivers/md/dm-raid1.c           |    3 ++
 drivers/md/dm-snap-persistent.c |    1 +
 include/linux/dm-io.h           |    3 ++
 7 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index cc06a1e..f5867b9 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -487,6 +487,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
 		.notify.fn = dmio_complete,
 		.notify.context = b,
 		.client = b->c->dm_io,
+		.submit_bio = 1,
 	};
 	struct dm_io_region region = {
 		.bdev = b->c->bdev,
@@ -1200,6 +1201,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = c->dm_io,
+		.submit_bio = 1,
 	};
 	struct dm_io_region io_reg = {
 		.bdev = c->bdev,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index ea5dd28..f235182 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -287,8 +287,8 @@ static void km_dp_init(struct dpages *dp, void *data)
 /*-----------------------------------------------------------------
  * IO routines that accept a list of pages.
  *---------------------------------------------------------------*/
-static void do_region(int rw, unsigned region, struct dm_io_region *where,
-		      struct dpages *dp, struct io *io)
+static void do_region(struct dm_io_request *io_req, unsigned region,
+		struct dm_io_region *where, struct dpages *dp, struct io *io)
 {
 	struct bio *bio;
 	struct page *page;
@@ -298,6 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	sector_t remaining = where->count;
 	struct request_queue *q = bdev_get_queue(where->bdev);
 	sector_t discard_sectors;
+	int rw = io_req->bi_rw;
 
 	/*
 	 * where->count may be zero if rw holds a flush and we need to
@@ -339,15 +340,26 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 		}
 
 		atomic_inc(&io->count);
-		submit_bio(rw, bio);
+		if (io_req->submit_bio)
+			submit_bio(rw, bio);
+		else {
+			bio->bi_rw |= rw;
+			if (io_req->start) {
+				io_req->end->bi_next = bio;
+				io_req->end = bio;
+			} else
+				io_req->start = io_req->end = bio;
+			bio->bi_next = NULL;
+		}
 	} while (remaining);
 }
 
-static void dispatch_io(int rw, unsigned int num_regions,
+static void dispatch_io(struct dm_io_request *io_req, unsigned int num_regions,
 			struct dm_io_region *where, struct dpages *dp,
 			struct io *io, int sync)
 {
 	int i;
+	int rw = io_req->bi_rw;
 	struct dpages old_pages = *dp;
 
 	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
@@ -362,7 +374,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	for (i = 0; i < num_regions; i++) {
 		*dp = old_pages;
 		if (where[i].count || (rw & REQ_FLUSH))
-			do_region(rw, i, where + i, dp, io);
+			do_region(io_req, i, where + i, dp, io);
 	}
 
 	/*
@@ -372,8 +384,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	dec_count(io, 0, 0);
 }
 
-static int sync_io(struct dm_io_client *client, unsigned int num_regions,
-		   struct dm_io_region *where, int rw, struct dpages *dp,
+static int sync_io(struct dm_io_request *io_req,  unsigned int num_regions,
+		   struct dm_io_region *where, struct dpages *dp,
 		   unsigned long *error_bits)
 {
 	/*
@@ -385,7 +397,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
 	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
 
-	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
+	if (num_regions > 1 && (io_req->bi_rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
 		return -EIO;
 	}
@@ -393,12 +405,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	io->error_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->sleeper = current;
-	io->client = client;
+	io->client = io_req->client;
 
 	io->vma_invalidate_address = dp->vma_invalidate_address;
 	io->vma_invalidate_size = dp->vma_invalidate_size;
 
-	dispatch_io(rw, num_regions, where, dp, io, 1);
+	dispatch_io(io_req, num_regions, where, dp, io, 1);
 
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
@@ -416,30 +428,29 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	return io->error_bits ? -EIO : 0;
 }
 
-static int async_io(struct dm_io_client *client, unsigned int num_regions,
-		    struct dm_io_region *where, int rw, struct dpages *dp,
-		    io_notify_fn fn, void *context)
+static int async_io(struct dm_io_request *io_req, unsigned int num_regions,
+		    struct dm_io_region *where, struct dpages *dp)
 {
 	struct io *io;
 
-	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
+	if (num_regions > 1 && (io_req->bi_rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
-		fn(1, context);
+		io_req->notify.fn(1, io_req->notify.context);
 		return -EIO;
 	}
 
-	io = mempool_alloc(client->pool, GFP_NOIO);
+	io = mempool_alloc(io_req->client->pool, GFP_NOIO);
 	io->error_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->sleeper = NULL;
-	io->client = client;
-	io->callback = fn;
-	io->context = context;
+	io->client = io_req->client;
+	io->callback = io_req->notify.fn;
+	io->context = io_req->notify.context;
 
 	io->vma_invalidate_address = dp->vma_invalidate_address;
 	io->vma_invalidate_size = dp->vma_invalidate_size;
 
-	dispatch_io(rw, num_regions, where, dp, io, 0);
+	dispatch_io(io_req, num_regions, where, dp, io, 0);
 	return 0;
 }
 
@@ -499,11 +510,10 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
 		return r;
 
 	if (!io_req->notify.fn)
-		return sync_io(io_req->client, num_regions, where,
-			       io_req->bi_rw, &dp, sync_error_bits);
+		return sync_io(io_req, num_regions, where,
+				&dp, sync_error_bits);
 
-	return async_io(io_req->client, num_regions, where, io_req->bi_rw,
-			&dp, io_req->notify.fn, io_req->notify.context);
+	return async_io(io_req, num_regions, where, &dp);
 }
 EXPORT_SYMBOL(dm_io);
 
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index bed444c..6a8ecc9 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -387,6 +387,7 @@ static int run_io_job(struct kcopyd_job *job)
 		.notify.fn = complete_io,
 		.notify.context = job,
 		.client = job->kc->io_client,
+		.submit_bio = 1,
 	};
 
 	if (job->rw == READ)
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 627d191..8425e84 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -463,6 +463,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 			kfree(lc);
 			return r;
 		}
+		lc->io_req.submit_bio = 1;
 
 		lc->disk_header = vmalloc(buf_size);
 		if (!lc->disk_header) {
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index bc5ddba..338d726 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -261,6 +261,7 @@ static int mirror_flush(struct dm_target *ti)
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = ms->io_client,
+		.submit_bio = 1,
 	};
 
 	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) {
@@ -530,6 +531,7 @@ static void read_async_bio(struct mirror *m, struct bio *bio)
 		.notify.fn = read_callback,
 		.notify.context = bio,
 		.client = m->ms->io_client,
+		.submit_bio = 1,
 	};
 
 	map_region(&io, m, bio);
@@ -633,6 +635,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
 		.notify.fn = write_callback,
 		.notify.context = bio,
 		.client = ms->io_client,
+		.submit_bio = 1,
 	};
 
 	if (bio->bi_rw & REQ_DISCARD) {
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 3ac4156..0600e1d 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -241,6 +241,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
 		.mem.ptr.vma = area,
 		.client = ps->io_client,
 		.notify.fn = NULL,
+		.submit_bio = 1,
 	};
 	struct mdata_req req;
 
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index f4b0aa3..45f576b 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -61,6 +61,9 @@ struct dm_io_request {
 	struct dm_io_memory mem;	/* Memory to use for io */
 	struct dm_io_notify notify;	/* Synchronous if notify.fn is NULL */
 	struct dm_io_client *client;	/* Client memory handler */
+	int submit_bio;
+	struct bio *start;
+	struct bio *end;
 };
 
 /*
-- 
1.7.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] md: add new interface 'mk_rq' in target_type
  2012-09-20  3:18 [PATCH 1/2] md: add new interface 'mk_rq' in target_type Robin Dong
@ 2012-11-14  3:08 ` Robin Dong
  0 siblings, 0 replies; 4+ messages in thread
From: Robin Dong @ 2012-11-14  3:08 UTC (permalink / raw)
  To: linux-kernel, dm-devel

ping

2012/9/20 Robin Dong <robin.k.dong@gmail.com>:
> From: Robin Dong <sanbai@taobao.com>
>
> We are now trying to modify flashcache(https://github.com/facebook/flashcache)
> to make it request based so that
> we can let cfq io-controller control the bandwidth between different
> io cgroups.
>
> A search in the dm directory tells me that only multipath is a request
> based dm target and its functionality
> is very simple and map_rq() is used to map the request to different underlying devices.
> We can't work in this way because:
>
> 1. the request which processed by map_rq() need to be issued to
>         different lower devices (disk device and cache device, in flashcache), therefore the request
>         can't be totally remapped by simply changing its queue and returning DM_MAPIO_REMAPPED in map_rq() like multipath_map()
> 2. to submit bios drectly in map_rq() (by return DM_MAPIO_SUBMITTED) will cause BUG_ON(!irqs_disabled())
>         in dm_request_fn() because the submit_bio()->generic_make_request()->blk_queue_bio() will definitly call spin_unlock_irq to enable the irqs
>
> As above,the interface map_rq() provided by devcie-mapper framework
> is not enough for an autonomous target, like flashcache.
>
> We propose to add a new
> mk_rq interface so that we can make the requests
> by ourselves.
>
> Signed-off-by: Robin Dong <sanbai@taobao.com>
> ---
>  drivers/md/dm-io.c    |   58 ++++++++++++++++++++++++++++--------------------
>  drivers/md/dm-log.c   |    1 +
>  include/linux/dm-io.h |    3 ++
>  3 files changed, 38 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
> index ea5dd28..f767792 100644
> --- a/drivers/md/dm-io.c
> +++ b/drivers/md/dm-io.c
> @@ -287,8 +287,8 @@ static void km_dp_init(struct dpages *dp, void *data)
>  /*-----------------------------------------------------------------
>   * IO routines that accept a list of pages.
>   *---------------------------------------------------------------*/
> -static void do_region(int rw, unsigned region, struct dm_io_region *where,
> -                     struct dpages *dp, struct io *io)
> +static void do_region(struct dm_io_request *io_req, unsigned region,
> +               struct dm_io_region *where, struct dpages *dp, struct io *io)
>  {
>         struct bio *bio;
>         struct page *page;
> @@ -298,6 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
>         sector_t remaining = where->count;
>         struct request_queue *q = bdev_get_queue(where->bdev);
>         sector_t discard_sectors;
> +       int rw = io_req->bi_rw;
>
>         /*
>          * where->count may be zero if rw holds a flush and we need to
> @@ -339,15 +340,26 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
>                 }
>
>                 atomic_inc(&io->count);
> -               submit_bio(rw, bio);
> +               if (!io_req->only_create_bio)
> +                       submit_bio(rw, bio);
> +               else {
> +                       bio->bi_rw |= rw;
> +                       if (io_req->start) {
> +                               io_req->end->bi_next = bio;
> +                               io_req->end = bio;
> +                       } else
> +                               io_req->start = io_req->end = bio;
> +                       bio->bi_next = NULL;
> +               }
>         } while (remaining);
>  }
>
> -static void dispatch_io(int rw, unsigned int num_regions,
> +static void dispatch_io(struct dm_io_request *io_req, unsigned int num_regions,
>                         struct dm_io_region *where, struct dpages *dp,
>                         struct io *io, int sync)
>  {
>         int i;
> +       int rw = io_req->bi_rw;
>         struct dpages old_pages = *dp;
>
>         BUG_ON(num_regions > DM_IO_MAX_REGIONS);
> @@ -362,7 +374,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
>         for (i = 0; i < num_regions; i++) {
>                 *dp = old_pages;
>                 if (where[i].count || (rw & REQ_FLUSH))
> -                       do_region(rw, i, where + i, dp, io);
> +                       do_region(io_req, i, where + i, dp, io);
>         }
>
>         /*
> @@ -372,8 +384,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
>         dec_count(io, 0, 0);
>  }
>
> -static int sync_io(struct dm_io_client *client, unsigned int num_regions,
> -                  struct dm_io_region *where, int rw, struct dpages *dp,
> +static int sync_io(struct dm_io_request *io_req,  unsigned int num_regions,
> +                  struct dm_io_region *where, struct dpages *dp,
>                    unsigned long *error_bits)
>  {
>         /*
> @@ -385,7 +397,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
>         volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
>         struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
>
> -       if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
> +       if (num_regions > 1 && (io_req->bi_rw & RW_MASK) != WRITE) {
>                 WARN_ON(1);
>                 return -EIO;
>         }
> @@ -393,12 +405,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
>         io->error_bits = 0;
>         atomic_set(&io->count, 1); /* see dispatch_io() */
>         io->sleeper = current;
> -       io->client = client;
> +       io->client = io_req->client;
>
>         io->vma_invalidate_address = dp->vma_invalidate_address;
>         io->vma_invalidate_size = dp->vma_invalidate_size;
>
> -       dispatch_io(rw, num_regions, where, dp, io, 1);
> +       dispatch_io(io_req, num_regions, where, dp, io, 1);
>
>         while (1) {
>                 set_current_state(TASK_UNINTERRUPTIBLE);
> @@ -416,30 +428,29 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
>         return io->error_bits ? -EIO : 0;
>  }
>
> -static int async_io(struct dm_io_client *client, unsigned int num_regions,
> -                   struct dm_io_region *where, int rw, struct dpages *dp,
> -                   io_notify_fn fn, void *context)
> +static int async_io(struct dm_io_request *io_req, unsigned int num_regions,
> +               struct dm_io_region *where, struct dpages *dp)
>  {
>         struct io *io;
>
> -       if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
> +       if (num_regions > 1 && (io_req->bi_rw & RW_MASK) != WRITE) {
>                 WARN_ON(1);
> -               fn(1, context);
> +               io_req->notify.fn(1, io_req->notify.context);
>                 return -EIO;
>         }
>
> -       io = mempool_alloc(client->pool, GFP_NOIO);
> +       io = mempool_alloc(io_req->client->pool, GFP_NOIO);
>         io->error_bits = 0;
>         atomic_set(&io->count, 1); /* see dispatch_io() */
>         io->sleeper = NULL;
> -       io->client = client;
> -       io->callback = fn;
> -       io->context = context;
> +       io->client = io_req->client;
> +       io->callback = io_req->notify.fn;
> +       io->context = io_req->notify.context;
>
>         io->vma_invalidate_address = dp->vma_invalidate_address;
>         io->vma_invalidate_size = dp->vma_invalidate_size;
>
> -       dispatch_io(rw, num_regions, where, dp, io, 0);
> +       dispatch_io(io_req, num_regions, where, dp, io, 0);
>         return 0;
>  }
>
> @@ -499,11 +510,10 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
>                 return r;
>
>         if (!io_req->notify.fn)
> -               return sync_io(io_req->client, num_regions, where,
> -                              io_req->bi_rw, &dp, sync_error_bits);
> +               return sync_io(io_req, num_regions, where,
> +                               &dp, sync_error_bits);
>
> -       return async_io(io_req->client, num_regions, where, io_req->bi_rw,
> -                       &dp, io_req->notify.fn, io_req->notify.context);
> +       return async_io(io_req, num_regions, where, &dp);
>  }
>  EXPORT_SYMBOL(dm_io);
>
> diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
> index 627d191..3bf065a 100644
> --- a/drivers/md/dm-log.c
> +++ b/drivers/md/dm-log.c
> @@ -463,6 +463,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
>                         kfree(lc);
>                         return r;
>                 }
> +               lc->io_req.only_create_bio = 0;
>
>                 lc->disk_header = vmalloc(buf_size);
>                 if (!lc->disk_header) {
> diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
> index f4b0aa3..8782163 100644
> --- a/include/linux/dm-io.h
> +++ b/include/linux/dm-io.h
> @@ -61,6 +61,9 @@ struct dm_io_request {
>         struct dm_io_memory mem;        /* Memory to use for io */
>         struct dm_io_notify notify;     /* Synchronous if notify.fn is NULL */
>         struct dm_io_client *client;    /* Client memory handler */
> +       int only_create_bio;
> +       struct bio *start;
> +       struct bio *end;
>  };
>
>  /*
> --
> 1.7.1
>



-- 
--
Best Regard
Robin Dong

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] md: add new interface 'mk_rq' in target_type
@ 2012-09-20  3:18 Robin Dong
  2012-11-14  3:08 ` Robin Dong
  0 siblings, 1 reply; 4+ messages in thread
From: Robin Dong @ 2012-09-20  3:18 UTC (permalink / raw)
  To: linux-kernel, dm-devel; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

We are now trying to modify flashcache(https://github.com/facebook/flashcache)
to make it request based so that
we can let cfq io-controller control the bandwidth between different
io cgroups.

A search in the dm directory tells me that only multipath is a request
based dm target and its functionality
is very simple and map_rq() is used to map the request to different underlying devices.
We can't work in this way because:

1. the request which is processed by map_rq() needs to be issued to
	different lower devices (disk device and cache device, in flashcache), therefore the request
	can't be totally remapped by simply changing its queue and returning DM_MAPIO_REMAPPED in map_rq() like multipath_map()
2. to submit bios directly in map_rq() (by returning DM_MAPIO_SUBMITTED) will cause BUG_ON(!irqs_disabled())
	in dm_request_fn() because the submit_bio()->generic_make_request()->blk_queue_bio() will definitely call spin_unlock_irq to enable the irqs

As above, the interface map_rq() provided by the device-mapper framework
is not enough for an autonomous target, like flashcache.

We propose to add a new
mk_rq interface so that we can make the requests
by ourselves.

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 drivers/md/dm-io.c    |   58 ++++++++++++++++++++++++++++--------------------
 drivers/md/dm-log.c   |    1 +
 include/linux/dm-io.h |    3 ++
 3 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index ea5dd28..f767792 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -287,8 +287,8 @@ static void km_dp_init(struct dpages *dp, void *data)
 /*-----------------------------------------------------------------
  * IO routines that accept a list of pages.
  *---------------------------------------------------------------*/
-static void do_region(int rw, unsigned region, struct dm_io_region *where,
-		      struct dpages *dp, struct io *io)
+static void do_region(struct dm_io_request *io_req, unsigned region,
+		struct dm_io_region *where, struct dpages *dp, struct io *io)
 {
 	struct bio *bio;
 	struct page *page;
@@ -298,6 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	sector_t remaining = where->count;
 	struct request_queue *q = bdev_get_queue(where->bdev);
 	sector_t discard_sectors;
+	int rw = io_req->bi_rw;
 
 	/*
 	 * where->count may be zero if rw holds a flush and we need to
@@ -339,15 +340,26 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 		}
 
 		atomic_inc(&io->count);
-		submit_bio(rw, bio);
+		if (!io_req->only_create_bio)
+			submit_bio(rw, bio);
+		else {
+			bio->bi_rw |= rw;
+			if (io_req->start) {
+				io_req->end->bi_next = bio;
+				io_req->end = bio;
+			} else
+				io_req->start = io_req->end = bio;
+			bio->bi_next = NULL;
+		}
 	} while (remaining);
 }
 
-static void dispatch_io(int rw, unsigned int num_regions,
+static void dispatch_io(struct dm_io_request *io_req, unsigned int num_regions,
 			struct dm_io_region *where, struct dpages *dp,
 			struct io *io, int sync)
 {
 	int i;
+	int rw = io_req->bi_rw;
 	struct dpages old_pages = *dp;
 
 	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
@@ -362,7 +374,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	for (i = 0; i < num_regions; i++) {
 		*dp = old_pages;
 		if (where[i].count || (rw & REQ_FLUSH))
-			do_region(rw, i, where + i, dp, io);
+			do_region(io_req, i, where + i, dp, io);
 	}
 
 	/*
@@ -372,8 +384,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	dec_count(io, 0, 0);
 }
 
-static int sync_io(struct dm_io_client *client, unsigned int num_regions,
-		   struct dm_io_region *where, int rw, struct dpages *dp,
+static int sync_io(struct dm_io_request *io_req,  unsigned int num_regions,
+		   struct dm_io_region *where, struct dpages *dp,
 		   unsigned long *error_bits)
 {
 	/*
@@ -385,7 +397,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
 	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
 
-	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
+	if (num_regions > 1 && (io_req->bi_rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
 		return -EIO;
 	}
@@ -393,12 +405,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	io->error_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->sleeper = current;
-	io->client = client;
+	io->client = io_req->client;
 
 	io->vma_invalidate_address = dp->vma_invalidate_address;
 	io->vma_invalidate_size = dp->vma_invalidate_size;
 
-	dispatch_io(rw, num_regions, where, dp, io, 1);
+	dispatch_io(io_req, num_regions, where, dp, io, 1);
 
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
@@ -416,30 +428,29 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	return io->error_bits ? -EIO : 0;
 }
 
-static int async_io(struct dm_io_client *client, unsigned int num_regions,
-		    struct dm_io_region *where, int rw, struct dpages *dp,
-		    io_notify_fn fn, void *context)
+static int async_io(struct dm_io_request *io_req, unsigned int num_regions,
+		struct dm_io_region *where, struct dpages *dp)
 {
 	struct io *io;
 
-	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
+	if (num_regions > 1 && (io_req->bi_rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
-		fn(1, context);
+		io_req->notify.fn(1, io_req->notify.context);
 		return -EIO;
 	}
 
-	io = mempool_alloc(client->pool, GFP_NOIO);
+	io = mempool_alloc(io_req->client->pool, GFP_NOIO);
 	io->error_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->sleeper = NULL;
-	io->client = client;
-	io->callback = fn;
-	io->context = context;
+	io->client = io_req->client;
+	io->callback = io_req->notify.fn;
+	io->context = io_req->notify.context;
 
 	io->vma_invalidate_address = dp->vma_invalidate_address;
 	io->vma_invalidate_size = dp->vma_invalidate_size;
 
-	dispatch_io(rw, num_regions, where, dp, io, 0);
+	dispatch_io(io_req, num_regions, where, dp, io, 0);
 	return 0;
 }
 
@@ -499,11 +510,10 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
 		return r;
 
 	if (!io_req->notify.fn)
-		return sync_io(io_req->client, num_regions, where,
-			       io_req->bi_rw, &dp, sync_error_bits);
+		return sync_io(io_req, num_regions, where,
+				&dp, sync_error_bits);
 
-	return async_io(io_req->client, num_regions, where, io_req->bi_rw,
-			&dp, io_req->notify.fn, io_req->notify.context);
+	return async_io(io_req, num_regions, where, &dp);
 }
 EXPORT_SYMBOL(dm_io);
 
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 627d191..3bf065a 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -463,6 +463,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 			kfree(lc);
 			return r;
 		}
+		lc->io_req.only_create_bio = 0;
 
 		lc->disk_header = vmalloc(buf_size);
 		if (!lc->disk_header) {
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index f4b0aa3..8782163 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -61,6 +61,9 @@ struct dm_io_request {
 	struct dm_io_memory mem;	/* Memory to use for io */
 	struct dm_io_notify notify;	/* Synchronous if notify.fn is NULL */
 	struct dm_io_client *client;	/* Client memory handler */
+	int only_create_bio;
+	struct bio *start;
+	struct bio *end;
 };
 
 /*
-- 
1.7.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2012-11-14  3:08 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-03  2:12 [PATCH 1/2] md: add new interface 'mk_rq' in target_type Robin Dong
2012-09-03  2:12 ` [PATCH 2/2] md: modify dm_io() so it could return bios instead of submitting it Robin Dong
2012-09-20  3:18 [PATCH 1/2] md: add new interface 'mk_rq' in target_type Robin Dong
2012-11-14  3:08 ` Robin Dong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).