All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Xu <xu.an@cloudex.cn>
To: xen-devel@lists.xensource.com, xen-users@lists.xensource.com
Subject: VM disk I/O limit patch
Date: Tue, 21 Jun 2011 16:29:35 +0800	[thread overview]
Message-ID: <20110621162935.F4A1.3A8D29D5@cloudex.cn> (raw)

[-- Attachment #1: Type: text/plain, Size: 13311 bytes --]

Hi all,

I add a blkback QoS patch.
You can config(dynamic/static) different I/O speed for different VM disk
by this patch.

----------------------------------------------------------------------------

diff -urNp blkback/blkback.c blkback-qos/blkback.c
--- blkback/blkback.c	2011-06-22 07:54:19.000000000 +0800
+++ blkback-qos/blkback.c	2011-06-22 07:53:18.000000000 +0800
@@ -44,6 +44,11 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/blkback (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
 /*
  * These are rather arbitrary. They are fairly large because adjacent requests
  * pulled from a communication ring are quite likely to end up being part of
@@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
 static int do_block_io_op(blkif_t *blkif);
 static int dispatch_rw_block_io(blkif_t *blkif,
 				 blkif_request_t *req,
-				 pending_req_t *pending_req);
+				 pending_req_t *pending_req,
+				 int *done_nr_sects);
 static void make_response(blkif_t *blkif, u64 id,
 			  unsigned short op, int st);
 
@@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
 	blkif->st_pk_req = 0;
 }
 
+static void refill_reqcount(blkif_t *blkif)
+{
+	blkif->reqtime = jiffies + msecs_to_jiffies(1000);
+ 	blkif->reqcount = blkif->reqrate;
+ 	if (blkif->reqcount < blkif->reqmin)
+ 		blkif->reqcount = blkif->reqmin;
+}
+
 int blkif_schedule(void *arg)
 {
 	blkif_t *blkif = arg;
 	struct vbd *vbd = &blkif->vbd;
+	int	ret = 0;
+	struct timeval cur_time;
 
 	blkif_get(blkif);
 
@@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
 
-		if (do_block_io_op(blkif))
+		ret = do_block_io_op(blkif);
+		if (ret)
 			blkif->waiting_reqs = 1;
 		unplug_queue(blkif);
 
+		if(blkif->reqmin){
+			if(2 == ret && (blkif->reqtime > jiffies)){
+				jiffies_to_timeval(jiffies, &cur_time);
+				if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+					printk(KERN_DEBUG "%s: going to sleep %d millsecs(rate=%d)\n",
+							current->comm,
+							jiffies_to_msecs(blkif->reqtime - jiffies),
+							blkif->reqrate);
+				
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule_timeout(blkif->reqtime - jiffies);
+				
+				if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+					printk(KERN_DEBUG "%s: sleep end(rate=%d)\n",
+							current->comm,blkif->reqrate);
+			}
+			if (time_after(jiffies, blkif->reqtime))
+				refill_reqcount(blkif);
+		}
+
 		if (log_stats && time_after(jiffies, blkif->st_print))
 			print_stats(blkif);
+		
 	}
 
 	if (log_stats)
@@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
 /******************************************************************
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
-
 static int do_block_io_op(blkif_t *blkif)
 {
 	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
@@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif
 	pending_req_t *pending_req;
 	RING_IDX rc, rp;
 	int more_to_do = 0, ret;
+	static int last_done_nr_sects = 0;	
 
 	rc = blk_rings->common.req_cons;
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
+	
+	if (blkif->reqmin && blkif->reqcount <= 0)
+		return (rc != rp) ? 2 : 0;
 
 	while ((rc != rp) || (blkif->is_suspended_req)) {
 
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
 			break;
+		
+		if(blkif->reqmin){
+			blkif->reqcount -= last_done_nr_sects;
+			if (blkif->reqcount <= 0) {
+				more_to_do = 2;
+				break;
+			}
+		}
 
 		if (kthread_should_stop()) {
 			more_to_do = 1;
@@ -367,14 +416,14 @@ handle_request:
 		switch (req.operation) {
 		case BLKIF_OP_READ:
 			blkif->st_rd_req++;
-			ret = dispatch_rw_block_io(blkif, &req, pending_req); 
+			ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects); 
 			break;
 		case BLKIF_OP_WRITE_BARRIER:
 			blkif->st_br_req++;
 			/* fall through */
 		case BLKIF_OP_WRITE:
 			blkif->st_wr_req++;
-			ret = dispatch_rw_block_io(blkif, &req, pending_req);
+			ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects);
 			break;
 		case BLKIF_OP_PACKET:
 			DPRINTK("error: block operation BLKIF_OP_PACKET not implemented\n");
@@ -412,9 +461,29 @@ handle_request:
 	return more_to_do;
 }
 
+static char* operation2str(int operation)
+{
+	char* ret_str = NULL;
+	switch (operation) {
+	case BLKIF_OP_READ:
+		ret_str = "READ";
+		break;
+	case BLKIF_OP_WRITE:
+		ret_str = "WRITE";
+		break;
+	case BLKIF_OP_WRITE_BARRIER:
+		ret_str = "WRITE_BARRIER";
+		break;
+	default:
+		ret_str = "0";
+	}
+	return ret_str;
+}
+
 static int dispatch_rw_block_io(blkif_t *blkif,
 				 blkif_request_t *req,
-				 pending_req_t *pending_req)
+				 pending_req_t *pending_req,
+				 int *done_nr_sects)
 {
 	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
 	struct bio *bio = NULL;
 	int ret, i;
 	int operation;
+	struct timeval cur_time;
+
+	*done_nr_sects = 0;
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
 	else if (operation == WRITE || operation == WRITE_BARRIER)
 		blkif->st_wr_sect += preq.nr_sects;
 
+	*done_nr_sects = preq.nr_sects;
+	jiffies_to_timeval(jiffies, &cur_time);
+	if ((log_stats == 2) && (cur_time.tv_sec % 10 ==1 ))
+		printk(KERN_DEBUG "  operation=%s sects=%d\n",
+			operation2str(req->operation),preq.nr_sects);
+
 	return 0;
 
  fail_flush:
@@ -695,6 +773,8 @@ static int __init blkif_init(void)
 
 	blkif_xenbus_init();
 
+	DPRINTK("blkif_inited\n");
+
 	return 0;
 
  out_of_memory:
diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
--- blkback/cdrom.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/cdrom.c	2011-06-22 07:34:50.000000000 +0800
@@ -35,9 +35,9 @@
 #include "common.h"
 
 #undef DPRINTK
-#define DPRINTK(_f, _a...)			\
-	printk("(%s() file=%s, line=%d) " _f "\n",	\
-		 __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/cdrom (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
 
 
 #define MEDIA_PRESENT "media-present"
diff -urNp blkback/common.h blkback-qos/common.h
--- blkback/common.h	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/common.h	2011-06-22 07:34:50.000000000 +0800
@@ -100,8 +100,17 @@ typedef struct blkif_st {
 
 	grant_handle_t shmem_handle;
 	grant_ref_t    shmem_ref;
+
+	/* qos information */
+	unsigned long   reqtime;
+	int    reqcount;
+	int    reqmin;
+	int    reqrate; 
+
 } blkif_t;
 
+#define VBD_QOS_MIN_RATE_LIMIT			2*1024		/* 	1MBs 	*/
+
 struct backend_info
 {
 	struct xenbus_device *dev;
@@ -111,6 +120,8 @@ struct backend_info
 	unsigned major;
 	unsigned minor;
 	char *mode;
+  	struct xenbus_watch rate_watch;
+	int have_rate_watch; 
 };
 
 blkif_t *blkif_alloc(domid_t domid);
diff -urNp blkback/vbd.c blkback-qos/vbd.c
--- blkback/vbd.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/vbd.c	2011-06-22 07:34:50.000000000 +0800
@@ -35,6 +35,11 @@
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ?				\
 	(_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/vbd (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
 unsigned long long vbd_size(struct vbd *vbd)
 {
 	return vbd_sz(vbd);
@@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
 	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
 		vbd->type |= VDISK_REMOVABLE;
 
-	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+	DPRINTK("Successful creation of handle=%04x (dom=%u)",
 		handle, blkif->domid);
 	return 0;
 }
diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
--- blkback/xenbus.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/xenbus.c	2011-06-22 07:34:50.000000000 +0800
@@ -25,13 +25,14 @@
 
 #undef DPRINTK
 #define DPRINTK(fmt, args...)				\
-	pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",	\
+	printk("blkback/xenbus (%s:%d) " fmt ".\n",	\
 		 __FUNCTION__, __LINE__, ##args)
 
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
 			    unsigned int);
+static void unregister_rate_watch(struct backend_info *be);
 
 static int blkback_name(blkif_t *blkif, char *buf)
 {
@@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
 	char name[TASK_COMM_LEN];
 
 	/* Not ready to connect? */
-	if (!blkif->irq || !blkif->vbd.bdev)
+	if (!blkif->irq || !blkif->vbd.bdev){
+		DPRINTK("Not ready to connect");
 		return;
+	}
 
 	/* Already connected? */
 	if (blkif->be->dev->state == XenbusStateConnected)
@@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
 		be->cdrom_watch.node = NULL;
 	}
 
+	unregister_rate_watch(be);
+
 	if (be->blkif) {
 		blkif_disconnect(be->blkif);
 		vbd_free(&be->blkif->vbd);
@@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
 
 	err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
 				 &be->backend_watch, backend_changed);
+
+	DPRINTK("blkback_probe called");
+	DPRINTK("dev->nodename=%s/physical-device",dev->nodename);
+	
 	if (err)
 		goto fail;
 
@@ -266,7 +275,6 @@ fail:
 	return err;
 }
 
-
 /**
  * Callback received when the hotplug scripts have placed the physical-device
  * node.  Read it and the mode node, and create a vbd.  If the frontend is
@@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
 	struct xenbus_device *dev = be->dev;
 	int cdrom = 0;
 	char *device_type;
+	char name[TASK_COMM_LEN];
 
-	DPRINTK("");
+	DPRINTK("backend_changed called");
 
 	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
 			   &major, &minor);
@@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
 		kfree(device_type);
 	}
 
+	/* gather information about QoS policy for this device. */
+	err = blkback_name(be->blkif, name);
+	if (err) {
+		xenbus_dev_error(be->dev, err, "get blkback dev name");
+		return;
+	}
+	
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+				"tokens-rate", "%d", &be->blkif->reqrate, 
+				NULL);
+	if(err){
+		DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error",name);
+	}else{
+		if(be->blkif->reqrate <= 0){
+			be->blkif->reqmin = 0 ;
+			DPRINTK("%s tokens-rate == 0,no limit",name);	
+		}else{
+			DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+			be->blkif->reqrate *= 2;
+			be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+			if(be->blkif->reqmin > be->blkif->reqrate){
+				be->blkif->reqrate = be->blkif->reqmin;
+				DPRINTK("%s reset default value(tokens-rate=%d)",name,be->blkif->reqrate);
+			}
+		}
+	}
+	be->blkif->reqtime = jiffies;
+
 	if (be->major == 0 && be->minor == 0) {
 		/* Front end dir is a number, which is used as the handle. */
 
@@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
 
 /* ** Connection ** */
 
+static void unregister_rate_watch(struct backend_info *be)
+{
+	if (be->have_rate_watch) {
+		unregister_xenbus_watch(&be->rate_watch);
+		kfree(be->rate_watch.node);
+	}
+	be->have_rate_watch = 0;
+}
+
+static void rate_changed(struct xenbus_watch *watch,
+                       const char **vec, unsigned int len)
+{
+
+	struct backend_info *be=container_of(watch,struct backend_info, rate_watch);
+	int err;
+	char name[TASK_COMM_LEN];
+
+	err = blkback_name(be->blkif, name);
+	if (err) {
+		xenbus_dev_error(be->dev, err, "get blkback dev name");
+		return;
+	}
+
+	err = xenbus_gather(XBT_NIL,be->dev->otherend, 
+					"tokens-rate",	"%d", 
+					&be->blkif->reqrate,NULL);
+	if(err){
+		DPRINTK("%s xenbus_gather(tokens-rate) error",name);
+	}else{
+		if(be->blkif->reqrate <= 0){
+			be->blkif->reqmin = 0;
+			DPRINTK("%s tokens-rate == 0,no limit",name);	
+		}else{
+			DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+			be->blkif->reqrate *= 2;
+			be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+			if(be->blkif->reqmin > be->blkif->reqrate){
+				be->blkif->reqrate = be->blkif->reqmin;
+				DPRINTK("%s reset default value(tokens-rate=%d)",name,be->blkif->reqrate);
+			}
+		}
+	}
+}
 
 /**
  * Write the physical details regarding the block device to the store, and
@@ -439,6 +519,14 @@ again:
 	if (err)
 		goto abort;
 
+	/*add by andrew for centos pv*/
+	err = xenbus_printf(xbt, dev->nodename,"feature-flush-cache", "1");
+	if (err){
+		xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
+			dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    vbd_size(&be->blkif->vbd));
 	if (err) {
@@ -469,11 +557,22 @@ again:
 	if (err)
 		xenbus_dev_fatal(dev, err, "ending transaction");
 
+	DPRINTK("xenbus_switch_to XenbusStateConnected");
+
 	err = xenbus_switch_state(dev, XenbusStateConnected);
 	if (err)
 		xenbus_dev_fatal(dev, err, "switching to Connected state",
 				 dev->nodename);
 
+	unregister_rate_watch(be);
+	err=xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
+								&be->rate_watch,rate_changed);
+	if (!err)
+		be->have_rate_watch = 1;
+	else
+		xenbus_dev_fatal(dev, err, "watching tokens-rate",
+				 dev->nodename);
+
 	return;
  abort:
 	xenbus_transaction_end(xbt, 1);

[-- Attachment #2: blkback-qos-20110621.diff --]
[-- Type: application/octet-stream, Size: 13108 bytes --]

diff -urNp blkback/blkback.c blkback-qos/blkback.c
--- blkback/blkback.c	2011-06-22 07:54:19.000000000 +0800
+++ blkback-qos/blkback.c	2011-06-22 07:53:18.000000000 +0800
@@ -44,6 +44,11 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/blkback (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
 /*
  * These are rather arbitrary. They are fairly large because adjacent requests
  * pulled from a communication ring are quite likely to end up being part of
@@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
 static int do_block_io_op(blkif_t *blkif);
 static int dispatch_rw_block_io(blkif_t *blkif,
 				 blkif_request_t *req,
-				 pending_req_t *pending_req);
+				 pending_req_t *pending_req,
+				 int *done_nr_sects);
 static void make_response(blkif_t *blkif, u64 id,
 			  unsigned short op, int st);
 
@@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
 	blkif->st_pk_req = 0;
 }
 
+static void refill_reqcount(blkif_t *blkif)
+{
+	blkif->reqtime = jiffies + msecs_to_jiffies(1000);
+ 	blkif->reqcount = blkif->reqrate;
+ 	if (blkif->reqcount < blkif->reqmin)
+ 		blkif->reqcount = blkif->reqmin;
+}
+
 int blkif_schedule(void *arg)
 {
 	blkif_t *blkif = arg;
 	struct vbd *vbd = &blkif->vbd;
+	int	ret = 0;
+	struct timeval cur_time;
 
 	blkif_get(blkif);
 
@@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
 
-		if (do_block_io_op(blkif))
+		ret = do_block_io_op(blkif);
+		if (ret)
 			blkif->waiting_reqs = 1;
 		unplug_queue(blkif);
 
+		if(blkif->reqmin){
+			if(2 == ret && (blkif->reqtime > jiffies)){
+				jiffies_to_timeval(jiffies, &cur_time);
+				if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+					printk(KERN_DEBUG "%s: going to sleep %d millsecs(rate=%d)\n",
+							current->comm,
+							jiffies_to_msecs(blkif->reqtime - jiffies),
+							blkif->reqrate);
+				
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule_timeout(blkif->reqtime - jiffies);
+				
+				if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+					printk(KERN_DEBUG "%s: sleep end(rate=%d)\n",
+							current->comm,blkif->reqrate);
+			}
+			if (time_after(jiffies, blkif->reqtime))
+				refill_reqcount(blkif);
+		}
+
 		if (log_stats && time_after(jiffies, blkif->st_print))
 			print_stats(blkif);
+		
 	}
 
 	if (log_stats)
@@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
 /******************************************************************
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
-
 static int do_block_io_op(blkif_t *blkif)
 {
 	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
@@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif
 	pending_req_t *pending_req;
 	RING_IDX rc, rp;
 	int more_to_do = 0, ret;
+	static int last_done_nr_sects = 0;	
 
 	rc = blk_rings->common.req_cons;
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
+	
+	if (blkif->reqmin && blkif->reqcount <= 0)
+		return (rc != rp) ? 2 : 0;
 
 	while ((rc != rp) || (blkif->is_suspended_req)) {
 
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
 			break;
+		
+		if(blkif->reqmin){
+			blkif->reqcount -= last_done_nr_sects;
+			if (blkif->reqcount <= 0) {
+				more_to_do = 2;
+				break;
+			}
+		}
 
 		if (kthread_should_stop()) {
 			more_to_do = 1;
@@ -367,14 +416,14 @@ handle_request:
 		switch (req.operation) {
 		case BLKIF_OP_READ:
 			blkif->st_rd_req++;
-			ret = dispatch_rw_block_io(blkif, &req, pending_req); 
+			ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects); 
 			break;
 		case BLKIF_OP_WRITE_BARRIER:
 			blkif->st_br_req++;
 			/* fall through */
 		case BLKIF_OP_WRITE:
 			blkif->st_wr_req++;
-			ret = dispatch_rw_block_io(blkif, &req, pending_req);
+			ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects);
 			break;
 		case BLKIF_OP_PACKET:
 			DPRINTK("error: block operation BLKIF_OP_PACKET not implemented\n");
@@ -412,9 +461,29 @@ handle_request:
 	return more_to_do;
 }
 
+static char* operation2str(int operation)
+{
+	char* ret_str = NULL;
+	switch (operation) {
+	case BLKIF_OP_READ:
+		ret_str = "READ";
+		break;
+	case BLKIF_OP_WRITE:
+		ret_str = "WRITE";
+		break;
+	case BLKIF_OP_WRITE_BARRIER:
+		ret_str = "WRITE_BARRIER";
+		break;
+	default:
+		ret_str = "0";
+	}
+	return ret_str;
+}
+
 static int dispatch_rw_block_io(blkif_t *blkif,
 				 blkif_request_t *req,
-				 pending_req_t *pending_req)
+				 pending_req_t *pending_req,
+				 int *done_nr_sects)
 {
 	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
 	struct bio *bio = NULL;
 	int ret, i;
 	int operation;
+	struct timeval cur_time;
+
+	*done_nr_sects = 0;
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
 	else if (operation == WRITE || operation == WRITE_BARRIER)
 		blkif->st_wr_sect += preq.nr_sects;
 
+	*done_nr_sects = preq.nr_sects;
+	jiffies_to_timeval(jiffies, &cur_time);
+	if ((log_stats == 2) && (cur_time.tv_sec % 10 ==1 ))
+		printk(KERN_DEBUG "  operation=%s sects=%d\n",
+			operation2str(req->operation),preq.nr_sects);
+
 	return 0;
 
  fail_flush:
@@ -695,6 +773,8 @@ static int __init blkif_init(void)
 
 	blkif_xenbus_init();
 
+	DPRINTK("blkif_inited\n");
+
 	return 0;
 
  out_of_memory:
diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
--- blkback/cdrom.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/cdrom.c	2011-06-22 07:34:50.000000000 +0800
@@ -35,9 +35,9 @@
 #include "common.h"
 
 #undef DPRINTK
-#define DPRINTK(_f, _a...)			\
-	printk("(%s() file=%s, line=%d) " _f "\n",	\
-		 __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/cdrom (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
 
 
 #define MEDIA_PRESENT "media-present"
diff -urNp blkback/common.h blkback-qos/common.h
--- blkback/common.h	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/common.h	2011-06-22 07:34:50.000000000 +0800
@@ -100,8 +100,17 @@ typedef struct blkif_st {
 
 	grant_handle_t shmem_handle;
 	grant_ref_t    shmem_ref;
+
+	/* qos information */
+	unsigned long   reqtime;
+	int    reqcount;
+	int    reqmin;
+	int    reqrate; 
+
 } blkif_t;
 
+#define VBD_QOS_MIN_RATE_LIMIT			2*1024		/* 	1MBs 	*/
+
 struct backend_info
 {
 	struct xenbus_device *dev;
@@ -111,6 +120,8 @@ struct backend_info
 	unsigned major;
 	unsigned minor;
 	char *mode;
+  	struct xenbus_watch rate_watch;
+	int have_rate_watch; 
 };
 
 blkif_t *blkif_alloc(domid_t domid);
diff -urNp blkback/vbd.c blkback-qos/vbd.c
--- blkback/vbd.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/vbd.c	2011-06-22 07:34:50.000000000 +0800
@@ -35,6 +35,11 @@
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ?				\
 	(_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/vbd (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
 unsigned long long vbd_size(struct vbd *vbd)
 {
 	return vbd_sz(vbd);
@@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
 	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
 		vbd->type |= VDISK_REMOVABLE;
 
-	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+	DPRINTK("Successful creation of handle=%04x (dom=%u)",
 		handle, blkif->domid);
 	return 0;
 }
diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
--- blkback/xenbus.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/xenbus.c	2011-06-22 07:34:50.000000000 +0800
@@ -25,13 +25,14 @@
 
 #undef DPRINTK
 #define DPRINTK(fmt, args...)				\
-	pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",	\
+	printk("blkback/xenbus (%s:%d) " fmt ".\n",	\
 		 __FUNCTION__, __LINE__, ##args)
 
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
 			    unsigned int);
+static void unregister_rate_watch(struct backend_info *be);
 
 static int blkback_name(blkif_t *blkif, char *buf)
 {
@@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
 	char name[TASK_COMM_LEN];
 
 	/* Not ready to connect? */
-	if (!blkif->irq || !blkif->vbd.bdev)
+	if (!blkif->irq || !blkif->vbd.bdev){
+		DPRINTK("Not ready to connect");
 		return;
+	}
 
 	/* Already connected? */
 	if (blkif->be->dev->state == XenbusStateConnected)
@@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
 		be->cdrom_watch.node = NULL;
 	}
 
+	unregister_rate_watch(be);
+
 	if (be->blkif) {
 		blkif_disconnect(be->blkif);
 		vbd_free(&be->blkif->vbd);
@@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
 
 	err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
 				 &be->backend_watch, backend_changed);
+
+	DPRINTK("blkback_probe called");
+	DPRINTK("dev->nodename=%s/physical-device",dev->nodename);
+	
 	if (err)
 		goto fail;
 
@@ -266,7 +275,6 @@ fail:
 	return err;
 }
 
-
 /**
  * Callback received when the hotplug scripts have placed the physical-device
  * node.  Read it and the mode node, and create a vbd.  If the frontend is
@@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
 	struct xenbus_device *dev = be->dev;
 	int cdrom = 0;
 	char *device_type;
+	char name[TASK_COMM_LEN];
 
-	DPRINTK("");
+	DPRINTK("backend_changed called");
 
 	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
 			   &major, &minor);
@@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
 		kfree(device_type);
 	}
 
+	/* gather information about QoS policy for this device. */
+	err = blkback_name(be->blkif, name);
+	if (err) {
+		xenbus_dev_error(be->dev, err, "get blkback dev name");
+		return;
+	}
+	
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+				"tokens-rate", "%d", &be->blkif->reqrate, 
+				NULL);
+	if(err){
+		DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error",name);
+	}else{
+		if(be->blkif->reqrate <= 0){
+			be->blkif->reqmin = 0 ;
+			DPRINTK("%s tokens-rate == 0,no limit",name);	
+		}else{
+			DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+			be->blkif->reqrate *= 2;
+			be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+			if(be->blkif->reqmin > be->blkif->reqrate){
+				be->blkif->reqrate = be->blkif->reqmin;
+				DPRINTK("%s reset default value(tokens-rate=%d)",name,be->blkif->reqrate);
+			}
+		}
+	}
+	be->blkif->reqtime = jiffies;
+
 	if (be->major == 0 && be->minor == 0) {
 		/* Front end dir is a number, which is used as the handle. */
 
@@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
 
 /* ** Connection ** */
 
+static void unregister_rate_watch(struct backend_info *be)
+{
+	if (be->have_rate_watch) {
+		unregister_xenbus_watch(&be->rate_watch);
+		kfree(be->rate_watch.node);
+	}
+	be->have_rate_watch = 0;
+}
+
+static void rate_changed(struct xenbus_watch *watch,
+                       const char **vec, unsigned int len)
+{
+
+	struct backend_info *be=container_of(watch,struct backend_info, rate_watch);
+	int err;
+	char name[TASK_COMM_LEN];
+
+	err = blkback_name(be->blkif, name);
+	if (err) {
+		xenbus_dev_error(be->dev, err, "get blkback dev name");
+		return;
+	}
+
+	err = xenbus_gather(XBT_NIL,be->dev->otherend, 
+					"tokens-rate",	"%d", 
+					&be->blkif->reqrate,NULL);
+	if(err){
+		DPRINTK("%s xenbus_gather(tokens-rate) error",name);
+	}else{
+		if(be->blkif->reqrate <= 0){
+			be->blkif->reqmin = 0;
+			DPRINTK("%s tokens-rate == 0,no limit",name);	
+		}else{
+			DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+			be->blkif->reqrate *= 2;
+			be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+			if(be->blkif->reqmin > be->blkif->reqrate){
+				be->blkif->reqrate = be->blkif->reqmin;
+				DPRINTK("%s reset default value(tokens-rate=%d)",name,be->blkif->reqrate);
+			}
+		}
+	}
+}
 
 /**
  * Write the physical details regarding the block device to the store, and
@@ -439,6 +519,14 @@ again:
 	if (err)
 		goto abort;
 
+	/*add by andrew for centos pv*/
+	err = xenbus_printf(xbt, dev->nodename,"feature-flush-cache", "1");
+	if (err){
+		xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
+			dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    vbd_size(&be->blkif->vbd));
 	if (err) {
@@ -469,11 +557,22 @@ again:
 	if (err)
 		xenbus_dev_fatal(dev, err, "ending transaction");
 
+	DPRINTK("xenbus_switch_to XenbusStateConnected");
+
 	err = xenbus_switch_state(dev, XenbusStateConnected);
 	if (err)
 		xenbus_dev_fatal(dev, err, "switching to Connected state",
 				 dev->nodename);
 
+	unregister_rate_watch(be);
+	err=xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
+								&be->rate_watch,rate_changed);
+	if (!err)
+		be->have_rate_watch = 1;
+	else
+		xenbus_dev_fatal(dev, err, "watching tokens-rate",
+				 dev->nodename);
+
 	return;
  abort:
 	xenbus_transaction_end(xbt, 1);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

             reply	other threads:[~2011-06-21  8:29 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-21  8:29 Andrew Xu [this message]
2011-06-21 13:33 ` VM disk I/O limit patch Konrad Rzeszutek Wilk
2011-06-22 12:06   ` [Xen-users] " Andrew Xu
2011-06-22 13:11     ` Konrad Rzeszutek Wilk
2011-06-22 14:12       ` Re[2]: " Andrew Xu
2011-06-22 14:39         ` Konrad Rzeszutek Wilk
2011-06-24 14:29         ` Re[2]: " Ian Jackson
2011-06-22 12:16   ` Re: [Xen-devel] " Florian Heigl
2011-06-22 13:07     ` [Xen-users] " Konrad Rzeszutek Wilk
2011-06-23 20:45 ` Shaun Reitan
2011-06-27 15:41   ` Konrad Rzeszutek Wilk
2011-06-27 19:22     ` Shaun Reitan
2011-06-28 11:32     ` Re: [Xen-devel] " Florian Heigl
2011-06-28 13:29       ` [Xen-users] " Konrad Rzeszutek Wilk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110621162935.F4A1.3A8D29D5@cloudex.cn \
    --to=xu.an@cloudex.cn \
    --cc=xen-devel@lists.xensource.com \
    --cc=xen-users@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.