VM disk I/O limit patch

From: Andrew Xu <xu.an@cloudex.cn>
To: xen-devel@lists.xensource.com, xen-users@lists.xensource.com
Subject: VM disk I/O limit patch
Date: Tue, 21 Jun 2011 16:29:35 +0800	[thread overview]
Message-ID: <20110621162935.F4A1.3A8D29D5@cloudex.cn> (raw)

[-- Attachment #1: Type: text/plain, Size: 13311 bytes --]

Hi all,

I have added a blkback QoS patch.
With this patch you can configure a different I/O speed limit for each VM
disk, either statically or dynamically.

----------------------------------------------------------------------------

diff -urNp blkback/blkback.c blkback-qos/blkback.c

--- blkback/blkback.c	2011-06-22 07:54:19.000000000 +0800
+++ blkback-qos/blkback.c	2011-06-22 07:53:18.000000000 +0800
@@ -44,6 +44,11 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/blkback (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
 /*
  * These are rather arbitrary. They are fairly large because adjacent requests
  * pulled from a communication ring are quite likely to end up being part of
@@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
 static int do_block_io_op(blkif_t *blkif);
 static int dispatch_rw_block_io(blkif_t *blkif,
 				 blkif_request_t *req,
-				 pending_req_t *pending_req);
+				 pending_req_t *pending_req,
+				 int *done_nr_sects);
 static void make_response(blkif_t *blkif, u64 id,
 			  unsigned short op, int st);
 
@@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
 	blkif->st_pk_req = 0;
 }
 
+static void refill_reqcount(blkif_t *blkif)
+{
+	/* Open a new one-second window; its budget is never below reqmin. */
+	int budget = (blkif->reqrate < blkif->reqmin) ? blkif->reqmin : blkif->reqrate;
+	blkif->reqtime = jiffies + msecs_to_jiffies(1000);
+	blkif->reqcount = budget;
+}
+
 int blkif_schedule(void *arg)
 {
 	blkif_t *blkif = arg;
 	struct vbd *vbd = &blkif->vbd;
+	int	ret = 0;
+	struct timeval cur_time;
 
 	blkif_get(blkif);
 
@@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
 
-		if (do_block_io_op(blkif))
+		ret = do_block_io_op(blkif);
+		if (ret)
 			blkif->waiting_reqs = 1;
 		unplug_queue(blkif);
 
+		if(blkif->reqmin){
+			if(2 == ret && (blkif->reqtime > jiffies)){
+				jiffies_to_timeval(jiffies, &cur_time);
+				if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+					printk(KERN_DEBUG "%s: going to sleep %d millsecs(rate=%d)\n",
+							current->comm,
+							jiffies_to_msecs(blkif->reqtime - jiffies),
+							blkif->reqrate);
+				
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule_timeout(blkif->reqtime - jiffies);
+				
+				if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+					printk(KERN_DEBUG "%s: sleep end(rate=%d)\n",
+							current->comm,blkif->reqrate);
+			}
+			if (time_after(jiffies, blkif->reqtime))
+				refill_reqcount(blkif);
+		}
+
 		if (log_stats && time_after(jiffies, blkif->st_print))
 			print_stats(blkif);
+		
 	}
 
 	if (log_stats)
@@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
 /******************************************************************
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
-
 static int do_block_io_op(blkif_t *blkif)
 {
 	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
@@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif
 	pending_req_t *pending_req;
 	RING_IDX rc, rp;
 	int more_to_do = 0, ret;
+	static int last_done_nr_sects = 0;	
 
 	rc = blk_rings->common.req_cons;
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
+	
+	if (blkif->reqmin && blkif->reqcount <= 0)
+		return (rc != rp) ? 2 : 0;
 
 	while ((rc != rp) || (blkif->is_suspended_req)) {
 
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
 			break;
+		
+		if(blkif->reqmin){
+			blkif->reqcount -= last_done_nr_sects;
+			if (blkif->reqcount <= 0) {
+				more_to_do = 2;
+				break;
+			}
+		}
 
 		if (kthread_should_stop()) {
 			more_to_do = 1;
@@ -367,14 +416,14 @@ handle_request:
 		switch (req.operation) {
 		case BLKIF_OP_READ:
 			blkif->st_rd_req++;
-			ret = dispatch_rw_block_io(blkif, &req, pending_req); 
+			ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects); 
 			break;
 		case BLKIF_OP_WRITE_BARRIER:
 			blkif->st_br_req++;
 			/* fall through */
 		case BLKIF_OP_WRITE:
 			blkif->st_wr_req++;
-			ret = dispatch_rw_block_io(blkif, &req, pending_req);
+			ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects);
 			break;
 		case BLKIF_OP_PACKET:
 			DPRINTK("error: block operation BLKIF_OP_PACKET not implemented\n");
@@ -412,9 +461,29 @@ handle_request:
 	return more_to_do;
 }
 
+/*
+ * Translate a blkif request opcode into a printable name for log output.
+ * Opcodes other than READ, WRITE and WRITE_BARRIER are reported as "0".
+ */
+static char* operation2str(int operation)
+{
+	switch (operation) {
+	case BLKIF_OP_READ:
+		return "READ";
+	case BLKIF_OP_WRITE:
+		return "WRITE";
+	case BLKIF_OP_WRITE_BARRIER:
+		return "WRITE_BARRIER";
+	default:
+		break;
+	}
+	return "0";
+}
+
 static int dispatch_rw_block_io(blkif_t *blkif,
 				 blkif_request_t *req,
-				 pending_req_t *pending_req)
+				 pending_req_t *pending_req,
+				 int *done_nr_sects)
 {
 	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
 	struct bio *bio = NULL;
 	int ret, i;
 	int operation;
+	struct timeval cur_time;
+
+	*done_nr_sects = 0;
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
 	else if (operation == WRITE || operation == WRITE_BARRIER)
 		blkif->st_wr_sect += preq.nr_sects;
 
+	*done_nr_sects = preq.nr_sects;
+	jiffies_to_timeval(jiffies, &cur_time);
+	if ((log_stats == 2) && (cur_time.tv_sec % 10 ==1 ))
+		printk(KERN_DEBUG "  operation=%s sects=%d\n",
+			operation2str(req->operation),preq.nr_sects);
+
 	return 0;
 
  fail_flush:
@@ -695,6 +773,8 @@ static int __init blkif_init(void)
 
 	blkif_xenbus_init();
 
+	DPRINTK("blkif_inited\n");
+
 	return 0;
 
  out_of_memory:
diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
--- blkback/cdrom.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/cdrom.c	2011-06-22 07:34:50.000000000 +0800
@@ -35,9 +35,9 @@
 #include "common.h"
 
 #undef DPRINTK
-#define DPRINTK(_f, _a...)			\
-	printk("(%s() file=%s, line=%d) " _f "\n",	\
-		 __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/cdrom (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
 
 
 #define MEDIA_PRESENT "media-present"
diff -urNp blkback/common.h blkback-qos/common.h
--- blkback/common.h	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/common.h	2011-06-22 07:34:50.000000000 +0800
@@ -100,8 +100,17 @@ typedef struct blkif_st {
 
 	grant_handle_t shmem_handle;
 	grant_ref_t    shmem_ref;
+
+	/* qos information */
+	unsigned long   reqtime;
+	int    reqcount;
+	int    reqmin;
+	int    reqrate; 
+
 } blkif_t;
 
+#define VBD_QOS_MIN_RATE_LIMIT			(2*1024)	/* minimum rate in sectors per second, i.e. 1 MB/s */
+
 struct backend_info
 {
 	struct xenbus_device *dev;
@@ -111,6 +120,8 @@ struct backend_info
 	unsigned major;
 	unsigned minor;
 	char *mode;
+  	struct xenbus_watch rate_watch;
+	int have_rate_watch; 
 };
 
 blkif_t *blkif_alloc(domid_t domid);
diff -urNp blkback/vbd.c blkback-qos/vbd.c
--- blkback/vbd.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/vbd.c	2011-06-22 07:34:50.000000000 +0800
@@ -35,6 +35,11 @@
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ?				\
 	(_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/vbd (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
 unsigned long long vbd_size(struct vbd *vbd)
 {
 	return vbd_sz(vbd);
@@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
 	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
 		vbd->type |= VDISK_REMOVABLE;
 
-	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+	DPRINTK("Successful creation of handle=%04x (dom=%u)",
 		handle, blkif->domid);
 	return 0;
 }
diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
--- blkback/xenbus.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/xenbus.c	2011-06-22 07:34:50.000000000 +0800
@@ -25,13 +25,14 @@
 
 #undef DPRINTK
 #define DPRINTK(fmt, args...)				\
-	pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",	\
+	printk("blkback/xenbus (%s:%d) " fmt ".\n",	\
 		 __FUNCTION__, __LINE__, ##args)
 
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
 			    unsigned int);
+static void unregister_rate_watch(struct backend_info *be);
 
 static int blkback_name(blkif_t *blkif, char *buf)
 {
@@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
 	char name[TASK_COMM_LEN];
 
 	/* Not ready to connect? */
-	if (!blkif->irq || !blkif->vbd.bdev)
+	if (!blkif->irq || !blkif->vbd.bdev){
+		DPRINTK("Not ready to connect");
 		return;
+	}
 
 	/* Already connected? */
 	if (blkif->be->dev->state == XenbusStateConnected)
@@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
 		be->cdrom_watch.node = NULL;
 	}
 
+	unregister_rate_watch(be);
+
 	if (be->blkif) {
 		blkif_disconnect(be->blkif);
 		vbd_free(&be->blkif->vbd);
@@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
 
 	err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
 				 &be->backend_watch, backend_changed);
+
+	DPRINTK("blkback_probe called");
+	DPRINTK("dev->nodename=%s/physical-device",dev->nodename);
+	
 	if (err)
 		goto fail;
 
@@ -266,7 +275,6 @@ fail:
 	return err;
 }
 
-
 /**
  * Callback received when the hotplug scripts have placed the physical-device
  * node.  Read it and the mode node, and create a vbd.  If the frontend is
@@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
 	struct xenbus_device *dev = be->dev;
 	int cdrom = 0;
 	char *device_type;
+	char name[TASK_COMM_LEN];
 
-	DPRINTK("");
+	DPRINTK("backend_changed called");
 
 	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
 			   &major, &minor);
@@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
 		kfree(device_type);
 	}
 
+	/* gather information about QoS policy for this device. */
+	err = blkback_name(be->blkif, name);
+	if (err) {
+		xenbus_dev_error(be->dev, err, "get blkback dev name");
+		return;
+	}
+	
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+				"tokens-rate", "%d", &be->blkif->reqrate, 
+				NULL);
+	if(err){
+		DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error",name);
+	}else{
+		if(be->blkif->reqrate <= 0){
+			be->blkif->reqmin = 0 ;
+			DPRINTK("%s tokens-rate == 0,no limit",name);	
+		}else{
+			DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+			be->blkif->reqrate *= 2;
+			be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+			if(be->blkif->reqmin > be->blkif->reqrate){
+				be->blkif->reqrate = be->blkif->reqmin;
+				DPRINTK("%s reset default value(tokens-rate=%d)",name,be->blkif->reqrate);
+			}
+		}
+	}
+	be->blkif->reqtime = jiffies;
+
 	if (be->major == 0 && be->minor == 0) {
 		/* Front end dir is a number, which is used as the handle. */
 
@@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
 
 /* ** Connection ** */
 
+static void unregister_rate_watch(struct backend_info *be)
+{
+	if (!be->have_rate_watch)	/* no tokens-rate watch registered */
+		return;
+	unregister_xenbus_watch(&be->rate_watch);
+	kfree(be->rate_watch.node);	/* release the watch's node buffer */
+	be->have_rate_watch = 0;
+}
+
+/* Watch callback: re-read "tokens-rate" and update this VBD's QoS limits. */
+static void rate_changed(struct xenbus_watch *watch,
+                       const char **vec, unsigned int len)
+{
+	struct backend_info *be=container_of(watch,struct backend_info, rate_watch);
+	char name[TASK_COMM_LEN];
+	int err, rate = 0;	/* rate is published to blkif only once converted */
+
+	err = blkback_name(be->blkif, name);
+	if (err) {
+		xenbus_dev_error(be->dev, err, "get blkback dev name");
+		return;
+	}
+	/* NOTE(review): otherend is the frontend dir; confirm guests cannot write it. */
+	err = xenbus_gather(XBT_NIL, be->dev->otherend, "tokens-rate", "%d", &rate, NULL);
+	if (err) {
+		DPRINTK("%s xenbus_gather(tokens-rate) error",name);
+		return;
+	}
+	if (rate <= 0) {	/* non-positive rate: reqmin == 0 turns throttling off */
+		be->blkif->reqmin = 0;
+		be->blkif->reqrate = rate;
+		DPRINTK("%s tokens-rate == 0,no limit",name);
+		return;
+	}
+	DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,rate);
+	rate = (rate > INT_MAX / 2) ? INT_MAX : rate * 2;	/* saturate: no signed overflow */
+	if (rate < VBD_QOS_MIN_RATE_LIMIT) {
+		rate = VBD_QOS_MIN_RATE_LIMIT;
+		DPRINTK("%s reset default value(tokens-rate=%d)",name,rate);
+	}
+	be->blkif->reqrate = rate;	/* store the rate before enabling the limit */
+	be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+}
 
 /**
  * Write the physical details regarding the block device to the store, and
@@ -439,6 +519,14 @@ again:
 	if (err)
 		goto abort;
 
+	/*add by andrew for centos pv*/
+	err = xenbus_printf(xbt, dev->nodename,"feature-flush-cache", "1");
+	if (err){
+		xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
+			dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    vbd_size(&be->blkif->vbd));
 	if (err) {
@@ -469,11 +557,22 @@ again:
 	if (err)
 		xenbus_dev_fatal(dev, err, "ending transaction");
 
+	DPRINTK("xenbus_switch_to XenbusStateConnected");
+
 	err = xenbus_switch_state(dev, XenbusStateConnected);
 	if (err)
 		xenbus_dev_fatal(dev, err, "switching to Connected state",
 				 dev->nodename);
 
+	unregister_rate_watch(be);
+	err=xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
+								&be->rate_watch,rate_changed);
+	if (!err)
+		be->have_rate_watch = 1;
+	else
+		xenbus_dev_fatal(dev, err, "watching tokens-rate",
+				 dev->nodename);
+
 	return;
  abort:
 	xenbus_transaction_end(xbt, 1);

[-- Attachment #2: blkback-qos-20110621.diff --]
[-- Type: application/octet-stream, Size: 13108 bytes --]

diff -urNp blkback/blkback.c blkback-qos/blkback.c
--- blkback/blkback.c	2011-06-22 07:54:19.000000000 +0800
+++ blkback-qos/blkback.c	2011-06-22 07:53:18.000000000 +0800
@@ -44,6 +44,11 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/blkback (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
 /*
  * These are rather arbitrary. They are fairly large because adjacent requests
  * pulled from a communication ring are quite likely to end up being part of
@@ -110,7 +115,8 @@ static inline unsigned long vaddr(pendin
 static int do_block_io_op(blkif_t *blkif);
 static int dispatch_rw_block_io(blkif_t *blkif,
 				 blkif_request_t *req,
-				 pending_req_t *pending_req);
+				 pending_req_t *pending_req,
+				 int *done_nr_sects);
 static void make_response(blkif_t *blkif, u64 id,
 			  unsigned short op, int st);
 
@@ -206,10 +212,20 @@ static void print_stats(blkif_t *blkif)
 	blkif->st_pk_req = 0;
 }
 
+static void refill_reqcount(blkif_t *blkif)
+{
+	/* Open a new one-second window; its budget is never below reqmin. */
+	int budget = (blkif->reqrate < blkif->reqmin) ? blkif->reqmin : blkif->reqrate;
+	blkif->reqtime = jiffies + msecs_to_jiffies(1000);
+	blkif->reqcount = budget;
+}
+
 int blkif_schedule(void *arg)
 {
 	blkif_t *blkif = arg;
 	struct vbd *vbd = &blkif->vbd;
+	int	ret = 0;
+	struct timeval cur_time;
 
 	blkif_get(blkif);
 
@@ -232,12 +248,34 @@ int blkif_schedule(void *arg)
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
 
-		if (do_block_io_op(blkif))
+		ret = do_block_io_op(blkif);
+		if (ret)
 			blkif->waiting_reqs = 1;
 		unplug_queue(blkif);
 
+		if(blkif->reqmin){
+			if(2 == ret && (blkif->reqtime > jiffies)){
+				jiffies_to_timeval(jiffies, &cur_time);
+				if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+					printk(KERN_DEBUG "%s: going to sleep %d millsecs(rate=%d)\n",
+							current->comm,
+							jiffies_to_msecs(blkif->reqtime - jiffies),
+							blkif->reqrate);
+				
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule_timeout(blkif->reqtime - jiffies);
+				
+				if(log_stats && (cur_time.tv_sec % 10 ==1 ))
+					printk(KERN_DEBUG "%s: sleep end(rate=%d)\n",
+							current->comm,blkif->reqrate);
+			}
+			if (time_after(jiffies, blkif->reqtime))
+				refill_reqcount(blkif);
+		}
+
 		if (log_stats && time_after(jiffies, blkif->st_print))
 			print_stats(blkif);
+		
 	}
 
 	if (log_stats)
@@ -306,7 +344,6 @@ irqreturn_t blkif_be_int(int irq, void *
 /******************************************************************
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
-
 static int do_block_io_op(blkif_t *blkif)
 {
 	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
@@ -314,15 +351,27 @@ static int do_block_io_op(blkif_t *blkif
 	pending_req_t *pending_req;
 	RING_IDX rc, rp;
 	int more_to_do = 0, ret;
+	static int last_done_nr_sects = 0;	
 
 	rc = blk_rings->common.req_cons;
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
+	
+	if (blkif->reqmin && blkif->reqcount <= 0)
+		return (rc != rp) ? 2 : 0;
 
 	while ((rc != rp) || (blkif->is_suspended_req)) {
 
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
 			break;
+		
+		if(blkif->reqmin){
+			blkif->reqcount -= last_done_nr_sects;
+			if (blkif->reqcount <= 0) {
+				more_to_do = 2;
+				break;
+			}
+		}
 
 		if (kthread_should_stop()) {
 			more_to_do = 1;
@@ -367,14 +416,14 @@ handle_request:
 		switch (req.operation) {
 		case BLKIF_OP_READ:
 			blkif->st_rd_req++;
-			ret = dispatch_rw_block_io(blkif, &req, pending_req); 
+			ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects); 
 			break;
 		case BLKIF_OP_WRITE_BARRIER:
 			blkif->st_br_req++;
 			/* fall through */
 		case BLKIF_OP_WRITE:
 			blkif->st_wr_req++;
-			ret = dispatch_rw_block_io(blkif, &req, pending_req);
+			ret = dispatch_rw_block_io(blkif, &req, pending_req,&last_done_nr_sects);
 			break;
 		case BLKIF_OP_PACKET:
 			DPRINTK("error: block operation BLKIF_OP_PACKET not implemented\n");
@@ -412,9 +461,29 @@ handle_request:
 	return more_to_do;
 }
 
+/*
+ * Translate a blkif request opcode into a printable name for log output.
+ * Opcodes other than READ, WRITE and WRITE_BARRIER are reported as "0".
+ */
+static char* operation2str(int operation)
+{
+	switch (operation) {
+	case BLKIF_OP_READ:
+		return "READ";
+	case BLKIF_OP_WRITE:
+		return "WRITE";
+	case BLKIF_OP_WRITE_BARRIER:
+		return "WRITE_BARRIER";
+	default:
+		break;
+	}
+	return "0";
+}
+
 static int dispatch_rw_block_io(blkif_t *blkif,
 				 blkif_request_t *req,
-				 pending_req_t *pending_req)
+				 pending_req_t *pending_req,
+				 int *done_nr_sects)
 {
 	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -426,6 +495,9 @@ static int dispatch_rw_block_io(blkif_t
 	struct bio *bio = NULL;
 	int ret, i;
 	int operation;
+	struct timeval cur_time;
+
+	*done_nr_sects = 0;
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -582,6 +654,12 @@ static int dispatch_rw_block_io(blkif_t
 	else if (operation == WRITE || operation == WRITE_BARRIER)
 		blkif->st_wr_sect += preq.nr_sects;
 
+	*done_nr_sects = preq.nr_sects;
+	jiffies_to_timeval(jiffies, &cur_time);
+	if ((log_stats == 2) && (cur_time.tv_sec % 10 ==1 ))
+		printk(KERN_DEBUG "  operation=%s sects=%d\n",
+			operation2str(req->operation),preq.nr_sects);
+
 	return 0;
 
  fail_flush:
@@ -695,6 +773,8 @@ static int __init blkif_init(void)
 
 	blkif_xenbus_init();
 
+	DPRINTK("blkif_inited\n");
+
 	return 0;
 
  out_of_memory:
diff -urNp blkback/cdrom.c blkback-qos/cdrom.c
--- blkback/cdrom.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/cdrom.c	2011-06-22 07:34:50.000000000 +0800
@@ -35,9 +35,9 @@
 #include "common.h"
 
 #undef DPRINTK
-#define DPRINTK(_f, _a...)			\
-	printk("(%s() file=%s, line=%d) " _f "\n",	\
-		 __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a )
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/cdrom (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
 
 
 #define MEDIA_PRESENT "media-present"
diff -urNp blkback/common.h blkback-qos/common.h
--- blkback/common.h	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/common.h	2011-06-22 07:34:50.000000000 +0800
@@ -100,8 +100,17 @@ typedef struct blkif_st {
 
 	grant_handle_t shmem_handle;
 	grant_ref_t    shmem_ref;
+
+	/* qos information */
+	unsigned long   reqtime;
+	int    reqcount;
+	int    reqmin;
+	int    reqrate; 
+
 } blkif_t;
 
+#define VBD_QOS_MIN_RATE_LIMIT			(2*1024)	/* minimum rate in sectors per second, i.e. 1 MB/s */
+
 struct backend_info
 {
 	struct xenbus_device *dev;
@@ -111,6 +120,8 @@ struct backend_info
 	unsigned major;
 	unsigned minor;
 	char *mode;
+  	struct xenbus_watch rate_watch;
+	int have_rate_watch; 
 };
 
 blkif_t *blkif_alloc(domid_t domid);
diff -urNp blkback/vbd.c blkback-qos/vbd.c
--- blkback/vbd.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/vbd.c	2011-06-22 07:34:50.000000000 +0800
@@ -35,6 +35,11 @@
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ?				\
 	(_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
 
+#undef DPRINTK
+#define DPRINTK(fmt, args...)				\
+	printk("blkback/vbd (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
 unsigned long long vbd_size(struct vbd *vbd)
 {
 	return vbd_sz(vbd);
@@ -87,7 +92,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
 	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
 		vbd->type |= VDISK_REMOVABLE;
 
-	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+	DPRINTK("Successful creation of handle=%04x (dom=%u)",
 		handle, blkif->domid);
 	return 0;
 }
diff -urNp blkback/xenbus.c blkback-qos/xenbus.c
--- blkback/xenbus.c	2010-05-20 18:07:00.000000000 +0800
+++ blkback-qos/xenbus.c	2011-06-22 07:34:50.000000000 +0800
@@ -25,13 +25,14 @@
 
 #undef DPRINTK
 #define DPRINTK(fmt, args...)				\
-	pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",	\
+	printk("blkback/xenbus (%s:%d) " fmt ".\n",	\
 		 __FUNCTION__, __LINE__, ##args)
 
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
 			    unsigned int);
+static void unregister_rate_watch(struct backend_info *be);
 
 static int blkback_name(blkif_t *blkif, char *buf)
 {
@@ -59,8 +60,10 @@ static void update_blkif_status(blkif_t
 	char name[TASK_COMM_LEN];
 
 	/* Not ready to connect? */
-	if (!blkif->irq || !blkif->vbd.bdev)
+	if (!blkif->irq || !blkif->vbd.bdev){
+		DPRINTK("Not ready to connect");
 		return;
+	}
 
 	/* Already connected? */
 	if (blkif->be->dev->state == XenbusStateConnected)
@@ -193,6 +196,8 @@ static int blkback_remove(struct xenbus_
 		be->cdrom_watch.node = NULL;
 	}
 
+	unregister_rate_watch(be);
+
 	if (be->blkif) {
 		blkif_disconnect(be->blkif);
 		vbd_free(&be->blkif->vbd);
@@ -251,6 +256,10 @@ static int blkback_probe(struct xenbus_d
 
 	err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
 				 &be->backend_watch, backend_changed);
+
+	DPRINTK("blkback_probe called");
+	DPRINTK("dev->nodename=%s/physical-device",dev->nodename);
+	
 	if (err)
 		goto fail;
 
@@ -266,7 +275,6 @@ fail:
 	return err;
 }
 
-
 /**
  * Callback received when the hotplug scripts have placed the physical-device
  * node.  Read it and the mode node, and create a vbd.  If the frontend is
@@ -283,8 +291,9 @@ static void backend_changed(struct xenbu
 	struct xenbus_device *dev = be->dev;
 	int cdrom = 0;
 	char *device_type;
+	char name[TASK_COMM_LEN];
 
-	DPRINTK("");
+	DPRINTK("backend_changed called");
 
 	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
 			   &major, &minor);
@@ -322,6 +331,34 @@ static void backend_changed(struct xenbu
 		kfree(device_type);
 	}
 
+	/* gather information about QoS policy for this device. */
+	err = blkback_name(be->blkif, name);
+	if (err) {
+		xenbus_dev_error(be->dev, err, "get blkback dev name");
+		return;
+	}
+	
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+				"tokens-rate", "%d", &be->blkif->reqrate, 
+				NULL);
+	if(err){
+		DPRINTK("%s xenbus_gather(tokens-min,tokens-rate) error",name);
+	}else{
+		if(be->blkif->reqrate <= 0){
+			be->blkif->reqmin = 0 ;
+			DPRINTK("%s tokens-rate == 0,no limit",name);	
+		}else{
+			DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,be->blkif->reqrate);
+			be->blkif->reqrate *= 2;
+			be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+			if(be->blkif->reqmin > be->blkif->reqrate){
+				be->blkif->reqrate = be->blkif->reqmin;
+				DPRINTK("%s reset default value(tokens-rate=%d)",name,be->blkif->reqrate);
+			}
+		}
+	}
+	be->blkif->reqtime = jiffies;
+
 	if (be->major == 0 && be->minor == 0) {
 		/* Front end dir is a number, which is used as the handle. */
 
@@ -414,6 +451,49 @@ static void frontend_changed(struct xenb
 
 /* ** Connection ** */
 
+static void unregister_rate_watch(struct backend_info *be)
+{
+	if (!be->have_rate_watch)	/* no tokens-rate watch registered */
+		return;
+	unregister_xenbus_watch(&be->rate_watch);
+	kfree(be->rate_watch.node);	/* release the watch's node buffer */
+	be->have_rate_watch = 0;
+}
+
+/* Watch callback: re-read "tokens-rate" and update this VBD's QoS limits. */
+static void rate_changed(struct xenbus_watch *watch,
+                       const char **vec, unsigned int len)
+{
+	struct backend_info *be=container_of(watch,struct backend_info, rate_watch);
+	char name[TASK_COMM_LEN];
+	int err, rate = 0;	/* rate is published to blkif only once converted */
+
+	err = blkback_name(be->blkif, name);
+	if (err) {
+		xenbus_dev_error(be->dev, err, "get blkback dev name");
+		return;
+	}
+	/* NOTE(review): otherend is the frontend dir; confirm guests cannot write it. */
+	err = xenbus_gather(XBT_NIL, be->dev->otherend, "tokens-rate", "%d", &rate, NULL);
+	if (err) {
+		DPRINTK("%s xenbus_gather(tokens-rate) error",name);
+		return;
+	}
+	if (rate <= 0) {	/* non-positive rate: reqmin == 0 turns throttling off */
+		be->blkif->reqmin = 0;
+		be->blkif->reqrate = rate;
+		DPRINTK("%s tokens-rate == 0,no limit",name);
+		return;
+	}
+	DPRINTK("%s xenbus_gather(tokens-rate=%d)",name,rate);
+	rate = (rate > INT_MAX / 2) ? INT_MAX : rate * 2;	/* saturate: no signed overflow */
+	if (rate < VBD_QOS_MIN_RATE_LIMIT) {
+		rate = VBD_QOS_MIN_RATE_LIMIT;
+		DPRINTK("%s reset default value(tokens-rate=%d)",name,rate);
+	}
+	be->blkif->reqrate = rate;	/* store the rate before enabling the limit */
+	be->blkif->reqmin = VBD_QOS_MIN_RATE_LIMIT;
+}
 
 /**
  * Write the physical details regarding the block device to the store, and
@@ -439,6 +519,14 @@ again:
 	if (err)
 		goto abort;
 
+	/*add by andrew for centos pv*/
+	err = xenbus_printf(xbt, dev->nodename,"feature-flush-cache", "1");
+	if (err){
+		xenbus_dev_fatal(dev, err, "writing %s/feature-flush-cache",
+			dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    vbd_size(&be->blkif->vbd));
 	if (err) {
@@ -469,11 +557,22 @@ again:
 	if (err)
 		xenbus_dev_fatal(dev, err, "ending transaction");
 
+	DPRINTK("xenbus_switch_to XenbusStateConnected");
+
 	err = xenbus_switch_state(dev, XenbusStateConnected);
 	if (err)
 		xenbus_dev_fatal(dev, err, "switching to Connected state",
 				 dev->nodename);
 
+	unregister_rate_watch(be);
+	err=xenbus_watch_path2(dev, dev->otherend, "tokens-rate",
+								&be->rate_watch,rate_changed);
+	if (!err)
+		be->have_rate_watch = 1;
+	else
+		xenbus_dev_fatal(dev, err, "watching tokens-rate",
+				 dev->nodename);
+
 	return;
  abort:
 	xenbus_transaction_end(xbt, 1);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel