All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6] Userspace grant communication
@ 2011-02-03 17:18 Daniel De Graaf
  2011-02-03 17:18 ` [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open Daniel De Graaf
                   ` (9 more replies)
  0 siblings, 10 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-03 17:18 UTC (permalink / raw)
  To: xen-devel; +Cc: jeremy, Ian.Campbell, konrad.wilk

Changes since v5:
  - Added a tested xen version to workaround in #4
  - Cleaned up variable names & structures
  - Clarified some of the cleanup in gntalloc
  - Removed copyright statement from public-domain files

[PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
[PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
[PATCH 3/6] xen-gntdev: Add reference counting to maps
[PATCH 4/6] xen-gntdev: Support mapping in HVM domains
[PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
[PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl

Test/Demo code (also updated):

#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/*
 * Names one grant to be mapped: the granting domain plus the grant reference
 * inside that domain. Used as the element type of
 * ioctl_gntdev_map_grant_ref.refs[].
 */
struct ioctl_gntdev_grant_ref {
	/* The domain ID of the grant to be mapped. */
	uint32_t domid;
	/* The grant reference of the grant to be mapped. */
	uint32_t ref;
};

/*
 * Allocates a new page and creates a new grant reference.
 *
 * The ioctl number encodes sizeof(struct ioctl_gntalloc_alloc_gref), so the
 * struct layout (including the one-element gref_ids[] tail) must not change
 * without also changing the request number.
 */
#define IOCTL_GNTALLOC_ALLOC_GREF \
_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
struct ioctl_gntalloc_alloc_gref {
	/* IN parameters */
	/* The ID of the domain to be given access to the grants. */
	uint16_t domid;
	/* Flags for this mapping (see GNTALLOC_FLAG_WRITABLE) */
	uint16_t flags;
	/* Number of pages to map */
	uint32_t count;
	/* OUT parameters */
	/* The offset to be used on a subsequent call to mmap(). */
	uint64_t index;
	/* The grant references of the newly created grant, one per page */
	/* Variable size, depending on count */
	/* Declared with one element; the demo below only ever uses count == 1. */
	uint32_t gref_ids[1];
};

/* Value for ioctl_gntalloc_alloc_gref.flags; the demo's sa() always sets it. */
#define GNTALLOC_FLAG_WRITABLE 1

/*
 * Deallocates the grant reference, allowing the associated page to be freed if
 * no other domains are using it.
 *
 * The ioctl number encodes sizeof(struct ioctl_gntalloc_dealloc_gref).
 */
#define IOCTL_GNTALLOC_DEALLOC_GREF \
_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
struct ioctl_gntalloc_dealloc_gref {
	/* IN parameters */
	/* The offset returned in the map operation */
	/* (the demo passes the value it stored from alloc_gref.index) */
	uint64_t index;
	/* Number of references to unmap */
	uint32_t count;
};

/*
 * Registers a set of foreign grants with the gntdev device and returns the
 * offset at which they can subsequently be mmap()ed.
 *
 * The ioctl number encodes sizeof(struct ioctl_gntdev_map_grant_ref), so the
 * one-element refs[] tail is part of the request number.
 */
#define IOCTL_GNTDEV_MAP_GRANT_REF \
_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
struct ioctl_gntdev_map_grant_ref {
    /* IN parameters */
    /* The number of grants to be mapped. */
    uint32_t count;
    /* Explicit padding so that index is 8-byte aligned. */
    uint32_t pad;
    /* OUT parameters */
    /* The offset to be used on a subsequent call to mmap(). */
    uint64_t index;
    /* Variable IN parameter. */
    /* Array of grant references, of size @count. */
    /* Declared with one element; the demo below only ever uses count == 1. */
    struct ioctl_gntdev_grant_ref refs[1];
};
/* NOTE(review): not referenced by the demo code below — confirm intended use. */
#define GNTDEV_MAP_WRITABLE 0x1

/*
 * Removes a grant mapping previously registered with
 * IOCTL_GNTDEV_MAP_GRANT_REF. The ioctl number encodes
 * sizeof(struct ioctl_gntdev_unmap_grant_ref).
 */
#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))
struct ioctl_gntdev_unmap_grant_ref {
    /* IN parameters */
    /* The offset was returned by the corresponding map operation. */
    uint64_t index;
    /* The number of pages to be unmapped. */
    uint32_t count;
    /* Explicit padding; rounds the struct up to a multiple of 8 bytes. */
    uint32_t pad;
};

/*
 * Sets up an unmap notification within the page, so that the other side can do
 * cleanup if this side crashes. Required to implement cross-domain robust
 * mutexes or close notification on communication channels.
 *
 * Each mapped page only supports one notification; multiple calls referring to
 * the same page overwrite the previous notification. You must clear the
 * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
 * to occur.
 *
 * NOTE(review): this is the gntdev variant, yet the text above names the
 * gntalloc dealloc ioctl; presumably IOCTL_GNTDEV_UNMAP_GRANT_REF is meant —
 * confirm.
 *
 * This request uses the same 'G'/7 number as IOCTL_GNTALLOC_SET_UNMAP_NOTIFY;
 * the demo issues this one on the gntdev file descriptor.
 */
#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
struct ioctl_gntdev_unmap_notify {
	/* IN parameters */
	/* Index of a byte in the page */
	/* (the demo passes the mmap offset plus the byte's offset in the page) */
	uint64_t index;
	/* Action(s) to take on unmap */
	/* Bitwise OR of the UNMAP_NOTIFY_* values. */
	uint32_t action;
	/* Event channel to notify */
	uint32_t event_channel_port;
};

/* Clear (set to zero) the byte specified by index */
#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
/* Send an interrupt on the indicated event channel */
#define UNMAP_NOTIFY_SEND_EVENT 0x2

/*
 * Sets up an unmap notification within the page, so that the other side can do
 * cleanup if this side crashes. Required to implement cross-domain robust
 * mutexes or close notification on communication channels.
 *
 * Each mapped page only supports one notification; multiple calls referring to
 * the same page overwrite the previous notification. You must clear the
 * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
 * to occur.
 *
 * This is the gntalloc variant; the demo issues it on the gntalloc file
 * descriptor. Its field layout matches struct ioctl_gntdev_unmap_notify.
 */
#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
struct ioctl_gntalloc_unmap_notify {
	/* IN parameters */
	/* Index of a byte in the page */
	/* (the demo passes the mmap offset plus the byte's offset in the page) */
	uint64_t index;
	/* Action(s) to take on unmap */
	/* Bitwise OR of the UNMAP_NOTIFY_* values. */
	uint32_t action;
	/* Event channel to notify */
	uint32_t event_channel_port;
};

/*
 * The two action values below repeat definitions that already appear earlier
 * in this file with identical values, so the redefinition is benign.
 */
/* Clear (set to zero) the byte specified by index */
#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
/* Send an interrupt on the indicated event channel */
#define UNMAP_NOTIFY_SEND_EVENT 0x2

/*
 * Fallback definition, used only when no included header has already provided
 * offsetof. It yields the byte offset of MEMBER by taking the member's address
 * inside a TYPE notionally placed at address 0.
 */
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif


/* File descriptor for /dev/xen/gntalloc, opened in main(). */
int a_fd;
/* File descriptor for /dev/xen/gntdev, opened in main(). */
int d_fd;

/*
 * Layout the demo imposes on the start of every shared page. Both the
 * granting side (sa) and the mapping side (mm) access the page through it.
 */
struct shr_page {
	/* Written once by sa(): random value combined with the creator's PID. */
	uint64_t id;
	/* Text area that show() prepends words to. */
	char buffer[64];
	/*
	 * One "still mapped" flag per user of the page: sa() sets slot 0, mm()
	 * claims the first zero slot starting at 1. Each flag is registered
	 * with UNMAP_NOTIFY_CLEAR_BYTE.
	 */
	uint8_t notifies[8];
};

struct data {
	struct shr_page* mem;
	int handle;
} items[128];

void sa(int id)
{
	struct ioctl_gntalloc_alloc_gref arg = {
		.domid = id,
		.flags = GNTALLOC_FLAG_WRITABLE,
		.count = 1
	};
	int rv = ioctl(a_fd, IOCTL_GNTALLOC_ALLOC_GREF, &arg);
	if (rv) {
		printf("src-add error: %s (rv=%d)\n", strerror(errno), rv);
		return;
	}
	int i=0;
	while (items[i].mem) i++;
	items[i].mem = mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, a_fd, arg.index);
	if (items[i].mem == MAP_FAILED) {
		items[i].mem = 0;
		printf("mmap failed: SHOULD NOT HAPPEN\n");
		return;
	}
	items[i].handle = arg.index;
	printf("Created shared page with domain %d, grant #%d. Mapped locally at %d=%p\n",
		id, arg.gref_ids[0], arg.index, items[i].mem);

	items[i].mem->id = rand() | ((long)(getpid()) << 32);
	items[i].mem->notifies[0] = 1;
	struct ioctl_gntalloc_unmap_notify uarg = {
		.index = arg.index + offsetof(struct shr_page, notifies[0]),
		.action = UNMAP_NOTIFY_CLEAR_BYTE
	};
	rv = ioctl(a_fd, IOCTL_GNTALLOC_SET_UNMAP_NOTIFY, &uarg);
	if (rv)
		printf("gntalloc unmap notify error: %s (rv=%d)\n", strerror(errno), rv);
}

void sd(int ref) {
	struct ioctl_gntalloc_dealloc_gref arg = {
		.index = ref,
		.count = 1
	};

	int rv = ioctl(a_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
	if (rv)
		printf("src-del error: %s (rv=%d)\n", strerror(errno), rv);
	else
		printf("Stopped offering grant at offset %d\n", ref);
}

void mm(int domid, int refid) {
	struct ioctl_gntdev_map_grant_ref arg = {
		.count = 1,
		.refs[0].domid = domid,
		.refs[0].ref = refid,
	};
	int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg);
	if (rv) {
		printf("Could not map grant %d.%d: %s (rv=%d)\n", domid, refid, strerror(errno), rv);
		return;
	}
	int i=0,j=1;
	while (items[i].mem) i++;
	items[i].mem = mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, d_fd, arg.index);
	if (items[i].mem == MAP_FAILED) {
		items[i].mem = 0;
		printf("Could not map grant %d.%d: %s (map failed)\n", domid, refid, strerror(errno), rv);
		return;
	}
	items[i].handle = arg.index;
	printf("Mapped grant %d.%d as %d=%p\n", domid, refid, arg.index, items[i].mem);

	while (items[i].mem->notifies[j]) j++;
	items[i].mem->notifies[j] = 1;
	struct ioctl_gntalloc_unmap_notify uarg = {
		.index = arg.index + offsetof(struct shr_page, notifies[j]),
		.action = UNMAP_NOTIFY_CLEAR_BYTE
	};
	rv = ioctl(d_fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &uarg);
	if (rv)
		printf("gntdev unmap notify error: %s (rv=%d)\n", strerror(errno), rv);
}

void gu(int index) {
	struct ioctl_gntdev_unmap_grant_ref arg = {
		.index = index,
		.count = 1,
	};
	int rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg);
	if (rv)
		printf("gu error: %s (rv=%d)\n", strerror(errno), rv);
	else
		printf("Unhooked mapped grant at offset %d\n", index);
}

void mu(void* addr) {
	int i = 0;
	munmap(addr, 4096);
	while (i < 128)
	{
		if (items[i].mem == addr)
			items[i].mem = 0;
		i++;
	}
	printf("Unmapped page at %p\n", addr);
}

void show(char* word) {
	int i;
	int wlen = strlen(word);
	for(i=0; i < 128; i++) {
		if (!items[i].mem)
			continue;
		memmove(items[i].mem->buffer + wlen, items[i].mem->buffer, 63 - wlen);
		memcpy(items[i].mem->buffer, word, wlen);
		printf("%02d(%ld,%d): id %16lx n=%d%d%d%d%d%d%d%d b=%s\n",
			i, items[i].mem, items[i].handle, items[i].mem->id,
			items[i].mem->notifies[0], items[i].mem->notifies[1], items[i].mem->notifies[2], items[i].mem->notifies[3], 
			items[i].mem->notifies[4], items[i].mem->notifies[5], items[i].mem->notifies[6], items[i].mem->notifies[7], 
			items[i].mem->buffer);
	}
	printf("END\n");
}

/*
 * Interactive driver for the gntalloc/gntdev demo.
 *
 * Opens both Xen devices, prints the command summary, then reads commands
 * from stdin until end-of-file. A failure to open either device is reported
 * but is not fatal, so the tool can still exercise whichever device exists.
 *
 * Returns 0 when stdin is exhausted.
 */
int main(int argc, char** argv) {
	(void)argc;
	(void)argv;

	a_fd = open("/dev/xen/gntalloc", O_RDWR);
	if (a_fd < 0)
		printf("warning: cannot open /dev/xen/gntalloc: %s\n", strerror(errno));
	d_fd = open("/dev/xen/gntdev", O_RDWR);
	if (d_fd < 0)
		printf("warning: cannot open /dev/xen/gntdev: %s\n", strerror(errno));
	printf(
		"add <domid>           return gntref, address\n"
		"map <domid> <ref>     return index, address\n"
		"adel <gntref>         delete <add> internal\n"
		"ddel <index>          delete <map> internal\n"
		"unmap <address>       unmap memory\n"
		"show                  show all pages\n"
		"<word>                append word to all mapped pages, show\n"
		" PID %x\n", (unsigned)getpid()
	);
	while (1) {
		char line[80];
		char word[80];
		long a = 0, b = 0;
		int nargs, need = 1;

		printf("\n> ");
		fflush(stdout);
		/* Stop on EOF or read error instead of spinning on stale input. */
		if (!fgets(line, sizeof(line), stdin))
			break;
		nargs = sscanf(line, "%79s %ld %ld", word, &a, &b);
		/* Blank line: nothing was parsed, so there is nothing to run. */
		if (nargs < 1)
			continue;

		/* Refuse commands whose numeric arguments were not supplied. */
		if (!strcmp(word, "map"))
			need = 3;
		else if (!strcmp(word, "add") || !strcmp(word, "adel") ||
			 !strcmp(word, "ddel") || !strcmp(word, "unmap"))
			need = 2;
		if (nargs < need) {
			printf("%s: missing argument\n", word);
			continue;
		}

		if (!strcmp(word, "add")) {
			sa(a);
		} else if (!strcmp(word, "map")) {
			mm(a, b);
		} else if (!strcmp(word, "adel")) {
			sd(a);
		} else if (!strcmp(word, "ddel")) {
			gu(a);
		} else if (!strcmp(word, "unmap")) {
			mu((void*)a);
		} else if (!strcmp(word, "show")) {
			show("");
		} else {
			show(word);
		}
	}
	return 0;
}

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
@ 2011-02-03 17:18 ` Daniel De Graaf
  2011-02-03 17:19 ` [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually Daniel De Graaf
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-03 17:18 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

Because there is no limitation on how many times a user can open a
given device file, a per-file-description limit on the number of
pages granted offers little to no benefit. Change to a global limit
and remove the ioctl() as the parameter can now be changed via sysfs.

Xen tools changeset 22768:f8d801e5573e is needed to eliminate the
error this change produces in xc_gnttab_set_max_grants.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   50 ++++++++++++++------------------------------------
 1 files changed, 14 insertions(+), 36 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 2b777c0..3ca47d1 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -45,15 +45,15 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
 	      "Gerd Hoffmann <kraxel@redhat.com>");
 MODULE_DESCRIPTION("User-space granted page access driver");
 
-static int limit = 1024;
+static int limit = 1024*1024;
 module_param(limit, int, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at "
-		"once by a gntdev instance");
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
+		"the gntdev device");
+
+static atomic_t pages_mapped = ATOMIC_INIT(0);
 
 struct gntdev_priv {
 	struct list_head maps;
-	uint32_t used;
-	uint32_t limit;
 	/* lock protects maps from concurrent changes */
 	spinlock_t lock;
 	struct mm_struct *mm;
@@ -82,9 +82,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
 #ifdef DEBUG
 	struct grant_map *map;
 
-	pr_debug("maps list (priv %p, usage %d/%d)\n",
-	       priv, priv->used, priv->limit);
-
+	pr_debug("%s: maps list (priv %p)\n", __func__, priv);
 	list_for_each_entry(map, &priv->maps, next)
 		pr_debug("  index %2d, count %2d %s\n",
 		       map->index, map->count,
@@ -121,9 +119,6 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	add->count = count;
 	add->priv  = priv;
 
-	if (add->count + priv->used > priv->limit)
-		goto err;
-
 	return add;
 
 err:
@@ -154,7 +149,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
 	list_add_tail(&add->next, &priv->maps);
 
 done:
-	priv->used += add->count;
 	gntdev_print_maps(priv, "[new]", add->index);
 }
 
@@ -200,7 +194,7 @@ static int gntdev_del_map(struct grant_map *map)
 		if (map->unmap_ops[i].handle)
 			return -EBUSY;
 
-	map->priv->used -= map->count;
+	atomic_sub(map->count, &pages_mapped);
 	list_del(&map->next);
 	return 0;
 }
@@ -385,7 +379,6 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 
 	INIT_LIST_HEAD(&priv->maps);
 	spin_lock_init(&priv->lock);
-	priv->limit = limit;
 
 	priv->mm = get_task_mm(current);
 	if (!priv->mm) {
@@ -442,19 +435,24 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 	pr_debug("priv %p, add %d\n", priv, op.count);
 	if (unlikely(op.count <= 0))
 		return -EINVAL;
-	if (unlikely(op.count > priv->limit))
-		return -EINVAL;
 
 	err = -ENOMEM;
 	map = gntdev_alloc_map(priv, op.count);
 	if (!map)
 		return err;
+
 	if (copy_from_user(map->grants, &u->refs,
 			   sizeof(map->grants[0]) * op.count) != 0) {
 		gntdev_free_map(map);
 		return err;
 	}
 
+	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
+		pr_debug("can't map: over limit\n");
+		gntdev_free_map(map);
+		return err;
+	}
+
 	spin_lock(&priv->lock);
 	gntdev_add_map(priv, map);
 	op.index = map->index << PAGE_SHIFT;
@@ -517,23 +515,6 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 	return 0;
 }
 
-static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
-					struct ioctl_gntdev_set_max_grants __user *u)
-{
-	struct ioctl_gntdev_set_max_grants op;
-
-	if (copy_from_user(&op, u, sizeof(op)) != 0)
-		return -EFAULT;
-	pr_debug("priv %p, limit %d\n", priv, op.count);
-	if (op.count > limit)
-		return -E2BIG;
-
-	spin_lock(&priv->lock);
-	priv->limit = op.count;
-	spin_unlock(&priv->lock);
-	return 0;
-}
-
 static long gntdev_ioctl(struct file *flip,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -550,9 +531,6 @@ static long gntdev_ioctl(struct file *flip,
 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
 
-	case IOCTL_GNTDEV_SET_MAX_GRANTS:
-		return gntdev_ioctl_set_max_grants(priv, ptr);
-
 	default:
 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
 		return -ENOIOCTLCMD;
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
  2011-02-03 17:18 ` [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open Daniel De Graaf
@ 2011-02-03 17:19 ` Daniel De Graaf
  2011-02-03 17:19 ` [PATCH 3/6] xen-gntdev: Add reference counting to maps Daniel De Graaf
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-03 17:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This should be faster if many mappings exist, and also removes
the only user of map->vma not related to PTE modification.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   32 ++++++++------------------------
 1 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 3ca47d1..a42554a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -167,23 +167,6 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
 	return NULL;
 }
 
-static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv,
-					       unsigned long vaddr)
-{
-	struct grant_map *map;
-
-	list_for_each_entry(map, &priv->maps, next) {
-		if (!map->vma)
-			continue;
-		if (vaddr < map->vma->vm_start)
-			continue;
-		if (vaddr >= map->vma->vm_end)
-			continue;
-		return map;
-	}
-	return NULL;
-}
-
 static int gntdev_del_map(struct grant_map *map)
 {
 	int i;
@@ -493,22 +476,23 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 					      struct ioctl_gntdev_get_offset_for_vaddr __user *u)
 {
 	struct ioctl_gntdev_get_offset_for_vaddr op;
+	struct vm_area_struct *vma;
 	struct grant_map *map;
 
 	if (copy_from_user(&op, u, sizeof(op)) != 0)
 		return -EFAULT;
 	pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
 
-	spin_lock(&priv->lock);
-	map = gntdev_find_map_vaddr(priv, op.vaddr);
-	if (map == NULL ||
-	    map->vma->vm_start != op.vaddr) {
-		spin_unlock(&priv->lock);
+	vma = find_vma(current->mm, op.vaddr);
+	if (!vma || vma->vm_ops != &gntdev_vmops)
 		return -EINVAL;
-	}
+
+	map = vma->vm_private_data;
+	if (!map)
+		return -EINVAL;
+
 	op.offset = map->index << PAGE_SHIFT;
 	op.count = map->count;
-	spin_unlock(&priv->lock);
 
 	if (copy_to_user(u, &op, sizeof(op)) != 0)
 		return -EFAULT;
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 3/6] xen-gntdev: Add reference counting to maps
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
  2011-02-03 17:18 ` [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open Daniel De Graaf
  2011-02-03 17:19 ` [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually Daniel De Graaf
@ 2011-02-03 17:19 ` Daniel De Graaf
  2011-02-03 17:19 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-03 17:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This allows userspace to perform mmap() on the gntdev device and then
immediately close the filehandle or remove the mapping using the
remove ioctl, with the mapped area remaining valid until unmapped.
This also fixes an infinite loop when a gntdev device is closed
without first unmapping all areas.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   67 ++++++++++++++++++++-----------------------------
 1 files changed, 27 insertions(+), 40 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a42554a..1581403 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -62,12 +62,12 @@ struct gntdev_priv {
 
 struct grant_map {
 	struct list_head next;
-	struct gntdev_priv *priv;
 	struct vm_area_struct *vma;
 	int index;
 	int count;
 	int flags;
 	int is_mapped;
+	atomic_t users;
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
@@ -117,7 +117,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 
 	add->index = 0;
 	add->count = count;
-	add->priv  = priv;
+	atomic_set(&add->users, 1);
 
 	return add;
 
@@ -167,28 +167,18 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
 	return NULL;
 }
 
-static int gntdev_del_map(struct grant_map *map)
-{
-	int i;
-
-	if (map->vma)
-		return -EBUSY;
-	for (i = 0; i < map->count; i++)
-		if (map->unmap_ops[i].handle)
-			return -EBUSY;
-
-	atomic_sub(map->count, &pages_mapped);
-	list_del(&map->next);
-	return 0;
-}
-
-static void gntdev_free_map(struct grant_map *map)
+static void gntdev_put_map(struct grant_map *map)
 {
 	int i;
 
 	if (!map)
 		return;
 
+	if (!atomic_dec_and_test(&map->users))
+		return;
+
+	atomic_sub(map->count, &pages_mapped);
+
 	if (map->pages)
 		for (i = 0; i < map->count; i++) {
 			if (map->pages[i])
@@ -266,6 +256,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	map->is_mapped = 0;
 	map->vma = NULL;
 	vma->vm_private_data = NULL;
+	gntdev_put_map(map);
 }
 
 static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -387,17 +378,14 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 {
 	struct gntdev_priv *priv = flip->private_data;
 	struct grant_map *map;
-	int err;
 
 	pr_debug("priv %p\n", priv);
 
 	spin_lock(&priv->lock);
 	while (!list_empty(&priv->maps)) {
 		map = list_entry(priv->maps.next, struct grant_map, next);
-		err = gntdev_del_map(map);
-		if (WARN_ON(err))
-			gntdev_free_map(map);
-
+		list_del(&map->next);
+		gntdev_put_map(map);
 	}
 	spin_unlock(&priv->lock);
 
@@ -424,15 +412,15 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 	if (!map)
 		return err;
 
-	if (copy_from_user(map->grants, &u->refs,
-			   sizeof(map->grants[0]) * op.count) != 0) {
-		gntdev_free_map(map);
+	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
+		pr_debug("can't map: over limit\n");
+		gntdev_put_map(map);
 		return err;
 	}
 
-	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
-		pr_debug("can't map: over limit\n");
-		gntdev_free_map(map);
+	if (copy_from_user(map->grants, &u->refs,
+			   sizeof(map->grants[0]) * op.count) != 0) {
+		gntdev_put_map(map);
 		return err;
 	}
 
@@ -441,13 +429,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 	op.index = map->index << PAGE_SHIFT;
 	spin_unlock(&priv->lock);
 
-	if (copy_to_user(u, &op, sizeof(op)) != 0) {
-		spin_lock(&priv->lock);
-		gntdev_del_map(map);
-		spin_unlock(&priv->lock);
-		gntdev_free_map(map);
-		return err;
-	}
+	if (copy_to_user(u, &op, sizeof(op)) != 0)
+		return -EFAULT;
+
 	return 0;
 }
 
@@ -464,11 +448,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
 
 	spin_lock(&priv->lock);
 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
-	if (map)
-		err = gntdev_del_map(map);
+	if (map) {
+		list_del(&map->next);
+		gntdev_put_map(map);
+		err = 0;
+	}
 	spin_unlock(&priv->lock);
-	if (!err)
-		gntdev_free_map(map);
 	return err;
 }
 
@@ -548,6 +533,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		goto unlock_out;
 	}
 
+	atomic_inc(&map->users);
+
 	vma->vm_ops = &gntdev_vmops;
 
 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
                   ` (2 preceding siblings ...)
  2011-02-03 17:19 ` [PATCH 3/6] xen-gntdev: Add reference counting to maps Daniel De Graaf
@ 2011-02-03 17:19 ` Daniel De Graaf
  2011-02-14 15:51   ` Konrad Rzeszutek Wilk
  2011-02-03 17:19 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-03 17:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

HVM does not allow direct PTE modification, so instead we request
that Xen change its internal p2m mappings on the allocated pages and
map the memory into userspace normally.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c      |  117 ++++++++++++++++++++++++++++++++-------------
 drivers/xen/grant-table.c |    6 ++
 2 files changed, 89 insertions(+), 34 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1581403..58fddf3 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -32,6 +32,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/highmem.h>
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
 
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
+static int use_ptemod;
+
 struct gntdev_priv {
 	struct list_head maps;
 	/* lock protects maps from concurrent changes */
@@ -74,6 +77,8 @@ struct grant_map {
 	struct page **pages;
 };
 
+static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+
 /* ------------------------------------------------------------------ */
 
 static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map)
 
 	atomic_sub(map->count, &pages_mapped);
 
-	if (map->pages)
+	if (map->pages) {
+		if (!use_ptemod)
+			unmap_grant_pages(map, 0, map->count);
+
 		for (i = 0; i < map->count; i++) {
-			if (map->pages[i])
+			uint32_t check, *tmp;
+			if (!map->pages[i])
+				continue;
+			/* XXX When unmapping in an HVM domain, Xen will
+			 * sometimes end up mapping the GFN to an invalid MFN.
+			 * In this case, writes will be discarded and reads will
+			 * return all 0xFF bytes.  Leak these unusable GFNs
+			 * until Xen supports fixing their p2m mapping.
+			 *
+			 * Confirmed present in Xen 4.1-RC3 with HVM source
+			 */
+			tmp = kmap(map->pages[i]);
+			*tmp = 0xdeaddead;
+			mb();
+			check = *tmp;
+			kunmap(map->pages[i]);
+			if (check == 0xdeaddead)
 				__free_page(map->pages[i]);
+			else
+				pr_debug("Discard page %d=%ld\n", i,
+					page_to_pfn(map->pages[i]));
 		}
+	}
 	kfree(map->pages);
 	kfree(map->grants);
 	kfree(map->map_ops);
@@ -197,17 +225,16 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 {
 	struct grant_map *map = data;
 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
 	u64 pte_maddr;
 
 	BUG_ON(pgnr >= map->count);
 	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
 
-	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
-			  GNTMAP_contains_pte | map->flags,
+	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
 			  map->grants[pgnr].ref,
 			  map->grants[pgnr].domid);
-	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
-			    GNTMAP_contains_pte | map->flags,
+	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
 			    0 /* handle */);
 	return 0;
 }
@@ -215,6 +242,19 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 static int map_grant_pages(struct grant_map *map)
 {
 	int i, err = 0;
+	phys_addr_t addr;
+
+	if (!use_ptemod) {
+		for (i = 0; i < map->count; i++) {
+			addr = (phys_addr_t)
+				pfn_to_kaddr(page_to_pfn(map->pages[i]));
+			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
+				map->grants[i].ref,
+				map->grants[i].domid);
+			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
+				map->flags, 0 /* handle */);
+		}
+	}
 
 	pr_debug("map %d+%d\n", map->index, map->count);
 	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
@@ -259,17 +299,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	gntdev_put_map(map);
 }
 
-static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
-			vmf->virtual_address, vmf->pgoff);
-	vmf->flags = VM_FAULT_ERROR;
-	return 0;
-}
-
 static struct vm_operations_struct gntdev_vmops = {
 	.close = gntdev_vma_close,
-	.fault = gntdev_vma_fault,
 };
 
 /* ------------------------------------------------------------------ */
@@ -354,14 +385,16 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 	INIT_LIST_HEAD(&priv->maps);
 	spin_lock_init(&priv->lock);
 
-	priv->mm = get_task_mm(current);
-	if (!priv->mm) {
-		kfree(priv);
-		return -ENOMEM;
+	if (use_ptemod) {
+		priv->mm = get_task_mm(current);
+		if (!priv->mm) {
+			kfree(priv);
+			return -ENOMEM;
+		}
+		priv->mn.ops = &gntdev_mmu_ops;
+		ret = mmu_notifier_register(&priv->mn, priv->mm);
+		mmput(priv->mm);
 	}
-	priv->mn.ops = &gntdev_mmu_ops;
-	ret = mmu_notifier_register(&priv->mn, priv->mm);
-	mmput(priv->mm);
 
 	if (ret) {
 		kfree(priv);
@@ -389,7 +422,8 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 	}
 	spin_unlock(&priv->lock);
 
-	mmu_notifier_unregister(&priv->mn, priv->mm);
+	if (use_ptemod)
+		mmu_notifier_unregister(&priv->mn, priv->mm);
 	kfree(priv);
 	return 0;
 }
@@ -514,7 +548,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	int index = vma->vm_pgoff;
 	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	struct grant_map *map;
-	int err = -EINVAL;
+	int i, err = -EINVAL;
 
 	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
@@ -526,9 +560,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	map = gntdev_find_map_index(priv, index, count);
 	if (!map)
 		goto unlock_out;
-	if (map->vma)
+	if (use_ptemod && map->vma)
 		goto unlock_out;
-	if (priv->mm != vma->vm_mm) {
+	if (use_ptemod && priv->mm != vma->vm_mm) {
 		printk(KERN_WARNING "Huh? Other mm?\n");
 		goto unlock_out;
 	}
@@ -540,20 +574,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
 
 	vma->vm_private_data = map;
-	map->vma = vma;
 
-	map->flags = GNTMAP_host_map | GNTMAP_application_map;
+	if (use_ptemod)
+		map->vma = vma;
+
+	map->flags = GNTMAP_host_map;
 	if (!(vma->vm_flags & VM_WRITE))
 		map->flags |= GNTMAP_readonly;
 
 	spin_unlock(&priv->lock);
 
-	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
-				  vma->vm_end - vma->vm_start,
-				  find_grant_ptes, map);
-	if (err) {
-		printk(KERN_WARNING "find_grant_ptes() failure.\n");
-		return err;
+	if (use_ptemod) {
+		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+					  vma->vm_end - vma->vm_start,
+					  find_grant_ptes, map);
+		if (err) {
+			printk(KERN_WARNING "find_grant_ptes() failure.\n");
+			return err;
+		}
 	}
 
 	err = map_grant_pages(map);
@@ -564,6 +602,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 
 	map->is_mapped = 1;
 
+	if (!use_ptemod) {
+		for (i = 0; i < count; i++) {
+			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
+				map->pages[i]);
+			if (err)
+				return err;
+		}
+	}
+
 	return 0;
 
 unlock_out:
@@ -594,6 +641,8 @@ static int __init gntdev_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	use_ptemod = xen_pv_domain();
+
 	err = misc_register(&gntdev_miscdev);
 	if (err != 0) {
 		printk(KERN_ERR "Could not register gntdev device\n");
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 9ef54eb..9428ced 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 	if (ret)
 		return ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
 		/* m2p override only supported for GNTMAP_contains_pte mappings */
 		if (!(map_ops[i].flags & GNTMAP_contains_pte))
@@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	if (ret)
 		return ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
 		ret = m2p_remove_override(pages[i]);
 		if (ret)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
                   ` (3 preceding siblings ...)
  2011-02-03 17:19 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
@ 2011-02-03 17:19 ` Daniel De Graaf
  2011-02-08 22:48   ` Konrad Rzeszutek Wilk
  2011-02-03 17:19 ` [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl Daniel De Graaf
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-03 17:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This allows a userspace application to allocate a shared page for
implementing inter-domain communication or device drivers. These
shared pages can be mapped using the gntdev device or by the kernel
in another domain.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/Kconfig    |    8 +
 drivers/xen/Makefile   |    2 +
 drivers/xen/gntalloc.c |  486 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/gntalloc.h |   50 +++++
 4 files changed, 546 insertions(+), 0 deletions(-)
 create mode 100644 drivers/xen/gntalloc.c
 create mode 100644 include/xen/gntalloc.h

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 19f1f3c..69d2cd5 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -142,6 +142,14 @@ config XEN_GNTDEV
 	help
 	  Allows userspace processes to use grants.
 
+config XEN_GRANT_DEV_ALLOC
+	tristate "User-space grant reference allocator driver"
+	depends on XEN
+	help
+	  Allows userspace processes to create pages with access granted
+	  to other domains. This can be used to implement frontend drivers
+	  or as part of an inter-domain shared memory channel.
+
 config XEN_PLATFORM_PCI
 	tristate "xen platform pci device driver"
 	depends on XEN_PVHVM && PCI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 5c3b031..09364b9 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
 obj-$(CONFIG_XENFS)		+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
@@ -19,5 +20,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
 
 xen-evtchn-y			:= evtchn.o
 xen-gntdev-y				:= gntdev.o
+xen-gntalloc-y				:= gntalloc.o
 
 xen-platform-pci-y		:= platform-pci.o
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
new file mode 100644
index 0000000..d06bf2b
--- /dev/null
+++ b/drivers/xen/gntalloc.c
@@ -0,0 +1,486 @@
+/******************************************************************************
+ * gntalloc.c
+ *
+ * Device for creating grant references (in user-space) that may be shared
+ * with other domains.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * This driver exists to allow userspace programs in Linux to allocate kernel
+ * memory that will later be shared with another domain.  Without this device,
+ * Linux userspace programs cannot create grant references.
+ *
+ * How this stuff works:
+ *   X -> granting a page to Y
+ *   Y -> mapping the grant from X
+ *
+ *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
+ *   2. X creates an entry in the grant table that says domid(Y) can access P.
+ *      This is done without a hypercall unless the grant table needs expansion.
+ *   3. X gives the grant reference identifier, GREF, to Y.
+ *   4. Y maps the page, either directly into kernel memory for use in a backend
+ *      driver, or via the gntdev device to map into the address space of an
+ *      application running in Y. This is the first point at which Xen does any
+ *      tracking of the page.
+ *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
+ *      to the shared page, and can now communicate with Y over the shared page.
+ *
+ *
+ * NOTE TO USERSPACE LIBRARIES:
+ *   The grant allocation and mmap()ing are, naturally, two separate operations.
+ *   Setup is ioctl(IOCTL_GNTALLOC_ALLOC_GREF) followed by mmap(); teardown is
+ *   munmap() plus either close() or ioctl(IOCTL_GNTALLOC_DEALLOC_GREF).
+ *
+ * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
+ * reference, this device can be used to consume kernel memory by leaving grant
+ * references mapped by another domain when an application exits. Therefore,
+ * there is a global limit on the number of pages that can be allocated. When
+ * all references to the page are unmapped, it will be freed during the next
+ * grant operation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/gntalloc.h>
+
+static int limit = 1024;
+module_param(limit, int, 0644);
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
+		"the gntalloc device");
+
+static LIST_HEAD(gref_list);
+static DEFINE_SPINLOCK(gref_lock);
+static int gref_size;
+
+/* Metadata on a grant reference. */
+struct gntalloc_gref {
+	struct list_head next_gref;  /* list entry gref_list */
+	struct list_head next_file;  /* list entry file->list, if open */
+	struct page *page;	     /* The shared page */
+	uint64_t file_index;         /* File offset for mmap() */
+	unsigned int users;          /* Use count - when zero, waiting on Xen */
+	grant_ref_t gref_id;         /* The grant reference number */
+};
+
+struct gntalloc_file_private_data {
+	struct list_head list;
+	uint64_t index;
+};
+
+static void __del_gref(struct gntalloc_gref *gref);
+
+static void do_cleanup(void)
+{
+	struct gntalloc_gref *gref, *n;
+	list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
+		if (!gref->users)
+			__del_gref(gref);
+	}
+}
+
+static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
+	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
+{
+	int i, rc, readonly;
+	LIST_HEAD(queue_gref);
+	LIST_HEAD(queue_file);
+	struct gntalloc_gref *gref;
+
+	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
+	rc = -ENOMEM;
+	for (i = 0; i < op->count; i++) {
+		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
+		if (!gref)
+			goto undo;
+		list_add_tail(&gref->next_gref, &queue_gref);
+		list_add_tail(&gref->next_file, &queue_file);
+		gref->users = 1;
+		gref->file_index = op->index + i * PAGE_SIZE;
+		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!gref->page)
+			goto undo;
+
+		/* Grant foreign access to the page. */
+		gref->gref_id = gnttab_grant_foreign_access(op->domid,
+			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
+		if (gref->gref_id < 0) {
+			rc = gref->gref_id;
+			goto undo;
+		}
+		gref_ids[i] = gref->gref_id;
+	}
+
+	/* Add to gref lists. */
+	spin_lock(&gref_lock);
+	list_splice_tail(&queue_gref, &gref_list);
+	list_splice_tail(&queue_file, &priv->list);
+	spin_unlock(&gref_lock);
+
+	return 0;
+
+undo:
+	spin_lock(&gref_lock);
+	gref_size -= (op->count - i);
+
+	list_for_each_entry(gref, &queue_file, next_file) {
+		/* __del_gref does not remove from queue_file */
+		__del_gref(gref);
+	}
+
+	/* It's possible for the target domain to map the just-allocated grant
+	 * references by blindly guessing their IDs; if this is done, then
+	 * __del_gref will leave them in the queue_gref list. They need to be
+	 * added to the global list so that we can free them when they are no
+	 * longer referenced.
+	 */
+	if (unlikely(!list_empty(&queue_gref)))
+		list_splice_tail(&queue_gref, &gref_list);
+	spin_unlock(&gref_lock);
+	return rc;
+}
+
+static void __del_gref(struct gntalloc_gref *gref)
+{
+	if (gref->gref_id > 0) {
+		if (gnttab_query_foreign_access(gref->gref_id))
+			return;
+
+		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
+			return;
+	}
+
+	gref_size--;
+	list_del(&gref->next_gref);
+
+	if (gref->page)
+		__free_page(gref->page);
+
+	kfree(gref);
+}
+
+/* finds contiguous grant references in a file, returns the first */
+static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
+		uint64_t index, uint32_t count)
+{
+	struct gntalloc_gref *rv = NULL, *gref;
+	list_for_each_entry(gref, &priv->list, next_file) {
+		if (gref->file_index == index && !rv)
+			rv = gref;
+		if (rv) {
+			if (gref->file_index != index)
+				return NULL;
+			index += PAGE_SIZE;
+			count--;
+			if (count == 0)
+				return rv;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * -------------------------------------
+ *  File operations.
+ * -------------------------------------
+ */
+static int gntalloc_open(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto out_nomem;
+	INIT_LIST_HEAD(&priv->list);
+
+	filp->private_data = priv;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	return 0;
+
+out_nomem:
+	return -ENOMEM;
+}
+
+static int gntalloc_release(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	spin_lock(&gref_lock);
+	while (!list_empty(&priv->list)) {
+		gref = list_entry(priv->list.next,
+			struct gntalloc_gref, next_file);
+		list_del(&gref->next_file);
+		gref->users--;
+		if (gref->users == 0)
+			__del_gref(gref);
+	}
+	kfree(priv);
+	spin_unlock(&gref_lock);
+
+	return 0;
+}
+
+static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
+		struct ioctl_gntalloc_alloc_gref __user *arg)
+{
+	int rc = 0;
+	struct ioctl_gntalloc_alloc_gref op;
+	uint32_t *gref_ids;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
+	if (!gref_ids) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock(&gref_lock);
+	/* Clean up pages that were at zero (local) users but were still mapped
+	 * by remote domains. Since those pages count towards the limit that we
+	 * are about to enforce, removing them here is a good idea.
+	 */
+	do_cleanup();
+	if (gref_size + op.count > limit) {
+		spin_unlock(&gref_lock);
+		rc = -ENOSPC;
+		goto out_free;
+	}
+	gref_size += op.count;
+	op.index = priv->index;
+	priv->index += op.count * PAGE_SIZE;
+	spin_unlock(&gref_lock);
+
+	rc = add_grefs(&op, gref_ids, priv);
+	if (rc < 0)
+		goto out_free;
+
+	/* Once we finish add_grefs, it is unsafe to touch the new reference,
+	 * since it is possible for a concurrent ioctl to remove it (by guessing
+	 * its index). If the userspace application doesn't provide valid memory
+	 * to write the IDs to, then it will need to close the file in order to
+	 * release - which it will do by segfaulting when it tries to access the
+	 * IDs to close them.
+	 */
+	if (copy_to_user(arg, &op, sizeof(op))) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+	if (copy_to_user(arg->gref_ids, gref_ids,
+			sizeof(gref_ids[0]) * op.count)) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+
+out_free:
+	kfree(gref_ids);
+out:
+	return rc;
+}
+
+static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
+		void __user *arg)
+{
+	int i, rc = 0;
+	struct ioctl_gntalloc_dealloc_gref op;
+	struct gntalloc_gref *gref, *n;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto dealloc_grant_out;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_grefs(priv, op.index, op.count);
+	if (gref) {
+		/* Remove from the file list only, and decrease reference count.
+		 * The later call to do_cleanup() will remove from gref_list and
+		 * free the memory if the pages aren't mapped anywhere.
+		 */
+		for (i = 0; i < op.count; i++) {
+			n = list_entry(gref->next_file.next,
+				struct gntalloc_gref, next_file);
+			list_del(&gref->next_file);
+			gref->users--;
+			gref = n;
+		}
+	} else {
+		rc = -EINVAL;
+	}
+
+	do_cleanup();
+
+	spin_unlock(&gref_lock);
+dealloc_grant_out:
+	return rc;
+}
+
+static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+
+	switch (cmd) {
+	case IOCTL_GNTALLOC_ALLOC_GREF:
+		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
+
+	case IOCTL_GNTALLOC_DEALLOC_GREF:
+		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return 0;
+}
+
+static void gntalloc_vma_close(struct vm_area_struct *vma)
+{
+	struct gntalloc_gref *gref = vma->vm_private_data;
+	if (!gref)
+		return;
+
+	spin_lock(&gref_lock);
+	gref->users--;
+	if (gref->users == 0)
+		__del_gref(gref);
+	spin_unlock(&gref_lock);
+}
+
+static struct vm_operations_struct gntalloc_vmops = {
+	.close = gntalloc_vma_close,
+};
+
+static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int rv, i;
+
+	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
+		       priv, vma->vm_pgoff, count);
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
+		return -EINVAL;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
+	if (gref == NULL) {
+		rv = -ENOENT;
+		pr_debug("%s: Could not find grant reference",
+				__func__);
+		goto out_unlock;
+	}
+
+	vma->vm_private_data = gref;
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTCOPY;
+	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
+
+	vma->vm_ops = &gntalloc_vmops;
+
+	for (i = 0; i < count; i++) {
+		gref->users++;
+		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+				gref->page);
+		if (rv)
+			goto out_unlock;
+
+		gref = list_entry(gref->next_file.next,
+				struct gntalloc_gref, next_file);
+	}
+	rv = 0;
+
+out_unlock:
+	spin_unlock(&gref_lock);
+	return rv;
+}
+
+static const struct file_operations gntalloc_fops = {
+	.owner = THIS_MODULE,
+	.open = gntalloc_open,
+	.release = gntalloc_release,
+	.unlocked_ioctl = gntalloc_ioctl,
+	.mmap = gntalloc_mmap
+};
+
+/*
+ * -------------------------------------
+ * Module creation/destruction.
+ * -------------------------------------
+ */
+static struct miscdevice gntalloc_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "xen/gntalloc",
+	.fops	= &gntalloc_fops,
+};
+
+static int __init gntalloc_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	err = misc_register(&gntalloc_miscdev);
+	if (err != 0) {
+		printk(KERN_ERR "Could not register misc gntalloc device\n");
+		return err;
+	}
+
+	pr_debug("Created grant allocation device at %d,%d\n",
+			MISC_MAJOR, gntalloc_miscdev.minor);
+
+	return 0;
+}
+
+static void __exit gntalloc_exit(void)
+{
+	misc_deregister(&gntalloc_miscdev);
+}
+
+module_init(gntalloc_init);
+module_exit(gntalloc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
+		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("User-space grant reference allocator driver");
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
new file mode 100644
index 0000000..bc3b85e
--- /dev/null
+++ b/include/xen/gntalloc.h
@@ -0,0 +1,50 @@
+/******************************************************************************
+ * gntalloc.h
+ *
+ * Interface to /dev/xen/gntalloc.
+ *
+ * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov>
+ *
+ * This file is in the public domain.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTALLOC_H__
+#define __LINUX_PUBLIC_GNTALLOC_H__
+
+/*
+ * Allocates a new page and creates a new grant reference.
+ */
+#define IOCTL_GNTALLOC_ALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
+struct ioctl_gntalloc_alloc_gref {
+	/* IN parameters */
+	/* The ID of the domain to be given access to the grants. */
+	uint16_t domid;
+	/* Flags for this mapping */
+	uint16_t flags;
+	/* Number of pages to map */
+	uint32_t count;
+	/* OUT parameters */
+	/* The offset to be used on a subsequent call to mmap(). */
+	uint64_t index;
+	/* The grant references of the newly created grant, one per page */
+	/* Variable size, depending on count */
+	uint32_t gref_ids[1];
+};
+
+#define GNTALLOC_FLAG_WRITABLE 1
+
+/*
+ * Deallocates the grant reference, allowing the associated page to be freed if
+ * no other domains are using it.
+ */
+#define IOCTL_GNTALLOC_DEALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
+struct ioctl_gntalloc_dealloc_gref {
+	/* IN parameters */
+	/* The offset returned in the map operation */
+	uint64_t index;
+	/* Number of references to unmap */
+	uint32_t count;
+};
+#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
                   ` (4 preceding siblings ...)
  2011-02-03 17:19 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
@ 2011-02-03 17:19 ` Daniel De Graaf
  2011-02-14 15:37   ` Konrad Rzeszutek Wilk
  2011-02-03 19:16 ` [PATCH] xen-gntdev: Fix memory leak when mmap fails Daniel De Graaf
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-03 17:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This ioctl allows the users of a shared page to be notified when
the other end exits abnormally.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntalloc.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/gntdev.c   |   61 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/xen/gntalloc.h |   28 ++++++++++++++++++++++
 include/xen/gntdev.h   |   27 +++++++++++++++++++++
 4 files changed, 174 insertions(+), 1 deletions(-)

diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index d06bf2b..a7ffdfe 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -60,11 +60,13 @@
 #include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/highmem.h>
 
 #include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/grant_table.h>
 #include <xen/gntalloc.h>
+#include <xen/events.h>
 
 static int limit = 1024;
 module_param(limit, int, 0644);
@@ -75,6 +77,12 @@ static LIST_HEAD(gref_list);
 static DEFINE_SPINLOCK(gref_lock);
 static int gref_size;
 
+struct notify_info {
+	uint16_t pgoff:12;    /* Bits 0-11: Offset of the byte to clear */
+	uint16_t flags:2;     /* Bits 12-13: Unmap notification flags */
+	int event;            /* Port (event channel) to notify */
+};
+
 /* Metadata on a grant reference. */
 struct gntalloc_gref {
 	struct list_head next_gref;  /* list entry gref_list */
@@ -83,6 +91,7 @@ struct gntalloc_gref {
 	uint64_t file_index;         /* File offset for mmap() */
 	unsigned int users;          /* Use count - when zero, waiting on Xen */
 	grant_ref_t gref_id;         /* The grant reference number */
+	struct notify_info notify;   /* Unmap notification */
 };
 
 struct gntalloc_file_private_data {
@@ -164,6 +173,16 @@ undo:
 
 static void __del_gref(struct gntalloc_gref *gref)
 {
+	if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+		uint8_t *tmp = kmap(gref->page);
+		tmp[gref->notify.pgoff] = 0;
+		kunmap(gref->page);
+	}
+	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+		notify_remote_via_evtchn(gref->notify.event);
+
+	gref->notify.flags = 0;
+
 	if (gref->gref_id > 0) {
 		if (gnttab_query_foreign_access(gref->gref_id))
 			return;
@@ -349,6 +368,43 @@ dealloc_grant_out:
 	return rc;
 }
 
+static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
+		void __user *arg)
+{
+	struct ioctl_gntalloc_unmap_notify op;
+	struct gntalloc_gref *gref;
+	uint64_t index;
+	int pgoff;
+	int rc;
+
+	if (copy_from_user(&op, arg, sizeof(op)))
+		return -EFAULT;
+
+	index = op.index & ~(PAGE_SIZE - 1);
+	pgoff = op.index & (PAGE_SIZE - 1);
+
+	spin_lock(&gref_lock);
+
+	gref = find_grefs(priv, index, 1);
+	if (!gref) {
+		rc = -ENOENT;
+		goto unlock_out;
+	}
+
+	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
+	gref->notify.flags = op.action;
+	gref->notify.pgoff = pgoff;
+	gref->notify.event = op.event_channel_port;
+	rc = 0;
+ unlock_out:
+	spin_unlock(&gref_lock);
+	return rc;
+}
+
 static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -361,6 +417,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
 	case IOCTL_GNTALLOC_DEALLOC_GREF:
 		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
 
+	case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
+		return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg);
+
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 58fddf3..91706c2 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -37,6 +37,7 @@
 #include <xen/xen.h>
 #include <xen/grant_table.h>
 #include <xen/gntdev.h>
+#include <xen/events.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
@@ -63,6 +64,13 @@ struct gntdev_priv {
 	struct mmu_notifier mn;
 };
 
+struct unmap_notify {
+	int flags;
+	/* Address relative to the start of the grant_map */
+	int addr;
+	int event;
+};
+
 struct grant_map {
 	struct list_head next;
 	struct vm_area_struct *vma;
@@ -71,6 +79,7 @@ struct grant_map {
 	int flags;
 	int is_mapped;
 	atomic_t users;
+	struct unmap_notify notify;
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
@@ -165,7 +174,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
 	list_for_each_entry(map, &priv->maps, next) {
 		if (map->index != index)
 			continue;
-		if (map->count != count)
+		if (count && map->count != count)
 			continue;
 		return map;
 	}
@@ -184,6 +193,10 @@ static void gntdev_put_map(struct grant_map *map)
 
 	atomic_sub(map->count, &pages_mapped);
 
+	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+		notify_remote_via_evtchn(map->notify.event);
+	}
+
 	if (map->pages) {
 		if (!use_ptemod)
 			unmap_grant_pages(map, 0, map->count);
@@ -273,6 +286,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
 	int i, err = 0;
 
+	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+		int pgno = (map->notify.addr >> PAGE_SHIFT);
+		if (pgno >= offset && pgno < offset + pages) {
+			uint8_t *tmp = kmap(map->pages[pgno]);
+			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+			kunmap(map->pages[pgno]);
+			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+		}
+	}
+
 	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
 	if (err)
@@ -518,6 +541,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 	return 0;
 }
 
+static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
+{
+	struct ioctl_gntdev_unmap_notify op;
+	struct grant_map *map;
+	int rc;
+
+	if (copy_from_user(&op, u, sizeof(op)))
+		return -EFAULT;
+
+	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
+		return -EINVAL;
+
+	spin_lock(&priv->lock);
+
+	list_for_each_entry(map, &priv->maps, next) {
+		uint64_t begin = map->index << PAGE_SHIFT;
+		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
+		if (op.index >= begin && op.index < end)
+			goto found;
+	}
+	rc = -ENOENT;
+	goto unlock_out;
+
+ found:
+	map->notify.flags = op.action;
+	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
+	map->notify.event = op.event_channel_port;
+	rc = 0;
+ unlock_out:
+	spin_unlock(&priv->lock);
+	return rc;
+}
+
 static long gntdev_ioctl(struct file *flip,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -534,6 +590,9 @@ static long gntdev_ioctl(struct file *flip,
 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
 
+	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
+		return gntdev_ioctl_notify(priv, ptr);
+
 	default:
 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
 		return -ENOIOCTLCMD;
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
index bc3b85e..257cc8d 100644
--- a/include/xen/gntalloc.h
+++ b/include/xen/gntalloc.h
@@ -47,4 +47,32 @@ struct ioctl_gntalloc_dealloc_gref {
 	/* Number of references to unmap */
 	uint32_t count;
 };
+
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
+struct ioctl_gntalloc_unmap_notify {
+	/* IN parameters */
+	/* Index of a byte in the page */
+	uint64_t index;
+	/* Action(s) to take on unmap */
+	uint32_t action;
+	/* Event channel to notify */
+	uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
index eb23f41..5d9b9b4 100644
--- a/include/xen/gntdev.h
+++ b/include/xen/gntdev.h
@@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants {
 	uint32_t count;
 };
 
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification before the grant is unmapped (munmap() or
+ * IOCTL_GNTDEV_UNMAP_GRANT_REF) if you do not want it to occur.
+ */
+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
+struct ioctl_gntdev_unmap_notify {
+	/* IN parameters */
+	/* Index of a byte in the page */
+	uint64_t index;
+	/* Action(s) to take on unmap */
+	uint32_t action;
+	/* Event channel to notify */
+	uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH] xen-gntdev: Fix memory leak when mmap fails
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
                   ` (5 preceding siblings ...)
  2011-02-03 17:19 ` [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl Daniel De Graaf
@ 2011-02-03 19:16 ` Daniel De Graaf
  2011-02-07 23:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-03 19:16 UTC (permalink / raw)
  To: konrad.wilk; +Cc: jeremy, xen-devel, Ian.Campbell

Fix for a memory leak introduced in patch 3. Doesn't merge cleanly across
patch 4; I can post a corrected version of both patches if that would be
preferred.

------------------------------------------------------------->8

The error path did not decrement the reference count of the grant structure.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   14 ++++++++------
 1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 91706c2..6c754a9 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -649,15 +649,13 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 					  find_grant_ptes, map);
 		if (err) {
 			printk(KERN_WARNING "find_grant_ptes() failure.\n");
-			return err;
+			goto out_put_map;
 		}
 	}
 
 	err = map_grant_pages(map);
-	if (err) {
-		printk(KERN_WARNING "map_grant_pages() failure.\n");
-		return err;
-	}
+	if (err)
+		goto out_put_map;
 
 	map->is_mapped = 1;
 
@@ -666,7 +664,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
 				map->pages[i]);
 			if (err)
-				return err;
+				goto out_put_map;
 		}
 	}
 
@@ -675,6 +673,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 unlock_out:
 	spin_unlock(&priv->lock);
 	return err;
+
+out_put_map:
+	gntdev_put_map(map);
+	return err;
 }
 
 static const struct file_operations gntdev_fops = {
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH v6] Userspace grant communication
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
                   ` (6 preceding siblings ...)
  2011-02-03 19:16 ` [PATCH] xen-gntdev: Fix memory leak when mmap fails Daniel De Graaf
@ 2011-02-07 23:14 ` Konrad Rzeszutek Wilk
  2011-02-08 14:14   ` [PATCH] xen-gntdev: Fix unmap notify on PV domains Daniel De Graaf
  2011-02-08 21:49   ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
  2011-02-09 21:11 ` [PATCH] xen-gntdev: Avoid double-mapping memory Daniel De Graaf
  2011-02-14 16:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
  9 siblings, 2 replies; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-07 23:14 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

[-- Attachment #1: Type: text/plain, Size: 1201 bytes --]

On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote:
> Changes since v5:
>   - Added a tested xen version to workaround in #4
>   - Cleaned up variable names & structures
>   - Clarified some of the cleanup in gntalloc
>   - Removed copyright statement from public-domain files
> 
> [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
> [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
> [PATCH 3/6] xen-gntdev: Add reference counting to maps
> [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
> [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
> [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl
> 
> Test/Demo code (also updated):

I played with this (two PV domains) and I got two failures:

1) When forgetting to unmap a grant page and quitting the tool.
2) When unmapping appropriately.

Attached are the logs from the domain exporting the grants
(domain 4), and from the faulting domains (domain 3 and domain 5).

This was tested with this patchset (devel/gntalloc.v6) applied
on top of 2.6.38-rc2 with a whole bunch of patches. To be
specific:

git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git #master


[-- Attachment #2: pv-did-unmap.log --]
[-- Type: text/plain, Size: 21297 bytes --]

22:57:25 # 5 :/mnt/lab/latest/ 
> xm create -c pv.xm
Using config file "./pv.xm".
Started domain pv.xm (id=5)
[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Linux version 2.6.38-rc2-00045-gcb81bee (konrad@phenom) (gcc version 4.4.4 20100503 (Red Hat 4.4.4-2) (GCC) ) #1 SMP Mon Feb 7 17:28:33 EST 2011
[    0.000000] Command line: console=hvc0 debug iommu=soft swiotlb=force
[    0.000000] ACPI in unprivileged domain disabled
[    0.000000] released 0 pages of unused memory
[    0.000000] Set 0 page(s) to 1-1 mapping.
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  Xen: 0000000000000000 - 00000000000a0000 (usable)
[    0.000000]  Xen: 00000000000a0000 - 0000000000100000 (reserved)
[    0.000000]  Xen: 0000000000100000 - 000000002d800000 (usable)
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] DMI not present or invalid.
[    0.000000] e820 update range: 0000000000000000 - 0000000000010000 (usable) ==> (reserved)
[    0.000000] e820 remove range: 00000000000a0000 - 0000000000100000 (usable)
[    0.000000] No AGP bridge found
[    0.000000] last_pfn = 0x2d800 max_arch_pfn = 0x400000000
[    0.000000] Scanning 0 areas for low memory corruption
[    0.000000] initial memory mapped : 0 - 0e3ff000
[    0.000000] init_memory_mapping: 0000000000000000-000000002d800000
[    0.000000]  0000000000 - 002d800000 page 4k
[    0.000000] kernel direct mapping tables up to 2d800000 @ e291000-e3ff000
[    0.000000] RAMDISK: 01b5c000 - 0d824000
[    0.000000] No NUMA configuration found
[    0.000000] Faking a node at 0000000000000000-000000002d800000
[    0.000000] Initmem setup node 0 0000000000000000-000000002d800000
[    0.000000]   NODE_DATA [000000002cffb000 - 000000002cffffff]
[    0.000000] Zone PFN ranges:
[    0.000000]   DMA      0x00000010 -> 0x00001000
[    0.000000]   DMA32    0x00001000 -> 0x00100000
[    0.000000]   Normal   empty
[    0.000000] Movable zone start PFN for each node
[    0.000000] early_node_map[2] active PFN ranges
[    0.000000]     0: 0x00000010 -> 0x000000a0
[    0.000000]     0: 0x00000100 -> 0x0002d800
[    0.000000] On node 0 totalpages: 186256
[    0.000000]   DMA zone: 56 pages used for memmap
[    0.000000]   DMA zone: 6 pages reserved
[    0.000000]   DMA zone: 3922 pages, LIFO batch:0
[    0.000000]   DMA32 zone: 2492 pages used for memmap
[    0.000000]   DMA32 zone: 179780 pages, LIFO batch:31
[    0.000000] SMP: Allowing 1 CPUs, 0 hotplug CPUs
[    0.000000] No local APIC present
[    0.000000] APIC: disable apic facility
[    0.000000] APIC: switched to apic NOOP
[    0.000000] nr_irqs_gsi: 16
[    0.000000] PM: Registered nosave memory: 00000000000a0000 - 0000000000100000
[    0.000000] Allocating PCI resources starting at 2d800000 (gap: 2d800000:d2800000)
[    0.000000] Booting paravirtualized kernel on Xen
[    0.000000] Xen version: 4.1-110207 (preserve-AD)
[    0.000000] setup_percpu: NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:1 nr_node_ids:1
[    0.000000] PERCPU: Embedded 28 pages/cpu @ffff88002cfce000 s83136 r8192 d23360 u114688
[    0.000000] pcpu-alloc: s83136 r8192 d23360 u114688 alloc=28*4096
[    0.000000] pcpu-alloc: [0] 0 
[    0.000000] Built 1 zonelists in Node order, mobility grouping on.  Total pages: 183702
[    0.000000] Policy zone: DMA32
[    0.000000] Kernel command line: console=hvc0 debug iommu=soft swiotlb=force
[    0.000000] PID hash table entries: 4096 (order: 3, 32768 bytes)
[    0.000000] Placing 64MB software IO TLB between ffff880027a00000 - ffff88002ba00000
[    0.000000] software IO TLB at phys 0x27a00000 - 0x2ba00000
[    0.000000] Memory: 453168k/745472k available (4423k kernel code, 448k absent, 291856k reserved, 4281k data, 644k init)
[    0.000000] SLUB: Genslabs=15, HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
[    0.000000] Hierarchical RCU implementation.
[    0.000000] 	RCU-based detection of stalled CPUs is disabled.
[    0.000000] NR_IRQS:4352 nr_irqs:256 16
[    0.000000] Console: colour dummy device 80x25
[    0.000000] console [tty0] enabled
[    0.000000] console [hvc0] enabled
[    0.000000] Xen: using vcpuop timer interface
[    0.000000] installing Xen timer for CPU 0
[    0.000000] Detected 3000.180 MHz processor.
[    0.000999] Calibrating delay loop (skipped), value calculated using timer frequency.. 6000.36 BogoMIPS (lpj=3000180)
[    0.000999] pid_max: default: 32768 minimum: 301
[    0.000999] Security Framework initialized
[    0.000999] SELinux:  Initializing.
[    0.000999] SELinux:  Starting in permissive mode
[    0.000999] Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes)
[    0.000999] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes)
[    0.000999] Mount-cache hash table entries: 256
[    0.000999] Initializing cgroup subsys ns
[    0.000999] ns_cgroup deprecated: consider using the 'clone_children' flag without the ns_cgroup.
[    0.000999] Initializing cgroup subsys cpuacct
[    0.000999] Initializing cgroup subsys freezer
[    0.001033] tseg: 0000000000
[    0.001043] CPU: Physical Processor ID: 0
[    0.001048] CPU: Processor Core ID: 0
[    0.001109] SMP alternatives: switching to UP code
[    0.003016] Freeing SMP alternatives: 16k freed
[    0.003096] cpu 0 spinlock event irq 17
[    0.003114] Performance Events: 
[    0.003119] no APIC, boot with the "lapic" boot parameter to force-enable it.
[    0.003126] no hardware sampling interrupt available.
[    0.003156] Broken PMU hardware detected, using software events only.
[    0.003321] MCE: In-kernel MCE decoding enabled.
[    0.003329] Brought up 1 CPUs
[    0.003502] kworker/u:0 used greatest stack depth: 6032 bytes left
[    0.003758] Grant table initialized
[    0.022760] Time: 165:165:165  Date: 165/165/65
[    0.022817] NET: Registered protocol family 16
[    0.023261] kworker/u:0 used greatest stack depth: 5768 bytes left
[    0.023361] Extended Config Space enabled on 0 nodes
[    0.023394] PCI: setting up Xen PCI frontend stub
[    0.023394] PCI: pci_cache_line_size set to 64 bytes
[    0.029997] bio: create slab <bio-0> at 0
[    0.030315] ACPI: Interpreter disabled.
[    0.032041] xen_balloon: Initialising balloon driver.
[    0.032120] last_pfn = 0x2d800 max_arch_pfn = 0x400000000
[    0.032158] vgaarb: loaded
[    0.032400] usbcore: registered new interface driver usbfs
[    0.032453] usbcore: registered new interface driver hub
[    0.032515] usbcore: registered new device driver usb
[    0.032746] PCI: System does not support PCI
[    0.032753] PCI: System does not support PCI
[    0.033217] NetLabel: Initializing
[    0.033223] NetLabel:  domain hash size = 128
[    0.033228] NetLabel:  protocols = UNLABELED CIPSOv4
[    0.033243] NetLabel:  unlabeled traffic allowed by default
[    0.033437] Switching to clocksource xen
[    0.035119] pnp: PnP ACPI: disabled
[    0.039518] NET: Registered protocol family 2
[    0.039596] IP route cache hash table entries: 8192 (order: 4, 65536 bytes)
[    0.039868] TCP established hash table entries: 32768 (order: 7, 524288 bytes)
[    0.040182] TCP bind hash table entries: 32768 (order: 8, 1048576 bytes)
[    0.040522] TCP: Hash tables configured (established 32768 bind 32768)
[    0.040530] TCP reno registered
[    0.040548] UDP hash table entries: 512 (order: 3, 49152 bytes)
[    0.040577] UDP-Lite hash table entries: 512 (order: 3, 49152 bytes)
[    0.040665] NET: Registered protocol family 1
[    0.040771] RPC: Registered udp transport module.
[    0.040777] RPC: Registered tcp transport module.
[    0.040782] RPC: Registered tcp NFSv4.1 backchannel transport module.
[    0.040788] PCI: CLS 0 bytes, default 64
[    0.040928] Trying to unpack rootfs image as initramfs...
[    0.328536] Freeing initrd memory: 193312k freed
[    0.394416] platform rtc_cmos: registered platform RTC device (no PNP device found)
[    0.394665] Machine check injector initialized
[    0.395035] microcode: CPU0: patch_level=0x1000086
[    0.395211] microcode: Microcode Update Driver: v2.00 <tigran@aivazian.fsnet.co.uk>, Peter Oruba
[    0.395220] Scanning for low memory corruption every 60 seconds
[    0.395531] audit: initializing netlink socket (disabled)
[    0.395549] type=2000 audit(1297119453.498:1): initialized
[    0.403499] HugeTLB registered 2 MB page size, pre-allocated 0 pages
[    0.407647] VFS: Disk quotas dquot_6.5.2
[    0.407747] Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
[    0.408393] NTFS driver 2.1.30 [Flags: R/W].
[    0.408588] msgmni has been set to 1262
[    0.408703] SELinux:  Registering netfilter hooks
[    0.409132] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 253)
[    0.409142] io scheduler noop registered
[    0.409147] io scheduler deadline registered
[    0.409219] io scheduler cfq registered (default)
[    0.409397] pci_hotplug: PCI Hot Plug PCI Core version: 0.5
[    0.446259] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
[    0.446976] Non-volatile memory driver v1.3
[    0.446985] Linux agpgart interface v0.103
[    0.447266] [drm] Initialized drm 1.1.0 20060810
[    0.449531] brd: module loaded
[    0.450718] loop: module loaded
[    0.450833] Fixed MDIO Bus: probed
[    0.450925] tun: Universal TUN/TAP device driver, 1.6
[    0.450931] tun: (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
[    0.451201] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
[    0.451210] ehci_hcd: block sizes: qh 104 qtd 96 itd 192 sitd 96
[    0.451269] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
[    0.451276] ohci_hcd: block sizes: ed 80 td 96
[    0.451325] uhci_hcd: USB Universal Host Controller Interface driver
[    0.451505] usbcore: registered new interface driver usblp
[    0.451556] usbcore: registered new interface driver libusual
[    0.451722] i8042: PNP: No PS/2 controller found. Probing ports directly.
[    0.452055] i8042: No controller found
[    0.452738] mousedev: PS/2 mouse device common for all mice
[    0.452987] rtc_cmos rtc_cmos: rtc core: registered rtc_cmos as rtc0
[    0.453089] rtc_cmos: probe of rtc_cmos failed with error -38
[    0.453281] cpuidle: using governor ladder
[    0.453288] cpuidle: using governor menu
[    0.453396] Netfilter messages via NETLINK v0.30.
[    0.453419] nf_conntrack version 0.5.0 (5050 buckets, 20200 max)
[    0.453561] ctnetlink v0.93: registering with nfnetlink.
[    0.454028] ip_tables: (C) 2000-2006 Netfilter Core Team
[    0.454106] TCP cubic registered
[    0.454111] Initializing XFRM netlink socket
[    0.454429] NET: Registered protocol family 10
[    0.455160] ip6_tables: (C) 2000-2006 Netfilter Core Team
[    0.455212] IPv6 over IPv4 tunneling driver
[    0.455850] NET: Registered protocol family 17
[    0.455887] Registering the dns_resolver key type
[    0.456159] PM: Hibernation image not present or could not be loaded.
[    0.456177] registered taskstats version 1
[    0.456190] XENBUS: Device with no driver: device/vkbd/0
[    0.456195] XENBUS: Device with no driver: device/vfb/0
[    0.456201] XENBUS: Device with no driver: device/console/0
[    0.456216]   Magic number: 1:252:3141
[    0.456666] Freeing unused kernel memory: 644k freed
[    0.456872] Write protecting the kernel read-only data: 8192k
[    0.460157] Testing CPA: undo ffffffff81000000-ffffffff81800000
[    0.462755] Testing CPA: again
[    0.466371] Freeing unused kernel memory: 1700k freed
[    0.467169] Freeing unused kernel memory: 492k freed
\rinit started: BusyBox v1.14.3 (2011-02-07 17:30:37 EST)
[    0.474074] consoletype used greatest stack depth: 5680 bytes left
Mounting directories  [  OK  ]
[    0.693752] modprobe used greatest stack depth: 5472 bytes left
[    0.697605] core_filesystem used greatest stack depth: 5408 bytes left
Running in PV context on Xen v4.1.
[    0.708905] input: Xen Virtual Keyboard as /devices/virtual/input/input0
[    0.709131] input: Xen Virtual Pointer as /devices/virtual/input/input1
[    0.925389] Initialising Xen virtual ethernet driver.
[    0.947486] udevd (1120): /proc/1120/oom_adj is deprecated, please use /proc/1120/oom_score_adj instead.
[    1.152094] ip used greatest stack depth: 4160 bytes left
Waiting for devices [  OK  ]
Waiting for fb [  OK  ]
Starting..[/dev/fb0]
/dev/fb0: len:0
/dev/fb0: bits/pixel32
(7f75e3ef6000): Writting .. [800:600]
Done!
FATAL: Module agpgart_intel not found.
[    1.520122] Console: switching to colour frame buffer device 100x37
[    1.568694] [drm] radeon kernel modesetting enabled.
WARNING: Error inserting drm_kms_helper (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/drm_kms_helper.ko): No such device
WARNING: Error inserting ttm (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/ttm/ttm.ko): No such device
FATAL: Error inserting nouveau (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/nouveau/nouveau.ko): No such device
WARNING: Error inserting drm_kms_helper (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/drm_kms_helper.ko): No such device
FATAL: Error inserting i915 (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/i915/i915.ko): No such device
Waiting for network [  OK  ]
Bringing up loopback interface:  [  OK  ]
Bringing up interface eth0:  Device eth0 does not seem to be present, delaying initialization.
[FAILED]
Bringing up interface switch:  
Determining IP information for switch...
[   30.395114] CPA self-test:
[   30.416206]  4k 186368 large 0 gb 0 x 10986[ffff880000000000-ffff88002d7ff000] miss 0
[   30.459541]  4k 186368 large 0 gb 0 x 10986[ffff880000000000-ffff88002d7ff000] miss 0
[   30.500167]  4k 186368 large 0 gb 0 x 10986[ffff880000000000-ffff88002d7ff000] miss 0
[   30.500223] ok.
 failed.
[FAILED]
/etc/init.d/rcS: line 30: NFS: command not found
Waiting for init.custom [  OK  ]
Start sshd

Starting SSHd ...

    SSH started [2149]


mount.nfs: Failed to resolve server 192.168.101.1: Address family for hostname not supported
mount.nfs: Failed to resolve server 192.168.101.1: Address family for hostname not supported
[   64.102084] [drm] Module unloaded
ERROR: Module nouveau does not exist in /proc/modules
 [0x0->0x2d000] pfn
 [0x0->0x2d000] level entry
 [0x2d000->0x40000] level middle
 [0x2d000->0x1ffffff] missing
 [0x40000->0x1ffffff] level top
Waiting for SSHd [  OK  ]
WARNING: ssh currently running [2149] ignoring start request
Starting..[/dev/fb0]
/dev/fb0: len:0
/dev/fb0: bits/pixel32
(7fd91e639000): Writting .. [800:600]
Done!
FATAL: Module iscsi_ibft not found.
iscsistart: Missing or Invalid version from /sys/module/scsi_transport_iscsi/version. Make sure a up to date scsi_transport_iscsi module is loaded and a up todate version of iscsid is running. Exiting...
Feb  7 22:58:37 (none) syslogd 1.5.0: restart.
FATAL: Module evtchn not found.
[   64.718622] Event-channel device installed.
xencommons should be started first.
           CPU0       
 16:       3908  xen-percpu-virq      timer0
 17:          0  xen-percpu-ipi       spinlock0
 18:          0  xen-percpu-ipi       resched0
 19:          0  xen-percpu-ipi       callfunc0
 20:          0  xen-percpu-virq      debug0
 21:          0  xen-percpu-ipi       callfuncsingle0
 22:        301   xen-dyn-event     xenbus
 23:         91   xen-dyn-event     hvc_console
 24:          0   xen-dyn-event     vkbd
 25:        378   xen-dyn-event     vfb
NMI:          0   Non-maskable interrupts
LOC:          0   Local timer interrupts
SPU:          0   Spurious interrupts
PMI:          0   Performance monitoring interrupts
IWI:          0   IRQ work interrupts
RES:          0   Rescheduling interrupts
CAL:          0   Function call interrupts
TLB:          0   TLB shootdowns
TRM:          0   Thermal event interrupts
THR:          0   Threshold APIC interrupts
MCE:          0   Machine check exceptions
MCP:          0   Machine check polls
ERR:          0
MIS:          0
00000000-0000ffff : reserved
00010000-0009ffff : System RAM
000a0000-000fffff : reserved
00100000-2d7fffff : System RAM
  01000000-01451cab : Kernel code
  01451cac-0188017f : Kernel data
  01928000-01a25fff : Kernel bss
Feb  7 22:58:37 (none) init: starting pid 2234, tty '/dev/tty0': '/bin/sh'
Feb  7 22:58:37 (none) init: starting pid 2235, tty '/dev/tty1': '/bin/sh'
Feb  7 22:58:37 (none) init: starting pid 2236, tty '/dev/hvc0': '/bin/sh'
sh-4.1# 
sh-4.1# test\ag\b \b_gnt
add <domid>           return gntref, address
map <domid> <ref>     return index, address
adel <gntref>         delete <add> internal
ddel <index>          delete <map> internal
unmap <address>       unmap memory
show                  show all pages
<word>                append word to all mapped pages, show
 PID 8be

> map 4 9
Mapped grant 4.9 as 0=0x7ff79801f000

> show
00(140701383913472,0): id      8c16b8b4567 n=11000000 b=
END

> show
00(140701383913472,0): id      8c16b8b4567 n=11000000 b=deadbeef
END

> Woot!
00(140701383913472,0): id      8c16b8b4567 n=11000000 b=Woot!deadbeef
END

> unmap 0x7ff79801f000
Unmapped page at (nil)

> in\b \b\b \bn\b \bunmap 140701383913472
[  196.362131] BUG: unable to handle kernel paging request at ffff88001fca4049
[  196.363054] IP: [<ffffffff8126cc69>] unmap_grant_pages+0x75/0xd0
[  196.363054] PGD 1804067 PUD 1808067 PMD e31d067 PTE 0
[  196.363054] Oops: 0002 [#1] SMP 
[  196.363054] last sysfs file: /sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_map
[  196.363054] CPU 0 
[  196.363054] Modules linked in: xen_evtchn fbcon tileblit font bitblit ttm softcursor drm_kms_helper xen_blkfront xen_netfront xen_fbfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_kbdfront xenfs [last unloaded: dump_dma]
[  196.363054] 
[  196.363054] Pid: 2238, comm: test_gnt Not tainted 2.6.38-rc2-00045-gcb81bee #1 /
[  196.363054] RIP: e030:[<ffffffff8126cc69>]  [<ffffffff8126cc69>] unmap_grant_pages+0x75/0xd0
[  196.363054] RSP: e02b:ffff88001b509cb8  EFLAGS: 00010202
[  196.363054] RAX: 0000000000000049 RBX: ffff88001fe17a20 RCX: 00007ff798020000
[  196.363054] RDX: ffff880000000000 RSI: 0000000000000000 RDI: ffff88001fe17a20
[  196.363054] RBP: ffff88001b509cd8 R08: ffff88001b509ea8 R09: 0000000000000000
[  196.363054] R10: 0000000000000016 R11: 0000000000000206 R12: 0000000000000000
[  196.363054] R13: 0000000000000001 R14: 000000001fca4049 R15: ffff88001fe17a80
[  196.363054] FS:  00007ff798023700(0000) GS:ffff88002cfce000(0000) knlGS:0000000000000000
[  196.363054] CS:  e033 DS: 0000 ES: 0000 CR0: 000000008005003b
[  196.363054] CR2: ffff88001fca4049 CR3: 0000000026d57000 CR4: 0000000000000660
[  196.363054] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  196.363054] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  196.363054] Process test_gnt (pid: 2238, threadinfo ffff88001b508000, task ffff8800275e9e40)
[  196.363054] Stack:
[  196.363054]  00007ff79801f000 ffff88001fe17a20 00007ff798020000 ffff88001fe17a90
[  196.363054]  ffff88001b509d18 ffffffff8126d322 ffffffff81006689 ffff88001fe17ab0
[  196.363054]  ffff88001ec732c0 00007ff798020000 00007ff79801f000 00007ff798020000
[  196.363054] Call Trace:
[  196.363054]  [<ffffffff8126d322>] mn_invl_range_start+0x7c/0xb6
[  196.363054]  [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf
[  196.363054]  [<ffffffff810ec7cb>] __mmu_notifier_invalidate_range_start+0x2e/0x54
[  196.363054]  [<ffffffff810d3ed4>] unmap_vmas+0x93/0x80b
[  196.363054]  [<ffffffff810639dc>] ? remove_wait_queue+0x4c/0x51
[  196.363054]  [<ffffffff811fd12f>] ? do_raw_spin_lock+0x6b/0x120
[  196.363054]  [<ffffffff81008479>] ? xen_spin_unlock+0x11/0x30
[  196.363054]  [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf
[  196.363054]  [<ffffffff811fd0bb>] ? do_raw_spin_unlock+0x8f/0x98
[  196.363054]  [<ffffffff810c371e>] ? ____pagevec_lru_add+0x147/0x15e
[  196.363054]  [<ffffffff810d965b>] unmap_region+0x86/0xe0
[  196.363054]  [<ffffffff810da807>] do_munmap+0x2d4/0x33e
[  196.363054]  [<ffffffff810da8b2>] sys_munmap+0x41/0x58
[  196.363054]  [<ffffffff8100a992>] system_call_fastpath+0x16/0x1b
[  196.363054] Code: 00 00 00 88 ff ff 49 01 c6 48 b8 b7 6d db b6 6d db b6 6d 49 c1 fe 03 4c 0f af f0 48 63 43 30 49 c1 e6 0c 25 ff 0f 00 00 49 01 c6 <41> c6 04 16 00 83 63 2c fe 49 63 fc 48 8b 73 50 44 89 ea 48 6b 
[  196.363054] RIP  [<ffffffff8126cc69>] unmap_grant_pages+0x75/0xd0
[  196.363054]  RSP <ffff88001b509cb8>
[  196.363054] CR2: ffff88001fca4049
[  196.363054] ---[ end trace 27b6948dea470010 ]---



23:01:09 # 6 :/mnt/lab/latest/ 
> 

[-- Attachment #3: pv-did-not-unmap.log --]
[-- Type: text/plain, Size: 24062 bytes --]

> xm list
Name                                        ID   Mem VCPUs      State   Time(s)
Domain-0                                     0   700     4     r-----     36.9

22:48:24 # 2 :~/ 
> cd /mnt/lab/latest

22:48:28 # 3 :/mnt/lab/latest/ 
> more pv.xm
kernel="/mnt/lab/latest/vmlinuz"
ramdisk="/mnt/lab/latest/initramfs.cpio.gz"
extra="console=hvc0 debug iommu=soft swiotlb=force"
memory=720
vcpus=1
cpus='2'
on_crash="preserve"
#vif = [ 'mac=00:0F:4B:00:00:68, bridge=switch' ]
#pci = ["04:00.0"]
vfb = [ 'vnc=1, vnclisten=0.0.0.0,vncunused=1']


22:48:29 # 4 :/mnt/lab/latest/ 
> xm create -c pv.xm
Using config file "./pv.xm".
Started domain pv.xm (id=3)
[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Linux version 2.6.38-rc2-00045-gcb81bee (konrad@phenom) (gcc version 4.4.4 20100503 (Red Hat 4.4.4-2) (GCC) ) #1 SMP Mon Feb 7 17:28:33 EST 2011
[    0.000000] Command line: console=hvc0 debug iommu=soft swiotlb=force
[    0.000000] ACPI in unprivileged domain disabled
[    0.000000] released 0 pages of unused memory
[    0.000000] Set 0 page(s) to 1-1 mapping.
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  Xen: 0000000000000000 - 00000000000a0000 (usable)
[    0.000000]  Xen: 00000000000a0000 - 0000000000100000 (reserved)
[    0.000000]  Xen: 0000000000100000 - 000000002d800000 (usable)
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] DMI not present or invalid.
[    0.000000] e820 update range: 0000000000000000 - 0000000000010000 (usable) ==> (reserved)
[    0.000000] e820 remove range: 00000000000a0000 - 0000000000100000 (usable)
[    0.000000] No AGP bridge found
[    0.000000] last_pfn = 0x2d800 max_arch_pfn = 0x400000000
[    0.000000] Scanning 0 areas for low memory corruption
[    0.000000] initial memory mapped : 0 - 0e3ff000
[    0.000000] init_memory_mapping: 0000000000000000-000000002d800000
[    0.000000]  0000000000 - 002d800000 page 4k
[    0.000000] kernel direct mapping tables up to 2d800000 @ e291000-e3ff000
[    0.000000] RAMDISK: 01b5c000 - 0d824000
[    0.000000] No NUMA configuration found
[    0.000000] Faking a node at 0000000000000000-000000002d800000
[    0.000000] Initmem setup node 0 0000000000000000-000000002d800000
[    0.000000]   NODE_DATA [000000002cffb000 - 000000002cffffff]
[    0.000000] Zone PFN ranges:
[    0.000000]   DMA      0x00000010 -> 0x00001000
[    0.000000]   DMA32    0x00001000 -> 0x00100000
[    0.000000]   Normal   empty
[    0.000000] Movable zone start PFN for each node
[    0.000000] early_node_map[2] active PFN ranges
[    0.000000]     0: 0x00000010 -> 0x000000a0
[    0.000000]     0: 0x00000100 -> 0x0002d800
[    0.000000] On node 0 totalpages: 186256
[    0.000000]   DMA zone: 56 pages used for memmap
[    0.000000]   DMA zone: 6 pages reserved
[    0.000000]   DMA zone: 3922 pages, LIFO batch:0
[    0.000000]   DMA32 zone: 2492 pages used for memmap
[    0.000000]   DMA32 zone: 179780 pages, LIFO batch:31
[    0.000000] SMP: Allowing 1 CPUs, 0 hotplug CPUs
[    0.000000] No local APIC present
[    0.000000] APIC: disable apic facility
[    0.000000] APIC: switched to apic NOOP
[    0.000000] nr_irqs_gsi: 16
[    0.000000] PM: Registered nosave memory: 00000000000a0000 - 0000000000100000
[    0.000000] Allocating PCI resources starting at 2d800000 (gap: 2d800000:d2800000)
[    0.000000] Booting paravirtualized kernel on Xen
[    0.000000] Xen version: 4.1-110207 (preserve-AD)
[    0.000000] setup_percpu: NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:1 nr_node_ids:1
[    0.000000] PERCPU: Embedded 28 pages/cpu @ffff88002cfce000 s83136 r8192 d23360 u114688
[    0.000000] pcpu-alloc: s83136 r8192 d23360 u114688 alloc=28*4096
[    0.000000] pcpu-alloc: [0] 0 
[    0.000000] Built 1 zonelists in Node order, mobility grouping on.  Total pages: 183702
[    0.000000] Policy zone: DMA32
[    0.000000] Kernel command line: console=hvc0 debug iommu=soft swiotlb=force
[    0.000000] PID hash table entries: 4096 (order: 3, 32768 bytes)
[    0.000000] Placing 64MB software IO TLB between ffff880027a00000 - ffff88002ba00000
[    0.000000] software IO TLB at phys 0x27a00000 - 0x2ba00000
[    0.000000] Memory: 453168k/745472k available (4423k kernel code, 448k absent, 291856k reserved, 4281k data, 644k init)
[    0.000000] SLUB: Genslabs=15, HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
[    0.000000] Hierarchical RCU implementation.
[    0.000000] 	RCU-based detection of stalled CPUs is disabled.
[    0.000000] NR_IRQS:4352 nr_irqs:256 16
[    0.000000] Console: colour dummy device 80x25
[    0.000000] console [tty0] enabled
[    0.000000] console [hvc0] enabled
[    0.000000] Xen: using vcpuop timer interface
[    0.000000] installing Xen timer for CPU 0
[    0.000000] Detected 3000.180 MHz processor.
[    0.000999] Calibrating delay loop (skipped), value calculated using timer frequency.. 6000.36 BogoMIPS (lpj=3000180)
[    0.000999] pid_max: default: 32768 minimum: 301
[    0.000999] Security Framework initialized
[    0.000999] SELinux:  Initializing.
[    0.000999] SELinux:  Starting in permissive mode
[    0.000999] Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes)
[    0.000999] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes)
[    0.000999] Mount-cache hash table entries: 256
[    0.000999] Initializing cgroup subsys ns
[    0.000999] ns_cgroup deprecated: consider using the 'clone_children' flag without the ns_cgroup.
[    0.000999] Initializing cgroup subsys cpuacct
[    0.000999] Initializing cgroup subsys freezer
[    0.001033] tseg: 0000000000
[    0.001042] CPU: Physical Processor ID: 0
[    0.001047] CPU: Processor Core ID: 3
[    0.001109] SMP alternatives: switching to UP code
[    0.002016] Freeing SMP alternatives: 16k freed
[    0.002094] cpu 0 spinlock event irq 17
[    0.002112] Performance Events: 
[    0.002117] no APIC, boot with the "lapic" boot parameter to force-enable it.
[    0.002124] no hardware sampling interrupt available.
[    0.002154] Broken PMU hardware detected, using software events only.
[    0.002636] MCE: In-kernel MCE decoding enabled.
[    0.002645] Brought up 1 CPUs
[    0.002820] kworker/u:0 used greatest stack depth: 6032 bytes left
[    0.003024] Grant table initialized
[    0.022013] Time: 165:165:165  Date: 165/165/65
[    0.022077] NET: Registered protocol family 16
[    0.022557] kworker/u:0 used greatest stack depth: 5768 bytes left
[    0.022654] Extended Config Space enabled on 0 nodes
[    0.022688] PCI: setting up Xen PCI frontend stub
[    0.022688] PCI: pci_cache_line_size set to 64 bytes
[    0.029298] bio: create slab <bio-0> at 0
[    0.029421] ACPI: Interpreter disabled.
[    0.030064] xen_balloon: Initialising balloon driver.
[    0.031071] last_pfn = 0x2d800 max_arch_pfn = 0x400000000
[    0.031142] vgaarb: loaded
[    0.031402] usbcore: registered new interface driver usbfs
[    0.031457] usbcore: registered new interface driver hub
[    0.031519] usbcore: registered new device driver usb
[    0.031751] PCI: System does not support PCI
[    0.031757] PCI: System does not support PCI
[    0.032116] NetLabel: Initializing
[    0.032123] NetLabel:  domain hash size = 128
[    0.032127] NetLabel:  protocols = UNLABELED CIPSOv4
[    0.032143] NetLabel:  unlabeled traffic allowed by default
[    0.032340] Switching to clocksource xen
[    0.034121] pnp: PnP ACPI: disabled
[    0.038514] NET: Registered protocol family 2
[    0.038592] IP route cache hash table entries: 8192 (order: 4, 65536 bytes)
[    0.038862] TCP established hash table entries: 32768 (order: 7, 524288 bytes)
[    0.039178] TCP bind hash table entries: 32768 (order: 8, 1048576 bytes)
[    0.039519] TCP: Hash tables configured (established 32768 bind 32768)
[    0.039528] TCP reno registered
[    0.039546] UDP hash table entries: 512 (order: 3, 49152 bytes)
[    0.039576] UDP-Lite hash table entries: 512 (order: 3, 49152 bytes)
[    0.039667] NET: Registered protocol family 1
[    0.039773] RPC: Registered udp transport module.
[    0.039780] RPC: Registered tcp transport module.
[    0.039785] RPC: Registered tcp NFSv4.1 backchannel transport module.
[    0.039791] PCI: CLS 0 bytes, default 64
[    0.039937] Trying to unpack rootfs image as initramfs...
[    0.325931] Freeing initrd memory: 193312k freed
[    0.393275] platform rtc_cmos: registered platform RTC device (no PNP device found)
[    0.393522] Machine check injector initialized
[    0.393890] microcode: CPU0: patch_level=0x1000086
[    0.393953] microcode: Microcode Update Driver: v2.00 <tigran@aivazian.fsnet.co.uk>, Peter Oruba
[    0.393967] Scanning for low memory corruption every 60 seconds
[    0.394308] audit: initializing netlink socket (disabled)
[    0.394327] type=2000 audit(1297118931.427:1): initialized
[    0.402798] HugeTLB registered 2 MB page size, pre-allocated 0 pages
[    0.406939] VFS: Disk quotas dquot_6.5.2
[    0.407038] Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
[    0.407682] NTFS driver 2.1.30 [Flags: R/W].
[    0.407877] msgmni has been set to 1262
[    0.407993] SELinux:  Registering netfilter hooks
[    0.408423] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 253)
[    0.408434] io scheduler noop registered
[    0.408439] io scheduler deadline registered
[    0.408508] io scheduler cfq registered (default)
[    0.408688] pci_hotplug: PCI Hot Plug PCI Core version: 0.5
[    0.445440] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
[    0.446215] Non-volatile memory driver v1.3
[    0.446225] Linux agpgart interface v0.103
[    0.446444] [drm] Initialized drm 1.1.0 20060810
[    0.448696] brd: module loaded
[    0.449898] loop: module loaded
[    0.450017] Fixed MDIO Bus: probed
[    0.450134] tun: Universal TUN/TAP device driver, 1.6
[    0.450140] tun: (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
[    0.450381] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
[    0.450390] ehci_hcd: block sizes: qh 104 qtd 96 itd 192 sitd 96
[    0.450447] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
[    0.450454] ohci_hcd: block sizes: ed 80 td 96
[    0.450503] uhci_hcd: USB Universal Host Controller Interface driver
[    0.450684] usbcore: registered new interface driver usblp
[    0.450735] usbcore: registered new interface driver libusual
[    0.450900] i8042: PNP: No PS/2 controller found. Probing ports directly.
[    0.451054] i8042: No controller found
[    0.451938] mousedev: PS/2 mouse device common for all mice
[    0.452213] rtc_cmos rtc_cmos: rtc core: registered rtc_cmos as rtc0
[    0.452292] rtc_cmos: probe of rtc_cmos failed with error -38
[    0.452482] cpuidle: using governor ladder
[    0.452488] cpuidle: using governor menu
[    0.452598] Netfilter messages via NETLINK v0.30.
[    0.452622] nf_conntrack version 0.5.0 (5050 buckets, 20200 max)
[    0.452765] ctnetlink v0.93: registering with nfnetlink.
[    0.453262] ip_tables: (C) 2000-2006 Netfilter Core Team
[    0.453297] TCP cubic registered
[    0.453302] Initializing XFRM netlink socket
[    0.453618] NET: Registered protocol family 10
[    0.454349] ip6_tables: (C) 2000-2006 Netfilter Core Team
[    0.454400] IPv6 over IPv4 tunneling driver
[    0.455032] NET: Registered protocol family 17
[    0.455099] Registering the dns_resolver key type
[    0.455338] PM: Hibernation image not present or could not be loaded.
[    0.455355] registered taskstats version 1
[    0.455367] XENBUS: Device with no driver: device/vkbd/0
[    0.455373] XENBUS: Device with no driver: device/vfb/0
[    0.455378] XENBUS: Device with no driver: device/console/0
[    0.455393]   Magic number: 1:252:3141
[    0.455828] Freeing unused kernel memory: 644k freed
[    0.456039] Write protecting the kernel read-only data: 8192k
[    0.459171] Testing CPA: undo ffffffff81000000-ffffffff81800000
[    0.461729] Testing CPA: again
[    0.465238] Freeing unused kernel memory: 1700k freed
[    0.466022] Freeing unused kernel memory: 492k freed
\rinit started: BusyBox v1.14.3 (2011-02-07 17:30:37 EST)
[    0.472912] consoletype used greatest stack depth: 5680 bytes left
Mounting directories  [  OK  ]
[    0.692896] modprobe used greatest stack depth: 5472 bytes left
[    0.696759] core_filesystem used greatest stack depth: 5408 bytes left
Running in PV context on Xen v4.1.
[    0.708036] input: Xen Virtual Keyboard as /devices/virtual/input/input0
[    0.708258] input: Xen Virtual Pointer as /devices/virtual/input/input1
[    0.924355] Initialising Xen virtual ethernet driver.
[    0.946476] udevd (1120): /proc/1120/oom_adj is deprecated, please use /proc/1120/oom_score_adj instead.
[    1.150005] ip used greatest stack depth: 4160 bytes left
Waiting for devices [  OK  ]
Waiting for fb [  OK  ]
Starting..[/dev/fb0]
/dev/fb0: len:0
/dev/fb0: bits/pixel32
(7ff625ae5000): Writting .. [800:600]
Done!
FATAL: Module agpgart_intel not found.
[    1.514549] Console: switching to colour frame buffer device 100x37
[    1.565206] [drm] radeon kernel modesetting enabled.
WARNING: Error inserting drm_kms_helper (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/drm_kms_helper.ko): No such device
WARNING: Error inserting ttm (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/ttm/ttm.ko): No such device
FATAL: Error inserting nouveau (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/nouveau/nouveau.ko): No such device
WARNING: Error inserting drm_kms_helper (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/drm_kms_helper.ko): No such device
FATAL: Error inserting i915 (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/i915/i915.ko): No such device
Waiting for network [  OK  ]
Bringing up loopback interface:  [  OK  ]
Bringing up interface eth0:  Device eth0 does not seem to be present, delaying initialization.
[FAILED]
Bringing up interface switch:  
Determining IP information for switch...[   12.738078] switch: no IPv6 routers present
[   30.394107] CPA self-test:
[   30.415265]  4k 186368 large 0 gb 0 x 10986[ffff880000000000-ffff88002d7ff000] miss 0
[   30.458299]  4k 186368 large 0 gb 0 x 10986[ffff880000000000-ffff88002d7ff000] miss 0
[   30.498973]  4k 186368 large 0 gb 0 x 10986[ffff880000000000-ffff88002d7ff000] miss 0
[   30.499026] ok.
 failed.
[FAILED]
/etc/init.d/rcS: line 30: NFS: command not found
Waiting for init.custom [  OK  ]
Start sshd

Starting SSHd ...

    SSH started [2149]


mount.nfs: Failed to resolve server 192.168.101.1: Address family for hostname not supported
mount.nfs: Failed to resolve server 192.168.101.1: Address family for hostname not supported
[   64.185816] [drm] Module unloaded
ERROR: Module nouveau does not exist in /proc/modules
 [0x0->0x2d000] pfn
 [0x0->0x2d000] level entry
 [0x2d000->0x40000] level middle
 [0x2d000->0x1ffffff] missing
 [0x40000->0x1ffffff] level top
Waiting for SSHd [  OK  ]
WARNING: ssh currently running [2149] ignoring start request
Starting..[/dev/fb0]
/dev/fb0: len:0
/dev/fb0: bits/pixel32
(7ffce2265000): Writting .. [800:600]
Done!
FATAL: Module iscsi_ibft not found.
iscsistart: Missing or Invalid version from /sys/module/scsi_transport_iscsi/version. Make sure a up to date scsi_transport_iscsi module is loaded and a up todate version of iscsid is running. Exiting...
Feb  7 22:49:55 (none) syslogd 1.5.0: restart.
FATAL: Module evtchn not found.
[   64.807128] Event-channel device installed.
xencommons should be started first.
           CPU0       
 16:       3925  xen-percpu-virq      timer0
 17:          0  xen-percpu-ipi       spinlock0
 18:          0  xen-percpu-ipi       resched0
 19:          0  xen-percpu-ipi       callfunc0
 20:          0  xen-percpu-virq      debug0
 21:          0  xen-percpu-ipi       callfuncsingle0
 22:        299   xen-dyn-event     xenbus
 23:         91   xen-dyn-event     hvc_console
 24:         37   xen-dyn-event     vkbd
 25:        381   xen-dyn-event     vfb
NMI:          0   Non-maskable interrupts
LOC:          0   Local timer interrupts
SPU:          0   Spurious interrupts
PMI:          0   Performance monitoring interrupts
IWI:          0   IRQ work interrupts
RES:          0   Rescheduling interrupts
CAL:          0   Function call interrupts
TLB:          0   TLB shootdowns
TRM:          0   Thermal event interrupts
THR:          0   Threshold APIC interrupts
MCE:          0   Machine check exceptions
MCP:          0   Machine check polls
ERR:          0
MIS:          0
00000000-0000ffff : reserved
00010000-0009ffff : System RAM
000a0000-000fffff : reserved
00100000-2d7fffff : System RAM
  01000000-01451cab : Kernel code
  01451cac-0188017f : Kernel data
  01928000-01a25fff : Kernel bss
Feb  7 22:49:55 (none) init: starting pid 2234, tty '/dev/tty0': '/bin/sh'
Feb  7 22:49:55 (none) init: starting pid 2235, tty '/dev/tty1': '/bin/sh'
Feb  7 22:49:55 (none) init: starting pid 2236, tty '/dev/hvc0': '/bin/sh'
sh-4.1# xens\atore-ls
xenstore-ls: xs_directory (/): Permission denied
sh-4.1# xen_\a\b \bd \b \b\b \b\b\b\b^[[Ktest_gnt 
add <domid>           return gntref, address
map <domid> <ref>     return index, address
adel <gntref>         delete <add> internal
ddel <index>          delete <map> internal
unmap <address>       unmap memory
show                  show all pages
<word>                append word to all mapped pages, show
 PID 8bf

> END

> ^C
sh-4.1# dmesg | grep Mem
[    0.000000] Memory: 453168k/745472k available (4423k kernel code, 448k absent, 291856k reserved, 4281k data, 644k init)
sh-4.1# 
sh-4.1# test\a_gnt 
add <domid>           return gntref, address
map <domid> <ref>     return index, address
adel <gntref>         delete <add> internal
ddel <index>          delete <map> internal
unmap <address>       unmap memory
show                  show all pages
<word>                append word to all mapped pages, show
 PID 8c2

> show
END

> map 3 8\b \b\b \b\b \b4 8
Mapped grant 4.8 as 0=0x7ff062b95000

> show
00(140670425190400,0): id      8c06b8b4567 n=11000000 b=
END

> show
00(140670425190400,0): id      8c06b8b4567 n=11000000 b=boo
END

> deadbeef
00(140670425190400,0): id      8c06b8b4567 n=11000000 b=deadbeefboo
END

> unmap 0x7ff062b95000
Unmapped page at (nil)

> show
00(140670425190400,0): id      8c06b8b4567 n=11000000 b=deadbeefboo
END

> unmap 8c06b8b4567
Unmapped page at 0x8

> show
00(140670425190400,0): id      8c06b8b4567 n=11000000 b=deadbeefboo
END

> unmap 8
Unmapped page at 0x8

> ^[[A^[[A\b \b\b \b\b \b\b \b\b \b\b \b\b \b\b \bunamp
00(140670425190400,0): id      8c06b8b4567 n=11000000 b=unampdeadbeefboo
END

> unmap 9\b \b0
Unmapped page at (nil)

> ^[[A\b \b\b \b\b \b\b \bshow
00(140670425190400,0): id      8c06b8b4567 n=11000000 b=unampdeadbeefboo
END

> unmap 11000000
Unmapped page at 0xa7d8c0

> show
00(140670425190400,0): id      8c06b8b4567 n=11000000 b=unampdeadbeefboo
END

> so\b \b\b \bshow\b \b\b \b\b \b\b \bq\b \b00(140670425190400,0): id      8c06b8b4567 n=11000000 b=helpddellddellunampdeadbeefboo
END

> ^C[  346.616114] BUG: unable to handle kernel paging request at ffff88001fe16049
[  346.617062] IP: [<ffffffff8126cc69>] unmap_grant_pages+0x75/0xd0
[  346.617062] PGD 1804067 PUD 1808067 PMD e31e067 PTE 0
[  346.617062] Oops: 0002 [#1] SMP 
[  346.617062] last sysfs file: /sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_map
[  346.617062] CPU 0 
[  346.617062] Modules linked in: xen_evtchn fbcon tileblit font bitblit ttm softcursor drm_kms_helper xen_blkfront xen_netfront xen_fbfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_kbdfront xenfs [last unloaded: dump_dma]
[  346.617062] 
[  346.617062] Pid: 2242, comm: test_gnt Not tainted 2.6.38-rc2-00045-gcb81bee #1 /
[  346.617062] RIP: e030:[<ffffffff8126cc69>]  [<ffffffff8126cc69>] unmap_grant_pages+0x75/0xd0
[  346.617062] RSP: e02b:ffff88001fe2bba8  EFLAGS: 00010202
[  346.617062] RAX: 0000000000000049 RBX: ffff880020685a20 RCX: 0000000000000158
[  346.617062] RDX: ffff880000000000 RSI: 0000000000000000 RDI: ffff880020685a20
[  346.617062] RBP: ffff88001fe2bbc8 R08: ffff88001fe2bc88 R09: ffff88002cfd91e0
[  346.617062] R10: 00000050b3edc8ec R11: 00000050b3edc8ec R12: 0000000000000000
[  346.617062] R13: 0000000000000001 R14: 000000001fe16049 R15: 0000000000000001
[  346.617062] FS:  00007ff062b99700(0000) GS:ffff88002cfce000(0000) knlGS:0000000000000000
[  346.617062] CS:  e033 DS: 0000 ES: 0000 CR0: 000000008005003b
[  346.617062] CR2: ffff88001fe16049 CR3: 000000001edd5000 CR4: 0000000000000660
[  346.617062] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  346.617062] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  346.617062] Process test_gnt (pid: 2242, threadinfo ffff88001fe2a000, task ffff8800276d74f0)
[  346.617062] Stack:
[  346.617062]  ffff880020685a20 ffff880020685a90 ffff880020685a80 ffff8800276e9320
[  346.617062]  ffff88001fe2bbf8 ffffffff8126d3a8 ffff8800276e9320 ffff880020685ab0
[  346.617062]  ffff8800276e92c0 ffff8800276e92c0 ffff88001fe2bc18 ffffffff810ec4dd
[  346.617062] Call Trace:
[  346.617062]  [<ffffffff8126d3a8>] mn_release+0x3a/0x6e
[  346.617062]  [<ffffffff810ec4dd>] __mmu_notifier_release+0x51/0x85
[  346.617062]  [<ffffffff810d9756>] exit_mmap+0x27/0x101
[  346.617062]  [<ffffffff8104739e>] mmput+0x30/0xd7
[  346.617062]  [<ffffffff8104b173>] exit_mm+0x129/0x136
[  346.617062]  [<ffffffff8104cc1b>] do_exit+0x208/0x7aa
[  346.617062]  [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf
[  346.617062]  [<ffffffff81006cd2>] ? check_events+0x12/0x20
[  346.617062]  [<ffffffff811fd12f>] ? do_raw_spin_lock+0x6b/0x120
[  346.617062]  [<ffffffff8104d22f>] do_group_exit+0x72/0x9a
[  346.617062]  [<ffffffff81059fa9>] get_signal_to_deliver+0x360/0x37f
[  346.617062]  [<ffffffff81274492>] ? n_tty_read+0x6d5/0x7ad
[  346.617062]  [<ffffffff81009f53>] do_signal+0x6d/0x68b
[  346.617062]  [<ffffffff8103dbd0>] ? __wake_up+0x3f/0x48
[  346.617062]  [<ffffffff81276bb6>] ? put_ldisc+0xac/0xb1
[  346.617062]  [<ffffffff81276ca5>] ? tty_ldisc_deref+0x9/0xb
[  346.617062]  [<ffffffff8126f943>] ? tty_read+0x8c/0xc8
[  346.617062]  [<ffffffff8100a598>] do_notify_resume+0x27/0x5f
[  346.617062]  [<ffffffff8100ac60>] int_signal+0x12/0x17
[  346.617062] Code: 00 00 00 88 ff ff 49 01 c6 48 b8 b7 6d db b6 6d db b6 6d 49 c1 fe 03 4c 0f af f0 48 63 43 30 49 c1 e6 0c 25 ff 0f 00 00 49 01 c6 <41> c6 04 16 00 83 63 2c fe 49 63 fc 48 8b 73 50 44 89 ea 48 6b 
[  346.617062] RIP  [<ffffffff8126cc69>] unmap_grant_pages+0x75/0xd0
[  346.617062]  RSP <ffff88001fe2bba8>
[  346.617062] CR2: ffff88001fe16049
[  346.617062] ---[ end trace 31f588d5e4d9bf59 ]---
[  346.617062] Fixing recursive fault but reboot is needed!



ccc

^C^C^C

Feb  7 22:57:24 (none) init: starting pid 2244, tty '': '/etc/init.d/halt'
Usage: /etc/init.d/halt {start}
\rThe system is going down NOW!
\rSent SIGTERM to all processes
Feb  7 22:57:24 (none) exiting on signal 15
\rSent SIGKILL to all processes
\rRequesting system poweroff
[  515.071484] xenbus_dev_shutdown: device/console/0: Initialising != Connected, skipping
[  515.079285] System halted.


[-- Attachment #4: pv-domain-exporting-grants.log --]
[-- Type: text/plain, Size: 20965 bytes --]

22:48:37 # 2 :/mnt/lab/latest/ 
> more pv-2.xm
kernel="/mnt/lab/latest/vmlinuz"
ramdisk="/mnt/lab/latest/initramfs.cpio.gz"
extra="console=hvc0 debug iommu=soft swiotlb=force"
memory=800
vcpus=1
cpus='2'
on_crash="preserve"
#vif = [ 'mac=00:0F:4B:00:00:68, bridge=switch' ]
#pci = ["04:00.0"]
vfb = [ 'vnc=1, vnclisten=0.0.0.0,vncunused=1']


22:48:40 # 3 :/mnt/lab/latest/ 
> xm create -c pv-2.xm
Using config file "./pv-2.xm".
Started domain pv-2.xm (id=4)
[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Linux version 2.6.38-rc2-00045-gcb81bee (konrad@phenom) (gcc version 4.4.4 20100503 (Red Hat 4.4.4-2) (GCC) ) #1 SMP Mon Feb 7 17:28:33 EST 2011
[    0.000000] Command line: console=hvc0 debug iommu=soft swiotlb=force
[    0.000000] ACPI in unprivileged domain disabled
[    0.000000] released 0 pages of unused memory
[    0.000000] Set 0 page(s) to 1-1 mapping.
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  Xen: 0000000000000000 - 00000000000a0000 (usable)
[    0.000000]  Xen: 00000000000a0000 - 0000000000100000 (reserved)
[    0.000000]  Xen: 0000000000100000 - 0000000032800000 (usable)
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] DMI not present or invalid.
[    0.000000] e820 update range: 0000000000000000 - 0000000000010000 (usable) ==> (reserved)
[    0.000000] e820 remove range: 00000000000a0000 - 0000000000100000 (usable)
[    0.000000] No AGP bridge found
[    0.000000] last_pfn = 0x32800 max_arch_pfn = 0x400000000
[    0.000000] Scanning 0 areas for low memory corruption
[    0.000000] initial memory mapped : 0 - 0e3ff000
[    0.000000] init_memory_mapping: 0000000000000000-0000000032800000
[    0.000000]  0000000000 - 0032800000 page 4k
[    0.000000] kernel direct mapping tables up to 32800000 @ e269000-e3ff000
[    0.000000] RAMDISK: 01b5c000 - 0d824000
[    0.000000] No NUMA configuration found
[    0.000000] Faking a node at 0000000000000000-0000000032800000
[    0.000000] Initmem setup node 0 0000000000000000-0000000032800000
[    0.000000]   NODE_DATA [0000000031ffb000 - 0000000031ffffff]
[    0.000000] Zone PFN ranges:
[    0.000000]   DMA      0x00000010 -> 0x00001000
[    0.000000]   DMA32    0x00001000 -> 0x00100000
[    0.000000]   Normal   empty
[    0.000000] Movable zone start PFN for each node
[    0.000000] early_node_map[2] active PFN ranges
[    0.000000]     0: 0x00000010 -> 0x000000a0
[    0.000000]     0: 0x00000100 -> 0x00032800
[    0.000000] On node 0 totalpages: 206736
[    0.000000]   DMA zone: 56 pages used for memmap
[    0.000000]   DMA zone: 6 pages reserved
[    0.000000]   DMA zone: 3922 pages, LIFO batch:0
[    0.000000]   DMA32 zone: 2772 pages used for memmap
[    0.000000]   DMA32 zone: 199980 pages, LIFO batch:31
[    0.000000] SMP: Allowing 1 CPUs, 0 hotplug CPUs
[    0.000000] No local APIC present
[    0.000000] APIC: disable apic facility
[    0.000000] APIC: switched to apic NOOP
[    0.000000] nr_irqs_gsi: 16
[    0.000000] PM: Registered nosave memory: 00000000000a0000 - 0000000000100000
[    0.000000] Allocating PCI resources starting at 32800000 (gap: 32800000:cd800000)
[    0.000000] Booting paravirtualized kernel on Xen
[    0.000000] Xen version: 4.1-110207 (preserve-AD)
[    0.000000] setup_percpu: NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:1 nr_node_ids:1
[    0.000000] PERCPU: Embedded 28 pages/cpu @ffff880031fce000 s83136 r8192 d23360 u114688
[    0.000000] pcpu-alloc: s83136 r8192 d23360 u114688 alloc=28*4096
[    0.000000] pcpu-alloc: [0] 0 
[    0.000000] Built 1 zonelists in Node order, mobility grouping on.  Total pages: 203902
[    0.000000] Policy zone: DMA32
[    0.000000] Kernel command line: console=hvc0 debug iommu=soft swiotlb=force
[    0.000000] PID hash table entries: 4096 (order: 3, 32768 bytes)
[    0.000000] Placing 64MB software IO TLB between ffff88002c800000 - ffff880030800000
[    0.000000] software IO TLB at phys 0x2c800000 - 0x30800000
[    0.000000] Memory: 532972k/827392k available (4423k kernel code, 448k absent, 293972k reserved, 4281k data, 644k init)
[    0.000000] SLUB: Genslabs=15, HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
[    0.000000] Hierarchical RCU implementation.
[    0.000000] 	RCU-based detection of stalled CPUs is disabled.
[    0.000000] NR_IRQS:4352 nr_irqs:256 16
[    0.000000] Console: colour dummy device 80x25
[    0.000000] console [tty0] enabled
[    0.000000] console [hvc0] enabled
[    0.000000] Xen: using vcpuop timer interface
[    0.000000] installing Xen timer for CPU 0
[    0.000000] Detected 3000.180 MHz processor.
[    0.000999] Calibrating delay loop (skipped), value calculated using timer frequency.. 6000.36 BogoMIPS (lpj=3000180)
[    0.000999] pid_max: default: 32768 minimum: 301
[    0.000999] Security Framework initialized
[    0.000999] SELinux:  Initializing.
[    0.000999] SELinux:  Starting in permissive mode
[    0.000999] Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes)
[    0.000999] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes)
[    0.000999] Mount-cache hash table entries: 256
[    0.000999] Initializing cgroup subsys ns
[    0.000999] ns_cgroup deprecated: consider using the 'clone_children' flag without the ns_cgroup.
[    0.000999] Initializing cgroup subsys cpuacct
[    0.000999] Initializing cgroup subsys freezer
[    0.001033] tseg: 0000000000
[    0.001042] CPU: Physical Processor ID: 0
[    0.001047] CPU: Processor Core ID: 1
[    0.001109] SMP alternatives: switching to UP code
[    0.002017] Freeing SMP alternatives: 16k freed
[    0.002096] cpu 0 spinlock event irq 17
[    0.002114] Performance Events: 
[    0.002119] no APIC, boot with the "lapic" boot parameter to force-enable it.
[    0.002126] no hardware sampling interrupt available.
[    0.002156] Broken PMU hardware detected, using software events only.
[    0.002326] MCE: In-kernel MCE decoding enabled.
[    0.002334] Brought up 1 CPUs
[    0.002511] kworker/u:0 used greatest stack depth: 6032 bytes left
[    0.003097] Grant table initialized
[    0.022107] Time: 165:165:165  Date: 165/165/65
[    0.022180] NET: Registered protocol family 16
[    0.022633] kworker/u:0 used greatest stack depth: 5768 bytes left
[    0.022770] Extended Config Space enabled on 0 nodes
[    0.022803] PCI: setting up Xen PCI frontend stub
[    0.022803] PCI: pci_cache_line_size set to 64 bytes
[    0.029306] bio: create slab <bio-0> at 0
[    0.029431] ACPI: Interpreter disabled.
[    0.031042] xen_balloon: Initialising balloon driver.
[    0.031122] last_pfn = 0x32800 max_arch_pfn = 0x400000000
[    0.031160] vgaarb: loaded
[    0.031406] usbcore: registered new interface driver usbfs
[    0.031458] usbcore: registered new interface driver hub
[    0.031519] usbcore: registered new device driver usb
[    0.031756] PCI: System does not support PCI
[    0.031763] PCI: System does not support PCI
[    0.032190] NetLabel: Initializing
[    0.032197] NetLabel:  domain hash size = 128
[    0.032202] NetLabel:  protocols = UNLABELED CIPSOv4
[    0.032218] NetLabel:  unlabeled traffic allowed by default
[    0.032412] Switching to clocksource xen
[    0.034122] pnp: PnP ACPI: disabled
[    0.038531] NET: Registered protocol family 2
[    0.038650] IP route cache hash table entries: 32768 (order: 6, 262144 bytes)
[    0.039224] TCP established hash table entries: 131072 (order: 9, 2097152 bytes)
[    0.040199] TCP bind hash table entries: 65536 (order: 9, 2097152 bytes)
[    0.040889] TCP: Hash tables configured (established 131072 bind 65536)
[    0.040899] TCP reno registered
[    0.040919] UDP hash table entries: 512 (order: 3, 49152 bytes)
[    0.040948] UDP-Lite hash table entries: 512 (order: 3, 49152 bytes)
[    0.041018] NET: Registered protocol family 1
[    0.041229] RPC: Registered udp transport module.
[    0.041235] RPC: Registered tcp transport module.
[    0.041240] RPC: Registered tcp NFSv4.1 backchannel transport module.
[    0.041247] PCI: CLS 0 bytes, default 64
[    0.041394] Trying to unpack rootfs image as initramfs...
[    0.328808] Freeing initrd memory: 193312k freed
[    0.394814] platform rtc_cmos: registered platform RTC device (no PNP device found)
[    0.395166] Machine check injector initialized
[    0.395517] microcode: CPU0: patch_level=0x1000086
[    0.395578] microcode: Microcode Update Driver: v2.00 <tigran@aivazian.fsnet.co.uk>, Peter Oruba
[    0.395587] Scanning for low memory corruption every 60 seconds
[    0.395909] audit: initializing netlink socket (disabled)
[    0.395928] type=2000 audit(1297118949.293:1): initialized
[    0.403884] HugeTLB registered 2 MB page size, pre-allocated 0 pages
[    0.408118] VFS: Disk quotas dquot_6.5.2
[    0.408294] Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
[    0.408798] NTFS driver 2.1.30 [Flags: R/W].
[    0.408997] msgmni has been set to 1418
[    0.409138] SELinux:  Registering netfilter hooks
[    0.409543] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 253)
[    0.409553] io scheduler noop registered
[    0.409558] io scheduler deadline registered
[    0.409629] io scheduler cfq registered (default)
[    0.409810] pci_hotplug: PCI Hot Plug PCI Core version: 0.5
[    0.446576] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
[    0.447352] Non-volatile memory driver v1.3
[    0.447361] Linux agpgart interface v0.103
[    0.447579] [drm] Initialized drm 1.1.0 20060810
[    0.449911] brd: module loaded
[    0.450994] loop: module loaded
[    0.451244] Fixed MDIO Bus: probed
[    0.451334] tun: Universal TUN/TAP device driver, 1.6
[    0.451341] tun: (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
[    0.451580] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
[    0.451589] ehci_hcd: block sizes: qh 104 qtd 96 itd 192 sitd 96
[    0.451646] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
[    0.451653] ohci_hcd: block sizes: ed 80 td 96
[    0.451701] uhci_hcd: USB Universal Host Controller Interface driver
[    0.451879] usbcore: registered new interface driver usblp
[    0.451929] usbcore: registered new interface driver libusual
[    0.452119] i8042: PNP: No PS/2 controller found. Probing ports directly.
[    0.452944] i8042: No controller found
[    0.453024] mousedev: PS/2 mouse device common for all mice
[    0.453360] rtc_cmos rtc_cmos: rtc core: registered rtc_cmos as rtc0
[    0.453440] rtc_cmos: probe of rtc_cmos failed with error -38
[    0.453629] cpuidle: using governor ladder
[    0.453636] cpuidle: using governor menu
[    0.453746] Netfilter messages via NETLINK v0.30.
[    0.453769] nf_conntrack version 0.5.0 (5674 buckets, 22696 max)
[    0.453908] ctnetlink v0.93: registering with nfnetlink.
[    0.454385] ip_tables: (C) 2000-2006 Netfilter Core Team
[    0.454419] TCP cubic registered
[    0.454424] Initializing XFRM netlink socket
[    0.454743] NET: Registered protocol family 10
[    0.455501] ip6_tables: (C) 2000-2006 Netfilter Core Team
[    0.455556] IPv6 over IPv4 tunneling driver
[    0.456227] NET: Registered protocol family 17
[    0.456265] Registering the dns_resolver key type
[    0.456501] PM: Hibernation image not present or could not be loaded.
[    0.456520] registered taskstats version 1
[    0.456533] XENBUS: Device with no driver: device/vkbd/0
[    0.456538] XENBUS: Device with no driver: device/vfb/0
[    0.456543] XENBUS: Device with no driver: device/console/0
[    0.456558]   Magic number: 1:252:3141
[    0.457000] Freeing unused kernel memory: 644k freed
[    0.457254] Write protecting the kernel read-only data: 8192k
[    0.460469] Testing CPA: undo ffffffff81000000-ffffffff81800000
[    0.462996] Testing CPA: again
[    0.466507] Freeing unused kernel memory: 1700k freed
[    0.467310] Freeing unused kernel memory: 492k freed
\rinit started: BusyBox v1.14.3 (2011-02-07 17:30:37 EST)
[    0.474262] consoletype used greatest stack depth: 5680 bytes left
Mounting directories  [  OK  ]
[    0.695544] modprobe used greatest stack depth: 5472 bytes left
[    0.699416] core_filesystem used greatest stack depth: 5408 bytes left
Running in PV context on Xen v4.1.
[    0.710732] input: Xen Virtual Keyboard as /devices/virtual/input/input0
[    0.710941] input: Xen Virtual Pointer as /devices/virtual/input/input1
[    0.926360] Initialising Xen virtual ethernet driver.
[    0.948476] udevd (1120): /proc/1120/oom_adj is deprecated, please use /proc/1120/oom_score_adj instead.
[    1.153666] ip used greatest stack depth: 4160 bytes left
Waiting for devices [  OK  ]
Waiting for fb [  OK  ]
Starting..[/dev/fb0]
/dev/fb0: len:0
/dev/fb0: bits/pixel32
(7fa1eeb76000): Writting .. [800:600]
Done!
FATAL: Module agpgart_intel not found.
[    1.518929] Console: switching to colour frame buffer device 100x37
[    1.570366] [drm] radeon kernel modesetting enabled.
WARNING: Error inserting drm_kms_helper (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/drm_kms_helper.ko): No such device
WARNING: Error inserting ttm (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/ttm/ttm.ko): No such device
FATAL: Error inserting nouveau (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/nouveau/nouveau.ko): No such device
WARNING: Error inserting drm_kms_helper (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/drm_kms_helper.ko): No such device
FATAL: Error inserting i915 (/lib/modules/2.6.38-rc2-00045-gcb81bee/kernel/drivers/gpu/drm/i915/i915.ko): No such device
Waiting for network [  OK  ]
Bringing up loopback interface:  [  OK  ]
Bringing up interface eth0:  Device eth0 does not seem to be present, delaying initialization.
[FAILED]
Bringing up interface switch:  
Determining IP information for switch...[   12.754080] switch: no IPv6 routers present
[   30.395103] CPA self-test:
[   30.418489]  4k 206848 large 0 gb 0 x 10986[ffff880000000000-ffff8800327ff000] miss 0
[   30.463061]  4k 206848 large 0 gb 0 x 10986[ffff880000000000-ffff8800327ff000] miss 0
[   30.505465]  4k 206848 large 0 gb 0 x 10986[ffff880000000000-ffff8800327ff000] miss 0
[   30.505518] ok.
 failed.
[FAILED]
/etc/init.d/rcS: line 30: NFS: command not found
Waiting for init.custom [  OK  ]
Start sshd

Starting SSHd ...

    SSH started [2149]


mount.nfs: Failed to resolve server 192.168.101.1: Address family for hostname not supported
mount.nfs: Failed to resolve server 192.168.101.1: Address family for hostname not supported
[   64.355127] [drm] Module unloaded
ERROR: Module nouveau does not exist in /proc/modules
 [0x0->0x32000] pfn
 [0x0->0x32000] level entry
 [0x32000->0x40000] level middle
 [0x32000->0x1ffffff] missing
 [0x40000->0x1ffffff] level top
Waiting for SSHd [  OK  ]
WARNING: ssh currently running [2149] ignoring start request
Starting..[/dev/fb0]
/dev/fb0: len:0
/dev/fb0: bits/pixel32
(7f0537b62000): Writting .. [800:600]
Done!
FATAL: Module iscsi_ibft not found.
iscsistart: Missing or Invalid version from /sys/module/scsi_transport_iscsi/version. Make sure a up to date scsi_transport_iscsi module is loaded and a up todate version of iscsid is running. Exiting...
Feb  7 22:50:13 (none) syslogd 1.5.0: restart.
FATAL: Module evtchn not found.
[   64.998808] Event-channel device installed.
xencommons should be started first.
           CPU0       
 16:       4014  xen-percpu-virq      timer0
 17:          0  xen-percpu-ipi       spinlock0
 18:          0  xen-percpu-ipi       resched0
 19:          0  xen-percpu-ipi       callfunc0
 20:          0  xen-percpu-virq      debug0
 21:          0  xen-percpu-ipi       callfuncsingle0
 22:        286   xen-dyn-event     xenbus
 23:         88   xen-dyn-event     hvc_console
 24:        186   xen-dyn-event     vkbd
 25:        382   xen-dyn-event     vfb
NMI:          0   Non-maskable interrupts
LOC:          0   Local timer interrupts
SPU:          0   Spurious interrupts
PMI:          0   Performance monitoring interrupts
IWI:          0   IRQ work interrupts
RES:          0   Rescheduling interrupts
CAL:          0   Function call interrupts
TLB:          0   TLB shootdowns
TRM:          0   Thermal event interrupts
THR:          0   Threshold APIC interrupts
MCE:          0   Machine check exceptions
MCP:          0   Machine check polls
ERR:          0
MIS:          0
00000000-0000ffff : reserved
00010000-0009ffff : System RAM
000a0000-000fffff : reserved
00100000-327fffff : System RAM
  01000000-01451cab : Kernel code
  01451cac-0188017f : Kernel data
  01928000-01a25fff : Kernel bss
Feb  7 22:50:13 (none) init: starting pid 2234, tty '/dev/tty0': '/bin/sh'
Feb  7 22:50:13 (none) init: starting pid 2235, tty '/dev/tty1': '/bin/sh'
Feb  7 22:50:13 (none) init: starting pid 2236, tty '/dev/hvc0': '/bin/sh'
sh-4.1# test_gnt \b\b\b\b\b\b\b\b\b^[[Kcat /pro\b\b\b\b\b\b\b\b^[[Kdmesg | grep Memo
[    0.000000] Memory: 532972k/827392k available (4423k kernel code, 448k absent, 293972k reserved, 4281k data, 644k init)
sh-4.1# 
sh-4.1# tet_\b \b\b \bst_gnt
add <domid>           return gntref, address
map <domid> <ref>     return index, address
adel <gntref>         delete <add> internal
ddel <index>          delete <map> internal
unmap <address>       unmap memory
show                  show all pages
<word>                append word to all mapped pages, show
 PID 8c0

> show
END

> add 3
Created shared page with domain 3, grant #8. Mapped locally at 0=0x7f4422411000

> show
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=
END

> boo
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=boo
END

> show
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=deadbeefboo
END

> show\b \b\b \b\b \b\b \bddel 3
gu error: No such file or directory (rv=-1)

> ddel 7f4422411000
gu error: No such file or directory (rv=-1)

> ddel 0
gu error: No such file or directory (rv=-1)

> ddel
gu error: No such file or directory (rv=-1)

> show
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=unampdeadbeefboo
END

> ddel 8c06b8b4567
gu error: No such file or directory (rv=-1)

> ddel 11000000
gu error: No such file or directory (rv=-1)

> ddell 0
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=ddellunampdeadbeefboo
END

> dde\b \b\b \be\b \bdell 0
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=ddellddellunampdeadbeefboo
END

> show
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=ddellddellunampdeadbeefboo
END

> 
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=ddellddellunampdeadbeefboo
END

> help
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=helpddellddellunampdeadbeefboo
END

> 
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=helphelpddellddellunampdeadbeefboo
END

> 
00(139930609192960,0): id      8c06b8b4567 n=11000000 b=helphelphelpddellddellunampdeadbeefboo
END

> ^C[  331.822244] test_gnt:2240 freeing invalid memtype 0-1000

sh-4.1# 
sh-4.1# 
sh-4.1# test_gnt
add <domid>           return gntref, address
map <domid> <ref>     return index, address
adel <gntref>         delete <add> internal
ddel <index>          delete <map> internal
unmap <address>       unmap memory
show                  show all pages
<word>                append word to all mapped pages, show
 PID 8c1

> add 5
Created shared page with domain 5, grant #9. Mapped locally at 0=0x7fee0345f000

> deadbeef is neat
00(140660233859072,0): id      8c16b8b4567 n=11000000 b=deadbeef
END

> show
00(140660233859072,0): id      8c16b8b4567 n=11000000 b=Woot!deadbeef
END

> 
00(140660233859072,0): id      8c16b8b4567 n=11000000 b=Woot!deadbeef
END

> show
00(140660233859072,0): id      8c16b8b4567 n=11000000 b=Woot!deadbeef
END

> show
00(140660233859072,0): id      8c16b8b4567 n=11000000 b=Woot!deadbeef
END

> ddel 0
gu error: No such file or directory (rv=-1)

> ddel 8c16b8b4567
gu error: No such file or directory (rv=-1)

> show
00(140660233859072,0): id      8c16b8b4567 n=11000000 b=Woot!deadbeef
END

> 00(140660233859072,0): id      8c16b8b4567 n=11000000 b=Woot!deadbeef
END

> ^C[  756.625232] test_gnt:2241 freeing invalid memtype 0-1000

sh-4.1# 

[-- Attachment #5: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH] xen-gntdev: Fix unmap notify on PV domains
  2011-02-07 23:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
@ 2011-02-08 14:14   ` Daniel De Graaf
  2011-02-08 22:58     ` Konrad Rzeszutek Wilk
  2011-02-08 21:49   ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
  1 sibling, 1 reply; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-08 14:14 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

In paravirtualized guests, the struct page* for mappings is only a
placeholder, and cannot be used to access the granted memory. Use the
userspace mapping that we have set up in order to implement
UNMAP_NOTIFY_CLEAR_BYTE.
    
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 06de2c0..2b67f15 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -287,7 +287,12 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 
 	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
 		int pgno = (map->notify.addr >> PAGE_SHIFT);
-		if (pgno >= offset && pgno < offset + pages) {
+		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
+			void __user *tmp;
+			tmp = map->vma->vm_start + map->notify.addr;
+			copy_to_user(tmp, &err, 1);
+			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+		} else if (pgno >= offset && pgno < offset + pages) {
 			uint8_t *tmp = kmap(map->pages[pgno]);
 			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
 			kunmap(map->pages[pgno]);
@@ -296,7 +301,7 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 	}
 
 	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
-	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
+	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
 	if (err)
 		return err;

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH v6] Userspace grant communication
  2011-02-07 23:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
  2011-02-08 14:14   ` [PATCH] xen-gntdev: Fix unmap notify on PV domains Daniel De Graaf
@ 2011-02-08 21:49   ` Konrad Rzeszutek Wilk
  2011-02-09 20:11     ` [PATCH] xen-gntdev: Use map->vma for checking map validity Daniel De Graaf
  2011-02-09 20:12     ` [PATCH] xen-gntdev: Avoid unmapping ranges twice Daniel De Graaf
  1 sibling, 2 replies; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-08 21:49 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Mon, Feb 07, 2011 at 06:14:16PM -0500, Konrad Rzeszutek Wilk wrote:
> On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote:
> > Changes since v5:
> >   - Added a tested xen version to workaround in #4
> >   - Cleaned up variable names & structures
> >   - Clarified some of the cleanup in gntalloc
> >   - Removed copyright statement from public-domain files
> > 
> > [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
> > [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
> > [PATCH 3/6] xen-gntdev: Add reference counting to maps
> > [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
> > [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
> > [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl
> > 
> > Test/Demo code (also updated):
> 
> I played with this (two PV domains) and I got two failures:

With your latest patch the issue described earlier disappears,
but if I try to map a non-existent page (say I am confused) I get
this:

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 75f8037..2dd2efa 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -17,7 +17,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#undef DEBUG
+#define DEBUG 1
 
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -304,12 +304,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 
 	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
-	if (err)
+	if (err) {
+		printk(KERN_WARNING "grant unmapping hypercall failed: %d\n", err);
 		return err;
-
+	}
 	for (i = 0; i < pages; i++) {
-		if (map->unmap_ops[offset+i].status)
+		if (map->unmap_ops[offset+i].status) {
+			printk(KERN_WARNING "%lx is still active: %d\n",
+				offset+i, map->unmap_ops[offset+i].status);
 			err = -EINVAL;
+		}
 		map->unmap_ops[offset+i].handle = 0;
 	}
 	return err;

(the tree I tested differs from #master by the patch above):

> map 1 9
[  234.522158] priv ffff88001afac420, add 1
[  234.522742] gntdev_print_maps: maps list (priv ffff88001afac420)
[  234.523061]   index  0, count  1 [new]
[  234.523892] map 0+1 at 7fdee5890000 (pgoff 0)
[  234.524464] map 0+1
Could not map grant 1.9: Invalid argument (map failed)

> map 1 8
[  236.313135] priv ffff88001afac420, add 1
[  236.313739] gntdev_print_maps: maps list (priv ffff88001afac420)
[  236.314062]   index  0, count  1 
[  236.314062]   index  1, count  1 [new]
[  236.315521] map 1+1 at 7fdee588f000 (pgoff 1)
[  236.316120] map 1+1
Mapped grant 1.8 as 4096=0x7fdee588f000

> map 1 10
[  242.833149] priv ffff88001afac420, add 1
[  242.833813] gntdev_print_maps: maps list (priv ffff88001afac420)
[  242.834063]   index  0, count  1 
[  242.834063]   index  1, count  1 
[  242.834063]   index  2, count  1 [new]
[  242.836265] map 2+1 at 7fdee588e000 (pgoff 2)
[  242.836866] map 2+1
Could not map grant 1.10: Invalid argument (map failed)

> map 3 10
[  247.224151] priv ffff88001afac420, add 1
[  247.224808] gntdev_print_maps: maps list (priv ffff88001afac420)
[  247.225062]   index  0, count  1 
[  247.225062]   index  1, count  1 
[  247.225062]   index  2, count  1 
[  247.225062]   index  3, count  1 [new][  247.227637] map 3+1 at 7fdee588d000 (pgoff 3)
[  247.228180] map 3+1
Could not map grant 3.10: Invalid argument (map failed)

> unmap 140366365671424
Unmapped page at 0x7fa9975d4000

> show
00(140595310424064,4096): id      99c6b8b4567 n=11000000 b=adjakasdaadaskda
END

> ^C[  252.265142] map 0+1 (7fdee588f000 7fdee5890000)
[  252.265724] map 0+1 [0+1]
[  252.296327] 0 is still active: -1
[  252.296834] ------------[ cut here ]------------
[  252.297310] WARNING: at /home/konrad/ssd/linux/drivers/xen/gntdev.c:396 mn_release+0x6e/0x90()
[  252.297310] Modules linked in: xen_evtchn fbcon tileblit font bitblit softcursor ttm drm_kms_helper xen_blkfront xen_netfront xen_fbfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_kbdfront xenfs [last unloaded: dump_dma]
[  252.297310] Pid: 2313, comm: test_gnt Tainted: G        W   2.6.38-rc4-xtt-00169-gb68565e-dirty #3
[  252.297310] Call Trace:
[  252.297310]  [<ffffffff81049580>] ? warn_slowpath_common+0x80/0x98
[  252.297310]  [<ffffffff810495ad>] ? warn_slowpath_null+0x15/0x17
[  252.297310]  [<ffffffff8126dcd6>] ? mn_release+0x6e/0x90
[  252.297310]  [<ffffffff810ec5f1>] ? __mmu_notifier_release+0x51/0x85
[  252.297310]  [<ffffffff810d9886>] ? exit_mmap+0x27/0x101
[  252.297310]  [<ffffffff810473c2>] ? mmput+0x30/0xd7
[  252.297310]  [<ffffffff8104b197>] ? exit_mm+0x129/0x136
[  252.297310]  [<ffffffff8104cc3f>] ? do_exit+0x208/0x7aa
[  252.297310]  [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf
[  252.297310]  [<ffffffff81006cd2>] ? check_events+0x12/0x20
[  252.297310]  [<ffffffff8127ad0a>] ? put_ldisc+0xac/0xb1
[  252.297310]  [<ffffffff8127adf9>] ? tty_ldisc_deref+0x9/0xb
[  252.297310]  [<ffffffff81273a97>] ? tty_read+0x8c/0xc8
[  252.297310]  [<ffffffff8100a598>] ? do_notify_resume+0x27/0x5f
[  252.297310]  [<ffffffff8100ac60>] ? int_signal+0x12/0x17
[  252.297310] ---[ end trace e3791f053b755549 ]---
[  252.297310] map 1+1 (7fdee588f000 7fdee5890000)
[  252.297310] map 1+1 [0+1]
[  252.315756] map 2+1 (7fdee588d000 7fdee588e000)
[  252.316341] map 2+1 [0+1]
[  252.346990] 0 is still active: -4
[  252.347569] ------------[ cut here ]------------
[  252.347976] WARNING: at /home/konrad/ssd/linux/drivers/xen/gntdev.c:396 mn_release+0x6e/0x90()
[  252.347976] Modules linked in: xen_evtchn fbcon tileblit font bitblit softcursor ttm drm_kms_helper xen_blkfront xen_netfront xen_fbfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_kbdfront xenfs [last unloaded: dump_dma]
[  252.347976] Pid: 2313, comm: test_gnt Tainted: G        W   2.6.38-rc4-xtt-00169-gb68565e-dirty #3
[  252.347976] Call Trace:
[  252.347976]  [<ffffffff81049580>] ? warn_slowpath_common+0x80/0x98
[  252.347976]  [<ffffffff810495ad>] ? warn_slowpath_null+0x15/0x17
[  252.347976]  [<ffffffff8126dcd6>] ? mn_release+0x6e/0x90
[  252.347976]  [<ffffffff810ec5f1>] ? __mmu_notifier_release+0x51/0x85
[  252.347976]  [<ffffffff810d9886>] ? exit_mmap+0x27/0x101
[  252.347976]  [<ffffffff810473c2>] ? mmput+0x30/0xd7
[  252.347976]  [<ffffffff8104b197>] ? exit_mm+0x129/0x136
[  252.347976]  [<ffffffff8104cc3f>] ? do_exit+0x208/0x7aa
[  252.347976]  [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf
[  252.347976]  [<ffffffff81006cd2>] ? check_events+0x12/0x20
[  252.347976]  [<ffffffff811fd1ff>] ? do_raw_spin_lock+0x6b/0x120
[  252.347976]  [<ffffffff8104d253>] ? do_group_exit+0x72/0x9a
[  252.347976]  [<ffffffff81059fcd>] ? get_signal_to_deliver+0x360/0x37f
[  252.347976]  [<ffffffff812785e6>] ? n_tty_read+0x6d5/0x7ad
[  252.347976]  [<ffffffff81009f53>] ? do_signal+0x6d/0x68b
[  252.347976]  [<ffffffff8103dbd8>] ? __wake_up+0x3f/0x48
[  252.347976]  [<ffffffff8127ad0a>] ? put_ldisc+0xac/0xb1
[  252.347976]  [<ffffffff8127adf9>] ? tty_ldisc_deref+0x9/0xb
[  252.347976]  [<ffffffff81273a97>] ? tty_read+0x8c/0xc8
[  252.347976]  [<ffffffff8100a598>] ? do_notify_resume+0x27/0x5f
[  252.347976]  [<ffffffff8100ac60>] ? int_signal+0x12/0x17
[  252.347976] map 3+1 (7fdee588d000 7fdee588e000)
[  252.347976] map 3+1 [0+1]
[  252.396334] 0 is still active: -4
[  252.396925] ------------[ cut here ]------------
[  252.397301] WARNING: at /home/konrad/ssd/linux/drivers/xen/gntdev.c:396 mn_release+0x6e/0x90()
[  252.397301] Modules linked in: xen_evtchn fbcon tileblit font bitblit softcursor ttm drm_kms_helper xen_blkfront xen_netfront xen_fbfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_kbdfront xenfs [last unloaded: dump_dma]
[  252.397301] Pid: 2313, comm: test_gnt Tainted: G        W   2.6.38-rc4-xtt-00169-gb68565e-dirty #3
[  252.397301] Call Trace:
[  252.397301]  [<ffffffff81049580>] ? warn_slowpath_common+0x80/0x98
[  252.397301]  [<ffffffff810495ad>] ? warn_slowpath_null+0x15/0x17
[  252.397301]  [<ffffffff8126dcd6>] ? mn_release+0x6e/0x90
[  252.397301]  [<ffffffff810ec5f1>] ? __mmu_notifier_release+0x51/0x85
[  252.397301]  [<ffffffff810d9886>] ? exit_mmap+0x27/0x101
[  252.397301]  [<ffffffff810473c2>] ? mmput+0x30/0xd7
[  252.397301]  [<ffffffff8104b197>] ? exit_mm+0x129/0x136
[  252.397301]  [<ffffffff8104cc3f>] ? do_exit+0x208/0x7aa
[  252.397301]  [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf
[  252.397301]  [<ffffffff81006cd2>] ? check_events+0x12/0x20
[  252.397301]  [<ffffffff811fd1ff>] ? do_raw_spin_lock+0x6b/0x120
[  252.397301]  [<ffffffff8104d253>] ? do_group_exit+0x72/0x9a
[  252.397301]  [<ffffffff81059fcd>] ? get_signal_to_deliver+0x360/0x37f
[  252.397301]  [<ffffffff812785e6>] ? n_tty_read+0x6d5/0x7ad
[  252.397301]  [<ffffffff81009f53>] ? do_signal+0x6d/0x68b
[  252.397301]  [<ffffffff8103dbd8>] ? __wake_up+0x3f/0x48
[  252.397301]  [<ffffffff8127ad0a>] ? put_ldisc+0xac/0xb1
[  252.397301]  [<ffffffff8127adf9>] ? tty_ldisc_deref+0x9/0xb
[  252.397301]  [<ffffffff81273a97>] ? tty_read+0x8c/0xc8
[  252.397301]  [<ffffffff8100a598>] ? do_notify_resume+0x27/0x5f
[  252.397301]  [<ffffffff8100ac60>] ? int_signal+0x12/0x17
[  252.397301] ---[ end trace e3791f053b75554b ]---
[  252.414807] close ffff88001e35ea10
[  252.415439] priv ffff88001afac420

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-02-03 17:19 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
@ 2011-02-08 22:48   ` Konrad Rzeszutek Wilk
  2011-02-09 18:52     ` Daniel De Graaf
  0 siblings, 1 reply; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-08 22:48 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Thu, Feb 03, 2011 at 12:19:03PM -0500, Daniel De Graaf wrote:
> This allows a userspace application to allocate a shared page for
> implementing inter-domain communication or device drivers. These
> shared pages can be mapped using the gntdev device or by the kernel
> in another domain.
> 
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> ---
>  drivers/xen/Kconfig    |    8 +
>  drivers/xen/Makefile   |    2 +
>  drivers/xen/gntalloc.c |  486 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/xen/gntalloc.h |   50 +++++
>  4 files changed, 546 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/xen/gntalloc.c
>  create mode 100644 include/xen/gntalloc.h
> 
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 19f1f3c..69d2cd5 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -142,6 +142,14 @@ config XEN_GNTDEV
>  	help
>  	  Allows userspace processes to use grants.
>  
> +config XEN_GRANT_DEV_ALLOC
> +	tristate "User-space grant reference allocator driver"
> +	depends on XEN
> +	help
> +	  Allows userspace processes to create pages with access granted
> +	  to other domains. This can be used to implement frontend drivers
> +	  or as part of an inter-domain shared memory channel.
> +
>  config XEN_PLATFORM_PCI
>  	tristate "xen platform pci device driver"
>  	depends on XEN_PVHVM && PCI
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 5c3b031..09364b9 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
>  obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
>  obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
>  obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
>  obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
>  obj-$(CONFIG_XENFS)		+= xenfs/
>  obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
> @@ -19,5 +20,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
>  
>  xen-evtchn-y			:= evtchn.o
>  xen-gntdev-y				:= gntdev.o
> +xen-gntalloc-y				:= gntalloc.o
>  
>  xen-platform-pci-y		:= platform-pci.o
> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
> new file mode 100644
> index 0000000..d06bf2b
> --- /dev/null
> +++ b/drivers/xen/gntalloc.c
> @@ -0,0 +1,486 @@
> +/******************************************************************************
> + * gntalloc.c
> + *
> + * Device for creating grant references (in user-space) that may be shared
> + * with other domains.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +/*
> + * This driver exists to allow userspace programs in Linux to allocate kernel
> + * memory that will later be shared with another domain.  Without this device,
> + * Linux userspace programs cannot create grant references.
> + *
> + * How this stuff works:
> + *   X -> granting a page to Y
> + *   Y -> mapping the grant from X
> + *
> + *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
> + *   2. X creates an entry in the grant table that says domid(Y) can access P.
> + *      This is done without a hypercall unless the grant table needs expansion.
> + *   3. X gives the grant reference identifier, GREF, to Y.
> + *   4. Y maps the page, either directly into kernel memory for use in a backend
> + *      driver, or via the gntdev device to map into the address space of an
> + *      application running in Y. This is the first point at which Xen does any
> + *      tracking of the page.
> + *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
> + *      to the shared page, and can now communicate with Y over the shared page.
> + *
> + *
> + * NOTE TO USERSPACE LIBRARIES:
> + *   The grant allocation and mmap()ing are, naturally, two separate operations.
> + *   You set up the sharing by calling the create ioctl() and then the mmap().
> + *   Teardown requires munmap() and either close() or ioctl().
> + *
> + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
> + * reference, this device can be used to consume kernel memory by leaving grant
> + * references mapped by another domain when an application exits. Therefore,
> + * there is a global limit on the number of pages that can be allocated. When
> + * all references to the page are unmapped, it will be freed during the next
> + * grant operation.
> + */
> +
> +#include <linux/atomic.h>
> +#include <linux/module.h>
> +#include <linux/miscdevice.h>
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <linux/fs.h>
> +#include <linux/device.h>
> +#include <linux/mm.h>
> +#include <linux/uaccess.h>
> +#include <linux/types.h>
> +#include <linux/list.h>
> +
> +#include <xen/xen.h>
> +#include <xen/page.h>
> +#include <xen/grant_table.h>
> +#include <xen/gntalloc.h>
> +
> +static int limit = 1024;
> +module_param(limit, int, 0644);
> +MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
> +		"the gntalloc device");
> +
> +static LIST_HEAD(gref_list);
> +static DEFINE_SPINLOCK(gref_lock);
> +static int gref_size;
> +
> +/* Metadata on a grant reference. */
> +struct gntalloc_gref {
> +	struct list_head next_gref;  /* list entry gref_list */
> +	struct list_head next_file;  /* list entry file->list, if open */
> +	struct page *page;	     /* The shared page */
> +	uint64_t file_index;         /* File offset for mmap() */
> +	unsigned int users;          /* Use count - when zero, waiting on Xen */
> +	grant_ref_t gref_id;         /* The grant reference number */
> +};
> +
> +struct gntalloc_file_private_data {
> +	struct list_head list;
> +	uint64_t index;
> +};
> +
> +static void __del_gref(struct gntalloc_gref *gref);
> +
> +static void do_cleanup(void)
> +{
> +	struct gntalloc_gref *gref, *n;
> +	list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
> +		if (!gref->users)
> +			__del_gref(gref);
> +	}
> +}
> +
> +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
> +	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
> +{
> +	int i, rc, readonly;
> +	LIST_HEAD(queue_gref);
> +	LIST_HEAD(queue_file);
> +	struct gntalloc_gref *gref;
> +
> +	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
> +	rc = -ENOMEM;
> +	for (i = 0; i < op->count; i++) {
> +		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
> +		if (!gref)
> +			goto undo;
> +		list_add_tail(&gref->next_gref, &queue_gref);
> +		list_add_tail(&gref->next_file, &queue_file);
> +		gref->users = 1;
> +		gref->file_index = op->index + i * PAGE_SIZE;
> +		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +		if (!gref->page)
> +			goto undo;
> +
> +		/* Grant foreign access to the page. */
> +		gref->gref_id = gnttab_grant_foreign_access(op->domid,
> +			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
> +		if (gref->gref_id < 0) {
> +			rc = gref->gref_id;
> +			goto undo;
> +		}
> +		gref_ids[i] = gref->gref_id;
> +	}
> +
> +	/* Add to gref lists. */
> +	spin_lock(&gref_lock);
> +	list_splice_tail(&queue_gref, &gref_list);
> +	list_splice_tail(&queue_file, &priv->list);
> +	spin_unlock(&gref_lock);
> +
> +	return 0;
> +
> +undo:
> +	spin_lock(&gref_lock);
> +	gref_size -= (op->count - i);

So we decrease the gref_size by the count of the ones that we 
allocated..
> +
> +	list_for_each_entry(gref, &queue_file, next_file) {
> +		/* __del_gref does not remove from queue_file */
> +		__del_gref(gref);

.. but __del_gref decreases the gref_size by one, so wouldn't
we decrease by too much?

> +	}
> +
> +	/* It's possible for the target domain to map the just-allocated grant
> +	 * references by blindly guessing their IDs; if this is done, then
> +	 * __del_gref will leave them in the queue_gref list. They need to be
> +	 * added to the global list so that we can free them when they are no
> +	 * longer referenced.
> +	 */
> +	if (unlikely(!list_empty(&queue_gref)))
> +		list_splice_tail(&queue_gref, &gref_list);
> +	spin_unlock(&gref_lock);
> +	return rc;
> +}
> +
> +static void __del_gref(struct gntalloc_gref *gref)
> +{
> +	if (gref->gref_id > 0) {
> +		if (gnttab_query_foreign_access(gref->gref_id))
> +			return;
> +
> +		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
> +			return;
> +	}
> +
> +	gref_size--;
> +	list_del(&gref->next_gref);
> +
> +	if (gref->page)
> +		__free_page(gref->page);
> +
> +	kfree(gref);
> +}
> +
> +/* finds contiguous grant references in a file, returns the first */
> +static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
> +		uint64_t index, uint32_t count)
> +{
> +	struct gntalloc_gref *rv = NULL, *gref;
> +	list_for_each_entry(gref, &priv->list, next_file) {
> +		if (gref->file_index == index && !rv)
> +			rv = gref;
> +		if (rv) {
> +			if (gref->file_index != index)
> +				return NULL;
> +			index += PAGE_SIZE;
> +			count--;
> +			if (count == 0)
> +				return rv;
> +		}
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * -------------------------------------
> + *  File operations.
> + * -------------------------------------
> + */
> +static int gntalloc_open(struct inode *inode, struct file *filp)
> +{
> +	struct gntalloc_file_private_data *priv;
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		goto out_nomem;
> +	INIT_LIST_HEAD(&priv->list);
> +
> +	filp->private_data = priv;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	return 0;
> +
> +out_nomem:
> +	return -ENOMEM;
> +}
> +
> +static int gntalloc_release(struct inode *inode, struct file *filp)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +	struct gntalloc_gref *gref;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	spin_lock(&gref_lock);
> +	while (!list_empty(&priv->list)) {
> +		gref = list_entry(priv->list.next,
> +			struct gntalloc_gref, next_file);
> +		list_del(&gref->next_file);
> +		gref->users--;
> +		if (gref->users == 0)
> +			__del_gref(gref);
> +	}
> +	kfree(priv);
> +	spin_unlock(&gref_lock);
> +
> +	return 0;
> +}
> +
> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
> +		struct ioctl_gntalloc_alloc_gref __user *arg)
> +{
> +	int rc = 0;
> +	struct ioctl_gntalloc_alloc_gref op;
> +	uint32_t *gref_ids;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	if (copy_from_user(&op, arg, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto out;
> +	}
> +
> +	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
> +	if (!gref_ids) {
> +		rc = -ENOMEM;
> +		goto out;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	/* Clean up pages that were at zero (local) users but were still mapped
> +	 * by remote domains. Since those pages count towards the limit that we
> +	 * are about to enforce, removing them here is a good idea.
> +	 */
> +	do_cleanup();
> +	if (gref_size + op.count > limit) {
> +		spin_unlock(&gref_lock);
> +		rc = -ENOSPC;
> +		goto out_free;
> +	}
> +	gref_size += op.count;
> +	op.index = priv->index;
> +	priv->index += op.count * PAGE_SIZE;
> +	spin_unlock(&gref_lock);
> +
> +	rc = add_grefs(&op, gref_ids, priv);
> +	if (rc < 0)
> +		goto out_free;

Should we reset priv->index to its earlier value?

> +
> +	/* Once we finish add_grefs, it is unsafe to touch the new reference,
> +	 * since it is possible for a concurrent ioctl to remove it (by guessing
> +	 * its index). If the userspace application doesn't provide valid memory
> +	 * to write the IDs to, then it will need to close the file in order to
> +	 * release - which it will do by segfaulting when it tries to access the
> +	 * IDs to close them.
> +	 */
> +	if (copy_to_user(arg, &op, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto out_free;
> +	}
> +	if (copy_to_user(arg->gref_ids, gref_ids,
> +			sizeof(gref_ids[0]) * op.count)) {
> +		rc = -EFAULT;
> +		goto out_free;
> +	}
> +
> +out_free:
> +	kfree(gref_ids);
> +out:
> +	return rc;
> +}
> +
> +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
> +		void __user *arg)
> +{
> +	int i, rc = 0;
> +	struct ioctl_gntalloc_dealloc_gref op;
> +	struct gntalloc_gref *gref, *n;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	if (copy_from_user(&op, arg, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto dealloc_grant_out;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	gref = find_grefs(priv, op.index, op.count);
> +	if (gref) {
> +		/* Remove from the file list only, and decrease reference count.
> +		 * The later call to do_cleanup() will remove from gref_list and
> +		 * free the memory if the pages aren't mapped anywhere.
> +		 */
> +		for (i = 0; i < op.count; i++) {
> +			n = list_entry(gref->next_file.next,
> +				struct gntalloc_gref, next_file);
> +			list_del(&gref->next_file);
> +			gref->users--;
> +			gref = n;
> +		}
> +	} else {
> +		rc = -EINVAL;
> +	}
> +
> +	do_cleanup();
> +
> +	spin_unlock(&gref_lock);
> +dealloc_grant_out:
> +	return rc;
> +}
> +
> +static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
> +		unsigned long arg)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +
> +	switch (cmd) {
> +	case IOCTL_GNTALLOC_ALLOC_GREF:
> +		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
> +
> +	case IOCTL_GNTALLOC_DEALLOC_GREF:
> +		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
> +
> +	default:
> +		return -ENOIOCTLCMD;
> +	}
> +
> +	return 0;
> +}
> +
> +static void gntalloc_vma_close(struct vm_area_struct *vma)
> +{
> +	struct gntalloc_gref *gref = vma->vm_private_data;
> +	if (!gref)
> +		return;
> +
> +	spin_lock(&gref_lock);
> +	gref->users--;
> +	if (gref->users == 0)
> +		__del_gref(gref);
> +	spin_unlock(&gref_lock);
> +}
> +
> +static struct vm_operations_struct gntalloc_vmops = {
> +	.close = gntalloc_vma_close,
> +};
> +
> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +	struct gntalloc_gref *gref;
> +	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
> +	int rv, i;
> +
> +	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
> +		       priv, vma->vm_pgoff, count);
> +
> +	if (!(vma->vm_flags & VM_SHARED)) {
> +		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
> +	if (gref == NULL) {
> +		rv = -ENOENT;
> +		pr_debug("%s: Could not find grant reference",
> +				__func__);
> +		goto out_unlock;
> +	}
> +
> +	vma->vm_private_data = gref;
> +
> +	vma->vm_flags |= VM_RESERVED;
> +	vma->vm_flags |= VM_DONTCOPY;
> +	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
> +
> +	vma->vm_ops = &gntalloc_vmops;
> +
> +	for (i = 0; i < count; i++) {
> +		gref->users++;
> +		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
> +				gref->page);
> +		if (rv)
> +			goto out_unlock;
> +
> +		gref = list_entry(gref->next_file.next,
> +				struct gntalloc_gref, next_file);
> +	}
> +	rv = 0;
> +
> +out_unlock:
> +	spin_unlock(&gref_lock);
> +	return rv;
> +}
> +
> +static const struct file_operations gntalloc_fops = {
> +	.owner = THIS_MODULE,
> +	.open = gntalloc_open,
> +	.release = gntalloc_release,
> +	.unlocked_ioctl = gntalloc_ioctl,
> +	.mmap = gntalloc_mmap
> +};
> +
> +/*
> + * -------------------------------------
> + * Module creation/destruction.
> + * -------------------------------------
> + */
> +static struct miscdevice gntalloc_miscdev = {
> +	.minor	= MISC_DYNAMIC_MINOR,
> +	.name	= "xen/gntalloc",
> +	.fops	= &gntalloc_fops,
> +};
> +
> +static int __init gntalloc_init(void)
> +{
> +	int err;
> +
> +	if (!xen_domain())
> +		return -ENODEV;
> +
> +	err = misc_register(&gntalloc_miscdev);
> +	if (err != 0) {
> +		printk(KERN_ERR "Could not register misc gntalloc device\n");
> +		return err;
> +	}
> +
> +	pr_debug("Created grant allocation device at %d,%d\n",
> +			MISC_MAJOR, gntalloc_miscdev.minor);
> +
> +	return 0;
> +}
> +
> +static void __exit gntalloc_exit(void)
> +{
> +	misc_deregister(&gntalloc_miscdev);
> +}
> +
> +module_init(gntalloc_init);
> +module_exit(gntalloc_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
> +		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
> +MODULE_DESCRIPTION("User-space grant reference allocator driver");
> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
> new file mode 100644
> index 0000000..bc3b85e
> --- /dev/null
> +++ b/include/xen/gntalloc.h
> @@ -0,0 +1,50 @@
> +/******************************************************************************
> + * gntalloc.h
> + *
> + * Interface to /dev/xen/gntalloc.
> + *
> + * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> + *
> + * This file is in the public domain.
> + */
> +
> +#ifndef __LINUX_PUBLIC_GNTALLOC_H__
> +#define __LINUX_PUBLIC_GNTALLOC_H__
> +
> +/*
> + * Allocates a new page and creates a new grant reference.
> + */
> +#define IOCTL_GNTALLOC_ALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
> +struct ioctl_gntalloc_alloc_gref {
> +	/* IN parameters */
> +	/* The ID of the domain to be given access to the grants. */
> +	uint16_t domid;
> +	/* Flags for this mapping */
> +	uint16_t flags;
> +	/* Number of pages to map */
> +	uint32_t count;
> +	/* OUT parameters */
> +	/* The offset to be used on a subsequent call to mmap(). */
> +	uint64_t index;
> +	/* The grant references of the newly created grant, one per page */
> +	/* Variable size, depending on count */
> +	uint32_t gref_ids[1];
> +};
> +
> +#define GNTALLOC_FLAG_WRITABLE 1
> +
> +/*
> + * Deallocates the grant reference, allowing the associated page to be freed if
> + * no other domains are using it.
> + */
> +#define IOCTL_GNTALLOC_DEALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
> +struct ioctl_gntalloc_dealloc_gref {
> +	/* IN parameters */
> +	/* The offset returned in the map operation */
> +	uint64_t index;
> +	/* Number of references to unmap */
> +	uint32_t count;
> +};
> +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
> -- 
> 1.7.3.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] xen-gntdev: Fix unmap notify on PV domains
  2011-02-08 14:14   ` [PATCH] xen-gntdev: Fix unmap notify on PV domains Daniel De Graaf
@ 2011-02-08 22:58     ` Konrad Rzeszutek Wilk
  2011-02-09 20:33       ` [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings Daniel De Graaf
  0 siblings, 1 reply; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-08 22:58 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Tue, Feb 08, 2011 at 09:14:06AM -0500, Daniel De Graaf wrote:
> In paravirtualized guests, the struct page* for mappings is only a
> placeholder, and cannot be used to access the granted memory. Use the
> userspace mapping that we have set up in order to implement
> UNMAP_NOTIFY_CLEAR_BYTE.
>     
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> 
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index 06de2c0..2b67f15 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -287,7 +287,12 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
>  
>  	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
>  		int pgno = (map->notify.addr >> PAGE_SHIFT);
> -		if (pgno >= offset && pgno < offset + pages) {
> +		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
> +			void __user *tmp;
> +			tmp = map->vma->vm_start + map->notify.addr;
> +			copy_to_user(tmp, &err, 1);

The compiler really hates that. You could use an uninitialized_value macro
to silence the warning.

> +			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
> +		} else if (pgno >= offset && pgno < offset + pages) {
>  			uint8_t *tmp = kmap(map->pages[pgno]);
>  			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
>  			kunmap(map->pages[pgno]);
> @@ -296,7 +301,7 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
>  	}
>  
>  	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
> -	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
> +	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
>  	if (err)
>  		return err;
>  

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-02-08 22:48   ` Konrad Rzeszutek Wilk
@ 2011-02-09 18:52     ` Daniel De Graaf
  0 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-09 18:52 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 02/08/2011 05:48 PM, Konrad Rzeszutek Wilk wrote:
> On Thu, Feb 03, 2011 at 12:19:03PM -0500, Daniel De Graaf wrote:
>> This allows a userspace application to allocate a shared page for
>> implementing inter-domain communication or device drivers. These
>> shared pages can be mapped using the gntdev device or by the kernel
>> in another domain.
>>
>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>> ---
>>  drivers/xen/Kconfig    |    8 +
>>  drivers/xen/Makefile   |    2 +
>>  drivers/xen/gntalloc.c |  486 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/xen/gntalloc.h |   50 +++++
>>  4 files changed, 546 insertions(+), 0 deletions(-)
>>  create mode 100644 drivers/xen/gntalloc.c
>>  create mode 100644 include/xen/gntalloc.h
>>

[snip]

>> +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
>> +	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
>> +{
>> +	int i, rc, readonly;
>> +	LIST_HEAD(queue_gref);
>> +	LIST_HEAD(queue_file);
>> +	struct gntalloc_gref *gref;
>> +
>> +	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
>> +	rc = -ENOMEM;
>> +	for (i = 0; i < op->count; i++) {
>> +		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
>> +		if (!gref)
>> +			goto undo;
>> +		list_add_tail(&gref->next_gref, &queue_gref);
>> +		list_add_tail(&gref->next_file, &queue_file);
>> +		gref->users = 1;
>> +		gref->file_index = op->index + i * PAGE_SIZE;
>> +		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>> +		if (!gref->page)
>> +			goto undo;
>> +
>> +		/* Grant foreign access to the page. */
>> +		gref->gref_id = gnttab_grant_foreign_access(op->domid,
>> +			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
>> +		if (gref->gref_id < 0) {
>> +			rc = gref->gref_id;
>> +			goto undo;
>> +		}
>> +		gref_ids[i] = gref->gref_id;
>> +	}
>> +
>> +	/* Add to gref lists. */
>> +	spin_lock(&gref_lock);
>> +	list_splice_tail(&queue_gref, &gref_list);
>> +	list_splice_tail(&queue_file, &priv->list);
>> +	spin_unlock(&gref_lock);
>> +
>> +	return 0;
>> +
>> +undo:
>> +	spin_lock(&gref_lock);
>> +	gref_size -= (op->count - i);
> 
> So we decrease the gref_size by the count of the ones that we 
> allocated..

No, (op->count - i) is the number that we haven't yet allocated. Those
aren't in queue_file, so __del_gref is never called on them.

>> +
>> +	list_for_each_entry(gref, &queue_file, next_file) {
>> +		/* __del_gref does not remove from queue_file */
>> +		__del_gref(gref);
> 
> .. but __del_gref decreases the gref_size by one, so wouldn't
> we decrease by too much?

[snip]

>> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
>> +		struct ioctl_gntalloc_alloc_gref __user *arg)
>> +{
>> +	int rc = 0;
>> +	struct ioctl_gntalloc_alloc_gref op;
>> +	uint32_t *gref_ids;
>> +
>> +	pr_debug("%s: priv %p\n", __func__, priv);
>> +
>> +	if (copy_from_user(&op, arg, sizeof(op))) {
>> +		rc = -EFAULT;
>> +		goto out;
>> +	}
>> +
>> +	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
>> +	if (!gref_ids) {
>> +		rc = -ENOMEM;
>> +		goto out;
>> +	}
>> +
>> +	spin_lock(&gref_lock);
>> +	/* Clean up pages that were at zero (local) users but were still mapped
>> +	 * by remote domains. Since those pages count towards the limit that we
>> +	 * are about to enforce, removing them here is a good idea.
>> +	 */
>> +	do_cleanup();
>> +	if (gref_size + op.count > limit) {
>> +		spin_unlock(&gref_lock);
>> +		rc = -ENOSPC;
>> +		goto out_free;
>> +	}
>> +	gref_size += op.count;
>> +	op.index = priv->index;
>> +	priv->index += op.count * PAGE_SIZE;
>> +	spin_unlock(&gref_lock);
>> +
>> +	rc = add_grefs(&op, gref_ids, priv);
>> +	if (rc < 0)
>> +		goto out_free;
> 
> Should we reset priv->index to its earlier value?

We could, but doing so could introduce a race if two threads were running
the ioctl at the same time. There's no harm in letting the index keep increasing:
it is 64-bit, so overflow is not a practical concern.

-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH] xen-gntdev: Use map->vma for checking map validity
  2011-02-08 21:49   ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
@ 2011-02-09 20:11     ` Daniel De Graaf
  2011-02-09 20:12     ` [PATCH] xen-gntdev: Avoid unmapping ranges twice Daniel De Graaf
  1 sibling, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-09 20:11 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: Daniel De Graaf, jeremy, xen-devel, Ian.Campbell

The is_mapped flag used to be set at the completion of the map operation,
but was not checked in all error paths. Use map->vma instead, which will
now be cleared if the initial grant mapping fails.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |    8 ++------
 1 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 75f8037..4ca4262 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -77,7 +77,6 @@ struct grant_map {
 	int index;
 	int count;
 	int flags;
-	int is_mapped;
 	atomic_t users;
 	struct unmap_notify notify;
 	struct ioctl_gntdev_grant_ref *grants;
@@ -322,7 +321,6 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	struct grant_map *map = vma->vm_private_data;
 
 	pr_debug("close %p\n", vma);
-	map->is_mapped = 0;
 	map->vma = NULL;
 	vma->vm_private_data = NULL;
 	gntdev_put_map(map);
@@ -347,8 +345,6 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
 	list_for_each_entry(map, &priv->maps, next) {
 		if (!map->vma)
 			continue;
-		if (!map->is_mapped)
-			continue;
 		if (map->vma->vm_start >= end)
 			continue;
 		if (map->vma->vm_end <= start)
@@ -663,8 +659,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	if (err)
 		goto out_put_map;
 
-	map->is_mapped = 1;
-
 	if (!use_ptemod) {
 		for (i = 0; i < count; i++) {
 			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
@@ -681,6 +675,8 @@ unlock_out:
 	return err;
 
 out_put_map:
+	if (use_ptemod)
+		map->vma = NULL;
 	gntdev_put_map(map);
 	return err;
 }
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH] xen-gntdev: Avoid unmapping ranges twice
  2011-02-08 21:49   ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
  2011-02-09 20:11     ` [PATCH] xen-gntdev: Use map->vma for checking map validity Daniel De Graaf
@ 2011-02-09 20:12     ` Daniel De Graaf
  1 sibling, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-09 20:12 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: Daniel De Graaf, jeremy, xen-devel, Ian.Campbell

In paravirtualized domains, mn_invl_page or mn_invl_range_start can
unmap a segment of a mapped region without unmapping all pages. When
the region is later released, the pages will be unmapped twice, leading
to an incorrect -EINVAL return.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   33 +++++++++++++++++++++++++++++++--
 1 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 4ca4262..4687cd5 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -282,7 +282,7 @@ static int map_grant_pages(struct grant_map *map)
 	return err;
 }
 
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
 	int i, err = 0;
 
@@ -301,7 +301,6 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 	}
 
-	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
 	if (err)
 		return err;
@@ -314,6 +313,36 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 	return err;
 }
 
+static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+{
+	int range, err = 0;
+
+	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
+
+	/* It is possible the requested range will have a "hole" where we
+	 * already unmapped some of the grants. Only unmap valid ranges.
+	 */
+	while (pages && !err) {
+		while (pages && !map->unmap_ops[offset].handle) {
+			offset++;
+			pages--;
+		}
+		range = 0;
+		while (range < pages) {
+			if (!map->unmap_ops[offset+range].handle) {
+				range--;
+				break;
+			}
+			range++;
+		}
+		err = __unmap_grant_pages(map, offset, range);
+		offset += range;
+		pages -= range;
+	}
+
+	return err;
+}
+
 /* ------------------------------------------------------------------ */
 
 static void gntdev_vma_close(struct vm_area_struct *vma)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
  2011-02-08 22:58     ` Konrad Rzeszutek Wilk
@ 2011-02-09 20:33       ` Daniel De Graaf
  2011-02-09 21:09         ` [PATCH v2] " Daniel De Graaf
  2011-02-09 22:22         ` [PATCH] " Jeremy Fitzhardinge
  0 siblings, 2 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-09 20:33 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 4687cd5..00e4644 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -291,7 +291,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
 			void __user *tmp;
 			tmp = map->vma->vm_start + map->notify.addr;
-			copy_to_user(tmp, &err, 1);
+			WARN_ON(copy_to_user(tmp, &err, 1));
 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 		} else if (pgno >= offset && pgno < offset + pages) {
 			uint8_t *tmp = kmap(map->pages[pgno]);
@@ -596,6 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 	goto unlock_out;
 
  found:
+	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
+			(op.flags & GNTMAP_readonly)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
 	map->notify.flags = op.action;
 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 	map->notify.event = op.event_channel_port;

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
  2011-02-09 20:33       ` [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings Daniel De Graaf
@ 2011-02-09 21:09         ` Daniel De Graaf
  2011-02-09 22:22         ` [PATCH] " Jeremy Fitzhardinge
  1 sibling, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-09 21:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 4687cd5..00e4644 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -291,7 +291,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
 			void __user *tmp;
 			tmp = map->vma->vm_start + map->notify.addr;
-			copy_to_user(tmp, &err, 1);
+			WARN_ON(copy_to_user(tmp, &err, 1));
 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 		} else if (pgno >= offset && pgno < offset + pages) {
 			uint8_t *tmp = kmap(map->pages[pgno]);
@@ -596,6 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 	goto unlock_out;
 
  found:
+	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
+			(map->flags & GNTMAP_readonly)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
 	map->notify.flags = op.action;
 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 	map->notify.event = op.event_channel_port;

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH] xen-gntdev: Avoid double-mapping memory
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
                   ` (7 preceding siblings ...)
  2011-02-07 23:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
@ 2011-02-09 21:11 ` Daniel De Graaf
  2011-02-14 16:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
  9 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-09 21:11 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: Daniel De Graaf, jeremy, xen-devel, Ian.Campbell

If an already-mapped area of the device was mapped into userspace a
second time, a hypercall was incorrectly made to remap the memory
again. Avoid the hypercall on later mmap calls, and fail the mmap call
if a writable mapping is attempted on a read-only range.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   15 ++++++++++++---
 1 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 00e4644..e9d9180 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -258,6 +258,9 @@ static int map_grant_pages(struct grant_map *map)
 	phys_addr_t addr;
 
 	if (!use_ptemod) {
+		/* Note: it could already be mapped */
+		if (map->map_ops[0].handle)
+			return 0;
 		for (i = 0; i < map->count; i++) {
 			addr = (phys_addr_t)
 				pfn_to_kaddr(page_to_pfn(map->pages[i]));
@@ -674,9 +677,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	if (use_ptemod)
 		map->vma = vma;
 
-	map->flags = GNTMAP_host_map;
-	if (!(vma->vm_flags & VM_WRITE))
-		map->flags |= GNTMAP_readonly;
+	if (map->flags) {
+		if ((vma->vm_flags & VM_WRITE) &&
+				(map->flags & GNTMAP_readonly))
+			return -EINVAL;
+	} else {
+		map->flags = GNTMAP_host_map;
+		if (!(vma->vm_flags & VM_WRITE))
+			map->flags |= GNTMAP_readonly;
+	}
 
 	spin_unlock(&priv->lock);
 
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
  2011-02-09 20:33       ` [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings Daniel De Graaf
  2011-02-09 21:09         ` [PATCH v2] " Daniel De Graaf
@ 2011-02-09 22:22         ` Jeremy Fitzhardinge
  2011-02-09 23:11           ` Daniel De Graaf
  2011-02-09 23:15           ` [PATCH v3] " Daniel De Graaf
  1 sibling, 2 replies; 39+ messages in thread
From: Jeremy Fitzhardinge @ 2011-02-09 22:22 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: xen-devel, Ian.Campbell, Konrad Rzeszutek Wilk

On 02/09/2011 12:33 PM, Daniel De Graaf wrote:
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index 4687cd5..00e4644 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -291,7 +291,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
>  		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
>  			void __user *tmp;
>  			tmp = map->vma->vm_start + map->notify.addr;
> -			copy_to_user(tmp, &err, 1);
> +			WARN_ON(copy_to_user(tmp, &err, 1));

Please don't put side-effecty predicates in WARN_ON/BUG_ON.

There's no useful report we can return?

    J

>  			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
>  		} else if (pgno >= offset && pgno < offset + pages) {
>  			uint8_t *tmp = kmap(map->pages[pgno]);
> @@ -596,6 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
>  	goto unlock_out;
>  
>   found:
> +	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
> +			(op.flags & GNTMAP_readonly)) {
> +		rc = -EINVAL;
> +		goto unlock_out;
> +	}
> +
>  	map->notify.flags = op.action;
>  	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
>  	map->notify.event = op.event_channel_port;
>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
  2011-02-09 22:22         ` [PATCH] " Jeremy Fitzhardinge
@ 2011-02-09 23:11           ` Daniel De Graaf
  2011-02-09 23:15           ` [PATCH v3] " Daniel De Graaf
  1 sibling, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-09 23:11 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel, Ian.Campbell, Konrad Rzeszutek Wilk

On 02/09/2011 05:22 PM, Jeremy Fitzhardinge wrote:
> On 02/09/2011 12:33 PM, Daniel De Graaf wrote:
>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>>
>> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
>> index 4687cd5..00e4644 100644
>> --- a/drivers/xen/gntdev.c
>> +++ b/drivers/xen/gntdev.c
>> @@ -291,7 +291,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
>>  		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
>>  			void __user *tmp;
>>  			tmp = map->vma->vm_start + map->notify.addr;
>> -			copy_to_user(tmp, &err, 1);
>> +			WARN_ON(copy_to_user(tmp, &err, 1));
> 
> Please don't put side-effecty predicates in WARN_ON/BUG_ON.
> 
> There's no useful report we can return?
> 
>     J

This code is called when the application may be crashing or exiting, so
there is not guaranteed to be a return path to the program. The change
in the second part of this patch should prevent the copy_to_user from failing.

Placing the call inside WARN_ON is clearly a bad idea. Will resend a more sane
version of this patch with a comment explaining why we don't return.

> 
>>  			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
>>  		} else if (pgno >= offset && pgno < offset + pages) {
>>  			uint8_t *tmp = kmap(map->pages[pgno]);
>> @@ -596,6 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
>>  	goto unlock_out;
>>  
>>   found:
>> +	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
>> +			(op.flags & GNTMAP_readonly)) {
>> +		rc = -EINVAL;
>> +		goto unlock_out;
>> +	}
>> +
>>  	map->notify.flags = op.action;
>>  	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
>>  	map->notify.event = op.event_channel_port;
>>
> 


-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
  2011-02-09 22:22         ` [PATCH] " Jeremy Fitzhardinge
  2011-02-09 23:11           ` Daniel De Graaf
@ 2011-02-09 23:15           ` Daniel De Graaf
  1 sibling, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-09 23:15 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, jeremy; +Cc: Daniel De Graaf, xen-devel, Ian.Campbell

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   10 +++++++++-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 2c4cc94..2a4733c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -294,7 +294,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
 			void __user *tmp;
 			tmp = map->vma->vm_start + map->notify.addr;
-			copy_to_user(tmp, &err, 1);
+			err = copy_to_user(tmp, &err, 1);
+			if (err)
+				return err;
 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 		} else if (pgno >= offset && pgno < offset + pages) {
 			uint8_t *tmp = kmap(map->pages[pgno]);
@@ -599,6 +601,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 	goto unlock_out;
 
  found:
+	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
+			(map->flags & GNTMAP_readonly)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
 	map->notify.flags = op.action;
 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 	map->notify.event = op.event_channel_port;
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
  2011-02-03 17:19 ` [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl Daniel De Graaf
@ 2011-02-14 15:37   ` Konrad Rzeszutek Wilk
  2011-02-14 18:07     ` Daniel De Graaf
  0 siblings, 1 reply; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-14 15:37 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
> index bc3b85e..257cc8d 100644
> --- a/include/xen/gntalloc.h
> +++ b/include/xen/gntalloc.h
> @@ -47,4 +47,32 @@ struct ioctl_gntalloc_dealloc_gref {
>  	/* Number of references to unmap */
>  	uint32_t count;
>  };
> +
> +/*
> + * Sets up an unmap notification within the page, so that the other side can do
> + * cleanup if this side crashes. Required to implement cross-domain robust
> + * mutexes or close notification on communication channels.
> + *
> + * Each mapped page only supports one notification; multiple calls referring to
> + * the same page overwrite the previous notification. You must clear the
> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
> + * to occur.
> + */
> +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
> +struct ioctl_gntalloc_unmap_notify {
> +	/* IN parameters */
> +	/* Index of a byte in the page */
> +	uint64_t index;

That isn't actually the whole truth. It is the index within the page
and also the "offset used on a subsequent call to mmap()" (ioctl_gntdev_map_grant_ref)

It might make sense to change the description to: "offset of the mmap region
and the index within the page" perhaps?

> +	/* Action(s) to take on unmap */
> +	uint32_t action;
> +	/* Event channel to notify */
> +	uint32_t event_channel_port;
> +};
> +
> +/* Clear (set to zero) the byte specified by index */
> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
> +/* Send an interrupt on the indicated event channel */
> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
> +
>  #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
> diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
> index eb23f41..5d9b9b4 100644
> --- a/include/xen/gntdev.h
> +++ b/include/xen/gntdev.h
> @@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants {
>  	uint32_t count;
>  };
>  
> +/*
> + * Sets up an unmap notification within the page, so that the other side can do
> + * cleanup if this side crashes. Required to implement cross-domain robust
> + * mutexes or close notification on communication channels.
> + *
> + * Each mapped page only supports one notification; multiple calls referring to
> + * the same page overwrite the previous notification. You must clear the
> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
> + * to occur.
> + */
> +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
> +struct ioctl_gntdev_unmap_notify {
> +	/* IN parameters */
> +	/* Index of a byte in the page */
> +	uint64_t index;

Ditto.
> +	/* Action(s) to take on unmap */
> +	uint32_t action;
> +	/* Event channel to notify */
> +	uint32_t event_channel_port;
> +};
> +
> +/* Clear (set to zero) the byte specified by index */
> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
> +/* Send an interrupt on the indicated event channel */
> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
> +
>  #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
> -- 
> 1.7.3.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-02-03 17:19 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
@ 2011-02-14 15:51   ` Konrad Rzeszutek Wilk
  2011-02-14 17:43     ` Daniel De Graaf
  0 siblings, 1 reply; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-14 15:51 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
> +
>  /* ------------------------------------------------------------------ */
>  
>  static void gntdev_print_maps(struct gntdev_priv *priv,
> @@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map)
>  
>  	atomic_sub(map->count, &pages_mapped);
>  
> -	if (map->pages)
> +	if (map->pages) {
> +		if (!use_ptemod)
> +			unmap_grant_pages(map, 0, map->count);

In the past (before this patch) the unmap_grant_pages would be called
on the .ioctl, .release, and .close (on VMA). This adds it now also
on the mmu_notifier_ops paths. Why?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v6] Userspace grant communication
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
                   ` (8 preceding siblings ...)
  2011-02-09 21:11 ` [PATCH] xen-gntdev: Avoid double-mapping memory Daniel De Graaf
@ 2011-02-14 16:14 ` Konrad Rzeszutek Wilk
  2011-02-14 16:38   ` Konrad Rzeszutek Wilk
  2011-02-14 17:55   ` Daniel De Graaf
  9 siblings, 2 replies; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-14 16:14 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote:
> Changes since v5:
>   - Added a tested xen version to workaround in #4
>   - Cleaned up variable names & structures
>   - Clarified some of the cleanup in gntalloc
>   - Removed copyright statement from public-domain files
> 
> [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
> [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
> [PATCH 3/6] xen-gntdev: Add reference counting to maps
> [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
> [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
> [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl

Hey Daniel,

I took a look at the patchset and then the bug-fixes:

Daniel De Graaf (12):
      xen-gntdev: Change page limit to be global instead of per-open
      xen-gntdev: Use find_vma rather than iterating our vma list manually
      xen-gntdev: Add reference counting to maps
      xen-gntdev: Support mapping in HVM domains
      xen-gntalloc: Userspace grant allocation driver
      xen/gntalloc,gntdev: Add unmap notify ioctl
      xen-gntdev: Fix memory leak when mmap fails
      xen-gntdev: Fix unmap notify on PV domains
      xen-gntdev: Use map->vma for checking map validity
      xen-gntdev: Avoid unmapping ranges twice
      xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
      xen-gntdev: Avoid double-mapping memory


And besides the two questions I posted today they look OK to me. However
I have one question that I think points to a bug.

Say that I call GNTDEV_MAP_GRANT_REF three times. The first time I provide
a count of 4, then 1, and then once more 1.

The first call would end up with priv having:

priv-map[0] => map.count=4, map.user=1, map.index=0. We return op.index as 0.

The next call:

priv-map[0] => map.count=4, map.user=1, map.index=0.
priv-map[1] => map.count=1, map.user=1, map.index=5 (gntdev_add_map
ends up adding the index and the count from the previous map to it). We return op.index as 20480.

The last call ends up with
priv-map[0] => map.count=4, map.user=1, map.index=0.
priv-map[1] => map.count=1, map.user=1, map.index=5
priv-map[2] => map.count=1, map.user=1, map.index=0. And we return
op.index as = 0.

It looks as if gntdev_add_map does not do anything to the
map.index if the "if (add->index + add->count < map->index)" comes
out true, and we end up with op.index=0, which naturally is
incorrect, as that is associated with the grant_map that has four entries!

I haven't yet tried to modify the nice test-case program you provided
to see if this can happen in practice, but it sure looks like it could?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Re: [PATCH v6] Userspace grant communication
  2011-02-14 16:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
@ 2011-02-14 16:38   ` Konrad Rzeszutek Wilk
  2011-02-14 17:56     ` Daniel De Graaf
  2011-02-14 17:55   ` Daniel De Graaf
  1 sibling, 1 reply; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-14 16:38 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Mon, Feb 14, 2011 at 11:14:03AM -0500, Konrad Rzeszutek Wilk wrote:
> On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote:
> > Changes since v5:
> >   - Added a tested xen version to workaround in #4
> >   - Cleaned up variable names & structures
> >   - Clarified some of the cleanup in gntalloc
> >   - Removed copyright statement from public-domain files
> > 
> > [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
> > [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
> > [PATCH 3/6] xen-gntdev: Add reference counting to maps
> > [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
> > [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
> > [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl
> 
> Hey Daniel,
> 
> I took a look at the patchset and then the bug-fixes:
> 
> Daniel De Graaf (12):
>       xen-gntdev: Change page limit to be global instead of per-open
>       xen-gntdev: Use find_vma rather than iterating our vma list manually
>       xen-gntdev: Add reference counting to maps
>       xen-gntdev: Support mapping in HVM domains
>       xen-gntalloc: Userspace grant allocation driver
>       xen/gntalloc,gntdev: Add unmap notify ioctl
>       xen-gntdev: Fix memory leak when mmap fails
>       xen-gntdev: Fix unmap notify on PV domains
>       xen-gntdev: Use map->vma for checking map validity
>       xen-gntdev: Avoid unmapping ranges twice
>       xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
>       xen-gntdev: Avoid double-mapping memory
> 
> 
> And besides the two questions I posted today they look OK to me. However

Oh, and Jeremy's comment about the WARN_ON on the copy_to_user..

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-02-14 15:51   ` Konrad Rzeszutek Wilk
@ 2011-02-14 17:43     ` Daniel De Graaf
  2011-02-14 18:52       ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-14 17:43 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 02/14/2011 10:51 AM, Konrad Rzeszutek Wilk wrote:
>> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
>> +
>>  /* ------------------------------------------------------------------ */
>>  
>>  static void gntdev_print_maps(struct gntdev_priv *priv,
>> @@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map)
>>  
>>  	atomic_sub(map->count, &pages_mapped);
>>  
>> -	if (map->pages)
>> +	if (map->pages) {
>> +		if (!use_ptemod)
>> +			unmap_grant_pages(map, 0, map->count);
> 
> In the past (before this patch) the unmap_grant_pages would be called
> on the .ioctl, .release, and .close (on VMA). This adds it now also
> on the mmu_notifier_ops paths. Why?
> 
This does not actually add the unmap on the mmu_notifier path. The MMU
notifier is used only if use_ptemod is true, and unmap_grant_pages is
only called when use_ptemod is false.

The HVM path for map and unmap is slightly different: HVM keeps the pages
mapped until the area is deleted, while the PV case (use_ptemod being true)
must unmap them when userspace unmaps the range. In the normal use case,
this makes no difference to users since unmap time is deletion time.

-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v6] Userspace grant communication
  2011-02-14 16:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
  2011-02-14 16:38   ` Konrad Rzeszutek Wilk
@ 2011-02-14 17:55   ` Daniel De Graaf
  2011-02-14 19:04     ` Konrad Rzeszutek Wilk
  1 sibling, 1 reply; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-14 17:55 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 02/14/2011 11:14 AM, Konrad Rzeszutek Wilk wrote:
> On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote:
>> Changes since v5:
>>   - Added a tested xen version to workaround in #4
>>   - Cleaned up variable names & structures
>>   - Clarified some of the cleanup in gntalloc
>>   - Removed copyright statement from public-domain files
>>
>> [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
>> [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
>> [PATCH 3/6] xen-gntdev: Add reference counting to maps
>> [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
>> [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
>> [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl
> 
> Hey Daniel,
> 
> I took a look at the patchset and then the bug-fixes:
> 
> Daniel De Graaf (12):
>       xen-gntdev: Change page limit to be global instead of per-open
>       xen-gntdev: Use find_vma rather than iterating our vma list manually
>       xen-gntdev: Add reference counting to maps
>       xen-gntdev: Support mapping in HVM domains
>       xen-gntalloc: Userspace grant allocation driver
>       xen/gntalloc,gntdev: Add unmap notify ioctl
>       xen-gntdev: Fix memory leak when mmap fails
>       xen-gntdev: Fix unmap notify on PV domains
>       xen-gntdev: Use map->vma for checking map validity
>       xen-gntdev: Avoid unmapping ranges twice
>       xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
>       xen-gntdev: Avoid double-mapping memory
> 
> 
> And besides the two questions I posted today they look OK to me. However
> I have on question that I think points to a bug.
> 
> Say that I call GNTDEV_MAP_GRANT_REF three times. The first time I provide
> a count of 4, then 1, and then once more 1.
> 
> The first call would end up with priv having:
> 
> priv-map[0] => map.count=4, map.user=1, map.index=0. We return op.index as 0.
> 
> The next call:
> 
> priv-map[0] => map.count=4, map.user=1, map.index=0.
> priv-map[1] => map.count=1, map.user=1, map.index=5 (gntdev_add_map
> ends up adding the index and the count from the previous map to it). We return op.index as 20480.
> 
I think this will come out with map.index=4, op.index=8192, since the only
entry in priv->maps has map->index = 0 and map->count = 4.

> The last call ends up with
> priv-map[0] => map.count=4, map.user=1, map.index=0.
> priv-map[1] => map.count=1, map.user=1, map.index=5
> priv-map[2] => map.count=1, map.user=1, map.index=0. And we return
> op.index as = 0.
> 
How do we return that? The "goto done" branch should not be taken unless there is
a hole in the existing priv->maps list created by a previous deletion.

I see add->index starting at 0, then set to 4 and then 5, its final value.

> It looks as gntdev_add_map ends does not do anything to the 
> map.index if the "if (add->index + add->count < map->index)" comes
> out true, and we end up with op.index=0. Which naturally is
> incorrect as that is associated with grant_map that has four entries!
> 
> I hadn't yet tried to modify the nice test-case program you provided
> to see if this is can happen in practice, but it sure looks like it could?
 
This code wasn't changed from the old gntdev code. In gntalloc, I went with the
much simpler method of an always-incrementing index for generating offsets;
there's no reason that that can't be done here if it looks like there's a
mistake. The code does look correct to me, and I have tested it with variable-size
grants (although not in the 4/1/1 configuration you suggested).

-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Re: [PATCH v6] Userspace grant communication
  2011-02-14 16:38   ` Konrad Rzeszutek Wilk
@ 2011-02-14 17:56     ` Daniel De Graaf
  2011-02-14 19:21       ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-14 17:56 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 02/14/2011 11:38 AM, Konrad Rzeszutek Wilk wrote:
> On Mon, Feb 14, 2011 at 11:14:03AM -0500, Konrad Rzeszutek Wilk wrote:
>> On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote:
>>> Changes since v5:
>>>   - Added a tested xen version to workaround in #4
>>>   - Cleaned up variable names & structures
>>>   - Clarified some of the cleanup in gntalloc
>>>   - Removed copyright statement from public-domain files
>>>
>>> [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
>>> [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
>>> [PATCH 3/6] xen-gntdev: Add reference counting to maps
>>> [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
>>> [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
>>> [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl
>>
>> Hey Daniel,
>>
>> I took a look at the patchset and then the bug-fixes:
>>
>> Daniel De Graaf (12):
>>       xen-gntdev: Change page limit to be global instead of per-open
>>       xen-gntdev: Use find_vma rather than iterating our vma list manually
>>       xen-gntdev: Add reference counting to maps
>>       xen-gntdev: Support mapping in HVM domains
>>       xen-gntalloc: Userspace grant allocation driver
>>       xen/gntalloc,gntdev: Add unmap notify ioctl
>>       xen-gntdev: Fix memory leak when mmap fails
>>       xen-gntdev: Fix unmap notify on PV domains
>>       xen-gntdev: Use map->vma for checking map validity
>>       xen-gntdev: Avoid unmapping ranges twice
>>       xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
>>       xen-gntdev: Avoid double-mapping memory
>>
>>
>> And besides the two questions I posted today they look OK to me. However
> 
> Oh, and Jeremy's comment about the WARN_ON on the copy_to_user..
> 

I did post an updated version of that patch without the bad WARN_ON. It still
resolves to a WARN_ON in all paths, but it's at a higher level.

-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
  2011-02-14 15:37   ` Konrad Rzeszutek Wilk
@ 2011-02-14 18:07     ` Daniel De Graaf
  0 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-14 18:07 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 02/14/2011 10:37 AM, Konrad Rzeszutek Wilk wrote:
>> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
>> index bc3b85e..257cc8d 100644
>> --- a/include/xen/gntalloc.h
>> +++ b/include/xen/gntalloc.h
>> @@ -47,4 +47,32 @@ struct ioctl_gntalloc_dealloc_gref {
>>  	/* Number of references to unmap */
>>  	uint32_t count;
>>  };
>> +
>> +/*
>> + * Sets up an unmap notification within the page, so that the other side can do
>> + * cleanup if this side crashes. Required to implement cross-domain robust
>> + * mutexes or close notification on communication channels.
>> + *
>> + * Each mapped page only supports one notification; multiple calls referring to
>> + * the same page overwrite the previous notification. You must clear the
>> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
>> + * to occur.
>> + */
>> +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
>> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
>> +struct ioctl_gntalloc_unmap_notify {
>> +	/* IN parameters */
>> +	/* Index of a byte in the page */
>> +	uint64_t index;
> 
> That isn't actually the whole truth. It is the index within the page
> and also the "offset used on a subsequent call to mmap()" (ioctl_gntdev_map_grant_ref)
> 
> It might make sense to change the description to: "offset of the mmap region
> and the index within the page" perhaps?

How about:
	/* Offset in the file descriptor for a byte within the page (same as
	 * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
	 * be cleared. Otherwise, it can be any byte in the page whose
	 * notification we are adjusting.
	 */ 

Would you prefer a patch that just changes the descriptions, or a repost of
the patch that adds the ioctl with the better descriptions?

>> +	/* Action(s) to take on unmap */
>> +	uint32_t action;
>> +	/* Event channel to notify */
>> +	uint32_t event_channel_port;
>> +};
>> +
>> +/* Clear (set to zero) the byte specified by index */
>> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
>> +/* Send an interrupt on the indicated event channel */
>> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
>> +
>>  #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */

[and the same for gntdev]

-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-02-14 17:43     ` Daniel De Graaf
@ 2011-02-14 18:52       ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-14 18:52 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Mon, Feb 14, 2011 at 12:43:10PM -0500, Daniel De Graaf wrote:
> On 02/14/2011 10:51 AM, Konrad Rzeszutek Wilk wrote:
> >> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
> >> +
> >>  /* ------------------------------------------------------------------ */
> >>  
> >>  static void gntdev_print_maps(struct gntdev_priv *priv,
> >> @@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map)
> >>  
> >>  	atomic_sub(map->count, &pages_mapped);
> >>  
> >> -	if (map->pages)
> >> +	if (map->pages) {
> >> +		if (!use_ptemod)
> >> +			unmap_grant_pages(map, 0, map->count);
> > 
> > In the past (before this patch) the unmap_grant_pages would be called
> > on the .ioctl, .release, and .close (on VMA). This adds it now also
> > on the mmu_notifier_ops paths. Why?
> > 
> This does not actually add the unmap on the mmu_notifier path. The MMU

Duh, you are right. I misread my notes. I meant that in the past the
.ioctl, .release, and .close would just do gntdev_put_map, but now they
are also calling the unmap_grant_pages (on the HVM path).

> notifier is used only if use_ptemod is true, and unmap_grant_pages is
> only called when use_ptemod is false.

.. and that would explain it - you need to call it there, since the MMU notifier
is unavailable to you in the HVM case.

Not exactly sure why that wasn't clear before.

> 
> The HVM path for map and unmap is slightly different: HVM keeps the pages
> mapped until the area is deleted, while the PV case (use_ptemod being true)
> must unmap them when userspace unmaps the range. In the normal use case,
> this makes no difference to users since unmap time is deletion time.

<nods> Let me augment the patch description to contain this.

> 
> -- 
> Daniel De Graaf
> National Security Agency

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v6] Userspace grant communication
  2011-02-14 17:55   ` Daniel De Graaf
@ 2011-02-14 19:04     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-14 19:04 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

> > And besides the two questions I posted today they look OK to me. However
> > I have one question that I think points to a bug.

.. snip ..

> I think this will come out with map.index=4, op.index=8192, since the only
> entry in priv->maps has map->index = 0 and map->count = 4.

Duh! Yes, and that changes how this all works out.
..
> much simpler method of an always-incrementing index for generating offsets;
> there's no reason that that can't be done here if it looks like there's a
> mistake. The code does look correct to me, and I have tested it with variable-size
> grants (although not in the 4/1/1 configuration you suggested).

That is OK. Thanks for working through this example.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Re: [PATCH v6] Userspace grant communication
  2011-02-14 17:56     ` Daniel De Graaf
@ 2011-02-14 19:21       ` Konrad Rzeszutek Wilk
  2011-02-14 20:55         ` Daniel De Graaf
  0 siblings, 1 reply; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-14 19:21 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

> >> And besides the two questions I posted today they look OK to me. However
> > 
> > Oh, and Jeremy's comment about the WARN_ON on the copy_to_user..
> > 
> 
> I did post an updated version of that patch without the bad WARN_ON. It still
> resolves to a WARN_ON in all paths, but it's at a higher level.

Ah there it is. 

Look in git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git stable/gntalloc.v6

[maybe in an hour - git.kernel.org takes a bit of time to resync to the mirrors]
to see if I am missing anything please.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: Re: [PATCH v6] Userspace grant communication
  2011-02-14 19:21       ` Konrad Rzeszutek Wilk
@ 2011-02-14 20:55         ` Daniel De Graaf
  0 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-02-14 20:55 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 02/14/2011 02:21 PM, Konrad Rzeszutek Wilk wrote:
>>>> And besides the two questions I posted today they look OK to me. However
>>>
>>> Oh, and Jeremy's comment about the WARN_ON on the copy_to_user..
>>>
>>
>> I did post an updated version of that patch without the bad WARN_ON. It still
>> resolves to a WARN_ON in all paths, but it's at a higher level.
> 
> Ah there it is. 
> 
> Look in git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git stable/gntalloc.v6
> 
> [maybe in an hour - git.kernel.org takes a bit of time to resync to the mirrors]
> to see if I am missing anything please.

9960be970cea52c1cb7d7c747ff6da367e1c01b5 looks good to me, nothing missing.

-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-03-04 15:57     ` Ian Campbell
@ 2011-03-04 16:34       ` Daniel De Graaf
  0 siblings, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-03-04 16:34 UTC (permalink / raw)
  To: Ian Campbell; +Cc: jeremy, xen-devel, Konrad Rzeszutek Wilk

On 03/04/2011 10:57 AM, Ian Campbell wrote:
> On Thu, 2011-01-27 at 18:52 +0000, Konrad Rzeszutek Wilk wrote:
>>> @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
>>>  
>>>       atomic_sub(map->count, &pages_mapped);
>>>  
>>> -     if (map->pages)
>>> +     if (map->pages) {
>>> +             if (!use_ptemod)
>>> +                     unmap_grant_pages(map, 0, map->count);
>>> +
>>>               for (i = 0; i < map->count; i++) {
>>> -                     if (map->pages[i])
>>> +                     uint32_t check, *tmp;
>>> +                     if (!map->pages[i])
>>> +                             continue;
>>> +                     /* XXX When unmapping in an HVM domain, Xen will
>>> +                      * sometimes end up mapping the GFN to an invalid MFN.
>>> +                      * In this case, writes will be discarded and reads will
>>> +                      * return all 0xFF bytes.  Leak these unusable GFNs
>>
>> I forgot to ask, under what version of Xen did you run this? I want to add
>> that to the comment so when it gets fixed we know what the failing version is.
>>
>>> +                      * until Xen supports fixing their p2m mapping.
>>> +                      */
>>> +                     tmp = kmap(map->pages[i]);
>>> +                     *tmp = 0xdeaddead;
> 
> I've just tripped over this check which faults in my PV guest. Seems to
> be related to the handling of failures of map_grant_pages()?
> 
> Was the underlying Xen issue here reported somewhere more obvious than
> this comment buried in a patch to the kernel?
> 
> If not please can you raise it as a separate thread clearly marked as a
> hypervisor issue/question, all I can find is bits and pieces spread
> through the threads associated with this kernel patch. I don't think
> I've got a clear enough picture of the issue from those fragments to
> pull it together into a sensible report.
> 
> Ian.
> 

I think there may be other bugs lurking here with these freed pages; I have
observed that even pages that pass this kmap check can become bad at a later
time. This might be due to TLB issues; I haven't had a chance to debug it.
I do have a patch that prevents the pages that have been granted from being
recycled into general use by the kernel; I hadn't posted it yet because it
didn't resolve the issue completely.

8<---------------------------------------------------------
xen-gntdev: avoid reuse of possibly-bad pages
    
In HVM, the unmap hypercall does not reliably associate a valid MFN with
a just-unmapped GFN. A simple validity test of the page is not
sufficient to determine if it will remain valid; pages have been
observed to remain mapped and later become invalid.
    
Instead of releasing the pages to the allocator, keep them in a list to
reuse their GFNs for future mappings, which should always produce a valid
mapping.

** Note: this patch is an RFC, not for a stable patch queue **
    
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index d43ff30..b9b1577 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -54,6 +54,12 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
 
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
+/* Pages that are unsafe to use except as gntdev pages due to bad PFN/MFN
+ * mapping after an unmap.
+ */
+static LIST_HEAD(page_reuse);
+static DEFINE_SPINLOCK(page_reuse_lock);
+
 static int use_ptemod;
 
 struct gntdev_priv {
@@ -122,13 +128,21 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	    NULL == add->pages)
 		goto err;
 
+	spin_lock(&page_reuse_lock);
 	for (i = 0; i < count; i++) {
-		add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
-		if (add->pages[i] == NULL)
-			goto err;
+		if (list_empty(&page_reuse)) {
+			add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+			if (add->pages[i] == NULL)
+				goto err_unlock;
+		} else {
+			add->pages[i] = list_entry(page_reuse.next,
+						   struct page, lru);
+			list_del(&add->pages[i]->lru);
+		}
 		add->map_ops[i].handle = -1;
 		add->unmap_ops[i].handle = -1;
 	}
+	spin_unlock(&page_reuse_lock);
 
 	add->index = 0;
 	add->count = count;
@@ -136,12 +150,13 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 
 	return add;
 
+err_unlock:
+	for (i = 0; i < count; i++) {
+		if (add->pages[i])
+			list_add(&add->pages[i]->lru, &page_reuse);
+	}
+	spin_unlock(&page_reuse_lock);
 err:
-	if (add->pages)
-		for (i = 0; i < count; i++) {
-			if (add->pages[i])
-				__free_page(add->pages[i]);
-		}
 	kfree(add->pages);
 	kfree(add->grants);
 	kfree(add->map_ops);
@@ -202,29 +217,14 @@ static void gntdev_put_map(struct grant_map *map)
 		if (!use_ptemod)
 			unmap_grant_pages(map, 0, map->count);
 
+		spin_lock(&page_reuse_lock);
 		for (i = 0; i < map->count; i++) {
 			uint32_t check, *tmp;
 			if (!map->pages[i])
 				continue;
-			/* XXX When unmapping in an HVM domain, Xen will
-			 * sometimes end up mapping the GFN to an invalid MFN.
-			 * In this case, writes will be discarded and reads will
-			 * return all 0xFF bytes.  Leak these unusable GFNs
-			 * until Xen supports fixing their p2m mapping.
-			 *
-			 * Confirmed present in Xen 4.1-RC3 with HVM source
-			 */
-			tmp = kmap(map->pages[i]);
-			*tmp = 0xdeaddead;
-			mb();
-			check = *tmp;
-			kunmap(map->pages[i]);
-			if (check == 0xdeaddead)
-				__free_page(map->pages[i]);
-			else
-				pr_debug("Discard page %d=%ld\n", i,
-					page_to_pfn(map->pages[i]));
+			list_add(&map->pages[i]->lru, &page_reuse);
 		}
+		spin_unlock(&page_reuse_lock);
 	}
 	kfree(map->pages);
 	kfree(map->grants);

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  2011-01-27 19:26     ` Daniel De Graaf
@ 2011-03-04 15:57     ` Ian Campbell
  2011-03-04 16:34       ` Daniel De Graaf
  1 sibling, 1 reply; 39+ messages in thread
From: Ian Campbell @ 2011-03-04 15:57 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Konrad Rzeszutek Wilk

On Thu, 2011-01-27 at 18:52 +0000, Konrad Rzeszutek Wilk wrote:
> > @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
> >  
> >       atomic_sub(map->count, &pages_mapped);
> >  
> > -     if (map->pages)
> > +     if (map->pages) {
> > +             if (!use_ptemod)
> > +                     unmap_grant_pages(map, 0, map->count);
> > +
> >               for (i = 0; i < map->count; i++) {
> > -                     if (map->pages[i])
> > +                     uint32_t check, *tmp;
> > +                     if (!map->pages[i])
> > +                             continue;
> > +                     /* XXX When unmapping in an HVM domain, Xen will
> > +                      * sometimes end up mapping the GFN to an invalid MFN.
> > +                      * In this case, writes will be discarded and reads will
> > +                      * return all 0xFF bytes.  Leak these unusable GFNs
> 
> I forgot to ask, under what version of Xen did you run this? I want to add
> that to the comment so when it gets fixed we know what the failing version is.
> 
> > +                      * until Xen supports fixing their p2m mapping.
> > +                      */
> > +                     tmp = kmap(map->pages[i]);
> > +                     *tmp = 0xdeaddead;

I've just tripped over this check which faults in my PV guest. Seems to
be related to the handling of failures of map_grant_pages()?

Was the underlying Xen issue here reported somewhere more obvious than
this comment buried in a patch to the kernel?

If not please can you raise it as a separate thread clearly marked as a
hypervisor issue/question, all I can find is bits and pieces spread
through the threads associated with this kernel patch. I don't think
I've got a clear enough picture of the issue from those fragments to
pull it together into a sensible report.

Ian.

> > +                     mb();
> > +                     check = *tmp;
> > +                     kunmap(map->pages[i]); 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
@ 2011-01-27 19:26     ` Daniel De Graaf
  2011-03-04 15:57     ` Ian Campbell
  1 sibling, 0 replies; 39+ messages in thread
From: Daniel De Graaf @ 2011-01-27 19:26 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 01/27/2011 01:52 PM, Konrad Rzeszutek Wilk wrote:
> On Fri, Jan 21, 2011 at 10:59:06AM -0500, Daniel De Graaf wrote:
>> HVM does not allow direct PTE modification, so instead we request
>> that Xen change its internal p2m mappings on the allocated pages and
>> map the memory into userspace normally.
>>
>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>> ---
>>  drivers/xen/gntdev.c      |  115 +++++++++++++++++++++++++++++++-------------
>>  drivers/xen/grant-table.c |    6 ++
>>  2 files changed, 87 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
>> index 256162b..a5710e8 100644
>> --- a/drivers/xen/gntdev.c
>> +++ b/drivers/xen/gntdev.c
>> @@ -32,6 +32,7 @@
>>  #include <linux/sched.h>
>>  #include <linux/spinlock.h>
>>  #include <linux/slab.h>
>> +#include <linux/highmem.h>
>>  
>>  #include <xen/xen.h>
>>  #include <xen/grant_table.h>
>> @@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
>>  
>>  static atomic_t pages_mapped = ATOMIC_INIT(0);
>>  
>> +static int use_ptemod;
>> +
>>  struct gntdev_priv {
>>  	struct list_head maps;
>>  	/* lock protects maps from concurrent changes */
>> @@ -74,6 +77,8 @@ struct grant_map {
>>  	struct page **pages;
>>  };
>>  
>> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
>> +
>>  /* ------------------------------------------------------------------ */
>>  
>>  static void gntdev_print_maps(struct gntdev_priv *priv,
>> @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
>>  
>>  	atomic_sub(map->count, &pages_mapped);
>>  
>> -	if (map->pages)
>> +	if (map->pages) {
>> +		if (!use_ptemod)
>> +			unmap_grant_pages(map, 0, map->count);
>> +
>>  		for (i = 0; i < map->count; i++) {
>> -			if (map->pages[i])
>> +			uint32_t check, *tmp;
>> +			if (!map->pages[i])
>> +				continue;
>> +			/* XXX When unmapping in an HVM domain, Xen will
>> +			 * sometimes end up mapping the GFN to an invalid MFN.
>> +			 * In this case, writes will be discarded and reads will
>> +			 * return all 0xFF bytes.  Leak these unusable GFNs
> 
> I forgot to ask, under what version of Xen did you run this? I want to add
> that to the comment so when it gets fixed we know what the failing version is.

4.1-unstable. In particular, 22641:4e108cf56d07 exhibits the bug, and I have not
found a version of 4.1 that doesn't (although I haven't searched for that in
particular). I could try to find other versions that also exhibit this behavior, if
it would be useful to have a list.

Note for reproducing: at r22641, you need to revert 22402:7d2fdc083c9c or the
grant table of the HVM guest will not be readable by Xen. My test that exercises
the bug is an HVM-to-HVM grant.

>> +			 * until Xen supports fixing their p2m mapping.
>> +			 */
>> +			tmp = kmap(map->pages[i]);
>> +			*tmp = 0xdeaddead;
>> +			mb();
>> +			check = *tmp;
>> +			kunmap(map->pages[i]);
>> +			if (check == 0xdeaddead)
>>  				__free_page(map->pages[i]);
>> +			else
>> +				pr_debug("Discard page %d=%ld\n", i,
>> +					page_to_pfn(map->pages[i]));
>>  		}
>> +	}
>>  	kfree(map->pages);
>>  	kfree(map->grants);
>>  	kfree(map->map_ops);
>> @@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
>>  {
>>  	struct grant_map *map = data;
>>  	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
>> +	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
>>  	u64 pte_maddr;
>>  
>>  	BUG_ON(pgnr >= map->count);
>>  	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
>>  
>> -	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
>> -			  GNTMAP_contains_pte | map->flags,
>> +	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
>>  			  map->grants[pgnr].ref,
>>  			  map->grants[pgnr].domid);
>> -	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
>> -			    GNTMAP_contains_pte | map->flags,
>> +	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
>>  			    0 /* handle */);
>>  	return 0;
>>  }
>> @@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
>>  static int map_grant_pages(struct grant_map *map)
>>  {
>>  	int i, err = 0;
>> +	phys_addr_t addr;
>> +
>> +	if (!use_ptemod) {
>> +		for (i = 0; i < map->count; i++) {
>> +			addr = (phys_addr_t)
>> +				pfn_to_kaddr(page_to_pfn(map->pages[i]));
>> +			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
>> +				map->grants[i].ref,
>> +				map->grants[i].domid);
>> +			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
>> +				map->flags, 0 /* handle */);
>> +		}
>> +	}
>>  
>>  	pr_debug("map %d+%d\n", map->index, map->count);
>>  	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
>> @@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
>>  	gntdev_put_map(map);
>>  }
>>  
>> -static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>> -{
>> -	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
>> -			vmf->virtual_address, vmf->pgoff);
>> -	vmf->flags = VM_FAULT_ERROR;
>> -	return 0;
>> -}
>> -
>>  static struct vm_operations_struct gntdev_vmops = {
>>  	.close = gntdev_vma_close,
>> -	.fault = gntdev_vma_fault,
>>  };
>>  
>>  /* ------------------------------------------------------------------ */
>> @@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip)
>>  	INIT_LIST_HEAD(&priv->maps);
>>  	spin_lock_init(&priv->lock);
>>  
>> -	priv->mm = get_task_mm(current);
>> -	if (!priv->mm) {
>> -		kfree(priv);
>> -		return -ENOMEM;
>> +	if (use_ptemod) {
>> +		priv->mm = get_task_mm(current);
>> +		if (!priv->mm) {
>> +			kfree(priv);
>> +			return -ENOMEM;
>> +		}
>> +		priv->mn.ops = &gntdev_mmu_ops;
>> +		ret = mmu_notifier_register(&priv->mn, priv->mm);
>> +		mmput(priv->mm);
>>  	}
>> -	priv->mn.ops = &gntdev_mmu_ops;
>> -	ret = mmu_notifier_register(&priv->mn, priv->mm);
>> -	mmput(priv->mm);
>>  
>>  	if (ret) {
>>  		kfree(priv);
>> @@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip)
>>  	}
>>  	spin_unlock(&priv->lock);
>>  
>> -	mmu_notifier_unregister(&priv->mn, priv->mm);
>> +	if (use_ptemod)
>> +		mmu_notifier_unregister(&priv->mn, priv->mm);
>>  	kfree(priv);
>>  	return 0;
>>  }
>> @@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>>  	int index = vma->vm_pgoff;
>>  	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
>>  	struct grant_map *map;
>> -	int err = -EINVAL;
>> +	int i, err = -EINVAL;
>>  
>>  	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
>>  		return -EINVAL;
>> @@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>>  	map = gntdev_find_map_index(priv, index, count);
>>  	if (!map)
>>  		goto unlock_out;
>> -	if (map->vma)
>> +	if (use_ptemod && map->vma)
>>  		goto unlock_out;
>> -	if (priv->mm != vma->vm_mm) {
>> +	if (use_ptemod && priv->mm != vma->vm_mm) {
>>  		printk(KERN_WARNING "Huh? Other mm?\n");
>>  		goto unlock_out;
>>  	}
>> @@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>>  	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
>>  
>>  	vma->vm_private_data = map;
>> -	map->vma = vma;
>>  
>> -	map->flags = GNTMAP_host_map | GNTMAP_application_map;
>> +	if (use_ptemod)
>> +		map->vma = vma;
>> +
>> +	map->flags = GNTMAP_host_map;
>>  	if (!(vma->vm_flags & VM_WRITE))
>>  		map->flags |= GNTMAP_readonly;
>>  
>>  	spin_unlock(&priv->lock);
>>  
>> -	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
>> -				  vma->vm_end - vma->vm_start,
>> -				  find_grant_ptes, map);
>> -	if (err) {
>> -		printk(KERN_WARNING "find_grant_ptes() failure.\n");
>> -		return err;
>> +	if (use_ptemod) {
>> +		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
>> +					  vma->vm_end - vma->vm_start,
>> +					  find_grant_ptes, map);
>> +		if (err) {
>> +			printk(KERN_WARNING "find_grant_ptes() failure.\n");
>> +			return err;
>> +		}
>>  	}
>>  
>>  	err = map_grant_pages(map);
>> @@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>>  
>>  	map->is_mapped = 1;
>>  
>> +	if (!use_ptemod) {
>> +		for (i = 0; i < count; i++) {
>> +			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
>> +				map->pages[i]);
>> +			if (err)
>> +				return err;
>> +		}
>> +	}
>> +
>>  	return 0;
>>  
>>  unlock_out:
>> @@ -595,6 +640,8 @@ static int __init gntdev_init(void)
>>  	if (!xen_domain())
>>  		return -ENODEV;
>>  
>> +	use_ptemod = xen_pv_domain();
>> +
>>  	err = misc_register(&gntdev_miscdev);
>>  	if (err != 0) {
>>  		printk(KERN_ERR "Could not register gntdev device\n");
>> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
>> index 9ef54eb..9428ced 100644
>> --- a/drivers/xen/grant-table.c
>> +++ b/drivers/xen/grant-table.c
>> @@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
>>  	if (ret)
>>  		return ret;
>>  
>> +	if (xen_feature(XENFEAT_auto_translated_physmap))
>> +		return ret;
>> +
>>  	for (i = 0; i < count; i++) {
>>  		/* m2p override only supported for GNTMAP_contains_pte mappings */
>>  		if (!(map_ops[i].flags & GNTMAP_contains_pte))
>> @@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
>>  	if (ret)
>>  		return ret;
>>  
>> +	if (xen_feature(XENFEAT_auto_translated_physmap))
>> +		return ret;
>> +
>>  	for (i = 0; i < count; i++) {
>>  		ret = m2p_remove_override(pages[i]);
>>  		if (ret)
>> -- 
>> 1.7.3.4

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-01-21 15:59 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
@ 2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  2011-01-27 19:26     ` Daniel De Graaf
  2011-03-04 15:57     ` Ian Campbell
  0 siblings, 2 replies; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-01-27 18:52 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Fri, Jan 21, 2011 at 10:59:06AM -0500, Daniel De Graaf wrote:
> HVM does not allow direct PTE modification, so instead we request
> that Xen change its internal p2m mappings on the allocated pages and
> map the memory into userspace normally.
> 
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> ---
>  drivers/xen/gntdev.c      |  115 +++++++++++++++++++++++++++++++-------------
>  drivers/xen/grant-table.c |    6 ++
>  2 files changed, 87 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index 256162b..a5710e8 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -32,6 +32,7 @@
>  #include <linux/sched.h>
>  #include <linux/spinlock.h>
>  #include <linux/slab.h>
> +#include <linux/highmem.h>
>  
>  #include <xen/xen.h>
>  #include <xen/grant_table.h>
> @@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
>  
>  static atomic_t pages_mapped = ATOMIC_INIT(0);
>  
> +static int use_ptemod;
> +
>  struct gntdev_priv {
>  	struct list_head maps;
>  	/* lock protects maps from concurrent changes */
> @@ -74,6 +77,8 @@ struct grant_map {
>  	struct page **pages;
>  };
>  
> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
> +
>  /* ------------------------------------------------------------------ */
>  
>  static void gntdev_print_maps(struct gntdev_priv *priv,
> @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
>  
>  	atomic_sub(map->count, &pages_mapped);
>  
> -	if (map->pages)
> +	if (map->pages) {
> +		if (!use_ptemod)
> +			unmap_grant_pages(map, 0, map->count);
> +
>  		for (i = 0; i < map->count; i++) {
> -			if (map->pages[i])
> +			uint32_t check, *tmp;
> +			if (!map->pages[i])
> +				continue;
> +			/* XXX When unmapping in an HVM domain, Xen will
> +			 * sometimes end up mapping the GFN to an invalid MFN.
> +			 * In this case, writes will be discarded and reads will
> +			 * return all 0xFF bytes.  Leak these unusable GFNs

I forgot to ask, under what version of Xen did you run this? I want to add
that to the comment so when it gets fixed we know what the failing version is.

> +			 * until Xen supports fixing their p2m mapping.
> +			 */
> +			tmp = kmap(map->pages[i]);
> +			*tmp = 0xdeaddead;
> +			mb();
> +			check = *tmp;
> +			kunmap(map->pages[i]);
> +			if (check == 0xdeaddead)
>  				__free_page(map->pages[i]);
> +			else
> +				pr_debug("Discard page %d=%ld\n", i,
> +					page_to_pfn(map->pages[i]));
>  		}
> +	}
>  	kfree(map->pages);
>  	kfree(map->grants);
>  	kfree(map->map_ops);
> @@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
>  {
>  	struct grant_map *map = data;
>  	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
> +	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
>  	u64 pte_maddr;
>  
>  	BUG_ON(pgnr >= map->count);
>  	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
>  
> -	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
> -			  GNTMAP_contains_pte | map->flags,
> +	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
>  			  map->grants[pgnr].ref,
>  			  map->grants[pgnr].domid);
> -	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
> -			    GNTMAP_contains_pte | map->flags,
> +	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
>  			    0 /* handle */);
>  	return 0;
>  }
> @@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
>  static int map_grant_pages(struct grant_map *map)
>  {
>  	int i, err = 0;
> +	phys_addr_t addr;
> +
> +	if (!use_ptemod) {
> +		for (i = 0; i < map->count; i++) {
> +			addr = (phys_addr_t)
> +				pfn_to_kaddr(page_to_pfn(map->pages[i]));
> +			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
> +				map->grants[i].ref,
> +				map->grants[i].domid);
> +			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
> +				map->flags, 0 /* handle */);
> +		}
> +	}
>  
>  	pr_debug("map %d+%d\n", map->index, map->count);
>  	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
> @@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
>  	gntdev_put_map(map);
>  }
>  
> -static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> -{
> -	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
> -			vmf->virtual_address, vmf->pgoff);
> -	vmf->flags = VM_FAULT_ERROR;
> -	return 0;
> -}
> -
>  static struct vm_operations_struct gntdev_vmops = {
>  	.close = gntdev_vma_close,
> -	.fault = gntdev_vma_fault,
>  };
>  
>  /* ------------------------------------------------------------------ */
> @@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip)
>  	INIT_LIST_HEAD(&priv->maps);
>  	spin_lock_init(&priv->lock);
>  
> -	priv->mm = get_task_mm(current);
> -	if (!priv->mm) {
> -		kfree(priv);
> -		return -ENOMEM;
> +	if (use_ptemod) {
> +		priv->mm = get_task_mm(current);
> +		if (!priv->mm) {
> +			kfree(priv);
> +			return -ENOMEM;
> +		}
> +		priv->mn.ops = &gntdev_mmu_ops;
> +		ret = mmu_notifier_register(&priv->mn, priv->mm);
> +		mmput(priv->mm);
>  	}
> -	priv->mn.ops = &gntdev_mmu_ops;
> -	ret = mmu_notifier_register(&priv->mn, priv->mm);
> -	mmput(priv->mm);
>  
>  	if (ret) {
>  		kfree(priv);
> @@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip)
>  	}
>  	spin_unlock(&priv->lock);
>  
> -	mmu_notifier_unregister(&priv->mn, priv->mm);
> +	if (use_ptemod)
> +		mmu_notifier_unregister(&priv->mn, priv->mm);
>  	kfree(priv);
>  	return 0;
>  }
> @@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>  	int index = vma->vm_pgoff;
>  	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
>  	struct grant_map *map;
> -	int err = -EINVAL;
> +	int i, err = -EINVAL;
>  
>  	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
>  		return -EINVAL;
> @@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>  	map = gntdev_find_map_index(priv, index, count);
>  	if (!map)
>  		goto unlock_out;
> -	if (map->vma)
> +	if (use_ptemod && map->vma)
>  		goto unlock_out;
> -	if (priv->mm != vma->vm_mm) {
> +	if (use_ptemod && priv->mm != vma->vm_mm) {
>  		printk(KERN_WARNING "Huh? Other mm?\n");
>  		goto unlock_out;
>  	}
> @@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>  	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
>  
>  	vma->vm_private_data = map;
> -	map->vma = vma;
>  
> -	map->flags = GNTMAP_host_map | GNTMAP_application_map;
> +	if (use_ptemod)
> +		map->vma = vma;
> +
> +	map->flags = GNTMAP_host_map;
>  	if (!(vma->vm_flags & VM_WRITE))
>  		map->flags |= GNTMAP_readonly;
>  
>  	spin_unlock(&priv->lock);
>  
> -	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
> -				  vma->vm_end - vma->vm_start,
> -				  find_grant_ptes, map);
> -	if (err) {
> -		printk(KERN_WARNING "find_grant_ptes() failure.\n");
> -		return err;
> +	if (use_ptemod) {
> +		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
> +					  vma->vm_end - vma->vm_start,
> +					  find_grant_ptes, map);
> +		if (err) {
> +			printk(KERN_WARNING "find_grant_ptes() failure.\n");
> +			return err;
> +		}
>  	}
>  
>  	err = map_grant_pages(map);
> @@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>  
>  	map->is_mapped = 1;
>  
> +	if (!use_ptemod) {
> +		for (i = 0; i < count; i++) {
> +			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
> +				map->pages[i]);
> +			if (err)
> +				return err;
> +		}
> +	}
> +
>  	return 0;
>  
>  unlock_out:
> @@ -595,6 +640,8 @@ static int __init gntdev_init(void)
>  	if (!xen_domain())
>  		return -ENODEV;
>  
> +	use_ptemod = xen_pv_domain();
> +
>  	err = misc_register(&gntdev_miscdev);
>  	if (err != 0) {
>  		printk(KERN_ERR "Could not register gntdev device\n");
> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> index 9ef54eb..9428ced 100644
> --- a/drivers/xen/grant-table.c
> +++ b/drivers/xen/grant-table.c
> @@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
>  	if (ret)
>  		return ret;
>  
> +	if (xen_feature(XENFEAT_auto_translated_physmap))
> +		return ret;
> +
>  	for (i = 0; i < count; i++) {
>  		/* m2p override only supported for GNTMAP_contains_pte mappings */
>  		if (!(map_ops[i].flags & GNTMAP_contains_pte))
> @@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
>  	if (ret)
>  		return ret;
>  
> +	if (xen_feature(XENFEAT_auto_translated_physmap))
> +		return ret;
> +
>  	for (i = 0; i < count; i++) {
>  		ret = m2p_remove_override(pages[i]);
>  		if (ret)
> -- 
> 1.7.3.4

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-01-21 15:59 [SPAM] [PATCH v5] " Daniel De Graaf
@ 2011-01-21 15:59 ` Daniel De Graaf
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 39+ messages in thread
From: Daniel De Graaf @ 2011-01-21 15:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

HVM does not allow direct PTE modification, so instead we request
that Xen change its internal p2m mappings on the allocated pages and
map the memory into userspace normally.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c      |  115 +++++++++++++++++++++++++++++++-------------
 drivers/xen/grant-table.c |    6 ++
 2 files changed, 87 insertions(+), 34 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 256162b..a5710e8 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -32,6 +32,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/highmem.h>
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
 
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
+static int use_ptemod;
+
 struct gntdev_priv {
 	struct list_head maps;
 	/* lock protects maps from concurrent changes */
@@ -74,6 +77,8 @@ struct grant_map {
 	struct page **pages;
 };
 
+static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+
 /* ------------------------------------------------------------------ */
 
 static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
 
 	atomic_sub(map->count, &pages_mapped);
 
-	if (map->pages)
+	if (map->pages) {
+		if (!use_ptemod)
+			unmap_grant_pages(map, 0, map->count);
+
 		for (i = 0; i < map->count; i++) {
-			if (map->pages[i])
+			uint32_t check, *tmp;
+			if (!map->pages[i])
+				continue;
+			/* XXX When unmapping in an HVM domain, Xen will
+			 * sometimes end up mapping the GFN to an invalid MFN.
+			 * In this case, writes will be discarded and reads will
+			 * return all 0xFF bytes.  Leak these unusable GFNs
+			 * until Xen supports fixing their p2m mapping.
+			 */
+			tmp = kmap(map->pages[i]);
+			*tmp = 0xdeaddead;
+			mb();
+			check = *tmp;
+			kunmap(map->pages[i]);
+			if (check == 0xdeaddead)
 				__free_page(map->pages[i]);
+			else
+				pr_debug("Discard page %d=%ld\n", i,
+					page_to_pfn(map->pages[i]));
 		}
+	}
 	kfree(map->pages);
 	kfree(map->grants);
 	kfree(map->map_ops);
@@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
 {
 	struct grant_map *map = data;
 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
 	u64 pte_maddr;
 
 	BUG_ON(pgnr >= map->count);
 	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
 
-	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
-			  GNTMAP_contains_pte | map->flags,
+	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
 			  map->grants[pgnr].ref,
 			  map->grants[pgnr].domid);
-	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
-			    GNTMAP_contains_pte | map->flags,
+	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
 			    0 /* handle */);
 	return 0;
 }
@@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
 static int map_grant_pages(struct grant_map *map)
 {
 	int i, err = 0;
+	phys_addr_t addr;
+
+	if (!use_ptemod) {
+		for (i = 0; i < map->count; i++) {
+			addr = (phys_addr_t)
+				pfn_to_kaddr(page_to_pfn(map->pages[i]));
+			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
+				map->grants[i].ref,
+				map->grants[i].domid);
+			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
+				map->flags, 0 /* handle */);
+		}
+	}
 
 	pr_debug("map %d+%d\n", map->index, map->count);
 	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
@@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	gntdev_put_map(map);
 }
 
-static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
-			vmf->virtual_address, vmf->pgoff);
-	vmf->flags = VM_FAULT_ERROR;
-	return 0;
-}
-
 static struct vm_operations_struct gntdev_vmops = {
 	.close = gntdev_vma_close,
-	.fault = gntdev_vma_fault,
 };
 
 /* ------------------------------------------------------------------ */
@@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 	INIT_LIST_HEAD(&priv->maps);
 	spin_lock_init(&priv->lock);
 
-	priv->mm = get_task_mm(current);
-	if (!priv->mm) {
-		kfree(priv);
-		return -ENOMEM;
+	if (use_ptemod) {
+		priv->mm = get_task_mm(current);
+		if (!priv->mm) {
+			kfree(priv);
+			return -ENOMEM;
+		}
+		priv->mn.ops = &gntdev_mmu_ops;
+		ret = mmu_notifier_register(&priv->mn, priv->mm);
+		mmput(priv->mm);
 	}
-	priv->mn.ops = &gntdev_mmu_ops;
-	ret = mmu_notifier_register(&priv->mn, priv->mm);
-	mmput(priv->mm);
 
 	if (ret) {
 		kfree(priv);
@@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 	}
 	spin_unlock(&priv->lock);
 
-	mmu_notifier_unregister(&priv->mn, priv->mm);
+	if (use_ptemod)
+		mmu_notifier_unregister(&priv->mn, priv->mm);
 	kfree(priv);
 	return 0;
 }
@@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	int index = vma->vm_pgoff;
 	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	struct grant_map *map;
-	int err = -EINVAL;
+	int i, err = -EINVAL;
 
 	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
@@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	map = gntdev_find_map_index(priv, index, count);
 	if (!map)
 		goto unlock_out;
-	if (map->vma)
+	if (use_ptemod && map->vma)
 		goto unlock_out;
-	if (priv->mm != vma->vm_mm) {
+	if (use_ptemod && priv->mm != vma->vm_mm) {
 		printk(KERN_WARNING "Huh? Other mm?\n");
 		goto unlock_out;
 	}
@@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
 
 	vma->vm_private_data = map;
-	map->vma = vma;
 
-	map->flags = GNTMAP_host_map | GNTMAP_application_map;
+	if (use_ptemod)
+		map->vma = vma;
+
+	map->flags = GNTMAP_host_map;
 	if (!(vma->vm_flags & VM_WRITE))
 		map->flags |= GNTMAP_readonly;
 
 	spin_unlock(&priv->lock);
 
-	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
-				  vma->vm_end - vma->vm_start,
-				  find_grant_ptes, map);
-	if (err) {
-		printk(KERN_WARNING "find_grant_ptes() failure.\n");
-		return err;
+	if (use_ptemod) {
+		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+					  vma->vm_end - vma->vm_start,
+					  find_grant_ptes, map);
+		if (err) {
+			printk(KERN_WARNING "find_grant_ptes() failure.\n");
+			return err;
+		}
 	}
 
 	err = map_grant_pages(map);
@@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 
 	map->is_mapped = 1;
 
+	if (!use_ptemod) {
+		for (i = 0; i < count; i++) {
+			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
+				map->pages[i]);
+			if (err)
+				return err;
+		}
+	}
+
 	return 0;
 
 unlock_out:
@@ -595,6 +640,8 @@ static int __init gntdev_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	use_ptemod = xen_pv_domain();
+
 	err = misc_register(&gntdev_miscdev);
 	if (err != 0) {
 		printk(KERN_ERR "Could not register gntdev device\n");
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 9ef54eb..9428ced 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 	if (ret)
 		return ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
 		/* m2p override only supported for GNTMAP_contains_pte mappings */
 		if (!(map_ops[i].flags & GNTMAP_contains_pte))
@@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	if (ret)
 		return ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
 		ret = m2p_remove_override(pages[i]);
 		if (ret)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2011-03-04 16:34 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
2011-02-03 17:18 ` [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open Daniel De Graaf
2011-02-03 17:19 ` [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually Daniel De Graaf
2011-02-03 17:19 ` [PATCH 3/6] xen-gntdev: Add reference counting to maps Daniel De Graaf
2011-02-03 17:19 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
2011-02-14 15:51   ` Konrad Rzeszutek Wilk
2011-02-14 17:43     ` Daniel De Graaf
2011-02-14 18:52       ` Konrad Rzeszutek Wilk
2011-02-03 17:19 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
2011-02-08 22:48   ` Konrad Rzeszutek Wilk
2011-02-09 18:52     ` Daniel De Graaf
2011-02-03 17:19 ` [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl Daniel De Graaf
2011-02-14 15:37   ` Konrad Rzeszutek Wilk
2011-02-14 18:07     ` Daniel De Graaf
2011-02-03 19:16 ` [PATCH] xen-gntdev: Fix memory leak when mmap fails Daniel De Graaf
2011-02-07 23:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
2011-02-08 14:14   ` [PATCH] xen-gntdev: Fix unmap notify on PV domains Daniel De Graaf
2011-02-08 22:58     ` Konrad Rzeszutek Wilk
2011-02-09 20:33       ` [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings Daniel De Graaf
2011-02-09 21:09         ` [PATCH v2] " Daniel De Graaf
2011-02-09 22:22         ` [PATCH] " Jeremy Fitzhardinge
2011-02-09 23:11           ` Daniel De Graaf
2011-02-09 23:15           ` [PATCH v3] " Daniel De Graaf
2011-02-08 21:49   ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
2011-02-09 20:11     ` [PATCH] xen-gntdev: Use map->vma for checking map validity Daniel De Graaf
2011-02-09 20:12     ` [PATCH] xen-gntdev: Avoid unmapping ranges twice Daniel De Graaf
2011-02-09 21:11 ` [PATCH] xen-gntdev: Avoid double-mapping memory Daniel De Graaf
2011-02-14 16:14 ` [PATCH v6] Userspace grant communication Konrad Rzeszutek Wilk
2011-02-14 16:38   ` Konrad Rzeszutek Wilk
2011-02-14 17:56     ` Daniel De Graaf
2011-02-14 19:21       ` Konrad Rzeszutek Wilk
2011-02-14 20:55         ` Daniel De Graaf
2011-02-14 17:55   ` Daniel De Graaf
2011-02-14 19:04     ` Konrad Rzeszutek Wilk
  -- strict thread matches above, loose matches on Subject: below --
2011-01-21 15:59 [SPAM] [PATCH v5] " Daniel De Graaf
2011-01-21 15:59 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
2011-01-27 18:52   ` Konrad Rzeszutek Wilk
2011-01-27 19:26     ` Daniel De Graaf
2011-03-04 15:57     ` Ian Campbell
2011-03-04 16:34       ` Daniel De Graaf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.