All of lore.kernel.org
 help / color / mirror / Atom feed
* [SPAM]  [PATCH v5] Userspace grant communication
@ 2011-01-21 15:59 Daniel De Graaf
  2011-01-21 15:59 ` [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open Daniel De Graaf
                   ` (5 more replies)
  0 siblings, 6 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-21 15:59 UTC (permalink / raw)
  To: xen-devel; +Cc: jeremy, Ian.Campbell, konrad.wilk

Changes since v4:
 - Fixed patches to pass checkpatch.pl
 - Removed the old patch #2 changing memory allocation
 - One new patch, unmap notification, inspired by robust futex code

[PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
[PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
[PATCH 3/6] xen-gntdev: Add reference counting to maps
[PATCH 4/6] xen-gntdev: Support mapping in HVM domains
[PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
[PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
  2011-01-21 15:59 [SPAM] [PATCH v5] Userspace grant communication Daniel De Graaf
@ 2011-01-21 15:59 ` Daniel De Graaf
  2011-01-21 15:59 ` [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually Daniel De Graaf
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-21 15:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

Because there is no limitation on how many times a user can open a
given device file, an per-file-description limit on the number of
pages granted offers little to no benefit. Change to a global limit
and remove the ioctl() as the parameter can now be changed via sysfs.

Xen tools changeset 22768:f8d801e5573e is needed to eliminate the
error this change produces in xc_gnttab_set_max_grants.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   50 ++++++++++++++------------------------------------
 1 files changed, 14 insertions(+), 36 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1e31cdc..23d208a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -45,15 +45,15 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
 	      "Gerd Hoffmann <kraxel@redhat.com>");
 MODULE_DESCRIPTION("User-space granted page access driver");
 
-static int limit = 1024;
+static int limit = 1024*1024;
 module_param(limit, int, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at "
-		"once by a gntdev instance");
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
+		"the gntdev device");
+
+static atomic_t pages_mapped = ATOMIC_INIT(0);
 
 struct gntdev_priv {
 	struct list_head maps;
-	uint32_t used;
-	uint32_t limit;
 	/* lock protects maps from concurrent changes */
 	spinlock_t lock;
 	struct mm_struct *mm;
@@ -82,9 +82,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
 #ifdef DEBUG
 	struct grant_map *map;
 
-	pr_debug("maps list (priv %p, usage %d/%d)\n",
-	       priv, priv->used, priv->limit);
-
+	pr_debug("%s: maps list (priv %p)\n", __func__, priv);
 	list_for_each_entry(map, &priv->maps, next)
 		pr_debug("  index %2d, count %2d %s\n",
 		       map->index, map->count,
@@ -121,9 +119,6 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	add->count = count;
 	add->priv  = priv;
 
-	if (add->count + priv->used > priv->limit)
-		goto err;
-
 	return add;
 
 err:
@@ -154,7 +149,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
 	list_add_tail(&add->next, &priv->maps);
 
 done:
-	priv->used += add->count;
 	gntdev_print_maps(priv, "[new]", add->index);
 }
 
@@ -200,7 +194,7 @@ static int gntdev_del_map(struct grant_map *map)
 		if (map->unmap_ops[i].handle)
 			return -EBUSY;
 
-	map->priv->used -= map->count;
+	atomic_sub(map->count, &pages_mapped);
 	list_del(&map->next);
 	return 0;
 }
@@ -386,7 +380,6 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 
 	INIT_LIST_HEAD(&priv->maps);
 	spin_lock_init(&priv->lock);
-	priv->limit = limit;
 
 	priv->mm = get_task_mm(current);
 	if (!priv->mm) {
@@ -443,19 +436,24 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 	pr_debug("priv %p, add %d\n", priv, op.count);
 	if (unlikely(op.count <= 0))
 		return -EINVAL;
-	if (unlikely(op.count > priv->limit))
-		return -EINVAL;
 
 	err = -ENOMEM;
 	map = gntdev_alloc_map(priv, op.count);
 	if (!map)
 		return err;
+
 	if (copy_from_user(map->grants, &u->refs,
 			   sizeof(map->grants[0]) * op.count) != 0) {
 		gntdev_free_map(map);
 		return err;
 	}
 
+	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
+		pr_debug("can't map: over limit\n");
+		gntdev_free_map(map);
+		return err;
+	}
+
 	spin_lock(&priv->lock);
 	gntdev_add_map(priv, map);
 	op.index = map->index << PAGE_SHIFT;
@@ -518,23 +516,6 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 	return 0;
 }
 
-static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
-					struct ioctl_gntdev_set_max_grants __user *u)
-{
-	struct ioctl_gntdev_set_max_grants op;
-
-	if (copy_from_user(&op, u, sizeof(op)) != 0)
-		return -EFAULT;
-	pr_debug("priv %p, limit %d\n", priv, op.count);
-	if (op.count > limit)
-		return -E2BIG;
-
-	spin_lock(&priv->lock);
-	priv->limit = op.count;
-	spin_unlock(&priv->lock);
-	return 0;
-}
-
 static long gntdev_ioctl(struct file *flip,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -551,9 +532,6 @@ static long gntdev_ioctl(struct file *flip,
 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
 
-	case IOCTL_GNTDEV_SET_MAX_GRANTS:
-		return gntdev_ioctl_set_max_grants(priv, ptr);
-
 	default:
 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
 		return -ENOIOCTLCMD;
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
  2011-01-21 15:59 [SPAM] [PATCH v5] Userspace grant communication Daniel De Graaf
  2011-01-21 15:59 ` [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open Daniel De Graaf
@ 2011-01-21 15:59 ` Daniel De Graaf
  2011-01-21 15:59 ` [PATCH 3/6] xen-gntdev: Add reference counting to maps Daniel De Graaf
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-21 15:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This should be faster if many mappings exist, and also removes
the only user of map->vma not related to PTE modification.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   32 ++++++++------------------------
 1 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 23d208a..ce8c37c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -167,23 +167,6 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
 	return NULL;
 }
 
-static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv,
-					       unsigned long vaddr)
-{
-	struct grant_map *map;
-
-	list_for_each_entry(map, &priv->maps, next) {
-		if (!map->vma)
-			continue;
-		if (vaddr < map->vma->vm_start)
-			continue;
-		if (vaddr >= map->vma->vm_end)
-			continue;
-		return map;
-	}
-	return NULL;
-}
-
 static int gntdev_del_map(struct grant_map *map)
 {
 	int i;
@@ -494,22 +477,23 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 					      struct ioctl_gntdev_get_offset_for_vaddr __user *u)
 {
 	struct ioctl_gntdev_get_offset_for_vaddr op;
+	struct vm_area_struct *vma;
 	struct grant_map *map;
 
 	if (copy_from_user(&op, u, sizeof(op)) != 0)
 		return -EFAULT;
 	pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
 
-	spin_lock(&priv->lock);
-	map = gntdev_find_map_vaddr(priv, op.vaddr);
-	if (map == NULL ||
-	    map->vma->vm_start != op.vaddr) {
-		spin_unlock(&priv->lock);
+	vma = find_vma(current->mm, op.vaddr);
+	if (!vma || vma->vm_ops != &gntdev_vmops)
 		return -EINVAL;
-	}
+
+	map = vma->vm_private_data;
+	if (!map)
+		return -EINVAL;
+
 	op.offset = map->index << PAGE_SHIFT;
 	op.count = map->count;
-	spin_unlock(&priv->lock);
 
 	if (copy_to_user(u, &op, sizeof(op)) != 0)
 		return -EFAULT;
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 3/6] xen-gntdev: Add reference counting to maps
  2011-01-21 15:59 [SPAM] [PATCH v5] Userspace grant communication Daniel De Graaf
  2011-01-21 15:59 ` [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open Daniel De Graaf
  2011-01-21 15:59 ` [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually Daniel De Graaf
@ 2011-01-21 15:59 ` Daniel De Graaf
  2011-01-21 15:59 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-21 15:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This allows userspace to perform mmap() on the gntdev device and then
immediately close the filehandle or remove the mapping using the
remove ioctl, with the mapped area remaining valid until unmapped.
This also fixes an infinite loop when a gntdev device is closed
without first unmapping all areas.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   67 ++++++++++++++++++++-----------------------------
 1 files changed, 27 insertions(+), 40 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index ce8c37c..256162b 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -62,12 +62,12 @@ struct gntdev_priv {
 
 struct grant_map {
 	struct list_head next;
-	struct gntdev_priv *priv;
 	struct vm_area_struct *vma;
 	int index;
 	int count;
 	int flags;
 	int is_mapped;
+	atomic_t users;
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
@@ -117,7 +117,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 
 	add->index = 0;
 	add->count = count;
-	add->priv  = priv;
+	atomic_set(&add->users, 1);
 
 	return add;
 
@@ -167,28 +167,18 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
 	return NULL;
 }
 
-static int gntdev_del_map(struct grant_map *map)
-{
-	int i;
-
-	if (map->vma)
-		return -EBUSY;
-	for (i = 0; i < map->count; i++)
-		if (map->unmap_ops[i].handle)
-			return -EBUSY;
-
-	atomic_sub(map->count, &pages_mapped);
-	list_del(&map->next);
-	return 0;
-}
-
-static void gntdev_free_map(struct grant_map *map)
+static void gntdev_put_map(struct grant_map *map)
 {
 	int i;
 
 	if (!map)
 		return;
 
+	if (!atomic_dec_and_test(&map->users))
+		return;
+
+	atomic_sub(map->count, &pages_mapped);
+
 	if (map->pages)
 		for (i = 0; i < map->count; i++) {
 			if (map->pages[i])
@@ -267,6 +257,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	map->is_mapped = 0;
 	map->vma = NULL;
 	vma->vm_private_data = NULL;
+	gntdev_put_map(map);
 }
 
 static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -388,17 +379,14 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 {
 	struct gntdev_priv *priv = flip->private_data;
 	struct grant_map *map;
-	int err;
 
 	pr_debug("priv %p\n", priv);
 
 	spin_lock(&priv->lock);
 	while (!list_empty(&priv->maps)) {
 		map = list_entry(priv->maps.next, struct grant_map, next);
-		err = gntdev_del_map(map);
-		if (WARN_ON(err))
-			gntdev_free_map(map);
-
+		list_del(&map->next);
+		gntdev_put_map(map);
 	}
 	spin_unlock(&priv->lock);
 
@@ -425,15 +413,15 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 	if (!map)
 		return err;
 
-	if (copy_from_user(map->grants, &u->refs,
-			   sizeof(map->grants[0]) * op.count) != 0) {
-		gntdev_free_map(map);
+	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
+		pr_debug("can't map: over limit\n");
+		gntdev_put_map(map);
 		return err;
 	}
 
-	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
-		pr_debug("can't map: over limit\n");
-		gntdev_free_map(map);
+	if (copy_from_user(map->grants, &u->refs,
+			   sizeof(map->grants[0]) * op.count) != 0) {
+		gntdev_put_map(map);
 		return err;
 	}
 
@@ -442,13 +430,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 	op.index = map->index << PAGE_SHIFT;
 	spin_unlock(&priv->lock);
 
-	if (copy_to_user(u, &op, sizeof(op)) != 0) {
-		spin_lock(&priv->lock);
-		gntdev_del_map(map);
-		spin_unlock(&priv->lock);
-		gntdev_free_map(map);
-		return err;
-	}
+	if (copy_to_user(u, &op, sizeof(op)) != 0)
+		return -EFAULT;
+
 	return 0;
 }
 
@@ -465,11 +449,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
 
 	spin_lock(&priv->lock);
 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
-	if (map)
-		err = gntdev_del_map(map);
+	if (map) {
+		list_del(&map->next);
+		gntdev_put_map(map);
+		err = 0;
+	}
 	spin_unlock(&priv->lock);
-	if (!err)
-		gntdev_free_map(map);
 	return err;
 }
 
@@ -549,6 +534,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		goto unlock_out;
 	}
 
+	atomic_inc(&map->users);
+
 	vma->vm_ops = &gntdev_vmops;
 
 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-01-21 15:59 [SPAM] [PATCH v5] Userspace grant communication Daniel De Graaf
                   ` (2 preceding siblings ...)
  2011-01-21 15:59 ` [PATCH 3/6] xen-gntdev: Add reference counting to maps Daniel De Graaf
@ 2011-01-21 15:59 ` Daniel De Graaf
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  2011-01-21 15:59 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
  2011-01-21 15:59 ` [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl Daniel De Graaf
  5 siblings, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-21 15:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

HVM does not allow direct PTE modification, so instead we request
that Xen change its internal p2m mappings on the allocated pages and
map the memory into userspace normally.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c      |  115 +++++++++++++++++++++++++++++++-------------
 drivers/xen/grant-table.c |    6 ++
 2 files changed, 87 insertions(+), 34 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 256162b..a5710e8 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -32,6 +32,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/highmem.h>
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
 
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
+static int use_ptemod;
+
 struct gntdev_priv {
 	struct list_head maps;
 	/* lock protects maps from concurrent changes */
@@ -74,6 +77,8 @@ struct grant_map {
 	struct page **pages;
 };
 
+static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+
 /* ------------------------------------------------------------------ */
 
 static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
 
 	atomic_sub(map->count, &pages_mapped);
 
-	if (map->pages)
+	if (map->pages) {
+		if (!use_ptemod)
+			unmap_grant_pages(map, 0, map->count);
+
 		for (i = 0; i < map->count; i++) {
-			if (map->pages[i])
+			uint32_t check, *tmp;
+			if (!map->pages[i])
+				continue;
+			/* XXX When unmapping in an HVM domain, Xen will
+			 * sometimes end up mapping the GFN to an invalid MFN.
+			 * In this case, writes will be discarded and reads will
+			 * return all 0xFF bytes.  Leak these unusable GFNs
+			 * until Xen supports fixing their p2m mapping.
+			 */
+			tmp = kmap(map->pages[i]);
+			*tmp = 0xdeaddead;
+			mb();
+			check = *tmp;
+			kunmap(map->pages[i]);
+			if (check == 0xdeaddead)
 				__free_page(map->pages[i]);
+			else
+				pr_debug("Discard page %d=%ld\n", i,
+					page_to_pfn(map->pages[i]));
 		}
+	}
 	kfree(map->pages);
 	kfree(map->grants);
 	kfree(map->map_ops);
@@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
 {
 	struct grant_map *map = data;
 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
 	u64 pte_maddr;
 
 	BUG_ON(pgnr >= map->count);
 	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
 
-	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
-			  GNTMAP_contains_pte | map->flags,
+	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
 			  map->grants[pgnr].ref,
 			  map->grants[pgnr].domid);
-	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
-			    GNTMAP_contains_pte | map->flags,
+	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
 			    0 /* handle */);
 	return 0;
 }
@@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
 static int map_grant_pages(struct grant_map *map)
 {
 	int i, err = 0;
+	phys_addr_t addr;
+
+	if (!use_ptemod) {
+		for (i = 0; i < map->count; i++) {
+			addr = (phys_addr_t)
+				pfn_to_kaddr(page_to_pfn(map->pages[i]));
+			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
+				map->grants[i].ref,
+				map->grants[i].domid);
+			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
+				map->flags, 0 /* handle */);
+		}
+	}
 
 	pr_debug("map %d+%d\n", map->index, map->count);
 	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
@@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	gntdev_put_map(map);
 }
 
-static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
-			vmf->virtual_address, vmf->pgoff);
-	vmf->flags = VM_FAULT_ERROR;
-	return 0;
-}
-
 static struct vm_operations_struct gntdev_vmops = {
 	.close = gntdev_vma_close,
-	.fault = gntdev_vma_fault,
 };
 
 /* ------------------------------------------------------------------ */
@@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 	INIT_LIST_HEAD(&priv->maps);
 	spin_lock_init(&priv->lock);
 
-	priv->mm = get_task_mm(current);
-	if (!priv->mm) {
-		kfree(priv);
-		return -ENOMEM;
+	if (use_ptemod) {
+		priv->mm = get_task_mm(current);
+		if (!priv->mm) {
+			kfree(priv);
+			return -ENOMEM;
+		}
+		priv->mn.ops = &gntdev_mmu_ops;
+		ret = mmu_notifier_register(&priv->mn, priv->mm);
+		mmput(priv->mm);
 	}
-	priv->mn.ops = &gntdev_mmu_ops;
-	ret = mmu_notifier_register(&priv->mn, priv->mm);
-	mmput(priv->mm);
 
 	if (ret) {
 		kfree(priv);
@@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 	}
 	spin_unlock(&priv->lock);
 
-	mmu_notifier_unregister(&priv->mn, priv->mm);
+	if (use_ptemod)
+		mmu_notifier_unregister(&priv->mn, priv->mm);
 	kfree(priv);
 	return 0;
 }
@@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	int index = vma->vm_pgoff;
 	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	struct grant_map *map;
-	int err = -EINVAL;
+	int i, err = -EINVAL;
 
 	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
@@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	map = gntdev_find_map_index(priv, index, count);
 	if (!map)
 		goto unlock_out;
-	if (map->vma)
+	if (use_ptemod && map->vma)
 		goto unlock_out;
-	if (priv->mm != vma->vm_mm) {
+	if (use_ptemod && priv->mm != vma->vm_mm) {
 		printk(KERN_WARNING "Huh? Other mm?\n");
 		goto unlock_out;
 	}
@@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
 
 	vma->vm_private_data = map;
-	map->vma = vma;
 
-	map->flags = GNTMAP_host_map | GNTMAP_application_map;
+	if (use_ptemod)
+		map->vma = vma;
+
+	map->flags = GNTMAP_host_map;
 	if (!(vma->vm_flags & VM_WRITE))
 		map->flags |= GNTMAP_readonly;
 
 	spin_unlock(&priv->lock);
 
-	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
-				  vma->vm_end - vma->vm_start,
-				  find_grant_ptes, map);
-	if (err) {
-		printk(KERN_WARNING "find_grant_ptes() failure.\n");
-		return err;
+	if (use_ptemod) {
+		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+					  vma->vm_end - vma->vm_start,
+					  find_grant_ptes, map);
+		if (err) {
+			printk(KERN_WARNING "find_grant_ptes() failure.\n");
+			return err;
+		}
 	}
 
 	err = map_grant_pages(map);
@@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 
 	map->is_mapped = 1;
 
+	if (!use_ptemod) {
+		for (i = 0; i < count; i++) {
+			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
+				map->pages[i]);
+			if (err)
+				return err;
+		}
+	}
+
 	return 0;
 
 unlock_out:
@@ -595,6 +640,8 @@ static int __init gntdev_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	use_ptemod = xen_pv_domain();
+
 	err = misc_register(&gntdev_miscdev);
 	if (err != 0) {
 		printk(KERN_ERR "Could not register gntdev device\n");
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 9ef54eb..9428ced 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 	if (ret)
 		return ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
 		/* m2p override only supported for GNTMAP_contains_pte mappings */
 		if (!(map_ops[i].flags & GNTMAP_contains_pte))
@@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	if (ret)
 		return ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
 		ret = m2p_remove_override(pages[i]);
 		if (ret)
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-01-21 15:59 [SPAM] [PATCH v5] Userspace grant communication Daniel De Graaf
                   ` (3 preceding siblings ...)
  2011-01-21 15:59 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
@ 2011-01-21 15:59 ` Daniel De Graaf
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  2011-01-21 15:59 ` [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl Daniel De Graaf
  5 siblings, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-21 15:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This allows a userspace application to allocate a shared page for
implementing inter-domain communication or device drivers. These
shared pages can be mapped using the gntdev device or by the kernel
in another domain.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/Kconfig    |    8 +
 drivers/xen/Makefile   |    2 +
 drivers/xen/gntalloc.c |  477 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/gntalloc.h |   70 +++++++
 4 files changed, 557 insertions(+), 0 deletions(-)
 create mode 100644 drivers/xen/gntalloc.c
 create mode 100644 include/xen/gntalloc.h

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 07bec09..a3d7afb 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -80,6 +80,14 @@ config XEN_GNTDEV
 	help
 	  Allows userspace processes to use grants.
 
+config XEN_GRANT_DEV_ALLOC
+	tristate "User-space grant reference allocator driver"
+	depends on XEN
+	help
+	  Allows userspace processes to create pages with access granted
+	  to other domains. This can be used to implement frontend drivers
+	  or as part of an inter-domain shared memory channel.
+
 config XEN_PLATFORM_PCI
 	tristate "xen platform pci device driver"
 	depends on XEN_PVHVM && PCI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 5088cc2..9585a1d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XENFS)		+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
@@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
 
 xen-evtchn-y			:= evtchn.o
 xen-gntdev-y				:= gntdev.o
+xen-gntalloc-y				:= gntalloc.o
 
 xen-platform-pci-y		:= platform-pci.o
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
new file mode 100644
index 0000000..a230dc4
--- /dev/null
+++ b/drivers/xen/gntalloc.c
@@ -0,0 +1,477 @@
+/******************************************************************************
+ * gntalloc.c
+ *
+ * Device for creating grant references (in user-space) that may be shared
+ * with other domains.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * This driver exists to allow userspace programs in Linux to allocate kernel
+ * memory that will later be shared with another domain.  Without this device,
+ * Linux userspace programs cannot create grant references.
+ *
+ * How this stuff works:
+ *   X -> granting a page to Y
+ *   Y -> mapping the grant from X
+ *
+ *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
+ *   2. X creates an entry in the grant table that says domid(Y) can access P.
+ *      This is done without a hypercall unless the grant table needs expansion.
+ *   3. X gives the grant reference identifier, GREF, to Y.
+ *   4. Y maps the page, either directly into kernel memory for use in a backend
+ *      driver, or via a the gntdev device to map into the address space of an
+ *      application running in Y. This is the first point at which Xen does any
+ *      tracking of the page.
+ *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
+ *      to the shared page, and can now communicate with Y over the shared page.
+ *
+ *
+ * NOTE TO USERSPACE LIBRARIES:
+ *   The grant allocation and mmap()ing are, naturally, two separate operations.
+ *   You set up the sharing by calling the create ioctl() and then the mmap().
+ *   Teardown requires munmap() and either close() or ioctl().
+ *
+ * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
+ * reference, this device can be used to consume kernel memory by leaving grant
+ * references mapped by another domain when an application exits. Therefore,
+ * there is a global limit on the number of pages that can be allocated. When
+ * all references to the page are unmapped, it will be freed during the next
+ * grant operation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/gntalloc.h>
+
+static int limit = 1024;
+module_param(limit, int, 0644);
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
+		"the gntalloc device");
+
+static LIST_HEAD(gref_list);
+static DEFINE_SPINLOCK(gref_lock);
+static int gref_size;
+
+/* Metadata on a grant reference. */
+struct gntalloc_gref {
+	struct list_head next_all;   /* list entry gref_list */
+	struct list_head next_file;  /* list entry file->list, if open */
+	struct page *page;	     /* The shared page */
+	uint64_t file_index;         /* File offset for mmap() */
+	unsigned int users;          /* Use count - when zero, waiting on Xen */
+	grant_ref_t gref_id;         /* The grant reference number */
+};
+
+struct gntalloc_file_private_data {
+	struct list_head list;
+	uint64_t index;
+};
+
+static void __del_gref(struct gntalloc_gref *gref);
+
+static void do_cleanup(void)
+{
+	struct gntalloc_gref *gref, *n;
+	list_for_each_entry_safe(gref, n, &gref_list, next_all) {
+		if (!gref->users)
+			__del_gref(gref);
+	}
+}
+
+static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
+	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
+{
+	int i, rc, readonly;
+	LIST_HEAD(queue_all);
+	LIST_HEAD(queue_file);
+	struct gntalloc_gref *gref;
+
+	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
+	rc = -ENOMEM;
+	for (i = 0; i < op->count; i++) {
+		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
+		if (!gref)
+			goto undo;
+		list_add_tail(&gref->next_all, &queue_all);
+		list_add_tail(&gref->next_file, &queue_file);
+		gref->users = 1;
+		gref->file_index = op->index + i * PAGE_SIZE;
+		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!gref->page)
+			goto undo;
+
+		/* Grant foreign access to the page. */
+		gref->gref_id = gnttab_grant_foreign_access(op->domid,
+			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
+		if (gref->gref_id < 0) {
+			rc = gref->gref_id;
+			goto undo;
+		}
+		gref_ids[i] = gref->gref_id;
+	}
+
+	/* Add to gref lists. */
+	spin_lock(&gref_lock);
+	list_splice_tail(&queue_all, &gref_list);
+	list_splice_tail(&queue_file, &priv->list);
+	spin_unlock(&gref_lock);
+
+	return 0;
+
+undo:
+	list_for_each_entry(gref, &queue_file, next_file) {
+		if (gref->page && gref->gref_id >= 0) {
+			__del_gref(gref);
+		} else {
+			if (gref->page)
+				__free_page(gref->page);
+			list_del(&gref->next_all);
+			kfree(gref);
+		}
+	}
+
+	/* It's possible for the target domain to map the just-allocated grant
+	 * references by blindly guessing their IDs; if this is done, then
+	 * __del_gref will leave them in the queue_all list. They need to be
+	 * added to the global list so that we can free them when they are no
+	 * longer referenced.
+	 */
+	if (unlikely(!list_empty(&queue_all))) {
+		spin_lock(&gref_lock);
+		list_splice_tail(&queue_all, &gref_list);
+		spin_unlock(&gref_lock);
+	}
+	return rc;
+}
+
+static void __del_gref(struct gntalloc_gref *gref)
+{
+	if (gnttab_query_foreign_access(gref->gref_id))
+		return;
+
+	if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
+		return;
+
+	gref_size--;
+	list_del(&gref->next_all);
+
+	__free_page(gref->page);
+	kfree(gref);
+}
+
+/* finds contiguous grant references in a file, returns the first */
+static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
+		uint64_t index, uint32_t count)
+{
+	struct gntalloc_gref *rv = NULL, *gref;
+	list_for_each_entry(gref, &priv->list, next_file) {
+		if (gref->file_index == index && !rv)
+			rv = gref;
+		if (rv) {
+			if (gref->file_index != index)
+				return NULL;
+			index += PAGE_SIZE;
+			count--;
+			if (count == 0)
+				return rv;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * -------------------------------------
+ *  File operations.
+ * -------------------------------------
+ */
+static int gntalloc_open(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv;
+
+	try_module_get(THIS_MODULE);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto out_nomem;
+	INIT_LIST_HEAD(&priv->list);
+
+	filp->private_data = priv;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	return 0;
+
+out_nomem:
+	return -ENOMEM;
+}
+
+static int gntalloc_release(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	spin_lock(&gref_lock);
+	while (!list_empty(&priv->list)) {
+		gref = list_entry(priv->list.next,
+			struct gntalloc_gref, next_file);
+		list_del(&gref->next_file);
+		gref->users--;
+		if (gref->users == 0)
+			__del_gref(gref);
+	}
+	kfree(priv);
+	spin_unlock(&gref_lock);
+
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
+		struct ioctl_gntalloc_alloc_gref __user *arg)
+{
+	int rc = 0;
+	struct ioctl_gntalloc_alloc_gref op;
+	uint32_t *gref_ids;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
+	if (!gref_ids) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock(&gref_lock);
+	do_cleanup();
+	if (gref_size + op.count > limit) {
+		spin_unlock(&gref_lock);
+		rc = -ENOSPC;
+		goto out_free;
+	}
+	gref_size += op.count;
+	op.index = priv->index;
+	priv->index += op.count * PAGE_SIZE;
+	spin_unlock(&gref_lock);
+
+	rc = add_grefs(&op, gref_ids, priv);
+	if (rc < 0)
+		goto out_free;
+
+	if (copy_to_user(arg, &op, sizeof(op))) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+	if (copy_to_user(arg->gref_ids, gref_ids,
+			sizeof(gref_ids[0]) * op.count)) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+
+out_free:
+	kfree(gref_ids);
+out:
+	return rc;
+}
+
+static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
+		void __user *arg)
+{
+	int i, rc = 0;
+	struct ioctl_gntalloc_dealloc_gref op;
+	struct gntalloc_gref *gref, *n;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto dealloc_grant_out;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_grefs(priv, op.index, op.count);
+	if (gref) {
+		for (i = 0; i < op.count; i++) {
+			n = list_entry(gref->next_file.next,
+				struct gntalloc_gref, next_file);
+			list_del(&gref->next_file);
+			gref->users--;
+			gref = n;
+		}
+	} else {
+		rc = -EINVAL;
+	}
+
+	do_cleanup();
+
+	spin_unlock(&gref_lock);
+dealloc_grant_out:
+	return rc;
+}
+
+static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+
+	switch (cmd) {
+	case IOCTL_GNTALLOC_ALLOC_GREF:
+		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
+
+	case IOCTL_GNTALLOC_DEALLOC_GREF:
+		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return 0;
+}
+
+static void gntalloc_vma_close(struct vm_area_struct *vma)
+{
+	struct gntalloc_gref *gref = vma->vm_private_data;
+	if (!gref)
+		return;
+
+	spin_lock(&gref_lock);
+	gref->users--;
+	if (gref->users == 0)
+		__del_gref(gref);
+	spin_unlock(&gref_lock);
+}
+
+static struct vm_operations_struct gntalloc_vmops = {
+	.close = gntalloc_vma_close,
+};
+
+static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int rv, i;
+
+	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
+		       priv, vma->vm_pgoff, count);
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
+		return -EINVAL;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
+	if (gref == NULL) {
+		rv = -ENOENT;
+		pr_debug("%s: Could not find grant reference",
+				__func__);
+		goto out_unlock;
+	}
+
+	vma->vm_private_data = gref;
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTCOPY;
+	vma->vm_flags |= VM_IO;
+	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
+
+	vma->vm_ops = &gntalloc_vmops;
+
+	for (i = 0; i < count; i++) {
+		gref->users++;
+		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+				gref->page);
+		if (rv)
+			goto out_unlock;
+
+		gref = list_entry(gref->next_file.next,
+				struct gntalloc_gref, next_file);
+	}
+	rv = 0;
+
+out_unlock:
+	spin_unlock(&gref_lock);
+	return rv;
+}
+
+static const struct file_operations gntalloc_fops = {
+	.owner = THIS_MODULE,
+	.open = gntalloc_open,
+	.release = gntalloc_release,
+	.unlocked_ioctl = gntalloc_ioctl,
+	.mmap = gntalloc_mmap
+};
+
+/*
+ * -------------------------------------
+ * Module creation/destruction.
+ * -------------------------------------
+ */
+static struct miscdevice gntalloc_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "xen/gntalloc",
+	.fops	= &gntalloc_fops,
+};
+
+static int __init gntalloc_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	err = misc_register(&gntalloc_miscdev);
+	if (err != 0) {
+		printk(KERN_ERR "Could not register misc gntalloc device\n");
+		return err;
+	}
+
+	pr_debug("Created grant allocation device at %d,%d\n",
+			MISC_MAJOR, gntalloc_miscdev.minor);
+
+	return 0;
+}
+
+static void __exit gntalloc_exit(void)
+{
+	misc_deregister(&gntalloc_miscdev);
+}
+
+module_init(gntalloc_init);
+module_exit(gntalloc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
+		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("User-space grant reference allocator driver");
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
new file mode 100644
index 0000000..e1d6d0f
--- /dev/null
+++ b/include/xen/gntalloc.h
@@ -0,0 +1,70 @@
+/******************************************************************************
+ * gntalloc.h
+ *
+ * Interface to /dev/xen/gntalloc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTALLOC_H__
+#define __LINUX_PUBLIC_GNTALLOC_H__
+
+/*
+ * Allocates a new page and creates a new grant reference.
+ */
+#define IOCTL_GNTALLOC_ALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
+struct ioctl_gntalloc_alloc_gref {
+	/* IN parameters */
+	/* The ID of the domain to be given access to the grants. */
+	uint16_t domid;
+	/* Flags for this mapping */
+	uint16_t flags;
+	/* Number of pages to map */
+	uint32_t count;
+	/* OUT parameters */
+	/* The offset to be used on a subsequent call to mmap(). */
+	uint64_t index;
+	/* The grant references of the newly created grant, one per page */
+	/* Variable size, depending on count */
+	uint32_t gref_ids[1];
+};
+
+#define GNTALLOC_FLAG_WRITABLE 1
+
+/*
+ * Deallocates the grant reference, allowing the associated page to be freed if
+ * no other domains are using it.
+ */
+#define IOCTL_GNTALLOC_DEALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
+struct ioctl_gntalloc_dealloc_gref {
+	/* IN parameters */
+	/* The offset returned in the map operation */
+	uint64_t index;
+	/* Number of references to unmap */
+	uint32_t count;
+};
+#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
  2011-01-21 15:59 [SPAM] [PATCH v5] Userspace grant communication Daniel De Graaf
                   ` (4 preceding siblings ...)
  2011-01-21 15:59 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
@ 2011-01-21 15:59 ` Daniel De Graaf
  2011-01-27 19:20   ` Konrad Rzeszutek Wilk
  5 siblings, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-21 15:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This ioctl allows the users of a shared page to be notified when
the other end exits abnormally.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntalloc.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/gntdev.c   |   56 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/xen/gntalloc.h |   28 ++++++++++++++++++++++++
 include/xen/gntdev.h   |   27 +++++++++++++++++++++++
 4 files changed, 165 insertions(+), 1 deletions(-)

diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index a230dc4..8ac0dfc 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -60,11 +60,13 @@
 #include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/highmem.h>
 
 #include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/grant_table.h>
 #include <xen/gntalloc.h>
+#include <xen/events.h>
 
 static int limit = 1024;
 module_param(limit, int, 0644);
@@ -83,6 +85,9 @@ struct gntalloc_gref {
 	uint64_t file_index;         /* File offset for mmap() */
 	unsigned int users;          /* Use count - when zero, waiting on Xen */
 	grant_ref_t gref_id;         /* The grant reference number */
+	unsigned notify_flags:2;     /* Unmap notification flags */
+	unsigned notify_pgoff:12;    /* Offset of the byte to clear */
+	int notify_event;            /* Port (event channel) to notify */
 };
 
 struct gntalloc_file_private_data {
@@ -169,6 +174,16 @@ undo:
 
 static void __del_gref(struct gntalloc_gref *gref)
 {
+	if (gref->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+		uint8_t *tmp = kmap(gref->page);
+		tmp[gref->notify_pgoff] = 0;
+		kunmap(gref->page);
+	}
+	if (gref->notify_flags & UNMAP_NOTIFY_SEND_EVENT)
+		notify_remote_via_evtchn(gref->notify_event);
+
+	gref->notify_flags = 0;
+
 	if (gnttab_query_foreign_access(gref->gref_id))
 		return;
 
@@ -339,6 +354,43 @@ dealloc_grant_out:
 	return rc;
 }
 
+static long gntalloc_ioctl_notify(struct gntalloc_file_private_data *priv,
+		void __user *arg)
+{
+	struct ioctl_gntalloc_unmap_notify op;
+	struct gntalloc_gref *gref;
+	uint64_t index;
+	int pgoff;
+	int rc;
+
+	if (copy_from_user(&op, arg, sizeof(op)))
+		return -EFAULT;
+
+	index = op.index & ~(PAGE_SIZE - 1);
+	pgoff = op.index & (PAGE_SIZE - 1);
+
+	spin_lock(&gref_lock);
+
+	gref = find_grefs(priv, index, 1);
+	if (!gref) {
+		rc = -ENOENT;
+		goto unlock_out;
+	}
+
+	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
+	gref->notify_flags = op.action;
+	gref->notify_pgoff = pgoff;
+	gref->notify_event = op.event_channel_port;
+	rc = 0;
+ unlock_out:
+	spin_unlock(&gref_lock);
+	return rc;
+}
+
 static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -351,6 +403,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
 	case IOCTL_GNTALLOC_DEALLOC_GREF:
 		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
 
+	case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
+		return gntalloc_ioctl_notify(priv, (void __user *)arg);
+
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a5710e8..e360d0a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -37,6 +37,7 @@
 #include <xen/xen.h>
 #include <xen/grant_table.h>
 #include <xen/gntdev.h>
+#include <xen/events.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
@@ -70,6 +71,9 @@ struct grant_map {
 	int count;
 	int flags;
 	int is_mapped;
+	int notify_flags;
+	int notify_offset;
+	int notify_event;
 	atomic_t users;
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
@@ -165,7 +169,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
 	list_for_each_entry(map, &priv->maps, next) {
 		if (map->index != index)
 			continue;
-		if (map->count != count)
+		if (count && map->count != count)
 			continue;
 		return map;
 	}
@@ -184,6 +188,10 @@ static void gntdev_put_map(struct grant_map *map)
 
 	atomic_sub(map->count, &pages_mapped);
 
+	if (map->notify_flags & UNMAP_NOTIFY_SEND_EVENT) {
+		notify_remote_via_evtchn(map->notify_event);
+	}
+
 	if (map->pages) {
 		if (!use_ptemod)
 			unmap_grant_pages(map, 0, map->count);
@@ -272,6 +280,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
 	int i, err = 0;
 
+	if (map->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+		int pgno = (map->notify_offset >> PAGE_SHIFT);
+		if (pgno >= offset && pgno < pages + offset) {
+			uint8_t *tmp = kmap(map->pages[pgno]);
+			tmp[map->notify_offset & (PAGE_SIZE-1)] = 0;
+			kunmap(map->pages[pgno]);
+			map->notify_flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+		}
+	}
+
 	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
 	if (err)
@@ -517,6 +535,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 	return 0;
 }
 
+static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
+{
+	struct ioctl_gntdev_unmap_notify op;
+	struct grant_map *map;
+	int rc;
+
+	if (copy_from_user(&op, u, sizeof(op)))
+		return -EFAULT;
+
+	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
+		return -EINVAL;
+
+	spin_lock(&priv->lock);
+
+	list_for_each_entry(map, &priv->maps, next) {
+		uint64_t begin = map->index << PAGE_SHIFT;
+		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
+		if (op.index >= begin && op.index < end)
+			goto found;
+	}
+	rc = -ENOENT;
+	goto unlock_out;
+
+ found:
+	map->notify_flags = op.action;
+	map->notify_offset = op.index - (map->index << PAGE_SHIFT);
+	map->notify_event = op.event_channel_port;
+	rc = 0;
+ unlock_out:
+	spin_unlock(&priv->lock);
+	return rc;
+}
+
 static long gntdev_ioctl(struct file *flip,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -533,6 +584,9 @@ static long gntdev_ioctl(struct file *flip,
 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
 
+	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
+		return gntdev_ioctl_notify(priv, ptr);
+
 	default:
 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
 		return -ENOIOCTLCMD;
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
index e1d6d0f..92339f5 100644
--- a/include/xen/gntalloc.h
+++ b/include/xen/gntalloc.h
@@ -67,4 +67,32 @@ struct ioctl_gntalloc_dealloc_gref {
 	/* Number of references to unmap */
 	uint32_t count;
 };
+
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
+struct ioctl_gntalloc_unmap_notify {
+	/* IN parameters */
+	/* Index of a byte in the page */
+	uint64_t index;
+	/* Action(s) to take on unmap */
+	uint32_t action;
+	/* Event channel to notify */
+	uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
index eb23f41..5d9b9b4 100644
--- a/include/xen/gntdev.h
+++ b/include/xen/gntdev.h
@@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants {
 	uint32_t count;
 };
 
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
+struct ioctl_gntdev_unmap_notify {
+	/* IN parameters */
+	/* Index of a byte in the page */
+	uint64_t index;
+	/* Action(s) to take on unmap */
+	uint32_t action;
+	/* Event channel to notify */
+	uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-01-21 15:59 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
@ 2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  2011-01-27 19:23     ` Konrad Rzeszutek Wilk
  2011-01-27 20:55     ` Daniel De Graaf
  0 siblings, 2 replies; 29+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-01-27 18:52 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Fri, Jan 21, 2011 at 10:59:07AM -0500, Daniel De Graaf wrote:
> This allows a userspace application to allocate a shared page for
> implementing inter-domain communication or device drivers. These
> shared pages can be mapped using the gntdev device or by the kernel
> in another domain.
> 
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> ---
>  drivers/xen/Kconfig    |    8 +
>  drivers/xen/Makefile   |    2 +
>  drivers/xen/gntalloc.c |  477 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/xen/gntalloc.h |   70 +++++++
>  4 files changed, 557 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/xen/gntalloc.c
>  create mode 100644 include/xen/gntalloc.h
> 
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 07bec09..a3d7afb 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -80,6 +80,14 @@ config XEN_GNTDEV
>  	help
>  	  Allows userspace processes to use grants.
>  
> +config XEN_GRANT_DEV_ALLOC
> +	tristate "User-space grant reference allocator driver"
> +	depends on XEN
> +	help
> +	  Allows userspace processes to create pages with access granted
> +	  to other domains. This can be used to implement frontend drivers
> +	  or as part of an inter-domain shared memory channel.
> +
>  config XEN_PLATFORM_PCI
>  	tristate "xen platform pci device driver"
>  	depends on XEN_PVHVM && PCI
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 5088cc2..9585a1d 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
>  obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
>  obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
>  obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
>  obj-$(CONFIG_XENFS)		+= xenfs/
>  obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
>  obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
> @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
>  
>  xen-evtchn-y			:= evtchn.o
>  xen-gntdev-y				:= gntdev.o
> +xen-gntalloc-y				:= gntalloc.o
>  
>  xen-platform-pci-y		:= platform-pci.o
> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
> new file mode 100644
> index 0000000..a230dc4
> --- /dev/null
> +++ b/drivers/xen/gntalloc.c
> @@ -0,0 +1,477 @@
> +/******************************************************************************
> + * gntalloc.c
> + *
> + * Device for creating grant references (in user-space) that may be shared
> + * with other domains.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +/*
> + * This driver exists to allow userspace programs in Linux to allocate kernel
> + * memory that will later be shared with another domain.  Without this device,
> + * Linux userspace programs cannot create grant references.
> + *
> + * How this stuff works:
> + *   X -> granting a page to Y
> + *   Y -> mapping the grant from X
> + *
> + *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
> + *   2. X creates an entry in the grant table that says domid(Y) can access P.
> + *      This is done without a hypercall unless the grant table needs expansion.
> + *   3. X gives the grant reference identifier, GREF, to Y.
> + *   4. Y maps the page, either directly into kernel memory for use in a backend
> + *      driver, or via a the gntdev device to map into the address space of an
> + *      application running in Y. This is the first point at which Xen does any
> + *      tracking of the page.
> + *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
> + *      to the shared page, and can now communicate with Y over the shared page.
> + *
> + *
> + * NOTE TO USERSPACE LIBRARIES:
> + *   The grant allocation and mmap()ing are, naturally, two separate operations.
> + *   You set up the sharing by calling the create ioctl() and then the mmap().
> + *   Teardown requires munmap() and either close() or ioctl().
> + *
> + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
> + * reference, this device can be used to consume kernel memory by leaving grant
> + * references mapped by another domain when an application exits. Therefore,
> + * there is a global limit on the number of pages that can be allocated. When
> + * all references to the page are unmapped, it will be freed during the next
> + * grant operation.
> + */
> +
> +#include <linux/atomic.h>
> +#include <linux/module.h>
> +#include <linux/miscdevice.h>
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <linux/fs.h>
> +#include <linux/device.h>
> +#include <linux/mm.h>
> +#include <linux/uaccess.h>
> +#include <linux/types.h>
> +#include <linux/list.h>
> +
> +#include <xen/xen.h>
> +#include <xen/page.h>
> +#include <xen/grant_table.h>
> +#include <xen/gntalloc.h>
> +
> +static int limit = 1024;
> +module_param(limit, int, 0644);
> +MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
> +		"the gntalloc device");
> +
> +static LIST_HEAD(gref_list);
> +static DEFINE_SPINLOCK(gref_lock);
> +static int gref_size;
> +
> +/* Metadata on a grant reference. */
> +struct gntalloc_gref {
> +	struct list_head next_all;   /* list entry gref_list */

How about 'next_gref' ?
> +	struct list_head next_file;  /* list entry file->list, if open */
> +	struct page *page;	     /* The shared page */
> +	uint64_t file_index;         /* File offset for mmap() */
> +	unsigned int users;          /* Use count - when zero, waiting on Xen */
> +	grant_ref_t gref_id;         /* The grant reference number */
> +};
> +
> +struct gntalloc_file_private_data {
> +	struct list_head list;
> +	uint64_t index;
> +};
> +
> +static void __del_gref(struct gntalloc_gref *gref);
> +
> +static void do_cleanup(void)
> +{
> +	struct gntalloc_gref *gref, *n;
> +	list_for_each_entry_safe(gref, n, &gref_list, next_all) {
> +		if (!gref->users)
> +			__del_gref(gref);
> +	}
> +}
> +
> +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
> +	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
> +{
> +	int i, rc, readonly;
> +	LIST_HEAD(queue_all);
> +	LIST_HEAD(queue_file);
> +	struct gntalloc_gref *gref;
> +
> +	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
> +	rc = -ENOMEM;
> +	for (i = 0; i < op->count; i++) {
> +		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
> +		if (!gref)
> +			goto undo;
> +		list_add_tail(&gref->next_all, &queue_all);
> +		list_add_tail(&gref->next_file, &queue_file);
> +		gref->users = 1;
> +		gref->file_index = op->index + i * PAGE_SIZE;
> +		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +		if (!gref->page)
> +			goto undo;
> +
> +		/* Grant foreign access to the page. */
> +		gref->gref_id = gnttab_grant_foreign_access(op->domid,
> +			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
> +		if (gref->gref_id < 0) {
> +			rc = gref->gref_id;
> +			goto undo;
> +		}
> +		gref_ids[i] = gref->gref_id;
> +	}
> +
> +	/* Add to gref lists. */
> +	spin_lock(&gref_lock);
> +	list_splice_tail(&queue_all, &gref_list);
> +	list_splice_tail(&queue_file, &priv->list);
> +	spin_unlock(&gref_lock);
> +
> +	return 0;
> +
> +undo:
> +	list_for_each_entry(gref, &queue_file, next_file) {
> +		if (gref->page && gref->gref_id >= 0) {
> +			__del_gref(gref);
> +		} else {
> +			if (gref->page)
> +				__free_page(gref->page);
> +			list_del(&gref->next_all);

Ohh, you are actually deleting them from queue_all, ah.
Can you add a comment about that since the first time I read this
I thought you were iterating over the queue_file and deleting
from _that_ list. Tricky..


Can collapse this deleting into __del_gref? Looks like
you should just preface the gnttab with "if gref->page && gref->gref_id"
and also the gref_size, and the "if (gref->page)" before deleting?


> +			kfree(gref);
> +		}
> +	}
> +
> +	/* It's possible for the target domain to map the just-allocated grant
> +	 * references by blindly guessing their IDs; if this is done, then
> +	 * __del_gref will leave them in the queue_all list. They need to be
> +	 * added to the global list so that we can free them when they are no
> +	 * longer referenced.
> +	 */
> +	if (unlikely(!list_empty(&queue_all))) {
> +		spin_lock(&gref_lock);
> +		list_splice_tail(&queue_all, &gref_list);
> +		spin_unlock(&gref_lock);
> +	}
> +	return rc;
> +}
> +
> +static void __del_gref(struct gntalloc_gref *gref)
> +{
> +	if (gnttab_query_foreign_access(gref->gref_id))
> +		return;
> +
> +	if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
> +		return;
> +
> +	gref_size--;
> +	list_del(&gref->next_all);
> +
> +	__free_page(gref->page);
> +	kfree(gref);
> +}
> +
> +/* finds contiguous grant references in a file, returns the first */
> +static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
> +		uint64_t index, uint32_t count)
> +{
> +	struct gntalloc_gref *rv = NULL, *gref;
> +	list_for_each_entry(gref, &priv->list, next_file) {
> +		if (gref->file_index == index && !rv)
> +			rv = gref;
> +		if (rv) {
> +			if (gref->file_index != index)
> +				return NULL;
> +			index += PAGE_SIZE;
> +			count--;
> +			if (count == 0)
> +				return rv;
> +		}
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * -------------------------------------
> + *  File operations.
> + * -------------------------------------
> + */
> +static int gntalloc_open(struct inode *inode, struct file *filp)
> +{
> +	struct gntalloc_file_private_data *priv;
> +
> +	try_module_get(THIS_MODULE);

No checking if it fails?
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		goto out_nomem;
> +	INIT_LIST_HEAD(&priv->list);
> +
> +	filp->private_data = priv;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	return 0;
> +
> +out_nomem:

You should also do:

	module_put(THIS_MODULE);

> +	return -ENOMEM;
> +}
> +
> +static int gntalloc_release(struct inode *inode, struct file *filp)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +	struct gntalloc_gref *gref;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	spin_lock(&gref_lock);
> +	while (!list_empty(&priv->list)) {
> +		gref = list_entry(priv->list.next,
> +			struct gntalloc_gref, next_file);


> +		list_del(&gref->next_file);
> +		gref->users--;
> +		if (gref->users == 0)
> +			__del_gref(gref);
> +	}
> +	kfree(priv);
> +	spin_unlock(&gref_lock);
> +
> +	module_put(THIS_MODULE);
> +
> +	return 0;
> +}
> +
> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
> +		struct ioctl_gntalloc_alloc_gref __user *arg)
> +{
> +	int rc = 0;
> +	struct ioctl_gntalloc_alloc_gref op;
> +	uint32_t *gref_ids;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	if (copy_from_user(&op, arg, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto out;
> +	}
> +
> +	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
> +	if (!gref_ids) {
> +		rc = -ENOMEM;
> +		goto out;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	do_cleanup();

Hmm, why the cleanup here? I see it  gntalloc_ioctl_dealloc
which makes sense since the users-- might have been decremented to zero.
But here?

> +	if (gref_size + op.count > limit) {
> +		spin_unlock(&gref_lock);
> +		rc = -ENOSPC;
> +		goto out_free;
> +	}
> +	gref_size += op.count;
> +	op.index = priv->index;
> +	priv->index += op.count * PAGE_SIZE;
> +	spin_unlock(&gref_lock);
> +
> +	rc = add_grefs(&op, gref_ids, priv);
> +	if (rc < 0)
> +		goto out_free;
> +
> +	if (copy_to_user(arg, &op, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto out_free;

Not something that would clean the newly added grant? Say
the code I suggested below the 'out' label.

> +	}
> +	if (copy_to_user(arg->gref_ids, gref_ids,
> +			sizeof(gref_ids[0]) * op.count)) {
> +		rc = -EFAULT;
> +		goto out_free;

Ditto.
> +	}
> +
> +out_free:
> +	kfree(gref_ids);
> +out:
> +	return rc;

out_del_gref:
	gref->users--;
	__del_gref(gref);

	goto out_free;
> +}
> +
> +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
> +		void __user *arg)
> +{
> +	int i, rc = 0;
> +	struct ioctl_gntalloc_dealloc_gref op;
> +	struct gntalloc_gref *gref, *n;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	if (copy_from_user(&op, arg, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto dealloc_grant_out;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	gref = find_grefs(priv, op.index, op.count);
> +	if (gref) {
> +		for (i = 0; i < op.count; i++) {
> +			n = list_entry(gref->next_file.next,
> +				struct gntalloc_gref, next_file);

Might want to add a comment that you are deleting only from ->next_file
b/c the do_cleanpup will take of deleting from the other list (that is
if the users is at zero).

> +			list_del(&gref->next_file);
> +			gref->users--;
> +			gref = n;
> +		}
> +	} else {
> +		rc = -EINVAL;
> +	}
> +
> +	do_cleanup();
> +
> +	spin_unlock(&gref_lock);
> +dealloc_grant_out:
> +	return rc;
> +}
> +
> +static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
> +		unsigned long arg)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +
> +	switch (cmd) {
> +	case IOCTL_GNTALLOC_ALLOC_GREF:
> +		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
> +
> +	case IOCTL_GNTALLOC_DEALLOC_GREF:
> +		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
> +
> +	default:
> +		return -ENOIOCTLCMD;
> +	}
> +
> +	return 0;
> +}
> +
> +static void gntalloc_vma_close(struct vm_area_struct *vma)
> +{
> +	struct gntalloc_gref *gref = vma->vm_private_data;
> +	if (!gref)
> +		return;
> +
> +	spin_lock(&gref_lock);
> +	gref->users--;
> +	if (gref->users == 0)
> +		__del_gref(gref);

I just want to be convienced here that I am wrong.

If the 'ioctl_deallo' has not been done, what will if unmap this VMA?
Will it be OK to  yank the gref from gref_list while (and kfree it) while
it is still referenced in the filp->private_data? Or would end up trying
to derefence the *priv and go BOOM?


> +	spin_unlock(&gref_lock);
> +}
> +
> +static struct vm_operations_struct gntalloc_vmops = {
> +	.close = gntalloc_vma_close,
> +};
> +
> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +	struct gntalloc_gref *gref;
> +	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
> +	int rv, i;
> +
> +	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
> +		       priv, vma->vm_pgoff, count);
> +
> +	if (!(vma->vm_flags & VM_SHARED)) {
> +		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
> +	if (gref == NULL) {
> +		rv = -ENOENT;
> +		pr_debug("%s: Could not find grant reference",
> +				__func__);
> +		goto out_unlock;
> +	}
> +
> +	vma->vm_private_data = gref;
> +
> +	vma->vm_flags |= VM_RESERVED;
> +	vma->vm_flags |= VM_DONTCOPY;
> +	vma->vm_flags |= VM_IO;

Hmm, I don't think you need the VM_IO.

> +	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
> +
> +	vma->vm_ops = &gntalloc_vmops;
> +
> +	for (i = 0; i < count; i++) {
> +		gref->users++;
> +		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
> +				gref->page);
> +		if (rv)
> +			goto out_unlock;
> +
> +		gref = list_entry(gref->next_file.next,
> +				struct gntalloc_gref, next_file);
> +	}
> +	rv = 0;
> +
> +out_unlock:
> +	spin_unlock(&gref_lock);
> +	return rv;
> +}
> +
> +static const struct file_operations gntalloc_fops = {
> +	.owner = THIS_MODULE,
> +	.open = gntalloc_open,
> +	.release = gntalloc_release,
> +	.unlocked_ioctl = gntalloc_ioctl,
> +	.mmap = gntalloc_mmap
> +};
> +
> +/*
> + * -------------------------------------
> + * Module creation/destruction.
> + * -------------------------------------
> + */
> +static struct miscdevice gntalloc_miscdev = {
> +	.minor	= MISC_DYNAMIC_MINOR,
> +	.name	= "xen/gntalloc",
> +	.fops	= &gntalloc_fops,
> +};
> +
> +static int __init gntalloc_init(void)
> +{
> +	int err;
> +
> +	if (!xen_domain())
> +		return -ENODEV;
> +
> +	err = misc_register(&gntalloc_miscdev);
> +	if (err != 0) {
> +		printk(KERN_ERR "Could not register misc gntalloc device\n");
> +		return err;
> +	}
> +
> +	pr_debug("Created grant allocation device at %d,%d\n",
> +			MISC_MAJOR, gntalloc_miscdev.minor);
> +
> +	return 0;
> +}
> +
> +static void __exit gntalloc_exit(void)
> +{
> +	misc_deregister(&gntalloc_miscdev);
> +}
> +
> +module_init(gntalloc_init);
> +module_exit(gntalloc_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
> +		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
> +MODULE_DESCRIPTION("User-space grant reference allocator driver");
> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
> new file mode 100644
> index 0000000..e1d6d0f
> --- /dev/null
> +++ b/include/xen/gntalloc.h
> @@ -0,0 +1,70 @@
> +/******************************************************************************
> + * gntalloc.h
> + *

Copyright?

> + * Interface to /dev/xen/gntalloc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation; or, when distributed
> + * separately from the Linux kernel or incorporated into other
> + * software packages, subject to the following license:
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this source file (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use, copy, modify,
> + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
> + * and to permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef __LINUX_PUBLIC_GNTALLOC_H__
> +#define __LINUX_PUBLIC_GNTALLOC_H__
> +
> +/*
> + * Allocates a new page and creates a new grant reference.
> + */
> +#define IOCTL_GNTALLOC_ALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
> +struct ioctl_gntalloc_alloc_gref {
> +	/* IN parameters */
> +	/* The ID of the domain to be given access to the grants. */
> +	uint16_t domid;
> +	/* Flags for this mapping */
> +	uint16_t flags;
> +	/* Number of pages to map */
> +	uint32_t count;
> +	/* OUT parameters */
> +	/* The offset to be used on a subsequent call to mmap(). */
> +	uint64_t index;
> +	/* The grant references of the newly created grant, one per page */
> +	/* Variable size, depending on count */
> +	uint32_t gref_ids[1];
> +};
> +
> +#define GNTALLOC_FLAG_WRITABLE 1
> +
> +/*
> + * Deallocates the grant reference, allowing the associated page to be freed if
> + * no other domains are using it.
> + */
> +#define IOCTL_GNTALLOC_DEALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
> +struct ioctl_gntalloc_dealloc_gref {
> +	/* IN parameters */
> +	/* The offset returned in the map operation */
> +	uint64_t index;
> +	/* Number of references to unmap */
> +	uint32_t count;
> +};
> +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
> -- 
> 1.7.3.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-01-21 15:59 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
@ 2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  2011-01-27 19:26     ` Daniel De Graaf
  2011-03-04 15:57     ` Ian Campbell
  0 siblings, 2 replies; 29+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-01-27 18:52 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Fri, Jan 21, 2011 at 10:59:06AM -0500, Daniel De Graaf wrote:
> HVM does not allow direct PTE modification, so instead we request
> that Xen change its internal p2m mappings on the allocated pages and
> map the memory into userspace normally.
> 
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> ---
>  drivers/xen/gntdev.c      |  115 +++++++++++++++++++++++++++++++-------------
>  drivers/xen/grant-table.c |    6 ++
>  2 files changed, 87 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index 256162b..a5710e8 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -32,6 +32,7 @@
>  #include <linux/sched.h>
>  #include <linux/spinlock.h>
>  #include <linux/slab.h>
> +#include <linux/highmem.h>
>  
>  #include <xen/xen.h>
>  #include <xen/grant_table.h>
> @@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
>  
>  static atomic_t pages_mapped = ATOMIC_INIT(0);
>  
> +static int use_ptemod;
> +
>  struct gntdev_priv {
>  	struct list_head maps;
>  	/* lock protects maps from concurrent changes */
> @@ -74,6 +77,8 @@ struct grant_map {
>  	struct page **pages;
>  };
>  
> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
> +
>  /* ------------------------------------------------------------------ */
>  
>  static void gntdev_print_maps(struct gntdev_priv *priv,
> @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
>  
>  	atomic_sub(map->count, &pages_mapped);
>  
> -	if (map->pages)
> +	if (map->pages) {
> +		if (!use_ptemod)
> +			unmap_grant_pages(map, 0, map->count);
> +
>  		for (i = 0; i < map->count; i++) {
> -			if (map->pages[i])
> +			uint32_t check, *tmp;
> +			if (!map->pages[i])
> +				continue;
> +			/* XXX When unmapping in an HVM domain, Xen will
> +			 * sometimes end up mapping the GFN to an invalid MFN.
> +			 * In this case, writes will be discarded and reads will
> +			 * return all 0xFF bytes.  Leak these unusable GFNs

I forgot to ask, under what version of Xen did you run this? I want to add
that to the comment so when it gets fixed we know what the failing version is.

> +			 * until Xen supports fixing their p2m mapping.
> +			 */
> +			tmp = kmap(map->pages[i]);
> +			*tmp = 0xdeaddead;
> +			mb();
> +			check = *tmp;
> +			kunmap(map->pages[i]);
> +			if (check == 0xdeaddead)
>  				__free_page(map->pages[i]);
> +			else
> +				pr_debug("Discard page %d=%ld\n", i,
> +					page_to_pfn(map->pages[i]));
>  		}
> +	}
>  	kfree(map->pages);
>  	kfree(map->grants);
>  	kfree(map->map_ops);
> @@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
>  {
>  	struct grant_map *map = data;
>  	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
> +	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
>  	u64 pte_maddr;
>  
>  	BUG_ON(pgnr >= map->count);
>  	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
>  
> -	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
> -			  GNTMAP_contains_pte | map->flags,
> +	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
>  			  map->grants[pgnr].ref,
>  			  map->grants[pgnr].domid);
> -	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
> -			    GNTMAP_contains_pte | map->flags,
> +	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
>  			    0 /* handle */);
>  	return 0;
>  }
> @@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
>  static int map_grant_pages(struct grant_map *map)
>  {
>  	int i, err = 0;
> +	phys_addr_t addr;
> +
> +	if (!use_ptemod) {
> +		for (i = 0; i < map->count; i++) {
> +			addr = (phys_addr_t)
> +				pfn_to_kaddr(page_to_pfn(map->pages[i]));
> +			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
> +				map->grants[i].ref,
> +				map->grants[i].domid);
> +			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
> +				map->flags, 0 /* handle */);
> +		}
> +	}
>  
>  	pr_debug("map %d+%d\n", map->index, map->count);
>  	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
> @@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
>  	gntdev_put_map(map);
>  }
>  
> -static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> -{
> -	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
> -			vmf->virtual_address, vmf->pgoff);
> -	vmf->flags = VM_FAULT_ERROR;
> -	return 0;
> -}
> -
>  static struct vm_operations_struct gntdev_vmops = {
>  	.close = gntdev_vma_close,
> -	.fault = gntdev_vma_fault,
>  };
>  
>  /* ------------------------------------------------------------------ */
> @@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip)
>  	INIT_LIST_HEAD(&priv->maps);
>  	spin_lock_init(&priv->lock);
>  
> -	priv->mm = get_task_mm(current);
> -	if (!priv->mm) {
> -		kfree(priv);
> -		return -ENOMEM;
> +	if (use_ptemod) {
> +		priv->mm = get_task_mm(current);
> +		if (!priv->mm) {
> +			kfree(priv);
> +			return -ENOMEM;
> +		}
> +		priv->mn.ops = &gntdev_mmu_ops;
> +		ret = mmu_notifier_register(&priv->mn, priv->mm);
> +		mmput(priv->mm);
>  	}
> -	priv->mn.ops = &gntdev_mmu_ops;
> -	ret = mmu_notifier_register(&priv->mn, priv->mm);
> -	mmput(priv->mm);
>  
>  	if (ret) {
>  		kfree(priv);
> @@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip)
>  	}
>  	spin_unlock(&priv->lock);
>  
> -	mmu_notifier_unregister(&priv->mn, priv->mm);
> +	if (use_ptemod)
> +		mmu_notifier_unregister(&priv->mn, priv->mm);
>  	kfree(priv);
>  	return 0;
>  }
> @@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>  	int index = vma->vm_pgoff;
>  	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
>  	struct grant_map *map;
> -	int err = -EINVAL;
> +	int i, err = -EINVAL;
>  
>  	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
>  		return -EINVAL;
> @@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>  	map = gntdev_find_map_index(priv, index, count);
>  	if (!map)
>  		goto unlock_out;
> -	if (map->vma)
> +	if (use_ptemod && map->vma)
>  		goto unlock_out;
> -	if (priv->mm != vma->vm_mm) {
> +	if (use_ptemod && priv->mm != vma->vm_mm) {
>  		printk(KERN_WARNING "Huh? Other mm?\n");
>  		goto unlock_out;
>  	}
> @@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>  	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
>  
>  	vma->vm_private_data = map;
> -	map->vma = vma;
>  
> -	map->flags = GNTMAP_host_map | GNTMAP_application_map;
> +	if (use_ptemod)
> +		map->vma = vma;
> +
> +	map->flags = GNTMAP_host_map;
>  	if (!(vma->vm_flags & VM_WRITE))
>  		map->flags |= GNTMAP_readonly;
>  
>  	spin_unlock(&priv->lock);
>  
> -	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
> -				  vma->vm_end - vma->vm_start,
> -				  find_grant_ptes, map);
> -	if (err) {
> -		printk(KERN_WARNING "find_grant_ptes() failure.\n");
> -		return err;
> +	if (use_ptemod) {
> +		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
> +					  vma->vm_end - vma->vm_start,
> +					  find_grant_ptes, map);
> +		if (err) {
> +			printk(KERN_WARNING "find_grant_ptes() failure.\n");
> +			return err;
> +		}
>  	}
>  
>  	err = map_grant_pages(map);
> @@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>  
>  	map->is_mapped = 1;
>  
> +	if (!use_ptemod) {
> +		for (i = 0; i < count; i++) {
> +			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
> +				map->pages[i]);
> +			if (err)
> +				return err;
> +		}
> +	}
> +
>  	return 0;
>  
>  unlock_out:
> @@ -595,6 +640,8 @@ static int __init gntdev_init(void)
>  	if (!xen_domain())
>  		return -ENODEV;
>  
> +	use_ptemod = xen_pv_domain();
> +
>  	err = misc_register(&gntdev_miscdev);
>  	if (err != 0) {
>  		printk(KERN_ERR "Could not register gntdev device\n");
> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> index 9ef54eb..9428ced 100644
> --- a/drivers/xen/grant-table.c
> +++ b/drivers/xen/grant-table.c
> @@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
>  	if (ret)
>  		return ret;
>  
> +	if (xen_feature(XENFEAT_auto_translated_physmap))
> +		return ret;
> +
>  	for (i = 0; i < count; i++) {
>  		/* m2p override only supported for GNTMAP_contains_pte mappings */
>  		if (!(map_ops[i].flags & GNTMAP_contains_pte))
> @@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
>  	if (ret)
>  		return ret;
>  
> +	if (xen_feature(XENFEAT_auto_translated_physmap))
> +		return ret;
> +
>  	for (i = 0; i < count; i++) {
>  		ret = m2p_remove_override(pages[i]);
>  		if (ret)
> -- 
> 1.7.3.4

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
  2011-01-21 15:59 ` [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl Daniel De Graaf
@ 2011-01-27 19:20   ` Konrad Rzeszutek Wilk
  2011-01-27 20:09     ` Daniel De Graaf
  0 siblings, 1 reply; 29+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-01-27 19:20 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Fri, Jan 21, 2011 at 10:59:08AM -0500, Daniel De Graaf wrote:
> This ioctl allows the users of a shared page to be notified when
> the other end exits abnormally.
> 
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> ---
>  drivers/xen/gntalloc.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/xen/gntdev.c   |   56 +++++++++++++++++++++++++++++++++++++++++++++++-
>  include/xen/gntalloc.h |   28 ++++++++++++++++++++++++
>  include/xen/gntdev.h   |   27 +++++++++++++++++++++++
>  4 files changed, 165 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
> index a230dc4..8ac0dfc 100644
> --- a/drivers/xen/gntalloc.c
> +++ b/drivers/xen/gntalloc.c
> @@ -60,11 +60,13 @@
>  #include <linux/uaccess.h>
>  #include <linux/types.h>
>  #include <linux/list.h>
> +#include <linux/highmem.h>
>  
>  #include <xen/xen.h>
>  #include <xen/page.h>
>  #include <xen/grant_table.h>
>  #include <xen/gntalloc.h>
> +#include <xen/events.h>
>  
>  static int limit = 1024;
>  module_param(limit, int, 0644);
> @@ -83,6 +85,9 @@ struct gntalloc_gref {
>  	uint64_t file_index;         /* File offset for mmap() */
>  	unsigned int users;          /* Use count - when zero, waiting on Xen */
>  	grant_ref_t gref_id;         /* The grant reference number */
> +	unsigned notify_flags:2;     /* Unmap notification flags */

Can you add to the comment: bits 0-1
> +	unsigned notify_pgoff:12;    /* Offset of the byte to clear */
                                              ^^ in the page
And "bits 2-14.".


> +	int notify_event;            /* Port (event channel) to notify */
>  };
>  
>  struct gntalloc_file_private_data {
> @@ -169,6 +174,16 @@ undo:
>  
>  static void __del_gref(struct gntalloc_gref *gref)
>  {
> +	if (gref->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) {
> +		uint8_t *tmp = kmap(gref->page);
> +		tmp[gref->notify_pgoff] = 0;
> +		kunmap(gref->page);
> +	}
> +	if (gref->notify_flags & UNMAP_NOTIFY_SEND_EVENT)
> +		notify_remote_via_evtchn(gref->notify_event);
> +
> +	gref->notify_flags = 0;
> +
>  	if (gnttab_query_foreign_access(gref->gref_id))
>  		return;
>  
> @@ -339,6 +354,43 @@ dealloc_grant_out:
>  	return rc;
>  }
>  
> +static long gntalloc_ioctl_notify(struct gntalloc_file_private_data *priv,
> +		void __user *arg)

Call it '..ioctl_unmap_notify'. When I read it first time I thought this
would do just notify.. while in actuality it can do both.

> +{
> +	struct ioctl_gntalloc_unmap_notify op;
> +	struct gntalloc_gref *gref;
> +	uint64_t index;
> +	int pgoff;
> +	int rc;
> +
> +	if (copy_from_user(&op, arg, sizeof(op)))
> +		return -EFAULT;
> +
> +	index = op.index & ~(PAGE_SIZE - 1);
> +	pgoff = op.index & (PAGE_SIZE - 1);

You don't want to tell the user if the messed up? Say 0xdeadf00d?
> +
> +	spin_lock(&gref_lock);
> +
> +	gref = find_grefs(priv, index, 1);
> +	if (!gref) {
> +		rc = -ENOENT;
> +		goto unlock_out;
> +	}
> +
> +	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
> +		rc = -EINVAL;
> +		goto unlock_out;
> +	}
> +
> +	gref->notify_flags = op.action;
> +	gref->notify_pgoff = pgoff;
> +	gref->notify_event = op.event_channel_port;
> +	rc = 0;
> + unlock_out:
> +	spin_unlock(&gref_lock);
> +	return rc;
> +}
> +
>  static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
>  		unsigned long arg)
>  {
> @@ -351,6 +403,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
>  	case IOCTL_GNTALLOC_DEALLOC_GREF:
>  		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
>  
> +	case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
> +		return gntalloc_ioctl_notify(priv, (void __user *)arg);
> +
>  	default:
>  		return -ENOIOCTLCMD;
>  	}
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index a5710e8..e360d0a 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -37,6 +37,7 @@
>  #include <xen/xen.h>
>  #include <xen/grant_table.h>
>  #include <xen/gntdev.h>
> +#include <xen/events.h>
>  #include <asm/xen/hypervisor.h>
>  #include <asm/xen/hypercall.h>
>  #include <asm/xen/page.h>
> @@ -70,6 +71,9 @@ struct grant_map {
>  	int count;
>  	int flags;
>  	int is_mapped;
> +	int notify_flags;
> +	int notify_offset;
> +	int notify_event;

Would it make sense to put these inside a struct?

>  	atomic_t users;
>  	struct ioctl_gntdev_grant_ref *grants;
>  	struct gnttab_map_grant_ref   *map_ops;
> @@ -165,7 +169,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
>  	list_for_each_entry(map, &priv->maps, next) {
>  		if (map->index != index)
>  			continue;
> -		if (map->count != count)
> +		if (count && map->count != count)
>  			continue;
>  		return map;
>  	}
> @@ -184,6 +188,10 @@ static void gntdev_put_map(struct grant_map *map)
>  
>  	atomic_sub(map->count, &pages_mapped);
>  
> +	if (map->notify_flags & UNMAP_NOTIFY_SEND_EVENT) {
> +		notify_remote_via_evtchn(map->notify_event);
> +	}
> +
>  	if (map->pages) {
>  		if (!use_ptemod)
>  			unmap_grant_pages(map, 0, map->count);
> @@ -272,6 +280,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
>  {
>  	int i, err = 0;
>  
> +	if (map->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) {
> +		int pgno = (map->notify_offset >> PAGE_SHIFT);
> +		if (pgno >= offset && pgno < pages + offset) {

Whoa, that looks to violate what 'notify_offset' actually means.
Perhaps a better name for that variable? notify_addr?

> +			uint8_t *tmp = kmap(map->pages[pgno]);
> +			tmp[map->notify_offset & (PAGE_SIZE-1)] = 0;
> +			kunmap(map->pages[pgno]);
> +			map->notify_flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
> +		}
> +	}
> +
>  	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
>  	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
>  	if (err)
> @@ -517,6 +535,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
>  	return 0;
>  }
>  
> +static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)

> +{
> +	struct ioctl_gntdev_unmap_notify op;
> +	struct grant_map *map;
> +	int rc;
> +
> +	if (copy_from_user(&op, u, sizeof(op)))
> +		return -EFAULT;
> +
> +	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
> +		return -EINVAL;
> +
> +	spin_lock(&priv->lock);
> +
> +	list_for_each_entry(map, &priv->maps, next) {
> +		uint64_t begin = map->index << PAGE_SHIFT;
> +		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
> +		if (op.index >= begin && op.index < end)
> +			goto found;
> +	}
> +	rc = -ENOENT;
> +	goto unlock_out;
> +
> + found:
> +	map->notify_flags = op.action;
> +	map->notify_offset = op.index - (map->index << PAGE_SHIFT);

Minus? So say the index is 0xdeadbeef, wouldn't this throw this off?

Should you also check the index to make sure it is within a PAGE_SIZE
offset? (the ioctl accepts 64-bit values for the index).

> +	map->notify_event = op.event_channel_port;
> +	rc = 0;
> + unlock_out:
> +	spin_unlock(&priv->lock);
> +	return rc;
> +}
> +
>  static long gntdev_ioctl(struct file *flip,
>  			 unsigned int cmd, unsigned long arg)
>  {
> @@ -533,6 +584,9 @@ static long gntdev_ioctl(struct file *flip,
>  	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
>  		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
>  
> +	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
> +		return gntdev_ioctl_notify(priv, ptr);
> +
>  	default:
>  		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
>  		return -ENOIOCTLCMD;
> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
> index e1d6d0f..92339f5 100644
> --- a/include/xen/gntalloc.h
> +++ b/include/xen/gntalloc.h
> @@ -67,4 +67,32 @@ struct ioctl_gntalloc_dealloc_gref {
>  	/* Number of references to unmap */
>  	uint32_t count;
>  };
> +
> +/*
> + * Sets up an unmap notification within the page, so that the other side can do
> + * cleanup if this side crashes. Required to implement cross-domain robust
> + * mutexes or close notification on communication channels.
> + *
> + * Each mapped page only supports one notification; multiple calls referring to
> + * the same page overwrite the previous notification. You must clear the
> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
> + * to occur.
> + */
> +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
> +struct ioctl_gntalloc_unmap_notify {
> +	/* IN parameters */
> +	/* Index of a byte in the page */
> +	uint64_t index;
> +	/* Action(s) to take on unmap */
> +	uint32_t action;
> +	/* Event channel to notify */
> +	uint32_t event_channel_port;
> +};
> +
> +/* Clear (set to zero) the byte specified by index */
> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
> +/* Send an interrupt on the indicated event channel */
> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
> +
>  #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
> diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
> index eb23f41..5d9b9b4 100644
> --- a/include/xen/gntdev.h
> +++ b/include/xen/gntdev.h
> @@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants {
>  	uint32_t count;
>  };
>  
> +/*
> + * Sets up an unmap notification within the page, so that the other side can do
> + * cleanup if this side crashes. Required to implement cross-domain robust
> + * mutexes or close notification on communication channels.
> + *
> + * Each mapped page only supports one notification; multiple calls referring to
> + * the same page overwrite the previous notification. You must clear the
> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
> + * to occur.
> + */
> +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
> +struct ioctl_gntdev_unmap_notify {
> +	/* IN parameters */
> +	/* Index of a byte in the page */
> +	uint64_t index;
> +	/* Action(s) to take on unmap */
> +	uint32_t action;
> +	/* Event channel to notify */
> +	uint32_t event_channel_port;
> +};
> +
> +/* Clear (set to zero) the byte specified by index */
> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
> +/* Send an interrupt on the indicated event channel */
> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
> +
>  #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
> -- 
> 1.7.3.4

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
@ 2011-01-27 19:23     ` Konrad Rzeszutek Wilk
  2011-01-27 19:51       ` Daniel De Graaf
  2011-01-27 20:55     ` Daniel De Graaf
  1 sibling, 1 reply; 29+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-01-27 19:23 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Thu, Jan 27, 2011 at 01:52:10PM -0500, Konrad Rzeszutek Wilk wrote:
> On Fri, Jan 21, 2011 at 10:59:07AM -0500, Daniel De Graaf wrote:
> > This allows a userspace application to allocate a shared page for
> > implementing inter-domain communication or device drivers. These
> > shared pages can be mapped using the gntdev device or by the kernel
> > in another domain.
> > 
> > Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> > ---
> >  drivers/xen/Kconfig    |    8 +
> >  drivers/xen/Makefile   |    2 +
> >  drivers/xen/gntalloc.c |  477 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  include/xen/gntalloc.h |   70 +++++++
> >  4 files changed, 557 insertions(+), 0 deletions(-)
> >  create mode 100644 drivers/xen/gntalloc.c
> >  create mode 100644 include/xen/gntalloc.h
> > 
> > diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> > index 07bec09..a3d7afb 100644
> > --- a/drivers/xen/Kconfig
> > +++ b/drivers/xen/Kconfig
> > @@ -80,6 +80,14 @@ config XEN_GNTDEV
> >  	help
> >  	  Allows userspace processes to use grants.
> >  
> > +config XEN_GRANT_DEV_ALLOC
> > +	tristate "User-space grant reference allocator driver"
> > +	depends on XEN
> > +	help
> > +	  Allows userspace processes to create pages with access granted
> > +	  to other domains. This can be used to implement frontend drivers
> > +	  or as part of an inter-domain shared memory channel.


> > +
> >  config XEN_PLATFORM_PCI
> >  	tristate "xen platform pci device driver"
> >  	depends on XEN_PVHVM && PCI
> > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> > index 5088cc2..9585a1d 100644
> > --- a/drivers/xen/Makefile
> > +++ b/drivers/xen/Makefile
> > @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
> >  obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
> >  obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
> >  obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
> > +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
> >  obj-$(CONFIG_XENFS)		+= xenfs/
> >  obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
> >  obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
> > @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
> >  
> >  xen-evtchn-y			:= evtchn.o
> >  xen-gntdev-y				:= gntdev.o
> > +xen-gntalloc-y				:= gntalloc.o
> >  
> >  xen-platform-pci-y		:= platform-pci.o
> > diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
> > new file mode 100644
> > index 0000000..a230dc4
> > --- /dev/null
> > +++ b/drivers/xen/gntalloc.c
> > @@ -0,0 +1,477 @@
> > +/******************************************************************************
> > + * gntalloc.c
> > + *
> > + * Device for creating grant references (in user-space) that may be shared
> > + * with other domains.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> > + */
> > +
> > +/*
> > + * This driver exists to allow userspace programs in Linux to allocate kernel
> > + * memory that will later be shared with another domain.  Without this device,
> > + * Linux userspace programs cannot create grant references.

Why can't this be done in gntdev? Is there a simple test program for this?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
@ 2011-01-27 19:26     ` Daniel De Graaf
  2011-03-04 15:57     ` Ian Campbell
  1 sibling, 0 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-27 19:26 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 01/27/2011 01:52 PM, Konrad Rzeszutek Wilk wrote:
> On Fri, Jan 21, 2011 at 10:59:06AM -0500, Daniel De Graaf wrote:
>> HVM does not allow direct PTE modification, so instead we request
>> that Xen change its internal p2m mappings on the allocated pages and
>> map the memory into userspace normally.
>>
>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>> ---
>>  drivers/xen/gntdev.c      |  115 +++++++++++++++++++++++++++++++-------------
>>  drivers/xen/grant-table.c |    6 ++
>>  2 files changed, 87 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
>> index 256162b..a5710e8 100644
>> --- a/drivers/xen/gntdev.c
>> +++ b/drivers/xen/gntdev.c
>> @@ -32,6 +32,7 @@
>>  #include <linux/sched.h>
>>  #include <linux/spinlock.h>
>>  #include <linux/slab.h>
>> +#include <linux/highmem.h>
>>  
>>  #include <xen/xen.h>
>>  #include <xen/grant_table.h>
>> @@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
>>  
>>  static atomic_t pages_mapped = ATOMIC_INIT(0);
>>  
>> +static int use_ptemod;
>> +
>>  struct gntdev_priv {
>>  	struct list_head maps;
>>  	/* lock protects maps from concurrent changes */
>> @@ -74,6 +77,8 @@ struct grant_map {
>>  	struct page **pages;
>>  };
>>  
>> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
>> +
>>  /* ------------------------------------------------------------------ */
>>  
>>  static void gntdev_print_maps(struct gntdev_priv *priv,
>> @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
>>  
>>  	atomic_sub(map->count, &pages_mapped);
>>  
>> -	if (map->pages)
>> +	if (map->pages) {
>> +		if (!use_ptemod)
>> +			unmap_grant_pages(map, 0, map->count);
>> +
>>  		for (i = 0; i < map->count; i++) {
>> -			if (map->pages[i])
>> +			uint32_t check, *tmp;
>> +			if (!map->pages[i])
>> +				continue;
>> +			/* XXX When unmapping in an HVM domain, Xen will
>> +			 * sometimes end up mapping the GFN to an invalid MFN.
>> +			 * In this case, writes will be discarded and reads will
>> +			 * return all 0xFF bytes.  Leak these unusable GFNs
> 
> I forgot to ask, under what version of Xen did you run this? I want to add
> that to the comment so when it gets fixed we know what the failing version is.

4.1-unstable. In particular, 22641:4e108cf56d07 exhibits the bug, and I have not
found a version of 4.1 that doesn't (although I haven't searched for that in
particular). I could try to find other versions also exhibit this behavior, if
it would be useful to have a list.

Note for reproducing: at r22641, you need to revert 22402:7d2fdc083c9c or the
grant table of the HVM guest will not be readable by Xen. My test that exercises
the bug is an HVM-to-HVM grant.

>> +			 * until Xen supports fixing their p2m mapping.
>> +			 */
>> +			tmp = kmap(map->pages[i]);
>> +			*tmp = 0xdeaddead;
>> +			mb();
>> +			check = *tmp;
>> +			kunmap(map->pages[i]);
>> +			if (check == 0xdeaddead)
>>  				__free_page(map->pages[i]);
>> +			else
>> +				pr_debug("Discard page %d=%ld\n", i,
>> +					page_to_pfn(map->pages[i]));
>>  		}
>> +	}
>>  	kfree(map->pages);
>>  	kfree(map->grants);
>>  	kfree(map->map_ops);
>> @@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
>>  {
>>  	struct grant_map *map = data;
>>  	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
>> +	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
>>  	u64 pte_maddr;
>>  
>>  	BUG_ON(pgnr >= map->count);
>>  	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
>>  
>> -	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
>> -			  GNTMAP_contains_pte | map->flags,
>> +	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
>>  			  map->grants[pgnr].ref,
>>  			  map->grants[pgnr].domid);
>> -	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
>> -			    GNTMAP_contains_pte | map->flags,
>> +	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
>>  			    0 /* handle */);
>>  	return 0;
>>  }
>> @@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
>>  static int map_grant_pages(struct grant_map *map)
>>  {
>>  	int i, err = 0;
>> +	phys_addr_t addr;
>> +
>> +	if (!use_ptemod) {
>> +		for (i = 0; i < map->count; i++) {
>> +			addr = (phys_addr_t)
>> +				pfn_to_kaddr(page_to_pfn(map->pages[i]));
>> +			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
>> +				map->grants[i].ref,
>> +				map->grants[i].domid);
>> +			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
>> +				map->flags, 0 /* handle */);
>> +		}
>> +	}
>>  
>>  	pr_debug("map %d+%d\n", map->index, map->count);
>>  	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
>> @@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
>>  	gntdev_put_map(map);
>>  }
>>  
>> -static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>> -{
>> -	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
>> -			vmf->virtual_address, vmf->pgoff);
>> -	vmf->flags = VM_FAULT_ERROR;
>> -	return 0;
>> -}
>> -
>>  static struct vm_operations_struct gntdev_vmops = {
>>  	.close = gntdev_vma_close,
>> -	.fault = gntdev_vma_fault,
>>  };
>>  
>>  /* ------------------------------------------------------------------ */
>> @@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip)
>>  	INIT_LIST_HEAD(&priv->maps);
>>  	spin_lock_init(&priv->lock);
>>  
>> -	priv->mm = get_task_mm(current);
>> -	if (!priv->mm) {
>> -		kfree(priv);
>> -		return -ENOMEM;
>> +	if (use_ptemod) {
>> +		priv->mm = get_task_mm(current);
>> +		if (!priv->mm) {
>> +			kfree(priv);
>> +			return -ENOMEM;
>> +		}
>> +		priv->mn.ops = &gntdev_mmu_ops;
>> +		ret = mmu_notifier_register(&priv->mn, priv->mm);
>> +		mmput(priv->mm);
>>  	}
>> -	priv->mn.ops = &gntdev_mmu_ops;
>> -	ret = mmu_notifier_register(&priv->mn, priv->mm);
>> -	mmput(priv->mm);
>>  
>>  	if (ret) {
>>  		kfree(priv);
>> @@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip)
>>  	}
>>  	spin_unlock(&priv->lock);
>>  
>> -	mmu_notifier_unregister(&priv->mn, priv->mm);
>> +	if (use_ptemod)
>> +		mmu_notifier_unregister(&priv->mn, priv->mm);
>>  	kfree(priv);
>>  	return 0;
>>  }
>> @@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>>  	int index = vma->vm_pgoff;
>>  	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
>>  	struct grant_map *map;
>> -	int err = -EINVAL;
>> +	int i, err = -EINVAL;
>>  
>>  	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
>>  		return -EINVAL;
>> @@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>>  	map = gntdev_find_map_index(priv, index, count);
>>  	if (!map)
>>  		goto unlock_out;
>> -	if (map->vma)
>> +	if (use_ptemod && map->vma)
>>  		goto unlock_out;
>> -	if (priv->mm != vma->vm_mm) {
>> +	if (use_ptemod && priv->mm != vma->vm_mm) {
>>  		printk(KERN_WARNING "Huh? Other mm?\n");
>>  		goto unlock_out;
>>  	}
>> @@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>>  	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
>>  
>>  	vma->vm_private_data = map;
>> -	map->vma = vma;
>>  
>> -	map->flags = GNTMAP_host_map | GNTMAP_application_map;
>> +	if (use_ptemod)
>> +		map->vma = vma;
>> +
>> +	map->flags = GNTMAP_host_map;
>>  	if (!(vma->vm_flags & VM_WRITE))
>>  		map->flags |= GNTMAP_readonly;
>>  
>>  	spin_unlock(&priv->lock);
>>  
>> -	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
>> -				  vma->vm_end - vma->vm_start,
>> -				  find_grant_ptes, map);
>> -	if (err) {
>> -		printk(KERN_WARNING "find_grant_ptes() failure.\n");
>> -		return err;
>> +	if (use_ptemod) {
>> +		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
>> +					  vma->vm_end - vma->vm_start,
>> +					  find_grant_ptes, map);
>> +		if (err) {
>> +			printk(KERN_WARNING "find_grant_ptes() failure.\n");
>> +			return err;
>> +		}
>>  	}
>>  
>>  	err = map_grant_pages(map);
>> @@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
>>  
>>  	map->is_mapped = 1;
>>  
>> +	if (!use_ptemod) {
>> +		for (i = 0; i < count; i++) {
>> +			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
>> +				map->pages[i]);
>> +			if (err)
>> +				return err;
>> +		}
>> +	}
>> +
>>  	return 0;
>>  
>>  unlock_out:
>> @@ -595,6 +640,8 @@ static int __init gntdev_init(void)
>>  	if (!xen_domain())
>>  		return -ENODEV;
>>  
>> +	use_ptemod = xen_pv_domain();
>> +
>>  	err = misc_register(&gntdev_miscdev);
>>  	if (err != 0) {
>>  		printk(KERN_ERR "Could not register gntdev device\n");
>> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
>> index 9ef54eb..9428ced 100644
>> --- a/drivers/xen/grant-table.c
>> +++ b/drivers/xen/grant-table.c
>> @@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
>>  	if (ret)
>>  		return ret;
>>  
>> +	if (xen_feature(XENFEAT_auto_translated_physmap))
>> +		return ret;
>> +
>>  	for (i = 0; i < count; i++) {
>>  		/* m2p override only supported for GNTMAP_contains_pte mappings */
>>  		if (!(map_ops[i].flags & GNTMAP_contains_pte))
>> @@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
>>  	if (ret)
>>  		return ret;
>>  
>> +	if (xen_feature(XENFEAT_auto_translated_physmap))
>> +		return ret;
>> +
>>  	for (i = 0; i < count; i++) {
>>  		ret = m2p_remove_override(pages[i]);
>>  		if (ret)
>> -- 
>> 1.7.3.4

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-01-27 19:23     ` Konrad Rzeszutek Wilk
@ 2011-01-27 19:51       ` Daniel De Graaf
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-27 19:51 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

[-- Attachment #1: Type: text/plain, Size: 4320 bytes --]

On 01/27/2011 02:23 PM, Konrad Rzeszutek Wilk wrote:
> On Thu, Jan 27, 2011 at 01:52:10PM -0500, Konrad Rzeszutek Wilk wrote:
>> On Fri, Jan 21, 2011 at 10:59:07AM -0500, Daniel De Graaf wrote:
>>> This allows a userspace application to allocate a shared page for
>>> implementing inter-domain communication or device drivers. These
>>> shared pages can be mapped using the gntdev device or by the kernel
>>> in another domain.
>>>
>>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>>> ---
>>>  drivers/xen/Kconfig    |    8 +
>>>  drivers/xen/Makefile   |    2 +
>>>  drivers/xen/gntalloc.c |  477 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>  include/xen/gntalloc.h |   70 +++++++
>>>  4 files changed, 557 insertions(+), 0 deletions(-)
>>>  create mode 100644 drivers/xen/gntalloc.c
>>>  create mode 100644 include/xen/gntalloc.h
>>>
>>> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
>>> index 07bec09..a3d7afb 100644
>>> --- a/drivers/xen/Kconfig
>>> +++ b/drivers/xen/Kconfig
>>> @@ -80,6 +80,14 @@ config XEN_GNTDEV
>>>  	help
>>>  	  Allows userspace processes to use grants.
>>>  
>>> +config XEN_GRANT_DEV_ALLOC
>>> +	tristate "User-space grant reference allocator driver"
>>> +	depends on XEN
>>> +	help
>>> +	  Allows userspace processes to create pages with access granted
>>> +	  to other domains. This can be used to implement frontend drivers
>>> +	  or as part of an inter-domain shared memory channel.
> 
> 
>>> +
>>>  config XEN_PLATFORM_PCI
>>>  	tristate "xen platform pci device driver"
>>>  	depends on XEN_PVHVM && PCI
>>> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
>>> index 5088cc2..9585a1d 100644
>>> --- a/drivers/xen/Makefile
>>> +++ b/drivers/xen/Makefile
>>> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
>>>  obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
>>>  obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
>>>  obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
>>> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
>>>  obj-$(CONFIG_XENFS)		+= xenfs/
>>>  obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
>>>  obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
>>> @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
>>>  
>>>  xen-evtchn-y			:= evtchn.o
>>>  xen-gntdev-y				:= gntdev.o
>>> +xen-gntalloc-y				:= gntalloc.o
>>>  
>>>  xen-platform-pci-y		:= platform-pci.o
>>> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
>>> new file mode 100644
>>> index 0000000..a230dc4
>>> --- /dev/null
>>> +++ b/drivers/xen/gntalloc.c
>>> @@ -0,0 +1,477 @@
>>> +/******************************************************************************
>>> + * gntalloc.c
>>> + *
>>> + * Device for creating grant references (in user-space) that may be shared
>>> + * with other domains.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> + * GNU General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU General Public License
>>> + * along with this program; if not, write to the Free Software
>>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>>> + */
>>> +
>>> +/*
>>> + * This driver exists to allow userspace programs in Linux to allocate kernel
>>> + * memory that will later be shared with another domain.  Without this device,
>>> + * Linux userspace programs cannot create grant references.
> 
> Why can't this be done in gntdev? Is there a simple test program for this?
> 

I didn't do this in gntdev so that it would be possible to allow programs to map
grants without also giving them rights to allocate (or vice versa). The actions
taken on the pages are also different (gntdev remaps them, gntalloc just shares)
so much of the surrounding map/unmap logic is different.

If it's preferred, I could try to add the ioctls to gntdev; there is some overlap
in functionality, especially when using the HVM code in gntdev. The limits for
gntalloc need to remain different, because a page allocated by gntalloc is
impossible to reclaim using the OOM killer (unlike pages allocated by gntdev).

Simple test program (for gntdev and gntalloc) attached.

-- 
Daniel De Graaf
National Security Agency

[-- Attachment #2: test_gnt.c --]
[-- Type: text/plain, Size: 5181 bytes --]

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
struct ioctl_gntdev_grant_ref {
	/* The domain ID of the grant to be mapped. */
	uint32_t domid;
	/* The grant reference of the grant to be mapped. */
	uint32_t ref;
};

/*
 * Allocates a new page and creates a new grant reference.
 */
#define IOCTL_GNTALLOC_ALLOC_GREF \
_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
struct ioctl_gntalloc_alloc_gref {
	/* IN parameters */
	/* The ID of the domain to be given access to the grants. */
	uint16_t domid;
	/* Flags for this mapping */
	uint16_t flags;
	/* Number of pages to map */
	uint32_t count;
	/* OUT parameters */
	/* The offset to be used on a subsequent call to mmap(). */
	uint64_t index;
	/* The grant references of the newly created grant, one per page */
	/* Variable size, depending on count */
	uint32_t gref_ids[1];
};

#define GNTALLOC_FLAG_WRITABLE 1

/*
 * Deallocates the grant reference, allowing the associated page to be freed if
 * no other domains are using it.
 */
#define IOCTL_GNTALLOC_DEALLOC_GREF \
_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
struct ioctl_gntalloc_dealloc_gref {
	/* IN parameters */
	/* The offset returned in the map operation */
	uint64_t index;
	/* Number of references to unmap */
	uint32_t count;
};

#define IOCTL_GNTDEV_MAP_GRANT_REF \
_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
struct ioctl_gntdev_map_grant_ref {
    /* IN parameters */
    /* The number of grants to be mapped. */
    uint32_t count;
    uint32_t pad;
    /* OUT parameters */
    /* The offset to be used on a subsequent call to mmap(). */
    uint64_t index;
    /* Variable IN parameter. */
    /* Array of grant references, of size @count. */
    struct ioctl_gntdev_grant_ref refs[1];
};
#define GNTDEV_MAP_WRITABLE 0x1

#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))       
struct ioctl_gntdev_unmap_grant_ref {
    /* IN parameters */
    /* The offset was returned by the corresponding map operation. */
    uint64_t index;
    /* The number of pages to be unmapped. */
    uint32_t count;
    uint32_t pad;
};


int a_fd;
int d_fd;

struct data {
	uint64_t* mem;
	int handle;
} items[128];

void sa(int id) {
	struct ioctl_gntalloc_alloc_gref arg = {
		.domid = id,
		.flags = GNTALLOC_FLAG_WRITABLE,
		.count = 1
	};
	int rv = ioctl(a_fd, IOCTL_GNTALLOC_ALLOC_GREF, &arg);
	if (rv)
		printf("src-add error: %s (rv=%d)\n", strerror(errno), rv);
	else {
		int i=0;
		while (items[i].mem) i++;
		items[i].mem = mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, a_fd, arg.index);
		items[i].handle = arg.index;
		printf("src-add mapped %d at %d=%ld\n", arg.gref_ids[0], arg.index, items[i].mem);
	}
}

void sd(int ref) {
	struct ioctl_gntalloc_dealloc_gref arg = {
		.index = ref,
		.count = 1
	};

	int rv = ioctl(a_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
	if (rv)
		printf("src-del error: %s (rv=%d)\n", strerror(errno), rv);
}

void mm(int domid, int refid) {
	struct ioctl_gntdev_map_grant_ref arg = {
		.count = 1,
		.refs[0].domid = domid,
		.refs[0].ref = refid,
	};
	int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg);
	if (rv)
		printf("mm error: %s (rv=%d)\n", strerror(errno), rv);
	else {
		int i=0;
		while (items[i].mem) i++;
		items[i].mem = mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, d_fd, arg.index);
		items[i].handle = arg.index;
		printf("mapped index %d at %ld\n", arg.index, items[i].mem);
	}
}

void gu(int index) {
	struct ioctl_gntdev_unmap_grant_ref arg = {
		.index = index,
		.count = 1,
	};
	int rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg);
	if (rv)
		printf("gu error: %s (rv=%d)\n", strerror(errno), rv);
}

void mu(void* addr) {
	int i = 0;
	munmap(addr, 4096);
	while (i < 128)
	{
		if (items[i].mem == addr)
			items[i].mem = 0;
		i++;
	}
}

int bump;

void show() {
	int i;
	for(i=0; i < 128; i++) {
		if (!items[i].mem)
			continue;
		uint64_t val = items[i].mem[0];
		uint64_t repl = val + bump;
		printf("%02d(%ld,%d): current %16lx   new %16lx\n", i, items[i].mem, items[i].handle, val, repl);
		items[i].mem[0] = repl;
	}
	printf("END\n");
}

int main(int argc, char** argv) {
	a_fd = open("/dev/xen/gntalloc", O_RDWR);
	d_fd = open("/dev/xen/gntdev", O_RDWR);
	bump = 1 << (2 * (getpid() % 12));
	printf(
		"src-add <domid>       return gntref, address\n"
		"map <domid> <ref>     return index, address\n"
		"src-del <gntref>      no rv\n"
		"gu <index>            no rv\n"
		"unmap <address>       no rv\n"
		"show                  print and change mapped items\n"
		" This process bumps by %x\n",
		bump
	);
	while (1) {
		char line[80];
		char word[30];
		long a, b;
		printf("\n> ");
		fflush(stdout);
		fgets(line, 80, stdin);
		sscanf(line, "%s %ld %ld", word, &a, &b);
		if (!strcmp(word, "src-add")) {
			sa(a);
		} else if (!strcmp(word, "src-del")) {
			sd(a);
		} else if (!strcmp(word, "map")) {
			mm(a, b);
		} else if (!strcmp(word, "gu")) {
			gu(a);
		} else if (!strcmp(word, "unmap")) {
			mu((void*)a);
		} else {
			show();
		}
	}
}

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
  2011-01-27 19:20   ` Konrad Rzeszutek Wilk
@ 2011-01-27 20:09     ` Daniel De Graaf
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-27 20:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 01/27/2011 02:20 PM, Konrad Rzeszutek Wilk wrote:
> On Fri, Jan 21, 2011 at 10:59:08AM -0500, Daniel De Graaf wrote:
>> This ioctl allows the users of a shared page to be notified when
>> the other end exits abnormally.
>>
>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>> ---
>>  drivers/xen/gntalloc.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++
>>  drivers/xen/gntdev.c   |   56 +++++++++++++++++++++++++++++++++++++++++++++++-
>>  include/xen/gntalloc.h |   28 ++++++++++++++++++++++++
>>  include/xen/gntdev.h   |   27 +++++++++++++++++++++++
>>  4 files changed, 165 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
>> index a230dc4..8ac0dfc 100644
>> --- a/drivers/xen/gntalloc.c
>> +++ b/drivers/xen/gntalloc.c
>> @@ -60,11 +60,13 @@
>>  #include <linux/uaccess.h>
>>  #include <linux/types.h>
>>  #include <linux/list.h>
>> +#include <linux/highmem.h>
>>  
>>  #include <xen/xen.h>
>>  #include <xen/page.h>
>>  #include <xen/grant_table.h>
>>  #include <xen/gntalloc.h>
>> +#include <xen/events.h>
>>  
>>  static int limit = 1024;
>>  module_param(limit, int, 0644);
>> @@ -83,6 +85,9 @@ struct gntalloc_gref {
>>  	uint64_t file_index;         /* File offset for mmap() */
>>  	unsigned int users;          /* Use count - when zero, waiting on Xen */
>>  	grant_ref_t gref_id;         /* The grant reference number */
>> +	unsigned notify_flags:2;     /* Unmap notification flags */
> 
> Can you add to the comment: bits 0-1
>> +	unsigned notify_pgoff:12;    /* Offset of the byte to clear */
>                                               ^^ in the page
> And "bits 2-14.".

OK, will do. I'm just using a bitfield here to save memory, so noting 
the exact bits didn't seem necessary.

> 
>> +	int notify_event;            /* Port (event channel) to notify */
>>  };
>>  
>>  struct gntalloc_file_private_data {
>> @@ -169,6 +174,16 @@ undo:
>>  
>>  static void __del_gref(struct gntalloc_gref *gref)
>>  {
>> +	if (gref->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) {
>> +		uint8_t *tmp = kmap(gref->page);
>> +		tmp[gref->notify_pgoff] = 0;
>> +		kunmap(gref->page);
>> +	}
>> +	if (gref->notify_flags & UNMAP_NOTIFY_SEND_EVENT)
>> +		notify_remote_via_evtchn(gref->notify_event);
>> +
>> +	gref->notify_flags = 0;
>> +
>>  	if (gnttab_query_foreign_access(gref->gref_id))
>>  		return;
>>  
>> @@ -339,6 +354,43 @@ dealloc_grant_out:
>>  	return rc;
>>  }
>>  
>> +static long gntalloc_ioctl_notify(struct gntalloc_file_private_data *priv,
>> +		void __user *arg)
> 
> Call it '..ioctl_unmap_notify'. When I read it first time I thought this
> would do just notify.. while in actuality it can do both.

It's not doing anything else besides adding/removing the notification,
but it does do the notify only on unmap or delete, so that's a good name.

>> +{
>> +	struct ioctl_gntalloc_unmap_notify op;
>> +	struct gntalloc_gref *gref;
>> +	uint64_t index;
>> +	int pgoff;
>> +	int rc;
>> +
>> +	if (copy_from_user(&op, arg, sizeof(op)))
>> +		return -EFAULT;
>> +
>> +	index = op.index & ~(PAGE_SIZE - 1);
>> +	pgoff = op.index & (PAGE_SIZE - 1);
> 
> You don't want to tell the user if the messed up? Say 0xdeadf00d?

I'm not sure how the user would mess up without notification: either it will be
a valid offset, or find_grefs will fail and return -ENOENT.

>> +
>> +	spin_lock(&gref_lock);
>> +
>> +	gref = find_grefs(priv, index, 1);
>> +	if (!gref) {
>> +		rc = -ENOENT;
>> +		goto unlock_out;
>> +	}
>> +
>> +	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
>> +		rc = -EINVAL;
>> +		goto unlock_out;
>> +	}
>> +
>> +	gref->notify_flags = op.action;
>> +	gref->notify_pgoff = pgoff;
>> +	gref->notify_event = op.event_channel_port;
>> +	rc = 0;
>> + unlock_out:
>> +	spin_unlock(&gref_lock);
>> +	return rc;
>> +}
>> +
>>  static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
>>  		unsigned long arg)
>>  {
>> @@ -351,6 +403,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
>>  	case IOCTL_GNTALLOC_DEALLOC_GREF:
>>  		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
>>  
>> +	case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
>> +		return gntalloc_ioctl_notify(priv, (void __user *)arg);
>> +
>>  	default:
>>  		return -ENOIOCTLCMD;
>>  	}
>> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
>> index a5710e8..e360d0a 100644
>> --- a/drivers/xen/gntdev.c
>> +++ b/drivers/xen/gntdev.c
>> @@ -37,6 +37,7 @@
>>  #include <xen/xen.h>
>>  #include <xen/grant_table.h>
>>  #include <xen/gntdev.h>
>> +#include <xen/events.h>
>>  #include <asm/xen/hypervisor.h>
>>  #include <asm/xen/hypercall.h>
>>  #include <asm/xen/page.h>
>> @@ -70,6 +71,9 @@ struct grant_map {
>>  	int count;
>>  	int flags;
>>  	int is_mapped;
>> +	int notify_flags;
>> +	int notify_offset;
>> +	int notify_event;
> 
> Would it make sense to put these inside a struct?

Yes, they're related enough. I'll do that in the next revision.

>>  	atomic_t users;
>>  	struct ioctl_gntdev_grant_ref *grants;
>>  	struct gnttab_map_grant_ref   *map_ops;
>> @@ -165,7 +169,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
>>  	list_for_each_entry(map, &priv->maps, next) {
>>  		if (map->index != index)
>>  			continue;
>> -		if (map->count != count)
>> +		if (count && map->count != count)
>>  			continue;
>>  		return map;
>>  	}
>> @@ -184,6 +188,10 @@ static void gntdev_put_map(struct grant_map *map)
>>  
>>  	atomic_sub(map->count, &pages_mapped);
>>  
>> +	if (map->notify_flags & UNMAP_NOTIFY_SEND_EVENT) {
>> +		notify_remote_via_evtchn(map->notify_event);
>> +	}
>> +
>>  	if (map->pages) {
>>  		if (!use_ptemod)
>>  			unmap_grant_pages(map, 0, map->count);
>> @@ -272,6 +280,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
>>  {
>>  	int i, err = 0;
>>  
>> +	if (map->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) {
>> +		int pgno = (map->notify_offset >> PAGE_SHIFT);
>> +		if (pgno >= offset && pgno < pages + offset) {
> 
> Whoa, that looks to violate what 'notify_offset' actually means.
> Perhaps a better name for that variable? notify_addr?

It's not an absolute address within the device, just within the multi-page
structure pointed to by map. notify_addr is also a good name, if you think
that it makes it clearer.

>> +			uint8_t *tmp = kmap(map->pages[pgno]);
>> +			tmp[map->notify_offset & (PAGE_SIZE-1)] = 0;
>> +			kunmap(map->pages[pgno]);
>> +			map->notify_flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
>> +		}
>> +	}
>> +
>>  	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
>>  	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
>>  	if (err)
>> @@ -517,6 +535,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
>>  	return 0;
>>  }
>>  
>> +static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
> 
>> +{
>> +	struct ioctl_gntdev_unmap_notify op;
>> +	struct grant_map *map;
>> +	int rc;
>> +
>> +	if (copy_from_user(&op, u, sizeof(op)))
>> +		return -EFAULT;
>> +
>> +	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
>> +		return -EINVAL;
>> +
>> +	spin_lock(&priv->lock);
>> +
>> +	list_for_each_entry(map, &priv->maps, next) {
>> +		uint64_t begin = map->index << PAGE_SHIFT;
>> +		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
>> +		if (op.index >= begin && op.index < end)
>> +			goto found;
>> +	}
>> +	rc = -ENOENT;
>> +	goto unlock_out;
>> +
>> + found:
>> +	map->notify_flags = op.action;
>> +	map->notify_offset = op.index - (map->index << PAGE_SHIFT);
> 
> Minus? So say the index is 0xdeadbeef, wouldn't this throw this off?
 
If op.index = 0xdeadbeef and map->index = 0xdead9, then we want 0x2eef
to be the offset.

> Should you also check the index to make sure it is within a PAGE_SIZE
> offset? (the ioctl accepts 64-bit values for the index).

If we get here, the offset is within the multi-page entry pointed to by
map. It is valid to have notify_offset > PAGE_SIZE if map->count > 1; in
this case, the notify points to a byte on the 2nd or later page.

>> +	map->notify_event = op.event_channel_port;
>> +	rc = 0;
>> + unlock_out:
>> +	spin_unlock(&priv->lock);
>> +	return rc;
>> +}
>> +
>>  static long gntdev_ioctl(struct file *flip,
>>  			 unsigned int cmd, unsigned long arg)
>>  {
>> @@ -533,6 +584,9 @@ static long gntdev_ioctl(struct file *flip,
>>  	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
>>  		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
>>  
>> +	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
>> +		return gntdev_ioctl_notify(priv, ptr);
>> +
>>  	default:
>>  		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
>>  		return -ENOIOCTLCMD;
>> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
>> index e1d6d0f..92339f5 100644
>> --- a/include/xen/gntalloc.h
>> +++ b/include/xen/gntalloc.h
>> @@ -67,4 +67,32 @@ struct ioctl_gntalloc_dealloc_gref {
>>  	/* Number of references to unmap */
>>  	uint32_t count;
>>  };
>> +
>> +/*
>> + * Sets up an unmap notification within the page, so that the other side can do
>> + * cleanup if this side crashes. Required to implement cross-domain robust
>> + * mutexes or close notification on communication channels.
>> + *
>> + * Each mapped page only supports one notification; multiple calls referring to
>> + * the same page overwrite the previous notification. You must clear the
>> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
>> + * to occur.
>> + */
>> +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
>> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
>> +struct ioctl_gntalloc_unmap_notify {
>> +	/* IN parameters */
>> +	/* Index of a byte in the page */
>> +	uint64_t index;
>> +	/* Action(s) to take on unmap */
>> +	uint32_t action;
>> +	/* Event channel to notify */
>> +	uint32_t event_channel_port;
>> +};
>> +
>> +/* Clear (set to zero) the byte specified by index */
>> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
>> +/* Send an interrupt on the indicated event channel */
>> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
>> +
>>  #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
>> diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
>> index eb23f41..5d9b9b4 100644
>> --- a/include/xen/gntdev.h
>> +++ b/include/xen/gntdev.h
>> @@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants {
>>  	uint32_t count;
>>  };
>>  
>> +/*
>> + * Sets up an unmap notification within the page, so that the other side can do
>> + * cleanup if this side crashes. Required to implement cross-domain robust
>> + * mutexes or close notification on communication channels.
>> + *
>> + * Each mapped page only supports one notification; multiple calls referring to
>> + * the same page overwrite the previous notification. You must clear the
>> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
>> + * to occur.
>> + */
>> +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
>> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
>> +struct ioctl_gntdev_unmap_notify {
>> +	/* IN parameters */
>> +	/* Index of a byte in the page */
>> +	uint64_t index;
>> +	/* Action(s) to take on unmap */
>> +	uint32_t action;
>> +	/* Event channel to notify */
>> +	uint32_t event_channel_port;
>> +};
>> +
>> +/* Clear (set to zero) the byte specified by index */
>> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
>> +/* Send an interrupt on the indicated event channel */
>> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
>> +
>>  #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
>> -- 
>> 1.7.3.4
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  2011-01-27 19:23     ` Konrad Rzeszutek Wilk
@ 2011-01-27 20:55     ` Daniel De Graaf
  2011-01-27 21:29       ` Konrad Rzeszutek Wilk
  1 sibling, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2011-01-27 20:55 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 01/27/2011 01:52 PM, Konrad Rzeszutek Wilk wrote:
> On Fri, Jan 21, 2011 at 10:59:07AM -0500, Daniel De Graaf wrote:
>> This allows a userspace application to allocate a shared page for
>> implementing inter-domain communication or device drivers. These
>> shared pages can be mapped using the gntdev device or by the kernel
>> in another domain.
>>
>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>> ---
>>  drivers/xen/Kconfig    |    8 +
>>  drivers/xen/Makefile   |    2 +
>>  drivers/xen/gntalloc.c |  477 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/xen/gntalloc.h |   70 +++++++
>>  4 files changed, 557 insertions(+), 0 deletions(-)
>>  create mode 100644 drivers/xen/gntalloc.c
>>  create mode 100644 include/xen/gntalloc.h
>>
>> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
>> index 07bec09..a3d7afb 100644
>> --- a/drivers/xen/Kconfig
>> +++ b/drivers/xen/Kconfig
>> @@ -80,6 +80,14 @@ config XEN_GNTDEV
>>  	help
>>  	  Allows userspace processes to use grants.
>>  
>> +config XEN_GRANT_DEV_ALLOC
>> +	tristate "User-space grant reference allocator driver"
>> +	depends on XEN
>> +	help
>> +	  Allows userspace processes to create pages with access granted
>> +	  to other domains. This can be used to implement frontend drivers
>> +	  or as part of an inter-domain shared memory channel.
>> +
>>  config XEN_PLATFORM_PCI
>>  	tristate "xen platform pci device driver"
>>  	depends on XEN_PVHVM && PCI
>> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
>> index 5088cc2..9585a1d 100644
>> --- a/drivers/xen/Makefile
>> +++ b/drivers/xen/Makefile
>> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
>>  obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
>>  obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
>>  obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
>> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
>>  obj-$(CONFIG_XENFS)		+= xenfs/
>>  obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
>>  obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
>> @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
>>  
>>  xen-evtchn-y			:= evtchn.o
>>  xen-gntdev-y				:= gntdev.o
>> +xen-gntalloc-y				:= gntalloc.o
>>  
>>  xen-platform-pci-y		:= platform-pci.o
>> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
>> new file mode 100644
>> index 0000000..a230dc4
>> --- /dev/null
>> +++ b/drivers/xen/gntalloc.c
>> @@ -0,0 +1,477 @@
>> +/******************************************************************************
>> + * gntalloc.c
>> + *
>> + * Device for creating grant references (in user-space) that may be shared
>> + * with other domains.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>> + */
>> +
>> +/*
>> + * This driver exists to allow userspace programs in Linux to allocate kernel
>> + * memory that will later be shared with another domain.  Without this device,
>> + * Linux userspace programs cannot create grant references.
>> + *
>> + * How this stuff works:
>> + *   X -> granting a page to Y
>> + *   Y -> mapping the grant from X
>> + *
>> + *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
>> + *   2. X creates an entry in the grant table that says domid(Y) can access P.
>> + *      This is done without a hypercall unless the grant table needs expansion.
>> + *   3. X gives the grant reference identifier, GREF, to Y.
>> + *   4. Y maps the page, either directly into kernel memory for use in a backend
>> + *      driver, or via a the gntdev device to map into the address space of an
>> + *      application running in Y. This is the first point at which Xen does any
>> + *      tracking of the page.
>> + *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
>> + *      to the shared page, and can now communicate with Y over the shared page.
>> + *
>> + *
>> + * NOTE TO USERSPACE LIBRARIES:
>> + *   The grant allocation and mmap()ing are, naturally, two separate operations.
>> + *   You set up the sharing by calling the create ioctl() and then the mmap().
>> + *   Teardown requires munmap() and either close() or ioctl().
>> + *
>> + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
>> + * reference, this device can be used to consume kernel memory by leaving grant
>> + * references mapped by another domain when an application exits. Therefore,
>> + * there is a global limit on the number of pages that can be allocated. When
>> + * all references to the page are unmapped, it will be freed during the next
>> + * grant operation.
>> + */
>> +
>> +#include <linux/atomic.h>
>> +#include <linux/module.h>
>> +#include <linux/miscdevice.h>
>> +#include <linux/kernel.h>
>> +#include <linux/init.h>
>> +#include <linux/slab.h>
>> +#include <linux/fs.h>
>> +#include <linux/device.h>
>> +#include <linux/mm.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/types.h>
>> +#include <linux/list.h>
>> +
>> +#include <xen/xen.h>
>> +#include <xen/page.h>
>> +#include <xen/grant_table.h>
>> +#include <xen/gntalloc.h>
>> +
>> +static int limit = 1024;
>> +module_param(limit, int, 0644);
>> +MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
>> +		"the gntalloc device");
>> +
>> +static LIST_HEAD(gref_list);
>> +static DEFINE_SPINLOCK(gref_lock);
>> +static int gref_size;
>> +
>> +/* Metadata on a grant reference. */
>> +struct gntalloc_gref {
>> +	struct list_head next_all;   /* list entry gref_list */
> 
> How about 'next_gref' ?

Seems like a good name. Will change.

>> +	struct list_head next_file;  /* list entry file->list, if open */
>> +	struct page *page;	     /* The shared page */
>> +	uint64_t file_index;         /* File offset for mmap() */
>> +	unsigned int users;          /* Use count - when zero, waiting on Xen */
>> +	grant_ref_t gref_id;         /* The grant reference number */
>> +};
>> +
>> +struct gntalloc_file_private_data {
>> +	struct list_head list;
>> +	uint64_t index;
>> +};
>> +
>> +static void __del_gref(struct gntalloc_gref *gref);
>> +
>> +static void do_cleanup(void)
>> +{
>> +	struct gntalloc_gref *gref, *n;
>> +	list_for_each_entry_safe(gref, n, &gref_list, next_all) {
>> +		if (!gref->users)
>> +			__del_gref(gref);
>> +	}
>> +}
>> +
>> +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
>> +	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
>> +{
>> +	int i, rc, readonly;
>> +	LIST_HEAD(queue_all);
>> +	LIST_HEAD(queue_file);
>> +	struct gntalloc_gref *gref;
>> +
>> +	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
>> +	rc = -ENOMEM;
>> +	for (i = 0; i < op->count; i++) {
>> +		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
>> +		if (!gref)
>> +			goto undo;
>> +		list_add_tail(&gref->next_all, &queue_all);
>> +		list_add_tail(&gref->next_file, &queue_file);
>> +		gref->users = 1;
>> +		gref->file_index = op->index + i * PAGE_SIZE;
>> +		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>> +		if (!gref->page)
>> +			goto undo;
>> +
>> +		/* Grant foreign access to the page. */
>> +		gref->gref_id = gnttab_grant_foreign_access(op->domid,
>> +			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
>> +		if (gref->gref_id < 0) {
>> +			rc = gref->gref_id;
>> +			goto undo;
>> +		}
>> +		gref_ids[i] = gref->gref_id;
>> +	}
>> +
>> +	/* Add to gref lists. */
>> +	spin_lock(&gref_lock);
>> +	list_splice_tail(&queue_all, &gref_list);
>> +	list_splice_tail(&queue_file, &priv->list);
>> +	spin_unlock(&gref_lock);
>> +
>> +	return 0;
>> +
>> +undo:
>> +	list_for_each_entry(gref, &queue_file, next_file) {
>> +		if (gref->page && gref->gref_id >= 0) {
>> +			__del_gref(gref);
>> +		} else {
>> +			if (gref->page)
>> +				__free_page(gref->page);
>> +			list_del(&gref->next_all);
> 
> Ohh, you are actually deleting them from queue_all, ah.
> Can you add a comment about that since the first time I read this
> I thought you were iterating over the queue_file and deleting
> from _that_ list. Tricky..
>
> Can collapse this deleting into __del_gref? Looks like
> you should just preface the gnttab with "if gref->page && gref->gref_id"
> and also the gref_size, and the "if (gref->page)" before deleting?

Yes, this can be collapsed into __del_gref. That will probably clarify
the deletion, but I'll stick a comment about __del_gref removing from
queue_all and not queue_file so it's clearer what will/won't break.
 
> 
>> +			kfree(gref);
>> +		}
>> +	}
>> +
>> +	/* It's possible for the target domain to map the just-allocated grant
>> +	 * references by blindly guessing their IDs; if this is done, then
>> +	 * __del_gref will leave them in the queue_all list. They need to be
>> +	 * added to the global list so that we can free them when they are no
>> +	 * longer referenced.
>> +	 */
>> +	if (unlikely(!list_empty(&queue_all))) {
>> +		spin_lock(&gref_lock);
>> +		list_splice_tail(&queue_all, &gref_list);
>> +		spin_unlock(&gref_lock);
>> +	}
>> +	return rc;
>> +}
>> +
>> +static void __del_gref(struct gntalloc_gref *gref)
>> +{
>> +	if (gnttab_query_foreign_access(gref->gref_id))
>> +		return;
>> +
>> +	if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
>> +		return;
>> +
>> +	gref_size--;
>> +	list_del(&gref->next_all);
>> +
>> +	__free_page(gref->page);
>> +	kfree(gref);
>> +}
>> +
>> +/* finds contiguous grant references in a file, returns the first */
>> +static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
>> +		uint64_t index, uint32_t count)
>> +{
>> +	struct gntalloc_gref *rv = NULL, *gref;
>> +	list_for_each_entry(gref, &priv->list, next_file) {
>> +		if (gref->file_index == index && !rv)
>> +			rv = gref;
>> +		if (rv) {
>> +			if (gref->file_index != index)
>> +				return NULL;
>> +			index += PAGE_SIZE;
>> +			count--;
>> +			if (count == 0)
>> +				return rv;
>> +		}
>> +	}
>> +	return NULL;
>> +}
>> +
>> +/*
>> + * -------------------------------------
>> + *  File operations.
>> + * -------------------------------------
>> + */
>> +static int gntalloc_open(struct inode *inode, struct file *filp)
>> +{
>> +	struct gntalloc_file_private_data *priv;
>> +
>> +	try_module_get(THIS_MODULE);
> 
> No checking if it fails?

Actually, looking at it again, this seems redundant: won't open() itself
prevent rmmod of the module until everything is closed?

Regardless, while the failure seems unlikely it should be checked for.

>> +
>> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
>> +	if (!priv)
>> +		goto out_nomem;
>> +	INIT_LIST_HEAD(&priv->list);
>> +
>> +	filp->private_data = priv;
>> +
>> +	pr_debug("%s: priv %p\n", __func__, priv);
>> +
>> +	return 0;
>> +
>> +out_nomem:
> 
> You should also do:
> 
> 	module_put(THIS_MODULE);
> 

Agreed, or just get rid of the module_get.

>> +	return -ENOMEM;
>> +}
>> +
>> +static int gntalloc_release(struct inode *inode, struct file *filp)
>> +{
>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>> +	struct gntalloc_gref *gref;
>> +
>> +	pr_debug("%s: priv %p\n", __func__, priv);
>> +
>> +	spin_lock(&gref_lock);
>> +	while (!list_empty(&priv->list)) {
>> +		gref = list_entry(priv->list.next,
>> +			struct gntalloc_gref, next_file);
> 
> 
>> +		list_del(&gref->next_file);
>> +		gref->users--;
>> +		if (gref->users == 0)
>> +			__del_gref(gref);
>> +	}
>> +	kfree(priv);
>> +	spin_unlock(&gref_lock);
>> +
>> +	module_put(THIS_MODULE);
>> +
>> +	return 0;
>> +}
>> +
>> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
>> +		struct ioctl_gntalloc_alloc_gref __user *arg)
>> +{
>> +	int rc = 0;
>> +	struct ioctl_gntalloc_alloc_gref op;
>> +	uint32_t *gref_ids;
>> +
>> +	pr_debug("%s: priv %p\n", __func__, priv);
>> +
>> +	if (copy_from_user(&op, arg, sizeof(op))) {
>> +		rc = -EFAULT;
>> +		goto out;
>> +	}
>> +
>> +	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
>> +	if (!gref_ids) {
>> +		rc = -ENOMEM;
>> +		goto out;
>> +	}
>> +
>> +	spin_lock(&gref_lock);
>> +	do_cleanup();
> 
> Hmm, why the cleanup here? I see it  gntalloc_ioctl_dealloc
> which makes sense since the users-- might have been decremented to zero.
> But here?

This is to clean up pages that were at zero (local) users but were still
mapped by remote domains. Since those pages count towards the limit that
we are about to enforce, it is a good idea to remove any pages that have
been unmapped by remote domains since last time we checked.

This would be much cleaner if Xen allowed a domain to force others to
unmap its pages, but that's a significant change to the semantics of
shared memory in the hypervisor.
 
>> +	if (gref_size + op.count > limit) {
>> +		spin_unlock(&gref_lock);
>> +		rc = -ENOSPC;
>> +		goto out_free;
>> +	}
>> +	gref_size += op.count;
>> +	op.index = priv->index;
>> +	priv->index += op.count * PAGE_SIZE;
>> +	spin_unlock(&gref_lock);
>> +
>> +	rc = add_grefs(&op, gref_ids, priv);
>> +	if (rc < 0)
>> +		goto out_free;
>> +
>> +	if (copy_to_user(arg, &op, sizeof(op))) {
>> +		rc = -EFAULT;
>> +		goto out_free;
> 
> Not something that would clean the newly added grant? Say
> the code I suggested below the 'out' label.
> 

That races with a concurrent removal operation that has guessed
the offset we just added, and removed the gref. As soon as we unlock
gref_lock at the end of add_grefs, gref is unsafe to dereference.

This could be solved by a per-file lock, or by holding gref_lock
for longer, but the copy_to_user producing -EFAULT seemed unlikely
enough that forcing a close() seemed the better choice - especially
since the userspace application will be segfaulting soon if it is
trying to read the offsets.

>> +	}
>> +	if (copy_to_user(arg->gref_ids, gref_ids,
>> +			sizeof(gref_ids[0]) * op.count)) {
>> +		rc = -EFAULT;
>> +		goto out_free;
> 
> Ditto.
>> +	}
>> +
>> +out_free:
>> +	kfree(gref_ids);
>> +out:
>> +	return rc;
> 
> out_del_gref:
> 	gref->users--;
> 	__del_gref(gref);
> 
> 	goto out_free;
>> +}
>> +
>> +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
>> +		void __user *arg)
>> +{
>> +	int i, rc = 0;
>> +	struct ioctl_gntalloc_dealloc_gref op;
>> +	struct gntalloc_gref *gref, *n;
>> +
>> +	pr_debug("%s: priv %p\n", __func__, priv);
>> +
>> +	if (copy_from_user(&op, arg, sizeof(op))) {
>> +		rc = -EFAULT;
>> +		goto dealloc_grant_out;
>> +	}
>> +
>> +	spin_lock(&gref_lock);
>> +	gref = find_grefs(priv, op.index, op.count);
>> +	if (gref) {
>> +		for (i = 0; i < op.count; i++) {
>> +			n = list_entry(gref->next_file.next,
>> +				struct gntalloc_gref, next_file);
> 
> Might want to add a comment that you are deleting only from ->next_file
> b/c the do_cleanpup will take of deleting from the other list (that is
> if the users is at zero).

Right.
 
>> +			list_del(&gref->next_file);
>> +			gref->users--;
>> +			gref = n;
>> +		}
>> +	} else {
>> +		rc = -EINVAL;
>> +	}
>> +
>> +	do_cleanup();
>> +
>> +	spin_unlock(&gref_lock);
>> +dealloc_grant_out:
>> +	return rc;
>> +}
>> +
>> +static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
>> +		unsigned long arg)
>> +{
>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>> +
>> +	switch (cmd) {
>> +	case IOCTL_GNTALLOC_ALLOC_GREF:
>> +		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
>> +
>> +	case IOCTL_GNTALLOC_DEALLOC_GREF:
>> +		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
>> +
>> +	default:
>> +		return -ENOIOCTLCMD;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static void gntalloc_vma_close(struct vm_area_struct *vma)
>> +{
>> +	struct gntalloc_gref *gref = vma->vm_private_data;
>> +	if (!gref)
>> +		return;
>> +
>> +	spin_lock(&gref_lock);
>> +	gref->users--;
>> +	if (gref->users == 0)
>> +		__del_gref(gref);
> 
> I just want to be convienced here that I am wrong.
> 
> If the 'ioctl_deallo' has not been done, what will if unmap this VMA?
> Will it be OK to  yank the gref from gref_list while (and kfree it) while
> it is still referenced in the filp->private_data? Or would end up trying
> to derefence the *priv and go BOOM?

The VMA itself is unmapped regardless. The gref structure (and the pages
pointed to by the vma) is deallocated when the last reference goes away.
In your example, it would be on _release() of the file or a later dealloc
ioctl.

The only time __del_gref is called here is when the file has been closed
or the segment has already had ioctl_dealloc run on it.

>> +	spin_unlock(&gref_lock);
>> +}
>> +
>> +static struct vm_operations_struct gntalloc_vmops = {
>> +	.close = gntalloc_vma_close,
>> +};
>> +
>> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
>> +{
>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>> +	struct gntalloc_gref *gref;
>> +	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
>> +	int rv, i;
>> +
>> +	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
>> +		       priv, vma->vm_pgoff, count);
>> +
>> +	if (!(vma->vm_flags & VM_SHARED)) {
>> +		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
>> +		return -EINVAL;
>> +	}
>> +
>> +	spin_lock(&gref_lock);
>> +	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
>> +	if (gref == NULL) {
>> +		rv = -ENOENT;
>> +		pr_debug("%s: Could not find grant reference",
>> +				__func__);
>> +		goto out_unlock;
>> +	}
>> +
>> +	vma->vm_private_data = gref;
>> +
>> +	vma->vm_flags |= VM_RESERVED;
>> +	vma->vm_flags |= VM_DONTCOPY;
>> +	vma->vm_flags |= VM_IO;
> 
> Hmm, I don't think you need the VM_IO.

Agreed, in fact it'll probably trigger warnings now that we force VM_IO
to imply pfn=mfn.

>> +	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
>> +
>> +	vma->vm_ops = &gntalloc_vmops;
>> +
>> +	for (i = 0; i < count; i++) {
>> +		gref->users++;
>> +		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
>> +				gref->page);
>> +		if (rv)
>> +			goto out_unlock;
>> +
>> +		gref = list_entry(gref->next_file.next,
>> +				struct gntalloc_gref, next_file);
>> +	}
>> +	rv = 0;
>> +
>> +out_unlock:
>> +	spin_unlock(&gref_lock);
>> +	return rv;
>> +}
>> +
>> +static const struct file_operations gntalloc_fops = {
>> +	.owner = THIS_MODULE,
>> +	.open = gntalloc_open,
>> +	.release = gntalloc_release,
>> +	.unlocked_ioctl = gntalloc_ioctl,
>> +	.mmap = gntalloc_mmap
>> +};
>> +
>> +/*
>> + * -------------------------------------
>> + * Module creation/destruction.
>> + * -------------------------------------
>> + */
>> +static struct miscdevice gntalloc_miscdev = {
>> +	.minor	= MISC_DYNAMIC_MINOR,
>> +	.name	= "xen/gntalloc",
>> +	.fops	= &gntalloc_fops,
>> +};
>> +
>> +static int __init gntalloc_init(void)
>> +{
>> +	int err;
>> +
>> +	if (!xen_domain())
>> +		return -ENODEV;
>> +
>> +	err = misc_register(&gntalloc_miscdev);
>> +	if (err != 0) {
>> +		printk(KERN_ERR "Could not register misc gntalloc device\n");
>> +		return err;
>> +	}
>> +
>> +	pr_debug("Created grant allocation device at %d,%d\n",
>> +			MISC_MAJOR, gntalloc_miscdev.minor);
>> +
>> +	return 0;
>> +}
>> +
>> +static void __exit gntalloc_exit(void)
>> +{
>> +	misc_deregister(&gntalloc_miscdev);
>> +}
>> +
>> +module_init(gntalloc_init);
>> +module_exit(gntalloc_exit);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
>> +		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
>> +MODULE_DESCRIPTION("User-space grant reference allocator driver");
>> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
>> new file mode 100644
>> index 0000000..e1d6d0f
>> --- /dev/null
>> +++ b/include/xen/gntalloc.h
>> @@ -0,0 +1,70 @@
>> +/******************************************************************************
>> + * gntalloc.h
>> + *
> 
> Copyright?

Public domain. The GPL text will need to be removed.

>> + * Interface to /dev/xen/gntalloc.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License version 2
>> + * as published by the Free Software Foundation; or, when distributed
>> + * separately from the Linux kernel or incorporated into other
>> + * software packages, subject to the following license:
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this source file (the "Software"), to deal in the Software without
>> + * restriction, including without limitation the rights to use, copy, modify,
>> + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
>> + * and to permit persons to whom the Software is furnished to do so, subject to
>> + * the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
>> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#ifndef __LINUX_PUBLIC_GNTALLOC_H__
>> +#define __LINUX_PUBLIC_GNTALLOC_H__
>> +
>> +/*
>> + * Allocates a new page and creates a new grant reference.
>> + */
>> +#define IOCTL_GNTALLOC_ALLOC_GREF \
>> +_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
>> +struct ioctl_gntalloc_alloc_gref {
>> +	/* IN parameters */
>> +	/* The ID of the domain to be given access to the grants. */
>> +	uint16_t domid;
>> +	/* Flags for this mapping */
>> +	uint16_t flags;
>> +	/* Number of pages to map */
>> +	uint32_t count;
>> +	/* OUT parameters */
>> +	/* The offset to be used on a subsequent call to mmap(). */
>> +	uint64_t index;
>> +	/* The grant references of the newly created grant, one per page */
>> +	/* Variable size, depending on count */
>> +	uint32_t gref_ids[1];
>> +};
>> +
>> +#define GNTALLOC_FLAG_WRITABLE 1
>> +
>> +/*
>> + * Deallocates the grant reference, allowing the associated page to be freed if
>> + * no other domains are using it.
>> + */
>> +#define IOCTL_GNTALLOC_DEALLOC_GREF \
>> +_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
>> +struct ioctl_gntalloc_dealloc_gref {
>> +	/* IN parameters */
>> +	/* The offset returned in the map operation */
>> +	uint64_t index;
>> +	/* Number of references to unmap */
>> +	uint32_t count;
>> +};
>> +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
>> -- 
>> 1.7.3.4
>>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-01-27 20:55     ` Daniel De Graaf
@ 2011-01-27 21:29       ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 29+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-01-27 21:29 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

> >> +	try_module_get(THIS_MODULE);
> > 
> > No checking if it fails?
> 
> Actually, looking at it again, this seems redundant: won't open() itself
> prevent rmmod of the module until everything is closed?

I hope so :-)
> 
> Regardless, while the failure seems unlikely it should be checked for.

OK.

.. big snip ..
> >> +	spin_lock(&gref_lock);
> >> +	do_cleanup();
> > 
> > Hmm, why the cleanup here? I see it  gntalloc_ioctl_dealloc
> > which makes sense since the users-- might have been decremented to zero.
> > But here?
> 
> This is to clean up pages that were at zero (local) users but were still
> mapped by remote domains. Since those pages count towards the limit that
> we are about to enforce, it is a good idea to remove any pages that have
> been unmapped by remote domains since last time we checked.

Ok, can you put that as comment right before calling do_cleanup, please?

> 
> This would be much cleaner if Xen allowed a domain to force others to
> unmap its pages, but that's a significant change to the semantics of
> shared memory in the hypervisor.
>  
> >> +	if (gref_size + op.count > limit) {
> >> +		spin_unlock(&gref_lock);
> >> +		rc = -ENOSPC;
> >> +		goto out_free;
> >> +	}
> >> +	gref_size += op.count;
> >> +	op.index = priv->index;
> >> +	priv->index += op.count * PAGE_SIZE;
> >> +	spin_unlock(&gref_lock);
> >> +
> >> +	rc = add_grefs(&op, gref_ids, priv);
> >> +	if (rc < 0)
> >> +		goto out_free;
> >> +
> >> +	if (copy_to_user(arg, &op, sizeof(op))) {
> >> +		rc = -EFAULT;
> >> +		goto out_free;
> > 
> > Not something that would clean the newly added grant? Say
> > the code I suggested below the 'out' label.
> > 
> 
> That races with a concurrent removal operation that has guessed
> the offset we just added, and removed the gref. As soon as we unlock
> gref_lock at the end of add_grefs, gref is unsafe to dereference.

Aha! Can you put a comment about this so in the future we won't
try to correct this "mistake" ?

> 
> This could be solved by a per-file lock, or by holding gref_lock
> for longer, but the copy_to_user producing -EFAULT seemed unlikely
> enough that forcing a close() seemed the better choice - especially
> since the userspace application will be segfaulting soon if it is
> trying to read the offsets.

True enought.

.. big snip..
> >> +	spin_lock(&gref_lock);
> >> +	gref->users--;
> >> +	if (gref->users == 0)
> >> +		__del_gref(gref);
> > 
> > I just want to be convienced here that I am wrong.
> > 
> > If the 'ioctl_deallo' has not been done, what will if unmap this VMA?
> > Will it be OK to  yank the gref from gref_list while (and kfree it) while
> > it is still referenced in the filp->private_data? Or would end up trying
> > to derefence the *priv and go BOOM?
> 
> The VMA itself is unmapped regardless. The gref structure (and the pages
> pointed to by the vma) is deallocated when the last reference goes away.
> In your example, it would be on _release() of the file or a later dealloc
> ioctl.

OK, you convienced me.
> 
> The only time __del_gref is called here is when the file has been closed
> or the segment has already had ioctl_dealloc run on it.

<nods>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-01-27 18:52   ` Konrad Rzeszutek Wilk
  2011-01-27 19:26     ` Daniel De Graaf
@ 2011-03-04 15:57     ` Ian Campbell
  2011-03-04 16:34       ` Daniel De Graaf
  1 sibling, 1 reply; 29+ messages in thread
From: Ian Campbell @ 2011-03-04 15:57 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Konrad Rzeszutek Wilk

On Thu, 2011-01-27 at 18:52 +0000, Konrad Rzeszutek Wilk wrote:
> > @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
> >  
> >       atomic_sub(map->count, &pages_mapped);
> >  
> > -     if (map->pages)
> > +     if (map->pages) {
> > +             if (!use_ptemod)
> > +                     unmap_grant_pages(map, 0, map->count);
> > +
> >               for (i = 0; i < map->count; i++) {
> > -                     if (map->pages[i])
> > +                     uint32_t check, *tmp;
> > +                     if (!map->pages[i])
> > +                             continue;
> > +                     /* XXX When unmapping in an HVM domain, Xen will
> > +                      * sometimes end up mapping the GFN to an invalid MFN.
> > +                      * In this case, writes will be discarded and reads will
> > +                      * return all 0xFF bytes.  Leak these unusable GFNs
> 
> I forgot to ask, under what version of Xen did you run this? I want to add
> that to the comment so when it gets fixed we know what the failing version is.
> 
> > +                      * until Xen supports fixing their p2m mapping.
> > +                      */
> > +                     tmp = kmap(map->pages[i]);
> > +                     *tmp = 0xdeaddead;

I've just tripped over this check which faults in my PV guest. Seems to
be related to the handling failures of map_grant_pages()?

Was the underlying Xen issue here reported somewhere more obvious than
this comment buried in a patch to the kernel?

If not please can you raise it as a separate thread clearly marked as a
hypervisor issue/question, all I can find is bits and pieces spread
through the threads associated with this kernel patch. I don't think
I've got a clear enough picture of the issue from those fragments to
pull it together into a sensible report.

Ian.

> > +                     mb();
> > +                     check = *tmp;
> > +                     kunmap(map->pages[i]); 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
  2011-03-04 15:57     ` Ian Campbell
@ 2011-03-04 16:34       ` Daniel De Graaf
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-03-04 16:34 UTC (permalink / raw)
  To: Ian Campbell; +Cc: jeremy, xen-devel, Konrad Rzeszutek Wilk

On 03/04/2011 10:57 AM, Ian Campbell wrote:
> On Thu, 2011-01-27 at 18:52 +0000, Konrad Rzeszutek Wilk wrote:
>>> @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map)
>>>  
>>>       atomic_sub(map->count, &pages_mapped);
>>>  
>>> -     if (map->pages)
>>> +     if (map->pages) {
>>> +             if (!use_ptemod)
>>> +                     unmap_grant_pages(map, 0, map->count);
>>> +
>>>               for (i = 0; i < map->count; i++) {
>>> -                     if (map->pages[i])
>>> +                     uint32_t check, *tmp;
>>> +                     if (!map->pages[i])
>>> +                             continue;
>>> +                     /* XXX When unmapping in an HVM domain, Xen will
>>> +                      * sometimes end up mapping the GFN to an invalid MFN.
>>> +                      * In this case, writes will be discarded and reads will
>>> +                      * return all 0xFF bytes.  Leak these unusable GFNs
>>
>> I forgot to ask, under what version of Xen did you run this? I want to add
>> that to the comment so when it gets fixed we know what the failing version is.
>>
>>> +                      * until Xen supports fixing their p2m mapping.
>>> +                      */
>>> +                     tmp = kmap(map->pages[i]);
>>> +                     *tmp = 0xdeaddead;
> 
> I've just tripped over this check which faults in my PV guest. Seems to
> be related to the handling failures of map_grant_pages()?
> 
> Was the underlying Xen issue here reported somewhere more obvious than
> this comment buried in a patch to the kernel?
> 
> If not please can you raise it as a separate thread clearly marked as a
> hypervisor issue/question, all I can find is bits and pieces spread
> through the threads associated with this kernel patch. I don't think
> I've got a clear enough picture of the issue from those fragments to
> pull it together into a sensible report.
> 
> Ian.
> 

I think there may be other bugs lurking here with these freed pages; I have
observed that even pages that pass this kmap check can become bad at a later
time. This might be due to TLB issues; I haven't had a chance to debug it.
I do have a patch that prevents the pages that have been granted from being
recycled into general use by the kernel; I hadn't posted it yet because it
didn't resolve the issue completely.

8<---------------------------------------------------------
xen-gntdev: avoid reuse of possibly-bad pages
    
In HVM, the unmap hypercall does not reliably associate a valid MFN with
a just-unmapped GFN. A simple validity test of the page is not
sufficient to determine if it will remain valid; pages have been
observed to remain mapped and later become invalid.
    
Instead of releasing the pages to the allocator, keep them in a list to
reuse their GFNs for future mappings, which should always produce a valid
mapping.

** Note: this patch is an RFC, not for a stable patch queue **
    
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index d43ff30..b9b1577 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -54,6 +54,12 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
 
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
+/* Pages that are unsafe to use except as gntdev pages due to bad PFN/MFN
+ * mapping after an unmap.
+ */
+static LIST_HEAD(page_reuse);
+static DEFINE_SPINLOCK(page_reuse_lock);
+
 static int use_ptemod;
 
 struct gntdev_priv {
@@ -122,13 +128,21 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	    NULL == add->pages)
 		goto err;
 
+	spin_lock(&page_reuse_lock);
 	for (i = 0; i < count; i++) {
-		add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
-		if (add->pages[i] == NULL)
-			goto err;
+		if (list_empty(&page_reuse)) {
+			add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+			if (add->pages[i] == NULL)
+				goto err_unlock;
+		} else {
+			add->pages[i] = list_entry(page_reuse.next,
+						   struct page, lru);
+			list_del(&add->pages[i]->lru);
+		}
 		add->map_ops[i].handle = -1;
 		add->unmap_ops[i].handle = -1;
 	}
+	spin_unlock(&page_reuse_lock);
 
 	add->index = 0;
 	add->count = count;
@@ -136,12 +150,13 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 
 	return add;
 
+err_unlock:
+	for (i = 0; i < count; i++) {
+		if (add->pages[i])
+			list_add(&add->pages[i]->lru, &page_reuse);
+	}
+	spin_unlock(&page_reuse_lock);
 err:
-	if (add->pages)
-		for (i = 0; i < count; i++) {
-			if (add->pages[i])
-				__free_page(add->pages[i]);
-		}
 	kfree(add->pages);
 	kfree(add->grants);
 	kfree(add->map_ops);
@@ -202,29 +217,14 @@ static void gntdev_put_map(struct grant_map *map)
 		if (!use_ptemod)
 			unmap_grant_pages(map, 0, map->count);
 
+		spin_lock(&page_reuse_lock);
 		for (i = 0; i < map->count; i++) {
 			uint32_t check, *tmp;
 			if (!map->pages[i])
 				continue;
-			/* XXX When unmapping in an HVM domain, Xen will
-			 * sometimes end up mapping the GFN to an invalid MFN.
-			 * In this case, writes will be discarded and reads will
-			 * return all 0xFF bytes.  Leak these unusable GFNs
-			 * until Xen supports fixing their p2m mapping.
-			 *
-			 * Confirmed present in Xen 4.1-RC3 with HVM source
-			 */
-			tmp = kmap(map->pages[i]);
-			*tmp = 0xdeaddead;
-			mb();
-			check = *tmp;
-			kunmap(map->pages[i]);
-			if (check == 0xdeaddead)
-				__free_page(map->pages[i]);
-			else
-				pr_debug("Discard page %d=%ld\n", i,
-					page_to_pfn(map->pages[i]));
+			list_add(&map->pages[i]->lru, &page_reuse);
 		}
+		spin_unlock(&page_reuse_lock);
 	}
 	kfree(map->pages);
 	kfree(map->grants);

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-02-08 22:48   ` Konrad Rzeszutek Wilk
@ 2011-02-09 18:52     ` Daniel De Graaf
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel De Graaf @ 2011-02-09 18:52 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: jeremy, xen-devel, Ian.Campbell

On 02/08/2011 05:48 PM, Konrad Rzeszutek Wilk wrote:
> On Thu, Feb 03, 2011 at 12:19:03PM -0500, Daniel De Graaf wrote:
>> This allows a userspace application to allocate a shared page for
>> implementing inter-domain communication or device drivers. These
>> shared pages can be mapped using the gntdev device or by the kernel
>> in another domain.
>>
>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>> ---
>>  drivers/xen/Kconfig    |    8 +
>>  drivers/xen/Makefile   |    2 +
>>  drivers/xen/gntalloc.c |  486 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/xen/gntalloc.h |   50 +++++
>>  4 files changed, 546 insertions(+), 0 deletions(-)
>>  create mode 100644 drivers/xen/gntalloc.c
>>  create mode 100644 include/xen/gntalloc.h
>>

[snip]

>> +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
>> +	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
>> +{
>> +	int i, rc, readonly;
>> +	LIST_HEAD(queue_gref);
>> +	LIST_HEAD(queue_file);
>> +	struct gntalloc_gref *gref;
>> +
>> +	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
>> +	rc = -ENOMEM;
>> +	for (i = 0; i < op->count; i++) {
>> +		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
>> +		if (!gref)
>> +			goto undo;
>> +		list_add_tail(&gref->next_gref, &queue_gref);
>> +		list_add_tail(&gref->next_file, &queue_file);
>> +		gref->users = 1;
>> +		gref->file_index = op->index + i * PAGE_SIZE;
>> +		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>> +		if (!gref->page)
>> +			goto undo;
>> +
>> +		/* Grant foreign access to the page. */
>> +		gref->gref_id = gnttab_grant_foreign_access(op->domid,
>> +			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
>> +		if (gref->gref_id < 0) {
>> +			rc = gref->gref_id;
>> +			goto undo;
>> +		}
>> +		gref_ids[i] = gref->gref_id;
>> +	}
>> +
>> +	/* Add to gref lists. */
>> +	spin_lock(&gref_lock);
>> +	list_splice_tail(&queue_gref, &gref_list);
>> +	list_splice_tail(&queue_file, &priv->list);
>> +	spin_unlock(&gref_lock);
>> +
>> +	return 0;
>> +
>> +undo:
>> +	spin_lock(&gref_lock);
>> +	gref_size -= (op->count - i);
> 
> So we decrease the gref_size by the count of the ones that we 
> allocated..

No, (op->count - i) is the number that we haven't yet allocated. Those
aren't in queue_file, so __del_gref is never called on them.

>> +
>> +	list_for_each_entry(gref, &queue_file, next_file) {
>> +		/* __del_gref does not remove from queue_file */
>> +		__del_gref(gref);
> 
> .. but __del_gref decreases the gref_size by one, so wouldn't
> we decrease by too much?

[snip]

>> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
>> +		struct ioctl_gntalloc_alloc_gref __user *arg)
>> +{
>> +	int rc = 0;
>> +	struct ioctl_gntalloc_alloc_gref op;
>> +	uint32_t *gref_ids;
>> +
>> +	pr_debug("%s: priv %p\n", __func__, priv);
>> +
>> +	if (copy_from_user(&op, arg, sizeof(op))) {
>> +		rc = -EFAULT;
>> +		goto out;
>> +	}
>> +
>> +	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
>> +	if (!gref_ids) {
>> +		rc = -ENOMEM;
>> +		goto out;
>> +	}
>> +
>> +	spin_lock(&gref_lock);
>> +	/* Clean up pages that were at zero (local) users but were still mapped
>> +	 * by remote domains. Since those pages count towards the limit that we
>> +	 * are about to enforce, removing them here is a good idea.
>> +	 */
>> +	do_cleanup();
>> +	if (gref_size + op.count > limit) {
>> +		spin_unlock(&gref_lock);
>> +		rc = -ENOSPC;
>> +		goto out_free;
>> +	}
>> +	gref_size += op.count;
>> +	op.index = priv->index;
>> +	priv->index += op.count * PAGE_SIZE;
>> +	spin_unlock(&gref_lock);
>> +
>> +	rc = add_grefs(&op, gref_ids, priv);
>> +	if (rc < 0)
>> +		goto out_free;
> 
> Should we cleanup up priv->index to its earlier value?

We could, but this could also introduce a possible race if two threads were
running the ioctl at the same time. There's no harm in letting the index increase,
since it is 64-bit so doesn't have overflow issues.

-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-02-03 17:19 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
@ 2011-02-08 22:48   ` Konrad Rzeszutek Wilk
  2011-02-09 18:52     ` Daniel De Graaf
  0 siblings, 1 reply; 29+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-02-08 22:48 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: jeremy, xen-devel, Ian.Campbell

On Thu, Feb 03, 2011 at 12:19:03PM -0500, Daniel De Graaf wrote:
> This allows a userspace application to allocate a shared page for
> implementing inter-domain communication or device drivers. These
> shared pages can be mapped using the gntdev device or by the kernel
> in another domain.
> 
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> ---
>  drivers/xen/Kconfig    |    8 +
>  drivers/xen/Makefile   |    2 +
>  drivers/xen/gntalloc.c |  486 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/xen/gntalloc.h |   50 +++++
>  4 files changed, 546 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/xen/gntalloc.c
>  create mode 100644 include/xen/gntalloc.h
> 
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 19f1f3c..69d2cd5 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -142,6 +142,14 @@ config XEN_GNTDEV
>  	help
>  	  Allows userspace processes to use grants.
>  
> +config XEN_GRANT_DEV_ALLOC
> +	tristate "User-space grant reference allocator driver"
> +	depends on XEN
> +	help
> +	  Allows userspace processes to create pages with access granted
> +	  to other domains. This can be used to implement frontend drivers
> +	  or as part of an inter-domain shared memory channel.
> +
>  config XEN_PLATFORM_PCI
>  	tristate "xen platform pci device driver"
>  	depends on XEN_PVHVM && PCI
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 5c3b031..09364b9 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
>  obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
>  obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
>  obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
>  obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
>  obj-$(CONFIG_XENFS)		+= xenfs/
>  obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
> @@ -19,5 +20,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
>  
>  xen-evtchn-y			:= evtchn.o
>  xen-gntdev-y				:= gntdev.o
> +xen-gntalloc-y				:= gntalloc.o
>  
>  xen-platform-pci-y		:= platform-pci.o
> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
> new file mode 100644
> index 0000000..d06bf2b
> --- /dev/null
> +++ b/drivers/xen/gntalloc.c
> @@ -0,0 +1,486 @@
> +/******************************************************************************
> + * gntalloc.c
> + *
> + * Device for creating grant references (in user-space) that may be shared
> + * with other domains.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +/*
> + * This driver exists to allow userspace programs in Linux to allocate kernel
> + * memory that will later be shared with another domain.  Without this device,
> + * Linux userspace programs cannot create grant references.
> + *
> + * How this stuff works:
> + *   X -> granting a page to Y
> + *   Y -> mapping the grant from X
> + *
> + *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
> + *   2. X creates an entry in the grant table that says domid(Y) can access P.
> + *      This is done without a hypercall unless the grant table needs expansion.
> + *   3. X gives the grant reference identifier, GREF, to Y.
> + *   4. Y maps the page, either directly into kernel memory for use in a backend
> + *      driver, or via a the gntdev device to map into the address space of an
> + *      application running in Y. This is the first point at which Xen does any
> + *      tracking of the page.
> + *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
> + *      to the shared page, and can now communicate with Y over the shared page.
> + *
> + *
> + * NOTE TO USERSPACE LIBRARIES:
> + *   The grant allocation and mmap()ing are, naturally, two separate operations.
> + *   You set up the sharing by calling the create ioctl() and then the mmap().
> + *   Teardown requires munmap() and either close() or ioctl().
> + *
> + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
> + * reference, this device can be used to consume kernel memory by leaving grant
> + * references mapped by another domain when an application exits. Therefore,
> + * there is a global limit on the number of pages that can be allocated. When
> + * all references to the page are unmapped, it will be freed during the next
> + * grant operation.
> + */
> +
> +#include <linux/atomic.h>
> +#include <linux/module.h>
> +#include <linux/miscdevice.h>
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <linux/fs.h>
> +#include <linux/device.h>
> +#include <linux/mm.h>
> +#include <linux/uaccess.h>
> +#include <linux/types.h>
> +#include <linux/list.h>
> +
> +#include <xen/xen.h>
> +#include <xen/page.h>
> +#include <xen/grant_table.h>
> +#include <xen/gntalloc.h>
> +
> +static int limit = 1024;
> +module_param(limit, int, 0644);
> +MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
> +		"the gntalloc device");
> +
> +static LIST_HEAD(gref_list);
> +static DEFINE_SPINLOCK(gref_lock);
> +static int gref_size;
> +
> +/* Metadata on a grant reference. */
> +struct gntalloc_gref {
> +	struct list_head next_gref;  /* list entry gref_list */
> +	struct list_head next_file;  /* list entry file->list, if open */
> +	struct page *page;	     /* The shared page */
> +	uint64_t file_index;         /* File offset for mmap() */
> +	unsigned int users;          /* Use count - when zero, waiting on Xen */
> +	grant_ref_t gref_id;         /* The grant reference number */
> +};
> +
> +struct gntalloc_file_private_data {
> +	struct list_head list;
> +	uint64_t index;
> +};
> +
> +static void __del_gref(struct gntalloc_gref *gref);
> +
> +static void do_cleanup(void)
> +{
> +	struct gntalloc_gref *gref, *n;
> +	list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
> +		if (!gref->users)
> +			__del_gref(gref);
> +	}
> +}
> +
> +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
> +	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
> +{
> +	int i, rc, readonly;
> +	LIST_HEAD(queue_gref);
> +	LIST_HEAD(queue_file);
> +	struct gntalloc_gref *gref;
> +
> +	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
> +	rc = -ENOMEM;
> +	for (i = 0; i < op->count; i++) {
> +		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
> +		if (!gref)
> +			goto undo;
> +		list_add_tail(&gref->next_gref, &queue_gref);
> +		list_add_tail(&gref->next_file, &queue_file);
> +		gref->users = 1;
> +		gref->file_index = op->index + i * PAGE_SIZE;
> +		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +		if (!gref->page)
> +			goto undo;
> +
> +		/* Grant foreign access to the page. */
> +		gref->gref_id = gnttab_grant_foreign_access(op->domid,
> +			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
> +		if (gref->gref_id < 0) {
> +			rc = gref->gref_id;
> +			goto undo;
> +		}
> +		gref_ids[i] = gref->gref_id;
> +	}
> +
> +	/* Add to gref lists. */
> +	spin_lock(&gref_lock);
> +	list_splice_tail(&queue_gref, &gref_list);
> +	list_splice_tail(&queue_file, &priv->list);
> +	spin_unlock(&gref_lock);
> +
> +	return 0;
> +
> +undo:
> +	spin_lock(&gref_lock);
> +	gref_size -= (op->count - i);

So we decrease the gref_size by the count of the ones that we 
allocated..
> +
> +	list_for_each_entry(gref, &queue_file, next_file) {
> +		/* __del_gref does not remove from queue_file */
> +		__del_gref(gref);

.. but __del_gref decreases the gref_size by one, so wouldn't
we decrease by too much?

> +	}
> +
> +	/* It's possible for the target domain to map the just-allocated grant
> +	 * references by blindly guessing their IDs; if this is done, then
> +	 * __del_gref will leave them in the queue_gref list. They need to be
> +	 * added to the global list so that we can free them when they are no
> +	 * longer referenced.
> +	 */
> +	if (unlikely(!list_empty(&queue_gref)))
> +		list_splice_tail(&queue_gref, &gref_list);
> +	spin_unlock(&gref_lock);
> +	return rc;
> +}
> +
> +static void __del_gref(struct gntalloc_gref *gref)
> +{
> +	if (gref->gref_id > 0) {
> +		if (gnttab_query_foreign_access(gref->gref_id))
> +			return;
> +
> +		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
> +			return;
> +	}
> +
> +	gref_size--;
> +	list_del(&gref->next_gref);
> +
> +	if (gref->page)
> +		__free_page(gref->page);
> +
> +	kfree(gref);
> +}
> +
> +/* finds contiguous grant references in a file, returns the first */
> +static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
> +		uint64_t index, uint32_t count)
> +{
> +	struct gntalloc_gref *rv = NULL, *gref;
> +	list_for_each_entry(gref, &priv->list, next_file) {
> +		if (gref->file_index == index && !rv)
> +			rv = gref;
> +		if (rv) {
> +			if (gref->file_index != index)
> +				return NULL;
> +			index += PAGE_SIZE;
> +			count--;
> +			if (count == 0)
> +				return rv;
> +		}
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * -------------------------------------
> + *  File operations.
> + * -------------------------------------
> + */
> +static int gntalloc_open(struct inode *inode, struct file *filp)
> +{
> +	struct gntalloc_file_private_data *priv;
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		goto out_nomem;
> +	INIT_LIST_HEAD(&priv->list);
> +
> +	filp->private_data = priv;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	return 0;
> +
> +out_nomem:
> +	return -ENOMEM;
> +}
> +
> +static int gntalloc_release(struct inode *inode, struct file *filp)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +	struct gntalloc_gref *gref;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	spin_lock(&gref_lock);
> +	while (!list_empty(&priv->list)) {
> +		gref = list_entry(priv->list.next,
> +			struct gntalloc_gref, next_file);
> +		list_del(&gref->next_file);
> +		gref->users--;
> +		if (gref->users == 0)
> +			__del_gref(gref);
> +	}
> +	kfree(priv);
> +	spin_unlock(&gref_lock);
> +
> +	return 0;
> +}
> +
> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
> +		struct ioctl_gntalloc_alloc_gref __user *arg)
> +{
> +	int rc = 0;
> +	struct ioctl_gntalloc_alloc_gref op;
> +	uint32_t *gref_ids;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	if (copy_from_user(&op, arg, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto out;
> +	}
> +
> +	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
> +	if (!gref_ids) {
> +		rc = -ENOMEM;
> +		goto out;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	/* Clean up pages that were at zero (local) users but were still mapped
> +	 * by remote domains. Since those pages count towards the limit that we
> +	 * are about to enforce, removing them here is a good idea.
> +	 */
> +	do_cleanup();
> +	if (gref_size + op.count > limit) {
> +		spin_unlock(&gref_lock);
> +		rc = -ENOSPC;
> +		goto out_free;
> +	}
> +	gref_size += op.count;
> +	op.index = priv->index;
> +	priv->index += op.count * PAGE_SIZE;
> +	spin_unlock(&gref_lock);
> +
> +	rc = add_grefs(&op, gref_ids, priv);
> +	if (rc < 0)
> +		goto out_free;

Should we cleanup up priv->index to its earlier value?

> +
> +	/* Once we finish add_grefs, it is unsafe to touch the new reference,
> +	 * since it is possible for a concurrent ioctl to remove it (by guessing
> +	 * its index). If the userspace application doesn't provide valid memory
> +	 * to write the IDs to, then it will need to close the file in order to
> +	 * release - which it will do by segfaulting when it tries to access the
> +	 * IDs to close them.
> +	 */
> +	if (copy_to_user(arg, &op, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto out_free;
> +	}
> +	if (copy_to_user(arg->gref_ids, gref_ids,
> +			sizeof(gref_ids[0]) * op.count)) {
> +		rc = -EFAULT;
> +		goto out_free;
> +	}
> +
> +out_free:
> +	kfree(gref_ids);
> +out:
> +	return rc;
> +}
> +
> +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
> +		void __user *arg)
> +{
> +	int i, rc = 0;
> +	struct ioctl_gntalloc_dealloc_gref op;
> +	struct gntalloc_gref *gref, *n;
> +
> +	pr_debug("%s: priv %p\n", __func__, priv);
> +
> +	if (copy_from_user(&op, arg, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto dealloc_grant_out;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	gref = find_grefs(priv, op.index, op.count);
> +	if (gref) {
> +		/* Remove from the file list only, and decrease reference count.
> +		 * The later call to do_cleanup() will remove from gref_list and
> +		 * free the memory if the pages aren't mapped anywhere.
> +		 */
> +		for (i = 0; i < op.count; i++) {
> +			n = list_entry(gref->next_file.next,
> +				struct gntalloc_gref, next_file);
> +			list_del(&gref->next_file);
> +			gref->users--;
> +			gref = n;
> +		}
> +	} else {
> +		rc = -EINVAL;
> +	}
> +
> +	do_cleanup();
> +
> +	spin_unlock(&gref_lock);
> +dealloc_grant_out:
> +	return rc;
> +}
> +
> +static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
> +		unsigned long arg)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +
> +	switch (cmd) {
> +	case IOCTL_GNTALLOC_ALLOC_GREF:
> +		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
> +
> +	case IOCTL_GNTALLOC_DEALLOC_GREF:
> +		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
> +
> +	default:
> +		return -ENOIOCTLCMD;
> +	}
> +
> +	return 0;
> +}
> +
> +static void gntalloc_vma_close(struct vm_area_struct *vma)
> +{
> +	struct gntalloc_gref *gref = vma->vm_private_data;
> +	if (!gref)
> +		return;
> +
> +	spin_lock(&gref_lock);
> +	gref->users--;
> +	if (gref->users == 0)
> +		__del_gref(gref);
> +	spin_unlock(&gref_lock);
> +}
> +
> +static struct vm_operations_struct gntalloc_vmops = {
> +	.close = gntalloc_vma_close,
> +};
> +
> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +	struct gntalloc_gref *gref;
> +	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
> +	int rv, i;
> +
> +	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
> +		       priv, vma->vm_pgoff, count);
> +
> +	if (!(vma->vm_flags & VM_SHARED)) {
> +		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
> +	if (gref == NULL) {
> +		rv = -ENOENT;
> +		pr_debug("%s: Could not find grant reference",
> +				__func__);
> +		goto out_unlock;
> +	}
> +
> +	vma->vm_private_data = gref;
> +
> +	vma->vm_flags |= VM_RESERVED;
> +	vma->vm_flags |= VM_DONTCOPY;
> +	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
> +
> +	vma->vm_ops = &gntalloc_vmops;
> +
> +	for (i = 0; i < count; i++) {
> +		gref->users++;
> +		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
> +				gref->page);
> +		if (rv)
> +			goto out_unlock;
> +
> +		gref = list_entry(gref->next_file.next,
> +				struct gntalloc_gref, next_file);
> +	}
> +	rv = 0;
> +
> +out_unlock:
> +	spin_unlock(&gref_lock);
> +	return rv;
> +}
> +
> +static const struct file_operations gntalloc_fops = {
> +	.owner = THIS_MODULE,
> +	.open = gntalloc_open,
> +	.release = gntalloc_release,
> +	.unlocked_ioctl = gntalloc_ioctl,
> +	.mmap = gntalloc_mmap
> +};
> +
> +/*
> + * -------------------------------------
> + * Module creation/destruction.
> + * -------------------------------------
> + */
> +static struct miscdevice gntalloc_miscdev = {
> +	.minor	= MISC_DYNAMIC_MINOR,
> +	.name	= "xen/gntalloc",
> +	.fops	= &gntalloc_fops,
> +};
> +
> +static int __init gntalloc_init(void)
> +{
> +	int err;
> +
> +	if (!xen_domain())
> +		return -ENODEV;
> +
> +	err = misc_register(&gntalloc_miscdev);
> +	if (err != 0) {
> +		printk(KERN_ERR "Could not register misc gntalloc device\n");
> +		return err;
> +	}
> +
> +	pr_debug("Created grant allocation device at %d,%d\n",
> +			MISC_MAJOR, gntalloc_miscdev.minor);
> +
> +	return 0;
> +}
> +
> +static void __exit gntalloc_exit(void)
> +{
> +	misc_deregister(&gntalloc_miscdev);
> +}
> +
> +module_init(gntalloc_init);
> +module_exit(gntalloc_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
> +		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
> +MODULE_DESCRIPTION("User-space grant reference allocator driver");
> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
> new file mode 100644
> index 0000000..bc3b85e
> --- /dev/null
> +++ b/include/xen/gntalloc.h
> @@ -0,0 +1,50 @@
> +/******************************************************************************
> + * gntalloc.h
> + *
> + * Interface to /dev/xen/gntalloc.
> + *
> + * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> + *
> + * This file is in the public domain.
> + */
> +
> +#ifndef __LINUX_PUBLIC_GNTALLOC_H__
> +#define __LINUX_PUBLIC_GNTALLOC_H__
> +
> +/*
> + * Allocates a new page and creates a new grant reference.
> + */
> +#define IOCTL_GNTALLOC_ALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
> +struct ioctl_gntalloc_alloc_gref {
> +	/* IN parameters */
> +	/* The ID of the domain to be given access to the grants. */
> +	uint16_t domid;
> +	/* Flags for this mapping */
> +	uint16_t flags;
> +	/* Number of pages to map */
> +	uint32_t count;
> +	/* OUT parameters */
> +	/* The offset to be used on a subsequent call to mmap(). */
> +	uint64_t index;
> +	/* The grant references of the newly created grant, one per page */
> +	/* Variable size, depending on count */
> +	uint32_t gref_ids[1];
> +};
> +
> +#define GNTALLOC_FLAG_WRITABLE 1
> +
> +/*
> + * Deallocates the grant reference, allowing the associated page to be freed if
> + * no other domains are using it.
> + */
> +#define IOCTL_GNTALLOC_DEALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
> +struct ioctl_gntalloc_dealloc_gref {
> +	/* IN parameters */
> +	/* The offset returned in the map operation */
> +	uint64_t index;
> +	/* Number of references to unmap */
> +	uint32_t count;
> +};
> +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
> -- 
> 1.7.3.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
@ 2011-02-03 17:19 ` Daniel De Graaf
  2011-02-08 22:48   ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2011-02-03 17:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, jeremy, Ian.Campbell, konrad.wilk

This allows a userspace application to allocate a shared page for
implementing inter-domain communication or device drivers. These
shared pages can be mapped using the gntdev device or by the kernel
in another domain.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/Kconfig    |    8 +
 drivers/xen/Makefile   |    2 +
 drivers/xen/gntalloc.c |  486 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/gntalloc.h |   50 +++++
 4 files changed, 546 insertions(+), 0 deletions(-)
 create mode 100644 drivers/xen/gntalloc.c
 create mode 100644 include/xen/gntalloc.h

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 19f1f3c..69d2cd5 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -142,6 +142,14 @@ config XEN_GNTDEV
 	help
 	  Allows userspace processes to use grants.
 
+config XEN_GRANT_DEV_ALLOC
+	tristate "User-space grant reference allocator driver"
+	depends on XEN
+	help
+	  Allows userspace processes to create pages with access granted
+	  to other domains. This can be used to implement frontend drivers
+	  or as part of an inter-domain shared memory channel.
+
 config XEN_PLATFORM_PCI
 	tristate "xen platform pci device driver"
 	depends on XEN_PVHVM && PCI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 5c3b031..09364b9 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
 obj-$(CONFIG_XENFS)		+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
@@ -19,5 +20,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
 
 xen-evtchn-y			:= evtchn.o
 xen-gntdev-y				:= gntdev.o
+xen-gntalloc-y				:= gntalloc.o
 
 xen-platform-pci-y		:= platform-pci.o
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
new file mode 100644
index 0000000..d06bf2b
--- /dev/null
+++ b/drivers/xen/gntalloc.c
@@ -0,0 +1,486 @@
+/******************************************************************************
+ * gntalloc.c
+ *
+ * Device for creating grant references (in user-space) that may be shared
+ * with other domains.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * This driver exists to allow userspace programs in Linux to allocate kernel
+ * memory that will later be shared with another domain.  Without this device,
+ * Linux userspace programs cannot create grant references.
+ *
+ * How this stuff works:
+ *   X -> granting a page to Y
+ *   Y -> mapping the grant from X
+ *
+ *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
+ *   2. X creates an entry in the grant table that says domid(Y) can access P.
+ *      This is done without a hypercall unless the grant table needs expansion.
+ *   3. X gives the grant reference identifier, GREF, to Y.
+ *   4. Y maps the page, either directly into kernel memory for use in a backend
+ *      driver, or via a the gntdev device to map into the address space of an
+ *      application running in Y. This is the first point at which Xen does any
+ *      tracking of the page.
+ *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
+ *      to the shared page, and can now communicate with Y over the shared page.
+ *
+ *
+ * NOTE TO USERSPACE LIBRARIES:
+ *   The grant allocation and mmap()ing are, naturally, two separate operations.
+ *   You set up the sharing by calling the create ioctl() and then the mmap().
+ *   Teardown requires munmap() and either close() or ioctl().
+ *
+ * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
+ * reference, this device can be used to consume kernel memory by leaving grant
+ * references mapped by another domain when an application exits. Therefore,
+ * there is a global limit on the number of pages that can be allocated. When
+ * all references to the page are unmapped, it will be freed during the next
+ * grant operation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/gntalloc.h>
+
+static int limit = 1024;
+module_param(limit, int, 0644);
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
+		"the gntalloc device");
+
+static LIST_HEAD(gref_list);
+static DEFINE_SPINLOCK(gref_lock);
+static int gref_size;
+
+/* Metadata on a grant reference. */
+struct gntalloc_gref {
+	struct list_head next_gref;  /* list entry gref_list */
+	struct list_head next_file;  /* list entry file->list, if open */
+	struct page *page;	     /* The shared page */
+	uint64_t file_index;         /* File offset for mmap() */
+	unsigned int users;          /* Use count - when zero, waiting on Xen */
+	grant_ref_t gref_id;         /* The grant reference number */
+};
+
+struct gntalloc_file_private_data {
+	struct list_head list;
+	uint64_t index;
+};
+
+static void __del_gref(struct gntalloc_gref *gref);
+
+static void do_cleanup(void)
+{
+	struct gntalloc_gref *gref, *n;
+	list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
+		if (!gref->users)
+			__del_gref(gref);
+	}
+}
+
+static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
+	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
+{
+	int i, rc, readonly;
+	LIST_HEAD(queue_gref);
+	LIST_HEAD(queue_file);
+	struct gntalloc_gref *gref;
+
+	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
+	rc = -ENOMEM;
+	for (i = 0; i < op->count; i++) {
+		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
+		if (!gref)
+			goto undo;
+		list_add_tail(&gref->next_gref, &queue_gref);
+		list_add_tail(&gref->next_file, &queue_file);
+		gref->users = 1;
+		gref->file_index = op->index + i * PAGE_SIZE;
+		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!gref->page)
+			goto undo;
+
+		/* Grant foreign access to the page. */
+		gref->gref_id = gnttab_grant_foreign_access(op->domid,
+			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
+		if (gref->gref_id < 0) {
+			rc = gref->gref_id;
+			goto undo;
+		}
+		gref_ids[i] = gref->gref_id;
+	}
+
+	/* Add to gref lists. */
+	spin_lock(&gref_lock);
+	list_splice_tail(&queue_gref, &gref_list);
+	list_splice_tail(&queue_file, &priv->list);
+	spin_unlock(&gref_lock);
+
+	return 0;
+
+undo:
+	spin_lock(&gref_lock);
+	gref_size -= (op->count - i);
+
+	list_for_each_entry(gref, &queue_file, next_file) {
+		/* __del_gref does not remove from queue_file */
+		__del_gref(gref);
+	}
+
+	/* It's possible for the target domain to map the just-allocated grant
+	 * references by blindly guessing their IDs; if this is done, then
+	 * __del_gref will leave them in the queue_gref list. They need to be
+	 * added to the global list so that we can free them when they are no
+	 * longer referenced.
+	 */
+	if (unlikely(!list_empty(&queue_gref)))
+		list_splice_tail(&queue_gref, &gref_list);
+	spin_unlock(&gref_lock);
+	return rc;
+}
+
+static void __del_gref(struct gntalloc_gref *gref)
+{
+	if (gref->gref_id > 0) {
+		if (gnttab_query_foreign_access(gref->gref_id))
+			return;
+
+		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
+			return;
+	}
+
+	gref_size--;
+	list_del(&gref->next_gref);
+
+	if (gref->page)
+		__free_page(gref->page);
+
+	kfree(gref);
+}
+
+/* finds contiguous grant references in a file, returns the first */
+static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
+		uint64_t index, uint32_t count)
+{
+	struct gntalloc_gref *rv = NULL, *gref;
+	list_for_each_entry(gref, &priv->list, next_file) {
+		if (gref->file_index == index && !rv)
+			rv = gref;
+		if (rv) {
+			if (gref->file_index != index)
+				return NULL;
+			index += PAGE_SIZE;
+			count--;
+			if (count == 0)
+				return rv;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * -------------------------------------
+ *  File operations.
+ * -------------------------------------
+ */
+static int gntalloc_open(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto out_nomem;
+	INIT_LIST_HEAD(&priv->list);
+
+	filp->private_data = priv;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	return 0;
+
+out_nomem:
+	return -ENOMEM;
+}
+
+static int gntalloc_release(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	spin_lock(&gref_lock);
+	while (!list_empty(&priv->list)) {
+		gref = list_entry(priv->list.next,
+			struct gntalloc_gref, next_file);
+		list_del(&gref->next_file);
+		gref->users--;
+		if (gref->users == 0)
+			__del_gref(gref);
+	}
+	kfree(priv);
+	spin_unlock(&gref_lock);
+
+	return 0;
+}
+
+static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
+		struct ioctl_gntalloc_alloc_gref __user *arg)
+{
+	int rc = 0;
+	struct ioctl_gntalloc_alloc_gref op;
+	uint32_t *gref_ids;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
+	if (!gref_ids) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock(&gref_lock);
+	/* Clean up pages that were at zero (local) users but were still mapped
+	 * by remote domains. Since those pages count towards the limit that we
+	 * are about to enforce, removing them here is a good idea.
+	 */
+	do_cleanup();
+	if (gref_size + op.count > limit) {
+		spin_unlock(&gref_lock);
+		rc = -ENOSPC;
+		goto out_free;
+	}
+	gref_size += op.count;
+	op.index = priv->index;
+	priv->index += op.count * PAGE_SIZE;
+	spin_unlock(&gref_lock);
+
+	rc = add_grefs(&op, gref_ids, priv);
+	if (rc < 0)
+		goto out_free;
+
+	/* Once we finish add_grefs, it is unsafe to touch the new reference,
+	 * since it is possible for a concurrent ioctl to remove it (by guessing
+	 * its index). If the userspace application doesn't provide valid memory
+	 * to write the IDs to, then it will need to close the file in order to
+	 * release - which it will do by segfaulting when it tries to access the
+	 * IDs to close them.
+	 */
+	if (copy_to_user(arg, &op, sizeof(op))) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+	if (copy_to_user(arg->gref_ids, gref_ids,
+			sizeof(gref_ids[0]) * op.count)) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+
+out_free:
+	kfree(gref_ids);
+out:
+	return rc;
+}
+
+static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
+		void __user *arg)
+{
+	int i, rc = 0;
+	struct ioctl_gntalloc_dealloc_gref op;
+	struct gntalloc_gref *gref, *n;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto dealloc_grant_out;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_grefs(priv, op.index, op.count);
+	if (gref) {
+		/* Remove from the file list only, and decrease reference count.
+		 * The later call to do_cleanup() will remove from gref_list and
+		 * free the memory if the pages aren't mapped anywhere.
+		 */
+		for (i = 0; i < op.count; i++) {
+			n = list_entry(gref->next_file.next,
+				struct gntalloc_gref, next_file);
+			list_del(&gref->next_file);
+			gref->users--;
+			gref = n;
+		}
+	} else {
+		rc = -EINVAL;
+	}
+
+	do_cleanup();
+
+	spin_unlock(&gref_lock);
+dealloc_grant_out:
+	return rc;
+}
+
+static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+
+	switch (cmd) {
+	case IOCTL_GNTALLOC_ALLOC_GREF:
+		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
+
+	case IOCTL_GNTALLOC_DEALLOC_GREF:
+		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return 0;
+}
+
+static void gntalloc_vma_close(struct vm_area_struct *vma)
+{
+	struct gntalloc_gref *gref = vma->vm_private_data;
+	if (!gref)
+		return;
+
+	spin_lock(&gref_lock);
+	gref->users--;
+	if (gref->users == 0)
+		__del_gref(gref);
+	spin_unlock(&gref_lock);
+}
+
+static struct vm_operations_struct gntalloc_vmops = {
+	.close = gntalloc_vma_close,
+};
+
+static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int rv, i;
+
+	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
+		       priv, vma->vm_pgoff, count);
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
+		return -EINVAL;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
+	if (gref == NULL) {
+		rv = -ENOENT;
+		pr_debug("%s: Could not find grant reference",
+				__func__);
+		goto out_unlock;
+	}
+
+	vma->vm_private_data = gref;
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTCOPY;
+	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
+
+	vma->vm_ops = &gntalloc_vmops;
+
+	for (i = 0; i < count; i++) {
+		gref->users++;
+		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+				gref->page);
+		if (rv)
+			goto out_unlock;
+
+		gref = list_entry(gref->next_file.next,
+				struct gntalloc_gref, next_file);
+	}
+	rv = 0;
+
+out_unlock:
+	spin_unlock(&gref_lock);
+	return rv;
+}
+
+static const struct file_operations gntalloc_fops = {
+	.owner = THIS_MODULE,
+	.open = gntalloc_open,
+	.release = gntalloc_release,
+	.unlocked_ioctl = gntalloc_ioctl,
+	.mmap = gntalloc_mmap
+};
+
+/*
+ * -------------------------------------
+ * Module creation/destruction.
+ * -------------------------------------
+ */
+static struct miscdevice gntalloc_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "xen/gntalloc",
+	.fops	= &gntalloc_fops,
+};
+
+static int __init gntalloc_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	err = misc_register(&gntalloc_miscdev);
+	if (err != 0) {
+		printk(KERN_ERR "Could not register misc gntalloc device\n");
+		return err;
+	}
+
+	pr_debug("Created grant allocation device at %d,%d\n",
+			MISC_MAJOR, gntalloc_miscdev.minor);
+
+	return 0;
+}
+
+static void __exit gntalloc_exit(void)
+{
+	misc_deregister(&gntalloc_miscdev);
+}
+
+module_init(gntalloc_init);
+module_exit(gntalloc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
+		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("User-space grant reference allocator driver");
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
new file mode 100644
index 0000000..bc3b85e
--- /dev/null
+++ b/include/xen/gntalloc.h
@@ -0,0 +1,50 @@
+/******************************************************************************
+ * gntalloc.h
+ *
+ * Interface to /dev/xen/gntalloc.
+ *
+ * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov>
+ *
+ * This file is in the public domain.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTALLOC_H__
+#define __LINUX_PUBLIC_GNTALLOC_H__
+
+/*
+ * Allocates a new page and creates a new grant reference.
+ */
+#define IOCTL_GNTALLOC_ALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
+struct ioctl_gntalloc_alloc_gref {
+	/* IN parameters */
+	/* The ID of the domain to be given access to the grants. */
+	uint16_t domid;
+	/* Flags for this mapping */
+	uint16_t flags;
+	/* Number of pages to map */
+	uint32_t count;
+	/* OUT parameters */
+	/* The offset to be used on a subsequent call to mmap(). */
+	uint64_t index;
+	/* The grant references of the newly created grant, one per page */
+	/* Variable size, depending on count */
+	uint32_t gref_ids[1];
+};
+
+#define GNTALLOC_FLAG_WRITABLE 1
+
+/*
+ * Deallocates the grant reference, allowing the associated page to be freed if
+ * no other domains are using it.
+ */
+#define IOCTL_GNTALLOC_DEALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
+struct ioctl_gntalloc_dealloc_gref {
+	/* IN parameters */
+	/* The offset returned in the map operation */
+	uint64_t index;
+	/* Number of references to unmap */
+	uint32_t count;
+};
+#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2010-12-16 15:22             ` Daniel De Graaf
@ 2010-12-16 19:14               ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 29+ messages in thread
From: Jeremy Fitzhardinge @ 2010-12-16 19:14 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: xen-devel, Ian.Campbell

On 12/16/2010 07:22 AM, Daniel De Graaf wrote:
>> Hm, yeah, that could be a bit fiddly.  I guess you'd need to stick them
>> into an rbtree or something.
> Another option that provides more flexibility - have a flag in the create
> operation, similar to MAP_FIXED in mmap, that allows userspace to mandate
> the offset if it wants control, but default to letting the kernel handle it.
> We already have a flags field for making the grant writable, this is just
> another bit.

I'd go for just implementing one way of doing it unless there's a clear
need for each of them.  The choose-your-own-offset route is looking
pretty complex.  If you have the kernel allocate the offsets, but
perhaps with the guarantee that subsequent allocations will get
consecutive offsets, then that lets usermode set up a group of pages
which can be mapped with a single mmap call, which is all I was really
aiming for.


>>> While this isn't hard, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR only exists in
>>> order to relieve userspace of the need to track its mappings, so this
>>> seems to have been a concern before.
>> It would be nice to have them symmetric.  However, its easy to implement
>> GET_OFFSET_FOR_VADDR either way - given a vaddr, you can look up the vma
>> and return its pgoff.
>>
>> It looks like GET_OFFSET_FOR_VADDR is just used in xc_gnttab_munmap() so
>> that libxc can recover the offset and the page count from the vaddr, so
>> that it can pass them to IOCTL_GNTDEV_UNMAP_GRANT_REF.
>>
>> Also, it seems to fail unmaps which don't exactly correspond to a
>> MAP_GRANT_REF.  I guess that's OK, but it looks a bit strange.
> So, implementing an IOCTL_GNTALLOC_GET_OFFSET_FOR_VADDR would be useful in
> order to allow gntalloc munmap() to be similar to gnttab's. If we want to
> allow a given offset to be mapped to multiple domains, we couldn't just
> return the offset; it would have to be a list of grant references, and
> the destroy ioctl would need to take the grant reference.

See below.

>>> Another use case of gntalloc that may prove useful is to have more than
>>> one application able to map the same grant within the kernel.
>> So you mean have gntalloc allocate one page and the allow multiple
>> processes to map and use it?  In that case it would probably be best
>> implemented as a filesystem, so you can give proper globally visible
>> names to the granted regions, and mmap them as normal files, like shm.
> That seems like a better way to expose this functionality. I didn't have
> a use case for multiple processes mapping a grant, just didn't want to
> prevent doing it in the future if it was a trivial change. Since it's
> more complex to implement a filesystem, I think someone needs to find a
> use for it before it's written. I believe the current code lets you map
> the areas in multiple processes if you pass the file descriptor around
> with fork() or using unix sockets; that seems sufficient to me.

That raises another quirk in the gntdev (which I think also applies to
gntalloc) API - the relationship between munmap and
IOCTL_GNTDEV_UNMAP_GRANT_REF.  The current behaviour is that the ioctl
will fail with EBUSY if there are still mappings of the granted pages. 
It would probably be better to make the map refcounted by the number of
vmas+1 pointing to it so that the UNMAP_GRANT_REF would drop a
reference, as would each vma as its unmapped, with the actual ungranting
happening at ref==0.  That would allow multiple uncoordinated processes
to use the same mappings without having to work out who's doing the cleanup.

This would also allow auto-ungranting maps (xc_gnttab_map_grant_ref()
could do the UNMAP_GRANT_REF immediately after the mmap(), so that
xc_gnttab_munmap() can simply munmap() without the need for
GET_OFFSET_FOR_VADDR).

> Anyway, you don't have to call mmap() to let another domain access the shared
> pages; they are mappable as soon as the ioctl() returns, and remain so
> until you call the removal ioctl(). So if you do call mmap(), you probably
> expect to use the mapping.

Yep.

    J

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2010-12-16  1:05           ` Jeremy Fitzhardinge
@ 2010-12-16 15:22             ` Daniel De Graaf
  2010-12-16 19:14               ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2010-12-16 15:22 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel, Ian.Campbell

On 12/15/2010 08:05 PM, Jeremy Fitzhardinge wrote:
> On 12/15/2010 06:18 AM, Daniel De Graaf wrote:
>> On 12/14/2010 05:40 PM, Jeremy Fitzhardinge wrote:
>>> On 12/14/2010 02:06 PM, Daniel De Graaf wrote:
>>>>>> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
>>>>>> +{
>>>>>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>>>>>> +	struct gntalloc_gref *gref;
>>>>>> +
>>>>>> +	if (debug)
>>>>>> +		printk("%s: priv %p, page %lu\n", __func__,
>>>>>> +		       priv, vma->vm_pgoff);
>>>>>> +
>>>>>> +	/*
>>>>>> +	 * There is a 1-to-1 correspondence of grant references to shared
>>>>>> +	 * pages, so it only makes sense to map exactly one page per
>>>>>> +	 * call to mmap().
>>>>>> +	 */
>>>>> Single-page mmap makes sense if the only possible use-cases are for
>>>>> single-page mappings, but if you're talking about framebuffers and the
>>>>> like is seems like a very awkward way to use mmap.  It would be cleaner
>>>>> from an API perspective to have a user-mode defined flat address space
>>>>> indexed by pgoff which maps to an array of grefs, so you can sensibly do
>>>>> a multi-page mapping.
>>>>>
>>>>> It would also allow you to hide the grefs from usermode entirely.  Then
>>>>> its just up to usermode to choose suitable file offsets for itself.
>>>> I considered this, but wanted to keep userspace compatability with the
>>>> previously created interface.
>>> Is that private to you, or something in broader use?
>> This module was used as part of Qubes (http://www.qubes-os.org). The device
>> path has changed (/dev/gntalloc to /dev/xen/gntalloc), and the API change
>> adds useful functionality, so I don't think we must keep compatibility. This
>> will also allow cleaning up the interface to remove parameters that make no
>> sense (owner_domid, for example).
> 
> Ah, right.  Well that means it has at least been prototyped, but I don't
> think we should be constrained by the original ABI if we can make clear
> improvements.
> 
>>>>  If there's no reason to avoid doing so, I'll
>>>> change the ioctl interface to allocate an array of grants and calculate the
>>>> offset similar to how gntdev does currently (picks a suitable open slot).
>>> I guess there's three options: you could get the kernel to allocate
>>> extents, make usermode do it, or have one fd per extent and always start
>>> from offset 0.  I guess the last could get very messy if you want to
>>> have lots of mappings...  Making usermode define the offsets seems
>>> simplest and most flexible, because then they can stitch together the
>>> file-offset space in any way that's convenient to them (you just need to
>>> deal with overlaps in that space).
>> Would it be useful to also give userspace control over the offsets in gntdev?
>>
>> One argument for doing it in the kernel is to avoid needing to track what
>> offsets are already being used (and then having the kernel re-check that).
> 
> Hm, yeah, that could be a bit fiddly.  I guess you'd need to stick them
> into an rbtree or something.

Another option that provides more flexibility - have a flag in the create
operation, similar to MAP_FIXED in mmap, that allows userspace to mandate
the offset if it wants control, but default to letting the kernel handle it.
We already have a flags field for making the grant writable, this is just
another bit.

>> While this isn't hard, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR only exists in
>> order to relieve userspace of the need to track its mappings, so this
>> seems to have been a concern before.
> 
> It would be nice to have them symmetric.  However, its easy to implement
> GET_OFFSET_FOR_VADDR either way - given a vaddr, you can look up the vma
> and return its pgoff.
> 
> It looks like GET_OFFSET_FOR_VADDR is just used in xc_gnttab_munmap() so
> that libxc can recover the offset and the page count from the vaddr, so
> that it can pass them to IOCTL_GNTDEV_UNMAP_GRANT_REF.
> 
> Also, it seems to fail unmaps which don't exactly correspond to a
> MAP_GRANT_REF.  I guess that's OK, but it looks a bit strange.

So, implementing an IOCTL_GNTALLOC_GET_OFFSET_FOR_VADDR would be useful in
order to allow gntalloc munmap() to be similar to gnttab's. If we want to
allow a given offset to be mapped to multiple domains, we couldn't just
return the offset; it would have to be a list of grant references, and
the destroy ioctl would need to take the grant reference.

>> Another use case of gntalloc that may prove useful is to have more than
>> one application able to map the same grant within the kernel.
> 
> So you mean have gntalloc allocate one page and the allow multiple
> processes to map and use it?  In that case it would probably be best
> implemented as a filesystem, so you can give proper globally visible
> names to the granted regions, and mmap them as normal files, like shm.

That seems like a better way to expose this functionality. I didn't have
a use case for multiple processes mapping a grant, just didn't want to
prevent doing it in the future if it was a trivial change. Since it's
more complex to implement a filesystem, I think someone needs to find a
use for it before it's written. I believe the current code lets you map
the areas in multiple processes if you pass the file descriptor around
with fork() or using unix sockets; that seems sufficient to me.
 
>> Agreed; once mapped, the frame numbers (GFN & MFN) won't change until
>> they are unmapped, so pre-populating them will be better.
> 
> Unless of course you don't want to map the pages in dom0 at all; if you
> just want dom0 to be a facilitator for shared pages between two other
> domains.  Does Xen allow a page to be granted to more than one domain at
> once?

I think so; you'd have to use multiple grant table entries in order to do
that, and it might trigger some hypervisor warnings when the shared page has
an unexpectedly high refcount when being unmapped (HVM mappings already
trigger such warnings, so perhaps they need to be removed). I can't think
of an immediate use for a 3-domain shared page, but that doesn't mean that
such a use doesn't exist. Perhaps some kind of shared page cache, exported
using read-only grants by dom0?

Having dom0 create a pair of grant table entries to let two domUs
communicate seems like a hack to get around the lack of gntalloc in either
domU. This module works perfectly fine in a domU, and the use of a shared
page must be asymmetric no matter how you set it up, so you don't save all
that much code by making the setup identical.

Anyway, you don't have to call mmap() to let another domain access the shared
pages; they are mappable as soon as the ioctl() returns, and remain so
until you call the removal ioctl(). So if you do call mmap(), you probably
expect to use the mapping.

-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2010-12-15 14:18         ` Daniel De Graaf
@ 2010-12-16  1:05           ` Jeremy Fitzhardinge
  2010-12-16 15:22             ` Daniel De Graaf
  0 siblings, 1 reply; 29+ messages in thread
From: Jeremy Fitzhardinge @ 2010-12-16  1:05 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: xen-devel, Ian.Campbell

On 12/15/2010 06:18 AM, Daniel De Graaf wrote:
> On 12/14/2010 05:40 PM, Jeremy Fitzhardinge wrote:
>> On 12/14/2010 02:06 PM, Daniel De Graaf wrote:
>>>>> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
>>>>> +{
>>>>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>>>>> +	struct gntalloc_gref *gref;
>>>>> +
>>>>> +	if (debug)
>>>>> +		printk("%s: priv %p, page %lu\n", __func__,
>>>>> +		       priv, vma->vm_pgoff);
>>>>> +
>>>>> +	/*
>>>>> +	 * There is a 1-to-1 correspondence of grant references to shared
>>>>> +	 * pages, so it only makes sense to map exactly one page per
>>>>> +	 * call to mmap().
>>>>> +	 */
>>>> Single-page mmap makes sense if the only possible use-cases are for
>>>> single-page mappings, but if you're talking about framebuffers and the
>>>> like is seems like a very awkward way to use mmap.  It would be cleaner
>>>> from an API perspective to have a user-mode defined flat address space
>>>> indexed by pgoff which maps to an array of grefs, so you can sensibly do
>>>> a multi-page mapping.
>>>>
>>>> It would also allow you to hide the grefs from usermode entirely.  Then
>>>> its just up to usermode to choose suitable file offsets for itself.
>>> I considered this, but wanted to keep userspace compatability with the
>>> previously created interface.
>> Is that private to you, or something in broader use?
> This module was used as part of Qubes (http://www.qubes-os.org). The device
> path has changed (/dev/gntalloc to /dev/xen/gntalloc), and the API change
> adds useful functionality, so I don't think we must keep compatibility. This
> will also allow cleaning up the interface to remove parameters that make no
> sense (owner_domid, for example).

Ah, right.  Well that means it has at least been prototyped, but I don't
think we should be constrained by the original ABI if we can make clear
improvements.

>>>  If there's no reason to avoid doing so, I'll
>>> change the ioctl interface to allocate an array of grants and calculate the
>>> offset similar to how gntdev does currently (picks a suitable open slot).
>> I guess there's three options: you could get the kernel to allocate
>> extents, make usermode do it, or have one fd per extent and always start
>> from offset 0.  I guess the last could get very messy if you want to
>> have lots of mappings...  Making usermode define the offsets seems
>> simplest and most flexible, because then they can stitch together the
>> file-offset space in any way that's convenient to them (you just need to
>> deal with overlaps in that space).
> Would it be useful to also give userspace control over the offsets in gntdev?
>
> One argument for doing it in the kernel is to avoid needing to track what
> offsets are already being used (and then having the kernel re-check that).

Hm, yeah, that could be a bit fiddly.  I guess you'd need to stick them
into an rbtree or something.

> While this isn't hard, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR only exists in
> order to relieve userspace of the need to track its mappings, so this
> seems to have been a concern before.

It would be nice to have them symmetric.  However, its easy to implement
GET_OFFSET_FOR_VADDR either way - given a vaddr, you can look up the vma
and return its pgoff.

It looks like GET_OFFSET_FOR_VADDR is just used in xc_gnttab_munmap() so
that libxc can recover the offset and the page count from the vaddr, so
that it can pass them to IOCTL_GNTDEV_UNMAP_GRANT_REF.

Also, it seems to fail unmaps which don't exactly correspond to a
MAP_GRANT_REF.  I guess that's OK, but it looks a bit strange.

> Another use case of gntalloc that may prove useful is to have more than
> one application able to map the same grant within the kernel.

So you mean have gntalloc allocate one page and the allow multiple
processes to map and use it?  In that case it would probably be best
implemented as a filesystem, so you can give proper globally visible
names to the granted regions, and mmap them as normal files, like shm.

> Agreed; once mapped, the frame numbers (GFN & MFN) won't change until
> they are unmapped, so pre-populating them will be better.

Unless of course you don't want to map the pages in dom0 at all; if you
just want dom0 to be a facilitator for shared pages between two other
domains.  Does Xen allow a page to be granted to more than one domain at
once?

    J

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2010-12-14 22:40       ` Jeremy Fitzhardinge
@ 2010-12-15 14:18         ` Daniel De Graaf
  2010-12-16  1:05           ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2010-12-15 14:18 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel, Ian.Campbell

On 12/14/2010 05:40 PM, Jeremy Fitzhardinge wrote:
> On 12/14/2010 02:06 PM, Daniel De Graaf wrote:
>>>> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
>>>> +{
>>>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>>>> +	struct gntalloc_gref *gref;
>>>> +
>>>> +	if (debug)
>>>> +		printk("%s: priv %p, page %lu\n", __func__,
>>>> +		       priv, vma->vm_pgoff);
>>>> +
>>>> +	/*
>>>> +	 * There is a 1-to-1 correspondence of grant references to shared
>>>> +	 * pages, so it only makes sense to map exactly one page per
>>>> +	 * call to mmap().
>>>> +	 */
>>> Single-page mmap makes sense if the only possible use-cases are for
>>> single-page mappings, but if you're talking about framebuffers and the
>>> like is seems like a very awkward way to use mmap.  It would be cleaner
>>> from an API perspective to have a user-mode defined flat address space
>>> indexed by pgoff which maps to an array of grefs, so you can sensibly do
>>> a multi-page mapping.
>>>
>>> It would also allow you to hide the grefs from usermode entirely.  Then
>>> its just up to usermode to choose suitable file offsets for itself.
>> I considered this, but wanted to keep userspace compatability with the
>> previously created interface.
> 
> Is that private to you, or something in broader use?

This module was used as part of Qubes (http://www.qubes-os.org). The device
path has changed (/dev/gntalloc to /dev/xen/gntalloc), and the API change
adds useful functionality, so I don't think we must keep compatibility. This
will also allow cleaning up the interface to remove parameters that make no
sense (owner_domid, for example).

>>  If there's no reason to avoid doing so, I'll
>> change the ioctl interface to allocate an array of grants and calculate the
>> offset similar to how gntdev does currently (picks a suitable open slot).
> 
> I guess there's three options: you could get the kernel to allocate
> extents, make usermode do it, or have one fd per extent and always start
> from offset 0.  I guess the last could get very messy if you want to
> have lots of mappings...  Making usermode define the offsets seems
> simplest and most flexible, because then they can stitch together the
> file-offset space in any way that's convenient to them (you just need to
> deal with overlaps in that space).

Would it be useful to also give userspace control over the offsets in gntdev?

One argument for doing it in the kernel is to avoid needing to track what
offsets are already being used (and then having the kernel re-check that).
While this isn't hard, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR only exists in
order to relieve userspace of the need to track its mappings, so this
seems to have been a concern before.

Another use case of gntalloc that may prove useful is to have more than
one application able to map the same grant within the kernel. For gntdev,
this can be done by mapping the pages multiple times (although that may
not be the most efficient). Moving the gntalloc lists out of the file
private data would allow any user of gntalloc to map any shared page. The
primary reason not to do this is that it prevents automatic cleanup of
allocated pages on close (which is important when the userspace app doing
the mapping crashes).

>> Userspace does still have to know about grefs, of course, but only to pass
>> to the domain doing the mapping, not for its own mmap().
> 
> Ah, yes, of course.
> 
>>>> +
>>>> +	/* This flag prevents Bad PTE errors when the memory is unmapped. */
>>>> +	vma->vm_flags |= VM_RESERVED;
>>>> +	vma->vm_flags |= VM_DONTCOPY;
>>>> +	vma->vm_flags |= VM_IO;
>>> If you set VM_PFNMAP then you don't need to deal with faults.
>> Useful to know. Is that more efficient/preferred to defining a
>> .fault handler? I used this method because that's what is used in
>> kernel/relay.c.
> 
> Well, as you currently have it, your mmap() function doesn't map
> anything, so you're relying on demand faulting to populate the ptes. 
> Since you've already allocated the pages that's just a soft fault, but
> it means you end up with a lot of per-page entries into the hypervisor.
> 
> If you make mmap pre-populate all the ptes (a nice big fat batch
> operation), then you should never get faults on the vma.  You can set
> PFNMAP to make sure of that (since you're already setting all the
> "woowoo vma" flags, that makes sense).
> 
> Its actual meaning is "this vma contains pages which are not really
> kernel memory, so paging them doesn't make sense" - ie, device or
> foreign memory (we use it in gntdev).
> 
> In this case, the pages are normal kernel pages but they're being given
> over to a "device" and so are no longer subject to normal kernel
> lifetime rules.  So I think PFNMAP makes sense in this case too.
> 
> 
>     J
> 

Agreed; once mapped, the frame numbers (GFN & MFN) won't change until
they are unmapped, so pre-populating them will be better.


-- 
Daniel De Graaf
National Security Agency

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2010-12-14 22:06     ` Daniel De Graaf
@ 2010-12-14 22:40       ` Jeremy Fitzhardinge
  2010-12-15 14:18         ` Daniel De Graaf
  0 siblings, 1 reply; 29+ messages in thread
From: Jeremy Fitzhardinge @ 2010-12-14 22:40 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: xen-devel, Ian.Campbell

On 12/14/2010 02:06 PM, Daniel De Graaf wrote:
>>> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
>>> +{
>>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>>> +	struct gntalloc_gref *gref;
>>> +
>>> +	if (debug)
>>> +		printk("%s: priv %p, page %lu\n", __func__,
>>> +		       priv, vma->vm_pgoff);
>>> +
>>> +	/*
>>> +	 * There is a 1-to-1 correspondence of grant references to shared
>>> +	 * pages, so it only makes sense to map exactly one page per
>>> +	 * call to mmap().
>>> +	 */
>> Single-page mmap makes sense if the only possible use-cases are for
>> single-page mappings, but if you're talking about framebuffers and the
>> like is seems like a very awkward way to use mmap.  It would be cleaner
>> from an API perspective to have a user-mode defined flat address space
>> indexed by pgoff which maps to an array of grefs, so you can sensibly do
>> a multi-page mapping.
>>
>> It would also allow you to hide the grefs from usermode entirely.  Then
>> its just up to usermode to choose suitable file offsets for itself.
> I considered this, but wanted to keep userspace compatability with the
> previously created interface.

Is that private to you, or something in broader use?

>  If there's no reason to avoid doing so, I'll
> change the ioctl interface to allocate an array of grants and calculate the
> offset similar to how gntdev does currently (picks a suitable open slot).

I guess there's three options: you could get the kernel to allocate
extents, make usermode do it, or have one fd per extent and always start
from offset 0.  I guess the last could get very messy if you want to
have lots of mappings...  Making usermode define the offsets seems
simplest and most flexible, because then they can stitch together the
file-offset space in any way that's convenient to them (you just need to
deal with overlaps in that space).

> Userspace does still have to know about grefs, of course, but only to pass
> to the domain doing the mapping, not for its own mmap().

Ah, yes, of course.

>>> +	if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1) {
>>> +		printk(KERN_ERR "%s: Only one page can be memory-mapped "
>>> +			"per grant reference.\n", __func__);
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	if (!(vma->vm_flags & VM_SHARED)) {
>>> +		printk(KERN_ERR "%s: Mapping must be shared.\n",
>>> +			__func__);
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	spin_lock(&gref_lock);
>>> +	gref = find_gref(priv, vma->vm_pgoff);
>>> +	if (gref == NULL) {
>>> +		spin_unlock(&gref_lock);
>>> +		printk(KERN_ERR "%s: Could not find a grant reference with "
>>> +			"page index %lu.\n", __func__, vma->vm_pgoff);
>>> +		return -ENOENT;
>>> +	}
>>> +	gref->users++;
>>> +	spin_unlock(&gref_lock);
>>> +
>>> +	vma->vm_private_data = gref;
>>> +
>>> +	/* This flag prevents Bad PTE errors when the memory is unmapped. */
>>> +	vma->vm_flags |= VM_RESERVED;
>>> +	vma->vm_flags |= VM_DONTCOPY;
>>> +	vma->vm_flags |= VM_IO;
>> If you set VM_PFNMAP then you don't need to deal with faults.
> Useful to know. Is that more efficient/preferred to defining a
> .fault handler? I used this method because that's what is used in
> kernel/relay.c.

Well, as you currently have it, your mmap() function doesn't map
anything, so you're relying on demand faulting to populate the ptes. 
Since you've already allocated the pages that's just a soft fault, but
it means you end up with a lot of per-page entries into the hypervisor.

If you make mmap pre-populate all the ptes (a nice big fat batch
operation), then you should never get faults on the vma.  You can set
PFNMAP to make sure of that (since you're already setting all the
"woowoo vma" flags, that makes sense).

Its actual meaning is "this vma contains pages which are not really
kernel memory, so paging them doesn't make sense" - ie, device or
foreign memory (we use it in gntdev).

In this case, the pages are normal kernel pages but they're being given
over to a "device" and so are no longer subject to normal kernel
lifetime rules.  So I think PFNMAP makes sense in this case too.


    J

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2010-12-14 21:42   ` Jeremy Fitzhardinge
@ 2010-12-14 22:06     ` Daniel De Graaf
  2010-12-14 22:40       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2010-12-14 22:06 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel, Ian.Campbell

On 12/14/2010 04:42 PM, Jeremy Fitzhardinge wrote:
> On 12/14/2010 06:55 AM, Daniel De Graaf wrote:
>> This allows a userspace application to allocate a shared page for
>> implementing inter-domain communication or device drivers. These
>> shared pages can be mapped using the gntdev device or by the kernel
>> in another domain.
>>
>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>> ---
>>  drivers/xen/Kconfig    |    7 +
>>  drivers/xen/Makefile   |    2 +
>>  drivers/xen/gntalloc.c |  456 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/xen/gntalloc.h |   68 +++++++
>>  4 files changed, 533 insertions(+), 0 deletions(-)
>>  create mode 100644 drivers/xen/gntalloc.c
>>  create mode 100644 include/xen/gntalloc.h
>>
>> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
>> index fa9982e..8398cb0 100644
>> --- a/drivers/xen/Kconfig
>> +++ b/drivers/xen/Kconfig
>> @@ -180,6 +180,13 @@ config XEN_GNTDEV
>>  	help
>>  	  Allows userspace processes use grants.
>>  
>> +config XEN_GRANT_DEV_ALLOC
>> +	tristate "User-space grant reference allocator driver"
>> +	depends on XEN
>> +	help
>> +	  Allows userspace processes to create pages with access granted
>> +	  to other domains.
>> +
>>  config XEN_S3
>>         def_bool y
>>         depends on XEN_DOM0 && ACPI
>> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
>> index ef1ea63..9814c1d 100644
>> --- a/drivers/xen/Makefile
>> +++ b/drivers/xen/Makefile
>> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
>>  obj-$(CONFIG_XEN_BALLOON)		+= balloon.o
>>  obj-$(CONFIG_XEN_DEV_EVTCHN)		+= xen-evtchn.o
>>  obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
>> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
>>  obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
>>  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
>>  obj-$(CONFIG_XEN_BLKDEV_TAP)            += blktap/
>> @@ -25,3 +26,4 @@ obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
>>  
>>  xen-evtchn-y				:= evtchn.o
>>  xen-gntdev-y				:= gntdev.o
>> +xen-gntalloc-y				:= gntalloc.o
>> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
>> new file mode 100644
>> index 0000000..f26adfd
>> --- /dev/null
>> +++ b/drivers/xen/gntalloc.c
>> @@ -0,0 +1,456 @@
>> +/******************************************************************************
>> + * gntalloc.c
>> + *
>> + * Device for creating grant references (in user-space) that may be shared
>> + * with other domains.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>> + */
>> +
>> +/*
>> + * This driver exists to allow userspace programs in Linux to allocate kernel
>> + * memory that will later be shared with another domain.  Without this device,
>> + * Linux userspace programs cannot create grant references.
>> + *
>> + * How this stuff works:
>> + *   X -> granting a page to Y
>> + *   Y -> mapping the grant from X
>> + *
>> + *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
>> + *   2. X creates an entry in the grant table that says domid(Y) can
>> + *      access P.
>> + *   3. X gives the grant reference identifier, GREF, to Y.
>> + *   4. A program in Y uses the gntdev device to map the page (owned by X
>> + *      and identified by GREF) into domain(Y) and then into the address
>> + *      space of the program.  Behind the scenes, this requires a
>> + *      hypercall in which Xen modifies the host CPU page tables to
>> + *      perform the sharing -- that's where the actual cross-domain mapping
>> + *      occurs.
> 
> Presumably Y could be any grant-page user, not specifically gntdev?  So
> you could use this to implement a frontend in userspace?

Yes, this is correct; the use of gntdev here was intended as an example,
not as the only user

>> + *   5. A program in X mmap()s a segment of the gntalloc device that
>> + *      corresponds to the shared page.
>> + *   6. The two userspace programs can now communicate over the shared page.
>> + *
>> + *
>> + * NOTE TO USERSPACE LIBRARIES:
>> + *   The grant allocation and mmap()ing are, naturally, two separate
>> + *   operations.  You set up the sharing by calling the create ioctl() and
>> + *   then the mmap().  You must tear down the sharing in the reverse order
>> + *   (munmap() and then the destroy ioctl()).
>> + *
>> + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
>> + * reference, this device can be used to consume kernel memory by leaving grant
>> + * references mapped by another domain when an application exits. Therefore,
>> + * there is a global limit on the number of pages that can be allocated. When
>> + * all references to the page are unmapped, it will be freed during the next
>> + * grant operation.
>> + */
>> +
>> +#include <asm/atomic.h>
>> +#include <linux/module.h>
>> +#include <linux/miscdevice.h>
>> +#include <linux/kernel.h>
>> +#include <linux/init.h>
>> +#include <linux/slab.h>
>> +#include <linux/fs.h>
>> +#include <linux/device.h>
>> +#include <linux/mm.h>
>> +#include <asm/uaccess.h>
>> +#include <linux/types.h>
>> +#include <linux/list.h>
>> +
>> +#include <xen/xen.h>
>> +#include <xen/page.h>
>> +#include <xen/grant_table.h>
>> +#include <xen/gntalloc.h>
>> +
>> +static int debug = 0;
>> +module_param(debug, int, 0644);
>> +
>> +static int limit = 1024;
>> +module_param(limit, int, 0644);
>> +
>> +static LIST_HEAD(gref_list);
>> +static DEFINE_SPINLOCK(gref_lock);
>> +static int gref_size = 0;
>> +
>> +/* Metadata on a grant reference. */
>> +struct gntalloc_gref {
>> +	struct list_head next_all;   /* list entry gref_list */
>> +	struct list_head next_file;  /* list entry file->list, if open */
>> +	domid_t foreign_domid;	     /* The ID of the domain to share with. */
>> +	grant_ref_t gref_id;         /* The grant reference number. */
>> +	unsigned int users;          /* Use count - when zero, waiting on Xen */
>> +	struct page* page;	     /* The shared page. */
>> +};
>> +
>> +struct gntalloc_file_private_data {
>> +	struct list_head list;
>> +};
>> +
>> +static void __del_gref(struct gntalloc_gref *gref);
>> +
>> +static void do_cleanup(void)
>> +{
>> +	struct gntalloc_gref *gref, *n;
>> +	list_for_each_entry_safe(gref, n, &gref_list, next_all) {
>> +		if (!gref->users)
>> +			__del_gref(gref);
>> +	}
>> +}
>> +
>> +
>> +static int add_gref(domid_t foreign_domid, uint32_t readonly,
>> +	struct gntalloc_file_private_data *priv)
>> +{
>> +	int rc;
>> +	struct gntalloc_gref *gref;
>> +
>> +	rc = -ENOMEM;
>> +	spin_lock(&gref_lock);
>> +	do_cleanup();
>> +	if (gref_size >= limit) {
>> +		spin_unlock(&gref_lock);
>> +		rc = -ENOSPC;
>> +		goto out;
>> +	}
>> +	gref_size++;
>> +	spin_unlock(&gref_lock);
>> +
>> +	gref = kzalloc(sizeof(*gref), GFP_KERNEL);
>> +	if (!gref)
>> +		goto out;
>> +
>> +	gref->foreign_domid = foreign_domid;
>> +	gref->users = 1;
>> +
>> +	/* Allocate the page to share. */
>> +	gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
> 
> Could this be GFP_HIGHUSER?

Yes, that's probably a better flag to use here.
 
>> +	if (!gref->page)
>> +		goto out_nopage;
>> +
>> +	/* Grant foreign access to the page. */
>> +	gref->gref_id = gnttab_grant_foreign_access(foreign_domid,
>> +		pfn_to_mfn(page_to_pfn(gref->page)), readonly);
>> +	if (gref->gref_id < 0) {
>> +		printk(KERN_ERR "%s: failed to grant foreign access for mfn "
>> +			"%lu to domain %u\n", __func__,
>> +			pfn_to_mfn(page_to_pfn(gref->page)), foreign_domid);
>> +		rc = -EFAULT;
>> +		goto out_no_foreign_gref;
>> +	}
>> +
>> +	/* Add to gref lists. */
>> +	spin_lock(&gref_lock);
>> +	list_add_tail(&gref->next_all, &gref_list);
>> +	list_add_tail(&gref->next_file, &priv->list);
>> +	spin_unlock(&gref_lock);
>> +
>> +	return gref->gref_id;
>> +
>> +out_no_foreign_gref:
>> +	__free_page(gref->page);
>> +out_nopage:
>> +	kfree(gref);
>> +out:
>> +	return rc;
>> +}
>> +
>> +static void __del_gref(struct gntalloc_gref *gref)
>> +{
>> +	if (gnttab_query_foreign_access(gref->gref_id))
>> +		return;
>> +
>> +	if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
>> +		return;
>> +
>> +	gref_size--;
>> +	list_del(&gref->next_all);
>> +
>> +	__free_page(gref->page);
>> +	kfree(gref);
>> +}
>> +
>> +static struct gntalloc_gref* find_gref(struct gntalloc_file_private_data *priv,
>> +                                       grant_ref_t gref_id)
>> +{
>> +	struct gntalloc_gref *gref;
>> +	list_for_each_entry(gref, &priv->list, next_file) {
>> +		if (gref->gref_id == gref_id)
>> +			return gref;
>> +	}
>> +	return NULL;
>> +}
>> +
>> +/*
>> + * -------------------------------------
>> + *  File operations.
>> + * -------------------------------------
>> + */
>> +static int gntalloc_open(struct inode *inode, struct file *filp)
>> +{
>> +	struct gntalloc_file_private_data *priv;
>> +
>> +	try_module_get(THIS_MODULE);
>> +
>> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
>> +	if (!priv)
>> +		goto out_nomem;
>> +	INIT_LIST_HEAD(&priv->list);
>> +
>> +	filp->private_data = priv;
>> +
>> +	if (debug)
>> +		printk("%s: priv %p\n", __FUNCTION__, priv);
>> +
>> +	return 0;
>> +
>> +out_nomem:
>> +	return -ENOMEM;
>> +}
>> +
>> +static int gntalloc_release(struct inode *inode, struct file *filp)
>> +{
>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>> +	struct gntalloc_gref *gref;
>> +
>> +	if (debug)
>> +		printk("%s: priv %p\n", __FUNCTION__, priv);
>> +
>> +	spin_lock(&gref_lock);
> 
> Presumably this is unnecessary because there can be no other users if
> you're tearing down the list and destroying priv.
> 
>> +	while (!list_empty(&priv->list)) {
>> +		gref = list_entry(priv->list.next,
>> +			struct gntalloc_gref, next_file);
>> +		list_del(&gref->next_file);
>> +		gref->users--;
>> +		if (gref->users == 0)
>> +			__del_gref(gref);
>> +	}
>> +	kfree(priv);
>> +	spin_unlock(&gref_lock);
>> +
>> +	module_put(THIS_MODULE);
>> +
>> +	return 0;
>> +}
>> +
>> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
>> +                                 void __user *arg)
>> +{
>> +	int rc = 0;
>> +	struct ioctl_gntalloc_alloc_gref op;
>> +
>> +	if (debug)
>> +		printk("%s: priv %p\n", __FUNCTION__, priv);
>> +
>> +	if (copy_from_user(&op, arg, sizeof(op))) {
>> +		rc = -EFAULT;
>> +		goto alloc_grant_out;
>> +	}
>> +	rc = add_gref(op.foreign_domid, op.readonly, priv);
>> +	if (rc < 0)
>> +		goto alloc_grant_out;
>> +
>> +	op.gref_id = rc;
>> +	op.page_idx = rc;
> 
> Hm, see below.
> 
>> +
>> +	rc = 0;
>> +
>> +	if (copy_to_user((void __user *)arg, &op, sizeof(op))) {
>> +		rc = -EFAULT;
>> +		goto alloc_grant_out;
>> +	}
>> +
>> +alloc_grant_out:
>> +	return rc;
>> +}
>> +
>> +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
>> +                                   void __user *arg)
>> +{
>> +	int rc = 0;
>> +	struct ioctl_gntalloc_dealloc_gref op;
>> +	struct gntalloc_gref *gref;
>> +
>> +	if (debug)
>> +		printk("%s: priv %p\n", __FUNCTION__, priv);
>> +
>> +	if (copy_from_user(&op, arg, sizeof(op))) {
>> +		rc = -EFAULT;
>> +		goto dealloc_grant_out;
>> +	}
>> +
>> +	spin_lock(&gref_lock);
>> +	gref = find_gref(priv, op.gref_id);
>> +	if (gref) {
>> +		list_del(&gref->next_file);
>> +		gref->users--;
>> +		rc = 0;
>> +	} else {
>> +		rc = -EINVAL;
>> +	}
>> +
>> +	do_cleanup();
>> +	spin_unlock(&gref_lock);
>> +dealloc_grant_out:
>> +	return rc;
>> +}
>> +
>> +static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
>> +		unsigned long arg)
>> +{
>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>> +
>> +	switch (cmd) {
>> +	case IOCTL_GNTALLOC_ALLOC_GREF:
>> +		return gntalloc_ioctl_alloc(priv, (void __user*)arg);
>> +
>> +	case IOCTL_GNTALLOC_DEALLOC_GREF:
>> +		return gntalloc_ioctl_dealloc(priv, (void __user*)arg);
>> +
>> +	default:
>> +		return -ENOIOCTLCMD;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int gntalloc_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>> +{
>> +	struct gntalloc_gref *gref = vma->vm_private_data;
>> +	if (!gref)
>> +		return VM_FAULT_SIGBUS;
>> +
>> +	vmf->page = gref->page;
>> +	get_page(vmf->page);
>> +
>> +	return 0;
>> +};
>> +
>> +static void gntalloc_vma_close(struct vm_area_struct *vma)
>> +{
>> +	struct gntalloc_gref *gref = vma->vm_private_data;
>> +	if (!gref)
>> +		return;
>> +
>> +	spin_lock(&gref_lock);
>> +	gref->users--;
>> +	if (gref->users == 0)
>> +		__del_gref(gref);
>> +	spin_unlock(&gref_lock);
>> +}
>> +
>> +static struct vm_operations_struct gntalloc_vmops = {
>> +	.fault = gntalloc_vma_fault,
>> +	.close = gntalloc_vma_close,
>> +};
>> +
>> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
>> +{
>> +	struct gntalloc_file_private_data *priv = filp->private_data;
>> +	struct gntalloc_gref *gref;
>> +
>> +	if (debug)
>> +		printk("%s: priv %p, page %lu\n", __func__,
>> +		       priv, vma->vm_pgoff);
>> +
>> +	/*
>> +	 * There is a 1-to-1 correspondence of grant references to shared
>> +	 * pages, so it only makes sense to map exactly one page per
>> +	 * call to mmap().
>> +	 */
> 
> Single-page mmap makes sense if the only possible use-cases are for
> single-page mappings, but if you're talking about framebuffers and the
> like is seems like a very awkward way to use mmap.  It would be cleaner
> from an API perspective to have a user-mode defined flat address space
> indexed by pgoff which maps to an array of grefs, so you can sensibly do
> a multi-page mapping.
> 
> It would also allow you to hide the grefs from usermode entirely.  Then
> its just up to usermode to choose suitable file offsets for itself.

I considered this, but wanted to keep userspace compatability with the
previously created interface. If there's no reason to avoid doing so, I'll
change the ioctl interface to allocate an array of grants and calculate the
offset similar to how gntdev does currently (picks a suitable open slot).
Userspace does still have to know about grefs, of course, but only to pass
to the domain doing the mapping, not for its own mmap().

>> +	if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1) {
>> +		printk(KERN_ERR "%s: Only one page can be memory-mapped "
>> +			"per grant reference.\n", __func__);
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (!(vma->vm_flags & VM_SHARED)) {
>> +		printk(KERN_ERR "%s: Mapping must be shared.\n",
>> +			__func__);
>> +		return -EINVAL;
>> +	}
>> +
>> +	spin_lock(&gref_lock);
>> +	gref = find_gref(priv, vma->vm_pgoff);
>> +	if (gref == NULL) {
>> +		spin_unlock(&gref_lock);
>> +		printk(KERN_ERR "%s: Could not find a grant reference with "
>> +			"page index %lu.\n", __func__, vma->vm_pgoff);
>> +		return -ENOENT;
>> +	}
>> +	gref->users++;
>> +	spin_unlock(&gref_lock);
>> +
>> +	vma->vm_private_data = gref;
>> +
>> +	/* This flag prevents Bad PTE errors when the memory is unmapped. */
>> +	vma->vm_flags |= VM_RESERVED;
>> +	vma->vm_flags |= VM_DONTCOPY;
>> +	vma->vm_flags |= VM_IO;
> 
> If you set VM_PFNMAP then you don't need to deal with faults.

Useful to know. Is that more efficient/preferred to defining a
.fault handler? I used this method because that's what is used in
kernel/relay.c.

>> +
>> +	vma->vm_ops = &gntalloc_vmops;
>> +
>> +	return 0;
>> +}
>> +
>> +static const struct file_operations gntalloc_fops = {
>> +	.owner = THIS_MODULE,
>> +	.open = gntalloc_open,
>> +	.release = gntalloc_release,
>> +	.unlocked_ioctl = gntalloc_ioctl,
>> +	.mmap = gntalloc_mmap
>> +};
>> +
>> +/*
>> + * -------------------------------------
>> + * Module creation/destruction.
>> + * -------------------------------------
>> + */
>> +static struct miscdevice gntalloc_miscdev = {
>> +	.minor	= MISC_DYNAMIC_MINOR,
>> +	.name	= "xen/gntalloc",
>> +	.fops	= &gntalloc_fops,
>> +};
>> +
>> +static int __init gntalloc_init(void)
>> +{
>> +	int err;
>> +
>> +	if (!xen_domain()) {
>> +		if (debug)
>> +			printk(KERN_ERR "gntalloc: You must be running Xen\n");
>> +		return -ENODEV;
>> +	}
>> +
>> +	err = misc_register(&gntalloc_miscdev);
>> +	if (err != 0) {
>> +		printk(KERN_ERR "Could not register misc gntalloc device\n");
>> +		return err;
>> +	}
>> +
>> +	if (debug)
>> +		printk(KERN_INFO "Created grant allocation device at %d,%d\n",
>> +			MISC_MAJOR, gntalloc_miscdev.minor);
>> +
>> +	return 0;
>> +}
>> +
>> +static void __exit gntalloc_exit(void)
>> +{
>> +	misc_deregister(&gntalloc_miscdev);
>> +}
>> +
>> +module_init(gntalloc_init);
>> +module_exit(gntalloc_exit);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
>> +              "Daniel De Graaf <dgdegra@tycho.nsa.gov>");
>> +MODULE_DESCRIPTION("User-space grant reference allocator driver");
>> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
>> new file mode 100644
>> index 0000000..76b70d7
>> --- /dev/null
>> +++ b/include/xen/gntalloc.h
>> @@ -0,0 +1,68 @@
>> +/******************************************************************************
>> + * gntalloc.h
>> + *
>> + * Interface to /dev/xen/gntalloc.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License version 2
>> + * as published by the Free Software Foundation; or, when distributed
>> + * separately from the Linux kernel or incorporated into other
>> + * software packages, subject to the following license:
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this source file (the "Software"), to deal in the Software without
>> + * restriction, including without limitation the rights to use, copy, modify,
>> + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
>> + * and to permit persons to whom the Software is furnished to do so, subject to
>> + * the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
>> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#ifndef __LINUX_PUBLIC_GNTALLOC_H__
>> +#define __LINUX_PUBLIC_GNTALLOC_H__
>> +
>> +/*
>> + * Allocates a new page and creates a new grant reference.
>> + *
>> + * N.B. The page_idx is really the address >> PAGE_SHIFT, meaning it's the
>> + * page number and not an actual address.  It must be shifted again prior
>> + * to feeding it to mmap() (i.e. page_idx << PAGE_SHIFT).
>> + */
>> +#define IOCTL_GNTALLOC_ALLOC_GREF \
>> +_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntalloc_alloc_gref))
>> +struct ioctl_gntalloc_alloc_gref {
>> +	/* IN parameters */
>> +	/* The ID of the domain creating the grant reference. */
>> +	domid_t owner_domid;
>> +	/* The ID of the domain to be given access to the grant. */
>> +	domid_t foreign_domid;
>> +	/* The type of access given to domid. */
>> +	uint32_t readonly;
>> +	/* OUT parameters */
>> +	/* The grant reference of the newly created grant. */
>> +	grant_ref_t gref_id;
>> +	/* The page index (page number, NOT address) for grant mmap(). */
>> +	uint32_t page_idx;
>> +};
>> +
>> +/*
>> + * Deallocates the grant reference, freeing the associated page.
>> + */
>> +#define IOCTL_GNTALLOC_DEALLOC_GREF \
>> +_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntalloc_dealloc_gref))
>> +struct ioctl_gntalloc_dealloc_gref {
>> +	/* IN parameter */
>> +	/* The grant reference to deallocate. */
>> +	grant_ref_t gref_id;
>> +};
>> +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
> 
> J
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2010-12-14 14:55 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
@ 2010-12-14 21:42   ` Jeremy Fitzhardinge
  2010-12-14 22:06     ` Daniel De Graaf
  0 siblings, 1 reply; 29+ messages in thread
From: Jeremy Fitzhardinge @ 2010-12-14 21:42 UTC (permalink / raw)
  To: Daniel De Graaf; +Cc: xen-devel, Ian.Campbell

On 12/14/2010 06:55 AM, Daniel De Graaf wrote:
> This allows a userspace application to allocate a shared page for
> implementing inter-domain communication or device drivers. These
> shared pages can be mapped using the gntdev device or by the kernel
> in another domain.
>
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> ---
>  drivers/xen/Kconfig    |    7 +
>  drivers/xen/Makefile   |    2 +
>  drivers/xen/gntalloc.c |  456 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/xen/gntalloc.h |   68 +++++++
>  4 files changed, 533 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/xen/gntalloc.c
>  create mode 100644 include/xen/gntalloc.h
>
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index fa9982e..8398cb0 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -180,6 +180,13 @@ config XEN_GNTDEV
>  	help
>  	  Allows userspace processes use grants.
>  
> +config XEN_GRANT_DEV_ALLOC
> +	tristate "User-space grant reference allocator driver"
> +	depends on XEN
> +	help
> +	  Allows userspace processes to create pages with access granted
> +	  to other domains.
> +
>  config XEN_S3
>         def_bool y
>         depends on XEN_DOM0 && ACPI
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index ef1ea63..9814c1d 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
>  obj-$(CONFIG_XEN_BALLOON)		+= balloon.o
>  obj-$(CONFIG_XEN_DEV_EVTCHN)		+= xen-evtchn.o
>  obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
>  obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
>  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
>  obj-$(CONFIG_XEN_BLKDEV_TAP)            += blktap/
> @@ -25,3 +26,4 @@ obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
>  
>  xen-evtchn-y				:= evtchn.o
>  xen-gntdev-y				:= gntdev.o
> +xen-gntalloc-y				:= gntalloc.o
> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
> new file mode 100644
> index 0000000..f26adfd
> --- /dev/null
> +++ b/drivers/xen/gntalloc.c
> @@ -0,0 +1,456 @@
> +/******************************************************************************
> + * gntalloc.c
> + *
> + * Device for creating grant references (in user-space) that may be shared
> + * with other domains.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +/*
> + * This driver exists to allow userspace programs in Linux to allocate kernel
> + * memory that will later be shared with another domain.  Without this device,
> + * Linux userspace programs cannot create grant references.
> + *
> + * How this stuff works:
> + *   X -> granting a page to Y
> + *   Y -> mapping the grant from X
> + *
> + *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
> + *   2. X creates an entry in the grant table that says domid(Y) can
> + *      access P.
> + *   3. X gives the grant reference identifier, GREF, to Y.
> + *   4. A program in Y uses the gntdev device to map the page (owned by X
> + *      and identified by GREF) into domain(Y) and then into the address
> + *      space of the program.  Behind the scenes, this requires a
> + *      hypercall in which Xen modifies the host CPU page tables to
> + *      perform the sharing -- that's where the actual cross-domain mapping
> + *      occurs.

Presumably Y could be any grant-page user, not specifically gntdev?  So
you could use this to implement a frontend in userspace?

> + *   5. A program in X mmap()s a segment of the gntalloc device that
> + *      corresponds to the shared page.
> + *   6. The two userspace programs can now communicate over the shared page.
> + *
> + *
> + * NOTE TO USERSPACE LIBRARIES:
> + *   The grant allocation and mmap()ing are, naturally, two separate
> + *   operations.  You set up the sharing by calling the create ioctl() and
> + *   then the mmap().  You must tear down the sharing in the reverse order
> + *   (munmap() and then the destroy ioctl()).
> + *
> + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
> + * reference, this device can be used to consume kernel memory by leaving grant
> + * references mapped by another domain when an application exits. Therefore,
> + * there is a global limit on the number of pages that can be allocated. When
> + * all references to the page are unmapped, it will be freed during the next
> + * grant operation.
> + */
> +
> +#include <asm/atomic.h>
> +#include <linux/module.h>
> +#include <linux/miscdevice.h>
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <linux/fs.h>
> +#include <linux/device.h>
> +#include <linux/mm.h>
> +#include <asm/uaccess.h>
> +#include <linux/types.h>
> +#include <linux/list.h>
> +
> +#include <xen/xen.h>
> +#include <xen/page.h>
> +#include <xen/grant_table.h>
> +#include <xen/gntalloc.h>
> +
> +static int debug = 0;
> +module_param(debug, int, 0644);
> +
> +static int limit = 1024;
> +module_param(limit, int, 0644);
> +
> +static LIST_HEAD(gref_list);
> +static DEFINE_SPINLOCK(gref_lock);
> +static int gref_size = 0;
> +
> +/* Metadata on a grant reference. */
> +struct gntalloc_gref {
> +	struct list_head next_all;   /* list entry gref_list */
> +	struct list_head next_file;  /* list entry file->list, if open */
> +	domid_t foreign_domid;	     /* The ID of the domain to share with. */
> +	grant_ref_t gref_id;         /* The grant reference number. */
> +	unsigned int users;          /* Use count - when zero, waiting on Xen */
> +	struct page* page;	     /* The shared page. */
> +};
> +
> +struct gntalloc_file_private_data {
> +	struct list_head list;
> +};
> +
> +static void __del_gref(struct gntalloc_gref *gref);
> +
> +static void do_cleanup(void)
> +{
> +	struct gntalloc_gref *gref, *n;
> +	list_for_each_entry_safe(gref, n, &gref_list, next_all) {
> +		if (!gref->users)
> +			__del_gref(gref);
> +	}
> +}
> +
> +
> +static int add_gref(domid_t foreign_domid, uint32_t readonly,
> +	struct gntalloc_file_private_data *priv)
> +{
> +	int rc;
> +	struct gntalloc_gref *gref;
> +
> +	rc = -ENOMEM;
> +	spin_lock(&gref_lock);
> +	do_cleanup();
> +	if (gref_size >= limit) {
> +		spin_unlock(&gref_lock);
> +		rc = -ENOSPC;
> +		goto out;
> +	}
> +	gref_size++;
> +	spin_unlock(&gref_lock);
> +
> +	gref = kzalloc(sizeof(*gref), GFP_KERNEL);
> +	if (!gref)
> +		goto out;
> +
> +	gref->foreign_domid = foreign_domid;
> +	gref->users = 1;
> +
> +	/* Allocate the page to share. */
> +	gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);

Could this be GFP_HIGHUSER?

> +	if (!gref->page)
> +		goto out_nopage;
> +
> +	/* Grant foreign access to the page. */
> +	gref->gref_id = gnttab_grant_foreign_access(foreign_domid,
> +		pfn_to_mfn(page_to_pfn(gref->page)), readonly);
> +	if (gref->gref_id < 0) {
> +		printk(KERN_ERR "%s: failed to grant foreign access for mfn "
> +			"%lu to domain %u\n", __func__,
> +			pfn_to_mfn(page_to_pfn(gref->page)), foreign_domid);
> +		rc = -EFAULT;
> +		goto out_no_foreign_gref;
> +	}
> +
> +	/* Add to gref lists. */
> +	spin_lock(&gref_lock);
> +	list_add_tail(&gref->next_all, &gref_list);
> +	list_add_tail(&gref->next_file, &priv->list);
> +	spin_unlock(&gref_lock);
> +
> +	return gref->gref_id;
> +
> +out_no_foreign_gref:
> +	__free_page(gref->page);
> +out_nopage:
> +	kfree(gref);
> +out:
> +	return rc;
> +}
> +
> +static void __del_gref(struct gntalloc_gref *gref)
> +{
> +	if (gnttab_query_foreign_access(gref->gref_id))
> +		return;
> +
> +	if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
> +		return;
> +
> +	gref_size--;
> +	list_del(&gref->next_all);
> +
> +	__free_page(gref->page);
> +	kfree(gref);
> +}
> +
> +static struct gntalloc_gref* find_gref(struct gntalloc_file_private_data *priv,
> +                                       grant_ref_t gref_id)
> +{
> +	struct gntalloc_gref *gref;
> +	list_for_each_entry(gref, &priv->list, next_file) {
> +		if (gref->gref_id == gref_id)
> +			return gref;
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * -------------------------------------
> + *  File operations.
> + * -------------------------------------
> + */
> +static int gntalloc_open(struct inode *inode, struct file *filp)
> +{
> +	struct gntalloc_file_private_data *priv;
> +
> +	try_module_get(THIS_MODULE);
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		goto out_nomem;
> +	INIT_LIST_HEAD(&priv->list);
> +
> +	filp->private_data = priv;
> +
> +	if (debug)
> +		printk("%s: priv %p\n", __FUNCTION__, priv);
> +
> +	return 0;
> +
> +out_nomem:
> +	return -ENOMEM;
> +}
> +
> +static int gntalloc_release(struct inode *inode, struct file *filp)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +	struct gntalloc_gref *gref;
> +
> +	if (debug)
> +		printk("%s: priv %p\n", __FUNCTION__, priv);
> +
> +	spin_lock(&gref_lock);

Presumably this is unnecessary because there can be no other users if
you're tearing down the list and destroying priv.

> +	while (!list_empty(&priv->list)) {
> +		gref = list_entry(priv->list.next,
> +			struct gntalloc_gref, next_file);
> +		list_del(&gref->next_file);
> +		gref->users--;
> +		if (gref->users == 0)
> +			__del_gref(gref);
> +	}
> +	kfree(priv);
> +	spin_unlock(&gref_lock);
> +
> +	module_put(THIS_MODULE);
> +
> +	return 0;
> +}
> +
> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
> +                                 void __user *arg)
> +{
> +	int rc = 0;
> +	struct ioctl_gntalloc_alloc_gref op;
> +
> +	if (debug)
> +		printk("%s: priv %p\n", __FUNCTION__, priv);
> +
> +	if (copy_from_user(&op, arg, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto alloc_grant_out;
> +	}
> +	rc = add_gref(op.foreign_domid, op.readonly, priv);
> +	if (rc < 0)
> +		goto alloc_grant_out;
> +
> +	op.gref_id = rc;
> +	op.page_idx = rc;

Hm, see below.

> +
> +	rc = 0;
> +
> +	if (copy_to_user((void __user *)arg, &op, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto alloc_grant_out;
> +	}
> +
> +alloc_grant_out:
> +	return rc;
> +}
> +
> +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
> +                                   void __user *arg)
> +{
> +	int rc = 0;
> +	struct ioctl_gntalloc_dealloc_gref op;
> +	struct gntalloc_gref *gref;
> +
> +	if (debug)
> +		printk("%s: priv %p\n", __FUNCTION__, priv);
> +
> +	if (copy_from_user(&op, arg, sizeof(op))) {
> +		rc = -EFAULT;
> +		goto dealloc_grant_out;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	gref = find_gref(priv, op.gref_id);
> +	if (gref) {
> +		list_del(&gref->next_file);
> +		gref->users--;
> +		rc = 0;
> +	} else {
> +		rc = -EINVAL;
> +	}
> +
> +	do_cleanup();
> +	spin_unlock(&gref_lock);
> +dealloc_grant_out:
> +	return rc;
> +}
> +
> +static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
> +		unsigned long arg)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +
> +	switch (cmd) {
> +	case IOCTL_GNTALLOC_ALLOC_GREF:
> +		return gntalloc_ioctl_alloc(priv, (void __user*)arg);
> +
> +	case IOCTL_GNTALLOC_DEALLOC_GREF:
> +		return gntalloc_ioctl_dealloc(priv, (void __user*)arg);
> +
> +	default:
> +		return -ENOIOCTLCMD;
> +	}
> +
> +	return 0;
> +}
> +
> +static int gntalloc_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +	struct gntalloc_gref *gref = vma->vm_private_data;
> +	if (!gref)
> +		return VM_FAULT_SIGBUS;
> +
> +	vmf->page = gref->page;
> +	get_page(vmf->page);
> +
> +	return 0;
> +};
> +
> +static void gntalloc_vma_close(struct vm_area_struct *vma)
> +{
> +	struct gntalloc_gref *gref = vma->vm_private_data;
> +	if (!gref)
> +		return;
> +
> +	spin_lock(&gref_lock);
> +	gref->users--;
> +	if (gref->users == 0)
> +		__del_gref(gref);
> +	spin_unlock(&gref_lock);
> +}
> +
> +static struct vm_operations_struct gntalloc_vmops = {
> +	.fault = gntalloc_vma_fault,
> +	.close = gntalloc_vma_close,
> +};
> +
> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	struct gntalloc_file_private_data *priv = filp->private_data;
> +	struct gntalloc_gref *gref;
> +
> +	if (debug)
> +		printk("%s: priv %p, page %lu\n", __func__,
> +		       priv, vma->vm_pgoff);
> +
> +	/*
> +	 * There is a 1-to-1 correspondence of grant references to shared
> +	 * pages, so it only makes sense to map exactly one page per
> +	 * call to mmap().
> +	 */

Single-page mmap makes sense if the only possible use-cases are for
single-page mappings, but if you're talking about framebuffers and the
like is seems like a very awkward way to use mmap.  It would be cleaner
from an API perspective to have a user-mode defined flat address space
indexed by pgoff which maps to an array of grefs, so you can sensibly do
a multi-page mapping.

It would also allow you to hide the grefs from usermode entirely.  Then
its just up to usermode to choose suitable file offsets for itself.

> +	if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1) {
> +		printk(KERN_ERR "%s: Only one page can be memory-mapped "
> +			"per grant reference.\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (!(vma->vm_flags & VM_SHARED)) {
> +		printk(KERN_ERR "%s: Mapping must be shared.\n",
> +			__func__);
> +		return -EINVAL;
> +	}
> +
> +	spin_lock(&gref_lock);
> +	gref = find_gref(priv, vma->vm_pgoff);
> +	if (gref == NULL) {
> +		spin_unlock(&gref_lock);
> +		printk(KERN_ERR "%s: Could not find a grant reference with "
> +			"page index %lu.\n", __func__, vma->vm_pgoff);
> +		return -ENOENT;
> +	}
> +	gref->users++;
> +	spin_unlock(&gref_lock);
> +
> +	vma->vm_private_data = gref;
> +
> +	/* This flag prevents Bad PTE errors when the memory is unmapped. */
> +	vma->vm_flags |= VM_RESERVED;
> +	vma->vm_flags |= VM_DONTCOPY;
> +	vma->vm_flags |= VM_IO;

If you set VM_PFNMAP then you don't need to deal with faults.

> +
> +	vma->vm_ops = &gntalloc_vmops;
> +
> +	return 0;
> +}
> +
> +static const struct file_operations gntalloc_fops = {
> +	.owner = THIS_MODULE,
> +	.open = gntalloc_open,
> +	.release = gntalloc_release,
> +	.unlocked_ioctl = gntalloc_ioctl,
> +	.mmap = gntalloc_mmap
> +};
> +
> +/*
> + * -------------------------------------
> + * Module creation/destruction.
> + * -------------------------------------
> + */
> +static struct miscdevice gntalloc_miscdev = {
> +	.minor	= MISC_DYNAMIC_MINOR,
> +	.name	= "xen/gntalloc",
> +	.fops	= &gntalloc_fops,
> +};
> +
> +static int __init gntalloc_init(void)
> +{
> +	int err;
> +
> +	if (!xen_domain()) {
> +		if (debug)
> +			printk(KERN_ERR "gntalloc: You must be running Xen\n");
> +		return -ENODEV;
> +	}
> +
> +	err = misc_register(&gntalloc_miscdev);
> +	if (err != 0) {
> +		printk(KERN_ERR "Could not register misc gntalloc device\n");
> +		return err;
> +	}
> +
> +	if (debug)
> +		printk(KERN_INFO "Created grant allocation device at %d,%d\n",
> +			MISC_MAJOR, gntalloc_miscdev.minor);
> +
> +	return 0;
> +}
> +
> +static void __exit gntalloc_exit(void)
> +{
> +	misc_deregister(&gntalloc_miscdev);
> +}
> +
> +module_init(gntalloc_init);
> +module_exit(gntalloc_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
> +              "Daniel De Graaf <dgdegra@tycho.nsa.gov>");
> +MODULE_DESCRIPTION("User-space grant reference allocator driver");
> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
> new file mode 100644
> index 0000000..76b70d7
> --- /dev/null
> +++ b/include/xen/gntalloc.h
> @@ -0,0 +1,68 @@
> +/******************************************************************************
> + * gntalloc.h
> + *
> + * Interface to /dev/xen/gntalloc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation; or, when distributed
> + * separately from the Linux kernel or incorporated into other
> + * software packages, subject to the following license:
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this source file (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use, copy, modify,
> + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
> + * and to permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef __LINUX_PUBLIC_GNTALLOC_H__
> +#define __LINUX_PUBLIC_GNTALLOC_H__
> +
> +/*
> + * Allocates a new page and creates a new grant reference.
> + *
> + * N.B. The page_idx is really the address >> PAGE_SHIFT, meaning it's the
> + * page number and not an actual address.  It must be shifted again prior
> + * to feeding it to mmap() (i.e. page_idx << PAGE_SHIFT).
> + */
> +#define IOCTL_GNTALLOC_ALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntalloc_alloc_gref))
> +struct ioctl_gntalloc_alloc_gref {
> +	/* IN parameters */
> +	/* The ID of the domain creating the grant reference. */
> +	domid_t owner_domid;
> +	/* The ID of the domain to be given access to the grant. */
> +	domid_t foreign_domid;
> +	/* The type of access given to domid. */
> +	uint32_t readonly;
> +	/* OUT parameters */
> +	/* The grant reference of the newly created grant. */
> +	grant_ref_t gref_id;
> +	/* The page index (page number, NOT address) for grant mmap(). */
> +	uint32_t page_idx;
> +};
> +
> +/*
> + * Deallocates the grant reference, freeing the associated page.
> + */
> +#define IOCTL_GNTALLOC_DEALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntalloc_dealloc_gref))
> +struct ioctl_gntalloc_dealloc_gref {
> +	/* IN parameter */
> +	/* The grant reference to deallocate. */
> +	grant_ref_t gref_id;
> +};
> +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */

J

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
  2010-12-14 14:55 [PATCH v2] Userspace grant communication Daniel De Graaf
@ 2010-12-14 14:55 ` Daniel De Graaf
  2010-12-14 21:42   ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 29+ messages in thread
From: Daniel De Graaf @ 2010-12-14 14:55 UTC (permalink / raw)
  To: xen-devel; +Cc: Daniel De Graaf, Ian.Campbell

This allows a userspace application to allocate a shared page for
implementing inter-domain communication or device drivers. These
shared pages can be mapped using the gntdev device or by the kernel
in another domain.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/Kconfig    |    7 +
 drivers/xen/Makefile   |    2 +
 drivers/xen/gntalloc.c |  456 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/gntalloc.h |   68 +++++++
 4 files changed, 533 insertions(+), 0 deletions(-)
 create mode 100644 drivers/xen/gntalloc.c
 create mode 100644 include/xen/gntalloc.h

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index fa9982e..8398cb0 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -180,6 +180,13 @@ config XEN_GNTDEV
 	help
 	  Allows userspace processes use grants.
 
+config XEN_GRANT_DEV_ALLOC
+	tristate "User-space grant reference allocator driver"
+	depends on XEN
+	help
+	  Allows userspace processes to create pages with access granted
+	  to other domains.
+
 config XEN_S3
        def_bool y
        depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ef1ea63..9814c1d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)		+= balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)		+= xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)            += blktap/
@@ -25,3 +26,4 @@ obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
 
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
+xen-gntalloc-y				:= gntalloc.o
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
new file mode 100644
index 0000000..f26adfd
--- /dev/null
+++ b/drivers/xen/gntalloc.c
@@ -0,0 +1,456 @@
+/******************************************************************************
+ * gntalloc.c
+ *
+ * Device for creating grant references (in user-space) that may be shared
+ * with other domains.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * This driver exists to allow userspace programs in Linux to allocate kernel
+ * memory that will later be shared with another domain.  Without this device,
+ * Linux userspace programs cannot create grant references.
+ *
+ * How this stuff works:
+ *   X -> granting a page to Y
+ *   Y -> mapping the grant from X
+ *
+ *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
+ *   2. X creates an entry in the grant table that says domid(Y) can
+ *      access P.
+ *   3. X gives the grant reference identifier, GREF, to Y.
+ *   4. A program in Y uses the gntdev device to map the page (owned by X
+ *      and identified by GREF) into domain(Y) and then into the address
+ *      space of the program.  Behind the scenes, this requires a
+ *      hypercall in which Xen modifies the host CPU page tables to
+ *      perform the sharing -- that's where the actual cross-domain mapping
+ *      occurs.
+ *   5. A program in X mmap()s a segment of the gntalloc device that
+ *      corresponds to the shared page.
+ *   6. The two userspace programs can now communicate over the shared page.
+ *
+ *
+ * NOTE TO USERSPACE LIBRARIES:
+ *   The grant allocation and mmap()ing are, naturally, two separate
+ *   operations.  You set up the sharing by calling the create ioctl() and
+ *   then the mmap().  You must tear down the sharing in the reverse order
+ *   (munmap() and then the destroy ioctl()).
+ *
+ * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
+ * reference, this device can be used to consume kernel memory by leaving grant
+ * references mapped by another domain when an application exits. Therefore,
+ * there is a global limit on the number of pages that can be allocated. When
+ * all references to the page are unmapped, it will be freed during the next
+ * grant operation.
+ */
+
+#include <asm/atomic.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/gntalloc.h>
+
+static int debug = 0;
+module_param(debug, int, 0644);
+
+static int limit = 1024;
+module_param(limit, int, 0644);
+
+static LIST_HEAD(gref_list);
+static DEFINE_SPINLOCK(gref_lock);
+static int gref_size = 0;
+
+/* Metadata on a grant reference. */
+struct gntalloc_gref {
+	struct list_head next_all;   /* list entry gref_list */
+	struct list_head next_file;  /* list entry file->list, if open */
+	domid_t foreign_domid;	     /* The ID of the domain to share with. */
+	grant_ref_t gref_id;         /* The grant reference number. */
+	unsigned int users;          /* Use count - when zero, waiting on Xen */
+	struct page* page;	     /* The shared page. */
+};
+
+struct gntalloc_file_private_data {
+	struct list_head list;
+};
+
+static void __del_gref(struct gntalloc_gref *gref);
+
+static void do_cleanup(void)
+{
+	struct gntalloc_gref *gref, *n;
+	list_for_each_entry_safe(gref, n, &gref_list, next_all) {
+		if (!gref->users)
+			__del_gref(gref);
+	}
+}
+
+
+static int add_gref(domid_t foreign_domid, uint32_t readonly,
+	struct gntalloc_file_private_data *priv)
+{
+	int rc;
+	struct gntalloc_gref *gref;
+
+	rc = -ENOMEM;
+	spin_lock(&gref_lock);
+	do_cleanup();
+	if (gref_size >= limit) {
+		spin_unlock(&gref_lock);
+		rc = -ENOSPC;
+		goto out;
+	}
+	gref_size++;
+	spin_unlock(&gref_lock);
+
+	gref = kzalloc(sizeof(*gref), GFP_KERNEL);
+	if (!gref)
+		goto out;
+
+	gref->foreign_domid = foreign_domid;
+	gref->users = 1;
+
+	/* Allocate the page to share. */
+	gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+	if (!gref->page)
+		goto out_nopage;
+
+	/* Grant foreign access to the page. */
+	gref->gref_id = gnttab_grant_foreign_access(foreign_domid,
+		pfn_to_mfn(page_to_pfn(gref->page)), readonly);
+	if (gref->gref_id < 0) {
+		printk(KERN_ERR "%s: failed to grant foreign access for mfn "
+			"%lu to domain %u\n", __func__,
+			pfn_to_mfn(page_to_pfn(gref->page)), foreign_domid);
+		rc = -EFAULT;
+		goto out_no_foreign_gref;
+	}
+
+	/* Add to gref lists. */
+	spin_lock(&gref_lock);
+	list_add_tail(&gref->next_all, &gref_list);
+	list_add_tail(&gref->next_file, &priv->list);
+	spin_unlock(&gref_lock);
+
+	return gref->gref_id;
+
+out_no_foreign_gref:
+	__free_page(gref->page);
+out_nopage:
+	kfree(gref);
+out:
+	return rc;
+}
+
+static void __del_gref(struct gntalloc_gref *gref)
+{
+	if (gnttab_query_foreign_access(gref->gref_id))
+		return;
+
+	if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
+		return;
+
+	gref_size--;
+	list_del(&gref->next_all);
+
+	__free_page(gref->page);
+	kfree(gref);
+}
+
+static struct gntalloc_gref* find_gref(struct gntalloc_file_private_data *priv,
+                                       grant_ref_t gref_id)
+{
+	struct gntalloc_gref *gref;
+	list_for_each_entry(gref, &priv->list, next_file) {
+		if (gref->gref_id == gref_id)
+			return gref;
+	}
+	return NULL;
+}
+
+/*
+ * -------------------------------------
+ *  File operations.
+ * -------------------------------------
+ */
+static int gntalloc_open(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv;
+
+	try_module_get(THIS_MODULE);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto out_nomem;
+	INIT_LIST_HEAD(&priv->list);
+
+	filp->private_data = priv;
+
+	if (debug)
+		printk("%s: priv %p\n", __FUNCTION__, priv);
+
+	return 0;
+
+out_nomem:
+	return -ENOMEM;
+}
+
+static int gntalloc_release(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+
+	if (debug)
+		printk("%s: priv %p\n", __FUNCTION__, priv);
+
+	spin_lock(&gref_lock);
+	while (!list_empty(&priv->list)) {
+		gref = list_entry(priv->list.next,
+			struct gntalloc_gref, next_file);
+		list_del(&gref->next_file);
+		gref->users--;
+		if (gref->users == 0)
+			__del_gref(gref);
+	}
+	kfree(priv);
+	spin_unlock(&gref_lock);
+
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
+                                 void __user *arg)
+{
+	int rc = 0;
+	struct ioctl_gntalloc_alloc_gref op;
+
+	if (debug)
+		printk("%s: priv %p\n", __FUNCTION__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto alloc_grant_out;
+	}
+	rc = add_gref(op.foreign_domid, op.readonly, priv);
+	if (rc < 0)
+		goto alloc_grant_out;
+
+	op.gref_id = rc;
+	op.page_idx = rc;
+
+	rc = 0;
+
+	if (copy_to_user((void __user *)arg, &op, sizeof(op))) {
+		rc = -EFAULT;
+		goto alloc_grant_out;
+	}
+
+alloc_grant_out:
+	return rc;
+}
+
+static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
+                                   void __user *arg)
+{
+	int rc = 0;
+	struct ioctl_gntalloc_dealloc_gref op;
+	struct gntalloc_gref *gref;
+
+	if (debug)
+		printk("%s: priv %p\n", __FUNCTION__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto dealloc_grant_out;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_gref(priv, op.gref_id);
+	if (gref) {
+		list_del(&gref->next_file);
+		gref->users--;
+		rc = 0;
+	} else {
+		rc = -EINVAL;
+	}
+
+	do_cleanup();
+	spin_unlock(&gref_lock);
+dealloc_grant_out:
+	return rc;
+}
+
+static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+
+	switch (cmd) {
+	case IOCTL_GNTALLOC_ALLOC_GREF:
+		return gntalloc_ioctl_alloc(priv, (void __user*)arg);
+
+	case IOCTL_GNTALLOC_DEALLOC_GREF:
+		return gntalloc_ioctl_dealloc(priv, (void __user*)arg);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return 0;
+}
+
+static int gntalloc_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct gntalloc_gref *gref = vma->vm_private_data;
+	if (!gref)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = gref->page;
+	get_page(vmf->page);
+
+	return 0;
+};
+
+static void gntalloc_vma_close(struct vm_area_struct *vma)
+{
+	struct gntalloc_gref *gref = vma->vm_private_data;
+	if (!gref)
+		return;
+
+	spin_lock(&gref_lock);
+	gref->users--;
+	if (gref->users == 0)
+		__del_gref(gref);
+	spin_unlock(&gref_lock);
+}
+
+static struct vm_operations_struct gntalloc_vmops = {
+	.fault = gntalloc_vma_fault,
+	.close = gntalloc_vma_close,
+};
+
+static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+
+	if (debug)
+		printk("%s: priv %p, page %lu\n", __func__,
+		       priv, vma->vm_pgoff);
+
+	/*
+	 * There is a 1-to-1 correspondence of grant references to shared
+	 * pages, so it only makes sense to map exactly one page per
+	 * call to mmap().
+	 */
+	if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1) {
+		printk(KERN_ERR "%s: Only one page can be memory-mapped "
+			"per grant reference.\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		printk(KERN_ERR "%s: Mapping must be shared.\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_gref(priv, vma->vm_pgoff);
+	if (gref == NULL) {
+		spin_unlock(&gref_lock);
+		printk(KERN_ERR "%s: Could not find a grant reference with "
+			"page index %lu.\n", __func__, vma->vm_pgoff);
+		return -ENOENT;
+	}
+	gref->users++;
+	spin_unlock(&gref_lock);
+
+	vma->vm_private_data = gref;
+
+	/* This flag prevents Bad PTE errors when the memory is unmapped. */
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTCOPY;
+	vma->vm_flags |= VM_IO;
+
+	vma->vm_ops = &gntalloc_vmops;
+
+	return 0;
+}
+
+static const struct file_operations gntalloc_fops = {
+	.owner = THIS_MODULE,
+	.open = gntalloc_open,
+	.release = gntalloc_release,
+	.unlocked_ioctl = gntalloc_ioctl,
+	.mmap = gntalloc_mmap
+};
+
+/*
+ * -------------------------------------
+ * Module creation/destruction.
+ * -------------------------------------
+ */
+static struct miscdevice gntalloc_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "xen/gntalloc",
+	.fops	= &gntalloc_fops,
+};
+
+static int __init gntalloc_init(void)
+{
+	int err;
+
+	if (!xen_domain()) {
+		if (debug)
+			printk(KERN_ERR "gntalloc: You must be running Xen\n");
+		return -ENODEV;
+	}
+
+	err = misc_register(&gntalloc_miscdev);
+	if (err != 0) {
+		printk(KERN_ERR "Could not register misc gntalloc device\n");
+		return err;
+	}
+
+	if (debug)
+		printk(KERN_INFO "Created grant allocation device at %d,%d\n",
+			MISC_MAJOR, gntalloc_miscdev.minor);
+
+	return 0;
+}
+
+static void __exit gntalloc_exit(void)
+{
+	misc_deregister(&gntalloc_miscdev);
+}
+
+module_init(gntalloc_init);
+module_exit(gntalloc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
+              "Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("User-space grant reference allocator driver");
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
new file mode 100644
index 0000000..76b70d7
--- /dev/null
+++ b/include/xen/gntalloc.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * gntalloc.h
+ *
+ * Interface to /dev/xen/gntalloc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTALLOC_H__
+#define __LINUX_PUBLIC_GNTALLOC_H__
+
+/*
+ * Allocates a new page and creates a new grant reference.
+ *
+ * N.B. The page_idx is really the address >> PAGE_SHIFT, meaning it's the
+ * page number and not an actual address.  It must be shifted again prior
+ * to feeding it to mmap() (i.e. page_idx << PAGE_SHIFT).
+ */
+#define IOCTL_GNTALLOC_ALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntalloc_alloc_gref))
+struct ioctl_gntalloc_alloc_gref {
+	/* IN parameters */
+	/* The ID of the domain creating the grant reference. */
+	domid_t owner_domid;
+	/* The ID of the domain to be given access to the grant. */
+	domid_t foreign_domid;
+	/* The type of access given to domid. */
+	uint32_t readonly;
+	/* OUT parameters */
+	/* The grant reference of the newly created grant. */
+	grant_ref_t gref_id;
+	/* The page index (page number, NOT address) for grant mmap(). */
+	uint32_t page_idx;
+};
+
+/*
+ * Deallocates the grant reference, freeing the associated page.
+ */
+#define IOCTL_GNTALLOC_DEALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntalloc_dealloc_gref))
+struct ioctl_gntalloc_dealloc_gref {
+	/* IN parameter */
+	/* The grant reference to deallocate. */
+	grant_ref_t gref_id;
+};
+#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2011-03-04 16:34 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-01-21 15:59 [SPAM] [PATCH v5] Userspace grant communication Daniel De Graaf
2011-01-21 15:59 ` [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open Daniel De Graaf
2011-01-21 15:59 ` [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually Daniel De Graaf
2011-01-21 15:59 ` [PATCH 3/6] xen-gntdev: Add reference counting to maps Daniel De Graaf
2011-01-21 15:59 ` [PATCH 4/6] xen-gntdev: Support mapping in HVM domains Daniel De Graaf
2011-01-27 18:52   ` Konrad Rzeszutek Wilk
2011-01-27 19:26     ` Daniel De Graaf
2011-03-04 15:57     ` Ian Campbell
2011-03-04 16:34       ` Daniel De Graaf
2011-01-21 15:59 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
2011-01-27 18:52   ` Konrad Rzeszutek Wilk
2011-01-27 19:23     ` Konrad Rzeszutek Wilk
2011-01-27 19:51       ` Daniel De Graaf
2011-01-27 20:55     ` Daniel De Graaf
2011-01-27 21:29       ` Konrad Rzeszutek Wilk
2011-01-21 15:59 ` [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl Daniel De Graaf
2011-01-27 19:20   ` Konrad Rzeszutek Wilk
2011-01-27 20:09     ` Daniel De Graaf
  -- strict thread matches above, loose matches on Subject: below --
2011-02-03 17:18 [PATCH v6] Userspace grant communication Daniel De Graaf
2011-02-03 17:19 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
2011-02-08 22:48   ` Konrad Rzeszutek Wilk
2011-02-09 18:52     ` Daniel De Graaf
2010-12-14 14:55 [PATCH v2] Userspace grant communication Daniel De Graaf
2010-12-14 14:55 ` [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver Daniel De Graaf
2010-12-14 21:42   ` Jeremy Fitzhardinge
2010-12-14 22:06     ` Daniel De Graaf
2010-12-14 22:40       ` Jeremy Fitzhardinge
2010-12-15 14:18         ` Daniel De Graaf
2010-12-16  1:05           ` Jeremy Fitzhardinge
2010-12-16 15:22             ` Daniel De Graaf
2010-12-16 19:14               ` Jeremy Fitzhardinge

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.