* Hopefully correct backports for gntdev deadlock
@ 2022-07-01 0:09 Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 5.10] xen/gntdev: Avoid blocking in unmap_grant_pages() Demi Marie Obenour
` (5 more replies)
0 siblings, 6 replies; 9+ messages in thread
From: Demi Marie Obenour @ 2022-07-01 0:09 UTC (permalink / raw)
To: Juergen Gross, Greg KH; +Cc: stable, Xen developer discussion
This backports "xen/gntdev: Avoid blocking in unmap_grant_pages()" to
the various stable trees, hopefully correctly.
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 5.10] xen/gntdev: Avoid blocking in unmap_grant_pages()
2022-07-01 0:09 Hopefully correct backports for gntdev deadlock Demi Marie Obenour
@ 2022-07-01 0:09 ` Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 5.4] " Demi Marie Obenour
` (4 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Demi Marie Obenour @ 2022-07-01 0:09 UTC (permalink / raw)
To: Juergen Gross, Greg KH
Cc: stable, Xen developer discussion, Demi Marie Obenour
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_put_map(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
drivers/xen/gntdev-common.h | 7 ++
drivers/xen/gntdev.c | 140 +++++++++++++++++++++++++-----------
2 files changed, 106 insertions(+), 41 deletions(-)
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 20d7d059dadb..40ef379c28ab 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -16,6 +16,7 @@
#include <linux/mmu_notifier.h>
#include <linux/types.h>
#include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
struct gntdev_dmabuf_priv;
@@ -56,6 +57,7 @@ struct gntdev_grant_map {
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
@@ -73,6 +75,11 @@ struct gntdev_grant_map {
/* Needed to avoid allocation in gnttab_dma_free_pages(). */
xen_pfn_t *frames;
#endif
+
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in __unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 54778aadf618..f415c056ff8a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -35,6 +35,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
+#include <linux/workqueue.h>
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -60,10 +61,11 @@ module_param(limit, uint, 0644);
MODULE_PARM_DESC(limit,
"Maximum number of grants that may be mapped by one mapping request");
+/* True in PV mode, false otherwise */
static int use_ptemod;
-static int unmap_grant_pages(struct gntdev_grant_map *map,
- int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+ int offset, int pages);
static struct miscdevice gntdev_miscdev;
@@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
kvfree(map->unmap_ops);
kvfree(map->kmap_ops);
kvfree(map->kunmap_ops);
+ kvfree(map->being_removed);
kfree(map);
}
@@ -140,12 +143,15 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
add->kunmap_ops = kvcalloc(count,
sizeof(add->kunmap_ops[0]), GFP_KERNEL);
add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
NULL == add->kmap_ops ||
NULL == add->kunmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
@@ -240,9 +246,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
if (!refcount_dec_and_test(&map->users))
return;
- if (map->pages && !use_ptemod)
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1. Do NOT use refcount_inc() here, as it will detect that
+ * the reference count is zero and WARN().
+ */
+ refcount_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
unmap_grant_pages(map, 0, map->count);
+ /* Check if the memory now needs to be freed */
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -288,6 +321,7 @@ static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
@@ -336,87 +370,109 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
map->pages, map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
+ unsigned int i;
+ struct gntdev_grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset+i].status);
+ pr_debug("unmap handle=%d st=%d\n",
+ map->unmap_ops[offset+i].handle,
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = -1;
+ }
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gnttab_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
+{
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
+
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ refcount_inc(&map->users); /* to keep map alive during async call below */
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
-
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
- pr_debug("unmap handle=%d st=%d\n",
- map->unmap_ops[offset+i].handle,
- map->unmap_ops[offset+i].status);
- map->unmap_ops[offset+i].handle = -1;
- }
- return err;
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages && map->unmap_ops[offset].handle == -1) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset+range].handle == -1)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
@@ -468,7 +524,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
- int err;
if (!mmu_notifier_range_blockable(range))
return false;
@@ -489,10 +544,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
range->start, range->end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
return true;
}
@@ -980,6 +1034,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
if (use_ptemod && map->vma)
goto unlock_out;
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
--
2.36.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 5.4] xen/gntdev: Avoid blocking in unmap_grant_pages()
2022-07-01 0:09 Hopefully correct backports for gntdev deadlock Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 5.10] xen/gntdev: Avoid blocking in unmap_grant_pages() Demi Marie Obenour
@ 2022-07-01 0:09 ` Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 4.19] " Demi Marie Obenour
` (3 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Demi Marie Obenour @ 2022-07-01 0:09 UTC (permalink / raw)
To: Juergen Gross, Greg KH
Cc: stable, Xen developer discussion, Demi Marie Obenour
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_put_map(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
drivers/xen/gntdev-common.h | 8 ++
drivers/xen/gntdev.c | 147 +++++++++++++++++++++++++-----------
2 files changed, 110 insertions(+), 45 deletions(-)
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 2f8b949c3eeb..fab6f5a54d5b 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -15,6 +15,8 @@
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/types.h>
+#include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
struct gntdev_dmabuf_priv;
@@ -61,6 +63,7 @@ struct gntdev_grant_map {
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
@@ -78,6 +81,11 @@ struct gntdev_grant_map {
/* Needed to avoid allocation in gnttab_dma_free_pages(). */
xen_pfn_t *frames;
#endif
+
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in __unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e953ea34b3e4..f46479347765 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -35,6 +35,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
+#include <linux/workqueue.h>
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -62,11 +63,12 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
static atomic_t pages_mapped = ATOMIC_INIT(0);
+/* True in PV mode, false otherwise */
static int use_ptemod;
#define populate_freeable_maps use_ptemod
-static int unmap_grant_pages(struct gntdev_grant_map *map,
- int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+ int offset, int pages);
static struct miscdevice gntdev_miscdev;
@@ -123,6 +125,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
kfree(map->unmap_ops);
kfree(map->kmap_ops);
kfree(map->kunmap_ops);
+ kfree(map->being_removed);
kfree(map);
}
@@ -142,12 +145,15 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
NULL == add->kmap_ops ||
NULL == add->kunmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
@@ -243,6 +249,35 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
return;
atomic_sub(map->count, &pages_mapped);
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1. Do NOT use refcount_inc() here, as it will detect that
+ * the reference count is zero and WARN().
+ */
+ refcount_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
+ unmap_grant_pages(map, 0, map->count);
+
+ /* Check if the memory now needs to be freed */
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
@@ -298,6 +333,7 @@ static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
@@ -346,87 +382,109 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
map->pages, map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
+ unsigned int i;
+ struct gntdev_grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset+i].status);
+ pr_debug("unmap handle=%d st=%d\n",
+ map->unmap_ops[offset+i].handle,
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = -1;
+ }
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gnttab_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
+{
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
+
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ refcount_inc(&map->users); /* to keep map alive during async call below */
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
-
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
- pr_debug("unmap handle=%d st=%d\n",
- map->unmap_ops[offset+i].handle,
- map->unmap_ops[offset+i].status);
- map->unmap_ops[offset+i].handle = -1;
- }
- return err;
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages && map->unmap_ops[offset].handle == -1) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset+range].handle == -1)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
@@ -496,7 +554,6 @@ static int unmap_if_in_range(struct gntdev_grant_map *map,
bool blockable)
{
unsigned long mstart, mend;
- int err;
if (!in_range(map, start, end))
return 0;
@@ -510,10 +567,9 @@ static int unmap_if_in_range(struct gntdev_grant_map *map,
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
start, end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
return 0;
}
@@ -554,7 +610,6 @@ static void mn_release(struct mmu_notifier *mn,
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct gntdev_grant_map *map;
- int err;
mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
@@ -563,8 +618,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
if (!map->vma)
@@ -572,8 +626,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
mutex_unlock(&priv->lock);
}
@@ -1102,6 +1155,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
}
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
--
2.36.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4.19] xen/gntdev: Avoid blocking in unmap_grant_pages()
2022-07-01 0:09 Hopefully correct backports for gntdev deadlock Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 5.10] xen/gntdev: Avoid blocking in unmap_grant_pages() Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 5.4] " Demi Marie Obenour
@ 2022-07-01 0:09 ` Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 4.14] " Demi Marie Obenour
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Demi Marie Obenour @ 2022-07-01 0:09 UTC (permalink / raw)
To: Juergen Gross, Greg KH
Cc: stable, Xen developer discussion, Demi Marie Obenour
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_put_map(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
drivers/xen/gntdev-common.h | 8 ++
drivers/xen/gntdev.c | 146 +++++++++++++++++++++++++-----------
2 files changed, 109 insertions(+), 45 deletions(-)
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 2f8b949c3eeb..fab6f5a54d5b 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -15,6 +15,8 @@
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/types.h>
+#include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
struct gntdev_dmabuf_priv;
@@ -61,6 +63,7 @@ struct gntdev_grant_map {
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
@@ -78,6 +81,11 @@ struct gntdev_grant_map {
/* Needed to avoid allocation in gnttab_dma_free_pages(). */
xen_pfn_t *frames;
#endif
+
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in __unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e519063e421e..492084814f55 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -64,11 +64,12 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
static atomic_t pages_mapped = ATOMIC_INIT(0);
+/* True in PV mode, false otherwise */
static int use_ptemod;
#define populate_freeable_maps use_ptemod
-static int unmap_grant_pages(struct gntdev_grant_map *map,
- int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+ int offset, int pages);
static struct miscdevice gntdev_miscdev;
@@ -125,6 +126,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
kfree(map->unmap_ops);
kfree(map->kmap_ops);
kfree(map->kunmap_ops);
+ kfree(map->being_removed);
kfree(map);
}
@@ -144,12 +146,15 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
NULL == add->kmap_ops ||
NULL == add->kunmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
@@ -245,6 +250,35 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
return;
atomic_sub(map->count, &pages_mapped);
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1. Do NOT use refcount_inc() here, as it will detect that
+ * the reference count is zero and WARN().
+ */
+ refcount_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
+ unmap_grant_pages(map, 0, map->count);
+
+ /* Check if the memory now needs to be freed */
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
@@ -302,6 +336,7 @@ static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token,
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
@@ -350,87 +385,109 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
map->pages, map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
+ unsigned int i;
+ struct gntdev_grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset+i].status);
+ pr_debug("unmap handle=%d st=%d\n",
+ map->unmap_ops[offset+i].handle,
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = -1;
+ }
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gnttab_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
+{
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
+
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ refcount_inc(&map->users); /* to keep map alive during async call below */
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
-
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
- pr_debug("unmap handle=%d st=%d\n",
- map->unmap_ops[offset+i].handle,
- map->unmap_ops[offset+i].status);
- map->unmap_ops[offset+i].handle = -1;
- }
- return err;
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages && map->unmap_ops[offset].handle == -1) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset+range].handle == -1)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
@@ -500,7 +557,6 @@ static int unmap_if_in_range(struct gntdev_grant_map *map,
bool blockable)
{
unsigned long mstart, mend;
- int err;
if (!in_range(map, start, end))
return 0;
@@ -514,10 +570,9 @@ static int unmap_if_in_range(struct gntdev_grant_map *map,
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
start, end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
return 0;
}
@@ -558,7 +613,6 @@ static void mn_release(struct mmu_notifier *mn,
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct gntdev_grant_map *map;
- int err;
mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
@@ -567,8 +621,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
if (!map->vma)
@@ -576,8 +629,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
mutex_unlock(&priv->lock);
}
@@ -1113,6 +1165,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
}
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
--
2.36.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4.14] xen/gntdev: Avoid blocking in unmap_grant_pages()
2022-07-01 0:09 Hopefully correct backports for gntdev deadlock Demi Marie Obenour
` (2 preceding siblings ...)
2022-07-01 0:09 ` [PATCH 4.19] " Demi Marie Obenour
@ 2022-07-01 0:09 ` Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 4.9] " Demi Marie Obenour
2022-07-04 13:46 ` Hopefully correct backports for gntdev deadlock Juergen Gross
5 siblings, 0 replies; 9+ messages in thread
From: Demi Marie Obenour @ 2022-07-01 0:09 UTC (permalink / raw)
To: Juergen Gross, Greg KH
Cc: stable, Xen developer discussion, Demi Marie Obenour
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_put_map(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
drivers/xen/gntdev.c | 145 ++++++++++++++++++++++++++++++-------------
1 file changed, 103 insertions(+), 42 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 7b4ac5505f53..2827015604fb 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -59,6 +59,7 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
static atomic_t pages_mapped = ATOMIC_INIT(0);
+/* True in PV mode, false otherwise */
static int use_ptemod;
#define populate_freeable_maps use_ptemod
@@ -94,11 +95,16 @@ struct grant_map {
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+static void unmap_grant_pages(struct grant_map *map, int offset, int pages);
/* ------------------------------------------------------------------ */
@@ -129,6 +135,7 @@ static void gntdev_free_map(struct grant_map *map)
kfree(map->unmap_ops);
kfree(map->kmap_ops);
kfree(map->kunmap_ops);
+ kfree(map->being_removed);
kfree(map);
}
@@ -147,12 +154,15 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
NULL == add->kmap_ops ||
NULL == add->kunmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
if (gnttab_alloc_pages(count, add->pages))
@@ -217,6 +227,35 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map)
return;
atomic_sub(map->count, &pages_mapped);
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1. Do NOT use refcount_inc() here, as it will detect that
+ * the reference count is zero and WARN().
+ */
+ refcount_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
+ unmap_grant_pages(map, 0, map->count);
+
+ /* Check if the memory now needs to be freed */
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
@@ -274,6 +313,7 @@ static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token,
static int map_grant_pages(struct grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
@@ -322,85 +362,107 @@ static int map_grant_pages(struct grant_map *map)
map->pages, map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
+ unsigned int i;
+ struct grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
+
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset+i].status);
+ pr_debug("unmap handle=%d st=%d\n",
+ map->unmap_ops[offset+i].handle,
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = -1;
+ }
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gntdev_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+{
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
+
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ refcount_inc(&map->users); /* to keep map alive during async call below */
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
-
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
- pr_debug("unmap handle=%d st=%d\n",
- map->unmap_ops[offset+i].handle,
- map->unmap_ops[offset+i].status);
- map->unmap_ops[offset+i].handle = -1;
- }
- return err;
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static void unmap_grant_pages(struct grant_map *map, int offset, int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages && map->unmap_ops[offset].handle == -1) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset+range].handle == -1)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
@@ -456,7 +518,6 @@ static void unmap_if_in_range(struct grant_map *map,
unsigned long start, unsigned long end)
{
unsigned long mstart, mend;
- int err;
if (!map->vma)
return;
@@ -470,10 +531,9 @@ static void unmap_if_in_range(struct grant_map *map,
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
start, end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
}
static void mn_invl_range_start(struct mmu_notifier *mn,
@@ -498,7 +558,6 @@ static void mn_release(struct mmu_notifier *mn,
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct grant_map *map;
- int err;
mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
@@ -507,8 +566,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
if (!map->vma)
@@ -516,8 +574,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
mutex_unlock(&priv->lock);
}
@@ -1006,6 +1063,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
}
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
--
2.36.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4.9] xen/gntdev: Avoid blocking in unmap_grant_pages()
2022-07-01 0:09 Hopefully correct backports for gntdev deadlock Demi Marie Obenour
` (3 preceding siblings ...)
2022-07-01 0:09 ` [PATCH 4.14] " Demi Marie Obenour
@ 2022-07-01 0:09 ` Demi Marie Obenour
2022-07-04 13:46 ` Hopefully correct backports for gntdev deadlock Juergen Gross
5 siblings, 0 replies; 9+ messages in thread
From: Demi Marie Obenour @ 2022-07-01 0:09 UTC (permalink / raw)
To: Juergen Gross, Greg KH
Cc: stable, Xen developer discussion, Demi Marie Obenour
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_put_map(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
drivers/xen/gntdev.c | 144 ++++++++++++++++++++++++++++++-------------
1 file changed, 102 insertions(+), 42 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 69d59102ff1b..2c3248e71e9c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -57,6 +57,7 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
static atomic_t pages_mapped = ATOMIC_INIT(0);
+/* True in PV mode, false otherwise */
static int use_ptemod;
#define populate_freeable_maps use_ptemod
@@ -92,11 +93,16 @@ struct grant_map {
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+static void unmap_grant_pages(struct grant_map *map, int offset, int pages);
/* ------------------------------------------------------------------ */
@@ -127,6 +133,7 @@ static void gntdev_free_map(struct grant_map *map)
kfree(map->unmap_ops);
kfree(map->kmap_ops);
kfree(map->kunmap_ops);
+ kfree(map->being_removed);
kfree(map);
}
@@ -145,12 +152,15 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
NULL == add->kmap_ops ||
NULL == add->kunmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
if (gnttab_alloc_pages(count, add->pages))
@@ -215,6 +225,34 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map)
return;
atomic_sub(map->count, &pages_mapped);
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1.
+ */
+ atomic_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
+ unmap_grant_pages(map, 0, map->count);
+
+ /* Check if the memory now needs to be freed */
+ if (!atomic_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
@@ -272,6 +310,7 @@ static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token,
static int map_grant_pages(struct grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
@@ -320,85 +359,107 @@ static int map_grant_pages(struct grant_map *map)
map->pages, map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
+ unsigned int i;
+ struct grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
+
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset+i].status);
+ pr_debug("unmap handle=%d st=%d\n",
+ map->unmap_ops[offset+i].handle,
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = -1;
+ }
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gntdev_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+{
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
+
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ atomic_inc(&map->users); /* to keep map alive during async call below */
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
-
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
- pr_debug("unmap handle=%d st=%d\n",
- map->unmap_ops[offset+i].handle,
- map->unmap_ops[offset+i].status);
- map->unmap_ops[offset+i].handle = -1;
- }
- return err;
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static void unmap_grant_pages(struct grant_map *map, int offset, int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages && map->unmap_ops[offset].handle == -1) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset+range].handle == -1)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
@@ -454,7 +515,6 @@ static void unmap_if_in_range(struct grant_map *map,
unsigned long start, unsigned long end)
{
unsigned long mstart, mend;
- int err;
if (!map->vma)
return;
@@ -468,10 +528,9 @@ static void unmap_if_in_range(struct grant_map *map,
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
start, end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
}
static void mn_invl_range_start(struct mmu_notifier *mn,
@@ -503,7 +562,6 @@ static void mn_release(struct mmu_notifier *mn,
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct grant_map *map;
- int err;
mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
@@ -512,8 +570,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
if (!map->vma)
@@ -521,8 +578,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
mutex_unlock(&priv->lock);
}
@@ -1012,6 +1068,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
}
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
atomic_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
--
2.36.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: Hopefully correct backports for gntdev deadlock
2022-07-01 0:09 Hopefully correct backports for gntdev deadlock Demi Marie Obenour
` (4 preceding siblings ...)
2022-07-01 0:09 ` [PATCH 4.9] " Demi Marie Obenour
@ 2022-07-04 13:46 ` Juergen Gross
2022-07-04 14:52 ` Greg KH
5 siblings, 1 reply; 9+ messages in thread
From: Juergen Gross @ 2022-07-04 13:46 UTC (permalink / raw)
To: Demi Marie Obenour, Greg KH; +Cc: stable, Xen developer discussion
[-- Attachment #1.1.1: Type: text/plain, Size: 340 bytes --]
On 01.07.22 02:09, Demi Marie Obenour wrote:
> This backports "xen/gntdev: Avoid blocking in unmap_grant_pages()" to
> the various stable trees, hopefully correctly.
>
I have reviewed all backports and they seem correct to me.
Greg, I'm fine with you adding the backports to the related stable
branches.
Thanks,
Juergen
[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: Hopefully correct backports for gntdev deadlock
2022-07-04 13:46 ` Hopefully correct backports for gntdev deadlock Juergen Gross
@ 2022-07-04 14:52 ` Greg KH
0 siblings, 0 replies; 9+ messages in thread
From: Greg KH @ 2022-07-04 14:52 UTC (permalink / raw)
To: Juergen Gross; +Cc: Demi Marie Obenour, stable, Xen developer discussion
On Mon, Jul 04, 2022 at 03:46:12PM +0200, Juergen Gross wrote:
> On 01.07.22 02:09, Demi Marie Obenour wrote:
> > This backports "xen/gntdev: Avoid blocking in unmap_grant_pages()" to
> > the various stable trees, hopefully correctly.
> >
>
> I have reviewed all backports and they seem correct to me.
>
> Greg, I'm fine with you adding the backports to the related stable
> branches.
Thanks, now queued up.
greg k-h
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 5.4] xen/gntdev: Avoid blocking in unmap_grant_pages()
2022-06-27 18:10 [PATCH 5.10] xen/gntdev: Avoid blocking in unmap_grant_pages() Demi Marie Obenour
@ 2022-06-27 18:10 ` Demi Marie Obenour
0 siblings, 0 replies; 9+ messages in thread
From: Demi Marie Obenour @ 2022-06-27 18:10 UTC (permalink / raw)
To: stable, Xen developer discussion, Juergen Gross; +Cc: Demi Marie Obenour
commit dbe97cff7dd9f0f75c524afdd55ad46be3d15295 upstream
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_put_map(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
drivers/xen/gntdev-common.h | 8 ++
drivers/xen/gntdev.c | 147 +++++++++++++++++++++++++-----------
2 files changed, 110 insertions(+), 45 deletions(-)
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 2f8b949c3eeb..fab6f5a54d5b 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -15,6 +15,8 @@
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/types.h>
+#include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
struct gntdev_dmabuf_priv;
@@ -61,6 +63,7 @@ struct gntdev_grant_map {
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
@@ -78,6 +81,11 @@ struct gntdev_grant_map {
/* Needed to avoid allocation in gnttab_dma_free_pages(). */
xen_pfn_t *frames;
#endif
+
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in __unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e953ea34b3e4..f46479347765 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -35,6 +35,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
+#include <linux/workqueue.h>
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -62,11 +63,12 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
static atomic_t pages_mapped = ATOMIC_INIT(0);
+/* True in PV mode, false otherwise */
static int use_ptemod;
#define populate_freeable_maps use_ptemod
-static int unmap_grant_pages(struct gntdev_grant_map *map,
- int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+ int offset, int pages);
static struct miscdevice gntdev_miscdev;
@@ -123,6 +125,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
kfree(map->unmap_ops);
kfree(map->kmap_ops);
kfree(map->kunmap_ops);
+ kfree(map->being_removed);
kfree(map);
}
@@ -142,12 +145,15 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
NULL == add->kmap_ops ||
NULL == add->kunmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
@@ -243,6 +249,35 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
return;
atomic_sub(map->count, &pages_mapped);
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1. Do NOT use refcount_inc() here, as it will detect that
+ * the reference count is zero and WARN().
+ */
+ refcount_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
+ unmap_grant_pages(map, 0, map->count);
+
+ /* Check if the memory now needs to be freed */
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
@@ -298,6 +333,7 @@ static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
@@ -346,87 +382,109 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
map->pages, map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
+ unsigned int i;
+ struct gntdev_grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset+i].status);
+ pr_debug("unmap handle=%d st=%d\n",
+ map->unmap_ops[offset+i].handle,
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = -1;
+ }
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gntdev_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
+{
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
+
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ refcount_inc(&map->users); /* to keep map alive during async call below */
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
-
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
- pr_debug("unmap handle=%d st=%d\n",
- map->unmap_ops[offset+i].handle,
- map->unmap_ops[offset+i].status);
- map->unmap_ops[offset+i].handle = -1;
- }
- return err;
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages && map->unmap_ops[offset].handle == -1) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset+range].handle == -1)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
@@ -496,7 +554,6 @@ static int unmap_if_in_range(struct gntdev_grant_map *map,
bool blockable)
{
unsigned long mstart, mend;
- int err;
if (!in_range(map, start, end))
return 0;
@@ -510,10 +567,9 @@ static int unmap_if_in_range(struct gntdev_grant_map *map,
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
start, end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
return 0;
}
@@ -554,7 +610,6 @@ static void mn_release(struct mmu_notifier *mn,
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct gntdev_grant_map *map;
- int err;
mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
@@ -563,8 +618,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
if (!map->vma)
@@ -572,8 +626,7 @@ static void mn_release(struct mmu_notifier *mn,
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
+ unmap_grant_pages(map, /* offset */ 0, map->count);
}
mutex_unlock(&priv->lock);
}
@@ -1102,6 +1155,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
}
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-07-04 14:52 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-01 0:09 Hopefully correct backports for gntdev deadlock Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 5.10] xen/gntdev: Avoid blocking in unmap_grant_pages() Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 5.4] " Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 4.19] " Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 4.14] " Demi Marie Obenour
2022-07-01 0:09 ` [PATCH 4.9] " Demi Marie Obenour
2022-07-04 13:46 ` Hopefully correct backports for gntdev deadlock Juergen Gross
2022-07-04 14:52 ` Greg KH
-- strict thread matches above, loose matches on Subject: below --
2022-06-27 18:10 [PATCH 5.10] xen/gntdev: Avoid blocking in unmap_grant_pages() Demi Marie Obenour
2022-06-27 18:10 ` [PATCH 5.4] " Demi Marie Obenour
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.