linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 0/3] basic KASAN support for Xen PV domains
@ 2019-12-17 14:08 Sergey Dyasli
  2019-12-17 14:08 ` [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel Sergey Dyasli
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Sergey Dyasli @ 2019-12-17 14:08 UTC (permalink / raw)
  To: xen-devel, kasan-dev, linux-kernel
  Cc: Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
	George Dunlap, Ross Lagerwall, Sergey Dyasli

This series allows to boot and run Xen PV kernels (Dom0 and DomU) with
CONFIG_KASAN=y. It has been used internally for some time now with good
results for finding memory corruption issues in Dom0 kernel.

Only Outline instrumentation is supported at the moment.

Patch 1 is of RFC quality
Patches 2-3 are independent and quite self-contained.

Sergey Dyasli (1):
  x86/xen: add basic KASAN support for PV kernel

Ross Lagerwall (2):
  xen: teach KASAN about grant tables
  xen/netback: Fix grant copy across page boundary with KASAN

 arch/x86/mm/init.c                | 14 ++++++++
 arch/x86/mm/kasan_init_64.c       | 28 ++++++++++++++++
 arch/x86/xen/Makefile             |  7 ++++
 arch/x86/xen/enlighten_pv.c       |  3 ++
 arch/x86/xen/mmu_pv.c             | 13 ++++++--
 arch/x86/xen/multicalls.c         | 10 ++++++
 drivers/net/xen-netback/common.h  |  2 +-
 drivers/net/xen-netback/netback.c | 55 ++++++++++++++++++++++++-------
 drivers/xen/Makefile              |  2 ++
 drivers/xen/grant-table.c         |  5 ++-
 kernel/Makefile                   |  2 ++
 lib/Kconfig.kasan                 |  3 +-
 12 files changed, 128 insertions(+), 16 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel
  2019-12-17 14:08 [RFC PATCH 0/3] basic KASAN support for Xen PV domains Sergey Dyasli
@ 2019-12-17 14:08 ` Sergey Dyasli
  2019-12-18  9:24   ` Jürgen Groß
  2019-12-17 14:08 ` [RFC PATCH 2/3] xen: teach KASAN about grant tables Sergey Dyasli
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Sergey Dyasli @ 2019-12-17 14:08 UTC (permalink / raw)
  To: xen-devel, kasan-dev, linux-kernel
  Cc: Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
	George Dunlap, Ross Lagerwall, Sergey Dyasli

This enables to use Outline instrumentation for Xen PV kernels.

KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
and hence disabled.

Rough edges in the patch are marked with XXX.

Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
---
 arch/x86/mm/init.c          | 14 ++++++++++++++
 arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++
 arch/x86/xen/Makefile       |  7 +++++++
 arch/x86/xen/enlighten_pv.c |  3 +++
 arch/x86/xen/mmu_pv.c       | 13 +++++++++++--
 arch/x86/xen/multicalls.c   | 10 ++++++++++
 drivers/xen/Makefile        |  2 ++
 kernel/Makefile             |  2 ++
 lib/Kconfig.kasan           |  3 ++-
 9 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index e7bb483557c9..0c98a45eec6c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -8,6 +8,8 @@
 #include <linux/kmemleak.h>
 #include <linux/sched/task.h>
 
+#include <xen/xen.h>
+
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
 #include <asm/init.h>
@@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end)
 	unsigned long end_ul = (unsigned long)end;
 	unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
 
+	/*
+	 * XXX: skip this for now. Otherwise it leads to:
+	 *
+	 * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40)
+	 * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
+	 * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157
+	 *
+	 * and further #PF error: [PROT] [WRITE] in the kernel.
+	 */
+	if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
+		return;
+
 	free_init_pages(what, begin_ul, end_ul);
 
 	/*
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index cf5bc37c90ac..caee2022f8b0 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -13,6 +13,8 @@
 #include <linux/sched/task.h>
 #include <linux/vmalloc.h>
 
+#include <xen/xen.h>
+
 #include <asm/e820/types.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -20,6 +22,9 @@
 #include <asm/pgtable.h>
 #include <asm/cpu_entry_area.h>
 
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
@@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = {
 };
 #endif
 
+#ifdef CONFIG_XEN
+/* XXX: this should go to some header */
+void __init set_page_prot(void *addr, pgprot_t prot);
+void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
+#endif
+
 void __init kasan_early_init(void)
 {
 	int i;
@@ -332,6 +343,16 @@ void __init kasan_early_init(void)
 	for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
 		kasan_early_shadow_p4d[i] = __p4d(p4d_val);
 
+	if (xen_pv_domain()) {
+		/* PV page tables must have PAGE_KERNEL_RO */
+		set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
+		set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
+		set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+
+		/* Add mappings to the initial PV page tables */
+		kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
+	}
+
 	kasan_map_early_shadow(early_top_pgt);
 	kasan_map_early_shadow(init_top_pgt);
 }
@@ -369,6 +390,13 @@ void __init kasan_init(void)
 				__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
 	}
 
+	if (xen_pv_domain()) {
+		/* PV page tables must be pinned */
+		set_page_prot(early_top_pgt, PAGE_KERNEL_RO);
+		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+				  PFN_DOWN(__pa_symbol(early_top_pgt)));
+	}
+
 	load_cr3(early_top_pgt);
 	__flush_tlb_all();
 
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 084de77a109e..102fad0b0bca 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,10 @@
+KASAN_SANITIZE_enlighten_pv.o := n
+KASAN_SANITIZE_enlighten.o := n
+KASAN_SANITIZE_irq.o := n
+KASAN_SANITIZE_mmu_pv.o := n
+KASAN_SANITIZE_p2m.o := n
+KASAN_SANITIZE_multicalls.o := n
+
 # SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
 
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae4a41ca19f6..27de55699f24 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,7 @@
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
 #include <asm/cpu.h>
+#include <asm/kasan.h>
 
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
@@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	/* Get mfn list */
 	xen_build_dynamic_phys_to_machine();
 
+	kasan_early_init();
+
 	/*
 	 * Set up kernel GDT and segment registers, mainly so that
 	 * -fstack-protector code can be executed.
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c8dbee62ec2a..eaf63f1f26af 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
 
 static void xen_post_allocator_init(void);
 
-static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
 {
 	struct mmuext_op op;
 
@@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
 		BUG();
 }
-static void __init set_page_prot(void *addr, pgprot_t prot)
+void __init set_page_prot(void *addr, pgprot_t prot)
 {
 	return set_page_prot_flags(addr, prot, UVMF_NONE);
 }
@@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	if (i && i < pgd_index(__START_KERNEL_map))
 		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
 
+#ifdef CONFIG_KASAN
+	/*
+	 * Copy KASAN mappings
+	 * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+	 */
+	for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
+		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+#endif
+
 	/* Make pagetable pieces RO */
 	set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 07054572297f..5e4729efbbe2 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -99,6 +99,15 @@ void xen_mc_flush(void)
 				ret++;
 	}
 
+	/*
+	 * XXX: Kasan produces quite a lot (~2000) of warnings in a form of:
+	 *
+	 *     (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
+	 *
+	 * during kasan_init(). They are benign, but silence them for now.
+	 * Otherwise, booting takes too long due to printk() spam.
+	 */
+#ifndef CONFIG_KASAN
 	if (WARN_ON(ret)) {
 		pr_err("%d of %d multicall(s) failed: cpu %d\n",
 		       ret, b->mcidx, smp_processor_id());
@@ -121,6 +130,7 @@ void xen_mc_flush(void)
 			}
 		}
 	}
+#endif /* CONFIG_KASAN */
 
 	b->mcidx = 0;
 	b->argidx = 0;
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 0c4efa6fe450..1e9e1e41c0a8 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+KASAN_SANITIZE_features.o := n
+
 obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
 obj-y	+= grant-table.o features.o balloon.o manage.o preempt.o time.o
 obj-y	+= mem-reservation.o
diff --git a/kernel/Makefile b/kernel/Makefile
index f2cc0d118a0b..1da6fd93c00c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -12,6 +12,8 @@ obj-y     = fork.o exec_domain.o panic.o \
 	    notifier.o ksysfs.o cred.o reboot.o \
 	    async.o range.o smpboot.o ucount.o
 
+KASAN_SANITIZE_cpu.o := n
+
 obj-$(CONFIG_MODULES) += kmod.o
 obj-$(CONFIG_MULTIUSER) += groups.o
 
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 81f5464ea9e1..429a638625ea 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -98,6 +98,7 @@ config KASAN_OUTLINE
 
 config KASAN_INLINE
 	bool "Inline instrumentation"
+	depends on !XEN_PV
 	help
 	  Compiler directly inserts code checking shadow memory before
 	  memory accesses. This is faster than outline (in some workloads
@@ -147,7 +148,7 @@ config KASAN_SW_TAGS_IDENTIFY
 
 config KASAN_VMALLOC
 	bool "Back mappings in vmalloc space with real shadow memory"
-	depends on KASAN && HAVE_ARCH_KASAN_VMALLOC
+	depends on KASAN && HAVE_ARCH_KASAN_VMALLOC && !XEN_PV
 	help
 	  By default, the shadow region for vmalloc space is the read-only
 	  zero page. This means that KASAN cannot detect errors involving
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH 2/3] xen: teach KASAN about grant tables
  2019-12-17 14:08 [RFC PATCH 0/3] basic KASAN support for Xen PV domains Sergey Dyasli
  2019-12-17 14:08 ` [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel Sergey Dyasli
@ 2019-12-17 14:08 ` Sergey Dyasli
  2019-12-17 14:08 ` [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN Sergey Dyasli
  2019-12-17 18:06 ` [RFC PATCH 0/3] basic KASAN support for Xen PV domains Boris Ostrovsky
  3 siblings, 0 replies; 11+ messages in thread
From: Sergey Dyasli @ 2019-12-17 14:08 UTC (permalink / raw)
  To: xen-devel, kasan-dev, linux-kernel
  Cc: Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
	George Dunlap, Ross Lagerwall, Sergey Dyasli

From: Ross Lagerwall <ross.lagerwall@citrix.com>

Otherwise it produces lots of false positives.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
---
 drivers/xen/grant-table.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 49b381e104ef..0f844c14d5b9 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1049,6 +1049,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 			foreign = xen_page_foreign(pages[i]);
 			foreign->domid = map_ops[i].dom;
 			foreign->gref = map_ops[i].ref;
+			kasan_alloc_pages(pages[i], 0);
 			break;
 		}
 
@@ -1085,8 +1086,10 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	if (ret)
 		return ret;
 
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
 		ClearPageForeign(pages[i]);
+		kasan_free_pages(pages[i], 0);
+	}
 
 	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN
  2019-12-17 14:08 [RFC PATCH 0/3] basic KASAN support for Xen PV domains Sergey Dyasli
  2019-12-17 14:08 ` [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel Sergey Dyasli
  2019-12-17 14:08 ` [RFC PATCH 2/3] xen: teach KASAN about grant tables Sergey Dyasli
@ 2019-12-17 14:08 ` Sergey Dyasli
  2019-12-17 15:14   ` [Xen-devel] " Durrant, Paul
  2019-12-17 18:06 ` [RFC PATCH 0/3] basic KASAN support for Xen PV domains Boris Ostrovsky
  3 siblings, 1 reply; 11+ messages in thread
From: Sergey Dyasli @ 2019-12-17 14:08 UTC (permalink / raw)
  To: xen-devel, kasan-dev, linux-kernel
  Cc: Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
	George Dunlap, Ross Lagerwall, Sergey Dyasli

From: Ross Lagerwall <ross.lagerwall@citrix.com>

When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that
allocations are aligned to the next power of 2 of the size does not
hold. Therefore, handle grant copies that cross page boundaries.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
---
 drivers/net/xen-netback/common.h  |  2 +-
 drivers/net/xen-netback/netback.c | 55 ++++++++++++++++++++++++-------
 2 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 05847eb91a1b..e57684415edd 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
 	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
 
-	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2];
 	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
 	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
 	/* passed to gnttab_[un]map_refs with pages under (un)mapping */
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 0020b2e8c279..1541b6e0cc62 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
 
 struct xenvif_tx_cb {
 	u16 pending_idx;
+	u8 copies;
 };
 
 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
@@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
 {
 	struct gnttab_map_grant_ref *gop_map = *gopp_map;
 	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+	u8 copies = XENVIF_TX_CB(skb)->copies;
 	/* This always points to the shinfo of the skb being checked, which
 	 * could be either the first or the one on the frag_list
 	 */
@@ -450,23 +452,27 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
 	int nr_frags = shinfo->nr_frags;
 	const bool sharedslot = nr_frags &&
 				frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
-	int i, err;
+	int i, err = 0;
 
-	/* Check status of header. */
-	err = (*gopp_copy)->status;
-	if (unlikely(err)) {
-		if (net_ratelimit())
-			netdev_dbg(queue->vif->dev,
+	while (copies) {
+		/* Check status of header. */
+		int newerr = (*gopp_copy)->status;
+		if (unlikely(newerr)) {
+			if (net_ratelimit())
+				netdev_dbg(queue->vif->dev,
 				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
 				   (*gopp_copy)->status,
 				   pending_idx,
 				   (*gopp_copy)->source.u.ref);
-		/* The first frag might still have this slot mapped */
-		if (!sharedslot)
-			xenvif_idx_release(queue, pending_idx,
-					   XEN_NETIF_RSP_ERROR);
+			/* The first frag might still have this slot mapped */
+			if (!sharedslot && !err)
+				xenvif_idx_release(queue, pending_idx,
+						   XEN_NETIF_RSP_ERROR);
+			err = newerr;
+		}
+		(*gopp_copy)++;
+		copies--;
 	}
-	(*gopp_copy)++;
 
 check_frags:
 	for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -910,6 +916,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
 			xenvif_tx_err(queue, &txreq, extra_count, idx);
 			break;
 		}
+		XENVIF_TX_CB(skb)->copies = 0;
 
 		skb_shinfo(skb)->nr_frags = ret;
 		if (data_len < txreq.size)
@@ -933,6 +940,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
 						   "Can't allocate the frag_list skb.\n");
 				break;
 			}
+			XENVIF_TX_CB(nskb)->copies = 0;
 		}
 
 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
@@ -990,6 +998,31 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
 
 		queue->tx_copy_ops[*copy_ops].len = data_len;
 		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+		XENVIF_TX_CB(skb)->copies++;
+
+		if (offset_in_page(skb->data) + data_len > XEN_PAGE_SIZE) {
+			unsigned int extra_len = offset_in_page(skb->data) +
+					     data_len - XEN_PAGE_SIZE;
+
+			queue->tx_copy_ops[*copy_ops].len -= extra_len;
+			(*copy_ops)++;
+
+			queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
+			queue->tx_copy_ops[*copy_ops].source.domid =
+				queue->vif->domid;
+			queue->tx_copy_ops[*copy_ops].source.offset =
+				txreq.offset + data_len - extra_len;
+
+			queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
+				virt_to_gfn(skb->data + data_len - extra_len);
+			queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
+			queue->tx_copy_ops[*copy_ops].dest.offset = 0;
+
+			queue->tx_copy_ops[*copy_ops].len = extra_len;
+			queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+
+			XENVIF_TX_CB(skb)->copies++;
+		}
 
 		(*copy_ops)++;
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* RE: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN
  2019-12-17 14:08 ` [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN Sergey Dyasli
@ 2019-12-17 15:14   ` Durrant, Paul
  2020-01-07 10:33     ` Sergey Dyasli
  0 siblings, 1 reply; 11+ messages in thread
From: Durrant, Paul @ 2019-12-17 15:14 UTC (permalink / raw)
  To: Sergey Dyasli, xen-devel, kasan-dev, linux-kernel
  Cc: Juergen Gross, Stefano Stabellini, George Dunlap, Ross Lagerwall,
	Alexander Potapenko, Andrey Ryabinin, Boris Ostrovsky,
	Dmitry Vyukov

> -----Original Message-----
> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of
> Sergey Dyasli
> Sent: 17 December 2019 14:08
> To: xen-devel@lists.xen.org; kasan-dev@googlegroups.com; linux-
> kernel@vger.kernel.org
> Cc: Juergen Gross <jgross@suse.com>; Sergey Dyasli
> <sergey.dyasli@citrix.com>; Stefano Stabellini <sstabellini@kernel.org>;
> George Dunlap <george.dunlap@citrix.com>; Ross Lagerwall
> <ross.lagerwall@citrix.com>; Alexander Potapenko <glider@google.com>;
> Andrey Ryabinin <aryabinin@virtuozzo.com>; Boris Ostrovsky
> <boris.ostrovsky@oracle.com>; Dmitry Vyukov <dvyukov@google.com>
> Subject: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across
> page boundary with KASAN
> 
> From: Ross Lagerwall <ross.lagerwall@citrix.com>
> 
> When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that
> allocations are aligned to the next power of 2 of the size does not
> hold. Therefore, handle grant copies that cross page boundaries.
> 
> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>

Would have been nice to cc netback maintainers...

> ---
>  drivers/net/xen-netback/common.h  |  2 +-
>  drivers/net/xen-netback/netback.c | 55 ++++++++++++++++++++++++-------
>  2 files changed, 45 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-
> netback/common.h
> index 05847eb91a1b..e57684415edd 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
>  	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
>  	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
> 
> -	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
> +	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2];
>  	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
>  	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
>  	/* passed to gnttab_[un]map_refs with pages under (un)mapping */
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-
> netback/netback.c
> index 0020b2e8c279..1541b6e0cc62 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue
> *queue,
> 
>  struct xenvif_tx_cb {
>  	u16 pending_idx;
> +	u8 copies;
>  };

I know we're a way off the limit (48 bytes) but I wonder if we ought to have a compile time check here that we're not overflowing skb->cb.

> 
>  #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
> @@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue
> *queue,
>  {
>  	struct gnttab_map_grant_ref *gop_map = *gopp_map;
>  	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
> +	u8 copies = XENVIF_TX_CB(skb)->copies;
>  	/* This always points to the shinfo of the skb being checked, which
>  	 * could be either the first or the one on the frag_list
>  	 */
> @@ -450,23 +452,27 @@ static int xenvif_tx_check_gop(struct xenvif_queue
> *queue,
>  	int nr_frags = shinfo->nr_frags;
>  	const bool sharedslot = nr_frags &&
>  				frag_get_pending_idx(&shinfo->frags[0]) ==
> pending_idx;
> -	int i, err;
> +	int i, err = 0;
> 
> -	/* Check status of header. */
> -	err = (*gopp_copy)->status;
> -	if (unlikely(err)) {
> -		if (net_ratelimit())
> -			netdev_dbg(queue->vif->dev,
> +	while (copies) {
> +		/* Check status of header. */
> +		int newerr = (*gopp_copy)->status;
> +		if (unlikely(newerr)) {
> +			if (net_ratelimit())
> +				netdev_dbg(queue->vif->dev,
>  				   "Grant copy of header failed! status: %d
> pending_idx: %u ref: %u\n",
>  				   (*gopp_copy)->status,
>  				   pending_idx,
>  				   (*gopp_copy)->source.u.ref);
> -		/* The first frag might still have this slot mapped */
> -		if (!sharedslot)
> -			xenvif_idx_release(queue, pending_idx,
> -					   XEN_NETIF_RSP_ERROR);
> +			/* The first frag might still have this slot mapped */
> +			if (!sharedslot && !err)
> +				xenvif_idx_release(queue, pending_idx,
> +						   XEN_NETIF_RSP_ERROR);

Can't this be done after the loop, if there is an accumulated err? I think it would make the code slightly neater.

> +			err = newerr;
> +		}
> +		(*gopp_copy)++;
> +		copies--;
>  	}
> -	(*gopp_copy)++;
> 
>  check_frags:
>  	for (i = 0; i < nr_frags; i++, gop_map++) {
> @@ -910,6 +916,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue
> *queue,
>  			xenvif_tx_err(queue, &txreq, extra_count, idx);
>  			break;
>  		}
> +		XENVIF_TX_CB(skb)->copies = 0;
> 
>  		skb_shinfo(skb)->nr_frags = ret;
>  		if (data_len < txreq.size)
> @@ -933,6 +940,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue
> *queue,
>  						   "Can't allocate the frag_list
> skb.\n");
>  				break;
>  			}
> +			XENVIF_TX_CB(nskb)->copies = 0;
>  		}
> 
>  		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
> @@ -990,6 +998,31 @@ static void xenvif_tx_build_gops(struct xenvif_queue
> *queue,
> 
>  		queue->tx_copy_ops[*copy_ops].len = data_len;

If offset_in_page(skb->data)+ data_len can exceed XEN_PAGE_SIZE, does this not need to be truncated?

  Paul

>  		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
> +		XENVIF_TX_CB(skb)->copies++;
> +
> +		if (offset_in_page(skb->data) + data_len > XEN_PAGE_SIZE) {
> +			unsigned int extra_len = offset_in_page(skb->data) +
> +					     data_len - XEN_PAGE_SIZE;
> +
> +			queue->tx_copy_ops[*copy_ops].len -= extra_len;
> +			(*copy_ops)++;
> +
> +			queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
> +			queue->tx_copy_ops[*copy_ops].source.domid =
> +				queue->vif->domid;
> +			queue->tx_copy_ops[*copy_ops].source.offset =
> +				txreq.offset + data_len - extra_len;
> +
> +			queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
> +				virt_to_gfn(skb->data + data_len - extra_len);
> +			queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
> +			queue->tx_copy_ops[*copy_ops].dest.offset = 0;
> +
> +			queue->tx_copy_ops[*copy_ops].len = extra_len;
> +			queue->tx_copy_ops[*copy_ops].flags =
> GNTCOPY_source_gref;
> +
> +			XENVIF_TX_CB(skb)->copies++;
> +		}
> 
>  		(*copy_ops)++;
> 
> --
> 2.17.1
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xenproject.org
> https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 0/3] basic KASAN support for Xen PV domains
  2019-12-17 14:08 [RFC PATCH 0/3] basic KASAN support for Xen PV domains Sergey Dyasli
                   ` (2 preceding siblings ...)
  2019-12-17 14:08 ` [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN Sergey Dyasli
@ 2019-12-17 18:06 ` Boris Ostrovsky
  2019-12-20 10:26   ` Sergey Dyasli
  3 siblings, 1 reply; 11+ messages in thread
From: Boris Ostrovsky @ 2019-12-17 18:06 UTC (permalink / raw)
  To: Sergey Dyasli
  Cc: xen-devel, kasan-dev, linux-kernel, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Juergen Gross,
	Stefano Stabellini, George Dunlap, Ross Lagerwall



> On Dec 17, 2019, at 9:08 AM, Sergey Dyasli <sergey.dyasli@citrix.com> wrote:
> 
> This series allows to boot and run Xen PV kernels (Dom0 and DomU) with
> CONFIG_KASAN=y. It has been used internally for some time now with good
> results for finding memory corruption issues in Dom0 kernel.
> 
> Only Outline instrumentation is supported at the moment.
> 
> Patch 1 is of RFC quality
> Patches 2-3 are independent and quite self-contained.


Don’t you need to initialize kasan before, for example, calling kasan_alloc_pages() in patch 2?

-boris


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel
  2019-12-17 14:08 ` [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel Sergey Dyasli
@ 2019-12-18  9:24   ` Jürgen Groß
  2019-12-19 16:42     ` Sergey Dyasli
  0 siblings, 1 reply; 11+ messages in thread
From: Jürgen Groß @ 2019-12-18  9:24 UTC (permalink / raw)
  To: Sergey Dyasli, xen-devel, kasan-dev, linux-kernel
  Cc: Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Boris Ostrovsky, Stefano Stabellini, George Dunlap,
	Ross Lagerwall

On 17.12.19 15:08, Sergey Dyasli wrote:
> This enables to use Outline instrumentation for Xen PV kernels.
> 
> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
> and hence disabled.
> 
> Rough edges in the patch are marked with XXX.
> 
> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
> ---
>   arch/x86/mm/init.c          | 14 ++++++++++++++
>   arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++
>   arch/x86/xen/Makefile       |  7 +++++++
>   arch/x86/xen/enlighten_pv.c |  3 +++
>   arch/x86/xen/mmu_pv.c       | 13 +++++++++++--
>   arch/x86/xen/multicalls.c   | 10 ++++++++++
>   drivers/xen/Makefile        |  2 ++
>   kernel/Makefile             |  2 ++
>   lib/Kconfig.kasan           |  3 ++-
>   9 files changed, 79 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index e7bb483557c9..0c98a45eec6c 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -8,6 +8,8 @@
>   #include <linux/kmemleak.h>
>   #include <linux/sched/task.h>
>   
> +#include <xen/xen.h>
> +
>   #include <asm/set_memory.h>
>   #include <asm/e820/api.h>
>   #include <asm/init.h>
> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end)
>   	unsigned long end_ul = (unsigned long)end;
>   	unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
>   
> +	/*
> +	 * XXX: skip this for now. Otherwise it leads to:
> +	 *
> +	 * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40)
> +	 * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
> +	 * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157
> +	 *
> +	 * and further #PF error: [PROT] [WRITE] in the kernel.
> +	 */
> +	if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
> +		return;
> +

I guess this is related to freeing some kasan page tables without
unpinning them?

>   	free_init_pages(what, begin_ul, end_ul);
>   
>   	/*
> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
> index cf5bc37c90ac..caee2022f8b0 100644
> --- a/arch/x86/mm/kasan_init_64.c
> +++ b/arch/x86/mm/kasan_init_64.c
> @@ -13,6 +13,8 @@
>   #include <linux/sched/task.h>
>   #include <linux/vmalloc.h>
>   
> +#include <xen/xen.h>
> +
>   #include <asm/e820/types.h>
>   #include <asm/pgalloc.h>
>   #include <asm/tlbflush.h>
> @@ -20,6 +22,9 @@
>   #include <asm/pgtable.h>
>   #include <asm/cpu_entry_area.h>
>   
> +#include <xen/interface/xen.h>
> +#include <asm/xen/hypervisor.h>
> +
>   extern struct range pfn_mapped[E820_MAX_ENTRIES];
>   
>   static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = {
>   };
>   #endif
>   
> +#ifdef CONFIG_XEN
> +/* XXX: this should go to some header */
> +void __init set_page_prot(void *addr, pgprot_t prot);
> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
> +#endif
> +

Instead of exporting those, why don't you ...

>   void __init kasan_early_init(void)
>   {
>   	int i;
> @@ -332,6 +343,16 @@ void __init kasan_early_init(void)
>   	for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>   		kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>   
> +	if (xen_pv_domain()) {
> +		/* PV page tables must have PAGE_KERNEL_RO */
> +		set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
> +		set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
> +		set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);

add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())?

> +
> +		/* Add mappings to the initial PV page tables */
> +		kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
> +	}
> +
>   	kasan_map_early_shadow(early_top_pgt);
>   	kasan_map_early_shadow(init_top_pgt);
>   }
> @@ -369,6 +390,13 @@ void __init kasan_init(void)
>   				__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
>   	}
>   
> +	if (xen_pv_domain()) {
> +		/* PV page tables must be pinned */
> +		set_page_prot(early_top_pgt, PAGE_KERNEL_RO);
> +		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
> +				  PFN_DOWN(__pa_symbol(early_top_pgt)));

and another one like xen_pv_kasan_init() here.

> +	}
> +
>   	load_cr3(early_top_pgt);
>   	__flush_tlb_all();
>   
> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
> index 084de77a109e..102fad0b0bca 100644
> --- a/arch/x86/xen/Makefile
> +++ b/arch/x86/xen/Makefile
> @@ -1,3 +1,10 @@
> +KASAN_SANITIZE_enlighten_pv.o := n
> +KASAN_SANITIZE_enlighten.o := n
> +KASAN_SANITIZE_irq.o := n
> +KASAN_SANITIZE_mmu_pv.o := n
> +KASAN_SANITIZE_p2m.o := n
> +KASAN_SANITIZE_multicalls.o := n
> +
>   # SPDX-License-Identifier: GPL-2.0
>   OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
>   
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index ae4a41ca19f6..27de55699f24 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -72,6 +72,7 @@
>   #include <asm/mwait.h>
>   #include <asm/pci_x86.h>
>   #include <asm/cpu.h>
> +#include <asm/kasan.h>
>   
>   #ifdef CONFIG_ACPI
>   #include <linux/acpi.h>
> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
>   	/* Get mfn list */
>   	xen_build_dynamic_phys_to_machine();
>   
> +	kasan_early_init();
> +
>   	/*
>   	 * Set up kernel GDT and segment registers, mainly so that
>   	 * -fstack-protector code can be executed.
> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
> index c8dbee62ec2a..eaf63f1f26af 100644
> --- a/arch/x86/xen/mmu_pv.c
> +++ b/arch/x86/xen/mmu_pv.c
> @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
>   
>   static void xen_post_allocator_init(void);
>   
> -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>   {
>   	struct mmuext_op op;
>   
> @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
>   	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
>   		BUG();
>   }
> -static void __init set_page_prot(void *addr, pgprot_t prot)
> +void __init set_page_prot(void *addr, pgprot_t prot)
>   {
>   	return set_page_prot_flags(addr, prot, UVMF_NONE);
>   }
> @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
>   	if (i && i < pgd_index(__START_KERNEL_map))
>   		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>   
> +#ifdef CONFIG_KASAN
> +	/*
> +	 * Copy KASAN mappings
> +	 * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
> +	 */
> +	for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
> +		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
> +#endif
> +
>   	/* Make pagetable pieces RO */
>   	set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
>   	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
> index 07054572297f..5e4729efbbe2 100644
> --- a/arch/x86/xen/multicalls.c
> +++ b/arch/x86/xen/multicalls.c
> @@ -99,6 +99,15 @@ void xen_mc_flush(void)
>   				ret++;
>   	}
>   
> +	/*
> +	 * XXX: Kasan produces quite a lot (~2000) of warnings in a form of:
> +	 *
> +	 *     (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
> +	 *
> +	 * during kasan_init(). They are benign, but silence them for now.
> +	 * Otherwise, booting takes too long due to printk() spam.
> +	 */
> +#ifndef CONFIG_KASAN

It might be interesting to identify the problematic page tables.

I guess this would require some hacking to avoid the multicalls in order
to identify which page table should not be pinned again.


Juergen

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel
  2019-12-18  9:24   ` Jürgen Groß
@ 2019-12-19 16:42     ` Sergey Dyasli
  2019-12-20  8:43       ` Jürgen Groß
  0 siblings, 1 reply; 11+ messages in thread
From: Sergey Dyasli @ 2019-12-19 16:42 UTC (permalink / raw)
  To: Jürgen Groß, xen-devel, kasan-dev, linux-kernel
  Cc: Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Boris Ostrovsky, Stefano Stabellini, George Dunlap,
	Ross Lagerwall, sergey.dyasli@citrix.com >> Sergey Dyasli

On 18/12/2019 09:24, Jürgen Groß wrote:
> On 17.12.19 15:08, Sergey Dyasli wrote:
>> This enables to use Outline instrumentation for Xen PV kernels.
>>
>> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
>> and hence disabled.
>>
>> Rough edges in the patch are marked with XXX.
>>
>> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
>> ---
>>   arch/x86/mm/init.c          | 14 ++++++++++++++
>>   arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++
>>   arch/x86/xen/Makefile       |  7 +++++++
>>   arch/x86/xen/enlighten_pv.c |  3 +++
>>   arch/x86/xen/mmu_pv.c       | 13 +++++++++++--
>>   arch/x86/xen/multicalls.c   | 10 ++++++++++
>>   drivers/xen/Makefile        |  2 ++
>>   kernel/Makefile             |  2 ++
>>   lib/Kconfig.kasan           |  3 ++-
>>   9 files changed, 79 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
>> index e7bb483557c9..0c98a45eec6c 100644
>> --- a/arch/x86/mm/init.c
>> +++ b/arch/x86/mm/init.c
>> @@ -8,6 +8,8 @@
>>   #include <linux/kmemleak.h>
>>   #include <linux/sched/task.h>
>>   +#include <xen/xen.h>
>> +
>>   #include <asm/set_memory.h>
>>   #include <asm/e820/api.h>
>>   #include <asm/init.h>
>> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end)
>>       unsigned long end_ul = (unsigned long)end;
>>       unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
>>   +    /*
>> +     * XXX: skip this for now. Otherwise it leads to:
>> +     *
>> +     * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40)
>> +     * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
>> +     * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157
>> +     *
>> +     * and further #PF error: [PROT] [WRITE] in the kernel.
>> +     */
>> +    if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
>> +        return;
>> +
> 
> I guess this is related to freeing some kasan page tables without
> unpinning them?

Your guess was correct. Turned out that early_top_pgt which I pinned and made RO
is located in .init section and that was causing issues. Unpinning it and making
RW again right after kasan_init() switches to use init_top_pgt seem to fix this
issue.

> 
>>       free_init_pages(what, begin_ul, end_ul);
>>         /*
>> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
>> index cf5bc37c90ac..caee2022f8b0 100644
>> --- a/arch/x86/mm/kasan_init_64.c
>> +++ b/arch/x86/mm/kasan_init_64.c
>> @@ -13,6 +13,8 @@
>>   #include <linux/sched/task.h>
>>   #include <linux/vmalloc.h>
>>   +#include <xen/xen.h>
>> +
>>   #include <asm/e820/types.h>
>>   #include <asm/pgalloc.h>
>>   #include <asm/tlbflush.h>
>> @@ -20,6 +22,9 @@
>>   #include <asm/pgtable.h>
>>   #include <asm/cpu_entry_area.h>
>>   +#include <xen/interface/xen.h>
>> +#include <asm/xen/hypervisor.h>
>> +
>>   extern struct range pfn_mapped[E820_MAX_ENTRIES];
>>     static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
>> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = {
>>   };
>>   #endif
>>   +#ifdef CONFIG_XEN
>> +/* XXX: this should go to some header */
>> +void __init set_page_prot(void *addr, pgprot_t prot);
>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
>> +#endif
>> +
> 
> Instead of exporting those, why don't you ...
> 
>>   void __init kasan_early_init(void)
>>   {
>>       int i;
>> @@ -332,6 +343,16 @@ void __init kasan_early_init(void)
>>       for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>>           kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>>   +    if (xen_pv_domain()) {
>> +        /* PV page tables must have PAGE_KERNEL_RO */
>> +        set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
>> +        set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
>> +        set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
> 
> add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())?

Sounds like a good suggestion, but new functions still need some header for
declarations (xen/xen.h?). And kasan_map_early_shadow() will need exporting
through kasan.h as well, but that's probably not an issue.

> 
>> +
>> +        /* Add mappings to the initial PV page tables */
>> +        kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
>> +    }
>> +
>>       kasan_map_early_shadow(early_top_pgt);
>>       kasan_map_early_shadow(init_top_pgt);
>>   }
>> @@ -369,6 +390,13 @@ void __init kasan_init(void)
>>                   __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
>>       }
>>   +    if (xen_pv_domain()) {
>> +        /* PV page tables must be pinned */
>> +        set_page_prot(early_top_pgt, PAGE_KERNEL_RO);
>> +        pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
>> +                  PFN_DOWN(__pa_symbol(early_top_pgt)));
> 
> and another one like xen_pv_kasan_init() here.

Now there needs to be a 3rd function to unpin early_top_pgt.

> 
>> +    }
>> +
>>       load_cr3(early_top_pgt);
>>       __flush_tlb_all();
>>   diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
>> index 084de77a109e..102fad0b0bca 100644
>> --- a/arch/x86/xen/Makefile
>> +++ b/arch/x86/xen/Makefile
>> @@ -1,3 +1,10 @@
>> +KASAN_SANITIZE_enlighten_pv.o := n
>> +KASAN_SANITIZE_enlighten.o := n
>> +KASAN_SANITIZE_irq.o := n
>> +KASAN_SANITIZE_mmu_pv.o := n
>> +KASAN_SANITIZE_p2m.o := n
>> +KASAN_SANITIZE_multicalls.o := n
>> +
>>   # SPDX-License-Identifier: GPL-2.0
>>   OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
>>   diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
>> index ae4a41ca19f6..27de55699f24 100644
>> --- a/arch/x86/xen/enlighten_pv.c
>> +++ b/arch/x86/xen/enlighten_pv.c
>> @@ -72,6 +72,7 @@
>>   #include <asm/mwait.h>
>>   #include <asm/pci_x86.h>
>>   #include <asm/cpu.h>
>> +#include <asm/kasan.h>
>>     #ifdef CONFIG_ACPI
>>   #include <linux/acpi.h>
>> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
>>       /* Get mfn list */
>>       xen_build_dynamic_phys_to_machine();
>>   +    kasan_early_init();
>> +
>>       /*
>>        * Set up kernel GDT and segment registers, mainly so that
>>        * -fstack-protector code can be executed.
>> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
>> index c8dbee62ec2a..eaf63f1f26af 100644
>> --- a/arch/x86/xen/mmu_pv.c
>> +++ b/arch/x86/xen/mmu_pv.c
>> @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
>>     static void xen_post_allocator_init(void);
>>   -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>>   {
>>       struct mmuext_op op;
>>   @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
>>       if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
>>           BUG();
>>   }
>> -static void __init set_page_prot(void *addr, pgprot_t prot)
>> +void __init set_page_prot(void *addr, pgprot_t prot)
>>   {
>>       return set_page_prot_flags(addr, prot, UVMF_NONE);
>>   }
>> @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
>>       if (i && i < pgd_index(__START_KERNEL_map))
>>           init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>>   +#ifdef CONFIG_KASAN
>> +    /*
>> +     * Copy KASAN mappings
>> +     * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
>> +     */
>> +    for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
>> +        init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>> +#endif
>> +
>>       /* Make pagetable pieces RO */
>>       set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
>>       set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
>> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
>> index 07054572297f..5e4729efbbe2 100644
>> --- a/arch/x86/xen/multicalls.c
>> +++ b/arch/x86/xen/multicalls.c
>> @@ -99,6 +99,15 @@ void xen_mc_flush(void)
>>                   ret++;
>>       }
>>   +    /*
>> +     * XXX: Kasan produces quite a lot (~2000) of warnings in a form of:
>> +     *
>> +     *     (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
>> +     *
>> +     * during kasan_init(). They are benign, but silence them for now.
>> +     * Otherwise, booting takes too long due to printk() spam.
>> +     */
>> +#ifndef CONFIG_KASAN
> 
> It might be interesting to identify the problematic page tables.
> 
> I guess this would require some hacking to avoid the multicalls in order
> to identify which page table should not be pinned again.

I tracked this down to xen_alloc_ptpage() in mmu_pv.c:

			if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
				__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);

kasan_populate_early_shadow() is doing lots pmd_populate_kernel() with
kasan_early_shadow_pte (mfn of which is reported by Xen). Currently I'm not
sure how to fix that. Is it possible to check that pfn has already been pinned
from Linux kernel? xen_page_pinned() seems to be an incorrect way to check that.

--
Thanks,
Sergey

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel
  2019-12-19 16:42     ` Sergey Dyasli
@ 2019-12-20  8:43       ` Jürgen Groß
  0 siblings, 0 replies; 11+ messages in thread
From: Jürgen Groß @ 2019-12-20  8:43 UTC (permalink / raw)
  To: Sergey Dyasli, xen-devel, kasan-dev, linux-kernel
  Cc: Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Boris Ostrovsky, Stefano Stabellini, George Dunlap,
	Ross Lagerwall

On 19.12.19 17:42, Sergey Dyasli wrote:
> On 18/12/2019 09:24, Jürgen Groß wrote:
>> On 17.12.19 15:08, Sergey Dyasli wrote:
>>> This enables to use Outline instrumentation for Xen PV kernels.
>>>
>>> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
>>> and hence disabled.
>>>
>>> Rough edges in the patch are marked with XXX.
>>>
>>> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
>>> ---
>>>    arch/x86/mm/init.c          | 14 ++++++++++++++
>>>    arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++
>>>    arch/x86/xen/Makefile       |  7 +++++++
>>>    arch/x86/xen/enlighten_pv.c |  3 +++
>>>    arch/x86/xen/mmu_pv.c       | 13 +++++++++++--
>>>    arch/x86/xen/multicalls.c   | 10 ++++++++++
>>>    drivers/xen/Makefile        |  2 ++
>>>    kernel/Makefile             |  2 ++
>>>    lib/Kconfig.kasan           |  3 ++-
>>>    9 files changed, 79 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
>>> index e7bb483557c9..0c98a45eec6c 100644
>>> --- a/arch/x86/mm/init.c
>>> +++ b/arch/x86/mm/init.c
>>> @@ -8,6 +8,8 @@
>>>    #include <linux/kmemleak.h>
>>>    #include <linux/sched/task.h>
>>>    +#include <xen/xen.h>
>>> +
>>>    #include <asm/set_memory.h>
>>>    #include <asm/e820/api.h>
>>>    #include <asm/init.h>
>>> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end)
>>>        unsigned long end_ul = (unsigned long)end;
>>>        unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
>>>    +    /*
>>> +     * XXX: skip this for now. Otherwise it leads to:
>>> +     *
>>> +     * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40)
>>> +     * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
>>> +     * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157
>>> +     *
>>> +     * and further #PF error: [PROT] [WRITE] in the kernel.
>>> +     */
>>> +    if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
>>> +        return;
>>> +
>>
>> I guess this is related to freeing some kasan page tables without
>> unpinning them?
> 
> Your guess was correct. Turned out that early_top_pgt which I pinned and made RO
> is located in .init section and that was causing issues. Unpinning it and making
> RW again right after kasan_init() switches to use init_top_pgt seem to fix this
> issue.
> 
>>
>>>        free_init_pages(what, begin_ul, end_ul);
>>>          /*
>>> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
>>> index cf5bc37c90ac..caee2022f8b0 100644
>>> --- a/arch/x86/mm/kasan_init_64.c
>>> +++ b/arch/x86/mm/kasan_init_64.c
>>> @@ -13,6 +13,8 @@
>>>    #include <linux/sched/task.h>
>>>    #include <linux/vmalloc.h>
>>>    +#include <xen/xen.h>
>>> +
>>>    #include <asm/e820/types.h>
>>>    #include <asm/pgalloc.h>
>>>    #include <asm/tlbflush.h>
>>> @@ -20,6 +22,9 @@
>>>    #include <asm/pgtable.h>
>>>    #include <asm/cpu_entry_area.h>
>>>    +#include <xen/interface/xen.h>
>>> +#include <asm/xen/hypervisor.h>
>>> +
>>>    extern struct range pfn_mapped[E820_MAX_ENTRIES];
>>>      static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
>>> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = {
>>>    };
>>>    #endif
>>>    +#ifdef CONFIG_XEN
>>> +/* XXX: this should go to some header */
>>> +void __init set_page_prot(void *addr, pgprot_t prot);
>>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
>>> +#endif
>>> +
>>
>> Instead of exporting those, why don't you ...
>>
>>>    void __init kasan_early_init(void)
>>>    {
>>>        int i;
>>> @@ -332,6 +343,16 @@ void __init kasan_early_init(void)
>>>        for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>>>            kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>>>    +    if (xen_pv_domain()) {
>>> +        /* PV page tables must have PAGE_KERNEL_RO */
>>> +        set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
>>> +        set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
>>> +        set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
>>
>> add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())?
> 
> Sounds like a good suggestion, but new functions still need some header for
> declarations (xen/xen.h?). And kasan_map_early_shadow() will need exporting

xen/xen-ops.h

> through kasan.h as well, but that's probably not an issue.

You could let the new function return (pgd_t *)xen_start_info->pt_base
and use that here, e.g.:

if (xen_pv_domain()) {
     pgd_t *pgd;

     pgd = xen_kasan_early_init();
     kasan_map_early_shadow(pgd);
}

> 
>>
>>> +
>>> +        /* Add mappings to the initial PV page tables */
>>> +        kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
>>> +    }
>>> +
>>>        kasan_map_early_shadow(early_top_pgt);
>>>        kasan_map_early_shadow(init_top_pgt);
>>>    }
>>> @@ -369,6 +390,13 @@ void __init kasan_init(void)
>>>                    __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
>>>        }
>>>    +    if (xen_pv_domain()) {
>>> +        /* PV page tables must be pinned */
>>> +        set_page_prot(early_top_pgt, PAGE_KERNEL_RO);
>>> +        pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
>>> +                  PFN_DOWN(__pa_symbol(early_top_pgt)));
>>
>> and another one like xen_pv_kasan_init() here.
> 
> Now there needs to be a 3rd function to unpin early_top_pgt.

Not if you do the load_cr3 in the xen pv case in the new function:

if (xen_pv_domain())
     xen_kasan_load_cr3(early_top_pgt);
else
     load_cr3(early_top_pgt);

> 
>>
>>> +    }
>>> +
>>>        load_cr3(early_top_pgt);
>>>        __flush_tlb_all();
>>>    diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
>>> index 084de77a109e..102fad0b0bca 100644
>>> --- a/arch/x86/xen/Makefile
>>> +++ b/arch/x86/xen/Makefile
>>> @@ -1,3 +1,10 @@
>>> +KASAN_SANITIZE_enlighten_pv.o := n
>>> +KASAN_SANITIZE_enlighten.o := n
>>> +KASAN_SANITIZE_irq.o := n
>>> +KASAN_SANITIZE_mmu_pv.o := n
>>> +KASAN_SANITIZE_p2m.o := n
>>> +KASAN_SANITIZE_multicalls.o := n
>>> +
>>>    # SPDX-License-Identifier: GPL-2.0
>>>    OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
>>>    diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
>>> index ae4a41ca19f6..27de55699f24 100644
>>> --- a/arch/x86/xen/enlighten_pv.c
>>> +++ b/arch/x86/xen/enlighten_pv.c
>>> @@ -72,6 +72,7 @@
>>>    #include <asm/mwait.h>
>>>    #include <asm/pci_x86.h>
>>>    #include <asm/cpu.h>
>>> +#include <asm/kasan.h>
>>>      #ifdef CONFIG_ACPI
>>>    #include <linux/acpi.h>
>>> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
>>>        /* Get mfn list */
>>>        xen_build_dynamic_phys_to_machine();
>>>    +    kasan_early_init();
>>> +
>>>        /*
>>>         * Set up kernel GDT and segment registers, mainly so that
>>>         * -fstack-protector code can be executed.
>>> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
>>> index c8dbee62ec2a..eaf63f1f26af 100644
>>> --- a/arch/x86/xen/mmu_pv.c
>>> +++ b/arch/x86/xen/mmu_pv.c
>>> @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
>>>      static void xen_post_allocator_init(void);
>>>    -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>>>    {
>>>        struct mmuext_op op;
>>>    @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
>>>        if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
>>>            BUG();
>>>    }
>>> -static void __init set_page_prot(void *addr, pgprot_t prot)
>>> +void __init set_page_prot(void *addr, pgprot_t prot)
>>>    {
>>>        return set_page_prot_flags(addr, prot, UVMF_NONE);
>>>    }
>>> @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
>>>        if (i && i < pgd_index(__START_KERNEL_map))
>>>            init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>>>    +#ifdef CONFIG_KASAN
>>> +    /*
>>> +     * Copy KASAN mappings
>>> +     * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
>>> +     */
>>> +    for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
>>> +        init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>>> +#endif
>>> +
>>>        /* Make pagetable pieces RO */
>>>        set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
>>>        set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
>>> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
>>> index 07054572297f..5e4729efbbe2 100644
>>> --- a/arch/x86/xen/multicalls.c
>>> +++ b/arch/x86/xen/multicalls.c
>>> @@ -99,6 +99,15 @@ void xen_mc_flush(void)
>>>                    ret++;
>>>        }
>>>    +    /*
>>> +     * XXX: Kasan produces quite a lot (~2000) of warnings in a form of:
>>> +     *
>>> +     *     (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
>>> +     *
>>> +     * during kasan_init(). They are benign, but silence them for now.
>>> +     * Otherwise, booting takes too long due to printk() spam.
>>> +     */
>>> +#ifndef CONFIG_KASAN
>>
>> It might be interesting to identify the problematic page tables.
>>
>> I guess this would require some hacking to avoid the multicalls in order
>> to identify which page table should not be pinned again.
> 
> I tracked this down to xen_alloc_ptpage() in mmu_pv.c:
> 
> 			if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
> 				__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
> 
> kasan_populate_early_shadow() is doing lots pmd_populate_kernel() with
> kasan_early_shadow_pte (mfn of which is reported by Xen). Currently I'm not
> sure how to fix that. Is it possible to check that pfn has already been pinned
> from Linux kernel? xen_page_pinned() seems to be an incorrect way to check that.

Right, xen_page_pinned() is not yet working at this stage of booting.

But using pmd_populate_kernel() with the same page table multiple times
is just wrong. Doing so the first time is fine, all the other cases
should just use set_pmd().


Juergen

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 0/3] basic KASAN support for Xen PV domains
  2019-12-17 18:06 ` [RFC PATCH 0/3] basic KASAN support for Xen PV domains Boris Ostrovsky
@ 2019-12-20 10:26   ` Sergey Dyasli
  0 siblings, 0 replies; 11+ messages in thread
From: Sergey Dyasli @ 2019-12-20 10:26 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: xen-devel, kasan-dev, linux-kernel, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Juergen Gross,
	Stefano Stabellini, George Dunlap, Ross Lagerwall,
	sergey.dyasli@citrix.com >> Sergey Dyasli

On 17/12/2019 18:06, Boris Ostrovsky wrote:
> 
> 
>> On Dec 17, 2019, at 9:08 AM, Sergey Dyasli <sergey.dyasli@citrix.com> wrote:
>>
>> This series allows to boot and run Xen PV kernels (Dom0 and DomU) with
>> CONFIG_KASAN=y. It has been used internally for some time now with good
>> results for finding memory corruption issues in Dom0 kernel.
>>
>> Only Outline instrumentation is supported at the moment.
>>
>> Patch 1 is of RFC quality
>> Patches 2-3 are independent and quite self-contained.
> 
> 
> Don’t you need to initialize kasan before, for example, calling kasan_alloc_pages() in patch 2?

Patch 1 is enough to correctly initialise PV Kasan. But without patch 2, lots
of false positive out-of-bounds accesses are reported once a guest starts using
PV I/O devices.

--
Thanks,
Sergey

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN
  2019-12-17 15:14   ` [Xen-devel] " Durrant, Paul
@ 2020-01-07 10:33     ` Sergey Dyasli
  0 siblings, 0 replies; 11+ messages in thread
From: Sergey Dyasli @ 2020-01-07 10:33 UTC (permalink / raw)
  To: Durrant, Paul, xen-devel, kasan-dev, linux-kernel
  Cc: Juergen Gross, Stefano Stabellini, George Dunlap, Ross Lagerwall,
	Alexander Potapenko, Andrey Ryabinin, Boris Ostrovsky,
	Dmitry Vyukov, sergey.dyasli@citrix.com >> Sergey Dyasli

On 17/12/2019 15:14, Durrant, Paul wrote:
>> -----Original Message-----
>> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of
>> Sergey Dyasli
>> Sent: 17 December 2019 14:08
>> To: xen-devel@lists.xen.org; kasan-dev@googlegroups.com; linux-
>> kernel@vger.kernel.org
>> Cc: Juergen Gross <jgross@suse.com>; Sergey Dyasli
>> <sergey.dyasli@citrix.com>; Stefano Stabellini <sstabellini@kernel.org>;
>> George Dunlap <george.dunlap@citrix.com>; Ross Lagerwall
>> <ross.lagerwall@citrix.com>; Alexander Potapenko <glider@google.com>;
>> Andrey Ryabinin <aryabinin@virtuozzo.com>; Boris Ostrovsky
>> <boris.ostrovsky@oracle.com>; Dmitry Vyukov <dvyukov@google.com>
>> Subject: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across
>> page boundary with KASAN
>>
>> From: Ross Lagerwall <ross.lagerwall@citrix.com>
>>
>> When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that
>> allocations are aligned to the next power of 2 of the size does not
>> hold. Therefore, handle grant copies that cross page boundaries.
>>
>> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
>> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
>
> Would have been nice to cc netback maintainers...

Sorry, I'll try to be more careful next time.

>
>> ---
>>  drivers/net/xen-netback/common.h  |  2 +-
>>  drivers/net/xen-netback/netback.c | 55 ++++++++++++++++++++++++-------
>>  2 files changed, 45 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-
>> netback/common.h
>> index 05847eb91a1b..e57684415edd 100644
>> --- a/drivers/net/xen-netback/common.h
>> +++ b/drivers/net/xen-netback/common.h
>> @@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
>>  	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
>>  	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
>>
>> -	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
>> +	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2];
>>  	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
>>  	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
>>  	/* passed to gnttab_[un]map_refs with pages under (un)mapping */
>> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-
>> netback/netback.c
>> index 0020b2e8c279..1541b6e0cc62 100644
>> --- a/drivers/net/xen-netback/netback.c
>> +++ b/drivers/net/xen-netback/netback.c
>> @@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue
>> *queue,
>>
>>  struct xenvif_tx_cb {
>>  	u16 pending_idx;
>> +	u8 copies;
>>  };
>
> I know we're a way off the limit (48 bytes) but I wonder if we ought to have a compile time check here that we're not overflowing skb->cb.

I will add a BUILD_BUG_ON()

>
>>
>>  #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
>> @@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue
>> *queue,
>>  {
>>  	struct gnttab_map_grant_ref *gop_map = *gopp_map;
>>  	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
>> +	u8 copies = XENVIF_TX_CB(skb)->copies;
>>  	/* This always points to the shinfo of the skb being checked, which
>>  	 * could be either the first or the one on the frag_list
>>  	 */
>> @@ -450,23 +452,27 @@ static int xenvif_tx_check_gop(struct xenvif_queue
>> *queue,
>>  	int nr_frags = shinfo->nr_frags;
>>  	const bool sharedslot = nr_frags &&
>>  				frag_get_pending_idx(&shinfo->frags[0]) ==
>> pending_idx;
>> -	int i, err;
>> +	int i, err = 0;
>>
>> -	/* Check status of header. */
>> -	err = (*gopp_copy)->status;
>> -	if (unlikely(err)) {
>> -		if (net_ratelimit())
>> -			netdev_dbg(queue->vif->dev,
>> +	while (copies) {
>> +		/* Check status of header. */
>> +		int newerr = (*gopp_copy)->status;
>> +		if (unlikely(newerr)) {
>> +			if (net_ratelimit())
>> +				netdev_dbg(queue->vif->dev,
>>  				   "Grant copy of header failed! status: %d
>> pending_idx: %u ref: %u\n",
>>  				   (*gopp_copy)->status,
>>  				   pending_idx,
>>  				   (*gopp_copy)->source.u.ref);
>> -		/* The first frag might still have this slot mapped */
>> -		if (!sharedslot)
>> -			xenvif_idx_release(queue, pending_idx,
>> -					   XEN_NETIF_RSP_ERROR);
>> +			/* The first frag might still have this slot mapped */
>> +			if (!sharedslot && !err)
>> +				xenvif_idx_release(queue, pending_idx,
>> +						   XEN_NETIF_RSP_ERROR);
>
> Can't this be done after the loop, if there is an accumulated err? I think it would make the code slightly neater.

Looks like xenvif_idx_release() indeed wants to be just after the loop.

>
>> +			err = newerr;
>> +		}
>> +		(*gopp_copy)++;
>> +		copies--;
>>  	}
>> -	(*gopp_copy)++;
>>
>>  check_frags:
>>  	for (i = 0; i < nr_frags; i++, gop_map++) {
>> @@ -910,6 +916,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue
>> *queue,
>>  			xenvif_tx_err(queue, &txreq, extra_count, idx);
>>  			break;
>>  		}
>> +		XENVIF_TX_CB(skb)->copies = 0;
>>
>>  		skb_shinfo(skb)->nr_frags = ret;
>>  		if (data_len < txreq.size)
>> @@ -933,6 +940,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue
>> *queue,
>>  						   "Can't allocate the frag_list
>> skb.\n");
>>  				break;
>>  			}
>> +			XENVIF_TX_CB(nskb)->copies = 0;
>>  		}
>>
>>  		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
>> @@ -990,6 +998,31 @@ static void xenvif_tx_build_gops(struct xenvif_queue
>> *queue,
>>
>>  		queue->tx_copy_ops[*copy_ops].len = data_len;
>
> If offset_in_page(skb->data)+ data_len can exceed XEN_PAGE_SIZE, does this not need to be truncated?

It is performed as the first thing inside the if condition below.

>>  		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
>> +		XENVIF_TX_CB(skb)->copies++;
>> +
>> +		if (offset_in_page(skb->data) + data_len > XEN_PAGE_SIZE) {
>> +			unsigned int extra_len = offset_in_page(skb->data) +
>> +					     data_len - XEN_PAGE_SIZE;
>> +
>> +			queue->tx_copy_ops[*copy_ops].len -= extra_len;
>> +			(*copy_ops)++;
>> +
>> +			queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
>> +			queue->tx_copy_ops[*copy_ops].source.domid =
>> +				queue->vif->domid;
>> +			queue->tx_copy_ops[*copy_ops].source.offset =
>> +				txreq.offset + data_len - extra_len;
>> +
>> +			queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
>> +				virt_to_gfn(skb->data + data_len - extra_len);
>> +			queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
>> +			queue->tx_copy_ops[*copy_ops].dest.offset = 0;
>> +
>> +			queue->tx_copy_ops[*copy_ops].len = extra_len;
>> +			queue->tx_copy_ops[*copy_ops].flags =
>> GNTCOPY_source_gref;
>> +
>> +			XENVIF_TX_CB(skb)->copies++;
>> +		}
>>
>>  		(*copy_ops)++;
>>
>> --
>> 2.17.1
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xenproject.org
>> https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-01-07 10:34 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-17 14:08 [RFC PATCH 0/3] basic KASAN support for Xen PV domains Sergey Dyasli
2019-12-17 14:08 ` [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel Sergey Dyasli
2019-12-18  9:24   ` Jürgen Groß
2019-12-19 16:42     ` Sergey Dyasli
2019-12-20  8:43       ` Jürgen Groß
2019-12-17 14:08 ` [RFC PATCH 2/3] xen: teach KASAN about grant tables Sergey Dyasli
2019-12-17 14:08 ` [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN Sergey Dyasli
2019-12-17 15:14   ` [Xen-devel] " Durrant, Paul
2020-01-07 10:33     ` Sergey Dyasli
2019-12-17 18:06 ` [RFC PATCH 0/3] basic KASAN support for Xen PV domains Boris Ostrovsky
2019-12-20 10:26   ` Sergey Dyasli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).