* [PATCH] x86-64 kprobes: handle %RIP-relative addressing mode
From: Roland McGrath @ 2005-03-13  9:54 UTC
  To: Andi Kleen, Andrew Morton
  Cc: linux-kernel, Jim Keniston, Prasanna S. Panchamukhi

The existing x86-64 kprobes implementation doesn't cope with the
%RIP-relative addressing mode.  Kprobes work by single-stepping a copy of
an instruction overwritten by a breakpoint.  When a probe is inserted on an
instruction that uses the %RIP-relative data addressing mode, the copy,
executing at a different location, resolves a different effective address,
so the mere presence of the probe makes the probed code read or write the
wrong memory location.
Without this problem fixed, it is woefully unsafe to use the current
kprobes code on x86-64 unless you are sure the instruction you instrument
is not one that accesses global data using the %RIP addressing mode.
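
To make the failure concrete (the instruction, addresses, and displacement
below are made up for illustration), consider what the same bytes compute
when run from a copy:

	/*
	 * A minimal sketch, assuming "mov 0x200(%rip),%rax"
	 * (48 8b 05 00 02 00 00, 7 bytes).  %RIP names the address
	 * just past the executing instruction, so the copy resolves
	 * a completely different effective address.
	 */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t probed = 0xffffffff80100000ULL; /* hypothetical probe site */
		uint64_t copied = 0xfffffffffff00000ULL; /* hypothetical insn slot */
		int32_t disp = 0x200;
		unsigned insn_len = 7;

		printf("original accesses %#llx\n",
		       (unsigned long long) (probed + insn_len + disp));
		printf("copy accesses     %#llx\n",
		       (unsigned long long) (copied + insn_len + disp));
		return 0;
	}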

This patch fixes the problem by recognizing the %RIP-relative addressing
mode in an instruction when it's being copied to insert the kprobe, and
adjusting its displacement so that it finds the right data.  Taking this
approach requires that the copied instruction's %RIP value be within 2GB of
the virtual address of the data, i.e. the text/data areas of the kernel
code and loaded modules.  To satisfy this need, the patch also replaces
the use of vmalloc for getting instruction pages with lower-level calls
that use a different part of the address space: the area at the top of the address
space just above where modules are loaded.  I left one page of red zone at
the top, and the 1MB-4KB thus available (1044480 bytes, at MAX_INSN_SIZE =
15 bytes per instruction slot) allows for at most 69632 kprobes.  (If we
ever need to overcome that limit, we can change this to add a hook into
the arch/x86_64/kernel/module.c code and allocate pages inside the module
loading area instead.)
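
A sketch of the fixup arithmetic the patch performs in arch_copy_kprobe
(the variable names here are made up):

	/*
	 * The copy must resolve the same absolute address as the
	 * original:
	 *	copy_addr + insn_len + new_disp == orig_addr + insn_len + old_disp
	 * so:
	 */
	s64 new_disp = (s64) old_disp + (orig_addr - copy_addr);
	/*
	 * ...which must still fit the signed 32-bit displacement
	 * field -- hence the requirement that the copy live within
	 * 2GB of the original:
	 */
	BUG_ON((s64) (s32) new_disp != new_disp);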


Thanks,
Roland


Signed-off-by: Roland McGrath <roland@redhat.com>

--- linux-2.6/arch/x86_64/kernel/kprobes.c
+++ linux-2.6/arch/x86_64/kernel/kprobes.c
@@ -25,6 +25,8 @@
  *		interface to access function arguments.
  * 2004-Oct	Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi
  *		<prasanna@in.ibm.com> adapted for x86_64
+ * 2005-Mar	Roland McGrath <roland@redhat.com>
+ *		Fixed to handle %rip-relative addressing mode correctly.
  */
 
 #include <linux/config.h>
@@ -86,9 +88,124 @@ int arch_prepare_kprobe(struct kprobe *p
 	return 0;
 }
 
+/*
+ * Determine if the instruction uses the %rip-relative addressing mode.
+ * If it does, return the address of the 32-bit displacement word.
+ * If not, return null.
+ */
+static inline s32 *is_riprel(u8 *insn)
+{
+	static const unsigned char onebyte_has_modrm[256] = {
+		/*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
+		/*       -------------------------------        */
+		/* 00 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 00 */
+		/* 10 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 10 */
+		/* 20 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 20 */
+		/* 30 */ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 30 */
+		/* 40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40 */
+		/* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 50 */
+		/* 60 */ 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0, /* 60 */
+		/* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 70 */
+		/* 80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 80 */
+		/* 90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 90 */
+		/* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* a0 */
+		/* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* b0 */
+		/* c0 */ 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* c0 */
+		/* d0 */ 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1, /* d0 */
+		/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* e0 */
+		/* f0 */ 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1  /* f0 */
+		/*       -------------------------------        */
+		/*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
+	};
+	static const unsigned char twobyte_has_modrm[256] = {
+		/*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
+		/*       -------------------------------        */
+		/* 00 */ 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1, /* 0f */
+		/* 10 */ 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0, /* 1f */
+		/* 20 */ 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1, /* 2f */
+		/* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */
+		/* 40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4f */
+		/* 50 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 5f */
+		/* 60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6f */
+		/* 70 */ 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1, /* 7f */
+		/* 80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 8f */
+		/* 90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 9f */
+		/* a0 */ 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1, /* af */
+		/* b0 */ 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1, /* bf */
+		/* c0 */ 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, /* cf */
+		/* d0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* df */
+		/* e0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ef */
+		/* f0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0  /* ff */
+		/*       -------------------------------        */
+		/*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
+	};
+	int need_modrm;
+
+	/* Skip legacy instruction prefixes.  */
+	while (1) {
+		switch (*insn) {
+		case 0x66:
+		case 0x67:
+		case 0x2e:
+		case 0x3e:
+		case 0x26:
+		case 0x64:
+		case 0x65:
+		case 0x36:
+		case 0xf0:
+		case 0xf3:
+		case 0xf2:
+			++insn;
+			continue;
+		}
+		break;
+	}
+
+	/* Skip REX instruction prefix.  */
+	if ((*insn & 0xf0) == 0x40)
+		++insn;
+
+	if (*insn == 0x0f) {	/* Two-byte opcode.  */
+		need_modrm = twobyte_has_modrm[*++insn];
+	} else {		/* One-byte opcode.  */
+		need_modrm = onebyte_has_modrm[*insn];
+	}
+
+	if (need_modrm) {
+		u8 modrm = *++insn;
+		if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */
+			/* Displacement follows ModRM byte.  */
+			return (s32 *) ++insn;
+		}
+	}
+
+	/* No %rip-relative addressing mode here.  */
+	return NULL;
+}
+
 void arch_copy_kprobe(struct kprobe *p)
 {
+	s32 *ripdisp;
 	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
+	ripdisp = is_riprel(p->ainsn.insn);
+	if (ripdisp) {
+		/*
+		 * The copied instruction uses the %rip-relative
+		 * addressing mode.  Adjust the displacement for the
+		 * difference between the original location of this
+		 * instruction and the location of the copy that will
+		 * actually be run.  The tricky bit here is making sure
+		 * that the sign extension happens correctly in this
+		 * calculation, since we need a signed 32-bit result to
+		 * be sign-extended to 64 bits when it's added to the
+		 * %rip value and yield the same 64-bit result that the
+		 * sign-extension of the original signed 32-bit
+		 * displacement would have given.
+		 */
+		s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn;
+		BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
+		*ripdisp = disp;
+	}
 }
 
 void arch_remove_kprobe(struct kprobe *p)
@@ -417,6 +534,8 @@ static kprobe_opcode_t *get_insn_slot(vo
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
+	struct vm_struct *area;
+	struct page **pages;
 
 	hlist_for_each(pos, &kprobe_insn_pages) {
 		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
@@ -439,12 +558,52 @@ static kprobe_opcode_t *get_insn_slot(vo
 	if (!kip) {
 		return NULL;
 	}
-	kip->insns = (kprobe_opcode_t*) __vmalloc(PAGE_SIZE,
-		GFP_KERNEL|__GFP_HIGHMEM, __pgprot(__PAGE_KERNEL_EXEC));
-	if (!kip->insns) {
+
+	/*
+	 * For the %rip-relative displacement fixups to be doable, we
+	 * need our instruction copy to be within +/- 2GB of any data
+	 * it might access via %rip.  That is, within 2GB of where the
+	 * kernel image and loaded module images reside.  From the base
+	 * of kernel text (see vmlinux.lds.S) up through the top of the
+	 * address space is less than 2GB total.  There is a megabyte
+	 * of space free from MODULES_END up to the top of the address
+	 * space.  We cap it one page short of that just to have some
+	 * unmapped space at the very top for sanity's sake in case of
+	 * *(NULL - constant) accesses in buggy kernel code.
+	 *
+	 * This basically replicates __vmalloc, except that it uses a
+	 * range of addresses starting at MODULES_END.  This also
+	 * allocates a single page of address space with no following
+	 * guard page (__get_vm_area always adds PAGE_SIZE to the size,
+	 * so by passing zero we get the one page).  We set up all the
+	 * data structures here such that a normal vfree call tears
+	 * them all down just right.
+	 */
+	area = __get_vm_area(0, VM_ALLOC, MODULES_END, 0ULL - PAGE_SIZE);
+	if (!area)
+		goto fail_kip;
+	area->nr_pages = 1;
+	area->pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
+	if (!area->pages)
+		goto fail_area;
+	area->pages[0] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
+	if (!area->pages[0])
+		goto fail_pages;
+	pages = area->pages;
+	if (map_vm_area(area, PAGE_KERNEL_EXEC, &pages)) {
+		__free_page(area->pages[0]);
+	fail_pages:
+		kfree(area->pages);
+	fail_area:
+		remove_vm_area(area->addr);
+		kfree(area);
+	fail_kip:
 		kfree(kip);
 		return NULL;
 	}
+	BUG_ON(pages != area->pages + 1);
+	kip->insns = (kprobe_opcode_t *) area->addr;
+
 	INIT_HLIST_NODE(&kip->hlist);
 	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
 	memset(kip->slot_used, 0, INSNS_PER_PAGE);
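
As a worked example of the decoder (the bytes are chosen for
illustration), consider f2 44 0f 10 05 d4 00 00 00, i.e.
movsd 0xd4(%rip),%xmm8:

	u8 insn[] = { 0xf2, 0x44, 0x0f, 0x10, 0x05, 0xd4, 0x00, 0x00, 0x00 };
	s32 *disp = is_riprel(insn);	/* points at insn[5]; *disp == 0xd4 */
	/*
	 * How is_riprel() walks the bytes:
	 *   0xf2        legacy prefix, skipped by the prefix loop
	 *   0x44        REX prefix (0x44 & 0xf0 == 0x40), skipped
	 *   0x0f 0x10   two-byte opcode; twobyte_has_modrm[0x10] == 1
	 *   0x05        ModRM: (0x05 & 0xc7) == 0x05, %rip+disp32 mode
	 * so it returns a pointer to the four displacement bytes.
	 */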


* Re: [PATCH] x86-64 kprobes: handle %RIP-relative addressing mode
From: Andi Kleen @ 2005-03-13 16:37 UTC
  To: Roland McGrath; +Cc: linux-kernel

Roland McGrath <roland@redhat.com> writes:

> The existing x86-64 kprobes implementation doesn't cope with the
> %RIP-relative addressing mode.  Kprobes work by single-stepping a copy of

Thanks for fixing that long-standing bug.

> +	static const unsigned char onebyte_has_modrm[256] = {

Can you turn these two arrays into a bitmap please? 

> +	 * This basically replicates __vmalloc, except that it uses a

This shouldn't be open-coded here. Instead, make a utility function
like vmalloc_range() that takes a start and an end address, and
make the module allocation use it too.
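
Roughly like this (just the suggestion spelled out; vmalloc_range is a
proposed name, not an existing function, and its body would be the same
__get_vm_area/alloc_page/map_vm_area sequence your patch open-codes,
parameterized by range):

	void *vmalloc_range(unsigned long size, unsigned long start,
			    unsigned long end, pgprot_t prot);

	/* kprobes would then do: */
	kip->insns = vmalloc_range(PAGE_SIZE, MODULES_END,
				   0UL - PAGE_SIZE, PAGE_KERNEL_EXEC);

	/* and the module loader could share it: */
	void *module_alloc(unsigned long size)
	{
		return vmalloc_range(size, MODULES_VADDR, MODULES_END,
				     PAGE_KERNEL_EXEC);
	}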

Also you should fix up asm-x86_64/page.h and Documentation/x86_64/mm.txt
with the new fixed allocation.

> +	 * range of addresses starting at MODULES_END.  This also
> +	 * allocates a single page of address space with no following
> +	 * guard page (__get_vm_area always adds PAGE_SIZE to the size,
> +	 * so by passing zero we get the one page).  We set up all the

I think Andrea has just changed that and the patch went into
mainline. Be careful with merging.

-Andi


* Re: [PATCH] x86-64 kprobes: handle %RIP-relative addressing mode
From: Andrew Morton @ 2005-03-13 21:48 UTC
  To: Roland McGrath; +Cc: ak, linux-kernel, jkenisto, PRASANNA

Roland McGrath <roland@redhat.com> wrote:
>
> +	area = __get_vm_area(0, VM_ALLOC, MODULES_END, 0ULL - PAGE_SIZE);

The longlong here seems wrong?  If this is to mean "the top of the address
space minus a page" then unsigned long is the appropriate type.
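
E.g., the spelling that seems intended:

	/* unsigned long arithmetic wraps modulo 2^64, giving the base
	 * of the highest page without any long long: */
	area = __get_vm_area(0, VM_ALLOC, MODULES_END, 0UL - PAGE_SIZE);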



* Re: [PATCH] x86-64 kprobes: handle %RIP-relative addressing mode
From: Roland McGrath @ 2005-03-15  8:59 UTC
  To: Andi Kleen, Andrew Morton; +Cc: linux-kernel

> Can you turn these two arrays into a bitmap please? 

Ok.

> This shouldn't be opencoded here. Instead make a utility function
> like vmalloc_range() that takes a start and end address and
> make the module allocation use it too.
> 
> Also you should fix up asm-x86_64/page.h and Documentation/x86_64/mm.txt
> with the new fixed allocation.
[...]
> I think Andrea has just changed that and the patch went into
> mainline. Be careful with merging.

Since __get_vm_area has been changed to make it harder to avoid the guard
page, I decided just to punt and use module_alloc instead.  This works
either with or without the -mm patches that clean it up to use __vmalloc_area.
There is enough address space in the module area that I'm not going to
worry about each page kprobes uses wasting a second page of address space.

Here is a new version of the patch that addresses your comments.


Thanks,
Roland


Signed-off-by: Roland McGrath <roland@redhat.com>

--- linux-2.6/arch/x86_64/kernel/kprobes.c
+++ linux-2.6/arch/x86_64/kernel/kprobes.c
@@ -25,6 +25,8 @@
  *		interface to access function arguments.
  * 2004-Oct	Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi
  *		<prasanna@in.ibm.com> adapted for x86_64
+ * 2005-Mar	Roland McGrath <roland@redhat.com>
+ *		Fixed to handle %rip-relative addressing mode correctly.
  */
 
 #include <linux/config.h>
@@ -34,7 +36,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
-#include <linux/vmalloc.h>
+#include <linux/moduleloader.h>
 
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
@@ -86,9 +88,132 @@ int arch_prepare_kprobe(struct kprobe *p
 	return 0;
 }
 
+/*
+ * Determine if the instruction uses the %rip-relative addressing mode.
+ * If it does, return the address of the 32-bit displacement word.
+ * If not, return null.
+ */
+static inline s32 *is_riprel(u8 *insn)
+{
+#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)		      \
+	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
+	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
+	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
+	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
+	 << (row % 64))
+	static const u64 onebyte_has_modrm[256 / 64] = {
+		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
+		/*      -------------------------------         */
+		W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */
+		W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */
+		W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */
+		W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */
+		W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
+		W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */
+		W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */
+		W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
+		W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
+		W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */
+		W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */
+		W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */
+		W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */
+		W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */
+		W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
+		W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1)  /* f0 */
+		/*      -------------------------------         */
+		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
+	};
+	static const u64 twobyte_has_modrm[256 / 64] = {
+		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
+		/*      -------------------------------         */
+		W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */
+		W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */
+		W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */
+		W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */
+		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */
+		W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */
+		W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */
+		W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */
+		W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */
+		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */
+		W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */
+		W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */
+		W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */
+		W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */
+		W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */
+		W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0)  /* ff */
+		/*      -------------------------------         */
+		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
+	};
+#undef	W
+	int need_modrm;
+
+	/* Skip legacy instruction prefixes.  */
+	while (1) {
+		switch (*insn) {
+		case 0x66:
+		case 0x67:
+		case 0x2e:
+		case 0x3e:
+		case 0x26:
+		case 0x64:
+		case 0x65:
+		case 0x36:
+		case 0xf0:
+		case 0xf3:
+		case 0xf2:
+			++insn;
+			continue;
+		}
+		break;
+	}
+
+	/* Skip REX instruction prefix.  */
+	if ((*insn & 0xf0) == 0x40)
+		++insn;
+
+	if (*insn == 0x0f) {	/* Two-byte opcode.  */
+		++insn;
+		need_modrm = test_bit(*insn, twobyte_has_modrm);
+	} else {		/* One-byte opcode.  */
+		need_modrm = test_bit(*insn, onebyte_has_modrm);
+	}
+
+	if (need_modrm) {
+		u8 modrm = *++insn;
+		if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */
+			/* Displacement follows ModRM byte.  */
+			return (s32 *) ++insn;
+		}
+	}
+
+	/* No %rip-relative addressing mode here.  */
+	return NULL;
+}
+
 void arch_copy_kprobe(struct kprobe *p)
 {
+	s32 *ripdisp;
 	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
+	ripdisp = is_riprel(p->ainsn.insn);
+	if (ripdisp) {
+		/*
+		 * The copied instruction uses the %rip-relative
+		 * addressing mode.  Adjust the displacement for the
+		 * difference between the original location of this
+		 * instruction and the location of the copy that will
+		 * actually be run.  The tricky bit here is making sure
+		 * that the sign extension happens correctly in this
+		 * calculation, since we need a signed 32-bit result to
+		 * be sign-extended to 64 bits when it's added to the
+		 * %rip value and yield the same 64-bit result that the
+		 * sign-extension of the original signed 32-bit
+		 * displacement would have given.
+		 */
+		s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn;
+		BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
+		*ripdisp = disp;
+	}
 }
 
 void arch_remove_kprobe(struct kprobe *p)
@@ -439,8 +564,15 @@ static kprobe_opcode_t *get_insn_slot(vo
 	if (!kip) {
 		return NULL;
 	}
-	kip->insns = (kprobe_opcode_t*) __vmalloc(PAGE_SIZE,
-		GFP_KERNEL|__GFP_HIGHMEM, __pgprot(__PAGE_KERNEL_EXEC));
+
+	/*
+	 * For the %rip-relative displacement fixups to be doable, we
+	 * need our instruction copy to be within +/- 2GB of any data it
+	 * might access via %rip.  That is, within 2GB of where the
+	 * kernel image and loaded module images reside.  So we allocate
+	 * a page in the module loading area.
+	 */
+	kip->insns = module_alloc(PAGE_SIZE);
 	if (!kip->insns) {
 		kfree(kip);
 		return NULL;
@@ -481,7 +614,7 @@ static void free_insn_slot(kprobe_opcode
 					hlist_add_head(&kip->hlist,
 						&kprobe_insn_pages);
 				} else {
-					vfree(kip->insns);
+					module_free(NULL, kip->insns);
 					kfree(kip);
 				}
 			}
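
For reference, the W() packing can be cross-checked in isolation: each
row contributes 16 bits at offset (row % 64), four rows OR into one
64-bit word, and test_bit() reduces to shifting within the word array.
A stand-alone sketch (user-space, with two rows hand-packed):

	#include <assert.h>
	#include <stdint.h>

	/* What test_bit() does on x86-64: index an array of 64-bit words. */
	static int bit_is_set(const uint64_t *map, unsigned int n)
	{
		return (map[n / 64] >> (n % 64)) & 1;
	}

	int main(void)
	{
		uint64_t map[256 / 64] = { 0 };

		/* Row 0x00 of the one-byte table is
		 * 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0 == 0x0f0f;
		 * row 0x80 is all ones. */
		map[0x00 / 64] |= 0x0f0fULL << (0x00 % 64);
		map[0x80 / 64] |= 0xffffULL << (0x80 % 64);

		assert(bit_is_set(map, 0x00));	/* add r/m8,r8: has ModRM */
		assert(!bit_is_set(map, 0x90));	/* nop: no ModRM */
		return 0;
	}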


* Re: [PATCH] x86-64 kprobes: handle %RIP-relative addressing mode
From: Oleg Nesterov @ 2005-03-13 19:22 UTC
  To: Roland McGrath; +Cc: linux-kernel, Andi Kleen

Roland McGrath wrote:
>
> +	 * This basically replicates __vmalloc, except that it uses a
> +	 * range of addresses starting at MODULES_END.  This also

Could you look at these patches:

[PATCH 1/5] vmalloc: introduce __vmalloc_area() function
http://marc.theaimsgroup.com/?l=linux-kernel&m=111013183331326

[PATCH 5/5] vmalloc: use list of pages instead of array in vm_struct
http://marc.theaimsgroup.com/?l=linux-kernel&m=111013224029332

They are in -mm3 now. Note that the second one will conflict with
your patch.

Is it possible to use __vmalloc_area()?

Oleg.

