LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Adrian Hunter <adrian.hunter@intel.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Nadav Amit <nadav.amit@gmail.com>,
	Andi Kleen <ak@linux.intel.com>, Ingo Molnar <mingo@redhat.com>,
	Andy Lutomirski <luto@kernel.org>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Edward Cree <ecree@solarflare.com>,
	"H . Peter Anvin" <hpa@zytor.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>, X86 ML <x86@kernel.org>,
	Paolo Abeni <pabeni@redhat.com>, Borislav Petkov <bp@alien8.de>,
	David Woodhouse <dwmw@amazon.co.uk>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	songliubraving@fb.com
Subject: Re: Tracing text poke / kernel self-modifying code (Was: Re: [RFC v2 0/6] x86: dynamic indirect branch promotion)
Date: Thu, 12 Sep 2019 10:00:39 +0300
Message-ID: <533cc422-3f26-591f-f41c-b385d8c8ff05@intel.com> (raw)
In-Reply-To: <20190829114602.GR2369@hirez.programming.kicks-ass.net>

On 29/08/19 2:46 PM, Peter Zijlstra wrote:
> On Thu, Aug 29, 2019 at 12:40:56PM +0300, Adrian Hunter wrote:
>> Can you expand on "and ensure the poke_handler preserves the existing
>> control flow"?  Whatever the INT3-handler does will be traced normally so
>> long as it does not itself execute self-modified code.
> 
> My thinking was that the code shouldn't change semantics before emitting
> the RECORD_TEXT_POKE; but you're right in that that doesn't seem to
> matter much.
> 
> Either we run the old code or INT3 does 'something'.  Then we get
> RECORD_TEXT_POKE and finish the poke.  Which tells that the moment INT3
> stops the new text is in place.
> 
> I suppose that works too, and requires less changes.


What about this?


diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 70c09967a999..00aa9bef2b9d 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -30,6 +30,7 @@ struct text_poke_loc {
 	void *addr;
 	size_t len;
 	const char opcode[POKE_MAX_OPCODE_SIZE];
+	char old_opcode[POKE_MAX_OPCODE_SIZE];
 };
 
 extern void text_poke_early(void *addr, const void *opcode, size_t len);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ccd32013c47a..c781bbbbbafd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -3,6 +3,7 @@
 
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/perf_event.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
@@ -1045,8 +1046,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 	/*
 	 * First step: add a int3 trap to the address that will be patched.
 	 */
-	for (i = 0; i < nr_entries; i++)
+	for (i = 0; i < nr_entries; i++) {
+		memcpy(tp[i].old_opcode, tp[i].addr, tp[i].len);
 		text_poke(tp[i].addr, &int3, sizeof(int3));
+	}
 
 	on_each_cpu(do_sync_core, NULL, 1);
 
@@ -1071,6 +1074,11 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 		on_each_cpu(do_sync_core, NULL, 1);
 	}
 
+	for (i = 0; i < nr_entries; i++) {
+		perf_event_text_poke((unsigned long)tp[i].addr,
+				     tp[i].old_opcode, tp[i].opcode, tp[i].len);
+	}
+
 	/*
 	 * Third step: replace the first byte (int3) by the first byte of
 	 * replacing opcode.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61448c19a132..f4c6095d2110 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1183,6 +1183,8 @@ extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_namespaces(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
+extern void perf_event_text_poke(unsigned long addr, const void *old_bytes,
+				 const void *new_bytes, size_t len);
 
 /* Callchains */
 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
@@ -1406,6 +1408,10 @@ static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
+static inline void perf_event_text_poke(unsigned long addr,
+					const void *old_bytes,
+					const void *new_bytes,
+					size_t len)			{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index bb7b271397a6..6396d4c0d2f9 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -375,7 +375,8 @@ struct perf_event_attr {
 				ksymbol        :  1, /* include ksymbol events */
 				bpf_event      :  1, /* include bpf events */
 				aux_output     :  1, /* generate AUX records instead of events */
-				__reserved_1   : 32;
+				text_poke      :  1, /* include text poke events */
+				__reserved_1   : 31;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -1000,6 +1001,22 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_BPF_EVENT			= 18,
 
+	/*
+	 * Records changes to kernel text i.e. self-modified code.
+	 * 'len' is the number of old bytes, which is the same as the number
+	 * of new bytes. 'bytes' contains the old bytes followed immediately
+	 * by the new bytes.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				addr;
+	 *	u16				len;
+	 *	u8				bytes[];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+ 	PERF_RECORD_TEXT_POKE			= 19,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 811bb333c986..43c0d3d232dc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
 static atomic_t nr_ksymbol_events __read_mostly;
 static atomic_t nr_bpf_events __read_mostly;
+static atomic_t nr_text_poke_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4339,7 +4340,7 @@ static bool is_sb_event(struct perf_event *event)
 	if (attr->mmap || attr->mmap_data || attr->mmap2 ||
 	    attr->comm || attr->comm_exec ||
 	    attr->task || attr->ksymbol ||
-	    attr->context_switch ||
+	    attr->context_switch || attr->text_poke ||
 	    attr->bpf_event)
 		return true;
 	return false;
@@ -4413,6 +4414,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_ksymbol_events);
 	if (event->attr.bpf_event)
 		atomic_dec(&nr_bpf_events);
+	if (event->attr.text_poke)
+		atomic_dec(&nr_text_poke_events);
 
 	if (dec) {
 		if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -8045,6 +8048,85 @@ void perf_event_bpf_event(struct bpf_prog *prog,
 	perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
 }
 
+struct perf_text_poke_event {
+	const void		*old_bytes;
+	const void		*new_bytes;
+	size_t			pad;
+	u16			len;
+
+	struct {
+		struct perf_event_header	header;
+
+		u64				addr;
+	} event_id;
+};
+
+static int perf_event_text_poke_match(struct perf_event *event)
+{
+	return event->attr.text_poke;
+}
+
+static void perf_event_text_poke_output(struct perf_event *event, void *data)
+{
+	struct perf_text_poke_event *text_poke_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	u64 padding = 0;
+	int ret;
+
+	if (!perf_event_text_poke_match(event))
+		return;
+
+	perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
+
+	ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, text_poke_event->event_id);
+	perf_output_put(&handle, text_poke_event->len);
+
+	__output_copy(&handle, text_poke_event->old_bytes, text_poke_event->len);
+	__output_copy(&handle, text_poke_event->new_bytes, text_poke_event->len);
+
+	if (text_poke_event->pad)
+		__output_copy(&handle, &padding, text_poke_event->pad);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+void perf_event_text_poke(unsigned long addr, const void *old_bytes,
+			  const void *new_bytes, size_t len)
+{
+	struct perf_text_poke_event text_poke_event;
+	size_t tot, pad;
+
+	if (!atomic_read(&nr_text_poke_events))
+		return;
+
+	tot  = sizeof(text_poke_event.len) + (len << 1);
+	pad  = ALIGN(tot, sizeof(u64)) - tot;
+
+	text_poke_event = (struct perf_text_poke_event){
+		.old_bytes    = old_bytes,
+		.new_bytes    = new_bytes,
+		.pad          = pad,
+		.len          = len,
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_TEXT_POKE,
+				.misc = PERF_RECORD_MISC_KERNEL,
+				.size = sizeof(text_poke_event.event_id) + tot + pad,
+			},
+			.addr = addr,
+		},
+	};
+
+	perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL);
+}
+
 void perf_event_itrace_started(struct perf_event *event)
 {
 	event->attach_state |= PERF_ATTACH_ITRACE;
@@ -10331,6 +10413,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_ksymbol_events);
 	if (event->attr.bpf_event)
 		atomic_inc(&nr_bpf_events);
+	if (event->attr.text_poke)
+		atomic_inc(&nr_text_poke_events);
 
 	if (inc) {
 		/*
-- 
2.17.1


  reply index

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-31  7:21 [RFC v2 0/6] x86: dynamic indirect branch promotion Nadav Amit
2018-12-31  7:21 ` [RFC v2 1/6] x86: introduce kernel restartable sequence Nadav Amit
2018-12-31 20:08   ` Andy Lutomirski
2018-12-31 21:12     ` Nadav Amit
2019-01-03 22:21   ` Andi Kleen
2019-01-03 22:29     ` Nadav Amit
2019-01-03 22:48       ` Andi Kleen
2019-01-03 22:52         ` Nadav Amit
2019-01-03 23:40           ` Andi Kleen
2019-01-03 23:56             ` Nadav Amit
2019-01-04  0:34   ` hpa
2018-12-31  7:21 ` [RFC v2 2/6] objtool: ignore instructions Nadav Amit
2018-12-31  7:21 ` [RFC v2 3/6] x86: patch indirect branch promotion Nadav Amit
2018-12-31  7:21 ` [RFC v2 4/6] x86: interface for accessing indirect branch locations Nadav Amit
2018-12-31  7:21 ` [RFC v2 5/6] x86: learning and patching indirect branch targets Nadav Amit
2018-12-31 20:05   ` Andy Lutomirski
2018-12-31 21:07     ` Nadav Amit
2018-12-31  7:21 ` [RFC v2 6/6] x86: outline optpoline Nadav Amit
2018-12-31 19:51 ` [RFC v2 0/6] x86: dynamic indirect branch promotion Andy Lutomirski
2018-12-31 19:53   ` Nadav Amit
2019-01-03 18:10     ` Josh Poimboeuf
2019-01-03 18:30       ` Nadav Amit
2019-01-03 20:31         ` Josh Poimboeuf
2019-01-03 22:18 ` Andi Kleen
2019-01-07 16:32   ` Peter Zijlstra
2019-01-08  7:47     ` Adrian Hunter
2019-01-08  9:25       ` Peter Zijlstra
2019-01-08 10:01         ` Adrian Hunter
2019-01-08 10:10           ` Peter Zijlstra
2019-01-08 17:27             ` Andi Kleen
2019-01-08 18:28               ` Nadav Amit
2019-01-08 19:01                 ` Peter Zijlstra
2019-01-08 20:47                   ` Nadav Amit
2019-01-08 20:53                     ` Andi Kleen
2019-01-09 10:35                     ` Peter Zijlstra
2019-08-29  8:23                       ` Tracing text poke / kernel self-modifying code (Was: Re: [RFC v2 0/6] x86: dynamic indirect branch promotion) Adrian Hunter
2019-08-29  8:53                         ` Peter Zijlstra
2019-08-29  9:40                           ` Adrian Hunter
2019-08-29 11:46                             ` Peter Zijlstra
2019-09-12  7:00                               ` Adrian Hunter [this message]
2019-09-12 12:17                                 ` hpa
2019-01-08 18:57               ` [RFC v2 0/6] x86: dynamic indirect branch promotion Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=533cc422-3f26-591f-f41c-b385d8c8ff05@intel.com \
    --to=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=dwmw@amazon.co.uk \
    --cc=ecree@solarflare.com \
    --cc=hpa@zytor.com \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=nadav.amit@gmail.com \
    --cc=pabeni@redhat.com \
    --cc=peterz@infradead.org \
    --cc=songliubraving@fb.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git