linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* x86, mce, Use user return notifier in mce
@ 2012-01-13  0:36 Huang Ying
  2012-01-13  0:36 ` [RFC 1/2] urn, make user return notifier lockless Huang Ying
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Huang Ying @ 2012-01-13  0:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Tony Luck, Borislav Petkov, Chen Gong,
	Hidetoshi Seto, ying.huang

Replace the home-made TIF_MCE_NOTIFY-based code in MCE with the user
return notifier.

To make this possible, first make the user return notifier lockless.

[RFC 1/2] urn, make user return notifier lockless
[RFC 2/2] x86, mce, Use user return notifier in mce

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFC 1/2] urn, make user return notifier lockless
  2012-01-13  0:36 x86, mce, Use user return notifier in mce Huang Ying
@ 2012-01-13  0:36 ` Huang Ying
  2012-01-13  0:36 ` [RFC 2/2] x86, mce, Use user return notifier in mce Huang Ying
  2012-01-13  1:46 ` Tony Luck
  2 siblings, 0 replies; 13+ messages in thread
From: Huang Ying @ 2012-01-13  0:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Tony Luck, Borislav Petkov, Chen Gong,
	Hidetoshi Seto, ying.huang, Avi Kivity, Peter Zijlstra

This makes it possible to use the user return notifier in hardware error
handlers, which usually have NMI-like semantics, such as the machine
check exception handler.

The implementation is based on that of irq_work.

Because a mandatory initializer interface is added, the changes to the
only current user, kvm, are included in this patch too, so that the
patchset stays bisectable.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/kvm/x86.c                   |    7 ++--
 include/linux/user-return-notifier.h |   15 +++++++--
 kernel/user-return-notifier.c        |   53 ++++++++++++++++++++++++++++-------
 3 files changed, 59 insertions(+), 16 deletions(-)

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -178,7 +178,6 @@ static void kvm_on_user_return(struct us
 		}
 	}
 	locals->registered = false;
-	user_return_notifier_unregister(urn);
 }
 
 static void shared_msr_update(unsigned slot, u32 msr)
@@ -225,7 +224,7 @@ void kvm_set_shared_msr(unsigned slot, u
 	smsr->values[slot].curr = value;
 	wrmsrl(shared_msrs_global.msrs[slot], value);
 	if (!smsr->registered) {
-		smsr->urn.on_user_return = kvm_on_user_return;
+		init_user_return_notifier(&smsr->urn, kvm_on_user_return);
 		user_return_notifier_register(&smsr->urn);
 		smsr->registered = true;
 	}
@@ -236,8 +235,10 @@ static void drop_user_return_notifiers(v
 {
 	struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
 
-	if (smsr->registered)
+	if (smsr->registered) {
 		kvm_on_user_return(&smsr->urn);
+		user_return_notifier_unregister(&smsr->urn);
+	}
 }
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
--- a/include/linux/user-return-notifier.h
+++ b/include/linux/user-return-notifier.h
@@ -3,16 +3,25 @@
 
 #ifdef CONFIG_USER_RETURN_NOTIFIER
 
-#include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/sched.h>
 
 struct user_return_notifier {
+	unsigned long flags;
 	void (*on_user_return)(struct user_return_notifier *urn);
-	struct hlist_node link;
+	struct llist_node link;
 };
 
 
-void user_return_notifier_register(struct user_return_notifier *urn);
+static inline
+void init_user_return_notifier(struct user_return_notifier *urn,
+			       void (*func)(struct user_return_notifier *))
+{
+	urn->flags = 0;
+	urn->on_user_return = func;
+}
+
+bool user_return_notifier_register(struct user_return_notifier *urn);
 void user_return_notifier_unregister(struct user_return_notifier *urn);
 
 static inline void propagate_user_return_notify(struct task_struct *prev,
--- a/kernel/user-return-notifier.c
+++ b/kernel/user-return-notifier.c
@@ -3,18 +3,26 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/export.h>
+#include <linux/llist.h>
 
-static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
+/* The urn entry is claimed to be enqueued */
+#define URN_CLAIMED_BIT		0
+
+static DEFINE_PER_CPU(struct llist_head, return_notifier_list);
 
 /*
  * Request a notification when the current cpu returns to userspace.  Must be
  * called in atomic context.  The notifier will also be called in atomic
- * context.
+ * context.  Return true on success, failure when the urn entry was already
+ * enqueued by someone else.
  */
-void user_return_notifier_register(struct user_return_notifier *urn)
+bool user_return_notifier_register(struct user_return_notifier *urn)
 {
+	if (test_and_set_bit(URN_CLAIMED_BIT, &urn->flags))
+		return false;
+	llist_add(&urn->link, &__get_cpu_var(return_notifier_list));
 	set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
-	hlist_add_head(&urn->link, &__get_cpu_var(return_notifier_list));
+	return true;
 }
 EXPORT_SYMBOL_GPL(user_return_notifier_register);
 
@@ -24,9 +32,27 @@ EXPORT_SYMBOL_GPL(user_return_notifier_r
  */
 void user_return_notifier_unregister(struct user_return_notifier *urn)
 {
-	hlist_del(&urn->link);
-	if (hlist_empty(&__get_cpu_var(return_notifier_list)))
-		clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+	struct llist_head *head;
+	struct llist_node *node;
+	bool found;
+
+	head = &__get_cpu_var(return_notifier_list);
+	clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+	node = llist_del_all(head);
+	while (node) {
+		if (&urn->link == node) {
+			found = true;
+			continue;
+		}
+		llist_add(node, head);
+		node = node->next;
+	}
+	/* The urn entry may be fired already */
+	if (!found)
+		return;
+	if (!llist_empty(head))
+		set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+	clear_bit(URN_CLAIMED_BIT, &urn->flags);
 }
 EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
 
@@ -34,11 +60,18 @@ EXPORT_SYMBOL_GPL(user_return_notifier_u
 void fire_user_return_notifiers(void)
 {
 	struct user_return_notifier *urn;
-	struct hlist_node *tmp1, *tmp2;
-	struct hlist_head *head;
+	struct llist_node *node, *next;
+	struct llist_head *head;
 
 	head = &get_cpu_var(return_notifier_list);
-	hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link)
+	clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+	node = llist_del_all(head);
+	while (node) {
+		next = node->next;
+		urn = llist_entry(node, struct user_return_notifier, link);
+		clear_bit(URN_CLAIMED_BIT, &urn->flags);
 		urn->on_user_return(urn);
+		node = next;
+	}
 	put_cpu_var(return_notifier_list);
 }

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFC 2/2] x86, mce, Use user return notifier in mce
  2012-01-13  0:36 x86, mce, Use user return notifier in mce Huang Ying
  2012-01-13  0:36 ` [RFC 1/2] urn, make user return notifier lockless Huang Ying
@ 2012-01-13  0:36 ` Huang Ying
  2012-01-13  1:46 ` Tony Luck
  2 siblings, 0 replies; 13+ messages in thread
From: Huang Ying @ 2012-01-13  0:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Tony Luck, Borislav Petkov, Chen Gong,
	Hidetoshi Seto, ying.huang

Replace the home-made TIF_MCE_NOTIFY based code in MCE with user
return notifier.

This saves one TIF_xx flag too.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Borislav Petkov <bp@amd64.org>
Cc: Chen Gong <gong.chen@linux.intel.com>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
---
 arch/x86/include/asm/thread_info.h |    5 +----
 arch/x86/kernel/cpu/mcheck/mce.c   |   13 ++++++++++---
 arch/x86/kernel/signal.c           |    6 ------
 3 files changed, 11 insertions(+), 13 deletions(-)

--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,7 +83,6 @@ struct thread_info {
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
-#define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* 32bit process */
@@ -105,7 +104,6 @@ struct thread_info {
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
@@ -139,8 +137,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME |	\
-	 _TIF_USER_RETURN_NOTIFY)
+	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_USER_RETURN_NOTIFY)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -38,6 +38,7 @@
 #include <linux/debugfs.h>
 #include <linux/irq_work.h>
 #include <linux/export.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/processor.h>
 #include <asm/mce.h>
@@ -102,6 +103,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_ban
 
 static DEFINE_PER_CPU(struct work_struct, mce_work);
 
+static DEFINE_PER_CPU(struct user_return_notifier, mce_urn);
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -1083,7 +1086,7 @@ void do_machine_check(struct pt_regs *re
 		force_sig(SIGBUS, current);
 
 	/* notify userspace ASAP */
-	set_thread_flag(TIF_MCE_NOTIFY);
+	user_return_notifier_register(&__get_cpu_var(mce_urn));
 
 	if (worst > 0)
 		mce_report_event(regs);
@@ -1119,6 +1122,11 @@ void mce_notify_process(void)
 		memory_failure(pfn, MCE_VECTOR);
 }
 
+static void mce_on_user_return(struct user_return_notifier *urn)
+{
+	mce_notify_process();
+}
+
 static void mce_process_work(struct work_struct *dummy)
 {
 	mce_notify_process();
@@ -1211,8 +1219,6 @@ int mce_notify_irq(void)
 	/* Not more than two messages every minute */
 	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
 
-	clear_thread_flag(TIF_MCE_NOTIFY);
-
 	if (test_and_clear_bit(0, &mce_need_notify)) {
 		/* wake processes polling /dev/mcelog */
 		wake_up_interruptible(&mce_chrdev_wait);
@@ -1476,6 +1482,7 @@ void __cpuinit mcheck_cpu_init(struct cp
 	__mcheck_cpu_init_timer();
 	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
 	init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
+	init_user_return_notifier(&__get_cpu_var(mce_urn), mce_on_user_return);
 }
 
 /*
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -814,12 +814,6 @@ static void do_signal(struct pt_regs *re
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
-#ifdef CONFIG_X86_MCE
-	/* notify userspace of pending MCEs */
-	if (thread_info_flags & _TIF_MCE_NOTIFY)
-		mce_notify_process();
-#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
-
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-13  0:36 x86, mce, Use user return notifier in mce Huang Ying
  2012-01-13  0:36 ` [RFC 1/2] urn, make user return notifier lockless Huang Ying
  2012-01-13  0:36 ` [RFC 2/2] x86, mce, Use user return notifier in mce Huang Ying
@ 2012-01-13  1:46 ` Tony Luck
  2012-01-13  6:32   ` Huang Ying
  2 siblings, 1 reply; 13+ messages in thread
From: Tony Luck @ 2012-01-13  1:46 UTC (permalink / raw)
  To: Huang Ying
  Cc: linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong, Hidetoshi Seto

On Thu, Jan 12, 2012 at 4:36 PM, Huang Ying <ying.huang@intel.com> wrote:
> Replace the home-made TIF_MCE_NOTIFY based code in MCE with user
> return notifier.

When I looked at this before, I found that the existing user return notifier had
the meaning "call a function before THIS CPU returns to user space". Use in KVM
was to update some MSR that needed adjustment before a cpu ran another user
process.

The MCE code wanted something slightly different: "call a function before THIS
PROCESS returns to user space". So my prototype code from last year made a
whole new set of interfaces - similar in style to the user return
notifier, but with
the MCE semantics.

At first glance it looks like you are just using the user return
notifier code (perhaps
I'm mis-reading the diff?). This won't work - it's possible for a
context switch, and
then the process that hit the MCE may get moved to another cpu, when it will
be run. Meanwhile we'll execute our function in the context of some
other process.

-Tony

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-13  1:46 ` Tony Luck
@ 2012-01-13  6:32   ` Huang Ying
  2012-01-13  7:41     ` Tony Luck
  0 siblings, 1 reply; 13+ messages in thread
From: Huang Ying @ 2012-01-13  6:32 UTC (permalink / raw)
  To: Tony Luck
  Cc: linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong, Hidetoshi Seto

On Thu, 2012-01-12 at 17:46 -0800, Tony Luck wrote:
> On Thu, Jan 12, 2012 at 4:36 PM, Huang Ying <ying.huang@intel.com> wrote:
> > Replace the home-made TIF_MCE_NOTIFY based code in MCE with user
> > return notifier.
> 
> When I looked at this before, I found that the existing user return notifier had
> the meaning "call a function before THIS CPU returns to user space". Use in KVM
> was to update some MSR that needed adjustment before a cpu ran another user
> process.
> 
> The MCE code wanted something slightly different: "call a function before THIS
> PROCESS returns to user space". So my prototype code from last year made a
> whole new set of interfaces - similar in style to the user return
> notifier, but with
> the MCE semantics.
> 
> At first glance it looks like you are just using the user return
> notifier code (perhaps
> I'm mis-reading the diff?). This won't work - it's possible for a
> context switch, and
> then the process that hit the MCE may get moved to another cpu, when it will
> be run. Meanwhile we'll execute our function in the context of some
> other process.

You are right.  The user return notifier cannot be used for SRAR.  I
think it may be useful for SRAO, where we need a way to notify earlier
in case the corresponding work_queue item is not executed in time.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-13  6:32   ` Huang Ying
@ 2012-01-13  7:41     ` Tony Luck
  2012-01-16  1:21       ` Huang Ying
  2012-01-24 14:39       ` Avi Kivity
  0 siblings, 2 replies; 13+ messages in thread
From: Tony Luck @ 2012-01-13  7:41 UTC (permalink / raw)
  To: Huang Ying
  Cc: linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong, Hidetoshi Seto

On Thu, Jan 12, 2012 at 10:32 PM, Huang Ying <ying.huang@intel.com> wrote:
> You are right.  User return notifier can not be used for SRAR.  I think
> that may be useful for SRAO.  Where we need a way to do notify earlier
> in case of the corresponding work_queue item is not executed in time.

OK - I've been so focused on SRAR that I didn't think of the SRAO case.
But even there it seems odd to use user return notifier. We'd like the
SRAO work item to be executed promptly - but we don't care where it
is executed. So the "execute on this cpu" part of user return notifiers
doesn't quite fit.

Is there a concept of "high priority work queue"?

-Tony

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-13  7:41     ` Tony Luck
@ 2012-01-16  1:21       ` Huang Ying
  2012-01-20 17:34         ` Tejun Heo
  2012-01-24 14:39       ` Avi Kivity
  1 sibling, 1 reply; 13+ messages in thread
From: Huang Ying @ 2012-01-16  1:21 UTC (permalink / raw)
  To: Tony Luck, Tejun Heo
  Cc: linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong, Hidetoshi Seto

On Thu, 2012-01-12 at 23:41 -0800, Tony Luck wrote:
> On Thu, Jan 12, 2012 at 10:32 PM, Huang Ying <ying.huang@intel.com> wrote:
> > You are right.  User return notifier can not be used for SRAR.  I think
> > that may be useful for SRAO.  Where we need a way to do notify earlier
> > in case of the corresponding work_queue item is not executed in time.
> 
> OK - I've been so focused on SRAR that I didn't think of the SRAO case.
> But even there it seems odd to use user return notifier. We'd like the
> SRAO work item to be executed promptly - but we don't care where it
> is executed. So the "execute on this cpu" part of user return notifiers
> doesn't quite fit.
> 
> Is there a concept of "high priority work queue"?

"high priority work queue" sounds like a good idea.

Hi, Tejun,

Do you think the concept of a "high priority work queue" is possible?
This is a requirement from our RAS guys: to have some hardware
recovery functions executed ASAP in process context.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-16  1:21       ` Huang Ying
@ 2012-01-20 17:34         ` Tejun Heo
  2012-01-21  2:44           ` Huang Ying
  0 siblings, 1 reply; 13+ messages in thread
From: Tejun Heo @ 2012-01-20 17:34 UTC (permalink / raw)
  To: Huang Ying
  Cc: Tony Luck, linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong,
	Hidetoshi Seto

Hello,

On Mon, Jan 16, 2012 at 09:21:15AM +0800, Huang Ying wrote:
> "high priority work queue" sounds like a good idea.
> 
> Hi, Tejun,
> 
> Do you think the concept of "high priority work queue" is possible?
> This is the requirement from our RAS guys.  To make some hardware
> recovery function to be executed ASAP in process context.

Take a look at Documentation/workqueue.txt.  It already has
WQ_HIGHPRI.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-20 17:34         ` Tejun Heo
@ 2012-01-21  2:44           ` Huang Ying
  2012-01-21  5:56             ` Tejun Heo
  0 siblings, 1 reply; 13+ messages in thread
From: Huang Ying @ 2012-01-21  2:44 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Tony Luck, linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong,
	Hidetoshi Seto

On Fri, 2012-01-20 at 09:34 -0800, Tejun Heo wrote:
> Hello,
> 
> On Mon, Jan 16, 2012 at 09:21:15AM +0800, Huang Ying wrote:
> > "high priority work queue" sounds like a good idea.
> > 
> > Hi, Tejun,
> > 
> > Do you think the concept of "high priority work queue" is possible?
> > This is the requirement from our RAS guys.  To make some hardware
> > recovery function to be executed ASAP in process context.
> 
> Take a look at Documentation/workqueue.txt.  It already has
> WQ_HIGHPRI.

Thanks for reminding!

It appears that WQ_HIGHPRI only provides priority among work queues,
not between the kthread backing the work queue and other tasks.  Is
there any mechanism for that?

If my understanding is correct, WQ_MEM_RECLAIM has a side effect that
helps here.  Because hardware errors seldom occur, the reserved kthread
for WQ_MEM_RECLAIM sleeps most of the time.  When the first hardware
error occurs and the work item is queued, the reserved kthread is woken
up.  Because the reserved kthread has slept for a long time, it is highly
likely to be scheduled at the next scheduling point.

Because hardware errors usually have no locality, WQ_UNBOUND can be used
so that the work item can be put on a relatively low-load CPU.  The
documentation says that WQ_UNBOUND work items will be executed ASAP
too.  How does the priority of WQ_UNBOUND compare with WQ_HIGHPRI?

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-21  2:44           ` Huang Ying
@ 2012-01-21  5:56             ` Tejun Heo
  2012-01-21  6:19               ` Huang Ying
  0 siblings, 1 reply; 13+ messages in thread
From: Tejun Heo @ 2012-01-21  5:56 UTC (permalink / raw)
  To: Huang Ying
  Cc: Tony Luck, linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong,
	Hidetoshi Seto

Hello,

On Fri, Jan 20, 2012 at 6:44 PM, Huang Ying <ying.huang@intel.com> wrote:
> It appears that WQ_HIGHPRI only provides priority between work queue,
> not between the work queue backing kthread and other tasks.  Is there
> any mechanism for that?

No, it doesn't.

> If my understanding was correct, WQ_MEM_RECLAIM has some side-effect for
> that.  Because hardware errors occurs seldom, the reserved kthread for
> WQ_MEME_RECLAIM just sleeps most of the time.  When first hardware error
> occurs and the work item is queued, the reserved kthread is waked up.
> Because the reserved kthread sleeps for long time, it is highly possible
> for it to be scheduled at the next schedule point.

But rescuer is used only under memory pressure. It doesn't help latency at all.

> Because hardware error usually has no locality, WQ_UNBOUND can be used
> for it so that the work item can be put on relative low-load CPU.  From
> the document, it is said WQ_UNBOUND work items will be executed ASAP
> too.  Compared with WQ_HIGHPRI, how about the priority of WQ_UNBOUND?

Maybe, maybe not. I suggest just using WQ_HIGHPRI for now and worrying
about it later if the scheduling latency actually turns out to matter.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-21  5:56             ` Tejun Heo
@ 2012-01-21  6:19               ` Huang Ying
  2012-01-23 23:00                 ` Luck, Tony
  0 siblings, 1 reply; 13+ messages in thread
From: Huang Ying @ 2012-01-21  6:19 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Tony Luck, linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong,
	Hidetoshi Seto

On Fri, 2012-01-20 at 21:56 -0800, Tejun Heo wrote:
> Hello,
> 
> On Fri, Jan 20, 2012 at 6:44 PM, Huang Ying <ying.huang@intel.com> wrote:
> > It appears that WQ_HIGHPRI only provides priority between work queue,
> > not between the work queue backing kthread and other tasks.  Is there
> > any mechanism for that?
> 
> No, it doesn't.
> 
> > If my understanding was correct, WQ_MEM_RECLAIM has some side-effect for
> > that.  Because hardware errors occurs seldom, the reserved kthread for
> > WQ_MEME_RECLAIM just sleeps most of the time.  When first hardware error
> > occurs and the work item is queued, the reserved kthread is waked up.
> > Because the reserved kthread sleeps for long time, it is highly possible
> > for it to be scheduled at the next schedule point.
> 
> But rescuer is used only under memory pressure. It doesn't help latency at all.
> 
> > Because hardware error usually has no locality, WQ_UNBOUND can be used
> > for it so that the work item can be put on relative low-load CPU.  From
> > the document, it is said WQ_UNBOUND work items will be executed ASAP
> > too.  Compared with WQ_HIGHPRI, how about the priority of WQ_UNBOUND?
> 
> Maybe, maybe not. I suggest just using WQ_HIGHPRI for now and worrying
> about it later if the scheduling latency actually turns out to matter.

This is a performance issue, so maybe we need to measure the actual
latency first.  The first step can be to use WQ_HIGHPRI, as you
suggested.

Thanks for your information!

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: x86, mce, Use user return notifier in mce
  2012-01-21  6:19               ` Huang Ying
@ 2012-01-23 23:00                 ` Luck, Tony
  0 siblings, 0 replies; 13+ messages in thread
From: Luck, Tony @ 2012-01-23 23:00 UTC (permalink / raw)
  To: Huang, Ying, Tejun Heo
  Cc: linux-kernel, Ingo Molnar, Borislav Petkov, Chen Gong, Hidetoshi Seto

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 930 bytes --]

> This is a performance issue.  So maybe we need to measure the actual
> latency firstly.  The first step can be using WQ_HIGHPRI as you
> suggested.

To clarify "performance issue" a bit.  The situation is that h/w has
found some error in a piece of memory that is not currently being
accessed by any processor - we don't have to take any action right
away - but we'd like to run the work queue that will hunt down which
processes are using this page sooner, rather than later. If some
process does access the memory, it will be a fatal error on some
systems - but if we get there first, we can mark the page, kill the
process(es) and keep the system running.

So we are looking for a solution where there isn't an unbounded time
before the work queue gets run.

-Tony
[-- mis-decoded mailing-list footer omitted: unreadable bytes, not part of the message text --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: x86, mce, Use user return notifier in mce
  2012-01-13  7:41     ` Tony Luck
  2012-01-16  1:21       ` Huang Ying
@ 2012-01-24 14:39       ` Avi Kivity
  1 sibling, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2012-01-24 14:39 UTC (permalink / raw)
  To: Tony Luck
  Cc: Huang Ying, linux-kernel, Ingo Molnar, Borislav Petkov,
	Chen Gong, Hidetoshi Seto

On 01/13/2012 09:41 AM, Tony Luck wrote:
> On Thu, Jan 12, 2012 at 10:32 PM, Huang Ying <ying.huang@intel.com> wrote:
> > You are right.  User return notifier can not be used for SRAR.  I think
> > that may be useful for SRAO.  Where we need a way to do notify earlier
> > in case of the corresponding work_queue item is not executed in time.
>
> OK - I've been so focused on SRAR that I didn't think of the SRAO case.
> But even there it seems odd to use user return notifier. We'd like the
> SRAO work item to be executed promptly - but we don't care where it
> is executed. So the "execute on this cpu" part of user return notifiers
> doesn't quite fit.
>

Also, nothing bounds the time until a urn executes.  If the cpu stays in
the kernel forever, it will never fire.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-01-24 14:40 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-13  0:36 x86, mce, Use user return notifier in mce Huang Ying
2012-01-13  0:36 ` [RFC 1/2] urn, make user return notifier lockless Huang Ying
2012-01-13  0:36 ` [RFC 2/2] x86, mce, Use user return notifier in mce Huang Ying
2012-01-13  1:46 ` Tony Luck
2012-01-13  6:32   ` Huang Ying
2012-01-13  7:41     ` Tony Luck
2012-01-16  1:21       ` Huang Ying
2012-01-20 17:34         ` Tejun Heo
2012-01-21  2:44           ` Huang Ying
2012-01-21  5:56             ` Tejun Heo
2012-01-21  6:19               ` Huang Ying
2012-01-23 23:00                 ` Luck, Tony
2012-01-24 14:39       ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).