All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] initial NOHZ_FULL support for tile
@ 2015-03-24 19:21 cmetcalf
  2015-03-24 19:21 ` [PATCH 1/4] tile: support arch_irq_work_raise cmetcalf
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: cmetcalf @ 2015-03-24 19:21 UTC (permalink / raw)
  To: linux-kernel, David S. Miller, Andrew Morton, Borislav Petkov,
	Frederic Weisbecker, Gilad Ben-Yossef, H. Peter Anvin,
	Ingo Molnar, Kevin Hilman, Li Zhong, Mike Galbraith,
	Paul E. McKenney, Peter Zijlstra, Steven Rostedt,
	Thomas Gleixner, netdev
  Cc: Chris Metcalf

From: Chris Metcalf <cmetcalf@ezchip.com>

These changes are the necessary minimum to make the tile architecture
work relatively well with NOHZ_FULL.  The tile architecture has had
out-of-tree support since 2008 for a DATAPLANE mode that provided
similar functionality (called Zero-Overhead Linux in our marketing
materials); we are now in the process of layering that on top of the
community NOHZ_FULL to upstream it.

Chris Metcalf (4):
  tile: support arch_irq_work_raise
  tile: support CONTEXT_TRACKING and thus NOHZ_FULL
  nohz: add tick_nohz_full_clear_cpus() API
  net: tile: don't send interrupts to nohz cores by default

 arch/tile/Kconfig                   |  1 +
 arch/tile/include/asm/Kbuild        |  1 -
 arch/tile/include/asm/irq_work.h    | 14 ++++++++++++++
 arch/tile/include/asm/smp.h         |  1 +
 arch/tile/include/asm/thread_info.h |  9 ++++++---
 arch/tile/kernel/process.c          | 12 ++++++++----
 arch/tile/kernel/ptrace.c           | 22 ++++++++++++++++++++--
 arch/tile/kernel/single_step.c      |  3 +++
 arch/tile/kernel/smp.c              | 32 +++++++++++++++++++++++++++++++-
 arch/tile/kernel/traps.c            | 16 +++++++++-------
 arch/tile/kernel/unaligned.c        | 22 +++++++++++++---------
 arch/tile/mm/fault.c                | 10 +++++++---
 drivers/net/ethernet/tile/tilegx.c  |  5 ++++-
 include/linux/tick.h                |  7 +++++++
 14 files changed, 124 insertions(+), 31 deletions(-)
 create mode 100644 arch/tile/include/asm/irq_work.h

-- 
2.1.2


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/4] tile: support arch_irq_work_raise
  2015-03-24 19:21 [PATCH 0/4] initial NOHZ_FULL support for tile cmetcalf
@ 2015-03-24 19:21 ` cmetcalf
  2015-03-24 21:00   ` Frederic Weisbecker
  2015-03-24 21:14   ` Peter Zijlstra
  2015-03-24 19:21 ` [PATCH 2/4] tile: support CONTEXT_TRACKING and thus NOHZ_FULL cmetcalf
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 14+ messages in thread
From: cmetcalf @ 2015-03-24 19:21 UTC (permalink / raw)
  To: linux-kernel, Ingo Molnar, Paul E. McKenney, Peter Zijlstra,
	Thomas Gleixner, Frederic Weisbecker
  Cc: Chris Metcalf

From: Chris Metcalf <cmetcalf@ezchip.com>

Tile includes a hypervisor hook to deliver messages to arbitrary
tiles, so we can use that to raise an interrupt as soon as
possible on our own core.  Unfortunately the Tilera hypervisor
disabled that support on principle in previous releases, but
it will be available in MDE 4.3.4 and later.

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
---
 arch/tile/include/asm/Kbuild     |  1 -
 arch/tile/include/asm/irq_work.h | 14 ++++++++++++++
 arch/tile/include/asm/smp.h      |  1 +
 arch/tile/kernel/smp.c           | 32 +++++++++++++++++++++++++++++++-
 4 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 arch/tile/include/asm/irq_work.h

diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index b4c488b65745..f5433e0e34e0 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
-generic-y += irq_work.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/tile/include/asm/irq_work.h b/arch/tile/include/asm/irq_work.h
new file mode 100644
index 000000000000..48af33a61a2c
--- /dev/null
+++ b/arch/tile/include/asm/irq_work.h
@@ -0,0 +1,14 @@
+#ifndef __ASM_IRQ_WORK_H
+#define __ASM_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+#ifdef CONFIG_SMP
+	extern bool self_interrupt_ok;
+	return self_interrupt_ok;
+#else
+	return false;
+#endif
+}
+
+#endif /* __ASM_IRQ_WORK_H */
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 9a326b64f7ae..735e7f144733 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -69,6 +69,7 @@ static inline int xy_to_cpu(int x, int y)
 #define MSG_TAG_STOP_CPU		2
 #define MSG_TAG_CALL_FUNCTION_MANY	3
 #define MSG_TAG_CALL_FUNCTION_SINGLE	4
+#define MSG_TAG_IRQ_WORK		5
 
 /* Hook for the generic smp_call_function_many() routine. */
 static inline void arch_send_call_function_ipi_mask(struct cpumask *mask)
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index d3c4ed780ce2..07e3ff5cc740 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/irq_work.h>
 #include <linux/module.h>
 #include <asm/cacheflush.h>
 #include <asm/homecache.h>
@@ -33,6 +34,8 @@ EXPORT_SYMBOL(smp_topology);
 static unsigned long __iomem *ipi_mappings[NR_CPUS];
 #endif
 
+/* Does messaging work correctly to the local cpu? */
+bool self_interrupt_ok;
 
 /*
  * Top-level send_IPI*() functions to send messages to other cpus.
@@ -147,6 +150,10 @@ void evaluate_message(int tag)
 		generic_smp_call_function_single_interrupt();
 		break;
 
+	case MSG_TAG_IRQ_WORK: /* Invoke IRQ work */
+		irq_work_run();
+		break;
+
 	default:
 		panic("Unknown IPI message tag %d", tag);
 		break;
@@ -186,6 +193,15 @@ void flush_icache_range(unsigned long start, unsigned long end)
 EXPORT_SYMBOL(flush_icache_range);
 
 
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+	if (arch_irq_work_has_interrupt())
+		send_IPI_single(smp_processor_id(), MSG_TAG_IRQ_WORK);
+}
+#endif
+
+
 /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */
 static irqreturn_t handle_reschedule_ipi(int irq, void *token)
 {
@@ -203,8 +219,22 @@ static struct irqaction resched_action = {
 
 void __init ipi_init(void)
 {
+	int cpu = smp_processor_id();
+	HV_Recipient recip = { .y = cpu_y(cpu), .x = cpu_x(cpu),
+			       .state = HV_TO_BE_SENT };
+	int tag = MSG_TAG_CALL_FUNCTION_SINGLE;
+
+	/*
+	 * Test if we can message ourselves for arch_irq_work_raise.
+	 * This functionality is only available in the Tilera hypervisor
+	 * in versions 4.3.4 and following.
+	 */
+	if (hv_send_message(&recip, 1, (HV_VirtAddr)&tag, sizeof(tag)) == 1)
+		self_interrupt_ok = true;
+	else
+		pr_warn("Older hypervisor: disabling fast irq_work_raise\n");
+
 #if CHIP_HAS_IPI()
-	int cpu;
 	/* Map IPI trigger MMIO addresses. */
 	for_each_possible_cpu(cpu) {
 		HV_Coord tile;
-- 
2.1.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/4] tile: support CONTEXT_TRACKING and thus NOHZ_FULL
  2015-03-24 19:21 [PATCH 0/4] initial NOHZ_FULL support for tile cmetcalf
  2015-03-24 19:21 ` [PATCH 1/4] tile: support arch_irq_work_raise cmetcalf
@ 2015-03-24 19:21 ` cmetcalf
  2015-03-24 21:15   ` Frederic Weisbecker
  2015-03-24 19:21 ` [PATCH 3/4] nohz: add tick_nohz_full_clear_cpus() API cmetcalf
  2015-03-24 19:21 ` [PATCH 4/4] net: tile: don't send interrupts to nohz cores by default cmetcalf
  3 siblings, 1 reply; 14+ messages in thread
From: cmetcalf @ 2015-03-24 19:21 UTC (permalink / raw)
  To: linux-kernel, Andrew Morton, Frederic Weisbecker,
	Gilad Ben-Yossef, H. Peter Anvin, Ingo Molnar, Li Zhong,
	Paul E. McKenney, Peter Zijlstra, Steven Rostedt,
	Thomas Gleixner
  Cc: Chris Metcalf

From: Chris Metcalf <cmetcalf@ezchip.com>

Add the TIF_NOHZ flag appropriately.

Add call to user_exit() on entry to do_work_pending() and on entry
to syscalls via do_syscall_trace_enter(), and also the top of
do_syscall_trace_exit() just because it's done in x86.

Add call to user_enter() at the bottom of do_work_pending() once we
have no more work to do before returning to userspace.

Wrap all the trap code in exception_enter() / exception_exit().

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
---
 arch/tile/Kconfig                   |  1 +
 arch/tile/include/asm/thread_info.h |  9 ++++++---
 arch/tile/kernel/process.c          | 12 ++++++++----
 arch/tile/kernel/ptrace.c           | 22 ++++++++++++++++++++--
 arch/tile/kernel/single_step.c      |  3 +++
 arch/tile/kernel/traps.c            | 16 +++++++++-------
 arch/tile/kernel/unaligned.c        | 22 +++++++++++++---------
 arch/tile/mm/fault.c                | 10 +++++++---
 8 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 7cca41842a9e..c3a31f8bb09c 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -27,6 +27,7 @@ config TILE
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select HAVE_DEBUG_STACKOVERFLOW
 	select ARCH_WANT_FRAME_POINTERS
+	select HAVE_CONTEXT_TRACKING
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 96c14c1430d8..6130a3db505b 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -126,6 +126,7 @@ extern void _cpu_idle(void);
 #define TIF_NOTIFY_RESUME	8	/* callback before returning to user */
 #define TIF_SYSCALL_TRACEPOINT	9	/* syscall tracepoint instrumentation */
 #define TIF_POLLING_NRFLAG	10	/* idle is polling for TIF_NEED_RESCHED */
+#define TIF_NOHZ		11	/* in adaptive nohz mode */
 
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
@@ -138,14 +139,16 @@ extern void _cpu_idle(void);
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_NOHZ		(1<<TIF_NOHZ)
 
 /* Work to do on any return to user space. */
 #define _TIF_ALLWORK_MASK \
-  (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\
-   _TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME)
+	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_SINGLESTEP | \
+	 _TIF_ASYNC_TLB | _TIF_NOTIFY_RESUME | _TIF_NOHZ)
 
 /* Work to do at syscall entry. */
-#define _TIF_SYSCALL_ENTRY_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SYSCALL_ENTRY_WORK \
+	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 
 /* Work to do at syscall exit. */
 #define _TIF_SYSCALL_EXIT_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 48e5773dd0b7..b403c2e3e263 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -27,6 +27,7 @@
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/switch_to.h>
 #include <asm/homecache.h>
@@ -474,6 +475,8 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
 	if (!user_mode(regs))
 		return 0;
 
+	user_exit();
+
 	/* Enable interrupts; they are disabled again on return to caller. */
 	local_irq_enable();
 
@@ -496,11 +499,12 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
 		tracehook_notify_resume(regs);
 		return 1;
 	}
-	if (thread_info_flags & _TIF_SINGLESTEP) {
+	if (thread_info_flags & _TIF_SINGLESTEP)
 		single_step_once(regs);
-		return 0;
-	}
-	panic("work_pending: bad flags %#x\n", thread_info_flags);
+
+	user_enter();
+
+	return 0;
 }
 
 unsigned long get_wchan(struct task_struct *p)
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
index de98c6ddf136..f84eed8243da 100644
--- a/arch/tile/kernel/ptrace.c
+++ b/arch/tile/kernel/ptrace.c
@@ -22,6 +22,7 @@
 #include <linux/regset.h>
 #include <linux/elf.h>
 #include <linux/tracehook.h>
+#include <linux/context_tracking.h>
 #include <asm/traps.h>
 #include <arch/chip.h>
 
@@ -252,12 +253,21 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 int do_syscall_trace_enter(struct pt_regs *regs)
 {
-	if (test_thread_flag(TIF_SYSCALL_TRACE)) {
+	u32 work = ACCESS_ONCE(current_thread_info()->flags);
+
+	/*
+	 * If TIF_NOHZ is set, we are required to call user_exit() before
+	 * doing anything that could touch RCU.
+	 */
+	if (work & _TIF_NOHZ)
+		user_exit();
+
+	if (work & _TIF_SYSCALL_TRACE) {
 		if (tracehook_report_syscall_entry(regs))
 			regs->regs[TREG_SYSCALL_NR] = -1;
 	}
 
-	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+	if (work & _TIF_SYSCALL_TRACEPOINT)
 		trace_sys_enter(regs, regs->regs[TREG_SYSCALL_NR]);
 
 	return regs->regs[TREG_SYSCALL_NR];
@@ -268,6 +278,12 @@ void do_syscall_trace_exit(struct pt_regs *regs)
 	long errno;
 
 	/*
+	 * We may come here right after calling schedule_user()
+	 * in which case we can be in RCU user mode.
+	 */
+	user_exit();
+
+	/*
 	 * The standard tile calling convention returns the value (or negative
 	 * errno) in r0, and zero (or positive errno) in r1.
 	 * It saves a couple of cycles on the hot path to do this work in
@@ -303,5 +319,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs)
 /* Handle synthetic interrupt delivered only by the simulator. */
 void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
 {
+	enum ctx_state prev_state = exception_enter();
 	send_sigtrap(current, regs);
+	exception_exit(prev_state);
 }
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 862973074bf9..53f7b9def07b 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/err.h>
 #include <linux/prctl.h>
+#include <linux/context_tracking.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
 #include <asm/uaccess.h>
@@ -738,6 +739,7 @@ static DEFINE_PER_CPU(unsigned long, ss_saved_pc);
 
 void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
 {
+	enum ctx_state prev_state = exception_enter();
 	unsigned long *ss_pc = this_cpu_ptr(&ss_saved_pc);
 	struct thread_info *info = (void *)current_thread_info();
 	int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
@@ -754,6 +756,7 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
 		__insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control);
 		send_sigtrap(current, regs);
 	}
+	exception_exit(prev_state);
 }
 
 
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index bf841ca517bb..312fc134c1cb 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -20,6 +20,7 @@
 #include <linux/reboot.h>
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
+#include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/traps.h>
 #include <asm/setup.h>
@@ -253,6 +254,7 @@ static int do_bpt(struct pt_regs *regs)
 void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 		       unsigned long reason)
 {
+	enum ctx_state prev_state = exception_enter();
 	siginfo_t info = { 0 };
 	int signo, code;
 	unsigned long address = 0;
@@ -261,7 +263,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 
 	/* Handle breakpoints, etc. */
 	if (is_kernel && fault_num == INT_ILL && do_bpt(regs))
-		return;
+		goto done;
 
 	/* Re-enable interrupts, if they were previously enabled. */
 	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
@@ -275,7 +277,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 		const char *name;
 		char buf[100];
 		if (fixup_exception(regs))  /* ILL_TRANS or UNALIGN_DATA */
-			return;
+			goto done;
 		if (fault_num >= 0 &&
 		    fault_num < ARRAY_SIZE(int_name) &&
 		    int_name[fault_num] != NULL)
@@ -294,7 +296,6 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 			 fault_num, name, regs->pc, buf);
 		show_regs(regs);
 		do_exit(SIGKILL);  /* FIXME: implement i386 die() */
-		return;
 	}
 
 	switch (fault_num) {
@@ -308,7 +309,6 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 			pr_err("Unreadable instruction for INT_ILL: %#lx\n",
 			       regs->pc);
 			do_exit(SIGKILL);
-			return;
 		}
 		if (!special_ill(instr, &signo, &code)) {
 			signo = SIGILL;
@@ -319,7 +319,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	case INT_GPV:
 #if CHIP_HAS_TILE_DMA()
 		if (retry_gpv(reason))
-			return;
+			goto done;
 #endif
 		/*FALLTHROUGH*/
 	case INT_UDN_ACCESS:
@@ -346,7 +346,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 			if (!state ||
 			    (void __user *)(regs->pc) != state->buffer) {
 				single_step_once(regs);
-				return;
+				goto done;
 			}
 		}
 #endif
@@ -380,7 +380,6 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 #endif
 	default:
 		panic("Unexpected do_trap interrupt number %d", fault_num);
-		return;
 	}
 
 	info.si_signo = signo;
@@ -391,6 +390,9 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	if (signo != SIGTRAP)
 		trace_unhandled_signal("trap", regs, address, signo);
 	force_sig_info(signo, &info, current);
+
+done:
+	exception_exit(prev_state);
 }
 
 void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
index 7d9a83be0aca..d075f92ccee0 100644
--- a/arch/tile/kernel/unaligned.c
+++ b/arch/tile/kernel/unaligned.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/compat.h>
 #include <linux/prctl.h>
+#include <linux/context_tracking.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
 #include <asm/uaccess.h>
@@ -1448,6 +1449,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 
 void do_unaligned(struct pt_regs *regs, int vecnum)
 {
+	enum ctx_state prev_state = exception_enter();
 	tilegx_bundle_bits __user  *pc;
 	tilegx_bundle_bits bundle;
 	struct thread_info *info = current_thread_info();
@@ -1487,12 +1489,11 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 						(int)unaligned_fixup,
 						(unsigned long long)regs->ex1,
 						(unsigned long long)regs->pc);
-				return;
+			} else {
+				/* Not fixable. Go panic. */
+				panic("Unalign exception in Kernel. pc=%lx",
+				      regs->pc);
 			}
-			/* Not fixable. Go panic. */
-			panic("Unalign exception in Kernel. pc=%lx",
-			      regs->pc);
-			return;
 		} else {
 			/*
 			 * Try to fix the exception. If we can't, panic the
@@ -1501,8 +1502,8 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 			bundle = GX_INSN_BSWAP(
 				*((tilegx_bundle_bits *)(regs->pc)));
 			jit_bundle_gen(regs, bundle, align_ctl);
-			return;
 		}
+		goto done;
 	}
 
 	/*
@@ -1526,7 +1527,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 
 		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
 		force_sig_info(info.si_signo, &info, current);
-		return;
+		goto done;
 	}
 
 
@@ -1543,7 +1544,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 		trace_unhandled_signal("segfault in unalign fixup", regs,
 				       (unsigned long)info.si_addr, SIGSEGV);
 		force_sig_info(info.si_signo, &info, current);
-		return;
+		goto done;
 	}
 
 	if (!info->unalign_jit_base) {
@@ -1578,7 +1579,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 
 		if (IS_ERR((void __force *)user_page)) {
 			pr_err("Out of kernel pages trying do_mmap\n");
-			return;
+			goto done;
 		}
 
 		/* Save the address in the thread_info struct */
@@ -1591,6 +1592,9 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 
 	/* Generate unalign JIT */
 	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
+
+done:
+	exception_exit(prev_state);
 }
 
 #endif /* __tilegx__ */
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 0f61a73534e6..e83cc999da02 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -35,6 +35,7 @@
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
+#include <linux/context_tracking.h>
 
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
@@ -702,6 +703,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 		   unsigned long address, unsigned long write)
 {
 	int is_page_fault;
+	enum ctx_state prev_state = exception_enter();
 
 #ifdef CONFIG_KPROBES
 	/*
@@ -711,7 +713,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 	 */
 	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
 		       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
-		return;
+		goto done;
 #endif
 
 #ifdef __tilegx__
@@ -750,7 +752,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 				 current->comm, current->pid, pc, address);
 			show_regs(regs);
 			do_group_exit(SIGKILL);
-			return;
 		}
 	}
 #else
@@ -834,12 +835,15 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 			async->is_fault = is_page_fault;
 			async->is_write = write;
 			async->address = address;
-			return;
+			goto done;
 		}
 	}
 #endif
 
 	handle_page_fault(regs, fault_num, is_page_fault, address, write);
+
+done:
+	exception_exit(prev_state);
 }
 
 
-- 
2.1.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/4] nohz: add tick_nohz_full_clear_cpus() API
  2015-03-24 19:21 [PATCH 0/4] initial NOHZ_FULL support for tile cmetcalf
  2015-03-24 19:21 ` [PATCH 1/4] tile: support arch_irq_work_raise cmetcalf
  2015-03-24 19:21 ` [PATCH 2/4] tile: support CONTEXT_TRACKING and thus NOHZ_FULL cmetcalf
@ 2015-03-24 19:21 ` cmetcalf
  2015-03-30 16:20   ` Chris Metcalf
  2015-03-24 19:21 ` [PATCH 4/4] net: tile: don't send interrupts to nohz cores by default cmetcalf
  3 siblings, 1 reply; 14+ messages in thread
From: cmetcalf @ 2015-03-24 19:21 UTC (permalink / raw)
  To: linux-kernel, Frederic Weisbecker, Steven Rostedt,
	Paul E. McKenney, Ingo Molnar, Thomas Gleixner, Peter Zijlstra,
	Borislav Petkov, Li Zhong, Mike Galbraith, Kevin Hilman
  Cc: Chris Metcalf

From: Chris Metcalf <cmetcalf@ezchip.com>

This is useful, for example, to modify a cpumask to avoid the
nohz cores so that interrupts aren't sent to them.

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
---
Motivated by patch 4/4 in this series.

 include/linux/tick.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9c085dc12ae9..d53ad4892a39 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -186,6 +186,12 @@ static inline bool tick_nohz_full_cpu(int cpu)
 	return cpumask_test_cpu(cpu, tick_nohz_full_mask);
 }
 
+static inline void tick_nohz_full_clear_cpus(struct cpumask *mask)
+{
+	if (tick_nohz_full_enabled())
+		cpumask_andnot(mask, mask, tick_nohz_full_mask);
+}
+
 extern void __tick_nohz_full_check(void);
 extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_cpu(int cpu);
@@ -194,6 +200,7 @@ extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
+static inline void tick_nohz_full_clear_cpus(struct cpumask *mask) { }
 static inline void __tick_nohz_full_check(void) { }
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void tick_nohz_full_kick(void) { }
-- 
2.1.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/4] net: tile: don't send interrupts to nohz cores by default
  2015-03-24 19:21 [PATCH 0/4] initial NOHZ_FULL support for tile cmetcalf
                   ` (2 preceding siblings ...)
  2015-03-24 19:21 ` [PATCH 3/4] nohz: add tick_nohz_full_clear_cpus() API cmetcalf
@ 2015-03-24 19:21 ` cmetcalf
  3 siblings, 0 replies; 14+ messages in thread
From: cmetcalf @ 2015-03-24 19:21 UTC (permalink / raw)
  To: linux-kernel, netdev, David S. Miller, Frederic Weisbecker,
	Steven Rostedt, Paul E. McKenney, Ingo Molnar, Thomas Gleixner,
	Peter Zijlstra, Borislav Petkov, Li Zhong, Mike Galbraith,
	Kevin Hilman
  Cc: Chris Metcalf

From: Chris Metcalf <cmetcalf@ezchip.com>

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
---
 drivers/net/ethernet/tile/tilegx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index bea8cd2bb56c..cf9850c6a809 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -40,6 +40,7 @@
 #include <linux/tcp.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/tick.h>
 
 #include <asm/checksum.h>
 #include <asm/homecache.h>
@@ -2271,8 +2272,10 @@ static int __init tile_net_init_module(void)
 	for (i = 0; gxio_mpipe_link_enumerate_mac(i, name, mac) >= 0; i++)
 		tile_net_dev_init(name, mac);
 
-	if (!network_cpus_init())
+	if (!network_cpus_init()) {
 		network_cpus_map = *cpu_online_mask;
+		tick_nohz_full_clear_cpus(&network_cpus_map);
+	}
 
 	return 0;
 }
-- 
2.1.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] tile: support arch_irq_work_raise
  2015-03-24 19:21 ` [PATCH 1/4] tile: support arch_irq_work_raise cmetcalf
@ 2015-03-24 21:00   ` Frederic Weisbecker
  2015-03-24 21:14   ` Peter Zijlstra
  1 sibling, 0 replies; 14+ messages in thread
From: Frederic Weisbecker @ 2015-03-24 21:00 UTC (permalink / raw)
  To: cmetcalf
  Cc: linux-kernel, Ingo Molnar, Paul E. McKenney, Peter Zijlstra,
	Thomas Gleixner

On Tue, Mar 24, 2015 at 03:21:32PM -0400, cmetcalf@ezchip.com wrote:
> From: Chris Metcalf <cmetcalf@ezchip.com>
> 
> Tile includes a hypervisor hook to deliver messages to arbitrary
> tiles, so we can use that to raise an interrupt as soon as
> possible on our own core.  Unfortunately the Tilera hypervisor
> disabled that support on principle in previous releases, but
> it will be available in MDE 4.3.4 and later.
> 
> Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>

I'm not very knowledgeable about tile, but the self-IPI test is enough for
me to ack the change.

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] tile: support arch_irq_work_raise
  2015-03-24 19:21 ` [PATCH 1/4] tile: support arch_irq_work_raise cmetcalf
  2015-03-24 21:00   ` Frederic Weisbecker
@ 2015-03-24 21:14   ` Peter Zijlstra
  2015-03-24 22:06     ` Chris Metcalf
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2015-03-24 21:14 UTC (permalink / raw)
  To: cmetcalf
  Cc: linux-kernel, Ingo Molnar, Paul E. McKenney, Thomas Gleixner,
	Frederic Weisbecker

On Tue, Mar 24, 2015 at 03:21:32PM -0400, cmetcalf@ezchip.com wrote:
> From: Chris Metcalf <cmetcalf@ezchip.com>
> 
> Tile includes a hypervisor hook to deliver messages to arbitrary
> tiles, so we can use that to raise an interrupt as soon as
> possible on our own core.  Unfortunately the Tilera hypervisor
> disabled that support on principle in previous releases, but
> it will be available in MDE 4.3.4 and later.

Can you program a timer in the (recent) past which will instantly
trigger an interrupt? This is what PPC64 does to implement the self-ipi.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] tile: support CONTEXT_TRACKING and thus NOHZ_FULL
  2015-03-24 19:21 ` [PATCH 2/4] tile: support CONTEXT_TRACKING and thus NOHZ_FULL cmetcalf
@ 2015-03-24 21:15   ` Frederic Weisbecker
  2015-03-24 21:49     ` Chris Metcalf
  0 siblings, 1 reply; 14+ messages in thread
From: Frederic Weisbecker @ 2015-03-24 21:15 UTC (permalink / raw)
  To: cmetcalf
  Cc: linux-kernel, Andrew Morton, Gilad Ben-Yossef, H. Peter Anvin,
	Ingo Molnar, Li Zhong, Paul E. McKenney, Peter Zijlstra,
	Steven Rostedt, Thomas Gleixner

On Tue, Mar 24, 2015 at 03:21:33PM -0400, cmetcalf@ezchip.com wrote:
> From: Chris Metcalf <cmetcalf@ezchip.com>
> 
> Add the TIF_NOHZ flag appropriately.
> 
> Add call to user_exit() on entry to do_work_pending() and on entry
> to syscalls via do_syscall_trace_enter(), and also the top of
> do_syscall_trace_exit() just because it's done in x86.

You only need to protect do_syscall_trace_exit() if there is a risk
that something calls user_enter() before. x86 does it so because
schedule_user() can be called before although I think we've changed
schedule_user() to use exception_enter/exit. I should check if that
user_exit() in do_syscall_trace_exit() is still necessary in x86.

Anyway, calling user_exit() in context tracking kernel mode doesn't do
any harm.

> diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
> index 48e5773dd0b7..b403c2e3e263 100644
> --- a/arch/tile/kernel/process.c
> +++ b/arch/tile/kernel/process.c
> @@ -27,6 +27,7 @@
>  #include <linux/kernel.h>
>  #include <linux/tracehook.h>
>  #include <linux/signal.h>
> +#include <linux/context_tracking.h>
>  #include <asm/stack.h>
>  #include <asm/switch_to.h>
>  #include <asm/homecache.h>
> @@ -474,6 +475,8 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
>  	if (!user_mode(regs))
>  		return 0;
>  
> +	user_exit();
> +
>  	/* Enable interrupts; they are disabled again on return to caller. */
>  	local_irq_enable();
>  
> @@ -496,11 +499,12 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
>  		tracehook_notify_resume(regs);
>  		return 1;
>  	}
> -	if (thread_info_flags & _TIF_SINGLESTEP) {
> +	if (thread_info_flags & _TIF_SINGLESTEP)
>  		single_step_once(regs);
> -		return 0;
> -	}
> -	panic("work_pending: bad flags %#x\n", thread_info_flags);
> +
> +	user_enter();

So, is do_work_pending() called from syscall exit only? Or does it concern
interrupts and exceptions as well?

Well if it's always followed by a return to userspace, it should be fine.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] tile: support CONTEXT_TRACKING and thus NOHZ_FULL
  2015-03-24 21:15   ` Frederic Weisbecker
@ 2015-03-24 21:49     ` Chris Metcalf
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Metcalf @ 2015-03-24 21:49 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: linux-kernel, Andrew Morton, Gilad Ben-Yossef, H. Peter Anvin,
	Ingo Molnar, Li Zhong, Paul E. McKenney, Peter Zijlstra,
	Steven Rostedt, Thomas Gleixner

On 3/24/2015 5:15 PM, Frederic Weisbecker wrote:
> On Tue, Mar 24, 2015 at 03:21:33PM -0400, cmetcalf@ezchip.com wrote:
>> From: Chris Metcalf <cmetcalf@ezchip.com>
>>
>> Add the TIF_NOHZ flag appropriately.
>>
>> Add call to user_exit() on entry to do_work_pending() and on entry
>> to syscalls via do_syscall_trace_enter(), and also the top of
>> do_syscall_trace_exit() just because it's done in x86.
> You only need to protect do_syscall_trace_exit() if there is a risk
> that something calls user_enter() before. x86 does it so because
> schedule_user() can be called before although I think we've changed
> schedule_user() to use exception_enter/exit. I should check if that
> user_exit() in do_syscall_trace_exit() is still necessary in x86.
>
> Anyway, calling user_exit() in context tracking kernel mode doesn't do
> any harm.

Yes, I see the exception_enter/exit in schedule_user().  I guess I will plan to
leave the user_exit() in for now, but if you conclude it's not necessary for x86,
I can remove it for tile as well at that point.

>> diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
>> index 48e5773dd0b7..b403c2e3e263 100644
>> --- a/arch/tile/kernel/process.c
>> +++ b/arch/tile/kernel/process.c
>> @@ -27,6 +27,7 @@
>>   #include <linux/kernel.h>
>>   #include <linux/tracehook.h>
>>   #include <linux/signal.h>
>> +#include <linux/context_tracking.h>
>>   #include <asm/stack.h>
>>   #include <asm/switch_to.h>
>>   #include <asm/homecache.h>
>> @@ -474,6 +475,8 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
>>   	if (!user_mode(regs))
>>   		return 0;
>>   
>> +	user_exit();
>> +
>>   	/* Enable interrupts; they are disabled again on return to caller. */
>>   	local_irq_enable();
>>   
>> @@ -496,11 +499,12 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
>>   		tracehook_notify_resume(regs);
>>   		return 1;
>>   	}
>> -	if (thread_info_flags & _TIF_SINGLESTEP) {
>> +	if (thread_info_flags & _TIF_SINGLESTEP)
>>   		single_step_once(regs);
>> -		return 0;
>> -	}
>> -	panic("work_pending: bad flags %#x\n", thread_info_flags);
>> +
>> +	user_enter();
> So, is do_work_pending() called from syscall exit only? Or does it concern
> interrupts and exceptions as well?

It's called on every return to userspace if the TIF flags require it.

> Well if it's always followed by a return to userspace, it should be fine.

Let me know if you'd like me to put your Acked-by on the commit. Thanks!

-- 
Chris Metcalf, EZChip Semiconductor
http://www.ezchip.com


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] tile: support arch_irq_work_raise
  2015-03-24 21:14   ` Peter Zijlstra
@ 2015-03-24 22:06     ` Chris Metcalf
  2015-03-25  8:03       ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Metcalf @ 2015-03-24 22:06 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Ingo Molnar, Paul E. McKenney, Thomas Gleixner,
	Frederic Weisbecker

On 3/24/2015 5:14 PM, Peter Zijlstra wrote:
> On Tue, Mar 24, 2015 at 03:21:32PM -0400, cmetcalf@ezchip.com wrote:
>> From: Chris Metcalf <cmetcalf@ezchip.com>
>>
>> Tile includes a hypervisor hook to deliver messages to arbitrary
>> tiles, so we can use that to raise an interrupt as soon as
>> possible on our own core.  Unfortunately the Tilera hypervisor
>> disabled that support on principle in previous releases, but
>> it will be available in MDE 4.3.4 and later.
> Can you program a timer in the (recent) past which will instantly
> trigger an interrupt? This is what PPC64 does to implement the self-ipi.

I looked in git history a bit and see commit 105988c015943 from 2009,
which looks like it was the basis for the powerpc support.  I'm a little leery
of just randomly changing the in-flight timer decrementer value, though;
does the timer event_handler properly handle being called early, and
then properly handle resetting the decrementer to the right value?
I guess if both of those things are true, it seems plausible to adopt the
approach you suggested.

On the other hand, the approach I coded up avoids making any of those
slightly scary assumptions about the timer subsystem, and I don't really
have a problem with saying you need a recent Tilera hypervisor binary if
you want to use NOHZ_FULL...

-- 
Chris Metcalf, EZChip Semiconductor
http://www.ezchip.com


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] tile: support arch_irq_work_raise
  2015-03-24 22:06     ` Chris Metcalf
@ 2015-03-25  8:03       ` Peter Zijlstra
  0 siblings, 0 replies; 14+ messages in thread
From: Peter Zijlstra @ 2015-03-25  8:03 UTC (permalink / raw)
  To: Chris Metcalf
  Cc: linux-kernel, Ingo Molnar, Paul E. McKenney, Thomas Gleixner,
	Frederic Weisbecker

On Tue, Mar 24, 2015 at 06:06:26PM -0400, Chris Metcalf wrote:
> On 3/24/2015 5:14 PM, Peter Zijlstra wrote:
> >On Tue, Mar 24, 2015 at 03:21:32PM -0400, cmetcalf@ezchip.com wrote:
> >>From: Chris Metcalf <cmetcalf@ezchip.com>
> >>
> >>Tile includes a hypervisor hook to deliver messages to arbitrary
> >>tiles, so we can use that to raise an interrupt as soon as
> >>possible on our own core.  Unfortunately the Tilera hypervisor
> >>disabled that support on principle in previous releases, but
> >>it will be available in MDE 4.3.4 and later.
> >Can you program a timer in the (recent) past which will instantly
> >trigger an interrupt? This is what PPC64 does to implement the self-ipi.
> 
> I looked in git history a bit and see commit 105988c015943 from 2009,
> which looks like it was the basis for the powerpc support.  I'm a little leery
> of just randomly changing the in-flight timer decrementer value, though;
> does the timer event_handler properly handle being called early, and
> then properly handle resetting the decrementer to the right value?
> I guess if both of those things are true, it seems plausible to adopt the
> approach you suggested.
> 
> On the other hand, the approach I coded up avoids making any of those
> slightly scary assumptions about the timer subsystem, and I don't really
> have a problem with saying you need a recent Tilera hypervisor binary if
> you want to use NOHZ_FULL...

Fair enough; and your call obviously. Just wanted to share that there
are alternative ways if you really need.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] nohz: add tick_nohz_full_clear_cpus() API
  2015-03-24 19:21 ` [PATCH 3/4] nohz: add tick_nohz_full_clear_cpus() API cmetcalf
@ 2015-03-30 16:20   ` Chris Metcalf
  2015-03-30 16:41     ` Rik van Riel
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Metcalf @ 2015-03-30 16:20 UTC (permalink / raw)
  To: linux-kernel, Frederic Weisbecker, Steven Rostedt,
	Paul E. McKenney, Ingo Molnar, Thomas Gleixner, Peter Zijlstra,
	Borislav Petkov, Li Zhong, Mike Galbraith, Kevin Hilman,
	Rik van Riel

I wanted to ping the patch below again, since I haven't heard any
feedback.

I note that Rik van Riel's change posted this weekend offers similar
functionality for userspace.  My change offers a convenient API
for, e.g., kernel drivers setting up default irq balancing.

https://lkml.org/lkml/2015/3/28/94

Although it would be possible to do the same thing by iterating over
the existing tick_nohz_full_cpu() API, that seems kind of silly.

An alternate API would be one that just returned the nohz_full
cpumask to kernel callers; I'd be happy with that as well, but my
instinct was to make the API as narrow as possible to start with.

Comments?

On 03/24/2015 03:21 PM, cmetcalf@ezchip.com wrote:
> From: Chris Metcalf <cmetcalf@ezchip.com>
>
> This is useful, for example, to modify a cpumask to avoid the
> nohz cores so that interrupts aren't sent to them.
>
> Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
> ---
> Motivated by patch 4/4 in this series.
>
>   include/linux/tick.h | 7 +++++++
>   1 file changed, 7 insertions(+)
>
> diff --git a/include/linux/tick.h b/include/linux/tick.h
> index 9c085dc12ae9..d53ad4892a39 100644
> --- a/include/linux/tick.h
> +++ b/include/linux/tick.h
> @@ -186,6 +186,12 @@ static inline bool tick_nohz_full_cpu(int cpu)
>   	return cpumask_test_cpu(cpu, tick_nohz_full_mask);
>   }
>   
> +static inline void tick_nohz_full_clear_cpus(struct cpumask *mask)
> +{
> +	if (tick_nohz_full_enabled())
> +		cpumask_andnot(mask, mask, tick_nohz_full_mask);
> +}
> +
>   extern void __tick_nohz_full_check(void);
>   extern void tick_nohz_full_kick(void);
>   extern void tick_nohz_full_kick_cpu(int cpu);
> @@ -194,6 +200,7 @@ extern void __tick_nohz_task_switch(struct task_struct *tsk);
>   #else
>   static inline bool tick_nohz_full_enabled(void) { return false; }
>   static inline bool tick_nohz_full_cpu(int cpu) { return false; }
> +static inline void tick_nohz_full_clear_cpus(struct cpumask *mask) { }
>   static inline void __tick_nohz_full_check(void) { }
>   static inline void tick_nohz_full_kick_cpu(int cpu) { }
>   static inline void tick_nohz_full_kick(void) { }

-- 
Chris Metcalf, EZChip Semiconductor
http://www.ezchip.com


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] nohz: add tick_nohz_full_clear_cpus() API
  2015-03-30 16:20   ` Chris Metcalf
@ 2015-03-30 16:41     ` Rik van Riel
  2015-03-30 16:45       ` Chris Metcalf
  0 siblings, 1 reply; 14+ messages in thread
From: Rik van Riel @ 2015-03-30 16:41 UTC (permalink / raw)
  To: Chris Metcalf, linux-kernel, Frederic Weisbecker, Steven Rostedt,
	Paul E. McKenney, Ingo Molnar, Thomas Gleixner, Peter Zijlstra,
	Borislav Petkov, Li Zhong, Mike Galbraith, Kevin Hilman

On 03/30/2015 12:20 PM, Chris Metcalf wrote:
> I wanted to ping the patch below again, since I haven't heard any
> feedback.
> 
> I note that Rik van Riel's change posted this weekend offers similar
> functionality for userspace.  My change offers a convenient API
> for, e.g., kernel drivers setting up default irq balancing.
> 
> https://lkml.org/lkml/2015/3/28/94

I submitted a patch to irqbalance to exclude nohz_full
cpus from having irqs assigned to them.  I could see
the same thing being useful for in-kernel irq assignment,
especially for multi-queue devices that set up irqs on
multiple CPUs.

> An alternate API would be one that just returned the full no_hz
> cpumask to kernel callers; I'd be happy with that as well, but my
> instinct was to make the API as narrow as possible to start with.
> 
> Comments?

What drivers and subsystems are you targeting?

I am just looking at blk-mq now, and it seems like the
API most appropriate for that would be an inline function
that tests whether or not a CPU is nohz_full.

for_each_possible_cpu(i) {

	...

	if (cpu_nohz_full(i))
		continue;

}

A lot of the other code in drivers and subsystems that
set up per-cpu queues and irqs seems to iterate over all
CPUs at init time, and could benefit from a function
allowing them to skip nohz_full CPUs.

Your tick_nohz_full_clear_cpus() function seems reasonable
too, for code that uses a cpumask to set up per cpu stuff.

> On 03/24/2015 03:21 PM, cmetcalf@ezchip.com wrote:
>> From: Chris Metcalf <cmetcalf@ezchip.com>
>>
>> This is useful, for example, to modify a cpumask to avoid the
>> nohz cores so that interrupts aren't sent to them.
>>
>> Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
>> ---
>> Motivated by patch 4/4 in this series.
>>
>>   include/linux/tick.h | 7 +++++++
>>   1 file changed, 7 insertions(+)
>>
>> diff --git a/include/linux/tick.h b/include/linux/tick.h
>> index 9c085dc12ae9..d53ad4892a39 100644
>> --- a/include/linux/tick.h
>> +++ b/include/linux/tick.h
>> @@ -186,6 +186,12 @@ static inline bool tick_nohz_full_cpu(int cpu)
>>       return cpumask_test_cpu(cpu, tick_nohz_full_mask);
>>   }
>>   +static inline void tick_nohz_full_clear_cpus(struct cpumask *mask)
>> +{
>> +    if (tick_nohz_full_enabled())
>> +        cpumask_andnot(mask, mask, tick_nohz_full_mask);
>> +}
>> +
>>   extern void __tick_nohz_full_check(void);
>>   extern void tick_nohz_full_kick(void);
>>   extern void tick_nohz_full_kick_cpu(int cpu);
>> @@ -194,6 +200,7 @@ extern void __tick_nohz_task_switch(struct
>> task_struct *tsk);
>>   #else
>>   static inline bool tick_nohz_full_enabled(void) { return false; }
>>   static inline bool tick_nohz_full_cpu(int cpu) { return false; }
>> +static inline void tick_nohz_full_clear_cpus(struct cpumask *mask) { }
>>   static inline void __tick_nohz_full_check(void) { }
>>   static inline void tick_nohz_full_kick_cpu(int cpu) { }
>>   static inline void tick_nohz_full_kick(void) { }
> 


-- 
All rights reversed

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] nohz: add tick_nohz_full_clear_cpus() API
  2015-03-30 16:41     ` Rik van Riel
@ 2015-03-30 16:45       ` Chris Metcalf
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Metcalf @ 2015-03-30 16:45 UTC (permalink / raw)
  To: Rik van Riel, linux-kernel, Frederic Weisbecker, Steven Rostedt,
	Paul E. McKenney, Ingo Molnar, Thomas Gleixner, Peter Zijlstra,
	Borislav Petkov, Li Zhong, Mike Galbraith, Kevin Hilman

On 03/30/2015 12:41 PM, Rik van Riel wrote:
> On 03/30/2015 12:20 PM, Chris Metcalf wrote:
>> I wanted to ping the patch below again, since I haven't heard any
>> feedback.
>>
>> I note that Rik van Riel's change posted this weekend offers similar
>> functionality for userspace.  My change offers a convenient API
>> for, e.g., kernel drivers setting up default irq balancing.
>>
>> https://lkml.org/lkml/2015/3/28/94
> I submitted a patch to irqbalance to exclude nohz_full
> cpus from having irqs assigned to them.  I could see
> the same thing being useful for in-kernel irq assignment,
> especially for multi-queue devices that set up irqs on
> multiple CPUs.
>
>> An alternate API would be one that just returned the full no_hz
>> cpumask to kernel callers; I'd be happy with that as well, but my
>> instinct was to make the API as narrow as possible to start with.
>>
>> Comments?
> What drivers and subsystems are you targeting?

At the moment, just the on-chip tilegx ethernet controller
(drivers/net/ethernet/tile/tilegx.c) but I'm pushing to upstream
a number of other commits from the Tilera "dataplane" mode,
some of which include code that acts on cpumasks as well.

> I am just looking at blk-mq now, and it seems like the
> API most appropriate for that would be an inline function
> that tests whether or not a CPU is nohz_full.

That API already exists - tick_nohz_full_cpu().

> for_each_possible_cpu(i) {
>
> 	...
>
> 	if (cpu_nohz_full(i))
> 		continue;
>
> }
>
> A lot of the other code in drivers and subsystems that
> set up per-cpu queues and irqs seem to iterate over all
> CPUs at init time, and could benefit from a function
> allowing them to skip nohz_full CPUs.
>
> Your tick_nohz_full_clear_cpus() function seems reasonable
> too, for code that uses a cpumask to set up per cpu stuff.

I'm happy to ask for a pull request for the tile architecture
that includes that commit, if no one objects.  I'd be happier
if someone acked the patch more explicitly, though.

Thanks!

> From: Chris Metcalf <cmetcalf@ezchip.com>
>
> This is useful, for example, to modify a cpumask to avoid the
> nohz cores so that interrupts aren't sent to them.
>
> Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
> ---
> Motivated by patch 4/4 in this series.
>
>    include/linux/tick.h | 7 +++++++
>    1 file changed, 7 insertions(+)
>
> diff --git a/include/linux/tick.h b/include/linux/tick.h
> index 9c085dc12ae9..d53ad4892a39 100644
> --- a/include/linux/tick.h
> +++ b/include/linux/tick.h
> @@ -186,6 +186,12 @@ static inline bool tick_nohz_full_cpu(int cpu)
>        return cpumask_test_cpu(cpu, tick_nohz_full_mask);
>    }
>    +static inline void tick_nohz_full_clear_cpus(struct cpumask *mask)
> +{
> +    if (tick_nohz_full_enabled())
> +        cpumask_andnot(mask, mask, tick_nohz_full_mask);
> +}
> +
>    extern void __tick_nohz_full_check(void);
>    extern void tick_nohz_full_kick(void);
>    extern void tick_nohz_full_kick_cpu(int cpu);
> @@ -194,6 +200,7 @@ extern void __tick_nohz_task_switch(struct
> task_struct *tsk);
>    #else
>    static inline bool tick_nohz_full_enabled(void) { return false; }
>    static inline bool tick_nohz_full_cpu(int cpu) { return false; }
> +static inline void tick_nohz_full_clear_cpus(struct cpumask *mask) { }
>    static inline void __tick_nohz_full_check(void) { }
>    static inline void tick_nohz_full_kick_cpu(int cpu) { }
>    static inline void tick_nohz_full_kick(void) { }

-- 
Chris Metcalf, EZChip Semiconductor
http://www.ezchip.com


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-03-30 16:46 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-24 19:21 [PATCH 0/4] initial NOHZ_FULL support for tile cmetcalf
2015-03-24 19:21 ` [PATCH 1/4] tile: support arch_irq_work_raise cmetcalf
2015-03-24 21:00   ` Frederic Weisbecker
2015-03-24 21:14   ` Peter Zijlstra
2015-03-24 22:06     ` Chris Metcalf
2015-03-25  8:03       ` Peter Zijlstra
2015-03-24 19:21 ` [PATCH 2/4] tile: support CONTEXT_TRACKING and thus NOHZ_FULL cmetcalf
2015-03-24 21:15   ` Frederic Weisbecker
2015-03-24 21:49     ` Chris Metcalf
2015-03-24 19:21 ` [PATCH 3/4] nohz: add tick_nohz_full_clear_cpus() API cmetcalf
2015-03-30 16:20   ` Chris Metcalf
2015-03-30 16:41     ` Rik van Riel
2015-03-30 16:45       ` Chris Metcalf
2015-03-24 19:21 ` [PATCH 4/4] net: tile: don't send interrupts to nohz cores by default cmetcalf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.