All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wright <chrisw@sous-sol.org>
To: linux-kernel@vger.kernel.org, stable@kernel.org, jejb@kernel.org
Cc: Justin Forbes <jmforbes@linuxtx.org>,
	Zwane Mwaikambo <zwane@arm.linux.org.uk>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Randy Dunlap <rdunlap@xenotime.net>,
	Dave Jones <davej@redhat.com>,
	Chuck Wolber <chuckw@quantumlinux.com>,
	Chris Wedgwood <reviews@ml.cw.f00f.org>,
	Michael Krufky <mkrufky@linuxtv.org>,
	Chuck Ebbert <cebbert@redhat.com>,
	Domenico Andreoli <cavokz@gmail.com>, Willy Tarreau <w@1wt.eu>,
	Rodrigo Rubira Branco <rbranco@la.checkpoint.com>,
	Jake Edge <jake@lwn.net>, Eugene Teo <eteo@redhat.com>,
	torvalds@linux-foundation.org, akpm@linux-foundation.org,
	alan@lxorguk.ukuu.org.uk,
	Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Ingo Molnar <mingo@elte.hu>
Subject: [patch 056/100] posixtimers, sched: Fix posix clock monotonicity
Date: Thu, 23 Apr 2009 00:21:16 -0700	[thread overview]
Message-ID: <20090423072652.581000544@sous-sol.org> (raw)
In-Reply-To: 20090423072020.428683652@sous-sol.org

[-- Attachment #1: posixtimers-sched-fix-posix-clock-monotonicity.patch --]
[-- Type: text/plain, Size: 4566 bytes --]

-stable review patch.  If anyone has any objections, please let us know.
---------------------

From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>

upstream commit: c5f8d99585d7b5b7e857fabf8aefd0174903a98c

Impact: Regression fix (for the bug where clock_gettime() could go backwards)

This patch re-introduces a couple of functions, task_sched_runtime
and thread_group_sched_runtime, which were removed around the
time of 2.6.28-rc1.

These functions hold the rq lock while sampling the thread/process
clock.  The rq lock is required so that rq->clock is not updated
during the sampling.

i.e.
  Without the lock, clock_gettime() may return
   ((accounted runtime before update) + (delta after update))
  which is less than what it should be.

v2 -> v3:
	- Rename static helper function __task_delta_exec()
	  to do_task_delta_exec() since -tip tree already has
	  a __task_delta_exec() of different version.

v1 -> v2:
	- Revises comments of function and patch description.
	- Add note about accuracy of thread group's runtime.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org
LKML-Reference: <49D1CC93.4080401@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
---
 kernel/posix-cpu-timers.c |    7 ++--
 kernel/sched.c            |   65 ++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 61 insertions(+), 11 deletions(-)

--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clocki
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
+		cpu->sched = task_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const 
 {
 	struct task_cputime cputime;
 
-	thread_group_cputime(p, &cputime);
 	switch (CPUCLOCK_WHICH(which_clock)) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
+		thread_group_cputime(p, &cputime);
 		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
+		thread_group_cputime(p, &cputime);
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+		cpu->sched = thread_group_sched_runtime(p);
 		break;
 	}
 	return 0;
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4134,9 +4134,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
- * Return any ns on the sched_clock that have not yet been banked in
+ * Return any ns on the sched_clock that have not yet been accounted in
  * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
  */
+static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+	u64 ns = 0;
+
+	if (task_current(rq, p)) {
+		update_rq_clock(rq);
+		ns = rq->clock - p->se.exec_start;
+		if ((s64)ns < 0)
+			ns = 0;
+	}
+
+	return ns;
+}
+
 unsigned long long task_delta_exec(struct task_struct *p)
 {
 	unsigned long flags;
@@ -4144,16 +4160,49 @@ unsigned long long task_delta_exec(struc
 	u64 ns = 0;
 
 	rq = task_rq_lock(p, &flags);
+	ns = do_task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
 
-	if (task_current(rq, p)) {
-		u64 delta_exec;
+	return ns;
+}
 
-		update_rq_clock(rq);
-		delta_exec = rq->clock - p->se.exec_start;
-		if ((s64)delta_exec > 0)
-			ns = delta_exec;
-	}
+/*
+ * Return accounted runtime for the task.
+ * In case the task is currently running, return the runtime plus current's
+ * pending runtime that has not been accounted yet.
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+	unsigned long flags;
+	struct rq *rq;
+	u64 ns = 0;
+
+	rq = task_rq_lock(p, &flags);
+	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
+
+	return ns;
+}
 
+/*
+ * Return sum_exec_runtime for the thread group.
+ * In case the task is currently running, return the sum plus current's
+ * pending runtime that has not been accounted yet.
+ *
+ * Note that the thread group might have other running tasks as well,
+ * so the return value does not include the pending runtime that other
+ * running tasks might have.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+	struct task_cputime totals;
+	unsigned long flags;
+	struct rq *rq;
+	u64 ns;
+
+	rq = task_rq_lock(p, &flags);
+	thread_group_cputime(p, &totals);
+	ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
 	task_rq_unlock(rq, &flags);
 
 	return ns;


  parent reply	other threads:[~2009-04-23  7:49 UTC|newest]

Thread overview: 121+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-23  7:20 [patch 000/100] 2.6.29.2 -stable review Chris Wright
2009-04-23  7:20 ` [patch 001/100] security/smack: fix oops when setting a size 0 SMACK64 xattr Chris Wright
2009-04-23  7:20 ` [patch 002/100] fbmem: fix fb_info->lock and mm->mmap_sem circular locking dependency Chris Wright
2009-04-23  7:20 ` [patch 003/100] fbdev: fix info->lock deadlock in fbcon_event_notify() Chris Wright
2009-04-23  7:20 ` [patch 004/100] ide: Fix code dealing with sleeping devices in do_ide_request() Chris Wright
2009-04-23  7:20 ` [patch 005/100] PCI/x86: detect host bridge config space size w/o using quirks Chris Wright
2009-04-23  7:20 ` [patch 006/100] MIPS: Compat: Zero upper 32-bit of offset_high and offset_low Chris Wright
2009-04-23  7:20 ` [patch 007/100] ext4: fix typo which causes a memory leak on error path Chris Wright
2009-04-23  7:20 ` [patch 008/100] ext4: fix locking typo in mballoc which could cause soft lockup hangs Chris Wright
2009-04-23  7:20 ` [patch 009/100] rt2x00: Fix SLAB corruption during rmmod Chris Wright
2009-04-23  7:20   ` Chris Wright
2009-04-23  7:20 ` [patch 010/100] tracing/core: fix early free of cpumasks Chris Wright
2009-04-23  7:20 ` [patch 011/100] x86, setup: mark %esi as clobbered in E820 BIOS call Chris Wright
2009-04-23  7:20 ` [patch 012/100] acpi: fix of pmtimer overflow that make Cx states time incorrect Chris Wright
2009-04-23  9:24   ` Shi, Alex
2009-04-23  9:24     ` Shi, Alex
2009-04-23  9:40   ` Shi, Alex
2009-04-23  9:40     ` Shi, Alex
2009-04-23  7:20 ` [patch 013/100] ACPI: cap off P-state transition latency from buggy BIOSes Chris Wright
2009-04-25 15:59   ` Martin Steigerwald
2009-04-23  7:20 ` [patch 014/100] dock: fix dereference after kfree() Chris Wright
2009-04-23  7:20 ` [patch 015/100] drm/i915: Change DCC tiling detection case to cover only mobile parts Chris Wright
2009-04-23  7:20 ` [patch 016/100] drm/i915: Read the right SDVO register when detecting SVDO/HDMI Chris Wright
2009-04-23  7:20 ` [patch 017/100] drm/i915: Sync crt hotplug detection with intel video driver Chris Wright
2009-04-23  7:20 ` [patch 018/100] drm/i915: Check for dev->primary->master before dereference Chris Wright
2009-04-23  7:20 ` [patch 019/100] drm/i915: check for -EINVAL from vm_insert_pfn Chris Wright
2009-04-23  7:20 ` [patch 020/100] drm: Use pgprot_writecombine in GEM GTT mapping to get the right bits for !PAT Chris Wright
2009-04-23  7:20 ` [patch 021/100] drm/i915: only set TV mode when any property changed Chris Wright
2009-04-23  7:20 ` [patch 022/100] drm/i915: fix TV mode setting in property change Chris Wright
2009-04-23  7:20 ` [patch 023/100] SCSI: sg: fix iovec bugs introduced by the block layer conversion Chris Wright
2009-04-23  7:20 ` [patch 024/100] md/raid1 - dont assume newly allocated bvecs are initialised Chris Wright
2009-04-23  7:20 ` [patch 025/100] r8169: Reset IntrStatus after chip reset Chris Wright
2009-04-23  7:20 ` [patch 026/100] V4L/DVB (10943): cx88: Prevent general protection fault on rmmod Chris Wright
2009-04-23  7:20 ` [patch 027/100] ide: drivers/ide/ide-atapi.c needs <linux/scatterlist.h> Chris Wright
2009-04-23  7:20 ` [patch 028/100] ide-atapi: start DMA after issuing a packet command Chris Wright
2009-04-23  7:20 ` [patch 029/100] cpumask: fix slab corruption caused by alloc_cpumask_var_node() Chris Wright
2009-04-23  7:20 ` [patch 030/100] sysctl: fix suid_dumpable and lease-break-time sysctls Chris Wright
2009-04-23  7:20 ` [patch 031/100] mm: define a UNIQUE value for AS_UNEVICTABLE flag Chris Wright
2009-04-23  7:20 ` [patch 032/100] mm: do_xip_mapping_read: fix length calculation Chris Wright
2009-04-23  7:20 ` [patch 033/100] ixgbe: Fix potential memory leak/driver panic issue while setting up Tx & Rx ring parameters Chris Wright
2009-04-23  7:20 ` [patch 034/100] dm: preserve bi_io_vec when resubmitting bios Chris Wright
2009-04-23  7:20 ` [patch 035/100] vfs: skip I_CLEAR state inodes Chris Wright
2009-04-23  7:20 ` [patch 036/100] dm raid1: switch read_record from kmalloc to slab to save memory Chris Wright
2009-04-23  7:20 ` [patch 037/100] dm io: make sync_io uninterruptible Chris Wright
2009-04-23  7:20 ` [patch 038/100] dm snapshot: refactor __find_pending_exception Chris Wright
2009-04-23  7:20 ` [patch 039/100] dm snapshot: avoid dropping lock in __find_pending_exception Chris Wright
2009-04-23  7:21 ` [patch 040/100] dm snapshot: avoid having two exceptions for the same chunk Chris Wright
2009-04-23  7:21 ` [patch 041/100] dm target: use module refcount directly Chris Wright
2009-04-23  7:21 ` [patch 042/100] dm: path selector " Chris Wright
2009-04-23  7:21 ` [patch 043/100] dm table: fix upgrade mode race Chris Wright
2009-04-23  7:21 ` [patch 044/100] af_rose/x25: Sanity check the maximum user frame size Chris Wright
2009-04-23  7:21 ` [patch 045/100] net/netrom: Fix socket locking Chris Wright
2009-04-23  7:21 ` [patch 046/100] crypto: shash - Fix unaligned calculation with short length Chris Wright
2009-04-23  7:21 ` [patch 047/100] acer-wmi: Blacklist Acer Aspire One Chris Wright
2009-04-23  7:21 ` [patch 048/100] kprobes: Fix locking imbalance in kretprobes Chris Wright
2009-04-23  7:21 ` [patch 049/100] netfilter: {ip, ip6, arp}_tables: fix incorrect loop detection Chris Wright
2009-04-23  7:21   ` Chris Wright
2009-04-23  7:21 ` [patch 050/100] splice: fix deadlock in splicing to file Chris Wright
2009-04-23  7:21 ` [patch 051/100] ALSA: hda - add missing comma in ad1884_slave_vols Chris Wright
2009-04-23  7:21 ` [patch 052/100] sparc64: Fix bug in ("sparc64: Flush TLB before releasing pages.") Chris Wright
2009-04-23  7:21 ` [patch 053/100] SCSI: libiscsi: fix iscsi pool error path Chris Wright
2009-04-23  7:21 ` [patch 054/100] SCSI: libiscsi: fix iscsi pool error path (fixlet) Chris Wright
2009-04-23  7:21 ` [patch 055/100] cap_prctl: dont set error to 0 at no_change Chris Wright
2009-04-23  7:21 ` Chris Wright [this message]
2009-04-23  7:21 ` [patch 057/100] posix-timers: fix RLIMIT_CPU && fork() Chris Wright
2009-04-23 20:59   ` Chuck Ebbert
2009-04-23 21:02     ` Oleg Nesterov
2009-04-23 21:11     ` Chris Wright
2009-04-23  7:21 ` [patch 058/100] posix-timers: fix RLIMIT_CPU && setitimer(CPUCLOCK_PROF) Chris Wright
2009-04-23  7:21 ` [patch 059/100] dm kcopyd: prepare for callback race fix Chris Wright
2009-04-23  7:21 ` [patch 060/100] dm kcopyd: fix callback race Chris Wright
2009-04-23  7:21 ` [patch 061/100] sched: do not count frozen tasks toward load Chris Wright
2009-04-23  7:21 ` Chris Wright
2009-04-23  7:21 ` Chris Wright
2009-04-23  7:21 ` [patch 062/100] x86: fix broken irq migration logic while cleaning up multiple vectors Chris Wright
2009-04-23  7:21 ` [patch 063/100] hrtimer: fix rq->lock inversion (again) Chris Wright
2009-04-23  7:21 ` [patch 064/100] add some long-missing capabilities to fs_mask Chris Wright
2009-04-23  7:21 ` [patch 065/100] spi: spi_write_then_read() bugfixes Chris Wright
2009-04-23  7:21 ` [patch 066/100] tty: Fix leak in ti-usb Chris Wright
2009-04-23  7:21 ` [patch 067/100] sfc: Match calls to netif_napi_add() and netif_napi_del() Chris Wright
2009-04-23  7:21 ` [patch 068/100] ALSA: hda - Fix the cmd cache keys for amp verbs Chris Wright
2009-04-23  7:21 ` [patch 069/100] powerpc: Fix data-corrupting bug in __futex_atomic_op Chris Wright
2009-04-23  7:21 ` [patch 070/100] hpt366: fix HPT370 DMA timeouts Chris Wright
2009-04-23  7:21 ` [patch 071/100] pata_hpt37x: " Chris Wright
2009-04-23  7:21 ` [patch 072/100] mm: pass correct mm when growing stack Chris Wright
2009-04-23  7:21 ` [patch 073/100] SCSI: sg: fix races during device removal Chris Wright
2009-04-23  7:21 ` [patch 074/100] SCSI: sg: fix races with ioctl(SG_IO) Chris Wright
2009-04-23  7:21 ` [patch 075/100] SCSI: sg: avoid blk_put_request/blk_rq_unmap_user in interrupt Chris Wright
2009-04-23  7:21 ` [patch 076/100] SCSI: sg: fix q->queue_lock on scsi_error_handler path Chris Wright
2009-04-23  7:21 ` [patch 077/100] x86: disable X86_PTRACE_BTS for now Chris Wright
2009-04-23  7:21 ` [patch 078/100] usb gadget: fix ethernet link reports to ethtool Chris Wright
2009-04-23  7:21 ` [patch 079/100] USB: ftdi_sio: add vendor/project id for JETI specbos 1201 spectrometer Chris Wright
2009-04-23  7:21 ` [patch 080/100] USB: fix oops in cdc-wdm in case of malformed descriptors Chris Wright
2009-04-23  7:21 ` [patch 081/100] USB: usb-storage: augment unusual_devs entry for Simple Tech/Datafab Chris Wright
2009-04-23  7:21 ` [patch 082/100] KVM: Fix missing smp tlb flush in invlpg Chris Wright
2009-04-23  7:21 ` [patch 083/100] KVM: Add CONFIG_HAVE_KVM_IRQCHIP Chris Wright
2009-04-23  7:21 ` [patch 084/100] KVM: Interrupt mask notifiers for ioapic Chris Wright
2009-04-23  7:21 ` [patch 085/100] KVM: Reset PIT irq injection logic when the PIT IRQ is unmasked Chris Wright
2009-04-23  7:21 ` [patch 086/100] KVM: MMU: handle compound pages in kvm_is_mmio_pfn Chris Wright
2009-04-23  7:21 ` [patch 087/100] KVM: fix kvm_vm_ioctl_deassign_device Chris Wright
2009-04-23  7:21 ` [patch 088/100] KVM: VMX: Update necessary state when guest enters long mode Chris Wright
2009-04-23  7:21 ` [patch 089/100] KVM: is_long_mode() should check for EFER.LMA Chris Wright
2009-04-23  7:21 ` [patch 090/100] x86, PAT: Remove page granularity tracking for vm_insert_pfn maps Chris Wright
2009-04-23  7:21 ` [patch 091/100] Input: gameport - fix attach driver code Chris Wright
2009-04-23  7:21 ` [patch 092/100] Revert "console ASCII glyph 1:1 mapping" Chris Wright
2009-04-23  7:21 ` [patch 093/100] virtio: fix suspend when using virtio_balloon Chris Wright
2009-04-23  7:21 ` [patch 094/100] agp: zero pages before sending to userspace Chris Wright
2009-04-23  7:21 ` [patch 095/100] gso: Fix support for linear packets Chris Wright
2009-04-23  7:21 ` [patch 096/100] NFS: Fix the XDR iovec calculation in nfs3_xdr_setaclargs Chris Wright
2009-04-23  7:21 ` [patch 097/100] hugetlbfs: return negative error code for bad mount option Chris Wright
2009-04-23  7:21 ` [patch 098/100] scsi: mpt: suppress debugobjects warning Chris Wright
2009-04-23  7:21 ` [patch 099/100] skge: fix occasional BUG during MTU change Chris Wright
2009-04-23  7:22 ` [patch 100/100] Bonding: fix zero address hole bug in arp_ip_target list Chris Wright
2009-04-23 13:47 ` [patch 000/100] 2.6.29.2 -stable review Tvrtko Ursulin
2009-04-23 14:49   ` [stable] " Greg KH
2009-04-23 14:56     ` Tvrtko Ursulin
2009-04-23 15:02       ` Greg KH
2009-04-23 15:08         ` Tvrtko Ursulin
2009-04-23 15:45           ` Greg KH
2009-04-23 16:17             ` Tvrtko Ursulin
2009-04-23 16:25               ` Chris Wright

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090423072652.581000544@sous-sol.org \
    --to=chrisw@sous-sol.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=cavokz@gmail.com \
    --cc=cebbert@redhat.com \
    --cc=chuckw@quantumlinux.com \
    --cc=davej@redhat.com \
    --cc=eteo@redhat.com \
    --cc=jake@lwn.net \
    --cc=jejb@kernel.org \
    --cc=jmforbes@linuxtx.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mkrufky@linuxtv.org \
    --cc=rbranco@la.checkpoint.com \
    --cc=rdunlap@xenotime.net \
    --cc=reviews@ml.cw.f00f.org \
    --cc=seto.hidetoshi@jp.fujitsu.com \
    --cc=stable@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=w@1wt.eu \
    --cc=zwane@arm.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.