All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ankur Arora <ankur.a.arora@oracle.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-mm@kvack.org, x86@kernel.org, akpm@linux-foundation.org,
	luto@kernel.org, bp@alien8.de, dave.hansen@linux.intel.com,
	hpa@zytor.com, mingo@redhat.com, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, willy@infradead.org, mgorman@suse.de,
	jon.grimm@amd.com, bharata@amd.com, raghavendra.kt@amd.com,
	boris.ostrovsky@oracle.com, konrad.wilk@oracle.com,
	jgross@suse.com, andrew.cooper3@citrix.com,
	Joel Fernandes <joel@joelfernandes.org>,
	Youssef Esmat <youssefesmat@chromium.org>,
	Vineeth Pillai <vineethrp@google.com>,
	Suleiman Souhlal <suleiman@google.com>,
	Ingo Molnar <mingo@kernel.org>,
	Daniel Bristot de Oliveira <bristot@kernel.org>
Subject: Re: [POC][RFC][PATCH v2] sched: Extended Scheduler Time Slice
Date: Thu, 26 Oct 2023 17:35:27 -0400	[thread overview]
Message-ID: <20231026173527.2ad215cc@gandalf.local.home> (raw)
In-Reply-To: <20231026152022.668ca0f3@gandalf.local.home>

[-- Attachment #1: Type: text/plain, Size: 630 bytes --]

On Thu, 26 Oct 2023 15:20:22 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> Anyway, I changed the code to use:
> 
> static inline unsigned clrbit(volatile unsigned *ptr)
> {
> 	unsigned ret;
> 
> 	asm volatile("andb %b1,%0"
> 		     : "+m" (*(volatile char *)ptr)
> 		     : "iq" (0x2)
> 		     : "memory");
> 
> 	ret = *ptr;
> 	*ptr = 0;
> 
> 	return ret;
> }

Mathieu also told me that glibc's rseq has some extra padding at the end,
that happens to be big enough to hold this feature. That means you can run
the code without adding:

  GLIBC_TUNABLES=glibc.pthread.rseq=0

Attached is the updated test program.

-- Steve

[-- Attachment #2: extend-sched.c --]
[-- Type: text/x-c++src, Size: 5905 bytes --]


// Run with: GLIBC_TUNABLES=glibc.pthread.rseq=0 

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <errno.h>
#include <pthread.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <tracefs.h>
#include <sys/syscall.h>
#include "rseq-abi.h"
#include <linux/tls.h>

#define rseq(rseq, len, flags, sig) syscall(SYS_rseq, rseq, len, \
					    flags, sig);

#define __weak __attribute__((weak))

//#define barrier() asm volatile ("" ::: "memory")
#define rmb() asm volatile ("lfence" ::: "memory")
#define wmb() asm volatile ("sfence" ::: "memory")


static pthread_barrier_t pbarrier;

static __thread struct rseq_abi __attribute__((aligned(sizeof(struct rseq_abi)))) rseq_map;
static __thread struct rseq_abi *rseq_ptr;

static bool no_rseq;

static void init_extend_map(void)
{
	extern ptrdiff_t __rseq_offset;
	extern unsigned int __rseq_size;
	int ret;

	if (no_rseq)
		return;

	if (__rseq_size) {
		if (__rseq_size < sizeof(rseq_map)) {
			printf("glibc rseq less than required mapping\n");
			return;
		}
		rseq_ptr = __builtin_thread_pointer() + __rseq_offset;
		printf("Using glibc rseq %p\n", rseq_ptr);
		return;
	}

	rseq_ptr = &rseq_map;
	ret = rseq(rseq_ptr, sizeof(rseq_map), 0, 0);
	perror("rseq");
	printf("ret = %d (%zd) %p\n", ret, sizeof(rseq_map), &rseq_map);
	if (ret < 0)
		rseq_ptr = NULL;
}

struct data;

struct thread_data {
	unsigned long long			start_wait;
	unsigned long long			x_count;
	unsigned long long			total;
	unsigned long long			max;
	unsigned long long			min;
	unsigned long long			total_wait;
	unsigned long long			max_wait;
	unsigned long long			min_wait;
	struct data				*data;
};

struct data {
	unsigned long long		x;
	unsigned long			lock;
	struct thread_data		*tdata;
	bool				done;
};

static inline unsigned long
cmpxchg(volatile unsigned long *ptr, unsigned long old, unsigned long new)
{
        unsigned long prev;

	asm volatile("lock; cmpxchg %b1,%2"
		     : "=a"(prev)
		     : "q"(new), "m"(*(ptr)), "0"(old)
		     : "memory");
        return prev;
}

static inline unsigned clrbit(volatile unsigned *ptr)
{
	unsigned ret;

	asm volatile("andb %b1,%0"
		     : "+m" (*(volatile char *)ptr)
		     : "iq" (0x2)
		     : "memory");

	ret = *ptr;
	*ptr = 0;

	return ret;
}

static void extend(void)
{
	if (!rseq_ptr)
		return;

	rseq_ptr->cr_flags = 1;
}

static void unextend(void)
{
	unsigned prev;

	if (!rseq_ptr)
		return;

	prev = clrbit(&rseq_ptr->cr_flags);
	if (prev & 2) {
		tracefs_printf(NULL, "Yield!\n");
		sched_yield();
	}
}

#define sec2usec(sec) (sec * 1000000ULL)
#define usec2sec(usec) (usec / 1000000ULL)

static unsigned long long get_time(void)
{
	struct timeval tv;
	unsigned long long time;

	gettimeofday(&tv, NULL);

	time = sec2usec(tv.tv_sec);
	time += tv.tv_usec;

	return time;
}

static void grab_lock(struct thread_data *tdata, struct data *data)
{
	unsigned long long start, end, delta;
	unsigned long long end_wait;
	unsigned long long last;
	unsigned long prev;

	if (!tdata->start_wait)
		tdata->start_wait = get_time();

	while (data->lock && !data->done)
		rmb();

	extend();
	start = get_time();
	prev = cmpxchg(&data->lock, 0, 1);
	if (prev) {
		unextend();
		return;
	}
	end_wait = get_time();
	tracefs_printf(NULL, "Have lock!\n");

	delta = end_wait - tdata->start_wait;
	tdata->start_wait = 0;
	if (!tdata->total_wait || tdata->max_wait < delta)
		tdata->max_wait = delta;
	if (!tdata->total_wait || tdata->min_wait > delta)
		tdata->min_wait = delta;
	tdata->total_wait += delta;

	data->x++;
	last = data->x;

	if (data->lock != 1) {
		printf("Failed locking\n");
		exit(-1);
	}
	prev = cmpxchg(&data->lock, 1, 0);
	end = get_time();
	if (prev != 1) {
		printf("Failed unlocking\n");
		exit(-1);
	}
	tracefs_printf(NULL, "released lock!\n");
	unextend();

	delta = end - start;
	if (!tdata->total || tdata->max < delta)
		tdata->max = delta;

	if (!tdata->total || tdata->min > delta)
		tdata->min = delta;

	tdata->total += delta;
	tdata->x_count++;

	/* Let someone else have a turn */
	while (data->x == last && !data->done)
		rmb();
}

	
	
static void *run_thread(void *d)
{
	struct thread_data *tdata = d;
	struct data *data = tdata->data;

	init_extend_map();

	pthread_barrier_wait(&pbarrier);

	while (!data->done) {
		grab_lock(tdata, data);
	}
	return NULL;
}

int main (int argc, char **argv)
{
	unsigned long long total_wait = 0;
	unsigned long long secs;
	pthread_t *threads;
	struct data data;
	int cpus;

	memset(&data, 0, sizeof(data));

	cpus = sysconf(_SC_NPROCESSORS_CONF);

	threads = calloc(cpus + 1, sizeof(*threads));
	if (!threads) {
		perror("threads");
		exit(-1);
	}

	data.tdata = calloc(cpus + 1, sizeof(*data.tdata));
	if (!data.tdata) {
		perror("Allocating tdata");
		exit(-1);
	}

	tracefs_print_init(NULL);
	pthread_barrier_init(&pbarrier, NULL, cpus + 2);

	for (int i = 0; i <= cpus; i++) {
		int ret;

		data.tdata[i].data = &data;
		ret = pthread_create(&threads[i], NULL, run_thread, &data.tdata[i]);
		if (ret < 0) {
			perror("creating threads");
			exit(-1);
		}
	}

	pthread_barrier_wait(&pbarrier);
	sleep(5);

	printf("Finish up\n");
	data.done = true;
	wmb();

	for (int i = 0; i <= cpus; i++) {
		pthread_join(threads[i], NULL);
		printf("thread %i:\n", i);
		printf("   count:\t%lld\n", data.tdata[i].x_count);
		printf("   total:\t%lld\n", data.tdata[i].total);
		printf("     max:\t%lld\n", data.tdata[i].max);
		printf("     min:\t%lld\n", data.tdata[i].min);
		printf("   total wait:\t%lld\n", data.tdata[i].total_wait);
		printf("     max wait:\t%lld\n", data.tdata[i].max_wait);
		printf("     min wait:\t%lld\n", data.tdata[i].min_wait);
		total_wait += data.tdata[i].total_wait;
	}

	secs = usec2sec(total_wait);

	printf("Ran for %lld times\n", data.x);
	printf("Total wait time: %lld.%06lld\n", secs, total_wait - sec2usec(secs));
	return 0;
}

  reply	other threads:[~2023-10-26 21:35 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-26  3:54 [POC][RFC][PATCH v2] sched: Extended Scheduler Time Slice Steven Rostedt
2023-10-26 10:59 ` Peter Zijlstra
2023-10-26 11:14   ` Steven Rostedt
2023-10-26 18:36     ` Mathieu Desnoyers
2023-10-26 18:50       ` Linus Torvalds
2023-10-26 18:59         ` Mathieu Desnoyers
2023-10-26 19:36           ` Steven Rostedt
2023-10-26 20:45           ` Steven Rostedt
     [not found]             ` <644da047-2f7a-4d55-a339-f2dc28d2c852@efficios.com>
     [not found]               ` <20231027122442.5c76dd62@gandalf.local.home>
2023-10-27 16:35                 ` Mathieu Desnoyers
2023-10-27 16:49                   ` Steven Rostedt
2023-10-30 12:56                     ` Mathieu Desnoyers
2023-10-30 13:45                       ` Steven Rostedt
2023-10-30 18:05                         ` Mathieu Desnoyers
2023-10-30 18:19                           ` Steven Rostedt
2023-10-30 18:27                             ` Mathieu Desnoyers
2023-10-30 18:39                               ` Steven Rostedt
2023-10-26 19:20       ` Steven Rostedt
2023-10-26 21:35         ` Steven Rostedt [this message]
2023-10-27 21:52 ` Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231026173527.2ad215cc@gandalf.local.home \
    --to=rostedt@goodmis.org \
    --cc=akpm@linux-foundation.org \
    --cc=andrew.cooper3@citrix.com \
    --cc=ankur.a.arora@oracle.com \
    --cc=bharata@amd.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=bristot@kernel.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=joel@joelfernandes.org \
    --cc=jon.grimm@amd.com \
    --cc=juri.lelli@redhat.com \
    --cc=konrad.wilk@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=raghavendra.kt@amd.com \
    --cc=suleiman@google.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vineethrp@google.com \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=youssefesmat@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.