From: Dipen Patel <dipenp@nvidia.com>
To: "Ahmed S. Darwish" <a.darwish@linutronix.de>
Cc: <linux-rt-users@vger.kernel.org>
Subject: Re: Multi pthreaded RT application - mlock doubt
Date: Tue, 25 May 2021 15:24:27 -0700	[thread overview]
Message-ID: <95b1b0f8-4957-f9cb-0cf7-82c79d819c38@nvidia.com> (raw)
In-Reply-To: <YKymwnknCImO30CU@lx-t490>



On 5/25/21 12:26 AM, Ahmed S. Darwish wrote:
> On Wed, Mar 31, 2021 at 07:06:26PM -0700, Dipen Patel wrote:
>> Hi,
>>
>> I was following
>> https://rt.wiki.kernel.org/index.php/Threaded_RT-application_with_memory_locking_and_stack_handling_example
>> with some below changes:
>>
> 
> The example above is a bit inaccurate, as it prefaults the thread's
> stack much later than it should be.
> 
> ...
> 
>>
>> thread_fn {
>> 	getrusage(RUSAGE_SELF, &usage);
>> 	print and save usage.ru_minflt;
>> 	prove_thread_stack_use_is_safe();
>> 	getrusage(RUSAGE_SELF, &usage);
>> 	print usage.ru_minflt - last_saved_cnt;
>> }
>>
>> I observed there are still page faults.
> 
> Well, in the snippet above, there will obviously be page faults, as
> you're also measuring the faults generated by
> prove_thread_stack_use_is_safe(). On first invocation, this is actually
> the method prefaulting the thread stack.
> 
The original example in the link above calls prove_thread_stack_use_is_safe()
in the same way. I only extended it so that each thread calls it and computes
its fault counts locally, because the program is now multi-threaded.
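
For what it's worth, if the stack gets prefaulted before the baseline is
taken, the remaining per-thread numbers could be read with RUSAGE_THREAD
instead of RUSAGE_SELF (RUSAGE_SELF aggregates the counters over the
whole process). A minimal sketch of what I have in mind, reusing the
helpers from the program below:

	/* Prefault the whole thread stack before taking the baseline */
	prove_thread_stack_use_is_safe(MY_STACK_SIZE);

	struct rusage before, after;
	getrusage(RUSAGE_THREAD, &before);	/* per-thread counters, Linux-specific */

	/* <do your RT-thing here> */

	getrusage(RUSAGE_THREAD, &after);
	printf("RT section pagefaults, Major:%ld, Minor:%ld\n",
	       after.ru_majflt - before.ru_majflt,
	       after.ru_minflt - before.ru_minflt);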

> To make sure the discussion is more concrete, can you please send a
> complete, compilable, *.c file?
>
 // Compile with 'gcc thisfile.c -lpthread -lrt -Wall'
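 // Needs CAP_SYS_NICE (or a suitable RLIMIT_RTPRIO) for SCHED_FIFO and
 // CAP_IPC_LOCK (or a raised RLIMIT_MEMLOCK) for mlockall(), so run it
 // with sufficient privileges.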
 /*
  * This program is modified to have multiple threads each with CPU affinity
  * and priority from
  * https://rt.wiki.kernel.org/index.php/Threaded_RT-application_with_memory_locking_and_stack_handling_example
  */
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>	// Needed for mlockall()
#include <unistd.h>		// needed for sysconf(int name);
#include <malloc.h>
#include <sys/time.h>	// needed for getrusage
#include <sys/types.h>
#include <sys/resource.h>	// needed for getrusage
#include <pthread.h>
#include <limits.h>
#include <ctype.h>
#include <sched.h>

   
#define PRE_ALLOCATION_SIZE (100*1024*1024) /* 100MB pagefault free buffer */
#define MY_STACK_SIZE       (100*1024)      /* 100 kB is enough for now. */

/* Added by Dipen */
#define NUM_THREAD	8 /* Do not change this, start_rt_thread hard codes its usage */
int SEED_PRIO = 90;
int NUM_PROC;
struct th_info {
	  int cpu_number;
	  int other;
	  int prio;
} ti[NUM_THREAD];
  
pthread_t thread[NUM_THREAD];
pthread_attr_t attr[NUM_THREAD];
/* End */

static void setprio(int prio, int sched)
{
	struct sched_param param;
	// Set realtime priority for this thread
	param.sched_priority = prio;
	if (sched_setscheduler(0, sched, &param) < 0)
		perror("sched_setscheduler");
}
   
void show_new_pagefault_count(const char* logtext,
			      const char* allowed_maj,
			      const char* allowed_min)
{
	static long last_majflt = 0, last_minflt = 0;
	struct rusage usage;

	getrusage(RUSAGE_SELF, &usage);

	printf("%-30.30s: Pagefaults, Major:%ld (Allowed %s), "
	       "Minor:%ld (Allowed %s)\n", logtext,
	       usage.ru_majflt - last_majflt, allowed_maj,
	       usage.ru_minflt - last_minflt, allowed_min);

	last_majflt = usage.ru_majflt;
	last_minflt = usage.ru_minflt;
}
   
static void prove_thread_stack_use_is_safe(int stacksize)
{
	volatile char buffer[stacksize];
	int i;

	/* Prove that this thread is behaving well */
	for (i = 0; i < stacksize; i += sysconf(_SC_PAGESIZE)) {
		/* Each write to this buffer shall NOT generate a
		 * pagefault. */
		buffer[i] = i;
	}
	/* commented out by Dipen */
	//show_new_pagefault_count("Caused by using thread stack", "0", "0");
}

/* Added by Dipen */
static void confirm_sched_para(void)
{
	int policy, ret;
	struct sched_param param;

	ret = pthread_getschedparam(pthread_self(), &policy, &param);

	if (ret)
		printf("ERROR getting sched param\n");
	else
		printf("policy=%s, priority=%d\n",
			(policy == SCHED_FIFO)  ? "SCHED_FIFO" :
			(policy == SCHED_RR)    ? "SCHED_RR" :
			(policy == SCHED_OTHER) ? "SCHED_OTHER" :
			"???", param.sched_priority);
}

/*************************************************************/
/* The thread to start */
/* Modified to add CPU affinity and calculating page faults
 * locally in the thread
 */
static void *my_rt_thread(void *args)
{
	struct th_info *ti = (struct th_info *)args;
	struct timespec ts;
	ts.tv_sec = 0;
	ts.tv_nsec = 10000000;

	long last_majflt = 0, last_minflt = 0;
	struct rusage usage;
	cpu_set_t cpuset;

	/* Pin this thread to the CPU chosen by start_rt_thread() */
	CPU_ZERO(&cpuset);
	CPU_SET(ti->cpu_number, &cpuset);
	sched_setaffinity(0, sizeof(cpuset), &cpuset);

	if (ti->other != 1) {
		setprio(ti->prio, SCHED_FIFO);
		printf("I am an RT-thread [%lu], executing on [%d]\n",
		       (unsigned long)pthread_self(), sched_getcpu());
	} else {
		printf("I am a non-RT thread [%lu], executing on [%d]\n",
		       (unsigned long)pthread_self(), sched_getcpu());
	}
	confirm_sched_para();
	//<do your RT-thing here>

	getrusage(RUSAGE_SELF, &usage);

	printf("[%lu]Pagefaults, Major:%ld, Minor:%ld\n",
	       (unsigned long)pthread_self(),
	       usage.ru_majflt - last_majflt,
	       usage.ru_minflt - last_minflt);

	last_majflt = usage.ru_majflt;
	last_minflt = usage.ru_minflt;

	prove_thread_stack_use_is_safe(MY_STACK_SIZE);

	getrusage(RUSAGE_SELF, &usage);

	printf("[%lu]After stack usage: Pagefaults, Major:%ld, Minor:%ld\n",
	       (unsigned long)pthread_self(),
	       usage.ru_majflt - last_majflt,
	       usage.ru_minflt - last_minflt);

	/* wait 10 ms before the thread terminates */
	clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL);
	printf("Thread %lu leaving\n", (unsigned long)pthread_self());
	return NULL;
}

/*************************************************************/
static void error(int at)
{
	/* Just exit on error */
	fprintf(stderr, "Some error occurred at %d\n", at);
	exit(1);
}

static void start_rt_thread(void)
{
	int i = 0;
	int csnum;
	cpu_set_t cpuset;
	int RT_POLICY = SCHED_FIFO;
	int RT_POLICY_MIN_PRIORITY = sched_get_priority_min(RT_POLICY);
	int RT_POLICY_MAX_PRIORITY = sched_get_priority_max(RT_POLICY);
	int PRIO_LOW    = RT_POLICY_MIN_PRIORITY;
	int PRIO_HIGH   = RT_POLICY_MAX_PRIORITY - 5;
	int PRIO_MEDIUM = (PRIO_LOW + PRIO_HIGH) / 2;

	printf("prio low=%d, %d, %d\n", PRIO_LOW, PRIO_HIGH, PRIO_MEDIUM);
   	/* init to default values */
	for (; i < NUM_THREAD; i++) {
		if (pthread_attr_init(&attr[i]))
			error(1);
		if (pthread_attr_setstacksize(&attr[i],
					      PTHREAD_STACK_MIN + MY_STACK_SIZE))
			error(2);

		/* Threads 0-2 each get their own CPU (0, 1, 2);
		 * threads 3-7 all run on CPU 3 */
		if (i < 3)
			csnum = i;
		else
			csnum = 3;

		ti[i].cpu_number = csnum;
		ti[i].other = 0;
		if (i >= 0 && i <= 2)
			ti[i].prio = PRIO_LOW;
		else if (i >= 3 && i < 5)
			ti[i].prio = PRIO_MEDIUM;
		else if (i >= 5 && i < 8)
			ti[i].prio = PRIO_HIGH;

		/* Keep the last thread as a plain SCHED_OTHER reference */
		if (i == 7)
			ti[i].other = 1;

		/* Don't inherit main's policy; the thread raises its own
		 * priority via sched_setscheduler() in my_rt_thread() */
		if (pthread_attr_setinheritsched(&attr[i], PTHREAD_EXPLICIT_SCHED))
			error(4);
		/* And finally start the actual thread */
		if (!pthread_create(&thread[i], &attr[i], my_rt_thread, &ti[i])) {
			printf("Thread: %lu created\n", (unsigned long)thread[i]);
			//pthread_detach(thread[i]);
		}
	}
}
   
static void configure_malloc_behavior(void)
{
	/* Lock all current and future pages so that they can never be
	 * paged out
	 */
	if (mlockall(MCL_CURRENT | MCL_FUTURE))
		perror("mlockall failed:");

	/* Turn off malloc trimming, so freed heap memory is not
	 * returned to the kernel */
	mallopt(M_TRIM_THRESHOLD, -1);

	/* Turn off mmap usage for allocations */
	mallopt(M_MMAP_MAX, 0);
}

static void reserve_process_memory(int size)
{
	int i;
	char *buffer;

	buffer = malloc(size);

	/* Touch each page in this piece of memory to get it mapped into RAM */
	for (i = 0; i < size; i += sysconf(_SC_PAGESIZE)) {
		buffer[i] = 0;
	}

	/* The buffer is released here, but because of the mallopt()
	 * settings above the memory is not returned to the kernel and
	 * later malloc() calls are served from this locked pool. */
	free(buffer);
}

int main(int argc, char *argv[])
{
	show_new_pagefault_count("Initial count", ">=0", ">=0");
   
   	configure_malloc_behavior();
   
   	show_new_pagefault_count("mlockall() generated", ">=0", ">=0");
	reserve_process_memory(PRE_ALLOCATION_SIZE);
	show_new_pagefault_count("malloc() and touch generated", 
   				 ">=0", ">=0");

   	/* Now allocate the memory for the 2nd time and prove the number of
	 * pagefaults are zero
	 */
	reserve_process_memory(PRE_ALLOCATION_SIZE);
	show_new_pagefault_count("2nd malloc() and use generated", 
   				 "0", "0");
	NUM_PROC = sysconf(_SC_NPROCESSORS_ONLN);
	printf("We have %d processors\n", NUM_PROC);
	start_rt_thread();

	//<do your RT-thing>
	for (int i = 0; i < NUM_THREAD; i ++) {
		pthread_join(thread[i], NULL);
	}
	printf("main thread exit\n");

   	return 0;
}
 
> Good luck,
> 
> --
> Ahmed S. Darwish
> Linutronix GmbH
> 

Thread overview: 6+ messages
2021-04-01  2:06 Multi pthreaded RT application - mlock doubt Dipen Patel
2021-05-25  3:08 ` Dipen Patel
2021-05-25  7:26 ` Ahmed S. Darwish
2021-05-25 22:24   ` Dipen Patel [this message]
2021-05-26  8:31     ` Ahmed S. Darwish
2021-05-26 10:26       ` John Ogness
