linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Gabriel Krisman Bertazi <krisman@collabora.com>
To: "André Almeida" <andrealmeid@collabora.com>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Darren Hart <dvhart@infradead.org>,
	linux-kernel@vger.kernel.org,
	Steven Rostedt <rostedt@goodmis.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	kernel@collabora.com, pgriffais@valvesoftware.com,
	z.figura12@gmail.com, joel@joelfernandes.org,
	malteskarupke@fastmail.fm, linux-api@vger.kernel.org,
	fweimer@redhat.com, libc-alpha@sourceware.org,
	linux-kselftest@vger.kernel.org, shuah@kernel.org,
	acme@kernel.org, corbet@lwn.net
Subject: Re: [RFC PATCH 01/13] futex2: Implement wait and wake functions
Date: Mon, 15 Feb 2021 14:59:18 -0500	[thread overview]
Message-ID: <87k0r9w19l.fsf@collabora.com> (raw)
In-Reply-To: <20210215152404.250281-2-andrealmeid@collabora.com> (=?utf-8?Q?=22Andr=C3=A9?= Almeida"'s message of "Mon, 15 Feb 2021 12:23:52 -0300")

André Almeida <andrealmeid@collabora.com> writes:

> Create a new set of futex syscalls known as futex2. This new interface
> is aimed to implement a more maintainable code, while removing obsolete
> features and expanding it with new functionalities.

Hi André.  Some comments below

> +/* kernel/futex2.c */
> +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned int val,
> +			       unsigned int flags,
> +			       struct __kernel_timespec __user __user *timo);

Duplicated __user attribute

> +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned int nr_wake,
> +			       unsigned int flags);
> +
>  /* kernel/hrtimer.c */
>  asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
>  			      struct __kernel_timespec __user *rmtp);
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 728752917785..57e19200f7e4 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -862,8 +862,14 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
>  #define __NR_epoll_pwait2 441
>  __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
>  
> +#define __NR_futex_wait 442
> +__SYSCALL(__NR_futex_wait, sys_futex_wait)
> +
> +#define __NR_futex_wake 443
> +__SYSCALL(__NR_futex_wake, sys_futex_wake)
> +
>  #undef __NR_syscalls
> -#define __NR_syscalls 442
> +#define __NR_syscalls 444
>  
>  /*
>   * 32 bit systems traditionally used different
> diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
> index a89eb0accd5e..9fbdaaf4f254 100644
> --- a/include/uapi/linux/futex.h
> +++ b/include/uapi/linux/futex.h
> @@ -41,6 +41,62 @@
>  #define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
>  					 FUTEX_PRIVATE_FLAG)
>  
> +/* Size argument to futex2 syscall */
> +#define FUTEX_8		0
> +#define FUTEX_16	1
> +#define FUTEX_32	2
> +
> +#define FUTEX_SIZE_MASK	0x3
> +
> +#define FUTEX_SHARED_FLAG 8
> +
> +#define FUTEX_NUMA_FLAG 16
> +
> +/**
> + * struct futexXX_numa - struct for NUMA-aware futex operation
> + * @value: futex value
> + * @hint:  node id to operate
> + */
> +
> +struct futex8_numa {
> +	__u8 value;
> +	__s8 hint;
> +};
> +
> +struct futex16_numa {
> +	__u16 value;
> +	__s16 hint;
> +};
> +
> +struct futex32_numa {
> +	__u32 value;
> +	__s32 hint;
> +};

This patchset doesn't use these structures as far as I can see.  Maybe
these should be on a later patchset, when there is actual support for
numa awareness ?

> +
> +#define FUTEX_WAITV_MAX 128
> +
> +/**
> + * struct futex_waitv - A waiter for vectorized wait
> + * @uaddr: User address to wait on
> + * @val:   Expected value at uaddr
> + * @flags: Flags for this waiter
> + */
> +struct futex_waitv {
> +	void *uaddr;
> +	unsigned int val;
> +	unsigned int flags;
> +};

Shouldn't this be in patch 3?
> +
> +/**
> + * struct futex_requeue - Define an address and its flags for requeue operation
> + * @uaddr: User address of one of the requeue arguments
> + * @flags: Flags for this address
> + */
> +struct futex_requeue {
> +	void *uaddr;
> +	unsigned int flags;
> +};

Shouldn't this be in patch 4?

> +/**
> + * struct futexv_head - List of futexes to be waited
> + * @task:    Task to be awaken
> + * @hint:    Was someone on this list awakened?
> + * @objects: List of futexes
> + */
> +struct futexv_head {
> +	struct task_struct *task;
> +	bool hint;
> +	struct futex_waiter objects[0];
> +};

this structure is also used for a single futex.  maybe struct futex_waiter_head?

> +
> +/**
> + * struct futex_bucket - A bucket of futex's hash table
> + * @waiters: Number of waiters in the bucket
> + * @lock:    Bucket lock
> + * @list:    List of waiters on this bucket
> + */
> +struct futex_bucket {
> +	atomic_t waiters;
> +	spinlock_t lock;
> +	struct list_head list;
> +};
> +
> +/**
> + * struct futex_single_waiter - Wrapper for a futexv_head of one element
> + * @futexv: Single futexv element
> + * @waiter: Single waiter element
> + */
> +struct futex_single_waiter {
> +	struct futexv_head futexv;
> +	struct futex_waiter waiter;
> +} __packed;

Is this struct necessary?  can't you just allocate the necessary space,
i.e. a struct futexv_head with 1 futexv_head->object?
> +
> +/* Mask for futex2 flag operations */
> +#define FUTEX2_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG | \
> +		     FUTEX_CLOCK_REALTIME)

SHARED_FLAG should be in patch 2

> +
> +/* Mask for sys_futex_waitv flag */
> +#define FUTEXV_MASK (FUTEX_CLOCK_REALTIME)
> +
> +/* Mask for each futex in futex_waitv list */
> +#define FUTEXV_WAITER_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG)
> +
> +struct futex_bucket *futex_table;
> +unsigned int futex2_hashsize;
> +
> +/*
> + * Reflects a new waiter being added to the waitqueue.
> + */
> +static inline void bucket_inc_waiters(struct futex_bucket *bucket)
> +{
> +#ifdef CONFIG_SMP
> +	atomic_inc(&bucket->waiters);
> +	/*
> +	 * Issue a barrier after adding so futex_wake() will see that the
> +	 * value had increased
> +	 */
> +	smp_mb__after_atomic();
> +#endif
> +}
> +
> +/*
> + * Reflects a waiter being removed from the waitqueue by wakeup
> + * paths.
> + */
> +static inline void bucket_dec_waiters(struct futex_bucket *bucket)
> +{
> +#ifdef CONFIG_SMP
> +	atomic_dec(&bucket->waiters);
> +#endif
> +}
> +
> +/*
> + * Get the number of waiters in a bucket
> + */
> +static inline int bucket_get_waiters(struct futex_bucket *bucket)
> +{
> +#ifdef CONFIG_SMP
> +	/*
> +	 * Issue a barrier before reading so we get an updated value from
> +	 * futex_wait()
> +	 */
> +	smp_mb();
> +	return atomic_read(&bucket->waiters);
> +#else
> +	return 1;
> +#endif
> +}
> +
> +/**
> + * futex_get_bucket - Check if the user address is valid, prepare internal
> + *                    data and calculate the hash
> + * @uaddr:   futex user address
> + * @key:     data that uniquely identifies a futex
> + *
> + * Return: address of bucket on success, error code otherwise
> + */
> +static struct futex_bucket *futex_get_bucket(void __user *uaddr,
> +					     struct futex_key *key)
> +{
> +	uintptr_t address = (uintptr_t)uaddr;
> +	u32 hash_key;
> +
> +	/* Checking if uaddr is valid and accessible */
> +	if (unlikely(!IS_ALIGNED(address, sizeof(u32))))
> +		return ERR_PTR(-EINVAL);
> +	if (unlikely(!access_ok(address, sizeof(u32))))
> +		return ERR_PTR(-EFAULT);

This says the code only supports 32-bit.  So, maybe drop the other
FUTEX_SIZE defines for now

> +
> +	key->offset = address % PAGE_SIZE;
> +	address -= key->offset;
> +	key->pointer = (u64)address;
> +	key->index = (unsigned long)current->mm;

Why split the key in offset and pointer and waste 1/3 more space to
store each key?

> +
> +	/* Generate hash key for this futex using uaddr and current->mm */
> +	hash_key = jhash2((u32 *)key, sizeof(*key) / sizeof(u32), 0);
> +
> +	/* Since HASH_SIZE is 2^n, subtracting 1 makes a perfect bit mask */
> +	return &futex_table[hash_key & (futex2_hashsize - 1)];

If someone inadvertely changes futex2_hashsize to something not 2^n this
will silently break.  futex2_hashsize should be constant and you need
a BUILD_BUG_ON().

> +static int futex_enqueue(struct futexv_head *futexv, unsigned int nr_futexes,
> +			 int *awakened)
> +{
> +	int i, ret;
> +	u32 uval, *uaddr, val;
> +	struct futex_bucket *bucket;
> +
> +retry:
> +	set_current_state(TASK_INTERRUPTIBLE);
> +
> +	for (i = 0; i < nr_futexes; i++) {
> +		uaddr = (u32 * __user)futexv->objects[i].uaddr;
> +		val = (u32)futexv->objects[i].val;
> +
> +		bucket = futexv->objects[i].bucket;
> +
> +		bucket_inc_waiters(bucket);
> +		spin_lock(&bucket->lock);
> +
> +		ret = futex_get_user(&uval, uaddr);
> +
> +		if (unlikely(ret)) {
> +			spin_unlock(&bucket->lock);
> +
> +			bucket_dec_waiters(bucket);
> +			__set_current_state(TASK_RUNNING);
> +			*awakened = futex_dequeue_multiple(futexv, i);
> +
> +			if (__get_user(uval, uaddr))
> +				return -EFAULT;
> +
> +			if (*awakened >= 0)
> +				return 1;

If you are awakened, you don't need to waste time with trying to get the
next key.


> +/**
> + * futex_wait - Setup the timer (if there's one) and wait on a list of futexes
> + * @futexv:     List of futexes
> + * @nr_futexes: Length of futexv
> + * @timo:	Timeout
> + * @flags:	Timeout flags
> + *
> + * Return:
> + * * 0 >= - Hint of which futex woke us
> + * * 0 <  - Error code
> + */
> +static int futex_set_timer_and_wait(struct futexv_head *futexv,
> +				    unsigned int nr_futexes,
> +				    struct __kernel_timespec __user *timo,
> +				    unsigned int flags)
> +{
> +	struct hrtimer_sleeper timeout;
> +	int ret;
> +
> +	if (timo) {
> +		ret = futex_setup_time(timo, &timeout, flags);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = __futex_wait(futexv, nr_futexes, timo ? &timeout : NULL);
> +
> +	if (timo)
> +		hrtimer_cancel(&timeout.timer);
> +
> +	return ret;
> +}

I'm having a hard time understanding why this function exists.  part of
the futex is set up outside of it, part inside.  Not sure if this isn't
just part of sys_futex_wait.

Thanks,

-- 
Gabriel Krisman Bertazi

  reply	other threads:[~2021-02-15 20:00 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-15 15:23 [RFC PATCH 00/13] Add futex2 syscalls André Almeida
2021-02-15 15:23 ` [RFC PATCH 01/13] futex2: Implement wait and wake functions André Almeida
2021-02-15 19:59   ` Gabriel Krisman Bertazi [this message]
2021-02-18 13:29     ` André Almeida
2021-02-18 15:48       ` Gabriel Krisman Bertazi
2021-02-16  9:02   ` Peter Zijlstra
2021-02-18 20:09     ` André Almeida
2021-02-16  9:35   ` Peter Zijlstra
2021-02-16  9:56   ` Peter Zijlstra
2021-02-16 10:20     ` Sebastian Andrzej Siewior
2021-02-16 12:42       ` Peter Zijlstra
2021-02-16 22:12     ` Gabriel Krisman Bertazi
2021-02-15 15:23 ` [RFC PATCH 02/13] futex2: Add support for shared futexes André Almeida
2021-02-15 15:23 ` [RFC PATCH 03/13] futex2: Implement vectorized wait André Almeida
2021-02-15 20:03   ` Gabriel Krisman Bertazi
2021-02-15 20:06     ` Zebediah Figura
2021-02-15 20:08   ` Gabriel Krisman Bertazi
2021-02-15 15:23 ` [RFC PATCH 04/13] futex2: Implement requeue operation André Almeida
2021-02-15 15:23 ` [RFC PATCH 05/13] futex2: Add compatibility entry point for x86_x32 ABI André Almeida
2021-02-15 15:23 ` [RFC PATCH 06/13] docs: locking: futex2: Add documentation André Almeida
2021-02-16 18:34   ` Randy Dunlap
2021-02-18 19:12     ` André Almeida
2021-02-15 15:23 ` [RFC PATCH 07/13] selftests: futex2: Add wake/wait test André Almeida
2021-02-15 15:23 ` [RFC PATCH 08/13] selftests: futex2: Add timeout test André Almeida
2021-02-15 15:24 ` [RFC PATCH 09/13] selftests: futex2: Add wouldblock test André Almeida
2021-02-15 15:24 ` [RFC PATCH 10/13] selftests: futex2: Add waitv test André Almeida
2021-02-15 15:24 ` [RFC PATCH 11/13] selftests: futex2: Add requeue test André Almeida
2021-02-15 15:24 ` [RFC PATCH 12/13] perf bench: Add futex2 benchmark tests André Almeida
2021-02-15 15:24 ` [RFC PATCH 13/13] kernel: Enable waitpid() for futex2 André Almeida

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87k0r9w19l.fsf@collabora.com \
    --to=krisman@collabora.com \
    --cc=acme@kernel.org \
    --cc=andrealmeid@collabora.com \
    --cc=bigeasy@linutronix.de \
    --cc=corbet@lwn.net \
    --cc=dvhart@infradead.org \
    --cc=fweimer@redhat.com \
    --cc=joel@joelfernandes.org \
    --cc=kernel@collabora.com \
    --cc=libc-alpha@sourceware.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=malteskarupke@fastmail.fm \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pgriffais@valvesoftware.com \
    --cc=rostedt@goodmis.org \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=z.figura12@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).