All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gabriel Krisman Bertazi <krisman@collabora.com>
To: "André Almeida" <andrealmeid@collabora.com>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Darren Hart <dvhart@infradead.org>,
	linux-kernel@vger.kernel.org,
	Steven Rostedt <rostedt@goodmis.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	kernel@collabora.com, pgriffais@valvesoftware.com,
	z.figura12@gmail.com, joel@joelfernandes.org,
	malteskarupke@fastmail.fm, linux-api@vger.kernel.org,
	fweimer@redhat.com, libc-alpha@sourceware.org,
	linux-kselftest@vger.kernel.org, shuah@kernel.org,
	acme@kernel.org, corbet@lwn.net
Subject: Re: [RFC PATCH 01/13] futex2: Implement wait and wake functions
Date: Mon, 15 Feb 2021 14:59:18 -0500	[thread overview]
Message-ID: <87k0r9w19l.fsf@collabora.com> (raw)
In-Reply-To: <20210215152404.250281-2-andrealmeid@collabora.com> (=?utf-8?Q?=22Andr=C3=A9?= Almeida"'s message of "Mon, 15 Feb 2021 12:23:52 -0300")

André Almeida <andrealmeid@collabora.com> writes:

> Create a new set of futex syscalls known as futex2. This new interface
> is aimed to implement a more maintainable code, while removing obsolete
> features and expanding it with new functionalities.

Hi André.  Some comments below

> +/* kernel/futex2.c */
> +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned int val,
> +			       unsigned int flags,
> +			       struct __kernel_timespec __user __user *timo);

Duplicated __user attribute

> +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned int nr_wake,
> +			       unsigned int flags);
> +
>  /* kernel/hrtimer.c */
>  asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
>  			      struct __kernel_timespec __user *rmtp);
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 728752917785..57e19200f7e4 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -862,8 +862,14 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
>  #define __NR_epoll_pwait2 441
>  __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
>  
> +#define __NR_futex_wait 442
> +__SYSCALL(__NR_futex_wait, sys_futex_wait)
> +
> +#define __NR_futex_wake 443
> +__SYSCALL(__NR_futex_wake, sys_futex_wake)
> +
>  #undef __NR_syscalls
> -#define __NR_syscalls 442
> +#define __NR_syscalls 444
>  
>  /*
>   * 32 bit systems traditionally used different
> diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
> index a89eb0accd5e..9fbdaaf4f254 100644
> --- a/include/uapi/linux/futex.h
> +++ b/include/uapi/linux/futex.h
> @@ -41,6 +41,62 @@
>  #define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
>  					 FUTEX_PRIVATE_FLAG)
>  
> +/* Size argument to futex2 syscall */
> +#define FUTEX_8		0
> +#define FUTEX_16	1
> +#define FUTEX_32	2
> +
> +#define FUTEX_SIZE_MASK	0x3
> +
> +#define FUTEX_SHARED_FLAG 8
> +
> +#define FUTEX_NUMA_FLAG 16
> +
> +/**
> + * struct futexXX_numa - struct for NUMA-aware futex operation
> + * @value: futex value
> + * @hint:  node id to operate
> + */
> +
> +struct futex8_numa {
> +	__u8 value;
> +	__s8 hint;
> +};
> +
> +struct futex16_numa {
> +	__u16 value;
> +	__s16 hint;
> +};
> +
> +struct futex32_numa {
> +	__u32 value;
> +	__s32 hint;
> +};

This patchset doesn't use these structures as far as I can see.  Maybe
these should be on a later patchset, when there is actual support for
numa awareness ?

> +
> +#define FUTEX_WAITV_MAX 128
> +
> +/**
> + * struct futex_waitv - A waiter for vectorized wait
> + * @uaddr: User address to wait on
> + * @val:   Expected value at uaddr
> + * @flags: Flags for this waiter
> + */
> +struct futex_waitv {
> +	void *uaddr;
> +	unsigned int val;
> +	unsigned int flags;
> +};

Shouldn't this be in patch 3?
> +
> +/**
> + * struct futex_requeue - Define an address and its flags for requeue operation
> + * @uaddr: User address of one of the requeue arguments
> + * @flags: Flags for this address
> + */
> +struct futex_requeue {
> +	void *uaddr;
> +	unsigned int flags;
> +};

Shouldn't this be in patch 4?

> +/**
> + * struct futexv_head - List of futexes to be waited
> + * @task:    Task to be awaken
> + * @hint:    Was someone on this list awakened?
> + * @objects: List of futexes
> + */
> +struct futexv_head {
> +	struct task_struct *task;
> +	bool hint;
> +	struct futex_waiter objects[0];
> +};

this structure is also used for a single futex.  maybe struct futex_waiter_head?

> +
> +/**
> + * struct futex_bucket - A bucket of futex's hash table
> + * @waiters: Number of waiters in the bucket
> + * @lock:    Bucket lock
> + * @list:    List of waiters on this bucket
> + */
> +struct futex_bucket {
> +	atomic_t waiters;
> +	spinlock_t lock;
> +	struct list_head list;
> +};
> +
> +/**
> + * struct futex_single_waiter - Wrapper for a futexv_head of one element
> + * @futexv: Single futexv element
> + * @waiter: Single waiter element
> + */
> +struct futex_single_waiter {
> +	struct futexv_head futexv;
> +	struct futex_waiter waiter;
> +} __packed;

Is this struct necessary?  can't you just allocate the necessary space,
i.e. a struct futexv_head with 1 futexv_head->object?
> +
> +/* Mask for futex2 flag operations */
> +#define FUTEX2_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG | \
> +		     FUTEX_CLOCK_REALTIME)

SHARED_FLAG should be in patch 2

> +
> +/* Mask for sys_futex_waitv flag */
> +#define FUTEXV_MASK (FUTEX_CLOCK_REALTIME)
> +
> +/* Mask for each futex in futex_waitv list */
> +#define FUTEXV_WAITER_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG)
> +
> +struct futex_bucket *futex_table;
> +unsigned int futex2_hashsize;
> +
> +/*
> + * Reflects a new waiter being added to the waitqueue.
> + */
> +static inline void bucket_inc_waiters(struct futex_bucket *bucket)
> +{
> +#ifdef CONFIG_SMP
> +	atomic_inc(&bucket->waiters);
> +	/*
> +	 * Issue a barrier after adding so futex_wake() will see that the
> +	 * value had increased
> +	 */
> +	smp_mb__after_atomic();
> +#endif
> +}
> +
> +/*
> + * Reflects a waiter being removed from the waitqueue by wakeup
> + * paths.
> + */
> +static inline void bucket_dec_waiters(struct futex_bucket *bucket)
> +{
> +#ifdef CONFIG_SMP
> +	atomic_dec(&bucket->waiters);
> +#endif
> +}
> +
> +/*
> + * Get the number of waiters in a bucket
> + */
> +static inline int bucket_get_waiters(struct futex_bucket *bucket)
> +{
> +#ifdef CONFIG_SMP
> +	/*
> +	 * Issue a barrier before reading so we get an updated value from
> +	 * futex_wait()
> +	 */
> +	smp_mb();
> +	return atomic_read(&bucket->waiters);
> +#else
> +	return 1;
> +#endif
> +}
> +
> +/**
> + * futex_get_bucket - Check if the user address is valid, prepare internal
> + *                    data and calculate the hash
> + * @uaddr:   futex user address
> + * @key:     data that uniquely identifies a futex
> + *
> + * Return: address of bucket on success, error code otherwise
> + */
> +static struct futex_bucket *futex_get_bucket(void __user *uaddr,
> +					     struct futex_key *key)
> +{
> +	uintptr_t address = (uintptr_t)uaddr;
> +	u32 hash_key;
> +
> +	/* Checking if uaddr is valid and accessible */
> +	if (unlikely(!IS_ALIGNED(address, sizeof(u32))))
> +		return ERR_PTR(-EINVAL);
> +	if (unlikely(!access_ok(address, sizeof(u32))))
> +		return ERR_PTR(-EFAULT);

This says the code only supports 32-bit.  So, maybe drop the other
FUTEX_SIZE defines for now

> +
> +	key->offset = address % PAGE_SIZE;
> +	address -= key->offset;
> +	key->pointer = (u64)address;
> +	key->index = (unsigned long)current->mm;

Why split the key in offset and pointer and waste 1/3 more space to
store each key?

> +
> +	/* Generate hash key for this futex using uaddr and current->mm */
> +	hash_key = jhash2((u32 *)key, sizeof(*key) / sizeof(u32), 0);
> +
> +	/* Since HASH_SIZE is 2^n, subtracting 1 makes a perfect bit mask */
> +	return &futex_table[hash_key & (futex2_hashsize - 1)];

If someone inadvertely changes futex2_hashsize to something not 2^n this
will silently break.  futex2_hashsize should be constant and you need
a BUILD_BUG_ON().

> +static int futex_enqueue(struct futexv_head *futexv, unsigned int nr_futexes,
> +			 int *awakened)
> +{
> +	int i, ret;
> +	u32 uval, *uaddr, val;
> +	struct futex_bucket *bucket;
> +
> +retry:
> +	set_current_state(TASK_INTERRUPTIBLE);
> +
> +	for (i = 0; i < nr_futexes; i++) {
> +		uaddr = (u32 * __user)futexv->objects[i].uaddr;
> +		val = (u32)futexv->objects[i].val;
> +
> +		bucket = futexv->objects[i].bucket;
> +
> +		bucket_inc_waiters(bucket);
> +		spin_lock(&bucket->lock);
> +
> +		ret = futex_get_user(&uval, uaddr);
> +
> +		if (unlikely(ret)) {
> +			spin_unlock(&bucket->lock);
> +
> +			bucket_dec_waiters(bucket);
> +			__set_current_state(TASK_RUNNING);
> +			*awakened = futex_dequeue_multiple(futexv, i);
> +
> +			if (__get_user(uval, uaddr))
> +				return -EFAULT;
> +
> +			if (*awakened >= 0)
> +				return 1;

If you are awakened, you don't need to waste time with trying to get the
next key.


> +/**
> + * futex_wait - Setup the timer (if there's one) and wait on a list of futexes
> + * @futexv:     List of futexes
> + * @nr_futexes: Length of futexv
> + * @timo:	Timeout
> + * @flags:	Timeout flags
> + *
> + * Return:
> + * * 0 >= - Hint of which futex woke us
> + * * 0 <  - Error code
> + */
> +static int futex_set_timer_and_wait(struct futexv_head *futexv,
> +				    unsigned int nr_futexes,
> +				    struct __kernel_timespec __user *timo,
> +				    unsigned int flags)
> +{
> +	struct hrtimer_sleeper timeout;
> +	int ret;
> +
> +	if (timo) {
> +		ret = futex_setup_time(timo, &timeout, flags);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = __futex_wait(futexv, nr_futexes, timo ? &timeout : NULL);
> +
> +	if (timo)
> +		hrtimer_cancel(&timeout.timer);
> +
> +	return ret;
> +}

I'm having a hard time understanding why this function exists.  part of
the futex is set up outside of it, part inside.  Not sure if this isn't
just part of sys_futex_wait.

Thanks,

-- 
Gabriel Krisman Bertazi

  reply	other threads:[~2021-02-15 20:00 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-15 15:23 [RFC PATCH 00/13] Add futex2 syscalls André Almeida
2021-02-15 15:23 ` [RFC PATCH 01/13] futex2: Implement wait and wake functions André Almeida
2021-02-15 19:59   ` Gabriel Krisman Bertazi [this message]
2021-02-18 13:29     ` André Almeida
2021-02-18 15:48       ` Gabriel Krisman Bertazi
2021-02-16  9:02   ` Peter Zijlstra
2021-02-18 20:09     ` André Almeida
2021-02-16  9:35   ` Peter Zijlstra
2021-02-16  9:56   ` Peter Zijlstra
2021-02-16 10:20     ` Sebastian Andrzej Siewior
2021-02-16 12:42       ` Peter Zijlstra
2021-02-16 22:12     ` Gabriel Krisman Bertazi
2021-02-15 15:23 ` [RFC PATCH 02/13] futex2: Add support for shared futexes André Almeida
2021-02-16  4:32   ` kernel test robot
2021-02-15 15:23 ` [RFC PATCH 03/13] futex2: Implement vectorized wait André Almeida
2021-02-15 16:30   ` kernel test robot
2021-02-15 17:15   ` kernel test robot
2021-02-15 20:03   ` Gabriel Krisman Bertazi
2021-02-15 20:06     ` Zebediah Figura
2021-02-15 20:08   ` Gabriel Krisman Bertazi
2021-02-15 15:23 ` [RFC PATCH 04/13] futex2: Implement requeue operation André Almeida
2021-02-15 16:31   ` kernel test robot
2021-02-15 17:28   ` kernel test robot
2021-02-15 17:33   ` kernel test robot
2021-02-15 15:23 ` [RFC PATCH 05/13] futex2: Add compatibility entry point for x86_x32 ABI André Almeida
2021-02-16 20:19   ` kernel test robot
2021-02-15 15:23 ` [RFC PATCH 06/13] docs: locking: futex2: Add documentation André Almeida
2021-02-16 18:34   ` Randy Dunlap
2021-02-18 19:12     ` André Almeida
2021-02-15 15:23 ` [RFC PATCH 07/13] selftests: futex2: Add wake/wait test André Almeida
2021-02-15 15:23 ` [RFC PATCH 08/13] selftests: futex2: Add timeout test André Almeida
2021-02-15 15:24 ` [RFC PATCH 09/13] selftests: futex2: Add wouldblock test André Almeida
2021-02-15 15:24 ` [RFC PATCH 10/13] selftests: futex2: Add waitv test André Almeida
2021-02-15 15:24 ` [RFC PATCH 11/13] selftests: futex2: Add requeue test André Almeida
2021-02-15 15:24 ` [RFC PATCH 12/13] perf bench: Add futex2 benchmark tests André Almeida
2021-02-15 15:24 ` [RFC PATCH 13/13] kernel: Enable waitpid() for futex2 André Almeida

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87k0r9w19l.fsf@collabora.com \
    --to=krisman@collabora.com \
    --cc=acme@kernel.org \
    --cc=andrealmeid@collabora.com \
    --cc=bigeasy@linutronix.de \
    --cc=corbet@lwn.net \
    --cc=dvhart@infradead.org \
    --cc=fweimer@redhat.com \
    --cc=joel@joelfernandes.org \
    --cc=kernel@collabora.com \
    --cc=libc-alpha@sourceware.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=malteskarupke@fastmail.fm \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pgriffais@valvesoftware.com \
    --cc=rostedt@goodmis.org \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=z.figura12@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.