All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Baron <jbaron@akamai.com>
To: Christoph Hellwig <hch@lst.de>, Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Al Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 1/2] epoll: use the waitqueue lock to protect ep->wq
Date: Thu, 7 Dec 2017 11:09:11 -0500	[thread overview]
Message-ID: <d3e0d0c9-b9e2-c85d-5c67-30f6f80de42b@akamai.com> (raw)
In-Reply-To: <20171206235230.19425-2-hch@lst.de>

On 12/06/2017 06:52 PM, Christoph Hellwig wrote:
> The epoll code currently always uses the unlocked waitqueue helpers for
> ep->wq, but instead of holding the lock inside the waitqueue around these
> calls, as expected, the epoll code uses its own lock.  Given that the
> waitqueue is not exposed to the rest of the kernel this actually works
> ok at the moment, but prevents the epoll locking rules from being
> enforced using lockdep.  Remove ep->lock and use the waitqueue lock
> to not only reduce the size of struct eventpoll but also make sure we
> can assert locking invariants in the waitqueue code.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Probably should also fix the locking comments at the top of
fs/eventpoll.c that refer to ep->lock...

The rest looks good.

Reviewed-by: Jason Baron <jbaron@akamai.com>

Thanks,

-Jason


> ---
>  fs/eventpoll.c | 46 ++++++++++++++++++++++------------------------
>  1 file changed, 22 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index afd548ebc328..2b2c5ac80e26 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -182,11 +182,10 @@ struct epitem {
>   * This structure is stored inside the "private_data" member of the file
>   * structure and represents the main data structure for the eventpoll
>   * interface.
> + *
> + * Access to it is protected by the lock inside wq.
>   */
>  struct eventpoll {
> -	/* Protect the access to this structure */
> -	spinlock_t lock;
> -
>  	/*
>  	 * This mutex is used to ensure that files are not removed
>  	 * while epoll is using them. This is held during the event
> @@ -210,7 +209,7 @@ struct eventpoll {
>  	/*
>  	 * This is a single linked list that chains all the "struct epitem" that
>  	 * happened while transferring ready events to userspace w/out
> -	 * holding ->lock.
> +	 * holding ->wq.lock.
>  	 */
>  	struct epitem *ovflist;
>  
> @@ -686,17 +685,17 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>  	 * because we want the "sproc" callback to be able to do it
>  	 * in a lockless way.
>  	 */
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  	list_splice_init(&ep->rdllist, &txlist);
>  	ep->ovflist = NULL;
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	/*
>  	 * Now call the callback function.
>  	 */
>  	error = (*sproc)(ep, &txlist, priv);
>  
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  	/*
>  	 * During the time we spent inside the "sproc" callback, some
>  	 * other events might have been queued by the poll callback.
> @@ -738,7 +737,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>  		if (waitqueue_active(&ep->poll_wait))
>  			pwake++;
>  	}
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	if (!ep_locked)
>  		mutex_unlock(&ep->mtx);
> @@ -782,10 +781,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
>  
>  	rb_erase_cached(&epi->rbn, &ep->rbr);
>  
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  	if (ep_is_linked(&epi->rdllink))
>  		list_del_init(&epi->rdllink);
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	wakeup_source_unregister(ep_wakeup_source(epi));
>  	/*
> @@ -1015,7 +1014,6 @@ static int ep_alloc(struct eventpoll **pep)
>  	if (unlikely(!ep))
>  		goto free_uid;
>  
> -	spin_lock_init(&ep->lock);
>  	mutex_init(&ep->mtx);
>  	init_waitqueue_head(&ep->wq);
>  	init_waitqueue_head(&ep->poll_wait);
> @@ -1119,7 +1117,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
>  	struct eventpoll *ep = epi->ep;
>  	int ewake = 0;
>  
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  
>  	ep_set_busy_poll_napi_id(epi);
>  
> @@ -1196,7 +1194,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
>  		pwake++;
>  
>  out_unlock:
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	/* We have to call this outside the lock */
>  	if (pwake)
> @@ -1480,7 +1478,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  		goto error_remove_epi;
>  
>  	/* We have to drop the new item inside our item list to keep track of it */
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  
>  	/* record NAPI ID of new item if present */
>  	ep_set_busy_poll_napi_id(epi);
> @@ -1497,7 +1495,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  			pwake++;
>  	}
>  
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	atomic_long_inc(&ep->user->epoll_watches);
>  
> @@ -1523,10 +1521,10 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  	 * list, since that is used/cleaned only inside a section bound by "mtx".
>  	 * And ep_insert() is called with "mtx" held.
>  	 */
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  	if (ep_is_linked(&epi->rdllink))
>  		list_del_init(&epi->rdllink);
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	wakeup_source_unregister(ep_wakeup_source(epi));
>  
> @@ -1593,7 +1591,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>  	 * list, push it inside.
>  	 */
>  	if (revents & event->events) {
> -		spin_lock_irq(&ep->lock);
> +		spin_lock_irq(&ep->wq.lock);
>  		if (!ep_is_linked(&epi->rdllink)) {
>  			list_add_tail(&epi->rdllink, &ep->rdllist);
>  			ep_pm_stay_awake(epi);
> @@ -1604,7 +1602,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>  			if (waitqueue_active(&ep->poll_wait))
>  				pwake++;
>  		}
> -		spin_unlock_irq(&ep->lock);
> +		spin_unlock_irq(&ep->wq.lock);
>  	}
>  
>  	/* We have to call this outside the lock */
> @@ -1754,7 +1752,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  		 * caller specified a non blocking operation.
>  		 */
>  		timed_out = 1;
> -		spin_lock_irqsave(&ep->lock, flags);
> +		spin_lock_irqsave(&ep->wq.lock, flags);
>  		goto check_events;
>  	}
>  
> @@ -1763,7 +1761,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  	if (!ep_events_available(ep))
>  		ep_busy_loop(ep, timed_out);
>  
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  
>  	if (!ep_events_available(ep)) {
>  		/*
> @@ -1805,11 +1803,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  				break;
>  			}
>  
> -			spin_unlock_irqrestore(&ep->lock, flags);
> +			spin_unlock_irqrestore(&ep->wq.lock, flags);
>  			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
>  				timed_out = 1;
>  
> -			spin_lock_irqsave(&ep->lock, flags);
> +			spin_lock_irqsave(&ep->wq.lock, flags);
>  		}
>  
>  		__remove_wait_queue(&ep->wq, &wait);
> @@ -1819,7 +1817,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  	/* Is it worth to try to dig for events ? */
>  	eavail = ep_events_available(ep);
>  
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	/*
>  	 * Try to transfer events to user space. In case we get 0 events and
> 

  parent reply	other threads:[~2017-12-07 16:09 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-06 23:52 waitqueue lockdep annotation Christoph Hellwig
2017-12-06 23:52 ` [PATCH 1/2] epoll: use the waitqueue lock to protect ep->wq Christoph Hellwig
2017-12-07  0:49   ` Ingo Molnar
2017-12-07  2:38     ` Andreas Dilger
2017-12-07  6:12       ` Ingo Molnar
2017-12-14 13:06     ` Christoph Hellwig
2017-12-07 16:09   ` Jason Baron [this message]
2017-12-14 13:05     ` Christoph Hellwig
2017-12-06 23:52 ` [PATCH 2/2] sched/wait: assert the wait_queue_head lock is held in __wake_up_common Christoph Hellwig
2017-12-07  0:50   ` Ingo Molnar
2017-12-14 13:08     ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d3e0d0c9-b9e2-c85d-5c67-30f6f80de42b@akamai.com \
    --to=jbaron@akamai.com \
    --cc=akpm@linux-foundation.org \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.