All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH-RFC 1/2] eventfd: reorganize the code to simplify new flags
       [not found] <cover.1248803500.git.mst@redhat.com>
@ 2009-07-28 17:54 ` Michael S. Tsirkin
  2009-07-28 17:55 ` [PATCH-RFC 2/2] eventfd: EFD_STATE flag Michael S. Tsirkin
  1 sibling, 0 replies; 16+ messages in thread
From: Michael S. Tsirkin @ 2009-07-28 17:54 UTC (permalink / raw)
  To: davidel, avi, gleb, kvm, linux-kernel

This slightly reorganizes the code in eventfd, encapsulating counter
math in inline functions, so that it will be easier to add a new flag.
No functional changes.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 fs/eventfd.c |   56 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 31d12de..347a0e0 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -34,6 +34,37 @@ struct eventfd_ctx {
 	unsigned int flags;
 };
 
+
+static inline int eventfd_readable(struct eventfd_ctx *ctx)
+{
+	return ctx->count > 0;
+}
+
+static inline int eventfd_writeable(struct eventfd_ctx *ctx, u64 n)
+{
+	return ULLONG_MAX - n > ctx->count;
+}
+
+static inline int eventfd_overflow(struct eventfd_ctx *ctx, u64 cnt)
+{
+	return cnt == ULLONG_MAX;
+}
+
+static inline void eventfd_dowrite(struct eventfd_ctx *ctx, u64 ucnt)
+{
+	if (eventfd_writeable(ctx, ucnt))
+		ucnt = ULLONG_MAX - ctx->count;
+
+	ctx->count += ucnt;
+}
+
+static inline u64 eventfd_doread(struct eventfd_ctx *ctx)
+{
+	u64 ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+	ctx->count -= ucnt;
+	return ucnt;
+}
+
 /**
  * eventfd_signal - Adds @n to the eventfd counter.
  * @ctx: [in] Pointer to the eventfd context.
@@ -57,9 +88,7 @@ int eventfd_signal(struct eventfd_ctx *ctx, int n)
 	if (n < 0)
 		return -EINVAL;
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
-	if (ULLONG_MAX - ctx->count < n)
-		n = (int) (ULLONG_MAX - ctx->count);
-	ctx->count += n;
+	eventfd_dowrite(ctx, n);
 	if (waitqueue_active(&ctx->wqh))
 		wake_up_locked_poll(&ctx->wqh, POLLIN);
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
@@ -119,11 +148,11 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
 	poll_wait(file, &ctx->wqh, wait);
 
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
-	if (ctx->count > 0)
+	if (eventfd_readable(ctx))
 		events |= POLLIN;
-	if (ctx->count == ULLONG_MAX)
+	if (eventfd_overflow(ctx, ctx->count))
 		events |= POLLERR;
-	if (ULLONG_MAX - 1 > ctx->count)
+	if (eventfd_writeable(ctx, 1))
 		events |= POLLOUT;
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 
@@ -142,13 +171,13 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
 	res = -EAGAIN;
-	if (ctx->count > 0)
+	if (eventfd_readable(ctx))
 		res = sizeof(ucnt);
 	else if (!(file->f_flags & O_NONBLOCK)) {
 		__add_wait_queue(&ctx->wqh, &wait);
 		for (res = 0;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			if (ctx->count > 0) {
+			if (eventfd_readable(ctx)) {
 				res = sizeof(ucnt);
 				break;
 			}
@@ -164,8 +193,7 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__set_current_state(TASK_RUNNING);
 	}
 	if (likely(res > 0)) {
-		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-		ctx->count -= ucnt;
+		ucnt = eventfd_doread(ctx);
 		if (waitqueue_active(&ctx->wqh))
 			wake_up_locked_poll(&ctx->wqh, POLLOUT);
 	}
@@ -188,17 +216,17 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
 		return -EINVAL;
 	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
 		return -EFAULT;
-	if (ucnt == ULLONG_MAX)
+	if (eventfd_overflow(ctx, ucnt))
 		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
 	res = -EAGAIN;
-	if (ULLONG_MAX - ctx->count > ucnt)
+	if (eventfd_writeable(ctx, ucnt))
 		res = sizeof(ucnt);
 	else if (!(file->f_flags & O_NONBLOCK)) {
 		__add_wait_queue(&ctx->wqh, &wait);
 		for (res = 0;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			if (ULLONG_MAX - ctx->count > ucnt) {
+			if (eventfd_writeable(ctx, ucnt)) {
 				res = sizeof(ucnt);
 				break;
 			}
@@ -214,7 +242,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
 		__set_current_state(TASK_RUNNING);
 	}
 	if (likely(res > 0)) {
-		ctx->count += ucnt;
+		eventfd_dowrite(ctx, ucnt);
 		if (waitqueue_active(&ctx->wqh))
 			wake_up_locked_poll(&ctx->wqh, POLLIN);
 	}
-- 
1.6.2.5


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH-RFC 2/2] eventfd: EFD_STATE flag
       [not found] <cover.1248803500.git.mst@redhat.com>
  2009-07-28 17:54 ` [PATCH-RFC 1/2] eventfd: reorganize the code to simplify new flags Michael S. Tsirkin
@ 2009-07-28 17:55 ` Michael S. Tsirkin
  2009-08-03 15:09   ` Avi Kivity
  1 sibling, 1 reply; 16+ messages in thread
From: Michael S. Tsirkin @ 2009-07-28 17:55 UTC (permalink / raw)
  To: davidel, avi, gleb, kvm, linux-kernel

This implements a new EFD_STATE flag for eventfd.
When set, this flag changes eventfd behaviour in the following way:
- write simply stores the value written, and is always non-blocking
- read unblocks when the value written changes, and
  returns the value written

Motivation: we'd like to use eventfd in qemu to pass interrupts from
(emulated or assigned) devices to guest. For level interrupts, the
counter supported currently by eventfd is not a good match: we really
need to set interrupt to a level, typically 0 or 1, and give the guest
ability to see the last value written.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 fs/eventfd.c            |   41 ++++++++++++++++++++++++++++++++++-------
 include/linux/eventfd.h |    3 ++-
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 347a0e0..7b279e3 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -31,37 +31,59 @@ struct eventfd_ctx {
 	 * issue a wakeup.
 	 */
 	__u64 count;
+	/*
+	 * When EF_STATE flag is set, eventfd behaves differently:
+	 * value written gets stored in "count", read will copy
+	 * "count" to "state".
+	 */
+	__u64 state;
 	unsigned int flags;
 };
 
 
 static inline int eventfd_readable(struct eventfd_ctx *ctx)
 {
-	return ctx->count > 0;
+	if (ctx->flags & EFD_STATE)
+		return ctx->state != ctx->count;
+	else
+		return ctx->count > 0;
 }
 
 static inline int eventfd_writeable(struct eventfd_ctx *ctx, u64 n)
 {
-	return ULLONG_MAX - n > ctx->count;
+	if (ctx->flags & EFD_STATE)
+		return 1;
+	else
+		return ULLONG_MAX - n > ctx->count;
 }
 
 static inline int eventfd_overflow(struct eventfd_ctx *ctx, u64 cnt)
 {
-	return cnt == ULLONG_MAX;
+	if (ctx->flags & EFD_STATE)
+		return 0;
+	else
+		return cnt == ULLONG_MAX;
 }
 
 static inline void eventfd_dowrite(struct eventfd_ctx *ctx, u64 ucnt)
 {
-	if (eventfd_writeable(ctx, ucnt))
-		ucnt = ULLONG_MAX - ctx->count;
+	if (ctx->flags & EFD_STATE)
+		ctx->count = ucnt;
+	else {
+		if (ULLONG_MAX - ctx->count < ucnt)
+			ucnt = ULLONG_MAX - ctx->count;
 
-	ctx->count += ucnt;
+		ctx->count += ucnt;
+	}
 }
 
 static inline u64 eventfd_doread(struct eventfd_ctx *ctx)
 {
 	u64 ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-	ctx->count -= ucnt;
+	if (ctx->flags & EFD_STATE)
+		ctx->state = ucnt;
+	else
+		ctx->count -= ucnt;
 	return ucnt;
 }
 
@@ -337,6 +359,10 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 
 	if (flags & ~EFD_FLAGS_SET)
 		return -EINVAL;
+	/* State together with semaphore does not make sense. */
+	if ((flags & EFD_STATE) && (flags & EFD_SEMAPHORE))
+		return -EINVAL;
+
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -344,6 +370,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 
 	kref_init(&ctx->kref);
 	init_waitqueue_head(&ctx->wqh);
+	ctx->state = count;
 	ctx->count = count;
 	ctx->flags = flags;
 
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 3b85ba6..78ff649 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -19,11 +19,12 @@
  * shared O_* flags.
  */
 #define EFD_SEMAPHORE (1 << 0)
+#define EFD_STATE (1 << 1)
 #define EFD_CLOEXEC O_CLOEXEC
 #define EFD_NONBLOCK O_NONBLOCK
 
 #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
-#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
+#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE | EFD_STATE)
 
 #ifdef CONFIG_EVENTFD
 
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-07-28 17:55 ` [PATCH-RFC 2/2] eventfd: EFD_STATE flag Michael S. Tsirkin
@ 2009-08-03 15:09   ` Avi Kivity
  2009-08-03 15:14     ` Michael S. Tsirkin
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2009-08-03 15:09 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: davidel, gleb, kvm, linux-kernel

On 07/28/2009 08:55 PM, Michael S. Tsirkin wrote:
> This implements a new EFD_STATE flag for eventfd.
> When set, this flag changes eventfd behaviour in the following way:
> - write simply stores the value written, and is always non-blocking
> - read unblocks when the value written changes, and
>    returns the value written
>
> Motivation: we'd like to use eventfd in qemu to pass interrupts from
> (emulated or assigned) devices to guest. For level interrupts, the
> counter supported currently by eventfd is not a good match: we really
> need to set interrupt to a level, typically 0 or 1, and give the guest
> ability to see the last value written.
>
>
> @@ -31,37 +31,59 @@ struct eventfd_ctx {
>   	 * issue a wakeup.
>   	 */
>   	__u64 count;
> +	/*
> +	 * When EF_STATE flag is set, eventfd behaves differently:
> +	 * value written gets stored in "count", read will copy
> +	 * "count" to "state".
> +	 */
> +	__u64 state;
>   	unsigned int flags;
>   };
>    

Why not write the new value into ->count directly?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-03 15:09   ` Avi Kivity
@ 2009-08-03 15:14     ` Michael S. Tsirkin
  2009-08-03 15:29       ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Michael S. Tsirkin @ 2009-08-03 15:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: davidel, gleb, kvm, linux-kernel

On Mon, Aug 03, 2009 at 06:09:38PM +0300, Avi Kivity wrote:
> On 07/28/2009 08:55 PM, Michael S. Tsirkin wrote:
>> This implements a new EFD_STATE flag for eventfd.
>> When set, this flag changes eventfd behaviour in the following way:
>> - write simply stores the value written, and is always non-blocking
>> - read unblocks when the value written changes, and
>>    returns the value written
>>
>> Motivation: we'd like to use eventfd in qemu to pass interrupts from
>> (emulated or assigned) devices to guest. For level interrupts, the
>> counter supported currently by eventfd is not a good match: we really
>> need to set interrupt to a level, typically 0 or 1, and give the guest
>> ability to see the last value written.
>>
>>
>> @@ -31,37 +31,59 @@ struct eventfd_ctx {
>>   	 * issue a wakeup.
>>   	 */
>>   	__u64 count;
>> +	/*
>> +	 * When EF_STATE flag is set, eventfd behaves differently:
>> +	 * value written gets stored in "count", read will copy
>> +	 * "count" to "state".
>> +	 */
>> +	__u64 state;
>>   	unsigned int flags;
>>   };
>>    
>
> Why not write the new value into ->count directly?

That's what it says. state is ther to detect that value was changed
after last read. Makes sense?

> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-03 15:14     ` Michael S. Tsirkin
@ 2009-08-03 15:29       ` Avi Kivity
  2009-08-03 16:57         ` Michael S. Tsirkin
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2009-08-03 15:29 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: davidel, gleb, kvm, linux-kernel

On 08/03/2009 06:14 PM, Michael S. Tsirkin wrote:
> On Mon, Aug 03, 2009 at 06:09:38PM +0300, Avi Kivity wrote:
>    
>> On 07/28/2009 08:55 PM, Michael S. Tsirkin wrote:
>>      
>>> This implements a new EFD_STATE flag for eventfd.
>>> When set, this flag changes eventfd behaviour in the following way:
>>> - write simply stores the value written, and is always non-blocking
>>> - read unblocks when the value written changes, and
>>>     returns the value written
>>>
>>> Motivation: we'd like to use eventfd in qemu to pass interrupts from
>>> (emulated or assigned) devices to guest. For level interrupts, the
>>> counter supported currently by eventfd is not a good match: we really
>>> need to set interrupt to a level, typically 0 or 1, and give the guest
>>> ability to see the last value written.
>>>
>>>
>>> @@ -31,37 +31,59 @@ struct eventfd_ctx {
>>>    	 * issue a wakeup.
>>>    	 */
>>>    	__u64 count;
>>> +	/*
>>> +	 * When EF_STATE flag is set, eventfd behaves differently:
>>> +	 * value written gets stored in "count", read will copy
>>> +	 * "count" to "state".
>>> +	 */
>>> +	__u64 state;
>>>    	unsigned int flags;
>>>    };
>>>
>>>        
>> Why not write the new value into ->count directly?
>>      
>
> That's what it says. state is ther to detect that value was changed
> after last read. Makes sense?
>    

Why not do it at the point of the write?

     if (value != ctx->count) {
         ctx->count = value;
         wake_things_up();
     }

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-03 15:29       ` Avi Kivity
@ 2009-08-03 16:57         ` Michael S. Tsirkin
  2009-08-04  8:53           ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Michael S. Tsirkin @ 2009-08-03 16:57 UTC (permalink / raw)
  To: Avi Kivity; +Cc: davidel, gleb, kvm, linux-kernel

On Mon, Aug 03, 2009 at 06:29:36PM +0300, Avi Kivity wrote:
> On 08/03/2009 06:14 PM, Michael S. Tsirkin wrote:
>> On Mon, Aug 03, 2009 at 06:09:38PM +0300, Avi Kivity wrote:
>>    
>>> On 07/28/2009 08:55 PM, Michael S. Tsirkin wrote:
>>>      
>>>> This implements a new EFD_STATE flag for eventfd.
>>>> When set, this flag changes eventfd behaviour in the following way:
>>>> - write simply stores the value written, and is always non-blocking
>>>> - read unblocks when the value written changes, and
>>>>     returns the value written
>>>>
>>>> Motivation: we'd like to use eventfd in qemu to pass interrupts from
>>>> (emulated or assigned) devices to guest. For level interrupts, the
>>>> counter supported currently by eventfd is not a good match: we really
>>>> need to set interrupt to a level, typically 0 or 1, and give the guest
>>>> ability to see the last value written.
>>>>
>>>>
>>>> @@ -31,37 +31,59 @@ struct eventfd_ctx {
>>>>    	 * issue a wakeup.
>>>>    	 */
>>>>    	__u64 count;
>>>> +	/*
>>>> +	 * When EF_STATE flag is set, eventfd behaves differently:
>>>> +	 * value written gets stored in "count", read will copy
>>>> +	 * "count" to "state".
>>>> +	 */
>>>> +	__u64 state;
>>>>    	unsigned int flags;
>>>>    };
>>>>
>>>>        
>>> Why not write the new value into ->count directly?
>>>      
>>
>> That's what it says. state is ther to detect that value was changed
>> after last read. Makes sense?
>>    
>
> Why not do it at the point of the write?
>
>     if (value != ctx->count) {
>         ctx->count = value;
>         wake_things_up();
>     }

What if write comes before read?

> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-03 16:57         ` Michael S. Tsirkin
@ 2009-08-04  8:53           ` Avi Kivity
  2009-08-04  8:54             ` Michael S. Tsirkin
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2009-08-04  8:53 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: davidel, gleb, kvm, linux-kernel

On 08/03/2009 07:57 PM, Michael S. Tsirkin wrote:
>> Why not do it at the point of the write?
>>
>>      if (value != ctx->count) {
>>          ctx->count = value;
>>          wake_things_up();
>>      }
>>      
>
> What if write comes before read?
>    

The read will get the new value.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  8:53           ` Avi Kivity
@ 2009-08-04  8:54             ` Michael S. Tsirkin
  2009-08-04  9:17               ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Michael S. Tsirkin @ 2009-08-04  8:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: davidel, gleb, kvm, linux-kernel

On Tue, Aug 04, 2009 at 11:53:03AM +0300, Avi Kivity wrote:
> On 08/03/2009 07:57 PM, Michael S. Tsirkin wrote:
>>> Why not do it at the point of the write?
>>>
>>>      if (value != ctx->count) {
>>>          ctx->count = value;
>>>          wake_things_up();
>>>      }
>>>      
>>
>> What if write comes before read?
>>    
>
> The read will get the new value.

Yes :) But how does read know it should not block?

> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  8:54             ` Michael S. Tsirkin
@ 2009-08-04  9:17               ` Avi Kivity
  2009-08-04  9:17                 ` Gleb Natapov
  2009-08-04  9:33                 ` Michael S. Tsirkin
  0 siblings, 2 replies; 16+ messages in thread
From: Avi Kivity @ 2009-08-04  9:17 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: davidel, gleb, kvm, linux-kernel

On 08/04/2009 11:54 AM, Michael S. Tsirkin wrote:
> On Tue, Aug 04, 2009 at 11:53:03AM +0300, Avi Kivity wrote:
>    
>> On 08/03/2009 07:57 PM, Michael S. Tsirkin wrote:
>>      
>>>> Why not do it at the point of the write?
>>>>
>>>>       if (value != ctx->count) {
>>>>           ctx->count = value;
>>>>           wake_things_up();
>>>>       }
>>>>
>>>>          
>>> What if write comes before read?
>>>
>>>        
>> The read will get the new value.
>>      
>
> Yes :) But how does read know it should not block?
>    

If a different read comes after the write but after our read, it will 
have transferred the value, resulting in the same situation.

I think reads should never block with a state based mechanism.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  9:17               ` Avi Kivity
@ 2009-08-04  9:17                 ` Gleb Natapov
  2009-08-04  9:25                   ` Avi Kivity
  2009-08-04  9:33                 ` Michael S. Tsirkin
  1 sibling, 1 reply; 16+ messages in thread
From: Gleb Natapov @ 2009-08-04  9:17 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Michael S. Tsirkin, davidel, kvm, linux-kernel

On Tue, Aug 04, 2009 at 12:17:44PM +0300, Avi Kivity wrote:
> On 08/04/2009 11:54 AM, Michael S. Tsirkin wrote:
>> On Tue, Aug 04, 2009 at 11:53:03AM +0300, Avi Kivity wrote:
>>    
>>> On 08/03/2009 07:57 PM, Michael S. Tsirkin wrote:
>>>      
>>>>> Why not do it at the point of the write?
>>>>>
>>>>>       if (value != ctx->count) {
>>>>>           ctx->count = value;
>>>>>           wake_things_up();
>>>>>       }
>>>>>
>>>>>          
>>>> What if write comes before read?
>>>>
>>>>        
>>> The read will get the new value.
>>>      
>>
>> Yes :) But how does read know it should not block?
>>    
>
> If a different read comes after the write but after our read, it will  
> have transferred the value, resulting in the same situation.
>
> I think reads should never block with a state based mechanism.
>
Reader may want to poll for the status change.

--
			Gleb.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  9:25                   ` Avi Kivity
@ 2009-08-04  9:23                     ` Gleb Natapov
  2009-08-04  9:30                       ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Gleb Natapov @ 2009-08-04  9:23 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Michael S. Tsirkin, davidel, kvm, linux-kernel

On Tue, Aug 04, 2009 at 12:25:49PM +0300, Avi Kivity wrote:
> On 08/04/2009 12:17 PM, Gleb Natapov wrote:
>>> If a different read comes after the write but after our read, it will
>>> have transferred the value, resulting in the same situation.
>>>
>>> I think reads should never block with a state based mechanism.
>>>
>>>      
>> Reader may want to poll for the status change.
>>    
>
> Without epoll(), it's inherently racy since reads from other processes  
> can clear the status.
>
This is correct for any file descriptor. Multiple readers shouldn't
simultaneously read from the same files descriptor if they expect to
make any sense from a result.

> The "last read value" needs to be maintained for each reader, which is  
> not possible with read().
>
Only one reader scenario is interesting. This is not some multiplexing
device.

--
			Gleb.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  9:17                 ` Gleb Natapov
@ 2009-08-04  9:25                   ` Avi Kivity
  2009-08-04  9:23                     ` Gleb Natapov
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2009-08-04  9:25 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Michael S. Tsirkin, davidel, kvm, linux-kernel

On 08/04/2009 12:17 PM, Gleb Natapov wrote:
>> If a different read comes after the write but after our read, it will
>> have transferred the value, resulting in the same situation.
>>
>> I think reads should never block with a state based mechanism.
>>
>>      
> Reader may want to poll for the status change.
>    

Without epoll(), it's inherently racy since reads from other processes 
can clear the status.

The "last read value" needs to be maintained for each reader, which is 
not possible with read().

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  9:30                       ` Avi Kivity
@ 2009-08-04  9:26                         ` Michael S. Tsirkin
  2009-08-04 10:06                           ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Michael S. Tsirkin @ 2009-08-04  9:26 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Gleb Natapov, davidel, kvm, linux-kernel

On Tue, Aug 04, 2009 at 12:30:29PM +0300, Avi Kivity wrote:
> On 08/04/2009 12:23 PM, Gleb Natapov wrote:
>> On Tue, Aug 04, 2009 at 12:25:49PM +0300, Avi Kivity wrote:
>>    
>>> On 08/04/2009 12:17 PM, Gleb Natapov wrote:
>>>      
>>>>> If a different read comes after the write but after our read, it will
>>>>> have transferred the value, resulting in the same situation.
>>>>>
>>>>> I think reads should never block with a state based mechanism.
>>>>>
>>>>>
>>>>>          
>>>> Reader may want to poll for the status change.
>>>>
>>>>        
>>> Without epoll(), it's inherently racy since reads from other processes
>>> can clear the status.
>>>
>>>      
>> This is correct for any file descriptor. Multiple readers shouldn't
>> simultaneously read from the same files descriptor if they expect to
>> make any sense from a result.
>>    
>
> I think counting eventfd is an exception, but in general you are right.

How is it an exception? It seems that one reader get the counter,
others will block until the next write.

> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  9:23                     ` Gleb Natapov
@ 2009-08-04  9:30                       ` Avi Kivity
  2009-08-04  9:26                         ` Michael S. Tsirkin
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2009-08-04  9:30 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Michael S. Tsirkin, davidel, kvm, linux-kernel

On 08/04/2009 12:23 PM, Gleb Natapov wrote:
> On Tue, Aug 04, 2009 at 12:25:49PM +0300, Avi Kivity wrote:
>    
>> On 08/04/2009 12:17 PM, Gleb Natapov wrote:
>>      
>>>> If a different read comes after the write but after our read, it will
>>>> have transferred the value, resulting in the same situation.
>>>>
>>>> I think reads should never block with a state based mechanism.
>>>>
>>>>
>>>>          
>>> Reader may want to poll for the status change.
>>>
>>>        
>> Without epoll(), it's inherently racy since reads from other processes
>> can clear the status.
>>
>>      
> This is correct for any file descriptor. Multiple readers shouldn't
> simultaneously read from the same files descriptor if they expect to
> make any sense from a result.
>    

I think counting eventfd is an exception, but in general you are right.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  9:17               ` Avi Kivity
  2009-08-04  9:17                 ` Gleb Natapov
@ 2009-08-04  9:33                 ` Michael S. Tsirkin
  1 sibling, 0 replies; 16+ messages in thread
From: Michael S. Tsirkin @ 2009-08-04  9:33 UTC (permalink / raw)
  To: Avi Kivity; +Cc: davidel, gleb, kvm, linux-kernel

On Tue, Aug 04, 2009 at 12:17:44PM +0300, Avi Kivity wrote:
> On 08/04/2009 11:54 AM, Michael S. Tsirkin wrote:
>> On Tue, Aug 04, 2009 at 11:53:03AM +0300, Avi Kivity wrote:
>>    
>>> On 08/03/2009 07:57 PM, Michael S. Tsirkin wrote:
>>>      
>>>>> Why not do it at the point of the write?
>>>>>
>>>>>       if (value != ctx->count) {
>>>>>           ctx->count = value;
>>>>>           wake_things_up();
>>>>>       }
>>>>>
>>>>>          
>>>> What if write comes before read?
>>>>
>>>>        
>>> The read will get the new value.
>>>      
>>
>> Yes :) But how does read know it should not block?
>>    
>
> If a different read comes after the write but after our read, it will  
> have transferred the value, resulting in the same situation.

Not the same: one reader wakes up, others sleep.

Multiple reads from the same fd behave this way for any file I can think
of. Consider regular eventfd, or a pipe, a socket ...  But if we want to
support blocking reads, we probably should not require the readers to
sync with writers.


> I think reads should never block with a state based mechanism.

Yes, with no support for blocking reads, we don't need the state.
In that case, we probably want to error on open out unless O_NONBLOCK is
specified.  But why is this a good idea?


> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH-RFC 2/2] eventfd: EFD_STATE flag
  2009-08-04  9:26                         ` Michael S. Tsirkin
@ 2009-08-04 10:06                           ` Avi Kivity
  0 siblings, 0 replies; 16+ messages in thread
From: Avi Kivity @ 2009-08-04 10:06 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Gleb Natapov, davidel, kvm, linux-kernel

On 08/04/2009 12:26 PM, Michael S. Tsirkin wrote:
> How is it an exception? It seems that one reader get the counter,
> others will block until the next write.
>    

Right.  I retract my comments....

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2009-08-04 10:01 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1248803500.git.mst@redhat.com>
2009-07-28 17:54 ` [PATCH-RFC 1/2] eventfd: reorganize the code to simplify new flags Michael S. Tsirkin
2009-07-28 17:55 ` [PATCH-RFC 2/2] eventfd: EFD_STATE flag Michael S. Tsirkin
2009-08-03 15:09   ` Avi Kivity
2009-08-03 15:14     ` Michael S. Tsirkin
2009-08-03 15:29       ` Avi Kivity
2009-08-03 16:57         ` Michael S. Tsirkin
2009-08-04  8:53           ` Avi Kivity
2009-08-04  8:54             ` Michael S. Tsirkin
2009-08-04  9:17               ` Avi Kivity
2009-08-04  9:17                 ` Gleb Natapov
2009-08-04  9:25                   ` Avi Kivity
2009-08-04  9:23                     ` Gleb Natapov
2009-08-04  9:30                       ` Avi Kivity
2009-08-04  9:26                         ` Michael S. Tsirkin
2009-08-04 10:06                           ` Avi Kivity
2009-08-04  9:33                 ` Michael S. Tsirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.