linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* O_DIRECT handling introduced a race in F_SETFL
@ 2002-07-02 18:30 Matthew Wilcox
  2002-07-05 14:19 ` Andrea Arcangeli
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Wilcox @ 2002-07-02 18:30 UTC (permalink / raw)
  To: Andrea Arcangeli; +Cc: linux-kernel


I was doing some random kernel janitoring, pushing the BKL down a
little bit, when I noticed O_DIRECT introduced a race.  If alloc_kiovec
sleeps, we will effectively be unprotected by the BKL.  This would
allow two processes sharing an fd both changing the FASYNC flag to
call f_op->fasync() the wrong number of times and potentially leave
the application erroneously believing that fasync is off when it is on,
or vice versa.

My patch below fixes this problem by allocating the O_DIRECT kiovec
before taking the BKL.  Any comments?  Also, should we be freeing the
kiovec when removing the O_DIRECT attribute from the filp?

diff -urNX dontdiff linux-2.5.24/fs/fcntl.c linux-2.5.24-mm/fs/fcntl.c
--- linux-2.5.24/fs/fcntl.c	Sun Jun  9 06:09:49 2002
+++ linux-2.5.24-mm/fs/fcntl.c	Tue Jul  2 10:55:29 2002
@@ -235,24 +235,11 @@
 	if (!(arg & O_APPEND) && IS_APPEND(inode))
 		return -EPERM;
 
-	/* Did FASYNC state change? */
-	if ((arg ^ filp->f_flags) & FASYNC) {
-		if (filp->f_op && filp->f_op->fasync) {
-			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
-			if (error < 0)
-				return error;
-		}
-	}
-
 	if (arg & O_DIRECT) {
 		/*
-		 * alloc_kiovec() can sleep and we are only serialized by
-		 * the big kernel lock here, so abuse the i_sem to serialize
-		 * this case too. We of course wouldn't need to go deep down
-		 * to the inode layer, we could stay at the file layer, but
-		 * we don't want to pay for the memory of a semaphore in each
-		 * file structure too and we use the inode semaphore that we just
-		 * pay for anyways.
+		 * alloc_kiovec() can sleep, so abuse the i_sem to serialize
+		 * this case too.  Note we have to do this before we take the
+		 * BKL otherwise we have a race if it sleeps.
 		 */
 		error = 0;
 		down(&inode->i_sem);
@@ -263,13 +250,26 @@
 			return error;
 	}
 
+	lock_kernel();
+	/* Did FASYNC state change? */
+	if ((arg ^ filp->f_flags) & FASYNC) {
+		if (filp->f_op && filp->f_op->fasync) {
+			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
+			if (error < 0)
+				goto out;
+		}
+	}
+
 	/* required for strict SunOS emulation */
 	if (O_NONBLOCK != O_NDELAY)
 	       if (arg & O_NDELAY)
 		   arg |= O_NONBLOCK;
 
 	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
-	return 0;
+	error = 0;
+ out:
+	unlock_kernel();
+	return error;
 }
 
 static long do_fcntl(unsigned int fd, unsigned int cmd,
@@ -295,9 +295,7 @@
 			err = filp->f_flags;
 			break;
 		case F_SETFL:
-			lock_kernel();
 			err = setfl(fd, filp, arg);
-			unlock_kernel();
 			break;
 		case F_GETLK:
 			err = fcntl_getlk(filp, (struct flock *) arg);

-- 
Revolutions do not require corporate support.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: O_DIRECT handling introduced a race in F_SETFL
  2002-07-02 18:30 O_DIRECT handling introduced a race in F_SETFL Matthew Wilcox
@ 2002-07-05 14:19 ` Andrea Arcangeli
  2002-07-10 11:29   ` Andrea Arcangeli
  0 siblings, 1 reply; 4+ messages in thread
From: Andrea Arcangeli @ 2002-07-05 14:19 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Andrea Arcangeli, linux-kernel

On Tue, Jul 02, 2002 at 07:30:19PM +0100, Matthew Wilcox wrote:
> 
> I was doing some random kernel janitoring, pushing the BKL down a
> little bit, when I noticed O_DIRECT introduced a race.  If alloc_kiovec
> sleeps, we will effectively be unprotected by the BKL.  This would
> allow two processes sharing an fd both changing the FASYNC flag to
> call f_op->fasync() the wrong number of times and potentially leave
> the application erroneously believing that fasync is off when it is on,
> or vice versa.
> 
> My patch below fixes this problem by allocating the O_DIRECT kiovec
> before taking the BKL.  Any comments?  Also, should we be freeing the
> kiovec when removing the O_DIRECT attribute from the filp?
> 
> diff -urNX dontdiff linux-2.5.24/fs/fcntl.c linux-2.5.24-mm/fs/fcntl.c
> --- linux-2.5.24/fs/fcntl.c	Sun Jun  9 06:09:49 2002
> +++ linux-2.5.24-mm/fs/fcntl.c	Tue Jul  2 10:55:29 2002
> @@ -235,24 +235,11 @@
>  	if (!(arg & O_APPEND) && IS_APPEND(inode))
>  		return -EPERM;
>  
> -	/* Did FASYNC state change? */
> -	if ((arg ^ filp->f_flags) & FASYNC) {
> -		if (filp->f_op && filp->f_op->fasync) {
> -			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
> -			if (error < 0)
> -				return error;
> -		}
> -	}
> -
>  	if (arg & O_DIRECT) {
>  		/*
> -		 * alloc_kiovec() can sleep and we are only serialized by
> -		 * the big kernel lock here, so abuse the i_sem to serialize
> -		 * this case too. We of course wouldn't need to go deep down
> -		 * to the inode layer, we could stay at the file layer, but
> -		 * we don't want to pay for the memory of a semaphore in each
> -		 * file structure too and we use the inode semaphore that we just
> -		 * pay for anyways.
> +		 * alloc_kiovec() can sleep, so abuse the i_sem to serialize
> +		 * this case too.  Note we have to do this before we take the
> +		 * BKL otherwise we have a race if it sleeps.
>  		 */
>  		error = 0;
>  		down(&inode->i_sem);
> @@ -263,13 +250,26 @@
>  			return error;
>  	}
>  
> +	lock_kernel();
> +	/* Did FASYNC state change? */
> +	if ((arg ^ filp->f_flags) & FASYNC) {
> +		if (filp->f_op && filp->f_op->fasync) {
> +			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
> +			if (error < 0)
> +				goto out;
> +		}
> +	}
> +
>  	/* required for strict SunOS emulation */
>  	if (O_NONBLOCK != O_NDELAY)
>  	       if (arg & O_NDELAY)
>  		   arg |= O_NONBLOCK;
>  
>  	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
> -	return 0;
> +	error = 0;
> + out:
> +	unlock_kernel();
> +	return error;
>  }
>  
>  static long do_fcntl(unsigned int fd, unsigned int cmd,
> @@ -295,9 +295,7 @@
>  			err = filp->f_flags;
>  			break;
>  		case F_SETFL:
> -			lock_kernel();
>  			err = setfl(fd, filp, arg);
> -			unlock_kernel();
>  			break;
>  		case F_GETLK:
>  			err = fcntl_getlk(filp, (struct flock *) arg);

that's correct. Also moving the if (arg & O_DIRECT) {} right after
setting filp->f_flags would fix the race, that wouldn't require the move
the locking to the setfl, but I guess moving the locking down was your
object since the first place :). Please post it for mainline inclusion
too.

Andrea

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: O_DIRECT handling introduced a race in F_SETFL
  2002-07-05 14:19 ` Andrea Arcangeli
@ 2002-07-10 11:29   ` Andrea Arcangeli
  2002-07-11 21:24     ` Marcus Alanen
  0 siblings, 1 reply; 4+ messages in thread
From: Andrea Arcangeli @ 2002-07-10 11:29 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Andrea Arcangeli, linux-kernel, Marcelo Tosatti

On Fri, Jul 05, 2002 at 04:19:34PM +0200, Andrea Arcangeli wrote:
> On Tue, Jul 02, 2002 at 07:30:19PM +0100, Matthew Wilcox wrote:
> > 
> > I was doing some random kernel janitoring, pushing the BKL down a
> > little bit, when I noticed O_DIRECT introduced a race.  If alloc_kiovec
> > sleeps, we will effectively be unprotected by the BKL.  This would
> > allow two processes sharing an fd both changing the FASYNC flag to
> > call f_op->fasync() the wrong number of times and potentially leave
> > the application erroneously believing that fasync is off when it is on,
> > or vice versa.
> > 
> > My patch below fixes this problem by allocating the O_DIRECT kiovec
> > before taking the BKL.  Any comments?  Also, should we be freeing the
> > kiovec when removing the O_DIRECT attribute from the filp?
> > 
> > diff -urNX dontdiff linux-2.5.24/fs/fcntl.c linux-2.5.24-mm/fs/fcntl.c
> > --- linux-2.5.24/fs/fcntl.c	Sun Jun  9 06:09:49 2002
> > +++ linux-2.5.24-mm/fs/fcntl.c	Tue Jul  2 10:55:29 2002
> > @@ -235,24 +235,11 @@
> >  	if (!(arg & O_APPEND) && IS_APPEND(inode))
> >  		return -EPERM;
> >  
> > -	/* Did FASYNC state change? */
> > -	if ((arg ^ filp->f_flags) & FASYNC) {
> > -		if (filp->f_op && filp->f_op->fasync) {
> > -			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
> > -			if (error < 0)
> > -				return error;
> > -		}
> > -	}
> > -
> >  	if (arg & O_DIRECT) {
> >  		/*
> > -		 * alloc_kiovec() can sleep and we are only serialized by
> > -		 * the big kernel lock here, so abuse the i_sem to serialize
> > -		 * this case too. We of course wouldn't need to go deep down
> > -		 * to the inode layer, we could stay at the file layer, but
> > -		 * we don't want to pay for the memory of a semaphore in each
> > -		 * file structure too and we use the inode semaphore that we just
> > -		 * pay for anyways.
> > +		 * alloc_kiovec() can sleep, so abuse the i_sem to serialize
> > +		 * this case too.  Note we have to do this before we take the
> > +		 * BKL otherwise we have a race if it sleeps.
> >  		 */
> >  		error = 0;
> >  		down(&inode->i_sem);
> > @@ -263,13 +250,26 @@
> >  			return error;
> >  	}
> >  
> > +	lock_kernel();
> > +	/* Did FASYNC state change? */
> > +	if ((arg ^ filp->f_flags) & FASYNC) {
> > +		if (filp->f_op && filp->f_op->fasync) {
> > +			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
> > +			if (error < 0)
> > +				goto out;
> > +		}
> > +	}
> > +
> >  	/* required for strict SunOS emulation */
> >  	if (O_NONBLOCK != O_NDELAY)
> >  	       if (arg & O_NDELAY)
> >  		   arg |= O_NONBLOCK;
> >  
> >  	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
> > -	return 0;
> > +	error = 0;
> > + out:
> > +	unlock_kernel();
> > +	return error;
> >  }
> >  
> >  static long do_fcntl(unsigned int fd, unsigned int cmd,
> > @@ -295,9 +295,7 @@
> >  			err = filp->f_flags;
> >  			break;
> >  		case F_SETFL:
> > -			lock_kernel();
> >  			err = setfl(fd, filp, arg);
> > -			unlock_kernel();
> >  			break;
> >  		case F_GETLK:
> >  			err = fcntl_getlk(filp, (struct flock *) arg);
> 
> that's correct. Also moving the if (arg & O_DIRECT) {} right after
> setting filp->f_flags would fix the race, that wouldn't require the move
> the locking to the setfl, but I guess moving the locking down was your
> object since the first place :). Please post it for mainline inclusion
> too.
> 


this one from -aa fixes fasync too. Please review, thanks.

--- odirect/fs/fcntl.c.~1~	Fri Jul  5 12:20:47 2002
+++ odirect/fs/fcntl.c	Sat Jul  6 18:53:56 2002
@@ -213,32 +213,29 @@ static int setfl(int fd, struct file * f
 	if (!(arg & O_APPEND) && IS_APPEND(inode))
 		return -EPERM;
 
+	/*
+	 * alloc_kiovec() and ->fasync can sleep, so abuse the i_sem
+	 * to serialize against parallel setfl on the same filp,
+	 * to avoid races with ->f_flags and ->f_iobuf.
+	 */
+	down(&inode->i_sem);
 	/* Did FASYNC state change? */
 	if ((arg ^ filp->f_flags) & FASYNC) {
 		if (filp->f_op && filp->f_op->fasync) {
+			lock_kernel();
 			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
+			unlock_kernel();
 			if (error < 0)
-				return error;
+				goto out;
 		}
 	}
 
 	if (arg & O_DIRECT) {
-		/*
-		 * alloc_kiovec() can sleep and we are only serialized by
-		 * the big kernel lock here, so abuse the i_sem to serialize
-		 * this case too. We of course wouldn't need to go deep down
-		 * to the inode layer, we could stay at the file layer, but
-		 * we don't want to pay for the memory of a semaphore in each
-		 * file structure too and we use the inode semaphore that we just
-		 * pay for anyways.
-		 */
-		error = 0;
-		down(&inode->i_sem);
-		if (!filp->f_iobuf)
+		if (!filp->f_iobuf) {
 			error = alloc_kiovec(1, &filp->f_iobuf);
-		up(&inode->i_sem);
-		if (error < 0)
-			return error;
+			if (error < 0)
+				goto out;
+		}
 	}
 
 	/* required for strict SunOS emulation */
@@ -247,7 +244,10 @@ static int setfl(int fd, struct file * f
 		   arg |= O_NONBLOCK;
 
 	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
-	return 0;
+	error = 0;
+ out:
+	up(&inode->i_sem);
+	return error;
 }
 
 static long do_fcntl(unsigned int fd, unsigned int cmd,
@@ -273,9 +273,7 @@ static long do_fcntl(unsigned int fd, un
 			err = filp->f_flags;
 			break;
 		case F_SETFL:
-			lock_kernel();
 			err = setfl(fd, filp, arg);
-			unlock_kernel();
 			break;
 		case F_GETLK:
 			err = fcntl_getlk(fd, (struct flock *) arg);

Andrea

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: O_DIRECT handling introduced a race in F_SETFL
  2002-07-10 11:29   ` Andrea Arcangeli
@ 2002-07-11 21:24     ` Marcus Alanen
  0 siblings, 0 replies; 4+ messages in thread
From: Marcus Alanen @ 2002-07-11 21:24 UTC (permalink / raw)
  To: andrea, Matthew Wilcox; +Cc: Andrea Arcangeli, linux-kernel, Marcelo Tosatti

>this one from -aa fixes fasync too. Please review, thanks.
>
>--- odirect/fs/fcntl.c.~1~	Fri Jul  5 12:20:47 2002
>+++ odirect/fs/fcntl.c	Sat Jul  6 18:53:56 2002
>@@ -213,32 +213,29 @@ static int setfl(int fd, struct file * f
> 	if (!(arg & O_APPEND) && IS_APPEND(inode))
> 		return -EPERM;
> 
>+	/*
>+	 * alloc_kiovec() and ->fasync can sleep, so abuse the i_sem
>+	 * to serialize against parallel setfl on the same filp,
>+	 * to avoid races with ->f_flags and ->f_iobuf.
>+	 */
>+	down(&inode->i_sem);
    ^^^^ 
> 	/* Did FASYNC state change? */
> 	if ((arg ^ filp->f_flags) & FASYNC) {
> 		if (filp->f_op && filp->f_op->fasync) {
>+			lock_kernel();
> 			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) !=
>  0);
>+			unlock_kernel();
...

You introduce locking that hasn't been there previously.


To keep things in sync, ioctl.c needs to take i_sem also. In kernel 2.5,
pipe.c needs to move acquisition of i_sem to the surrounding functions.

Patch below. (Trivial documentation update added)

Could somebody please verify this?


Marcus


===== Documentation/filesystems/Locking 1.32 vs edited =====
--- 1.32/Documentation/filesystems/Locking	Wed Jul  3 03:22:36 2002
+++ edited/Documentation/filesystems/Locking	Thu Jul 11 23:03:52 2002
@@ -343,6 +343,8 @@
 
 ->fsync() has i_sem on inode.
 
+->fasync() has i_sem on inode.
+
 --------------------------- dquot_operations -------------------------------
 prototypes:
 	void (*initialize) (struct inode *, short);
===== fs/ioctl.c 1.4 vs edited =====
--- 1.4/fs/ioctl.c	Sat May 18 20:50:00 2002
+++ edited/fs/ioctl.c	Thu Jul 11 23:49:25 2002
@@ -82,10 +82,13 @@
 				filp->f_flags &= ~flag;
 			break;
 
-		case FIOASYNC:
+		case FIOASYNC: {
+			struct inode * inode;
 			if ((error = get_user(on, (int *)arg)) != 0)
 				break;
 			flag = on ? FASYNC : 0;
+			inode = filp->f_dentry->d_inode;
+			down(&inode->i_sem);
 
 			/* Did FASYNC state change ? */
 			if ((flag ^ filp->f_flags) & FASYNC) {
@@ -94,13 +97,16 @@
 				else error = -ENOTTY;
 			}
 			if (error != 0)
-				break;
+				goto fioasync_out;
 
 			if (on)
 				filp->f_flags |= FASYNC;
 			else
 				filp->f_flags &= ~FASYNC;
+		fioasync_out:
+			up(&inode->i_sem);
 			break;
+		}
 
 		case FIOQSIZE:
 			if (S_ISDIR(filp->f_dentry->d_inode->i_mode) ||
===== fs/pipe.c 1.14 vs edited =====
--- 1.14/fs/pipe.c	Wed Jun 12 03:23:20 2002
+++ edited/fs/pipe.c	Thu Jul 11 23:23:58 2002
@@ -329,9 +329,7 @@
 	struct inode *inode = filp->f_dentry->d_inode;
 	int retval;
 
-	down(PIPE_SEM(*inode));
 	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
-	up(PIPE_SEM(*inode));
 
 	if (retval < 0)
 		return retval;
@@ -346,9 +344,7 @@
 	struct inode *inode = filp->f_dentry->d_inode;
 	int retval;
 
-	down(PIPE_SEM(*inode));
 	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
-	up(PIPE_SEM(*inode));
 
 	if (retval < 0)
 		return retval;
@@ -363,15 +359,11 @@
 	struct inode *inode = filp->f_dentry->d_inode;
 	int retval;
 
-	down(PIPE_SEM(*inode));
-
 	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
 
 	if (retval >= 0)
 		retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
 
-	up(PIPE_SEM(*inode));
-
 	if (retval < 0)
 		return retval;
 
@@ -382,14 +374,18 @@
 static int
 pipe_read_release(struct inode *inode, struct file *filp)
 {
+	down(PIPE_SEM(*inode));
 	pipe_read_fasync(-1, filp, 0);
+	up(PIPE_SEM(*inode));
 	return pipe_release(inode, 1, 0);
 }
 
 static int
 pipe_write_release(struct inode *inode, struct file *filp)
 {
+	down(PIPE_SEM(*inode));
 	pipe_write_fasync(-1, filp, 0);
+	up(PIPE_SEM(*inode));
 	return pipe_release(inode, 0, 1);
 }
 
@@ -398,7 +394,9 @@
 {
 	int decr, decw;
 
+	down(PIPE_SEM(*inode));
 	pipe_rdwr_fasync(-1, filp, 0);
+	up(PIPE_SEM(*inode));
 	decr = (filp->f_mode & FMODE_READ) != 0;
 	decw = (filp->f_mode & FMODE_WRITE) != 0;
 	return pipe_release(inode, decr, decw);


-- 
Marcus Alanen
maalanen@abo.fi

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2002-07-11 21:22 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-07-02 18:30 O_DIRECT handling introduced a race in F_SETFL Matthew Wilcox
2002-07-05 14:19 ` Andrea Arcangeli
2002-07-10 11:29   ` Andrea Arcangeli
2002-07-11 21:24     ` Marcus Alanen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).