linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V3] orangefs: posix open permission checking...
@ 2019-11-26 18:50 hubcap
  2019-11-26 19:22 ` Linus Torvalds
  2019-11-28 15:00 ` Mike Marshall
  0 siblings, 2 replies; 4+ messages in thread
From: hubcap @ 2019-11-26 18:50 UTC (permalink / raw)
  To: torvalds; +Cc: Mike Marshall, linux-fsdevel

From: Mike Marshall <hubcap@omnibond.com>

Here's another version that is hopefully closer to
usable...

 Orangefs has no open, and orangefs checks file permissions
 on each file access. Posix requires that file permissions
 be checked on open and nowhere else. Orangefs-through-the-kernel
 needs to seem posix compliant.

 The VFS opens files, even if the filesystem provides no
 open method. We can see if a file was successfully opened for
 read and/or for write by looking at file->f_mode.

 When writes are flowing from the page cache, file is no
 longer available. We can trust the VFS to have checked
 file->f_mode before writing to the page cache.

 The mode of a file might change between when it is opened
 and IO commences, or it might be created with an arbitrary mode.

 We'll make sure we don't hit EACCES during the IO stage by
 using UID 0. Some of the time we already have access without
 changing to UID 0 - how can we check for that case?

Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/file.c            | 39 +++++++++++++++++++++++++++++++++--
 fs/orangefs/inode.c           |  8 +++----
 fs/orangefs/orangefs-kernel.h |  3 ++-
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index a5612abc0936..c740159d9ad1 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -46,8 +46,9 @@ static int flush_racache(struct inode *inode)
  * Post and wait for the I/O upcall to finish
  */
 ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
-    loff_t *offset, struct iov_iter *iter, size_t total_size,
-    loff_t readahead_size, struct orangefs_write_range *wr, int *index_return)
+	loff_t *offset, struct iov_iter *iter, size_t total_size,
+	loff_t readahead_size, struct orangefs_write_range *wr,
+	int *index_return, struct file *file)
 {
 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
 	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
@@ -55,6 +56,8 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
 	int buffer_index;
 	ssize_t ret;
 	size_t copy_amount;
+	int open_for_read;
+	int open_for_write;
 
 	new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
 	if (!new_op)
@@ -90,6 +93,38 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
 		new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid);
 		new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid);
 	}
+	/*
+	 * Orangefs has no open, and orangefs checks file permissions
+	 * on each file access. Posix requires that file permissions
+	 * be checked on open and nowhere else. Orangefs-through-the-kernel
+	 * needs to seem posix compliant.
+	 *
+	 * The VFS opens files, even if the filesystem provides no
+	 * method. We can see if a file was successfully opened for
+	 * read and or for write by looking at file->f_mode.
+	 *
+	 * When writes are flowing from the page cache, file is no
+	 * longer available. We can trust the VFS to have checked
+	 * file->f_mode before writing to the page cache.
+	 *
+	 * The mode of a file might change between when it is opened
+	 * and IO commences, or it might be created with an arbitrary mode.
+	 *
+	 * We'll make sure we don't hit EACCES during the IO stage by
+	 * using UID 0. Some of the time we have access without changing
+	 * to UID 0 - how to check?
+	 */
+	if (file) {
+		open_for_write = file->f_mode & FMODE_WRITE;
+		open_for_read = file->f_mode & FMODE_READ;
+	} else {
+		open_for_write = 1;
+		open_for_read = 0; /* not relevant? */
+	}
+	if ((type == ORANGEFS_IO_WRITE) && open_for_write)
+		new_op->upcall.uid = 0;
+	if ((type == ORANGEFS_IO_READ) && open_for_read)
+		new_op->upcall.uid = 0;
 
 	gossip_debug(GOSSIP_FILE_DEBUG,
 		     "%s(%pU): offset: %llu total_size: %zd\n",
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index efb12197da18..961c0fd8675a 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -55,7 +55,7 @@ static int orangefs_writepage_locked(struct page *page,
 	iov_iter_bvec(&iter, WRITE, &bv, 1, wlen);
 
 	ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,
-	    len, wr, NULL);
+	    len, wr, NULL, NULL);
 	if (ret < 0) {
 		SetPageError(page);
 		mapping_set_error(page->mapping, ret);
@@ -126,7 +126,7 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow,
 	wr.uid = ow->uid;
 	wr.gid = ow->gid;
 	ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, ow->len,
-	    0, &wr, NULL);
+	    0, &wr, NULL, NULL);
 	if (ret < 0) {
 		for (i = 0; i < ow->npages; i++) {
 			SetPageError(ow->pages[i]);
@@ -311,7 +311,7 @@ static int orangefs_readpage(struct file *file, struct page *page)
 	iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE);
 
 	ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
-	    read_size, inode->i_size, NULL, &buffer_index);
+	    read_size, inode->i_size, NULL, &buffer_index, file);
 	remaining = ret;
 	/* this will only zero remaining unread portions of the page data */
 	iov_iter_zero(~0U, &iter);
@@ -651,7 +651,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
 			     (int)*offset);
 
 		ret = wait_for_direct_io(type, inode, offset, iter,
-				each_count, 0, NULL, NULL);
+				each_count, 0, NULL, NULL, file);
 		gossip_debug(GOSSIP_FILE_DEBUG,
 			     "%s(%pU): return from wait_for_io:%d\n",
 			     __func__,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 34a6c99fa29b..ed67f39fa7ce 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -398,7 +398,8 @@ bool __is_daemon_in_service(void);
  */
 int orangefs_revalidate_mapping(struct inode *);
 ssize_t wait_for_direct_io(enum ORANGEFS_io_type, struct inode *, loff_t *,
-    struct iov_iter *, size_t, loff_t, struct orangefs_write_range *, int *);
+    struct iov_iter *, size_t, loff_t, struct orangefs_write_range *, int *,
+    struct file *);
 ssize_t do_readv_writev(enum ORANGEFS_io_type, struct file *, loff_t *,
     struct iov_iter *);
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH V3] orangefs: posix open permission checking...
  2019-11-26 18:50 [PATCH V3] orangefs: posix open permission checking hubcap
@ 2019-11-26 19:22 ` Linus Torvalds
       [not found]   ` <CAOg9mSQ5hURb-e-hfg-q5_rgCfBC+wh7Z6mLUS_Tdu6iLquDhg@mail.gmail.com>
  2019-11-28 15:00 ` Mike Marshall
  1 sibling, 1 reply; 4+ messages in thread
From: Linus Torvalds @ 2019-11-26 19:22 UTC (permalink / raw)
  To: hubcap; +Cc: Mike Marshall, linux-fsdevel

On Tue, Nov 26, 2019 at 10:50 AM <hubcap@kernel.org> wrote:
>
> Here's another version that is hopefully closer to
> usable...

This looks like it should work.

I don't know what side effects that "new_op->upcall.uid = 0;" will
have on the server side, and it still looks a bit hacky to me, but at
least it doesn't have the obvious problems on the client side.

Arguably, if you trust the client, you might as well just *always* do
that upcall.uid clearing.

And if you don't trust the client, then you'd have to do some NFS-like
root squash anyway, at which point the uid clearing will actually
remove permissions and break this situation again.

So I do think this shows a deeper issue still, but at least it is an
understandable workaround for a non-posix filesystem.

               Linus

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Fwd: [PATCH V3] orangefs: posix open permission checking...
       [not found]   ` <CAOg9mSQ5hURb-e-hfg-q5_rgCfBC+wh7Z6mLUS_Tdu6iLquDhg@mail.gmail.com>
@ 2019-11-26 22:43     ` Mike Marshall
  0 siblings, 0 replies; 4+ messages in thread
From: Mike Marshall @ 2019-11-26 22:43 UTC (permalink / raw)
  To: linux-fsdevel

---------- Forwarded message ---------
From: Mike Marshall <hubcap@omnibond.com>
Date: Tue, Nov 26, 2019 at 4:02 PM
Subject: Re: [PATCH V3] orangefs: posix open permission checking...
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <hubcap@kernel.org>, linux-fsdevel <linux-fsdevel@vger.kernel.org>



>> I don't know what side effects that "new_op->upcall.uid = 0;" will
>> have on the server side

I believe that the 0 is only used for whatever filesystem operations
(usually one) are required to get this service_operation done.
I think it will only kick in during discrete IO operations
that we've already posixly promised to complete.

I still am looking for a kernel access()-like thing so
I can only use UID 0 when needed - I see your argument about
always doing it...

The orangefs people are interested in this, so they'll be looking
at this last iteration. Thanks for helping me to understand the
right way to look at it from the kernel's perspective.

-Mike

On Tue, Nov 26, 2019 at 2:23 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> On Tue, Nov 26, 2019 at 10:50 AM <hubcap@kernel.org> wrote:
> >
> > Here's another version that is hopefully closer to
> > usable...
>
> This looks like it should work.
>
> I don't know what side effects that "new_op->upcall.uid = 0;" will
> have on the server side, and it still looks a bit hacky to me, but at
> least it doesn't have the obvious problems on the client side.
>
> Arguably, if you trust the client, you might as well just *always* do
> that upcall.uid clearing.
>
> And if you don't trust the client, then you'd have to do some NFS-like
> root squash anyway, at which point the uid clearing will actually
> remove permissions and break this situation again.
>
> So I do think this shows a deeper issue still, but at least it is an
> understandable workaround for a non-posix filesystem.
>
>                Linus

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH V3] orangefs: posix open permission checking...
  2019-11-26 18:50 [PATCH V3] orangefs: posix open permission checking hubcap
  2019-11-26 19:22 ` Linus Torvalds
@ 2019-11-28 15:00 ` Mike Marshall
  1 sibling, 0 replies; 4+ messages in thread
From: Mike Marshall @ 2019-11-28 15:00 UTC (permalink / raw)
  To: hubcap; +Cc: Linus Torvalds, linux-fsdevel

Happy Thanksgiving everyone...

I would like to use UID 0 only when necessary, so I changed my
code like this:

-       if ((type == ORANGEFS_IO_WRITE) && open_for_write)
-               new_op->upcall.uid = 0;
-       if ((type == ORANGEFS_IO_READ) && open_for_read)
-               new_op->upcall.uid = 0;
+       if ((type == ORANGEFS_IO_WRITE) && open_for_write) {
+               if (inode_permission(inode, MAY_WRITE))
+                       new_op->upcall.uid = 0;
+       }
+       if ((type == ORANGEFS_IO_READ) && open_for_read) {
+               if (inode_permission(inode, MAY_READ))
+                       new_op->upcall.uid = 0;
+       }

At first this seems to work, but then whatever process is using my
filesystem wedges up... nothing in dmesg, nothing in the orangefs
userspace logs... vim on a /pvfsmnt/file does it every time, open
a new line and the whole terminal's locked up. If you strace the vim pid,
that locks up :-) ... I've watched ftrace output while this is happening,
inode_permission sure does call orangefs_permission a whole bunch of times.

Anywho... I think inode_permission will return 0 when I can read
(or write) and non-zero when I can't. I'll be trying to figure out
what is happening, if anyone here sees right off how I might be
mis-using inode_permission, please let me know...

-Mike

On Tue, Nov 26, 2019 at 1:50 PM <hubcap@kernel.org> wrote:
>
> From: Mike Marshall <hubcap@omnibond.com>
>
> Here's another version that is hopefully closer to
> usable...
>
>  Orangefs has no open, and orangefs checks file permissions
>  on each file access. Posix requires that file permissions
>  be checked on open and nowhere else. Orangefs-through-the-kernel
>  needs to seem posix compliant.
>
>  The VFS opens files, even if the filesystem provides no
>  method. We can see if a file was successfully opened for
>  read and or for write by looking at file->f_mode.
>
>  When writes are flowing from the page cache, file is no
>  longer available. We can trust the VFS to have checked
>  file->f_mode before writing to the page cache.
>
>  The mode of a file might change between when it is opened
>  and IO commences, or it might be created with an arbitrary mode.
>
>  We'll make sure we don't hit EACCES during the IO stage by
>  using UID 0. Some of the time we have access without changing
>  to UID 0 - how to check?
>
> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
> ---
>  fs/orangefs/file.c            | 39 +++++++++++++++++++++++++++++++++--
>  fs/orangefs/inode.c           |  8 +++----
>  fs/orangefs/orangefs-kernel.h |  3 ++-
>  3 files changed, 43 insertions(+), 7 deletions(-)
>
> diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
> index a5612abc0936..c740159d9ad1 100644
> --- a/fs/orangefs/file.c
> +++ b/fs/orangefs/file.c
> @@ -46,8 +46,9 @@ static int flush_racache(struct inode *inode)
>   * Post and wait for the I/O upcall to finish
>   */
>  ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
> -    loff_t *offset, struct iov_iter *iter, size_t total_size,
> -    loff_t readahead_size, struct orangefs_write_range *wr, int *index_return)
> +       loff_t *offset, struct iov_iter *iter, size_t total_size,
> +       loff_t readahead_size, struct orangefs_write_range *wr,
> +       int *index_return, struct file *file)
>  {
>         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
>         struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
> @@ -55,6 +56,8 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
>         int buffer_index;
>         ssize_t ret;
>         size_t copy_amount;
> +       int open_for_read;
> +       int open_for_write;
>
>         new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
>         if (!new_op)
> @@ -90,6 +93,38 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
>                 new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid);
>                 new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid);
>         }
> +       /*
> +        * Orangefs has no open, and orangefs checks file permissions
> +        * on each file access. Posix requires that file permissions
> +        * be checked on open and nowhere else. Orangefs-through-the-kernel
> +        * needs to seem posix compliant.
> +        *
> +        * The VFS opens files, even if the filesystem provides no
> +        * method. We can see if a file was successfully opened for
> +        * read and or for write by looking at file->f_mode.
> +        *
> +        * When writes are flowing from the page cache, file is no
> +        * longer available. We can trust the VFS to have checked
> +        * file->f_mode before writing to the page cache.
> +        *
> +        * The mode of a file might change between when it is opened
> +        * and IO commences, or it might be created with an arbitrary mode.
> +        *
> +        * We'll make sure we don't hit EACCES during the IO stage by
> +        * using UID 0. Some of the time we have access without changing
> +        * to UID 0 - how to check?
> +        */
> +       if (file) {
> +               open_for_write = file->f_mode & FMODE_WRITE;
> +               open_for_read = file->f_mode & FMODE_READ;
> +       } else {
> +               open_for_write = 1;
> +               open_for_read = 0; /* not relevant? */
> +       }
> +       if ((type == ORANGEFS_IO_WRITE) && open_for_write)
> +               new_op->upcall.uid = 0;
> +       if ((type == ORANGEFS_IO_READ) && open_for_read)
> +               new_op->upcall.uid = 0;
>
>         gossip_debug(GOSSIP_FILE_DEBUG,
>                      "%s(%pU): offset: %llu total_size: %zd\n",
> diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
> index efb12197da18..961c0fd8675a 100644
> --- a/fs/orangefs/inode.c
> +++ b/fs/orangefs/inode.c
> @@ -55,7 +55,7 @@ static int orangefs_writepage_locked(struct page *page,
>         iov_iter_bvec(&iter, WRITE, &bv, 1, wlen);
>
>         ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,
> -           len, wr, NULL);
> +           len, wr, NULL, NULL);
>         if (ret < 0) {
>                 SetPageError(page);
>                 mapping_set_error(page->mapping, ret);
> @@ -126,7 +126,7 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow,
>         wr.uid = ow->uid;
>         wr.gid = ow->gid;
>         ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, ow->len,
> -           0, &wr, NULL);
> +           0, &wr, NULL, NULL);
>         if (ret < 0) {
>                 for (i = 0; i < ow->npages; i++) {
>                         SetPageError(ow->pages[i]);
> @@ -311,7 +311,7 @@ static int orangefs_readpage(struct file *file, struct page *page)
>         iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE);
>
>         ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
> -           read_size, inode->i_size, NULL, &buffer_index);
> +           read_size, inode->i_size, NULL, &buffer_index, file);
>         remaining = ret;
>         /* this will only zero remaining unread portions of the page data */
>         iov_iter_zero(~0U, &iter);
> @@ -651,7 +651,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
>                              (int)*offset);
>
>                 ret = wait_for_direct_io(type, inode, offset, iter,
> -                               each_count, 0, NULL, NULL);
> +                               each_count, 0, NULL, NULL, file);
>                 gossip_debug(GOSSIP_FILE_DEBUG,
>                              "%s(%pU): return from wait_for_io:%d\n",
>                              __func__,
> diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
> index 34a6c99fa29b..ed67f39fa7ce 100644
> --- a/fs/orangefs/orangefs-kernel.h
> +++ b/fs/orangefs/orangefs-kernel.h
> @@ -398,7 +398,8 @@ bool __is_daemon_in_service(void);
>   */
>  int orangefs_revalidate_mapping(struct inode *);
>  ssize_t wait_for_direct_io(enum ORANGEFS_io_type, struct inode *, loff_t *,
> -    struct iov_iter *, size_t, loff_t, struct orangefs_write_range *, int *);
> +    struct iov_iter *, size_t, loff_t, struct orangefs_write_range *, int *,
> +    struct file *);
>  ssize_t do_readv_writev(enum ORANGEFS_io_type, struct file *, loff_t *,
>      struct iov_iter *);
>
> --
> 2.20.1
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-11-28 15:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-26 18:50 [PATCH V3] orangefs: posix open permission checking hubcap
2019-11-26 19:22 ` Linus Torvalds
     [not found]   ` <CAOg9mSQ5hURb-e-hfg-q5_rgCfBC+wh7Z6mLUS_Tdu6iLquDhg@mail.gmail.com>
2019-11-26 22:43     ` Fwd: " Mike Marshall
2019-11-28 15:00 ` Mike Marshall

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).