All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Fast status update interface (/selinux/status)
@ 2010-08-26 10:43 KaiGai Kohei
  2010-08-26 10:53 ` KaiGai Kohei
  2010-08-26 23:50 ` KaiGai Kohei
  0 siblings, 2 replies; 18+ messages in thread
From: KaiGai Kohei @ 2010-08-26 10:43 UTC (permalink / raw)
  To: selinux; +Cc: ewalsh

The attached patch allows to nofify userspace applications status
updating of SELinux using mmap interface.

When a userspace application manages userspace avc, it has to handle
a few events (policyload and setenforce) which shall cause invalidation
of userspace avc.
Now we have an interface to receive event messages using netlink socket,
but it requires a background worker thread (or process) or checking
the status every time when we call userspace avc routine.
Like kernel space, avc routine is called much frequently, so the later
option is not reasonable from viewpoint of performance.
In addition, now I'm facing a matter, even if we try to run background
worker process, because PostgreSQL is not designed that plugins can
launch worker process. (Now SE-PgSQL is re-designed as a plugin that
uses security hooks in PostgreSQL, like LSM.)
So, I need quite fast interface to check status of selinux with
neither background worker process nor system call invocations.

The new /selinux/status allows application to read-only mmap.
It exposes the following structure, then application can reference
these status information with nearly zero cost.

  struct selinux_kernel_status
  {
        u32     length;
        u32     sequence;
        u32     enforcing;
        u32     policyload;
  };

The 'sequence' shall be incremented by 2 for each events.
So, application can know it should invalidate userspace avc,
if the 'sequence' was changed from the last reference of avc.

The 'enforcing' and 'policyload' are protected from concurrent
updates using seqlock logic. So, if 'sequence' is odd number,
application needs to wait for a moment.

Thanks,

Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
--
 security/selinux/include/security.h |   16 +++++++
 security/selinux/selinuxfs.c        |   31 +++++++++++++
 security/selinux/ss/Makefile        |    2 +-
 security/selinux/ss/services.c      |    4 ++
 security/selinux/ss/status.c        |   85 +++++++++++++++++++++++++++++++++++
 5 files changed, 137 insertions(+), 1 deletions(-)

diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 1f7c249..75366e5 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -191,5 +191,21 @@ static inline int security_netlbl_sid_to_secattr(u32 sid,

 const char *security_get_initial_sid_context(u32 sid);

+/*
+ * status notifier using mmap interface
+ */
+extern struct page *selinux_status_page;
+
+struct selinux_kernel_status
+{
+	u32	length;
+	u32	sequence;
+	u32	enforcing;
+	u32	policyload;
+};
+
+extern void selinux_status_update_setenforce(int enforcing);
+extern void selinux_status_update_policyload(int seqno);
+
 #endif /* _SELINUX_SECURITY_H_ */

diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 79a1bb6..e434558 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -110,6 +110,7 @@ enum sel_inos {
 	SEL_COMPAT_NET,	/* whether to use old compat network packet controls */
 	SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
 	SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
+	SEL_STATUS,	/* export current status using mmap() */
 	SEL_INO_NEXT,	/* The next inode number to use */
 };

@@ -171,6 +172,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
 		if (selinux_enforcing)
 			avc_ss_reset(0);
 		selnl_notify_setenforce(selinux_enforcing);
+		selinux_status_update_setenforce(selinux_enforcing);
 	}
 	length = count;
 out:
@@ -200,11 +202,39 @@ static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf,
 	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
 }

+extern struct page *selinux_status_page;
+
 static const struct file_operations sel_handle_unknown_ops = {
 	.read		= sel_read_handle_unknown,
 	.llseek		= generic_file_llseek,
 };

+static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct selinux_kernel_status  *status = page_address(selinux_status_page);
+
+	return simple_read_from_buffer(buf, count, ppos, status, sizeof(*status));
+}
+
+static int sel_mmap_handle_status(struct file *file,
+				  struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+		return -EINVAL;
+	if (vma->vm_pgoff > 0)
+		return -ERANGE;
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(selinux_status_page),
+			       PAGE_SIZE, PAGE_READONLY);
+}
+
+static const struct file_operations sel_handle_status_ops = {
+	.read		= sel_read_handle_status,
+	.mmap		= sel_mmap_handle_status,
+	.llseek		= generic_file_llseek,
+};
+
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 static ssize_t sel_write_disable(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
@@ -1612,6 +1642,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
 		[SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
 		[SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO},
 		[SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO},
+		[SEL_STATUS] = {"status", &sel_handle_status_ops, S_IRUGO},
 		/* last one */ {""}
 	};
 	ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile
index 15d4e62..974e11c 100644
--- a/security/selinux/ss/Makefile
+++ b/security/selinux/ss/Makefile
@@ -5,5 +5,5 @@
 EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include
 obj-y := ss.o

-ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o
+ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o status.o

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 9ea2fec..640ec23 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1791,6 +1791,8 @@ int security_load_policy(void *data, size_t len)
 		selinux_complete_init();
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
+		selinux_status_update_setenforce(selinux_enforcing);
 		selinux_netlbl_cache_invalidate();
 		selinux_xfrm_notify_policyload();
 		return 0;
@@ -1870,6 +1872,7 @@ int security_load_policy(void *data, size_t len)

 	avc_ss_reset(seqno);
 	selnl_notify_policyload(seqno);
+	selinux_status_update_policyload(seqno);
 	selinux_netlbl_cache_invalidate();
 	selinux_xfrm_notify_policyload();

@@ -2374,6 +2377,7 @@ out:
 	if (!rc) {
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
 		selinux_xfrm_notify_policyload();
 	}
 	return rc;
diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c
new file mode 100644
index 0000000..4f07ebf
--- /dev/null
+++ b/security/selinux/ss/status.c
@@ -0,0 +1,85 @@
+/*
+ * mmap based event notifications for SELinux
+ *
+ * Author: KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *
+ * Copyright (C) 2010 NEC corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include "services.h"
+
+/*
+ * The selinux_status_page shall be exposed to userspace applications
+ * using mmap interface on /selinux/status.
+ * It enables to notify applications a few events that will cause reset
+ * of userspace access vector without context switching.
+ *
+ * The selinux_kernel_status structure on the head of status page is
+ * protected from concurrent accesses using seqlock logic, so userspace
+ * application should reference the status page according to the seqlock
+ * logic. (Hopefully, libselinux encapsulates it.)
+ */
+struct page	       *selinux_status_page;
+static spinlock_t	selinux_status_lock;
+
+#define LOCK_STATUS_PAGE(status)			\
+	do {						\
+		spin_lock(&selinux_status_lock);	\
+		(status)->sequence++;			\
+		smp_wmb();				\
+	} while(0)
+
+#define UNLOCK_STATUS_PAGE(status)			\
+	do {						\
+		smp_wmb();				\
+		(status)->sequence++;			\
+		spin_unlock(&selinux_status_lock);	\
+	} while(0)
+
+void selinux_status_update_setenforce(int enforcing)
+{
+	struct selinux_kernel_status   *status
+		= page_address(selinux_status_page);
+
+	LOCK_STATUS_PAGE(status);
+
+	status->enforcing = enforcing;
+
+	UNLOCK_STATUS_PAGE(status);
+}
+
+void selinux_status_update_policyload(int seqno)
+{
+	struct selinux_kernel_status   *status
+		= page_address(selinux_status_page);
+
+	LOCK_STATUS_PAGE(status);
+
+	status->policyload = seqno;
+
+	UNLOCK_STATUS_PAGE(status);
+}
+
+static int __init selinux_status_init(void)
+{
+	struct selinux_kernel_status *status;
+
+	spin_lock_init(&selinux_status_lock);
+
+	selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+	if (!selinux_status_page)
+		return -ENOMEM;
+
+	status = page_address(selinux_status_page);
+	status->length = sizeof(*status);
+
+	return 0;
+}
+__initcall(selinux_status_init);


-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-08-26 10:43 [PATCH] Fast status update interface (/selinux/status) KaiGai Kohei
@ 2010-08-26 10:53 ` KaiGai Kohei
  2010-08-26 23:50 ` KaiGai Kohei
  1 sibling, 0 replies; 18+ messages in thread
From: KaiGai Kohei @ 2010-08-26 10:53 UTC (permalink / raw)
  To: selinux; +Cc: KaiGai Kohei, ewalsh

As an aside, I'm considering about a corner case also.

When a userspace application asks selinux its access control decision,
it needs to handle the following three function calls in atomic.

 - string_to_security_class()
 - string_to_av_perm()
 - security_compute_av()

The 'atomic' means nobody reloads security policy during these
function calls, but here is no guarantee, although it is quite
rare case.

If we port the seqlock logic into userspace applications, we
can solve the matter in natural. In this case, all we need to
do is comparison of seqno after security_compute_av().
If it was different from the seqno on the head, we simply reset
userspace avc and retry a series of steps.

Thanks,

(2010/08/26 19:43), KaiGai Kohei wrote:
> The attached patch allows to nofify userspace applications status
> updating of SELinux using mmap interface.
> 
> When a userspace application manages userspace avc, it has to handle
> a few events (policyload and setenforce) which shall cause invalidation
> of userspace avc.
> Now we have an interface to receive event messages using netlink socket,
> but it requires a background worker thread (or process) or checking
> the status every time when we call userspace avc routine.
> Like kernel space, avc routine is called much frequently, so the later
> option is not reasonable from viewpoint of performance.
> In addition, now I'm facing a matter, even if we try to run background
> worker process, because PostgreSQL is not designed that plugins can
> launch worker process. (Now SE-PgSQL is re-designed as a plugin that
> uses security hooks in PostgreSQL, like LSM.)
> So, I need quite fast interface to check status of selinux with
> neither background worker process nor system call invocations.
> 
> The new /selinux/status allows application to read-only mmap.
> It exposes the following structure, then application can reference
> these status information with nearly zero cost.
> 
>    struct selinux_kernel_status
>    {
>          u32     length;
>          u32     sequence;
>          u32     enforcing;
>          u32     policyload;
>    };
> 
> The 'sequence' shall be incremented by 2 for each events.
> So, application can know it should invalidate userspace avc,
> if the 'sequence' was changed from the last reference of avc.
> 
> The 'enforcing' and 'policyload' are protected from concurrent
> updates using seqlock logic. So, if 'sequence' is odd number,
> application needs to wait for a moment.
> 
> Thanks,
> 
> Signed-off-by: KaiGai Kohei<kaigai@ak.jp.nec.com>
> --
>   security/selinux/include/security.h |   16 +++++++
>   security/selinux/selinuxfs.c        |   31 +++++++++++++
>   security/selinux/ss/Makefile        |    2 +-
>   security/selinux/ss/services.c      |    4 ++
>   security/selinux/ss/status.c        |   85 +++++++++++++++++++++++++++++++++++
>   5 files changed, 137 insertions(+), 1 deletions(-)
> 
> diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
> index 1f7c249..75366e5 100644
> --- a/security/selinux/include/security.h
> +++ b/security/selinux/include/security.h
> @@ -191,5 +191,21 @@ static inline int security_netlbl_sid_to_secattr(u32 sid,
> 
>   const char *security_get_initial_sid_context(u32 sid);
> 
> +/*
> + * status notifier using mmap interface
> + */
> +extern struct page *selinux_status_page;
> +
> +struct selinux_kernel_status
> +{
> +	u32	length;
> +	u32	sequence;
> +	u32	enforcing;
> +	u32	policyload;
> +};
> +
> +extern void selinux_status_update_setenforce(int enforcing);
> +extern void selinux_status_update_policyload(int seqno);
> +
>   #endif /* _SELINUX_SECURITY_H_ */
> 
> diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
> index 79a1bb6..e434558 100644
> --- a/security/selinux/selinuxfs.c
> +++ b/security/selinux/selinuxfs.c
> @@ -110,6 +110,7 @@ enum sel_inos {
>   	SEL_COMPAT_NET,	/* whether to use old compat network packet controls */
>   	SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
>   	SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
> +	SEL_STATUS,	/* export current status using mmap() */
>   	SEL_INO_NEXT,	/* The next inode number to use */
>   };
> 
> @@ -171,6 +172,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
>   		if (selinux_enforcing)
>   			avc_ss_reset(0);
>   		selnl_notify_setenforce(selinux_enforcing);
> +		selinux_status_update_setenforce(selinux_enforcing);
>   	}
>   	length = count;
>   out:
> @@ -200,11 +202,39 @@ static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf,
>   	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
>   }
> 
> +extern struct page *selinux_status_page;
> +
>   static const struct file_operations sel_handle_unknown_ops = {
>   	.read		= sel_read_handle_unknown,
>   	.llseek		= generic_file_llseek,
>   };
> 
> +static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
> +				      size_t count, loff_t *ppos)
> +{
> +	struct selinux_kernel_status  *status = page_address(selinux_status_page);
> +
> +	return simple_read_from_buffer(buf, count, ppos, status, sizeof(*status));
> +}
> +
> +static int sel_mmap_handle_status(struct file *file,
> +				  struct vm_area_struct *vma)
> +{
> +	if ((vma->vm_flags&  VM_SHARED)&&  (vma->vm_flags&  VM_MAYWRITE))
> +		return -EINVAL;
> +	if (vma->vm_pgoff>  0)
> +		return -ERANGE;
> +	return remap_pfn_range(vma, vma->vm_start,
> +			       page_to_pfn(selinux_status_page),
> +			       PAGE_SIZE, PAGE_READONLY);
> +}
> +
> +static const struct file_operations sel_handle_status_ops = {
> +	.read		= sel_read_handle_status,
> +	.mmap		= sel_mmap_handle_status,
> +	.llseek		= generic_file_llseek,
> +};
> +
>   #ifdef CONFIG_SECURITY_SELINUX_DISABLE
>   static ssize_t sel_write_disable(struct file *file, const char __user *buf,
>   				 size_t count, loff_t *ppos)
> @@ -1612,6 +1642,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
>   		[SEL_CHECKREQPROT] = {"checkreqprot",&sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
>   		[SEL_REJECT_UNKNOWN] = {"reject_unknown",&sel_handle_unknown_ops, S_IRUGO},
>   		[SEL_DENY_UNKNOWN] = {"deny_unknown",&sel_handle_unknown_ops, S_IRUGO},
> +		[SEL_STATUS] = {"status",&sel_handle_status_ops, S_IRUGO},
>   		/* last one */ {""}
>   	};
>   	ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
> diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile
> index 15d4e62..974e11c 100644
> --- a/security/selinux/ss/Makefile
> +++ b/security/selinux/ss/Makefile
> @@ -5,5 +5,5 @@
>   EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include
>   obj-y := ss.o
> 
> -ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o
> +ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o status.o
> 
> diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
> index 9ea2fec..640ec23 100644
> --- a/security/selinux/ss/services.c
> +++ b/security/selinux/ss/services.c
> @@ -1791,6 +1791,8 @@ int security_load_policy(void *data, size_t len)
>   		selinux_complete_init();
>   		avc_ss_reset(seqno);
>   		selnl_notify_policyload(seqno);
> +		selinux_status_update_policyload(seqno);
> +		selinux_status_update_setenforce(selinux_enforcing);
>   		selinux_netlbl_cache_invalidate();
>   		selinux_xfrm_notify_policyload();
>   		return 0;
> @@ -1870,6 +1872,7 @@ int security_load_policy(void *data, size_t len)
> 
>   	avc_ss_reset(seqno);
>   	selnl_notify_policyload(seqno);
> +	selinux_status_update_policyload(seqno);
>   	selinux_netlbl_cache_invalidate();
>   	selinux_xfrm_notify_policyload();
> 
> @@ -2374,6 +2377,7 @@ out:
>   	if (!rc) {
>   		avc_ss_reset(seqno);
>   		selnl_notify_policyload(seqno);
> +		selinux_status_update_policyload(seqno);
>   		selinux_xfrm_notify_policyload();
>   	}
>   	return rc;
> diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c
> new file mode 100644
> index 0000000..4f07ebf
> --- /dev/null
> +++ b/security/selinux/ss/status.c
> @@ -0,0 +1,85 @@
> +/*
> + * mmap based event notifications for SELinux
> + *
> + * Author: KaiGai Kohei<kaigai@ak.jp.nec.com>
> + *
> + * Copyright (C) 2010 NEC corporation
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2,
> + * as published by the Free Software Foundation.
> + */
> +#include<linux/kernel.h>
> +#include<linux/gfp.h>
> +#include<linux/mm.h>
> +#include<linux/spinlock.h>
> +#include "services.h"
> +
> +/*
> + * The selinux_status_page shall be exposed to userspace applications
> + * using mmap interface on /selinux/status.
> + * It enables to notify applications a few events that will cause reset
> + * of userspace access vector without context switching.
> + *
> + * The selinux_kernel_status structure on the head of status page is
> + * protected from concurrent accesses using seqlock logic, so userspace
> + * application should reference the status page according to the seqlock
> + * logic. (Hopefully, libselinux encapsulates it.)
> + */
> +struct page	       *selinux_status_page;
> +static spinlock_t	selinux_status_lock;
> +
> +#define LOCK_STATUS_PAGE(status)			\
> +	do {						\
> +		spin_lock(&selinux_status_lock);	\
> +		(status)->sequence++;			\
> +		smp_wmb();				\
> +	} while(0)
> +
> +#define UNLOCK_STATUS_PAGE(status)			\
> +	do {						\
> +		smp_wmb();				\
> +		(status)->sequence++;			\
> +		spin_unlock(&selinux_status_lock);	\
> +	} while(0)
> +
> +void selinux_status_update_setenforce(int enforcing)
> +{
> +	struct selinux_kernel_status   *status
> +		= page_address(selinux_status_page);
> +
> +	LOCK_STATUS_PAGE(status);
> +
> +	status->enforcing = enforcing;
> +
> +	UNLOCK_STATUS_PAGE(status);
> +}
> +
> +void selinux_status_update_policyload(int seqno)
> +{
> +	struct selinux_kernel_status   *status
> +		= page_address(selinux_status_page);
> +
> +	LOCK_STATUS_PAGE(status);
> +
> +	status->policyload = seqno;
> +
> +	UNLOCK_STATUS_PAGE(status);
> +}
> +
> +static int __init selinux_status_init(void)
> +{
> +	struct selinux_kernel_status *status;
> +
> +	spin_lock_init(&selinux_status_lock);
> +
> +	selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +	if (!selinux_status_page)
> +		return -ENOMEM;
> +
> +	status = page_address(selinux_status_page);
> +	status->length = sizeof(*status);
> +
> +	return 0;
> +}
> +__initcall(selinux_status_init);
> 
> 


-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-08-26 10:43 [PATCH] Fast status update interface (/selinux/status) KaiGai Kohei
  2010-08-26 10:53 ` KaiGai Kohei
@ 2010-08-26 23:50 ` KaiGai Kohei
  2010-08-27  8:37   ` KaiGai Kohei
  1 sibling, 1 reply; 18+ messages in thread
From: KaiGai Kohei @ 2010-08-26 23:50 UTC (permalink / raw)
  To: selinux; +Cc: ewalsh

> The new /selinux/status allows application to read-only mmap.
> It exposes the following structure, then application can reference
> these status information with nearly zero cost.
> 
>    struct selinux_kernel_status
>    {
>          u32     length;
>          u32     sequence;
>          u32     enforcing;
>          u32     policyload;
>    };
> 
> The 'sequence' shall be incremented by 2 for each events.
> So, application can know it should invalidate userspace avc,
> if the 'sequence' was changed from the last reference of avc.
> 
> The 'enforcing' and 'policyload' are protected from concurrent
> updates using seqlock logic. So, if 'sequence' is odd number,
> application needs to wait for a moment.
> 

The 'deny_unknown' might be also exposed.
When expected object class and access vectors are not supported
in the current working policy, userspace object manager needs to
know whether the undefined permissions should be denied, or not.

Thanks,
-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-08-26 23:50 ` KaiGai Kohei
@ 2010-08-27  8:37   ` KaiGai Kohei
  2010-08-27 15:48     ` Eric Paris
  0 siblings, 1 reply; 18+ messages in thread
From: KaiGai Kohei @ 2010-08-27  8:37 UTC (permalink / raw)
  To: selinux; +Cc: ewalsh

I revised the /selinux/status implementation.

* It becomes to report 'deny_unknown'. Userspace object manager
  also reference this flag to decide its behavior when the loaded
  policy does not support expected object classes.
* It provided PAGE_READONLY to remap_pfn_range() as page protection
  flag independent from argument of mmap(2), but it was uncommon.
  I fixed to pass vma->vm_page_prot instead of the hardwired flag
  according to any other implementation style.
  Now it returns an error, if user tries to map /selinux/status as
  writable pages.

Rest of parts are not changed.
--------------
This patch provides a new /selinux/status entry which allows
applications read-only mmap(2).
This region reflects selinux_kernel_status structure in kernel space.
  struct selinux_kernel_status
  {
          u32     length;         /* length of this structure */
          u32     sequence;       /* sequence number of seqlock logic */
          u32     enforcing;      /* current setting of enforcing mode */
          u32     policyload;     /* times of policy reloaded */
          u32     deny_unknown;   /* current setting of deny_unknown */
  };
When userspace object manager caches access control decisions provided
by SELinux, it needs to invalidate the cache on policy reload and
setenforce to keep consistency.
However, the applications need to check the kernel state for each
accesses on userspace avc, or launch a background worker process.
They give us either expensive system-call invocations or annoying
background process management.
If we could map /selinux/status to process memory space, application
can know updates of selinux status; policy reload or setenforce.

A typical application checks selinux_kernel_status::sequence when
it tries to reference userspace avc.
If it was changed from the last time when it checked userspace avc,
it means something was updated in the kernel space. The application
can reset userspace avc or update enforcing mode, without any system
call invocations.
In addition, the application also checks the sequence number to
ensure no events being happen during permission checks.
If is was increment, the application will reset userspace avc and
retry a series of steps from the head.

At least, if the application is RDBMS (PostgreSQL) or KVS (memcached),
it needs to handle massive number of requests from clients, so it is
significant to reduce number of kernel invocations. :-)

Thanks,

 Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
--
 security/selinux/include/security.h |   17 +++++++
 security/selinux/selinuxfs.c        |   38 +++++++++++++++
 security/selinux/ss/Makefile        |    2 +-
 security/selinux/ss/services.c      |    4 ++
 security/selinux/ss/status.c        |   86 +++++++++++++++++++++++++++++++++++
 5 files changed, 146 insertions(+), 1 deletions(-)

diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 1f7c249..f51f11f 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -191,5 +191,22 @@ static inline int security_netlbl_sid_to_secattr(u32 sid,

 const char *security_get_initial_sid_context(u32 sid);

+/*
+ * status notifier using mmap interface
+ */
+extern struct page *selinux_status_page;
+
+struct selinux_kernel_status
+{
+	u32	length;		/* length of this structure */
+	u32	sequence;	/* sequence number of seqlock logic */
+	u32	enforcing;	/* current setting of enforcing mode */
+	u32	policyload;	/* times of policy reloaded */
+	u32	deny_unknown;	/* current setting of deny_unknown */
+};
+
+extern void selinux_status_update_setenforce(int enforcing);
+extern void selinux_status_update_policyload(int seqno);
+
 #endif /* _SELINUX_SECURITY_H_ */

diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 79a1bb6..ad57c6b 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -110,6 +110,7 @@ enum sel_inos {
 	SEL_COMPAT_NET,	/* whether to use old compat network packet controls */
 	SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
 	SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
+	SEL_STATUS,	/* export current status using mmap() */
 	SEL_INO_NEXT,	/* The next inode number to use */
 };

@@ -171,6 +172,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
 		if (selinux_enforcing)
 			avc_ss_reset(0);
 		selnl_notify_setenforce(selinux_enforcing);
+		selinux_status_update_setenforce(selinux_enforcing);
 	}
 	length = count;
 out:
@@ -200,11 +202,46 @@ static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf,
 	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
 }

+extern struct page *selinux_status_page;
+
 static const struct file_operations sel_handle_unknown_ops = {
 	.read		= sel_read_handle_unknown,
 	.llseek		= generic_file_llseek,
 };

+static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct selinux_kernel_status  *status = page_address(selinux_status_page);
+
+	return simple_read_from_buffer(buf, count, ppos, status, sizeof(*status));
+}
+
+static int sel_mmap_handle_status(struct file *file,
+				  struct vm_area_struct *vma)
+{
+	unsigned long	size = vma->vm_end - vma->vm_start;
+
+	/* only allows one page from the head */
+	if (vma->vm_pgoff > 0 || size != PAGE_SIZE)
+		return -EIO;
+	/* disallow writable mapping */
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+	/* disallow mprotect() turns it into writable */
+	vma->vm_flags &= ~VM_MAYWRITE;
+
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(selinux_status_page),
+			       size, vma->vm_page_prot);
+}
+
+static const struct file_operations sel_handle_status_ops = {
+	.read		= sel_read_handle_status,
+	.mmap		= sel_mmap_handle_status,
+	.llseek		= generic_file_llseek,
+};
+
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 static ssize_t sel_write_disable(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
@@ -1612,6 +1649,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
 		[SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
 		[SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO},
 		[SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO},
+		[SEL_STATUS] = {"status", &sel_handle_status_ops, S_IRUGO},
 		/* last one */ {""}
 	};
 	ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile
index 15d4e62..974e11c 100644
--- a/security/selinux/ss/Makefile
+++ b/security/selinux/ss/Makefile
@@ -5,5 +5,5 @@
 EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include
 obj-y := ss.o

-ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o
+ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o status.o

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 9ea2fec..640ec23 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1791,6 +1791,8 @@ int security_load_policy(void *data, size_t len)
 		selinux_complete_init();
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
+		selinux_status_update_setenforce(selinux_enforcing);
 		selinux_netlbl_cache_invalidate();
 		selinux_xfrm_notify_policyload();
 		return 0;
@@ -1870,6 +1872,7 @@ int security_load_policy(void *data, size_t len)

 	avc_ss_reset(seqno);
 	selnl_notify_policyload(seqno);
+	selinux_status_update_policyload(seqno);
 	selinux_netlbl_cache_invalidate();
 	selinux_xfrm_notify_policyload();

@@ -2374,6 +2377,7 @@ out:
 	if (!rc) {
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
 		selinux_xfrm_notify_policyload();
 	}
 	return rc;
diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c
new file mode 100644
index 0000000..a7245d8
--- /dev/null
+++ b/security/selinux/ss/status.c
@@ -0,0 +1,86 @@
+/*
+ * mmap based event notifications for SELinux
+ *
+ * Author: KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *
+ * Copyright (C) 2010 NEC corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include "services.h"
+
+/*
+ * The selinux_status_page shall be exposed to userspace applications
+ * using mmap interface on /selinux/status.
+ * It enables to notify applications a few events that will cause reset
+ * of userspace access vector without context switching.
+ *
+ * The selinux_kernel_status structure on the head of status page is
+ * protected from concurrent accesses using seqlock logic, so userspace
+ * application should reference the status page according to the seqlock
+ * logic. (Hopefully, libselinux encapsulates it.)
+ */
+struct page	       *selinux_status_page;
+static spinlock_t	selinux_status_lock;
+
+#define LOCK_STATUS_PAGE(status)			\
+	do {						\
+		spin_lock(&selinux_status_lock);	\
+		(status)->sequence++;			\
+		smp_wmb();				\
+	} while(0)
+
+#define UNLOCK_STATUS_PAGE(status)			\
+	do {						\
+		smp_wmb();				\
+		(status)->sequence++;			\
+		spin_unlock(&selinux_status_lock);	\
+	} while(0)
+
+void selinux_status_update_setenforce(int enforcing)
+{
+	struct selinux_kernel_status   *status
+		= page_address(selinux_status_page);
+
+	LOCK_STATUS_PAGE(status);
+
+	status->enforcing = enforcing;
+
+	UNLOCK_STATUS_PAGE(status);
+}
+
+void selinux_status_update_policyload(int seqno)
+{
+	struct selinux_kernel_status   *status
+		= page_address(selinux_status_page);
+
+	LOCK_STATUS_PAGE(status);
+
+	status->policyload = seqno;
+	status->deny_unknown = !security_get_allow_unknown();
+
+	UNLOCK_STATUS_PAGE(status);
+}
+
+static int __init selinux_status_init(void)
+{
+	struct selinux_kernel_status *status;
+
+	spin_lock_init(&selinux_status_lock);
+
+	selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+	if (!selinux_status_page)
+		return -ENOMEM;
+
+	status = page_address(selinux_status_page);
+	status->length = sizeof(*status);
+
+	return 0;
+}
+__initcall(selinux_status_init);


-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-08-27  8:37   ` KaiGai Kohei
@ 2010-08-27 15:48     ` Eric Paris
  2010-08-27 16:19       ` Eric Paris
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Paris @ 2010-08-27 15:48 UTC (permalink / raw)
  To: KaiGai Kohei; +Cc: selinux, ewalsh

2010/8/27 KaiGai Kohei <kaigai@ak.jp.nec.com>:
> I revised the /selinux/status implementation.
>
> * It becomes to report 'deny_unknown'. Userspace object manager
>  also reference this flag to decide its behavior when the loaded
>  policy does not support expected object classes.
> * It provided PAGE_READONLY to remap_pfn_range() as page protection
>  flag independent from argument of mmap(2), but it was uncommon.
>  I fixed to pass vma->vm_page_prot instead of the hardwired flag
>  according to any other implementation style.
>  Now it returns an error, if user tries to map /selinux/status as
>  writable pages.

I really hate blowing 4k of memory on every system to show 40 bytes of
data on just a few systems.  Is there any change we could allocate the
page the first time it is needed rather that at boot?  I know compared
to the size of policy and other memory usage in SELinux it's odd for
me to complain, but I've decided to get on a reduction if possible
kick.

Only other comment is that __initcall() is deprecated and we are
supposed to use device_initcall() now.

If you plan to use it, I'll ack if you change both of those things....

-Eric


--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-08-27 15:48     ` Eric Paris
@ 2010-08-27 16:19       ` Eric Paris
  2010-08-28  3:24         ` KaiGai Kohei
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Paris @ 2010-08-27 16:19 UTC (permalink / raw)
  To: KaiGai Kohei; +Cc: selinux, ewalsh

On Fri, Aug 27, 2010 at 11:48 AM, Eric Paris <eparis@parisplace.org> wrote:
> 2010/8/27 KaiGai Kohei <kaigai@ak.jp.nec.com>:
>> I revised the /selinux/status implementation.
>>
>> * It becomes to report 'deny_unknown'. Userspace object manager
>>  also reference this flag to decide its behavior when the loaded
>>  policy does not support expected object classes.
>> * It provided PAGE_READONLY to remap_pfn_range() as page protection
>>  flag independent from argument of mmap(2), but it was uncommon.
>>  I fixed to pass vma->vm_page_prot instead of the hardwired flag
>>  according to any other implementation style.
>>  Now it returns an error, if user tries to map /selinux/status as
>>  writable pages.
>
> I really hate blowing 4k of memory on every system to show 40 bytes of
> data on just a few systems.  Is there any change we could allocate the
> page the first time it is needed rather that at boot?  I know compared
> to the size of policy and other memory usage in SELinux it's odd for
> me to complain, but I've decided to get on a reduction if possible
> kick.
>
> Only other comment is that __initcall() is deprecated and we are
> supposed to use device_initcall() now.
>
> If you plan to use it, I'll ack if you change both of those things....

actually if you move to dynamic allocation of the status page and use
static DEFINE_SPINLOCK instead of static spinlock_t you can get rid of
the __init() code altogether....

-Eric


--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-08-27 16:19       ` Eric Paris
@ 2010-08-28  3:24         ` KaiGai Kohei
  2010-09-02  8:16           ` KaiGai Kohei
  0 siblings, 1 reply; 18+ messages in thread
From: KaiGai Kohei @ 2010-08-28  3:24 UTC (permalink / raw)
  To: Eric Paris; +Cc: KaiGai Kohei, selinux, ewalsh

(2010/08/28 1:19), Eric Paris wrote:
> On Fri, Aug 27, 2010 at 11:48 AM, Eric Paris<eparis@parisplace.org>  wrote:
>> 2010/8/27 KaiGai Kohei<kaigai@ak.jp.nec.com>:
>>> I revised the /selinux/status implementation.
>>>
>>> * It becomes to report 'deny_unknown'. Userspace object manager
>>>   also reference this flag to decide its behavior when the loaded
>>>   policy does not support expected object classes.
>>> * It provided PAGE_READONLY to remap_pfn_range() as page protection
>>>   flag independent from argument of mmap(2), but it was uncommon.
>>>   I fixed to pass vma->vm_page_prot instead of the hardwired flag
>>>   according to any other implementation style.
>>>   Now it returns an error, if user tries to map /selinux/status as
>>>   writable pages.
>>
>> I really hate blowing 4k of memory on every system to show 40 bytes of
>> data on just a few systems.  Is there any change we could allocate the
>> page the first time it is needed rather that at boot?  I know compared
>> to the size of policy and other memory usage in SELinux it's odd for
>> me to complain, but I've decided to get on a reduction if possible
>> kick.
>>
>> Only other comment is that __initcall() is deprecated and we are
>> supposed to use device_initcall() now.
>>
>> If you plan to use it, I'll ack if you change both of those things....
> 
> actually if you move to dynamic allocation of the status page and use
> static DEFINE_SPINLOCK instead of static spinlock_t you can get rid of
> the __init() code altogether....
> 

I revised the patch.
It was changed the selinux_kernel_page being allocated at the first time
when application tries to reference the /selinux/status.
At the same time, it declares selinux_status_lock using DEFINE_MUTEX(),
so whole of the __init section has gone.

In addition, I changed first member of the selinux_kernel_status from
'length' to 'version', because sizeof(struct ...) is aligned to 64bit
boundary (24bytes) on x86_64 system, although it is actually 20bytes.
If we want to add a 32bit member in the future, 'length' may not inform
applications enough.

Thanks,

 Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
--
 security/selinux/include/security.h |   21 ++++++
 security/selinux/selinuxfs.c        |   47 ++++++++++++
 security/selinux/ss/Makefile        |    2 +-
 security/selinux/ss/services.c      |    3 +
 security/selinux/ss/status.c        |  135 +++++++++++++++++++++++++++++++++++
 5 files changed, 207 insertions(+), 1 deletions(-)

diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 1f7c249..e390e31 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -191,5 +191,26 @@ static inline int security_netlbl_sid_to_secattr(u32 sid,
 
 const char *security_get_initial_sid_context(u32 sid);
 
+/*
+ * status notifier using mmap interface
+ */
+extern struct page *selinux_kernel_status_page(void);
+
+#define SELINUX_KERNEL_STATUS_VERSION	1
+struct selinux_kernel_status
+{
+	u32	version;	/* version number of thie structure */
+	u32	sequence;	/* sequence number of seqlock logic */
+	u32	enforcing;	/* current setting of enforcing mode */
+	u32	policyload;	/* times of policy reloaded */
+	u32	deny_unknown;	/* current setting of deny_unknown */
+	/*
+	 * The version > 0 supports above members.
+	 */
+} __attribute__((packed));
+
+extern void selinux_status_update_setenforce(int enforcing);
+extern void selinux_status_update_policyload(int seqno);
+
 #endif /* _SELINUX_SECURITY_H_ */
 
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 79a1bb6..66e7b62 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -110,6 +110,7 @@ enum sel_inos {
 	SEL_COMPAT_NET,	/* whether to use old compat network packet controls */
 	SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
 	SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
+	SEL_STATUS,	/* export current status using mmap() */
 	SEL_INO_NEXT,	/* The next inode number to use */
 };
 
@@ -171,6 +172,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
 		if (selinux_enforcing)
 			avc_ss_reset(0);
 		selnl_notify_setenforce(selinux_enforcing);
+		selinux_status_update_setenforce(selinux_enforcing);
 	}
 	length = count;
 out:
@@ -200,11 +202,55 @@ static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf,
 	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
 }
 
+extern struct page *selinux_status_page;
+
 static const struct file_operations sel_handle_unknown_ops = {
 	.read		= sel_read_handle_unknown,
 	.llseek		= generic_file_llseek,
 };
 
+static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct page    *status = selinux_kernel_status_page();
+
+	if (!status)
+		return -ENOMEM;
+
+	return simple_read_from_buffer(buf, count, ppos,
+				       page_address(status),
+				       sizeof(struct selinux_kernel_status));
+}
+
+static int sel_mmap_handle_status(struct file *file,
+				  struct vm_area_struct *vma)
+{
+	unsigned long	size = vma->vm_end - vma->vm_start;
+	struct page    *status = selinux_kernel_status_page();
+
+	/* dynamic page allocation failed */
+	if (!status)
+		return -ENOMEM;
+	/* only allows one page from the head */
+	if (vma->vm_pgoff > 0 || size != PAGE_SIZE)
+		return -EIO;
+	/* disallow writable mapping */
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+	/* disallow mprotect() turns it into writable */
+	vma->vm_flags &= ~VM_MAYWRITE;
+
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(status),
+			       size, vma->vm_page_prot);
+}
+
+static const struct file_operations sel_handle_status_ops = {
+	.read		= sel_read_handle_status,
+	.mmap		= sel_mmap_handle_status,
+	.llseek		= generic_file_llseek,
+};
+
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 static ssize_t sel_write_disable(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
@@ -1612,6 +1658,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
 		[SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
 		[SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO},
 		[SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO},
+		[SEL_STATUS] = {"status", &sel_handle_status_ops, S_IRUGO},
 		/* last one */ {""}
 	};
 	ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile
index 15d4e62..974e11c 100644
--- a/security/selinux/ss/Makefile
+++ b/security/selinux/ss/Makefile
@@ -5,5 +5,5 @@
 EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include
 obj-y := ss.o
 
-ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o
+ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o status.o
 
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 9ea2fec..494ff52 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1791,6 +1791,7 @@ int security_load_policy(void *data, size_t len)
 		selinux_complete_init();
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
 		selinux_netlbl_cache_invalidate();
 		selinux_xfrm_notify_policyload();
 		return 0;
@@ -1870,6 +1871,7 @@ int security_load_policy(void *data, size_t len)
 
 	avc_ss_reset(seqno);
 	selnl_notify_policyload(seqno);
+	selinux_status_update_policyload(seqno);
 	selinux_netlbl_cache_invalidate();
 	selinux_xfrm_notify_policyload();
 
@@ -2374,6 +2376,7 @@ out:
 	if (!rc) {
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
 		selinux_xfrm_notify_policyload();
 	}
 	return rc;
diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c
new file mode 100644
index 0000000..eeab696
--- /dev/null
+++ b/security/selinux/ss/status.c
@@ -0,0 +1,135 @@
+/*
+ * mmap based event notifications for SELinux
+ *
+ * Author: KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *
+ * Copyright (C) 2010 NEC corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include "avc.h"
+#include "services.h"
+
+/*
+ * The selinux_status_page shall be exposed to userspace applications
+ * using mmap interface on /selinux/status.
+ * It enables to notify applications a few events that will cause reset
+ * of userspace access vector without context switching.
+ *
+ * The selinux_kernel_status structure on the head of status page is
+ * protected from concurrent accesses using seqlock logic, so userspace
+ * application should reference the status page according to the seqlock
+ * logic.
+ *
+ * Typically, application checks status->sequence at the head of access
+ * control routine. If it is odd-number, kernel is updating the status,
+ * so please wait for a moment. If it is changed from the last sequence
+ * number, it means something happen, so application will reset userspace
+ * avc, if needed.
+ * In addition, application should also checks the sequence number at
+ * tail of the access control routine. If it is changed from the value
+ * on the head, it means kernel status was changed under processing the
+ * routine. In this case, application should repeat to run the routine
+ * from head, but we expect it is much rare case.
+ * In most case, application can confirm the kernel status is not changed
+ * without any system call invocations.
+ * Hopefully, libselinux encapsulates this logic.
+ */
+static struct page *selinux_status_page = NULL;
+static DEFINE_MUTEX(selinux_status_lock);
+
+/*
+ * selinux_kernel_status_page
+ *
+ * It returns a reference to selinux_status_page. If the status page is
+ * not allocated yet, it also tries to allocate it at the first time.
+ */
+struct page *selinux_kernel_status_page(void)
+{
+	struct selinux_kernel_status   *status;
+	struct page		       *result = NULL;
+
+	mutex_lock(&selinux_status_lock);
+	if (!selinux_status_page)
+	{
+		selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (selinux_status_page)
+		{
+			status = page_address(selinux_status_page);
+
+			status->version = SELINUX_KERNEL_STATUS_VERSION;
+			status->sequence = 0;
+			status->enforcing = selinux_enforcing;
+			/*
+			 * NOTE: the next policyload event shall set
+			 * a positive value on the status->policyload,
+			 * although it may not be 1, but never zero.
+			 * So, application can know it was updated.
+			 */
+			status->policyload = 0;
+			status->deny_unknown = !security_get_allow_unknown();
+		}
+	}
+	result = selinux_status_page;
+	mutex_unlock(&selinux_status_lock);
+
+	return result;
+}
+
+/*
+ * selinux_status_update_setenforce
+ *
+ * It updates status of the current enforcing/permissive mode.
+ */
+void selinux_status_update_setenforce(int enforcing)
+{
+	struct selinux_kernel_status   *status;
+
+	mutex_lock(&selinux_status_lock);
+	if (selinux_status_page)
+	{
+		status = page_address(selinux_status_page);
+
+		status->sequence++;
+		smp_wmb();
+
+		status->enforcing = enforcing;
+
+		smp_wmb();
+		status->sequence++;
+	}
+	mutex_unlock(&selinux_status_lock);
+}
+
+/*
+ * selinux_status_update_policyload
+ *
+ * It updates status of the times of policy reloaded, and current
+ * setting of deny_unknown.
+ */
+void selinux_status_update_policyload(int seqno)
+{
+	struct selinux_kernel_status   *status;
+
+	mutex_lock(&selinux_status_lock);
+	if (selinux_status_page)
+	{
+		status = page_address(selinux_status_page);
+
+		status->sequence++;
+		smp_wmb();
+
+		status->policyload = seqno;
+		status->deny_unknown = !security_get_allow_unknown();
+
+		smp_wmb();
+		status->sequence++;
+	}
+	mutex_unlock(&selinux_status_lock);
+}

-- 
KaiGai Kohei <kaigai@kaigai.gr.jp>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-08-28  3:24         ` KaiGai Kohei
@ 2010-09-02  8:16           ` KaiGai Kohei
  2010-09-07  0:03             ` KaiGai Kohei
  2010-09-13 20:45             ` Eric Paris
  0 siblings, 2 replies; 18+ messages in thread
From: KaiGai Kohei @ 2010-09-02  8:16 UTC (permalink / raw)
  To: Eric Paris; +Cc: KaiGai Kohei, selinux, ewalsh

(2010/08/28 12:24), KaiGai Kohei wrote:
> (2010/08/28 1:19), Eric Paris wrote:
>> On Fri, Aug 27, 2010 at 11:48 AM, Eric Paris<eparis@parisplace.org>   wrote:
>>> 2010/8/27 KaiGai Kohei<kaigai@ak.jp.nec.com>:
>>>> I revised the /selinux/status implementation.
>>>>
>>>> * It becomes to report 'deny_unknown'. Userspace object manager
>>>>    also reference this flag to decide its behavior when the loaded
>>>>    policy does not support expected object classes.
>>>> * It provided PAGE_READONLY to remap_pfn_range() as page protection
>>>>    flag independent from argument of mmap(2), but it was uncommon.
>>>>    I fixed to pass vma->vm_page_prot instead of the hardwired flag
>>>>    according to any other implementation style.
>>>>    Now it returns an error, if user tries to map /selinux/status as
>>>>    writable pages.
>>>
>>> I really hate blowing 4k of memory on every system to show 40 bytes of
>>> data on just a few systems.  Is there any change we could allocate the
>>> page the first time it is needed rather that at boot?  I know compared
>>> to the size of policy and other memory usage in SELinux it's odd for
>>> me to complain, but I've decided to get on a reduction if possible
>>> kick.
>>>
>>> Only other comment is that __initcall() is deprecated and we are
>>> supposed to use device_initcall() now.
>>>
>>> If you plan to use it, I'll ack if you change both of those things....
>>
>> actually if you move to dynamic allocation of the status page and use
>> static DEFINE_SPINLOCK instead of static spinlock_t you can get rid of
>> the __init() code altogether....
>>
> 
> I revised the patch.
> It was changed the selinux_kernel_page being allocated at the first time
> when application tries to reference the /selinux/status.
> At the same time, it declares selinux_status_lock using DEFINE_MUTEX(),
> so whole of the __init section has gone.
> 
> In addition, I changed first member of the selinux_kernel_status from
> 'length' to 'version', because sizeof(struct ...) is aligned to 64bit
> boundary (24bytes) on x86_64 system, although it is actually 20bytes.
> If we want to add a 32bit member in the future, 'length' may not inform
> applications enough.
> 
How about getting the feature?
Although I've not found out this idea for a long time, it is quite helpful
feature to implement SE-PostgreSQL (and other upcoming userspace object
managers) in less invasive way.

I fixed up two minor points in the patch, as follows:
* The 4K of status page becomes allocated at the file_operations::open()
  method, because it seems to me a bit unnatural that either read() or
  mmap() fails due to memory allocation error.
* I forgot to eliminate an unnecessary declaration of extern variable.

 Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
--
 security/selinux/include/security.h |   21 ++++++
 security/selinux/selinuxfs.c        |   56 ++++++++++++++
 security/selinux/ss/Makefile        |    2 +-
 security/selinux/ss/services.c      |    3 +
 security/selinux/ss/status.c        |  135 +++++++++++++++++++++++++++++++++++
 5 files changed, 216 insertions(+), 1 deletions(-)

diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 1f7c249..e390e31 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -191,5 +191,26 @@ static inline int security_netlbl_sid_to_secattr(u32 sid,

 const char *security_get_initial_sid_context(u32 sid);

+/*
+ * status notifier using mmap interface
+ */
+extern struct page *selinux_kernel_status_page(void);
+
+#define SELINUX_KERNEL_STATUS_VERSION	1
+struct selinux_kernel_status
+{
+	u32	version;	/* version number of thie structure */
+	u32	sequence;	/* sequence number of seqlock logic */
+	u32	enforcing;	/* current setting of enforcing mode */
+	u32	policyload;	/* times of policy reloaded */
+	u32	deny_unknown;	/* current setting of deny_unknown */
+	/*
+	 * The version > 0 supports above members.
+	 */
+} __attribute__((packed));
+
+extern void selinux_status_update_setenforce(int enforcing);
+extern void selinux_status_update_policyload(int seqno);
+
 #endif /* _SELINUX_SECURITY_H_ */

diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 79a1bb6..a2e7a85 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -110,6 +110,7 @@ enum sel_inos {
 	SEL_COMPAT_NET,	/* whether to use old compat network packet controls */
 	SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
 	SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
+	SEL_STATUS,	/* export current status using mmap() */
 	SEL_INO_NEXT,	/* The next inode number to use */
 };

@@ -171,6 +172,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
 		if (selinux_enforcing)
 			avc_ss_reset(0);
 		selnl_notify_setenforce(selinux_enforcing);
+		selinux_status_update_setenforce(selinux_enforcing);
 	}
 	length = count;
 out:
@@ -205,6 +207,59 @@ static const struct file_operations sel_handle_unknown_ops = {
 	.llseek		= generic_file_llseek,
 };

+static int sel_open_handle_status(struct inode *inode, struct file *filp)
+{
+	struct page    *status = selinux_kernel_status_page();
+
+	if (!status)
+		return -ENOMEM;
+
+	filp->private_data = status;
+
+	return 0;
+}
+
+static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct page    *status = filp->private_data;
+
+	BUG_ON(!status);
+
+	return simple_read_from_buffer(buf, count, ppos,
+				       page_address(status),
+				       sizeof(struct selinux_kernel_status));
+}
+
+static int sel_mmap_handle_status(struct file *filp,
+				  struct vm_area_struct *vma)
+{
+	struct page    *status = filp->private_data;
+	unsigned long	size = vma->vm_end - vma->vm_start;
+
+	BUG_ON(!status);
+
+	/* only allows one page from the head */
+	if (vma->vm_pgoff > 0 || size != PAGE_SIZE)
+		return -EIO;
+	/* disallow writable mapping */
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+	/* disallow mprotect() turns it into writable */
+	vma->vm_flags &= ~VM_MAYWRITE;
+
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(status),
+			       size, vma->vm_page_prot);
+}
+
+static const struct file_operations sel_handle_status_ops = {
+	.open		= sel_open_handle_status,
+	.read		= sel_read_handle_status,
+	.mmap		= sel_mmap_handle_status,
+	.llseek		= generic_file_llseek,
+};
+
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 static ssize_t sel_write_disable(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
@@ -1612,6 +1667,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
 		[SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
 		[SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO},
 		[SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO},
+		[SEL_STATUS] = {"status", &sel_handle_status_ops, S_IRUGO},
 		/* last one */ {""}
 	};
 	ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile
index 15d4e62..974e11c 100644
--- a/security/selinux/ss/Makefile
+++ b/security/selinux/ss/Makefile
@@ -5,5 +5,5 @@
 EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include
 obj-y := ss.o

-ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o
+ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o status.o

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 9ea2fec..494ff52 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1791,6 +1791,7 @@ int security_load_policy(void *data, size_t len)
 		selinux_complete_init();
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
 		selinux_netlbl_cache_invalidate();
 		selinux_xfrm_notify_policyload();
 		return 0;
@@ -1870,6 +1871,7 @@ int security_load_policy(void *data, size_t len)

 	avc_ss_reset(seqno);
 	selnl_notify_policyload(seqno);
+	selinux_status_update_policyload(seqno);
 	selinux_netlbl_cache_invalidate();
 	selinux_xfrm_notify_policyload();

@@ -2374,6 +2376,7 @@ out:
 	if (!rc) {
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
 		selinux_xfrm_notify_policyload();
 	}
 	return rc;
diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c
new file mode 100644
index 0000000..eeab696
--- /dev/null
+++ b/security/selinux/ss/status.c
@@ -0,0 +1,135 @@
+/*
+ * mmap based event notifications for SELinux
+ *
+ * Author: KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *
+ * Copyright (C) 2010 NEC corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include "avc.h"
+#include "services.h"
+
+/*
+ * The selinux_status_page shall be exposed to userspace applications
+ * using mmap interface on /selinux/status.
+ * It enables to notify applications a few events that will cause reset
+ * of userspace access vector without context switching.
+ *
+ * The selinux_kernel_status structure on the head of status page is
+ * protected from concurrent accesses using seqlock logic, so userspace
+ * application should reference the status page according to the seqlock
+ * logic.
+ *
+ * Typically, application checks status->sequence at the head of access
+ * control routine. If it is odd-number, kernel is updating the status,
+ * so please wait for a moment. If it is changed from the last sequence
+ * number, it means something happen, so application will reset userspace
+ * avc, if needed.
+ * In addition, application should also checks the sequence number at
+ * tail of the access control routine. If it is changed from the value
+ * on the head, it means kernel status was changed under processing the
+ * routine. In this case, application should repeat to run the routine
+ * from head, but we expect it is much rare case.
+ * In most case, application can confirm the kernel status is not changed
+ * without any system call invocations.
+ * Hopefully, libselinux encapsulates this logic.
+ */
+static struct page *selinux_status_page = NULL;
+static DEFINE_MUTEX(selinux_status_lock);
+
+/*
+ * selinux_kernel_status_page
+ *
+ * It returns a reference to selinux_status_page. If the status page is
+ * not allocated yet, it also tries to allocate it at the first time.
+ */
+struct page *selinux_kernel_status_page(void)
+{
+	struct selinux_kernel_status   *status;
+	struct page		       *result = NULL;
+
+	mutex_lock(&selinux_status_lock);
+	if (!selinux_status_page)
+	{
+		selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (selinux_status_page)
+		{
+			status = page_address(selinux_status_page);
+
+			status->version = SELINUX_KERNEL_STATUS_VERSION;
+			status->sequence = 0;
+			status->enforcing = selinux_enforcing;
+			/*
+			 * NOTE: the next policyload event shall set
+			 * a positive value on the status->policyload,
+			 * although it may not be 1, but never zero.
+			 * So, application can know it was updated.
+			 */
+			status->policyload = 0;
+			status->deny_unknown = !security_get_allow_unknown();
+		}
+	}
+	result = selinux_status_page;
+	mutex_unlock(&selinux_status_lock);
+
+	return result;
+}
+
+/*
+ * selinux_status_update_setenforce
+ *
+ * It updates status of the current enforcing/permissive mode.
+ */
+void selinux_status_update_setenforce(int enforcing)
+{
+	struct selinux_kernel_status   *status;
+
+	mutex_lock(&selinux_status_lock);
+	if (selinux_status_page)
+	{
+		status = page_address(selinux_status_page);
+
+		status->sequence++;
+		smp_wmb();
+
+		status->enforcing = enforcing;
+
+		smp_wmb();
+		status->sequence++;
+	}
+	mutex_unlock(&selinux_status_lock);
+}
+
+/*
+ * selinux_status_update_policyload
+ *
+ * It updates status of the times of policy reloaded, and current
+ * setting of deny_unknown.
+ */
+void selinux_status_update_policyload(int seqno)
+{
+	struct selinux_kernel_status   *status;
+
+	mutex_lock(&selinux_status_lock);
+	if (selinux_status_page)
+	{
+		status = page_address(selinux_status_page);
+
+		status->sequence++;
+		smp_wmb();
+
+		status->policyload = seqno;
+		status->deny_unknown = !security_get_allow_unknown();
+
+		smp_wmb();
+		status->sequence++;
+	}
+	mutex_unlock(&selinux_status_lock);
+}

-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-02  8:16           ` KaiGai Kohei
@ 2010-09-07  0:03             ` KaiGai Kohei
  2010-09-10  1:16               ` KaiGai Kohei
  2010-09-13 20:45             ` Eric Paris
  1 sibling, 1 reply; 18+ messages in thread
From: KaiGai Kohei @ 2010-09-07  0:03 UTC (permalink / raw)
  To: Eric Paris; +Cc: KaiGai Kohei, selinux, ewalsh

Do we have any issues to be revised about this patch?
If nothing to fix any more, please include this feature toward the next
merge window.

It shall provide a key feature to implement a security server in userspace
application as loadable module, without expensive system-calls and invasive
patches. At least, PostgreSQL and Memcached need it. :-)

Thanks,

(2010/09/02 17:16), KaiGai Kohei wrote:
> (2010/08/28 12:24), KaiGai Kohei wrote:
>> (2010/08/28 1:19), Eric Paris wrote:
>>> On Fri, Aug 27, 2010 at 11:48 AM, Eric Paris<eparis@parisplace.org>    wrote:
>>>> 2010/8/27 KaiGai Kohei<kaigai@ak.jp.nec.com>:
>>>>> I revised the /selinux/status implementation.
>>>>>
>>>>> * It becomes to report 'deny_unknown'. Userspace object manager
>>>>>     also reference this flag to decide its behavior when the loaded
>>>>>     policy does not support expected object classes.
>>>>> * It provided PAGE_READONLY to remap_pfn_range() as page protection
>>>>>     flag independent from argument of mmap(2), but it was uncommon.
>>>>>     I fixed to pass vma->vm_page_prot instead of the hardwired flag
>>>>>     according to any other implementation style.
>>>>>     Now it returns an error, if user tries to map /selinux/status as
>>>>>     writable pages.
>>>>
>>>> I really hate blowing 4k of memory on every system to show 40 bytes of
>>>> data on just a few systems.  Is there any change we could allocate the
>>>> page the first time it is needed rather that at boot?  I know compared
>>>> to the size of policy and other memory usage in SELinux it's odd for
>>>> me to complain, but I've decided to get on a reduction if possible
>>>> kick.
>>>>
>>>> Only other comment is that __initcall() is deprecated and we are
>>>> supposed to use device_initcall() now.
>>>>
>>>> If you plan to use it, I'll ack if you change both of those things....
>>>
>>> actually if you move to dynamic allocation of the status page and use
>>> static DEFINE_SPINLOCK instead of static spinlock_t you can get rid of
>>> the __init() code altogether....
>>>
>>
>> I revised the patch.
>> It was changed the selinux_kernel_page being allocated at the first time
>> when application tries to reference the /selinux/status.
>> At the same time, it declares selinux_status_lock using DEFINE_MUTEX(),
>> so whole of the __init section has gone.
>>
>> In addition, I changed first member of the selinux_kernel_status from
>> 'length' to 'version', because sizeof(struct ...) is aligned to 64bit
>> boundary (24bytes) on x86_64 system, although it is actually 20bytes.
>> If we want to add a 32bit member in the future, 'length' may not inform
>> applications enough.
>>
> How about getting the feature?
> Although I've not found out this idea for a long time, it is quite helpful
> feature to implement SE-PostgreSQL (and other upcoming userspace object
> managers) in less invasive way.
> 
> I fixed up two minor points in the patch, as follows:
> * The 4K of status page becomes allocated at the file_operations::open()
>    method, because it seems to me a bit unnatural that either read() or
>    mmap() fails due to memory allocation error.
> * I forgot to eliminate an unnecessary declaration of extern variable.
> 
>   Signed-off-by: KaiGai Kohei<kaigai@ak.jp.nec.com>
> --
>   security/selinux/include/security.h |   21 ++++++
>   security/selinux/selinuxfs.c        |   56 ++++++++++++++
>   security/selinux/ss/Makefile        |    2 +-
>   security/selinux/ss/services.c      |    3 +
>   security/selinux/ss/status.c        |  135 +++++++++++++++++++++++++++++++++++
>   5 files changed, 216 insertions(+), 1 deletions(-)
> 
> diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
> index 1f7c249..e390e31 100644
> --- a/security/selinux/include/security.h
> +++ b/security/selinux/include/security.h
> @@ -191,5 +191,26 @@ static inline int security_netlbl_sid_to_secattr(u32 sid,
> 
>   const char *security_get_initial_sid_context(u32 sid);
> 
> +/*
> + * status notifier using mmap interface
> + */
> +extern struct page *selinux_kernel_status_page(void);
> +
> +#define SELINUX_KERNEL_STATUS_VERSION	1
> +struct selinux_kernel_status
> +{
> +	u32	version;	/* version number of thie structure */
> +	u32	sequence;	/* sequence number of seqlock logic */
> +	u32	enforcing;	/* current setting of enforcing mode */
> +	u32	policyload;	/* times of policy reloaded */
> +	u32	deny_unknown;	/* current setting of deny_unknown */
> +	/*
> +	 * The version>  0 supports above members.
> +	 */
> +} __attribute__((packed));
> +
> +extern void selinux_status_update_setenforce(int enforcing);
> +extern void selinux_status_update_policyload(int seqno);
> +
>   #endif /* _SELINUX_SECURITY_H_ */
> 
> diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
> index 79a1bb6..a2e7a85 100644
> --- a/security/selinux/selinuxfs.c
> +++ b/security/selinux/selinuxfs.c
> @@ -110,6 +110,7 @@ enum sel_inos {
>   	SEL_COMPAT_NET,	/* whether to use old compat network packet controls */
>   	SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
>   	SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
> +	SEL_STATUS,	/* export current status using mmap() */
>   	SEL_INO_NEXT,	/* The next inode number to use */
>   };
> 
> @@ -171,6 +172,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
>   		if (selinux_enforcing)
>   			avc_ss_reset(0);
>   		selnl_notify_setenforce(selinux_enforcing);
> +		selinux_status_update_setenforce(selinux_enforcing);
>   	}
>   	length = count;
>   out:
> @@ -205,6 +207,59 @@ static const struct file_operations sel_handle_unknown_ops = {
>   	.llseek		= generic_file_llseek,
>   };
> 
> +static int sel_open_handle_status(struct inode *inode, struct file *filp)
> +{
> +	struct page    *status = selinux_kernel_status_page();
> +
> +	if (!status)
> +		return -ENOMEM;
> +
> +	filp->private_data = status;
> +
> +	return 0;
> +}
> +
> +static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
> +				      size_t count, loff_t *ppos)
> +{
> +	struct page    *status = filp->private_data;
> +
> +	BUG_ON(!status);
> +
> +	return simple_read_from_buffer(buf, count, ppos,
> +				       page_address(status),
> +				       sizeof(struct selinux_kernel_status));
> +}
> +
> +static int sel_mmap_handle_status(struct file *filp,
> +				  struct vm_area_struct *vma)
> +{
> +	struct page    *status = filp->private_data;
> +	unsigned long	size = vma->vm_end - vma->vm_start;
> +
> +	BUG_ON(!status);
> +
> +	/* only allows one page from the head */
> +	if (vma->vm_pgoff>  0 || size != PAGE_SIZE)
> +		return -EIO;
> +	/* disallow writable mapping */
> +	if (vma->vm_flags&  VM_WRITE)
> +		return -EPERM;
> +	/* disallow mprotect() turns it into writable */
> +	vma->vm_flags&= ~VM_MAYWRITE;
> +
> +	return remap_pfn_range(vma, vma->vm_start,
> +			       page_to_pfn(status),
> +			       size, vma->vm_page_prot);
> +}
> +
> +static const struct file_operations sel_handle_status_ops = {
> +	.open		= sel_open_handle_status,
> +	.read		= sel_read_handle_status,
> +	.mmap		= sel_mmap_handle_status,
> +	.llseek		= generic_file_llseek,
> +};
> +
>   #ifdef CONFIG_SECURITY_SELINUX_DISABLE
>   static ssize_t sel_write_disable(struct file *file, const char __user *buf,
>   				 size_t count, loff_t *ppos)
> @@ -1612,6 +1667,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
>   		[SEL_CHECKREQPROT] = {"checkreqprot",&sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
>   		[SEL_REJECT_UNKNOWN] = {"reject_unknown",&sel_handle_unknown_ops, S_IRUGO},
>   		[SEL_DENY_UNKNOWN] = {"deny_unknown",&sel_handle_unknown_ops, S_IRUGO},
> +		[SEL_STATUS] = {"status",&sel_handle_status_ops, S_IRUGO},
>   		/* last one */ {""}
>   	};
>   	ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
> diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile
> index 15d4e62..974e11c 100644
> --- a/security/selinux/ss/Makefile
> +++ b/security/selinux/ss/Makefile
> @@ -5,5 +5,5 @@
>   EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include
>   obj-y := ss.o
> 
> -ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o
> +ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o status.o
> 
> diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
> index 9ea2fec..494ff52 100644
> --- a/security/selinux/ss/services.c
> +++ b/security/selinux/ss/services.c
> @@ -1791,6 +1791,7 @@ int security_load_policy(void *data, size_t len)
>   		selinux_complete_init();
>   		avc_ss_reset(seqno);
>   		selnl_notify_policyload(seqno);
> +		selinux_status_update_policyload(seqno);
>   		selinux_netlbl_cache_invalidate();
>   		selinux_xfrm_notify_policyload();
>   		return 0;
> @@ -1870,6 +1871,7 @@ int security_load_policy(void *data, size_t len)
> 
>   	avc_ss_reset(seqno);
>   	selnl_notify_policyload(seqno);
> +	selinux_status_update_policyload(seqno);
>   	selinux_netlbl_cache_invalidate();
>   	selinux_xfrm_notify_policyload();
> 
> @@ -2374,6 +2376,7 @@ out:
>   	if (!rc) {
>   		avc_ss_reset(seqno);
>   		selnl_notify_policyload(seqno);
> +		selinux_status_update_policyload(seqno);
>   		selinux_xfrm_notify_policyload();
>   	}
>   	return rc;
> diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c
> new file mode 100644
> index 0000000..eeab696
> --- /dev/null
> +++ b/security/selinux/ss/status.c
> @@ -0,0 +1,135 @@
> +/*
> + * mmap based event notifications for SELinux
> + *
> + * Author: KaiGai Kohei<kaigai@ak.jp.nec.com>
> + *
> + * Copyright (C) 2010 NEC corporation
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2,
> + * as published by the Free Software Foundation.
> + */
> +#include<linux/kernel.h>
> +#include<linux/gfp.h>
> +#include<linux/mm.h>
> +#include<linux/mutex.h>
> +#include "avc.h"
> +#include "services.h"
> +
> +/*
> + * The selinux_status_page shall be exposed to userspace applications
> + * using mmap interface on /selinux/status.
> + * It enables to notify applications a few events that will cause reset
> + * of userspace access vector without context switching.
> + *
> + * The selinux_kernel_status structure on the head of status page is
> + * protected from concurrent accesses using seqlock logic, so userspace
> + * application should reference the status page according to the seqlock
> + * logic.
> + *
> + * Typically, application checks status->sequence at the head of access
> + * control routine. If it is odd-number, kernel is updating the status,
> + * so please wait for a moment. If it is changed from the last sequence
> + * number, it means something happen, so application will reset userspace
> + * avc, if needed.
> + * In addition, application should also checks the sequence number at
> + * tail of the access control routine. If it is changed from the value
> + * on the head, it means kernel status was changed under processing the
> + * routine. In this case, application should repeat to run the routine
> + * from head, but we expect it is much rare case.
> + * In most case, application can confirm the kernel status is not changed
> + * without any system call invocations.
> + * Hopefully, libselinux encapsulates this logic.
> + */
> +static struct page *selinux_status_page = NULL;
> +static DEFINE_MUTEX(selinux_status_lock);
> +
> +/*
> + * selinux_kernel_status_page
> + *
> + * It returns a reference to selinux_status_page. If the status page is
> + * not allocated yet, it also tries to allocate it at the first time.
> + */
> +struct page *selinux_kernel_status_page(void)
> +{
> +	struct selinux_kernel_status   *status;
> +	struct page		       *result = NULL;
> +
> +	mutex_lock(&selinux_status_lock);
> +	if (!selinux_status_page)
> +	{
> +		selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +		if (selinux_status_page)
> +		{
> +			status = page_address(selinux_status_page);
> +
> +			status->version = SELINUX_KERNEL_STATUS_VERSION;
> +			status->sequence = 0;
> +			status->enforcing = selinux_enforcing;
> +			/*
> +			 * NOTE: the next policyload event shall set
> +			 * a positive value on the status->policyload,
> +			 * although it may not be 1, but never zero.
> +			 * So, application can know it was updated.
> +			 */
> +			status->policyload = 0;
> +			status->deny_unknown = !security_get_allow_unknown();
> +		}
> +	}
> +	result = selinux_status_page;
> +	mutex_unlock(&selinux_status_lock);
> +
> +	return result;
> +}
> +
> +/*
> + * selinux_status_update_setenforce
> + *
> + * It updates status of the current enforcing/permissive mode.
> + */
> +void selinux_status_update_setenforce(int enforcing)
> +{
> +	struct selinux_kernel_status   *status;
> +
> +	mutex_lock(&selinux_status_lock);
> +	if (selinux_status_page)
> +	{
> +		status = page_address(selinux_status_page);
> +
> +		status->sequence++;
> +		smp_wmb();
> +
> +		status->enforcing = enforcing;
> +
> +		smp_wmb();
> +		status->sequence++;
> +	}
> +	mutex_unlock(&selinux_status_lock);
> +}
> +
> +/*
> + * selinux_status_update_policyload
> + *
> + * It updates status of the times of policy reloaded, and current
> + * setting of deny_unknown.
> + */
> +void selinux_status_update_policyload(int seqno)
> +{
> +	struct selinux_kernel_status   *status;
> +
> +	mutex_lock(&selinux_status_lock);
> +	if (selinux_status_page)
> +	{
> +		status = page_address(selinux_status_page);
> +
> +		status->sequence++;
> +		smp_wmb();
> +
> +		status->policyload = seqno;
> +		status->deny_unknown = !security_get_allow_unknown();
> +
> +		smp_wmb();
> +		status->sequence++;
> +	}
> +	mutex_unlock(&selinux_status_lock);
> +}
> 


-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-07  0:03             ` KaiGai Kohei
@ 2010-09-10  1:16               ` KaiGai Kohei
  0 siblings, 0 replies; 18+ messages in thread
From: KaiGai Kohei @ 2010-09-10  1:16 UTC (permalink / raw)
  To: Eric Paris; +Cc: KaiGai Kohei, selinux, ewalsh

[-- Attachment #1: Type: text/plain, Size: 15328 bytes --]

The attached program is a benchmark to iterate avc_has_perm_noaudit()
in userspace.
In the case when it is not reasonable to launch background worker
process/thread, we need to check the status of in-kernel selinux
before and after avc lookups to ensure the cached avc entries are
still valid and queries to selinux were done atomically.

If we use netlink socket to check the status of selinux.

  [kaigai@saba ~]$ gcc status_test.c -l selinux
  [kaigai@saba ~]$ time -p ./a.out 1
  real 65.44
  user 23.91
  sys 41.48

It we use the new /selinux/status with mmap(2)
  [kaigai@saba ~]$ time -p ./a.out 0
  real 4.71
  user 4.68
  sys 0.02

Although it loops 1,000,000 times, but it is not a corner case in
RDBMS with row-level security on large tables, or key-value-store
that handles massive number of requests from web applications.

Well, the /selinux/status is strongly desired feature from the
viewpoint of userspace object manager.
If I need to revise the patch something, please point out it.

Thanks,

(2010/09/07 9:03), KaiGai Kohei wrote:
> Do we have any issues to be revised about this patch?
> If nothing to fix any more, please include this feature toward the next
> merge window.
> 
> It shall provide a key feature to implement a security server in userspace
> application as loadable module, without expensive system-calls and invasive
> patches. At least, PostgreSQL and Memcached need it. :-)
> 
> Thanks,
> 
> (2010/09/02 17:16), KaiGai Kohei wrote:
>> (2010/08/28 12:24), KaiGai Kohei wrote:
>>> (2010/08/28 1:19), Eric Paris wrote:
>>>> On Fri, Aug 27, 2010 at 11:48 AM, Eric Paris<eparis@parisplace.org>     wrote:
>>>>> 2010/8/27 KaiGai Kohei<kaigai@ak.jp.nec.com>:
>>>>>> I revised the /selinux/status implementation.
>>>>>>
>>>>>> * It becomes to report 'deny_unknown'. Userspace object manager
>>>>>>      also reference this flag to decide its behavior when the loaded
>>>>>>      policy does not support expected object classes.
>>>>>> * It provided PAGE_READONLY to remap_pfn_range() as page protection
>>>>>>      flag independent from argument of mmap(2), but it was uncommon.
>>>>>>      I fixed to pass vma->vm_page_prot instead of the hardwired flag
>>>>>>      according to any other implementation style.
>>>>>>      Now it returns an error, if user tries to map /selinux/status as
>>>>>>      writable pages.
>>>>>
>>>>> I really hate blowing 4k of memory on every system to show 40 bytes of
>>>>> data on just a few systems.  Is there any change we could allocate the
>>>>> page the first time it is needed rather that at boot?  I know compared
>>>>> to the size of policy and other memory usage in SELinux it's odd for
>>>>> me to complain, but I've decided to get on a reduction if possible
>>>>> kick.
>>>>>
>>>>> Only other comment is that __initcall() is deprecated and we are
>>>>> supposed to use device_initcall() now.
>>>>>
>>>>> If you plan to use it, I'll ack if you change both of those things....
>>>>
>>>> actually if you move to dynamic allocation of the status page and use
>>>> static DEFINE_SPINLOCK instead of static spinlock_t you can get rid of
>>>> the __init() code altogether....
>>>>
>>>
>>> I revised the patch.
>>> It was changed the selinux_kernel_page being allocated at the first time
>>> when application tries to reference the /selinux/status.
>>> At the same time, it declares selinux_status_lock using DEFINE_MUTEX(),
>>> so whole of the __init section has gone.
>>>
>>> In addition, I changed first member of the selinux_kernel_status from
>>> 'length' to 'version', because sizeof(struct ...) is aligned to 64bit
>>> boundary (24bytes) on x86_64 system, although it is actually 20bytes.
>>> If we want to add a 32bit member in the future, 'length' may not inform
>>> applications enough.
>>>
>> How about getting the feature?
>> Although I've not found out this idea for a long time, it is quite helpful
>> feature to implement SE-PostgreSQL (and other upcoming userspace object
>> managers) in less invasive way.
>>
>> I fixed up two minor points in the patch, as follows:
>> * The 4K of status page becomes allocated at the file_operations::open()
>>     method, because it seems to me a bit unnatural that either read() or
>>     mmap() fails due to memory allocation error.
>> * I forgot to eliminate an unnecessary declaration of extern variable.
>>
>>    Signed-off-by: KaiGai Kohei<kaigai@ak.jp.nec.com>
>> --
>>    security/selinux/include/security.h |   21 ++++++
>>    security/selinux/selinuxfs.c        |   56 ++++++++++++++
>>    security/selinux/ss/Makefile        |    2 +-
>>    security/selinux/ss/services.c      |    3 +
>>    security/selinux/ss/status.c        |  135 +++++++++++++++++++++++++++++++++++
>>    5 files changed, 216 insertions(+), 1 deletions(-)
>>
>> diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
>> index 1f7c249..e390e31 100644
>> --- a/security/selinux/include/security.h
>> +++ b/security/selinux/include/security.h
>> @@ -191,5 +191,26 @@ static inline int security_netlbl_sid_to_secattr(u32 sid,
>>
>>    const char *security_get_initial_sid_context(u32 sid);
>>
>> +/*
>> + * status notifier using mmap interface
>> + */
>> +extern struct page *selinux_kernel_status_page(void);
>> +
>> +#define SELINUX_KERNEL_STATUS_VERSION	1
>> +struct selinux_kernel_status
>> +{
>> +	u32	version;	/* version number of thie structure */
>> +	u32	sequence;	/* sequence number of seqlock logic */
>> +	u32	enforcing;	/* current setting of enforcing mode */
>> +	u32	policyload;	/* times of policy reloaded */
>> +	u32	deny_unknown;	/* current setting of deny_unknown */
>> +	/*
>> +	 * The version>   0 supports above members.
>> +	 */
>> +} __attribute__((packed));
>> +
>> +extern void selinux_status_update_setenforce(int enforcing);
>> +extern void selinux_status_update_policyload(int seqno);
>> +
>>    #endif /* _SELINUX_SECURITY_H_ */
>>
>> diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
>> index 79a1bb6..a2e7a85 100644
>> --- a/security/selinux/selinuxfs.c
>> +++ b/security/selinux/selinuxfs.c
>> @@ -110,6 +110,7 @@ enum sel_inos {
>>    	SEL_COMPAT_NET,	/* whether to use old compat network packet controls */
>>    	SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
>>    	SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
>> +	SEL_STATUS,	/* export current status using mmap() */
>>    	SEL_INO_NEXT,	/* The next inode number to use */
>>    };
>>
>> @@ -171,6 +172,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
>>    		if (selinux_enforcing)
>>    			avc_ss_reset(0);
>>    		selnl_notify_setenforce(selinux_enforcing);
>> +		selinux_status_update_setenforce(selinux_enforcing);
>>    	}
>>    	length = count;
>>    out:
>> @@ -205,6 +207,59 @@ static const struct file_operations sel_handle_unknown_ops = {
>>    	.llseek		= generic_file_llseek,
>>    };
>>
>> +static int sel_open_handle_status(struct inode *inode, struct file *filp)
>> +{
>> +	struct page    *status = selinux_kernel_status_page();
>> +
>> +	if (!status)
>> +		return -ENOMEM;
>> +
>> +	filp->private_data = status;
>> +
>> +	return 0;
>> +}
>> +
>> +static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
>> +				      size_t count, loff_t *ppos)
>> +{
>> +	struct page    *status = filp->private_data;
>> +
>> +	BUG_ON(!status);
>> +
>> +	return simple_read_from_buffer(buf, count, ppos,
>> +				       page_address(status),
>> +				       sizeof(struct selinux_kernel_status));
>> +}
>> +
>> +static int sel_mmap_handle_status(struct file *filp,
>> +				  struct vm_area_struct *vma)
>> +{
>> +	struct page    *status = filp->private_data;
>> +	unsigned long	size = vma->vm_end - vma->vm_start;
>> +
>> +	BUG_ON(!status);
>> +
>> +	/* only allows one page from the head */
>> +	if (vma->vm_pgoff>   0 || size != PAGE_SIZE)
>> +		return -EIO;
>> +	/* disallow writable mapping */
>> +	if (vma->vm_flags&   VM_WRITE)
>> +		return -EPERM;
>> +	/* disallow mprotect() turns it into writable */
>> +	vma->vm_flags&= ~VM_MAYWRITE;
>> +
>> +	return remap_pfn_range(vma, vma->vm_start,
>> +			       page_to_pfn(status),
>> +			       size, vma->vm_page_prot);
>> +}
>> +
>> +static const struct file_operations sel_handle_status_ops = {
>> +	.open		= sel_open_handle_status,
>> +	.read		= sel_read_handle_status,
>> +	.mmap		= sel_mmap_handle_status,
>> +	.llseek		= generic_file_llseek,
>> +};
>> +
>>    #ifdef CONFIG_SECURITY_SELINUX_DISABLE
>>    static ssize_t sel_write_disable(struct file *file, const char __user *buf,
>>    				 size_t count, loff_t *ppos)
>> @@ -1612,6 +1667,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
>>    		[SEL_CHECKREQPROT] = {"checkreqprot",&sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
>>    		[SEL_REJECT_UNKNOWN] = {"reject_unknown",&sel_handle_unknown_ops, S_IRUGO},
>>    		[SEL_DENY_UNKNOWN] = {"deny_unknown",&sel_handle_unknown_ops, S_IRUGO},
>> +		[SEL_STATUS] = {"status",&sel_handle_status_ops, S_IRUGO},
>>    		/* last one */ {""}
>>    	};
>>    	ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
>> diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile
>> index 15d4e62..974e11c 100644
>> --- a/security/selinux/ss/Makefile
>> +++ b/security/selinux/ss/Makefile
>> @@ -5,5 +5,5 @@
>>    EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include
>>    obj-y := ss.o
>>
>> -ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o
>> +ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o status.o
>>
>> diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
>> index 9ea2fec..494ff52 100644
>> --- a/security/selinux/ss/services.c
>> +++ b/security/selinux/ss/services.c
>> @@ -1791,6 +1791,7 @@ int security_load_policy(void *data, size_t len)
>>    		selinux_complete_init();
>>    		avc_ss_reset(seqno);
>>    		selnl_notify_policyload(seqno);
>> +		selinux_status_update_policyload(seqno);
>>    		selinux_netlbl_cache_invalidate();
>>    		selinux_xfrm_notify_policyload();
>>    		return 0;
>> @@ -1870,6 +1871,7 @@ int security_load_policy(void *data, size_t len)
>>
>>    	avc_ss_reset(seqno);
>>    	selnl_notify_policyload(seqno);
>> +	selinux_status_update_policyload(seqno);
>>    	selinux_netlbl_cache_invalidate();
>>    	selinux_xfrm_notify_policyload();
>>
>> @@ -2374,6 +2376,7 @@ out:
>>    	if (!rc) {
>>    		avc_ss_reset(seqno);
>>    		selnl_notify_policyload(seqno);
>> +		selinux_status_update_policyload(seqno);
>>    		selinux_xfrm_notify_policyload();
>>    	}
>>    	return rc;
>> diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c
>> new file mode 100644
>> index 0000000..eeab696
>> --- /dev/null
>> +++ b/security/selinux/ss/status.c
>> @@ -0,0 +1,135 @@
>> +/*
>> + * mmap based event notifications for SELinux
>> + *
>> + * Author: KaiGai Kohei<kaigai@ak.jp.nec.com>
>> + *
>> + * Copyright (C) 2010 NEC corporation
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2,
>> + * as published by the Free Software Foundation.
>> + */
>> +#include<linux/kernel.h>
>> +#include<linux/gfp.h>
>> +#include<linux/mm.h>
>> +#include<linux/mutex.h>
>> +#include "avc.h"
>> +#include "services.h"
>> +
>> +/*
>> + * The selinux_status_page shall be exposed to userspace applications
>> + * using mmap interface on /selinux/status.
>> + * It enables to notify applications a few events that will cause reset
>> + * of userspace access vector without context switching.
>> + *
>> + * The selinux_kernel_status structure on the head of status page is
>> + * protected from concurrent accesses using seqlock logic, so userspace
>> + * application should reference the status page according to the seqlock
>> + * logic.
>> + *
>> + * Typically, application checks status->sequence at the head of access
>> + * control routine. If it is odd-number, kernel is updating the status,
>> + * so please wait for a moment. If it is changed from the last sequence
>> + * number, it means something happen, so application will reset userspace
>> + * avc, if needed.
>> + * In addition, application should also checks the sequence number at
>> + * tail of the access control routine. If it is changed from the value
>> + * on the head, it means kernel status was changed under processing the
>> + * routine. In this case, application should repeat to run the routine
>> + * from head, but we expect it is much rare case.
>> + * In most case, application can confirm the kernel status is not changed
>> + * without any system call invocations.
>> + * Hopefully, libselinux encapsulates this logic.
>> + */
>> +static struct page *selinux_status_page = NULL;
>> +static DEFINE_MUTEX(selinux_status_lock);
>> +
>> +/*
>> + * selinux_kernel_status_page
>> + *
>> + * It returns a reference to selinux_status_page. If the status page is
>> + * not allocated yet, it also tries to allocate it at the first time.
>> + */
>> +struct page *selinux_kernel_status_page(void)
>> +{
>> +	struct selinux_kernel_status   *status;
>> +	struct page		       *result = NULL;
>> +
>> +	mutex_lock(&selinux_status_lock);
>> +	if (!selinux_status_page)
>> +	{
>> +		selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
>> +		if (selinux_status_page)
>> +		{
>> +			status = page_address(selinux_status_page);
>> +
>> +			status->version = SELINUX_KERNEL_STATUS_VERSION;
>> +			status->sequence = 0;
>> +			status->enforcing = selinux_enforcing;
>> +			/*
>> +			 * NOTE: the next policyload event shall set
>> +			 * a positive value on the status->policyload,
>> +			 * although it may not be 1, but never zero.
>> +			 * So, application can know it was updated.
>> +			 */
>> +			status->policyload = 0;
>> +			status->deny_unknown = !security_get_allow_unknown();
>> +		}
>> +	}
>> +	result = selinux_status_page;
>> +	mutex_unlock(&selinux_status_lock);
>> +
>> +	return result;
>> +}
>> +
>> +/*
>> + * selinux_status_update_setenforce
>> + *
>> + * It updates status of the current enforcing/permissive mode.
>> + */
>> +void selinux_status_update_setenforce(int enforcing)
>> +{
>> +	struct selinux_kernel_status   *status;
>> +
>> +	mutex_lock(&selinux_status_lock);
>> +	if (selinux_status_page)
>> +	{
>> +		status = page_address(selinux_status_page);
>> +
>> +		status->sequence++;
>> +		smp_wmb();
>> +
>> +		status->enforcing = enforcing;
>> +
>> +		smp_wmb();
>> +		status->sequence++;
>> +	}
>> +	mutex_unlock(&selinux_status_lock);
>> +}
>> +
>> +/*
>> + * selinux_status_update_policyload
>> + *
>> + * It updates status of the times of policy reloaded, and current
>> + * setting of deny_unknown.
>> + */
>> +void selinux_status_update_policyload(int seqno)
>> +{
>> +	struct selinux_kernel_status   *status;
>> +
>> +	mutex_lock(&selinux_status_lock);
>> +	if (selinux_status_page)
>> +	{
>> +		status = page_address(selinux_status_page);
>> +
>> +		status->sequence++;
>> +		smp_wmb();
>> +
>> +		status->policyload = seqno;
>> +		status->deny_unknown = !security_get_allow_unknown();
>> +
>> +		smp_wmb();
>> +		status->sequence++;
>> +	}
>> +	mutex_unlock(&selinux_status_lock);
>> +}
>>
> 
> 


-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

[-- Attachment #2: status_test.c --]
[-- Type: text/plain, Size: 2758 bytes --]

#include <errno.h>
#include <fcntl.h>
#include <selinux/selinux.h>
#include <selinux/avc.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#define N_TRIAL		10000000

typedef struct
{
	uint32_t	version;
	uint32_t	sequence;
	uint32_t	enforcing;
	uint32_t	policyload;
	uint32_t	deny_unknown;
} selinux_status_t;

static uint32_t		compat_seqno = 0;
static int test_callback(int code)
{
	compat_seqno++;

	return 0;
}

int main(int argc, const char *argv[])
{
	selinux_status_t       *status;
	security_context_t	scontext = "staff_u:staff_r:staff_t:s0";
	security_context_t	tcontext = "system_u:object_r:etc_t:s0";
	security_id_t		ssid, tsid;
	security_class_t	tclass;
	struct av_decision	avd;
	int			compat, fd, i;

	if (argc < 2)
	{
		printf("usage: %s (1: compat| 0: mmap)\n", argv[0]);
		return 1;
	}
	compat = atoi(argv[1]);

	if (avc_open(NULL, 0) < 0)
	{
		printf("failed on avc_open() : %s\n", strerror(errno));
		return 1;
	}

	/*
	 * I think existing avc code is a bit rough, because it checks
	 * netlink socket on the head of avc_has_perm_noaudit(), but
	 * not on the tail of the function. It cannot ensure the kernel
	 * status is not updated during the function.
	 * So, we also need to check it after the avc lookups, then
	 * it shall repeat from the head, if something updated.
	 */
	avc_netlink_acquire_fd();

	if (compat)
	{
		union selinux_callback	cb;
		/*
		 * callback functions to update sequence number
		 */
		cb.func_setenforce = test_callback;
		selinux_set_callback(SELINUX_CB_SETENFORCE, cb);
		cb.func_policyload = test_callback;
		selinux_set_callback(SELINUX_CB_POLICYLOAD, cb);
	}
	else
	{
		/*
		 * Open /selinux/status and mmap it.
		 */
		fd = open("/selinux/status", O_RDONLY);
		if (fd < 0)
		{
			printf("failed to open /selinux/status : %s\n",
			       strerror(errno));
			return 1;
		}
		status = mmap(NULL, sysconf(_SC_PAGESIZE),
			      PROT_READ, MAP_SHARED, fd, 0);
		if (status == MAP_FAILED)
		{
			printf("failed to mmap : %s\n", strerror(errno));
			return 1;
		}
	}

	/*
	 * Start benchmarking
	 */
	for (i = 0; i < N_TRIAL; i++)
	{
		uint32_t	seqno;
	repeat:
		if (compat)
		{
			avc_netlink_check_nb();
			seqno = compat_seqno;
		}
		else
		{
			seqno = status->sequence;
			__sync_synchronize();
		}
		tclass = string_to_security_class("file");
		avc_context_to_sid_raw(scontext, &ssid);
		avc_context_to_sid_raw(tcontext, &tsid);

		avc_has_perm_noaudit(ssid, tsid, tclass, -1, NULL, &avd);

		if (compat)
		{
			avc_netlink_check_nb();
			if (seqno != compat_seqno)
				goto repeat;
		}
		else
		{
			__sync_synchronize();
			if (seqno != status->sequence)
				goto repeat;
		}
	}
}

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-02  8:16           ` KaiGai Kohei
  2010-09-07  0:03             ` KaiGai Kohei
@ 2010-09-13 20:45             ` Eric Paris
  2010-09-14  9:28               ` KaiGai Kohei
  2010-09-14  9:31               ` KaiGai Kohei
  1 sibling, 2 replies; 18+ messages in thread
From: Eric Paris @ 2010-09-13 20:45 UTC (permalink / raw)
  To: KaiGai Kohei; +Cc: KaiGai Kohei, selinux, ewalsh

On Thu, 2010-09-02 at 17:16 +0900, KaiGai Kohei wrote:
> (2010/08/28 12:24), KaiGai Kohei wrote:
> > (2010/08/28 1:19), Eric Paris wrote:
> >> On Fri, Aug 27, 2010 at 11:48 AM, Eric Paris<eparis@parisplace.org>   wrote:
> >>> 2010/8/27 KaiGai Kohei<kaigai@ak.jp.nec.com>:
> >>>> I revised the /selinux/status implementation.
> >>>>
> >>>> * It becomes to report 'deny_unknown'. Userspace object manager
> >>>>    also reference this flag to decide its behavior when the loaded
> >>>>    policy does not support expected object classes.
> >>>> * It provided PAGE_READONLY to remap_pfn_range() as page protection
> >>>>    flag independent from argument of mmap(2), but it was uncommon.
> >>>>    I fixed to pass vma->vm_page_prot instead of the hardwired flag
> >>>>    according to any other implementation style.
> >>>>    Now it returns an error, if user tries to map /selinux/status as
> >>>>    writable pages.
> >>>
> >>> I really hate blowing 4k of memory on every system to show 40 bytes of
> >>> data on just a few systems.  Is there any change we could allocate the
> >>> page the first time it is needed rather that at boot?  I know compared
> >>> to the size of policy and other memory usage in SELinux it's odd for
> >>> me to complain, but I've decided to get on a reduction if possible
> >>> kick.
> >>>
> >>> Only other comment is that __initcall() is deprecated and we are
> >>> supposed to use device_initcall() now.
> >>>
> >>> If you plan to use it, I'll ack if you change both of those things....
> >>
> >> actually if you move to dynamic allocation of the status page and use
> >> static DEFINE_SPINLOCK instead of static spinlock_t you can get rid of
> >> the __init() code altogether....
> >>
> > 
> > I revised the patch.
> > It was changed the selinux_kernel_page being allocated at the first time
> > when application tries to reference the /selinux/status.
> > At the same time, it declares selinux_status_lock using DEFINE_MUTEX(),
> > so whole of the __init section has gone.
> > 
> > In addition, I changed first member of the selinux_kernel_status from
> > 'length' to 'version', because sizeof(struct ...) is aligned to 64bit
> > boundary (24bytes) on x86_64 system, although it is actually 20bytes.
> > If we want to add a 32bit member in the future, 'length' may not inform
> > applications enough.
> > 
> How about getting the feature?
> Although I've not found out this idea for a long time, it is quite helpful
> feature to implement SE-PostgreSQL (and other upcoming userspace object
> managers) in less invasive way.
> 
> I fixed up two minor points in the patch, as follows:
> * The 4K of status page becomes allocated at the file_operations::open()
>   method, because it seems to me a bit unnatural that either read() or
>   mmap() fails due to memory allocation error.
> * I forgot to eliminate an unnecessary declaration of extern variable.
> 
>  Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>

Sorry I was on vacation for the last 2 weeks.  I'm happy with it so:

Acked-by: Eric Paris <eparis@redhat.com>

As to one comment in the code:

+ * In addition, application should also checks the sequence number at
+ * tail of the access control routine. If it is changed from the value
+ * on the head, it means kernel status was changed under processing the
+ * routine. In this case, application should repeat to run the routine
+ * from head, but we expect it is much rare case.

Is this just to eliminate the race where:

userspace checks seqno
				kernel loads new policy
userspace avc responds to request

but the response 'should' have been different thanks to the policy load
that the userspace object manager didn't hear?  I claim there is no race
here, since he request had to be made before the policy load, even if
the userspace AVC didn't respond until after the load.

I just don't see where closing that 'race' actually improves security.
The application is going to actually do the privileged operation some
time after the access check.  So what we have today seems just as good
when we consider the set of operations

userspace checks seqno
userspace avc respond to request
userspace checks seqno
				kernel loads new policy
userspace actually performs priv operation.

Doesn't change the patch at all, I just think it makes your test case
looks a lot worse than it needs to.

I wonder what your results are when you use the separate thread method
which doesn't have anything expect 1,000,000 calls to avc_has_perm().
Doesn't matter just me wondering out loud and wondering if there is any
reason to use the separate thread implementation if this is just about
as fast.

-Eric



--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-13 20:45             ` Eric Paris
@ 2010-09-14  9:28               ` KaiGai Kohei
  2010-09-14 13:25                 ` Eric Paris
                                   ` (2 more replies)
  2010-09-14  9:31               ` KaiGai Kohei
  1 sibling, 3 replies; 18+ messages in thread
From: KaiGai Kohei @ 2010-09-14  9:28 UTC (permalink / raw)
  To: Eric Paris; +Cc: KaiGai Kohei, selinux, ewalsh

(2010/09/14 5:45), Eric Paris wrote:
> Sorry I was on vacation for the last 2 weeks.  I'm happy with it so:
> 
> Acked-by: Eric Paris<eparis@redhat.com>
> 
> As to one comment in the code:
> 
> + * In addition, application should also checks the sequence number at
> + * tail of the access control routine. If it is changed from the value
> + * on the head, it means kernel status was changed under processing the
> + * routine. In this case, application should repeat to run the routine
> + * from head, but we expect it is much rare case.
> 
Thanks for your comments.

I'll introduce in the next message why I considered application 'should'
check the sequence number both of the head and tail of access control
routine.
However, this source code comments might be a bit imperative, apart from
whether here is actually a race, or not. So, I fixed up the comment as
follows, and rest of the part is unchanged.

@@ -203,14 +203,8 @@ index 0000000..eeab696
 + * so please wait for a moment. If it is changed from the last sequence
 + * number, it means something happen, so application will reset userspace
 + * avc, if needed.
-+ * In addition, application should also checks the sequence number at
-+ * tail of the access control routine. If it is changed from the value
-+ * on the head, it means kernel status was changed under processing the
-+ * routine. In this case, application should repeat to run the routine
-+ * from head, but we expect it is much rare case.
-+ * In most case, application can confirm the kernel status is not changed
-+ * without any system call invocations.
-+ * Hopefully, libselinux encapsulates this logic.
++ * In most cases, application shall confirm the kernel status is not
++ * changed without any system call invocations.
 + */
 +static struct page *selinux_status_page = NULL;
 +static DEFINE_MUTEX(selinux_status_lock);

<--------------------------------( cut here )-------------------------------->

This patch provides a new /selinux/status entry which allows applications
read-only mmap(2).
This region reflects selinux_kernel_status structure in kernel space.
  struct selinux_kernel_status
  {
          u32     length;         /* length of this structure */
          u32     sequence;       /* sequence number of seqlock logic */
          u32     enforcing;      /* current setting of enforcing mode */
          u32     policyload;     /* times of policy reloaded */
          u32     deny_unknown;   /* current setting of deny_unknown */
  };

When userspace object manager caches access control decisions provided
by SELinux, it needs to invalidate the cache on policy reload and setenforce
to keep consistency.
However, the applications need to check the kernel state for each accesses
on userspace avc, or launch a background worker process.
In heuristic, frequency of invalidation is much less than frequency of
making access control decision, so it is annoying to invoke a system call
to check we don't need to invalidate the userspace cache.
If we can use a background worker thread, it allows to receive invalidation
messages from the kernel. But it requires us an invasive coding toward the
base application in some cases; E.g, when we provide a feature performing
with SELinux as a plugin module, it is unwelcome manner to launch its own
worker thread from the module.

If we could map /selinux/status to process memory space, application can
know updates of selinux status; policy reload or setenforce.

A typical application checks selinux_kernel_status::sequence when it tries
to reference userspace avc. If it was changed from the last time when it
checked userspace avc, it means something was updated in the kernel space.
Then, the application can reset userspace avc or update current enforcing
mode, without any system call invocations.
This sequence number is updated according to the seqlock logic, so we need
to wait for a while if it is odd number.

Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
--
 security/selinux/include/security.h |   21 ++++++
 security/selinux/selinuxfs.c        |   56 +++++++++++++++
 security/selinux/ss/Makefile        |    2 +-
 security/selinux/ss/services.c      |    3 +
 security/selinux/ss/status.c        |  129 +++++++++++++++++++++++++++++++++++
 5 files changed, 210 insertions(+), 1 deletions(-)

diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 1f7c249..e390e31 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -191,5 +191,26 @@ static inline int security_netlbl_sid_to_secattr(u32 sid,

 const char *security_get_initial_sid_context(u32 sid);

+/*
+ * status notifier using mmap interface
+ */
+extern struct page *selinux_kernel_status_page(void);
+
+#define SELINUX_KERNEL_STATUS_VERSION	1
+struct selinux_kernel_status
+{
+	u32	version;	/* version number of thie structure */
+	u32	sequence;	/* sequence number of seqlock logic */
+	u32	enforcing;	/* current setting of enforcing mode */
+	u32	policyload;	/* times of policy reloaded */
+	u32	deny_unknown;	/* current setting of deny_unknown */
+	/*
+	 * The version > 0 supports above members.
+	 */
+} __attribute__((packed));
+
+extern void selinux_status_update_setenforce(int enforcing);
+extern void selinux_status_update_policyload(int seqno);
+
 #endif /* _SELINUX_SECURITY_H_ */

diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 79a1bb6..a2e7a85 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -110,6 +110,7 @@ enum sel_inos {
 	SEL_COMPAT_NET,	/* whether to use old compat network packet controls */
 	SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
 	SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
+	SEL_STATUS,	/* export current status using mmap() */
 	SEL_INO_NEXT,	/* The next inode number to use */
 };

@@ -171,6 +172,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
 		if (selinux_enforcing)
 			avc_ss_reset(0);
 		selnl_notify_setenforce(selinux_enforcing);
+		selinux_status_update_setenforce(selinux_enforcing);
 	}
 	length = count;
 out:
@@ -205,6 +207,59 @@ static const struct file_operations sel_handle_unknown_ops = {
 	.llseek		= generic_file_llseek,
 };

+static int sel_open_handle_status(struct inode *inode, struct file *filp)
+{
+	struct page    *status = selinux_kernel_status_page();
+
+	if (!status)
+		return -ENOMEM;
+
+	filp->private_data = status;
+
+	return 0;
+}
+
+static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct page    *status = filp->private_data;
+
+	BUG_ON(!status);
+
+	return simple_read_from_buffer(buf, count, ppos,
+				       page_address(status),
+				       sizeof(struct selinux_kernel_status));
+}
+
+static int sel_mmap_handle_status(struct file *filp,
+				  struct vm_area_struct *vma)
+{
+	struct page    *status = filp->private_data;
+	unsigned long	size = vma->vm_end - vma->vm_start;
+
+	BUG_ON(!status);
+
+	/* only allows one page from the head */
+	if (vma->vm_pgoff > 0 || size != PAGE_SIZE)
+		return -EIO;
+	/* disallow writable mapping */
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+	/* disallow mprotect() turns it into writable */
+	vma->vm_flags &= ~VM_MAYWRITE;
+
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(status),
+			       size, vma->vm_page_prot);
+}
+
+static const struct file_operations sel_handle_status_ops = {
+	.open		= sel_open_handle_status,
+	.read		= sel_read_handle_status,
+	.mmap		= sel_mmap_handle_status,
+	.llseek		= generic_file_llseek,
+};
+
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 static ssize_t sel_write_disable(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
@@ -1612,6 +1667,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent)
 		[SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
 		[SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO},
 		[SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO},
+		[SEL_STATUS] = {"status", &sel_handle_status_ops, S_IRUGO},
 		/* last one */ {""}
 	};
 	ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile
index 15d4e62..974e11c 100644
--- a/security/selinux/ss/Makefile
+++ b/security/selinux/ss/Makefile
@@ -5,5 +5,5 @@
 EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include
 obj-y := ss.o

-ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o
+ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o status.o

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 9ea2fec..494ff52 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1791,6 +1791,7 @@ int security_load_policy(void *data, size_t len)
 		selinux_complete_init();
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
 		selinux_netlbl_cache_invalidate();
 		selinux_xfrm_notify_policyload();
 		return 0;
@@ -1870,6 +1871,7 @@ int security_load_policy(void *data, size_t len)

 	avc_ss_reset(seqno);
 	selnl_notify_policyload(seqno);
+	selinux_status_update_policyload(seqno);
 	selinux_netlbl_cache_invalidate();
 	selinux_xfrm_notify_policyload();

@@ -2374,6 +2376,7 @@ out:
 	if (!rc) {
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_status_update_policyload(seqno);
 		selinux_xfrm_notify_policyload();
 	}
 	return rc;
diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c
new file mode 100644
index 0000000..5d9b225
--- /dev/null
+++ b/security/selinux/ss/status.c
@@ -0,0 +1,129 @@
+/*
+ * mmap based event notifications for SELinux
+ *
+ * Author: KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *
+ * Copyright (C) 2010 NEC corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include "avc.h"
+#include "services.h"
+
+/*
+ * The selinux_status_page shall be exposed to userspace applications
+ * using mmap interface on /selinux/status.
+ * It enables to notify applications a few events that will cause reset
+ * of userspace access vector without context switching.
+ *
+ * The selinux_kernel_status structure on the head of status page is
+ * protected from concurrent accesses using seqlock logic, so userspace
+ * application should reference the status page according to the seqlock
+ * logic.
+ *
+ * Typically, application checks status->sequence at the head of access
+ * control routine. If it is odd-number, kernel is updating the status,
+ * so please wait for a moment. If it is changed from the last sequence
+ * number, it means something happen, so application will reset userspace
+ * avc, if needed.
+ * In most cases, application shall confirm the kernel status is not
+ * changed without any system call invocations.
+ */
+static struct page *selinux_status_page = NULL;
+static DEFINE_MUTEX(selinux_status_lock);
+
+/*
+ * selinux_kernel_status_page
+ *
+ * It returns a reference to selinux_status_page. If the status page is
+ * not allocated yet, it also tries to allocate it at the first time.
+ */
+struct page *selinux_kernel_status_page(void)
+{
+	struct selinux_kernel_status   *status;
+	struct page		       *result = NULL;
+
+	mutex_lock(&selinux_status_lock);
+	if (!selinux_status_page)
+	{
+		selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (selinux_status_page)
+		{
+			status = page_address(selinux_status_page);
+
+			status->version = SELINUX_KERNEL_STATUS_VERSION;
+			status->sequence = 0;
+			status->enforcing = selinux_enforcing;
+			/*
+			 * NOTE: the next policyload event shall set
+			 * a positive value on the status->policyload,
+			 * although it may not be 1, but never zero.
+			 * So, application can know it was updated.
+			 */
+			status->policyload = 0;
+			status->deny_unknown = !security_get_allow_unknown();
+		}
+	}
+	result = selinux_status_page;
+	mutex_unlock(&selinux_status_lock);
+
+	return result;
+}
+
+/*
+ * selinux_status_update_setenforce
+ *
+ * It updates status of the current enforcing/permissive mode.
+ */
+void selinux_status_update_setenforce(int enforcing)
+{
+	struct selinux_kernel_status   *status;
+
+	mutex_lock(&selinux_status_lock);
+	if (selinux_status_page)
+	{
+		status = page_address(selinux_status_page);
+
+		status->sequence++;
+		smp_wmb();
+
+		status->enforcing = enforcing;
+
+		smp_wmb();
+		status->sequence++;
+	}
+	mutex_unlock(&selinux_status_lock);
+}
+
+/*
+ * selinux_status_update_policyload
+ *
+ * It updates status of the times of policy reloaded, and current
+ * setting of deny_unknown.
+ */
+void selinux_status_update_policyload(int seqno)
+{
+	struct selinux_kernel_status   *status;
+
+	mutex_lock(&selinux_status_lock);
+	if (selinux_status_page)
+	{
+		status = page_address(selinux_status_page);
+
+		status->sequence++;
+		smp_wmb();
+
+		status->policyload = seqno;
+		status->deny_unknown = !security_get_allow_unknown();
+
+		smp_wmb();
+		status->sequence++;
+	}
+	mutex_unlock(&selinux_status_lock);
+}

-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-13 20:45             ` Eric Paris
  2010-09-14  9:28               ` KaiGai Kohei
@ 2010-09-14  9:31               ` KaiGai Kohei
  1 sibling, 0 replies; 18+ messages in thread
From: KaiGai Kohei @ 2010-09-14  9:31 UTC (permalink / raw)
  To: Eric Paris; +Cc: KaiGai Kohei, selinux, ewalsh

(2010/09/14 5:45), Eric Paris wrote:
> On Thu, 2010-09-02 at 17:16 +0900, KaiGai Kohei wrote:
>> (2010/08/28 12:24), KaiGai Kohei wrote:
>>> (2010/08/28 1:19), Eric Paris wrote:
>>>> On Fri, Aug 27, 2010 at 11:48 AM, Eric Paris<eparis@parisplace.org>    wrote:
>>>>> 2010/8/27 KaiGai Kohei<kaigai@ak.jp.nec.com>:
>>>>>> I revised the /selinux/status implementation.
>>>>>>
>>>>>> * It becomes to report 'deny_unknown'. Userspace object manager
>>>>>>     also reference this flag to decide its behavior when the loaded
>>>>>>     policy does not support expected object classes.
>>>>>> * It provided PAGE_READONLY to remap_pfn_range() as page protection
>>>>>>     flag independent from argument of mmap(2), but it was uncommon.
>>>>>>     I fixed to pass vma->vm_page_prot instead of the hardwired flag
>>>>>>     according to any other implementation style.
>>>>>>     Now it returns an error, if user tries to map /selinux/status as
>>>>>>     writable pages.
>>>>>
>>>>> I really hate blowing 4k of memory on every system to show 40 bytes of
>>>>> data on just a few systems.  Is there any change we could allocate the
>>>>> page the first time it is needed rather that at boot?  I know compared
>>>>> to the size of policy and other memory usage in SELinux it's odd for
>>>>> me to complain, but I've decided to get on a reduction if possible
>>>>> kick.
>>>>>
>>>>> Only other comment is that __initcall() is deprecated and we are
>>>>> supposed to use device_initcall() now.
>>>>>
>>>>> If you plan to use it, I'll ack if you change both of those things....
>>>>
>>>> actually if you move to dynamic allocation of the status page and use
>>>> static DEFINE_SPINLOCK instead of static spinlock_t you can get rid of
>>>> the __init() code altogether....
>>>>
>>>
>>> I revised the patch.
>>> It was changed the selinux_kernel_page being allocated at the first time
>>> when application tries to reference the /selinux/status.
>>> At the same time, it declares selinux_status_lock using DEFINE_MUTEX(),
>>> so whole of the __init section has gone.
>>>
>>> In addition, I changed first member of the selinux_kernel_status from
>>> 'length' to 'version', because sizeof(struct ...) is aligned to 64bit
>>> boundary (24bytes) on x86_64 system, although it is actually 20bytes.
>>> If we want to add a 32bit member in the future, 'length' may not inform
>>> applications enough.
>>>
>> How about getting the feature?
>> Although I've not found out this idea for a long time, it is quite helpful
>> feature to implement SE-PostgreSQL (and other upcoming userspace object
>> managers) in less invasive way.
>>
>> I fixed up two minor points in the patch, as follows:
>> * The 4K of status page becomes allocated at the file_operations::open()
>>    method, because it seems to me a bit unnatural that either read() or
>>    mmap() fails due to memory allocation error.
>> * I forgot to eliminate an unnecessary declaration of extern variable.
>>
>>   Signed-off-by: KaiGai Kohei<kaigai@ak.jp.nec.com>
> 
> Sorry I was on vacation for the last 2 weeks.  I'm happy with it so:
> 
> Acked-by: Eric Paris<eparis@redhat.com>
> 
> As to one comment in the code:
> 
> + * In addition, application should also checks the sequence number at
> + * tail of the access control routine. If it is changed from the value
> + * on the head, it means kernel status was changed under processing the
> + * routine. In this case, application should repeat to run the routine
> + * from head, but we expect it is much rare case.
> 
> Is this just to eliminate the race where:
> 
> userspace checks seqno
> 				kernel loads new policy
> userspace avc responds to request
> 
> but the response 'should' have been different thanks to the policy load
> that the userspace object manager didn't hear?  I claim there is no race
> here, since he request had to be made before the policy load, even if
> the userspace AVC didn't respond until after the load.
> 
The reason why I considered that we should check the seqno at the tail
of the access control routine is avc_has_perm() takes tclass as
security_class_t, and requested as access_vector_t which hold numbers
depending on the current policy.

Unlike earlier version of selinux, the code of object classes and
access vectors are not hard-wired; it enables to develop kernel code
and security policy individually, so good.

On the other hand, when and if application queries to selinux, it needs
to translate name of object class and access vectores, as follows:

1: tclass = string_to_security_class("file")
2: requested = string_to_av_perm(tclass, "read");
3: avc_has_perm(..., tclass, requested, ...);

Although it is a quite rare event, reloading new policy has a possibility
to change the code of object classes and access vectors.
So, we need to ensure the step 1-3 being handled atomically from the policy
reloading.

Apart from whether we should describe this section in the kernel source,
it seems to me a scenario that we need to pay an attention.

> I just don't see where closing that 'race' actually improves security.
> The application is going to actually do the privileged operation some
> time after the access check.  So what we have today seems just as good
> when we consider the set of operations
> 
> userspace checks seqno
> userspace avc respond to request
> userspace checks seqno
> 				kernel loads new policy
> userspace actually performs priv operation.
> 
I don't think it is the 'race' to be fixed up.
This pseudo code makes access control decision in atomic.
No difference with any other kernel code which performs priv operations
after the read_unlock(&policy_rwlock) :-)

> Doesn't change the patch at all, I just think it makes your test case
> looks a lot worse than it needs to.
> 
> I wonder what your results are when you use the separate thread method
> which doesn't have anything expect 1,000,000 calls to avc_has_perm().
> Doesn't matter just me wondering out loud and wondering if there is any
> reason to use the separate thread implementation if this is just about
> as fast.
> 
In the situation that we can use the separate thread, I expect here is
no significant differences between mmap()'ing and threading.
However, in some cases, it is an invasive style to launch a worker thread
from plugin modules. And right now, I'm under development of SE-PostgreSQL
and memcached with selinux as plugin modules.

Thanks,
-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-14  9:28               ` KaiGai Kohei
@ 2010-09-14 13:25                 ` Eric Paris
  2010-09-14 21:48                 ` James Morris
  2010-09-14 22:11                 ` James Morris
  2 siblings, 0 replies; 18+ messages in thread
From: Eric Paris @ 2010-09-14 13:25 UTC (permalink / raw)
  To: KaiGai Kohei; +Cc: KaiGai Kohei, selinux, ewalsh, jmorris

On Tue, 2010-09-14 at 18:28 +0900, KaiGai Kohei wrote:

> <--------------------------------( cut here )-------------------------------->
> 
> This patch provides a new /selinux/status entry which allows applications
> read-only mmap(2).
> This region reflects selinux_kernel_status structure in kernel space.
>   struct selinux_kernel_status
>   {
>           u32     length;         /* length of this structure */
>           u32     sequence;       /* sequence number of seqlock logic */
>           u32     enforcing;      /* current setting of enforcing mode */
>           u32     policyload;     /* times of policy reloaded */
>           u32     deny_unknown;   /* current setting of deny_unknown */
>   };
> 
> When userspace object manager caches access control decisions provided
> by SELinux, it needs to invalidate the cache on policy reload and setenforce
> to keep consistency.
> However, the applications need to check the kernel state for each accesses
> on userspace avc, or launch a background worker process.
> In heuristic, frequency of invalidation is much less than frequency of
> making access control decision, so it is annoying to invoke a system call
> to check we don't need to invalidate the userspace cache.
> If we can use a background worker thread, it allows to receive invalidation
> messages from the kernel. But it requires us an invasive coding toward the
> base application in some cases; E.g, when we provide a feature performing
> with SELinux as a plugin module, it is unwelcome manner to launch its own
> worker thread from the module.
> 
> If we could map /selinux/status to process memory space, application can
> know updates of selinux status; policy reload or setenforce.
> 
> A typical application checks selinux_kernel_status::sequence when it tries
> to reference userspace avc. If it was changed from the last time when it
> checked userspace avc, it means something was updated in the kernel space.
> Then, the application can reset userspace avc or update current enforcing
> mode, without any system call invocations.
> This sequence number is updated according to the seqlock logic, so we need
> to wait for a while if it is odd number.
> 
> Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>

Acked-by: Eric Paris <eparis@redhat.com>


--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-14  9:28               ` KaiGai Kohei
  2010-09-14 13:25                 ` Eric Paris
@ 2010-09-14 21:48                 ` James Morris
  2010-09-14 21:51                   ` James Morris
  2010-09-14 22:11                 ` James Morris
  2 siblings, 1 reply; 18+ messages in thread
From: James Morris @ 2010-09-14 21:48 UTC (permalink / raw)
  To: KaiGai Kohei; +Cc: Eric Paris, KaiGai Kohei, selinux, ewalsh

On Tue, 14 Sep 2010, KaiGai Kohei wrote:

> <--------------------------------( cut here )-------------------------------->
> 

Please send updated patches as new messages, so that I don't accidentally 
apply an older version.

-- 
James Morris
<jmorris@namei.org>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-14 21:48                 ` James Morris
@ 2010-09-14 21:51                   ` James Morris
  2010-09-15  2:31                     ` KaiGai Kohei
  0 siblings, 1 reply; 18+ messages in thread
From: James Morris @ 2010-09-14 21:51 UTC (permalink / raw)
  To: KaiGai Kohei; +Cc: Eric Paris, KaiGai Kohei, selinux, ewalsh

On Wed, 15 Sep 2010, James Morris wrote:

> On Tue, 14 Sep 2010, KaiGai Kohei wrote:
> 
> > <--------------------------------( cut here )-------------------------------->
> > 
> 
> Please send updated patches as new messages, so that I don't accidentally 
> apply an older version.

Also, the subject line is not correct for the git logs.

It needs to be:

[PATCH] subsystem: title

(See section 15 of Documentation/SubmittingPatches)






-- 
James Morris
<jmorris@namei.org>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-14  9:28               ` KaiGai Kohei
  2010-09-14 13:25                 ` Eric Paris
  2010-09-14 21:48                 ` James Morris
@ 2010-09-14 22:11                 ` James Morris
  2 siblings, 0 replies; 18+ messages in thread
From: James Morris @ 2010-09-14 22:11 UTC (permalink / raw)
  To: KaiGai Kohei; +Cc: Eric Paris, KaiGai Kohei, selinux, ewalsh

On Tue, 14 Sep 2010, KaiGai Kohei wrote:

> 
> <--------------------------------( cut here )-------------------------------->


Applied to
git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6#next

-- 
James Morris
<jmorris@namei.org>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] Fast status update interface (/selinux/status)
  2010-09-14 21:51                   ` James Morris
@ 2010-09-15  2:31                     ` KaiGai Kohei
  0 siblings, 0 replies; 18+ messages in thread
From: KaiGai Kohei @ 2010-09-15  2:31 UTC (permalink / raw)
  To: James Morris; +Cc: Eric Paris, KaiGai Kohei, selinux, ewalsh

(2010/09/15 6:51), James Morris wrote:
> On Wed, 15 Sep 2010, James Morris wrote:
> 
>> On Tue, 14 Sep 2010, KaiGai Kohei wrote:
>>
>>> <--------------------------------( cut here )-------------------------------->
>>>
>>
>> Please send updated patches as new messages, so that I don't accidentally
>> apply an older version.
> 
> Also, the subject line is not correct for the git logs.
> 
> It needs to be:
> 
> [PATCH] subsystem: title
> 
> (See section 15 of Documentation/SubmittingPatches)
> 
Sorry, I'll pay attention when I submit a patch in the next time.

Thanks,
-- 
KaiGai Kohei <kaigai@ak.jp.nec.com>

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2010-09-15  2:31 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-26 10:43 [PATCH] Fast status update interface (/selinux/status) KaiGai Kohei
2010-08-26 10:53 ` KaiGai Kohei
2010-08-26 23:50 ` KaiGai Kohei
2010-08-27  8:37   ` KaiGai Kohei
2010-08-27 15:48     ` Eric Paris
2010-08-27 16:19       ` Eric Paris
2010-08-28  3:24         ` KaiGai Kohei
2010-09-02  8:16           ` KaiGai Kohei
2010-09-07  0:03             ` KaiGai Kohei
2010-09-10  1:16               ` KaiGai Kohei
2010-09-13 20:45             ` Eric Paris
2010-09-14  9:28               ` KaiGai Kohei
2010-09-14 13:25                 ` Eric Paris
2010-09-14 21:48                 ` James Morris
2010-09-14 21:51                   ` James Morris
2010-09-15  2:31                     ` KaiGai Kohei
2010-09-14 22:11                 ` James Morris
2010-09-14  9:31               ` KaiGai Kohei

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.