All of lore.kernel.org
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Manfred Spraul <manfred@colorfullife.com>
Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>,
	linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Greg KH <gregkh@linuxfoundation.org>,
	Andrei Vagin <avagin@gmail.com>,
	Pavel Tikhomirov <ptikhomirov@virtuozzo.com>,
	Vasily Averin <vvs@virtuozzo.com>,
	Alexander Mikhalitsyn <alexander@mihalicyn.com>,
	stable@vger.kernel.org
Subject: [RFC] shm: extend forced shm destroy to support objects from several IPC nses (simplified)
Date: Fri, 05 Nov 2021 16:34:38 -0500	[thread overview]
Message-ID: <87lf22qob5.fsf_-_@disp2133> (raw)
In-Reply-To: <61ca7331-4a86-2bf6-9ccb-50f6a7824e12@colorfullife.com> (Manfred Spraul's message of "Fri, 5 Nov 2021 20:03:21 +0100")


I have to dash so this is short.

This is what I am thinking this change should look like.

I am not certain this is truly reviewable as a single change, so I will
break it into a couple of smaller ones next time I get the chance.

Eric

 include/linux/ipc_namespace.h |  12 ++++
 include/linux/sched/task.h    |   2 +-
 ipc/shm.c                     | 135 +++++++++++++++++++++++++-----------------
 ipc/util.c                    |   5 ++
 ipc/util.h                    |   1 +
 kernel/fork.c                 |   1 -
 6 files changed, 100 insertions(+), 56 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 05e22770af51..c220767a0cc1 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -131,6 +131,13 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 	return ns;
 }
 
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+	if (ns && refcount_inc_not_zero(&ns->ns.count))
+		return ns;
+	return NULL;
+}
+
 extern void put_ipc_ns(struct ipc_namespace *ns);
 #else
 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -147,6 +154,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 	return ns;
 }
 
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+	return ns;
+}
+
 static inline void put_ipc_ns(struct ipc_namespace *ns)
 {
 }
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ef02be869cf2..1d9533d66f7e 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -157,7 +157,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
  * pins the final release of task.io_context.  Also protects ->cpuset and
- * ->cgroup.subsys[]. And ->vfork_done.
+ * ->cgroup.subsys[]. And ->vfork_done. And ->shmvshm.shm_clist.
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/ipc/shm.c b/ipc/shm.c
index ab749be6d8b7..80e3595d3a69 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -63,8 +63,9 @@ struct shmid_kernel /* private to the kernel */
 	struct ucounts		*mlock_ucounts;
 
 	/* The task created the shm object.  NULL if the task is dead. */
-	struct task_struct	*shm_creator;
+	struct task_struct __rcu *shm_creator;
 	struct list_head	shm_clist;	/* list by creator */
+	struct ipc_namespace	*shm_ns;	/* valid when shm_nattch != 0 */
 } __randomize_layout;
 
 /* shm_mode upper byte flags */
@@ -106,29 +107,17 @@ void shm_init_ns(struct ipc_namespace *ns)
 	ipc_init_ids(&shm_ids(ns));
 }
 
-/*
- * Called with shm_ids.rwsem (writer) and the shp structure locked.
- * Only shm_ids.rwsem remains locked on exit.
- */
-static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
+static void do_shm_destroy(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
-	struct shmid_kernel *shp;
-
-	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
-
-	if (shp->shm_nattch) {
-		shp->shm_perm.mode |= SHM_DEST;
-		/* Do not find it any more */
-		ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
-		shm_unlock(shp);
-	} else
-		shm_destroy(ns, shp);
+	struct shmid_kernel *shp =
+		container_of(ipcp, struct shmid_kernel, shm_perm);
+	shm_destroy(ns, shp);
 }
 
 #ifdef CONFIG_IPC_NS
 void shm_exit_ns(struct ipc_namespace *ns)
 {
-	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
+	free_ipcs(ns, &shm_ids(ns), do_shm_destroy);
 	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
 	rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
 }
@@ -225,9 +214,22 @@ static void shm_rcu_free(struct rcu_head *head)
 	kfree(shp);
 }
 
+static inline void shm_clist_del(struct shmid_kernel *shp)
+{
+	struct task_struct *creator;
+
+	rcu_read_lock();
+	creator = rcu_dereference(shp->shm_creator);
+	if (creator) {
+		task_lock(creator);
+		list_del(&shp->shm_clist);
+		task_unlock(creator);
+	}
+	rcu_read_unlock();
+}
+
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
-	list_del(&s->shm_clist);
 	ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
@@ -283,7 +285,9 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	shm_file = shp->shm_file;
 	shp->shm_file = NULL;
 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	shm_clist_del(shp);
 	shm_rmid(ns, shp);
+	shp->shm_ns = NULL;
 	shm_unlock(shp);
 	if (!is_file_hugepages(shm_file))
 		shmem_lock(shm_file, 0, shp->mlock_ucounts);
@@ -361,7 +365,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 	 *
 	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
 	 */
-	if (shp->shm_creator != NULL)
+	if (rcu_access_pointer(shp->shm_creator) != NULL)
 		return 0;
 
 	if (shm_may_destroy(ns, shp)) {
@@ -382,48 +386,62 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
 /* Locking assumes this will only be called with task == current */
 void exit_shm(struct task_struct *task)
 {
-	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
-	struct shmid_kernel *shp, *n;
-
-	if (list_empty(&task->sysvshm.shm_clist))
-		return;
-
-	/*
-	 * If kernel.shm_rmid_forced is not set then only keep track of
-	 * which shmids are orphaned, so that a later set of the sysctl
-	 * can clean them up.
-	 */
-	if (!ns->shm_rmid_forced) {
-		down_read(&shm_ids(ns).rwsem);
-		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
-			shp->shm_creator = NULL;
-		/*
-		 * Only under read lock but we are only called on current
-		 * so no entry on the list will be shared.
-		 */
-		list_del(&task->sysvshm.shm_clist);
-		up_read(&shm_ids(ns).rwsem);
-		return;
-	}
+	struct list_head *head = &task->sysvshm.shm_clist;
 
 	/*
 	 * Destroy all already created segments, that were not yet mapped,
 	 * and mark any mapped as orphan to cover the sysctl toggling.
 	 * Destroy is skipped if shm_may_destroy() returns false.
 	 */
-	down_write(&shm_ids(ns).rwsem);
-	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
-		shp->shm_creator = NULL;
+	for (;;) {
+		struct ipc_namespace *ns;
+		struct shmid_kernel *shp;
 
-		if (shm_may_destroy(ns, shp)) {
+		task_lock(task);
+		if (list_empty(head)) {
+			task_unlock(task);
+			break;
+		}
+
+		shp = list_first_entry(head, struct shmid_kernel, shm_clist);
+
+		list_del(&shp->shm_clist);
+		rcu_assign_pointer(shp->shm_creator, NULL);
+
+		/*
+		 * Guarantee that ns lives after task_list is dropped.
+		 *
+		 * This shm segment may not be attached and it's ipc
+		 * namespace may be exiting.  If so ignore the shm
+		 * segment as it will be destroyed by shm_exit_ns.
+		 */
+		ns = get_ipc_ns_not_zero(shp->shm_ns);
+		if (!ns) {
+			task_unlock(task);
+			continue;
+		}
+
+		/* Guarantee shp lives after task_lock is dropped */
+		ipc_getref(&shp->shm_perm);
+
+		/* Drop task_lock so that shm_destroy may take it */
+		task_unlock(task);
+
+		/* Can the shm segment be destroyed? */
+		down_write(&shm_ids(ns).rwsem);
+		shm_lock_by_ptr(shp);
+		if (ipc_valid_object(&shp->shm_perm) &&
+		    shm_may_destroy(ns, shp)) {
 			shm_lock_by_ptr(shp);
 			shm_destroy(ns, shp);
+		} else {
+			shm_unlock(shp);
 		}
-	}
 
-	/* Remove the list head from any segments still attached. */
-	list_del(&task->sysvshm.shm_clist);
-	up_write(&shm_ids(ns).rwsem);
+		ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+		up_write(&shm_ids(ns).rwsem);
+		put_ipc_ns(ns);
+	}
 }
 
 static vm_fault_t shm_fault(struct vm_fault *vmf)
@@ -673,14 +691,17 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
 	shp->shm_file = file;
-	shp->shm_creator = current;
+	RCU_INIT_POINTER(shp->shm_creator, current);
+	shp->shm_ns = ns;
 
 	/* ipc_addid() locks shp upon success. */
 	error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
 	if (error < 0)
 		goto no_id;
 
+	task_lock(current);
 	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
+	task_unlock(current);
 
 	/*
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
@@ -913,8 +934,14 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 	switch (cmd) {
 	case IPC_RMID:
 		ipc_lock_object(&shp->shm_perm);
-		/* do_shm_rmid unlocks the ipc object and rcu */
-		do_shm_rmid(ns, ipcp);
+		if (shp->shm_nattch) {
+			shp->shm_perm.mode |= SHM_DEST;
+			/* Do not find it any more */
+			ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
+			shm_unlock(shp);
+		} else
+			shm_destroy(ns, shp);
+		/* shm_unlock unlocked the ipc object and rcu */
 		goto out_up;
 	case IPC_SET:
 		ipc_lock_object(&shp->shm_perm);
diff --git a/ipc/util.c b/ipc/util.c
index fa2d86ef3fb8..58228f342397 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -525,6 +525,11 @@ void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 	ipcp->key = IPC_PRIVATE;
 }
 
+void ipc_getref(struct kern_ipc_perm *ptr)
+{
+	return refcount_inc(&ptr->refcount);
+}
+
 bool ipc_rcu_getref(struct kern_ipc_perm *ptr)
 {
 	return refcount_inc_not_zero(&ptr->refcount);
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..e13b46ff675f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -170,6 +170,7 @@ static inline int ipc_get_maxidx(struct ipc_ids *ids)
  * refcount is initialized by ipc_addid(), before that point call_rcu()
  * must be used.
  */
+void ipc_getref(struct kern_ipc_perm *ptr);
 bool ipc_rcu_getref(struct kern_ipc_perm *ptr);
 void ipc_rcu_putref(struct kern_ipc_perm *ptr,
 			void (*func)(struct rcu_head *head));
diff --git a/kernel/fork.c b/kernel/fork.c
index 38681ad44c76..3e881f78bcf2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -3095,7 +3095,6 @@ int ksys_unshare(unsigned long unshare_flags)
 		if (unshare_flags & CLONE_NEWIPC) {
 			/* Orphan segments in old ns (see sem above). */
 			exit_shm(current);
-			shm_init_task(current);
 		}
 
 		if (new_nsproxy)


  reply	other threads:[~2021-11-05 21:35 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-27 22:43 [PATCH 0/2] shm: shm_rmid_forced feature fixes Alexander Mikhalitsyn
2021-10-27 22:43 ` [PATCH 1/2] ipc: WARN if trying to remove ipc object which is absent Alexander Mikhalitsyn
2021-10-27 22:43 ` [PATCH 2/2] shm: extend forced shm destroy to support objects from several IPC nses Alexander Mikhalitsyn
2021-10-30  4:26   ` Eric W. Biederman
2021-10-30 13:11     ` Manfred Spraul
2021-11-05 17:46   ` Eric W. Biederman
2021-11-05 19:03     ` Manfred Spraul
2021-11-05 21:34       ` Eric W. Biederman [this message]
2021-11-06  7:50         ` [RFC] shm: extend forced shm destroy to support objects from several IPC nses (simplified) Manfred Spraul
2021-11-06 14:42         ` Manfred Spraul
2021-11-07 19:51           ` Eric W. Biederman
2021-11-08 18:34             ` Manfred Spraul

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87lf22qob5.fsf_-_@disp2133 \
    --to=ebiederm@xmission.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.mikhalitsyn@virtuozzo.com \
    --cc=alexander@mihalicyn.com \
    --cc=avagin@gmail.com \
    --cc=dave@stgolabs.net \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=manfred@colorfullife.com \
    --cc=ptikhomirov@virtuozzo.com \
    --cc=stable@vger.kernel.org \
    --cc=vvs@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.