All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gregory Price <gourry.memverge@gmail.com>
To: linux-mm@kvack.org
Cc: linux-doc@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-api@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
	arnd@arndb.de, tglx@linutronix.de, luto@kernel.org,
	mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
	x86@kernel.org, hpa@zytor.com, mhocko@kernel.org, tj@kernel.org,
	ying.huang@intel.com, Gregory Price <gregory.price@memverge.com>
Subject: [RFC PATCH 04/11] mm/mempolicy: modify get_mempolicy call stack to take a task argument
Date: Wed, 22 Nov 2023 16:11:53 -0500	[thread overview]
Message-ID: <20231122211200.31620-5-gregory.price@memverge.com> (raw)
In-Reply-To: <20231122211200.31620-1-gregory.price@memverge.com>

To make mempolicy fetchable by external tasks, we must first change
the callstack to take a task as an argument.

Modify the following functions to require a task argument:
	do_get_mempolicy
	kernel_get_mempolicy

The way the task->mm is acquired must change slightly to enable this
change.  Originally, do_get_mempolicy would acquire the task->mm
directly via (current->mm).  This is unsafe to do in a non-current
context.  However, utilizing get_task_mm would break the original
functionality of do_get_mempolicy due to the following check
in get_task_mm:

  if (mm) {
    if (task->flags & PF_KTHREAD)
      mm = NULL;
    else
      mmget(mm);
  }

To retain the original behavior, if (task == current) we access
the task->mm directly, but if (task != current) we will utilize
get_task_mm to safely access the mm.

We simplify the get/put mechanics by always taking a reference to
the mm, even if we are in the context of (task == current).

Additionally, since the mempolicy will become externally modifiable,
we need to take the task lock to acquire task->mempolicy safely,
regardless of whether we are operating on current or not.

Signed-off-by: Gregory Price <gregory.price@memverge.com>
---
 mm/mempolicy.c | 43 +++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9ea3e1bfc002..4519f39b1a07 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -899,8 +899,9 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr)
 }
 
 /* Retrieve NUMA policy */
-static long do_get_mempolicy(int *policy, nodemask_t *nmask,
-			     unsigned long addr, unsigned long flags)
+static long do_get_mempolicy(struct task_struct *task, int *policy,
+			     nodemask_t *nmask, unsigned long addr,
+			     unsigned long flags)
 {
 	int err;
 	struct mm_struct *mm;
@@ -915,9 +916,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
 			return -EINVAL;
 		*policy = 0;	/* just so it's initialized */
-		task_lock(current);
-		*nmask  = cpuset_current_mems_allowed;
-		task_unlock(current);
+		task_lock(task);
+		*nmask = task->mems_allowed;
+		task_unlock(task);
 		return 0;
 	}
 
@@ -928,7 +929,16 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		 * vma/shared policy at addr is NULL.  We
 		 * want to return MPOL_DEFAULT in this case.
 		 */
-		mm = current->mm;
+		if (task == current) {
+			/*
+			 * original behavior allows a kernel task changing its
+			 * own policy to avoid the condition in get_task_mm,
+			 * so we'll directly access
+			 */
+			mm = task->mm;
+			mmget(mm);
+		} else
+			mm = get_task_mm(task);
 		mmap_read_lock(mm);
 		vma = vma_lookup(mm, addr);
 		if (!vma) {
@@ -947,8 +957,10 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		return -EINVAL;
 	else {
 		/* take a reference of the task policy now */
-		pol = current->mempolicy;
+		task_lock(task);
+		pol = task->mempolicy;
 		mpol_get(pol);
+		task_unlock(task);
 	}
 
 	if (!pol) {
@@ -962,12 +974,13 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 			vma = NULL;
 			mmap_read_unlock(mm);
 			err = lookup_node(mm, addr);
+			mmput(mm);
 			if (err < 0)
 				goto out;
 			*policy = err;
-		} else if (pol == current->mempolicy &&
+		} else if (pol == task->mempolicy &&
 				pol->mode == MPOL_INTERLEAVE) {
-			*policy = next_node_in(current->il_prev, pol->nodes);
+			*policy = next_node_in(task->il_prev, pol->nodes);
 		} else {
 			err = -EINVAL;
 			goto out;
@@ -987,9 +1000,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		if (mpol_store_user_nodemask(pol)) {
 			*nmask = pol->w.user_nodemask;
 		} else {
-			task_lock(current);
+			task_lock(task);
 			get_policy_nodemask(pol, nmask);
-			task_unlock(current);
+			task_unlock(task);
 		}
 	}
 
@@ -1704,7 +1717,8 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
 }
 
 /* Retrieve NUMA policy */
-static int kernel_get_mempolicy(int __user *policy,
+static int kernel_get_mempolicy(struct task_struct *task,
+				int __user *policy,
 				unsigned long __user *nmask,
 				unsigned long maxnode,
 				unsigned long addr,
@@ -1719,7 +1733,7 @@ static int kernel_get_mempolicy(int __user *policy,
 
 	addr = untagged_addr(addr);
 
-	err = do_get_mempolicy(&pval, &nodes, addr, flags);
+	err = do_get_mempolicy(task, &pval, &nodes, addr, flags);
 
 	if (err)
 		return err;
@@ -1737,7 +1751,8 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
 		unsigned long __user *, nmask, unsigned long, maxnode,
 		unsigned long, addr, unsigned long, flags)
 {
-	return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags);
+	return kernel_get_mempolicy(current, policy, nmask, maxnode, addr,
+				    flags);
 }
 
 bool vma_migratable(struct vm_area_struct *vma)
-- 
2.39.1


  parent reply	other threads:[~2023-11-22 21:12 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-22 21:11 [RFC PATCH 00/11] mm/mempolicy: Make task->mempolicy externally modifiable via syscall and procfs Gregory Price
2023-11-22 21:11 ` [RFC PATCH 01/11] mm/mempolicy: refactor do_set_mempolicy for code re-use Gregory Price
2023-11-22 21:11 ` [RFC PATCH 02/11] mm/mempolicy: swap cond reference counting logic in do_get_mempolicy Gregory Price
2023-11-28 14:07   ` Michal Hocko
     [not found]     ` <ZWX0ytAwmOdooHdZ@memverge.com>
2023-11-28 14:28       ` Michal Hocko
2023-11-22 21:11 ` [RFC PATCH 03/11] mm/mempolicy: refactor set_mempolicy stack to take a task argument Gregory Price
2023-11-22 21:11 ` Gregory Price [this message]
2023-11-28 14:07   ` [RFC PATCH 04/11] mm/mempolicy: modify get_mempolicy call " Michal Hocko
     [not found]     ` <ZWX1U1gCTXC+lFXn@memverge.com>
2023-11-28 14:49       ` Michal Hocko
2023-11-22 21:11 ` [RFC PATCH 05/11] mm/mempolicy: modify set_mempolicy_home_node " Gregory Price
2023-11-28 14:07   ` Michal Hocko
2023-11-28 14:14     ` Gregory Price
2023-11-22 21:11 ` [RFC PATCH 06/11] mm/mempolicy: modify do_mbind to operate on task argument instead of current Gregory Price
2023-11-28 14:11   ` Michal Hocko
2023-11-28 14:51     ` Gregory Price
2023-11-28 18:08     ` Gregory Price
2023-11-22 21:11 ` [RFC PATCH 07/11] mm/mempolicy: add task mempolicy syscall variants Gregory Price
2023-11-24  7:56   ` kernel test robot
2023-11-24  7:56   ` kernel test robot
2023-11-24  7:57   ` kernel test robot
2023-11-22 21:11 ` [RFC PATCH 08/11] mm/mempolicy: export replace_mempolicy for use by procfs Gregory Price
2023-11-22 21:11 ` [RFC PATCH 09/11] mm/mempolicy: build mpol_parse_str unconditionally Gregory Price
2023-11-22 21:11 ` [RFC PATCH 10/11] mm/mempolicy: mpol_parse_str should ignore trailing characters in nodelist Gregory Price
2023-11-22 21:12 ` [RFC PATCH 11/11] fs/proc: Add mempolicy attribute to allow read/write of task mempolicy Gregory Price
2023-11-22 21:33 ` [RFC PATCH 00/11] mm/mempolicy: Make task->mempolicy externally modifiable via syscall and procfs Andrew Morton
2023-11-22 21:35   ` Andrew Morton
2023-11-22 22:24   ` Gregory Price
2023-11-27 15:29     ` Michal Hocko
2023-11-27 16:14       ` Gregory Price
2023-11-28  9:45         ` Michal Hocko
2023-11-28 13:15           ` Gregory Price

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231122211200.31620-5-gregory.price@memverge.com \
    --to=gourry.memverge@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=gregory.price@memverge.com \
    --cc=hpa@zytor.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mhocko@kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=x86@kernel.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.