linux-kernel.vger.kernel.org archive mirror
* [PATCH 0/5] Support ASID Isolation mechanism
@ 2022-10-17  8:31 Yunfeng Ye
  2022-10-17  8:31 ` [PATCH 1/5] arm64: mm: Define asid_bitmap structure for pinned_asid Yunfeng Ye
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Yunfeng Ye @ 2022-10-17  8:31 UTC (permalink / raw)
  To: catalin.marinas, will, wangkefeng.wang, linux-arm-kernel,
	linux-kernel, yeyunfeng
  Cc: linfeilong

From: y00318929 <yeyunfeng@huawei.com>

This patch series introduces an ASID Isolation mechanism so that
processes running on isolated CPUs are not affected by ASID rollover
and the resulting TLB flushes.

Patch 1-3: Preparation for the ASID Isolation mechanism.

Patch 4: Detailed description and implementation of the ASID Isolation
mechanism.

Patch 5: Add a TLB flush tracepoint to observe TLB flushing on context
switch.

Yunfeng Ye (5):
  arm64: mm: Define asid_bitmap structure for pinned_asid
  arm64: mm: Extract the processing of asid_generation
  arm64: mm: Use cpumask in flush_context()
  arm64: mm: Support ASID isolation feature
  arm64: mm: Add TLB flush trace on context switch

 arch/arm64/mm/context.c | 286 ++++++++++++++++++++++++++++++++++------
 1 file changed, 247 insertions(+), 39 deletions(-)

-- 
2.27.0



* [PATCH 1/5] arm64: mm: Define asid_bitmap structure for pinned_asid
  2022-10-17  8:31 [PATCH 0/5] Support ASID Isolation mechanism Yunfeng Ye
@ 2022-10-17  8:31 ` Yunfeng Ye
  2022-10-17  8:32 ` [PATCH 2/5] arm64: mm: Extract the processing of asid_generation Yunfeng Ye
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Yunfeng Ye @ 2022-10-17  8:31 UTC (permalink / raw)
  To: catalin.marinas, will, wangkefeng.wang, linux-arm-kernel,
	linux-kernel, yeyunfeng
  Cc: linfeilong

It is clearer to use an asid_bitmap structure for the pinned ASID state,
and we will reuse it for the isolated ASIDs later.

No functional change.

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
---
 arch/arm64/mm/context.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e1e0dca01839..8549b5f30352 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -17,6 +17,12 @@
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
 
+struct asid_bitmap {
+	unsigned long *map;
+	unsigned long nr;
+	unsigned long max;
+};
+
 static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
@@ -27,9 +33,7 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 static cpumask_t tlb_flush_pending;
 
-static unsigned long max_pinned_asids;
-static unsigned long nr_pinned_asids;
-static unsigned long *pinned_asid_map;
+static struct asid_bitmap pinned_asid;
 
 #define ASID_MASK		(~GENMASK(asid_bits - 1, 0))
 #define ASID_FIRST_VERSION	(1UL << asid_bits)
@@ -90,8 +94,8 @@ static void set_kpti_asid_bits(unsigned long *map)
 
 static void set_reserved_asid_bits(void)
 {
-	if (pinned_asid_map)
-		bitmap_copy(asid_map, pinned_asid_map, NUM_USER_ASIDS);
+	if (pinned_asid.map)
+		bitmap_copy(asid_map, pinned_asid.map, NUM_USER_ASIDS);
 	else if (arm64_kernel_unmapped_at_el0())
 		set_kpti_asid_bits(asid_map);
 	else
@@ -275,7 +279,7 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
 	unsigned long flags;
 	u64 asid;
 
-	if (!pinned_asid_map)
+	if (!pinned_asid.map)
 		return 0;
 
 	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
@@ -285,7 +289,7 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
 	if (refcount_inc_not_zero(&mm->context.pinned))
 		goto out_unlock;
 
-	if (nr_pinned_asids >= max_pinned_asids) {
+	if (pinned_asid.nr >= pinned_asid.max) {
 		asid = 0;
 		goto out_unlock;
 	}
@@ -299,8 +303,8 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
 		atomic64_set(&mm->context.id, asid);
 	}
 
-	nr_pinned_asids++;
-	__set_bit(ctxid2asid(asid), pinned_asid_map);
+	pinned_asid.nr++;
+	__set_bit(ctxid2asid(asid), pinned_asid.map);
 	refcount_set(&mm->context.pinned, 1);
 
 out_unlock:
@@ -321,14 +325,14 @@ void arm64_mm_context_put(struct mm_struct *mm)
 	unsigned long flags;
 	u64 asid = atomic64_read(&mm->context.id);
 
-	if (!pinned_asid_map)
+	if (!pinned_asid.map)
 		return;
 
 	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
 
 	if (refcount_dec_and_test(&mm->context.pinned)) {
-		__clear_bit(ctxid2asid(asid), pinned_asid_map);
-		nr_pinned_asids--;
+		__clear_bit(ctxid2asid(asid), pinned_asid.map);
+		pinned_asid.nr--;
 	}
 
 	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
@@ -377,8 +381,8 @@ static int asids_update_limit(void)
 
 	if (arm64_kernel_unmapped_at_el0()) {
 		num_available_asids /= 2;
-		if (pinned_asid_map)
-			set_kpti_asid_bits(pinned_asid_map);
+		if (pinned_asid.map)
+			set_kpti_asid_bits(pinned_asid.map);
 	}
 	/*
 	 * Expect allocation after rollover to fail if we don't have at least
@@ -393,7 +397,7 @@ static int asids_update_limit(void)
 	 * even if all CPUs have a reserved ASID and the maximum number of ASIDs
 	 * are pinned, there still is at least one empty slot in the ASID map.
 	 */
-	max_pinned_asids = num_available_asids - num_possible_cpus() - 2;
+	pinned_asid.max = num_available_asids - num_possible_cpus() - 2;
 	return 0;
 }
 arch_initcall(asids_update_limit);
@@ -407,8 +411,8 @@ static int asids_init(void)
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
 		      NUM_USER_ASIDS);
 
-	pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
-	nr_pinned_asids = 0;
+	pinned_asid.map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
+	pinned_asid.nr = 0;
 
 	/*
 	 * We cannot call set_reserved_asid_bits() here because CPU
-- 
2.27.0



* [PATCH 2/5] arm64: mm: Extract the processing of asid_generation
  2022-10-17  8:31 [PATCH 0/5] Support ASID Isolation mechanism Yunfeng Ye
  2022-10-17  8:31 ` [PATCH 1/5] arm64: mm: Define asid_bitmap structure for pinned_asid Yunfeng Ye
@ 2022-10-17  8:32 ` Yunfeng Ye
  2022-10-17  8:32 ` [PATCH 3/5] arm64: mm: Use cpumask in flush_context() Yunfeng Ye
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Yunfeng Ye @ 2022-10-17  8:32 UTC (permalink / raw)
  To: catalin.marinas, will, wangkefeng.wang, linux-arm-kernel,
	linux-kernel, yeyunfeng
  Cc: linfeilong

To prepare for supporting the ASID isolation feature, extract the
handling of asid_generation into helper functions. This makes it
convenient to modify the asid_generation logic in one place.

In addition, it is clearer to call flush_generation() from
flush_context().

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
---
 arch/arm64/mm/context.c | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 8549b5f30352..380c7b05c36b 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -102,14 +102,40 @@ static void set_reserved_asid_bits(void)
 		bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 }
 
-#define asid_gen_match(asid) \
-	(!(((asid) ^ atomic64_read(&asid_generation)) >> asid_bits))
+static void asid_generation_init(void)
+{
+	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+}
+
+static void flush_generation(void)
+{
+	/* We're out of ASIDs, so increment the global generation count */
+	atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+					&asid_generation);
+}
+
+static inline u64 asid_read_generation(void)
+{
+	return atomic64_read(&asid_generation);
+}
+
+static inline bool asid_match(u64 asid, u64 genid)
+{
+	return (!(((asid) ^ (genid)) >> asid_bits));
+}
+
+static inline bool asid_gen_match(u64 asid)
+{
+	return asid_match(asid, asid_read_generation());
+}
 
 static void flush_context(void)
 {
 	int i;
 	u64 asid;
 
+	flush_generation();
+
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	set_reserved_asid_bits();
 
@@ -163,7 +189,7 @@ static u64 new_context(struct mm_struct *mm)
 {
 	static u32 cur_idx = 1;
 	u64 asid = atomic64_read(&mm->context.id);
-	u64 generation = atomic64_read(&asid_generation);
+	u64 generation = asid_read_generation();
 
 	if (asid != 0) {
 		u64 newasid = asid2ctxid(ctxid2asid(asid), generation);
@@ -202,14 +228,12 @@ static u64 new_context(struct mm_struct *mm)
 	if (asid != NUM_USER_ASIDS)
 		goto set_asid;
 
-	/* We're out of ASIDs, so increment the global generation count */
-	generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
-						 &asid_generation);
 	flush_context();
 
 	/* We have more ASIDs than CPUs, so this will always succeed */
 	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 
+	generation = asid_read_generation();
 set_asid:
 	__set_bit(asid, asid_map);
 	cur_idx = asid;
@@ -405,7 +429,8 @@ arch_initcall(asids_update_limit);
 static int asids_init(void)
 {
 	asid_bits = get_cpu_asid_bits();
-	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+	asid_generation_init();
+
 	asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
 	if (!asid_map)
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
-- 
2.27.0



* [PATCH 3/5] arm64: mm: Use cpumask in flush_context()
  2022-10-17  8:31 [PATCH 0/5] Support ASID Isolation mechanism Yunfeng Ye
  2022-10-17  8:31 ` [PATCH 1/5] arm64: mm: Define asid_bitmap structure for pinned_asid Yunfeng Ye
  2022-10-17  8:32 ` [PATCH 2/5] arm64: mm: Extract the processing of asid_generation Yunfeng Ye
@ 2022-10-17  8:32 ` Yunfeng Ye
  2022-10-17  8:32 ` [PATCH 4/5] arm64: mm: Support ASID isolation feature Yunfeng Ye
  2022-10-17  8:32 ` [PATCH 5/5] arm64: mm: Add TLB flush trace on context switch Yunfeng Ye
  4 siblings, 0 replies; 11+ messages in thread
From: Yunfeng Ye @ 2022-10-17  8:32 UTC (permalink / raw)
  To: catalin.marinas, will, wangkefeng.wang, linux-arm-kernel,
	linux-kernel, yeyunfeng
  Cc: linfeilong

Currently, flush_context() queues a TLB flush for all CPUs. To prepare
for flushing the TLBs of only a subset of the CPUs, introduce
asid_housekeeping_mask and use cpumask_or() instead of cpumask_setall().

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
---
 arch/arm64/mm/context.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 380c7b05c36b..e402997aa1c2 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/cpumask.h>
 
 #include <asm/cpufeature.h>
 #include <asm/mmu_context.h>
@@ -32,6 +33,7 @@ static unsigned long *asid_map;
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 static cpumask_t tlb_flush_pending;
+static const struct cpumask *asid_housekeeping_mask;
 
 static struct asid_bitmap pinned_asid;
 
@@ -129,17 +131,23 @@ static inline bool asid_gen_match(u64 asid)
 	return asid_match(asid, asid_read_generation());
 }
 
+static const struct cpumask *flush_cpumask(void)
+{
+	return asid_housekeeping_mask;
+}
+
 static void flush_context(void)
 {
 	int i;
 	u64 asid;
+	const struct cpumask *cpumask = flush_cpumask();
 
 	flush_generation();
 
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	set_reserved_asid_bits();
 
-	for_each_possible_cpu(i) {
+	for_each_cpu(i, cpumask) {
 		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
 		/*
 		 * If this CPU has already been through a
@@ -158,7 +166,7 @@ static void flush_context(void)
 	 * Queue a TLB invalidation for each CPU to perform on next
 	 * context-switch
 	 */
-	cpumask_setall(&tlb_flush_pending);
+	cpumask_or(&tlb_flush_pending, &tlb_flush_pending, cpumask);
 }
 
 static bool check_update_reserved_asid(u64 asid, u64 newasid)
@@ -439,6 +447,8 @@ static int asids_init(void)
 	pinned_asid.map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
 	pinned_asid.nr = 0;
 
+	asid_housekeeping_mask = cpu_possible_mask;
+
 	/*
 	 * We cannot call set_reserved_asid_bits() here because CPU
 	 * caps are not finalized yet, so it is safer to assume KPTI
-- 
2.27.0



* [PATCH 4/5] arm64: mm: Support ASID isolation feature
  2022-10-17  8:31 [PATCH 0/5] Support ASID Isolation mechanism Yunfeng Ye
                   ` (2 preceding siblings ...)
  2022-10-17  8:32 ` [PATCH 3/5] arm64: mm: Use cpumask in flush_context() Yunfeng Ye
@ 2022-10-17  8:32 ` Yunfeng Ye
  2022-11-09 12:43   ` Catalin Marinas
  2022-10-17  8:32 ` [PATCH 5/5] arm64: mm: Add TLB flush trace on context switch Yunfeng Ye
  4 siblings, 1 reply; 11+ messages in thread
From: Yunfeng Ye @ 2022-10-17  8:32 UTC (permalink / raw)
  To: catalin.marinas, will, wangkefeng.wang, linux-arm-kernel,
	linux-kernel, yeyunfeng
  Cc: linfeilong

After a rollover, the global generation is flushed, which causes the
mm->context.id of processes on all CPUs to no longer match the current
generation. Thus, on context switch the processes compete for the global
spinlock to reallocate a new ASID and refresh the TLBs of all CPUs. This
increases scheduling delay and TLB misses.

In some latency-sensitive scenarios, for example when part of the CPUs
are isolated and only a limited number of processes are deployed on the
isolated CPUs, we do not want these key processes to be affected by ASID
rollover.

An ASID isolation method can reduce this interference. We divide
asid_generation into different domains, for example HOUSEKEEPING and
ISOLATION. Processes in either domain allocate ASIDs from the shared
asid_map pool and combine them with the generation of their local domain
to form mm->context.id. After an ASID rollover, the generation of the
HOUSEKEEPING domain can be flushed independently and only the TLBs of
the HOUSEKEEPING domain CPUs are flushed, so processes in the ISOLATION
domain are not affected.

In addition, the ASIDs of the ISOLATION domain are recorded in the
isolated_asid bitmap. When asid_map is refreshed, isolated_asid must be
copied into asid_map to ensure that the ASIDs of the ISOLATION domain
are not allocated to other processes.

The following figure shows the example:

    HOUSEKEEPING (genid: G1)            ISOLATION (genid: G2)

    task1(G1,1)                         task2(G2,2)     task3(G2,3)

    cpu0        cpu1                    cpu3    cpu4    cpu5
    -------------------------           -----------------------
                            \           /              |
                             \         /      isolated_asid: [2,3]
                              \       /
                 asid_map: [1,2,3,4,...,65536]

Task1 runs in the HOUSEKEEPING domain and allocates ASID 1 from the
shared asid_map, so the context id of task1 is (G1,1). Task2 and task3
run in the ISOLATION domain; they allocate ASIDs 2 and 3 from the shared
asid_map and record them in isolated_asid, so the context id of task2 is
(G2,2) and the context id of task3 is (G2,3). After a rollover, the
generation of the HOUSEKEEPING domain is flushed, for example to G3, and
the context id of task1 becomes (G3,1). The generation of the ISOLATION
domain is not affected.

In some scenarios a process has multiple threads that run in different
domains, or processes migrate between domains. Since a process has only
one context id, the question is which generation to select in this case.
Our approach is that once a process has run in the ISOLATION domain, the
generation of the ISOLATION domain is selected.

For example:

    HOUSEKEEPING (genid: G1)            ISOLATION (genid: G2)

    task1(G1,1)               ====>     task1(G2,1)
    task2(G2,2)               <====     task2(G2,2)

    cpu0        cpu1                    cpu3    cpu4    cpu5
    -------------------------           -----------------------

When task1 migrates from the HOUSEKEEPING domain to the ISOLATION
domain, its generation G1 must be changed to G2 and ASID 1 is recorded
in the isolated_asid bitmap. When task2 migrates from the ISOLATION
domain to the HOUSEKEEPING domain, it keeps using generation G2. In this
way we resolve which generation should be selected when a process
migrates.

As mentioned before, the generations of the two domains are different.
We divide the generation into two parts: the lowest generation bit is
used as a Flag to distinguish the HOUSEKEEPING and ISOLATION domains,
and the remaining bits form the Upper-generation. After a rollover, only
the Upper-generation is flushed; the Flag bit never changes during the
entire lifetime. This ensures that the generations of the two domains
are always different.

    asid_generation    |---------------------------|-|--------|
                              Upper-generation     Flag
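
A minimal sketch of this layout, assuming 16-bit ASIDs (so NUM_USER_ASIDS
is 1 << 16); the macro names are the ones introduced by this patch:

    /* bits [15:0]  : ASID                                   */
    /* bit  16      : Flag (0 = HOUSEKEEPING, 1 = ISOLATION) */
    /* bits [63:17] : Upper-generation                       */
    #define ASID_ISOLATION_FLAG	(NUM_USER_ASIDS)	/* 0x10000 */
    #define ASID_FIRST_VERSION	(NUM_USER_ASIDS << 1)	/* 0x20000 */

    /*
     * For example, asid2ctxid(3, generation) yields 0x20003 for ASID 3
     * in the first HOUSEKEEPING generation and 0x10003 for ASID 3 in the
     * first ISOLATION generation. A rollover adds ASID_FIRST_VERSION to
     * the generation, so the Flag bit is never touched.
     */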

Finally, it is important to select which domain's generation and TLBs
are flushed after a rollover. By default, only the HOUSEKEEPING domain
is selected. When the number of ASIDs in the ISOLATION domain exceeds
the maximum threshold, the ISOLATION domain is selected too.

The ASID isolation feature is disabled by default; a kernel command line
parameter is provided to enable it.
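
For example (the CPU numbers here simply follow the figures above, and
the isolcpus= semantics are the standard upstream ones), booting with:

    isolcpus=3-5 asid_isolation

removes CPUs 3-5 from the HK_TYPE_DOMAIN housekeeping mask so that they
form the ISOLATION domain, and enables the ASID isolation feature.
Without "asid_isolation" on the command line, the allocator behaves as
before.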

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
---
 arch/arm64/mm/context.c | 203 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 183 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e402997aa1c2..0ea3e7485ae7 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/cpumask.h>
+#include <linux/sched/isolation.h>
 
 #include <asm/cpufeature.h>
 #include <asm/mmu_context.h>
@@ -24,10 +25,20 @@ struct asid_bitmap {
 	unsigned long max;
 };
 
+enum {
+	ASID_HOUSEKEEPING = 0,
+	ASID_ISOLATION = 1,
+	ASID_TYPE_MAX,
+};
+
+struct asid_domain {
+	atomic64_t asid_generation;
+};
+
 static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
-static atomic64_t asid_generation;
+static struct asid_domain asid_domain[ASID_TYPE_MAX];
 static unsigned long *asid_map;
 
 static DEFINE_PER_CPU(atomic64_t, active_asids);
@@ -36,11 +47,16 @@ static cpumask_t tlb_flush_pending;
 static const struct cpumask *asid_housekeeping_mask;
 
 static struct asid_bitmap pinned_asid;
+static struct asid_bitmap isolated_asid;
+
+static int asid_isolation_cmdline;
+static DEFINE_STATIC_KEY_FALSE(asid_isolation_enable);
 
 #define ASID_MASK		(~GENMASK(asid_bits - 1, 0))
-#define ASID_FIRST_VERSION	(1UL << asid_bits)
+#define NUM_USER_ASIDS		(1UL << asid_bits)
 
-#define NUM_USER_ASIDS		ASID_FIRST_VERSION
+#define ASID_ISOLATION_FLAG	(NUM_USER_ASIDS)
+#define ASID_FIRST_VERSION	(NUM_USER_ASIDS << 1)
 #define ctxid2asid(asid)	((asid) & ~ASID_MASK)
 #define asid2ctxid(asid, genid)	((asid) | (genid))
 
@@ -94,6 +110,61 @@ static void set_kpti_asid_bits(unsigned long *map)
 	memset(map, 0xaa, len);
 }
 
+static inline bool is_isolated_asid(u64 asid)
+{
+	/*
+	 * Note that asid 0 is not the isolated asid. The judgment
+	 * is correct in this situation since the ASID_ISOLATION_FLAG
+	 * bit is defined as 1 to indicate ISOLATION domain.
+	 */
+	return asid & ASID_ISOLATION_FLAG;
+}
+
+static inline bool on_isolated_cpu(int cpu)
+{
+	return !cpumask_test_cpu(cpu, asid_housekeeping_mask);
+}
+
+static inline int asid_domain_type(u64 asid, unsigned int cpu)
+{
+	if (on_isolated_cpu(cpu) || is_isolated_asid(asid))
+		return ASID_ISOLATION;
+
+	return ASID_HOUSEKEEPING;
+}
+
+static inline int asid_flush_type(void)
+{
+	if (isolated_asid.nr > isolated_asid.max)
+		return ASID_ISOLATION;
+	else
+		return ASID_HOUSEKEEPING;
+}
+
+static void asid_try_to_isolate(u64 asid)
+{
+	if (!static_branch_unlikely(&asid_isolation_enable))
+		return;
+
+	if (!is_isolated_asid(asid))
+		return;
+	if (!__test_and_set_bit(ctxid2asid(asid), isolated_asid.map))
+		isolated_asid.nr++;
+}
+
+static void update_reserved_asid_bits(void)
+{
+	if (!static_branch_unlikely(&asid_isolation_enable))
+		return;
+
+	if (asid_flush_type() == ASID_HOUSEKEEPING) {
+		bitmap_or(asid_map, asid_map, isolated_asid.map, NUM_USER_ASIDS);
+	} else {
+		bitmap_zero(isolated_asid.map, NUM_USER_ASIDS);
+		isolated_asid.nr = 0;
+	}
+}
+
 static void set_reserved_asid_bits(void)
 {
 	if (pinned_asid.map)
@@ -102,23 +173,51 @@ static void set_reserved_asid_bits(void)
 		set_kpti_asid_bits(asid_map);
 	else
 		bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+
+	update_reserved_asid_bits();
 }
 
 static void asid_generation_init(void)
 {
-	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+	struct asid_domain *ad;
+
+	ad = &asid_domain[ASID_HOUSEKEEPING];
+	atomic64_set(&ad->asid_generation, ASID_FIRST_VERSION);
+
+	ad = &asid_domain[ASID_ISOLATION];
+	atomic64_set(&ad->asid_generation, ASID_ISOLATION_FLAG);
 }
 
 static void flush_generation(void)
 {
+	struct asid_domain *ad = &asid_domain[ASID_HOUSEKEEPING];
+
 	/* We're out of ASIDs, so increment the global generation count */
 	atomic64_add_return_relaxed(ASID_FIRST_VERSION,
-					&asid_generation);
+					&ad->asid_generation);
+
+	if (asid_flush_type() == ASID_ISOLATION) {
+		ad = &asid_domain[ASID_ISOLATION];
+		atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+					&ad->asid_generation);
+	}
 }
 
-static inline u64 asid_read_generation(void)
+static inline u64 asid_read_generation(int type)
 {
-	return atomic64_read(&asid_generation);
+	struct asid_domain *ad = &asid_domain[type];
+
+	return atomic64_read(&ad->asid_generation);
+}
+
+static inline u64 asid_curr_generation(u64 asid)
+{
+	int type = ASID_HOUSEKEEPING;
+
+	if (static_branch_unlikely(&asid_isolation_enable))
+		type = asid_domain_type(asid, smp_processor_id());
+
+	return asid_read_generation(type);
 }
 
 static inline bool asid_match(u64 asid, u64 genid)
@@ -128,12 +227,28 @@ static inline bool asid_match(u64 asid, u64 genid)
 
 static inline bool asid_gen_match(u64 asid)
 {
-	return asid_match(asid, asid_read_generation());
+	return asid_match(asid, asid_curr_generation(asid));
+}
+
+static bool asid_is_migrated(u64 asid, u64 newasid)
+{
+	if (!static_branch_unlikely(&asid_isolation_enable))
+		return false;
+
+	if (!is_isolated_asid(asid) && is_isolated_asid(newasid)) {
+		u64 generation = asid_read_generation(ASID_HOUSEKEEPING);
+
+		return asid_match(asid, generation);
+	}
+	return false;
 }
 
 static const struct cpumask *flush_cpumask(void)
 {
-	return asid_housekeeping_mask;
+	if (asid_flush_type() == ASID_HOUSEKEEPING)
+		return asid_housekeeping_mask;
+
+	return cpu_possible_mask;
 }
 
 static void flush_context(void)
@@ -159,6 +274,7 @@ static void flush_context(void)
 		if (asid == 0)
 			asid = per_cpu(reserved_asids, i);
 		__set_bit(ctxid2asid(asid), asid_map);
+		asid_try_to_isolate(asid);
 		per_cpu(reserved_asids, i) = asid;
 	}
 
@@ -193,21 +309,23 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid)
 	return hit;
 }
 
-static u64 new_context(struct mm_struct *mm)
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 {
 	static u32 cur_idx = 1;
 	u64 asid = atomic64_read(&mm->context.id);
-	u64 generation = asid_read_generation();
+	int domain = asid_domain_type(asid, cpu);
+	u64 generation = asid_read_generation(domain);
+	u64 newasid;
 
 	if (asid != 0) {
-		u64 newasid = asid2ctxid(ctxid2asid(asid), generation);
+		newasid = asid2ctxid(ctxid2asid(asid), generation);
 
 		/*
 		 * If our current ASID was active during a rollover, we
 		 * can continue to use it and this was just a false alarm.
 		 */
 		if (check_update_reserved_asid(asid, newasid))
-			return newasid;
+			goto out;
 
 		/*
 		 * If it is pinned, we can keep using it. Note that reserved
@@ -215,14 +333,21 @@ static u64 new_context(struct mm_struct *mm)
 		 * update the generation into the reserved_asids.
 		 */
 		if (refcount_read(&mm->context.pinned))
-			return newasid;
+			goto out;
 
 		/*
 		 * We had a valid ASID in a previous life, so try to re-use
 		 * it if possible.
 		 */
 		if (!__test_and_set_bit(ctxid2asid(asid), asid_map))
-			return newasid;
+			goto out;
+
+		/*
+		 * We still have a valid ASID now, but the ASID is migrated from
+		 * normal to isolated domain, we should re-use it.
+		 */
+		if (asid_is_migrated(asid, newasid))
+			goto out;
 	}
 
 	/*
@@ -241,11 +366,14 @@ static u64 new_context(struct mm_struct *mm)
 	/* We have more ASIDs than CPUs, so this will always succeed */
 	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 
-	generation = asid_read_generation();
+	generation = asid_read_generation(domain);
 set_asid:
 	__set_bit(asid, asid_map);
 	cur_idx = asid;
-	return asid2ctxid(asid, generation);
+	newasid = asid2ctxid(asid, generation);
+out:
+	asid_try_to_isolate(newasid);
+	return newasid;
 }
 
 void check_and_switch_context(struct mm_struct *mm)
@@ -282,12 +410,12 @@ void check_and_switch_context(struct mm_struct *mm)
 	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
 	/* Check that our ASID belongs to the current generation. */
 	asid = atomic64_read(&mm->context.id);
+	cpu = smp_processor_id();
 	if (!asid_gen_match(asid)) {
-		asid = new_context(mm);
+		asid = new_context(mm, cpu);
 		atomic64_set(&mm->context.id, asid);
 	}
 
-	cpu = smp_processor_id();
 	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
 		local_flush_tlb_all();
 
@@ -327,11 +455,12 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
 	}
 
 	if (!asid_gen_match(asid)) {
+		unsigned int cpu = smp_processor_id();
 		/*
 		 * We went through one or more rollover since that ASID was
 		 * used. Ensure that it is still valid, or generate a new one.
 		 */
-		asid = new_context(mm);
+		asid = new_context(mm, cpu);
 		atomic64_set(&mm->context.id, asid);
 	}
 
@@ -430,10 +559,36 @@ static int asids_update_limit(void)
 	 * are pinned, there still is at least one empty slot in the ASID map.
 	 */
 	pinned_asid.max = num_available_asids - num_possible_cpus() - 2;
+
+	/*
+	 * Generally, the user does not care about the number of asids, so set
+	 * to half of the total number as the default setting of the maximum
+	 * threshold of the isolated asid.
+	 */
+	if (isolated_asid.map)
+		isolated_asid.max = num_available_asids / 2;
+
 	return 0;
 }
 arch_initcall(asids_update_limit);
 
+static void asid_isolation_init(void)
+{
+	if (asid_isolation_cmdline == 0)
+		return;
+
+	if (!housekeeping_enabled(HK_TYPE_DOMAIN))
+		return;
+
+	isolated_asid.map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
+	if (!isolated_asid.map)
+		return;
+
+	asid_housekeeping_mask = housekeeping_cpumask(HK_TYPE_DOMAIN);
+	static_branch_enable(&asid_isolation_enable);
+	pr_info("ASID Isolation enable\n");
+}
+
 static int asids_init(void)
 {
 	asid_bits = get_cpu_asid_bits();
@@ -448,6 +603,7 @@ static int asids_init(void)
 	pinned_asid.nr = 0;
 
 	asid_housekeeping_mask = cpu_possible_mask;
+	asid_isolation_init();
 
 	/*
 	 * We cannot call set_reserved_asid_bits() here because CPU
@@ -459,3 +615,10 @@ static int asids_init(void)
 	return 0;
 }
 early_initcall(asids_init);
+
+static int __init asid_isolation_setup(char *str)
+{
+	asid_isolation_cmdline = 1;
+	return 1;
+}
+__setup("asid_isolation", asid_isolation_setup);
-- 
2.27.0



* [PATCH 5/5] arm64: mm: Add TLB flush trace on context switch
  2022-10-17  8:31 [PATCH 0/5] Support ASID Isolation mechanism Yunfeng Ye
                   ` (3 preceding siblings ...)
  2022-10-17  8:32 ` [PATCH 4/5] arm64: mm: Support ASID isolation feature Yunfeng Ye
@ 2022-10-17  8:32 ` Yunfeng Ye
  4 siblings, 0 replies; 11+ messages in thread
From: Yunfeng Ye @ 2022-10-17  8:32 UTC (permalink / raw)
  To: catalin.marinas, will, wangkefeng.wang, linux-arm-kernel,
	linux-kernel, yeyunfeng
  Cc: linfeilong

We currently have no way to observe how many times the TLB is flushed on
context switch. Add trace_tlb_flush() in check_and_switch_context() to
make this observable.
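
With the patch applied, the flushes can be observed through the existing
tlb:tlb_flush tracepoint, for example (assuming tracefs is mounted at
/sys/kernel/tracing):

    echo 1 > /sys/kernel/tracing/events/tlb/tlb_flush/enable
    cat /sys/kernel/tracing/trace_pipe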

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
---
 arch/arm64/mm/context.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 0ea3e7485ae7..eab470a97620 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -19,6 +19,8 @@
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/tlb.h>
+
 struct asid_bitmap {
 	unsigned long *map;
 	unsigned long nr;
@@ -60,6 +62,8 @@ static DEFINE_STATIC_KEY_FALSE(asid_isolation_enable);
 #define ctxid2asid(asid)	((asid) & ~ASID_MASK)
 #define asid2ctxid(asid, genid)	((asid) | (genid))
 
+#define TLB_FLUSH_ALL		(-1)
+
 /* Get the ASIDBits supported by the current CPU */
 static u32 get_cpu_asid_bits(void)
 {
@@ -416,8 +420,10 @@ void check_and_switch_context(struct mm_struct *mm)
 		atomic64_set(&mm->context.id, asid);
 	}
 
-	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
 		local_flush_tlb_all();
+		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+	}
 
 	atomic64_set(this_cpu_ptr(&active_asids), asid);
 	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
-- 
2.27.0



* Re: [PATCH 4/5] arm64: mm: Support ASID isolation feature
  2022-10-17  8:32 ` [PATCH 4/5] arm64: mm: Support ASID isolation feature Yunfeng Ye
@ 2022-11-09 12:43   ` Catalin Marinas
  2022-11-10  7:07     ` Yunfeng Ye
  0 siblings, 1 reply; 11+ messages in thread
From: Catalin Marinas @ 2022-11-09 12:43 UTC (permalink / raw)
  To: Yunfeng Ye
  Cc: will, wangkefeng.wang, linux-arm-kernel, linux-kernel, linfeilong

On Mon, Oct 17, 2022 at 04:32:02PM +0800, Yunfeng Ye wrote:
> After a rollover, the global generation will be flushed, which will
> cause the process mm->context.id on all CPUs do not match the
> generation. Thus, the process will compete for the global spinlock lock
> to reallocate a new ASID and refresh the TLBs of all CPUs on context
> switch. This will lead to the increase of scheduling delay and TLB miss.
> 
> In some delay-sensitive scenarios, for example, part of CPUs are
> isolated, only a limited number of processes are deployed to run on the
> isolated CPUs. In this case, we do not want these key processes to be
> affected by the rollover of ASID.

Part of this commit log should also go in the cover letter and it would
help to back this up by some numbers, e.g. what percentage improvement
you get with this patchset by running hackbench on an isolated CPU.

In theory it looks like CPU isolation would benefit from this patchset
but we try not to touch this code often, so any modification should come
with proper justification, backed by numbers.

Note that I haven't reviewed the algorithm you are proposing in detail,
only had a brief look.

-- 
Catalin


* Re: [PATCH 4/5] arm64: mm: Support ASID isolation feature
  2022-11-09 12:43   ` Catalin Marinas
@ 2022-11-10  7:07     ` Yunfeng Ye
  2022-11-28 17:00       ` Catalin Marinas
  0 siblings, 1 reply; 11+ messages in thread
From: Yunfeng Ye @ 2022-11-10  7:07 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: will, wangkefeng.wang, linux-arm-kernel, linux-kernel, linfeilong



On 2022/11/9 20:43, Catalin Marinas wrote:
> On Mon, Oct 17, 2022 at 04:32:02PM +0800, Yunfeng Ye wrote:
>> After a rollover, the global generation will be flushed, which will
>> cause the process mm->context.id on all CPUs do not match the
>> generation. Thus, the process will compete for the global spinlock lock
>> to reallocate a new ASID and refresh the TLBs of all CPUs on context
>> switch. This will lead to the increase of scheduling delay and TLB miss.
>>
>> In some delay-sensitive scenarios, for example, part of CPUs are
>> isolated, only a limited number of processes are deployed to run on the
>> isolated CPUs. In this case, we do not want these key processes to be
>> affected by the rollover of ASID.
> 
> Part of this commit log should also go in the cover letter and it would
> help to back this up by some numbers, e.g. what percentage improvement
> you get with this patchset by running hackbench on an isolated CPU.
> 
> In theory it looks like CPU isolation would benefit from this patchset
> but we try not to touch this code often, so any modification should come
> with proper justification, backed by numbers.
> 
Yes, CPU isolation will benefit from this patchset. We used the
cyclictest tool to measure the maximum scheduling and interrupt latencies
and found that the sched_switch path sometimes takes several
microseconds. The analysis shows that the delay is caused by the ASID
refresh.

We used simple test cases to consume ASIDs quickly, which increases the
ASID refresh frequency and the contention on the global ASID spinlock.
In this case, the delay between sched_switch and tlb_flush can reach
63 us. The following is the trace log:

    stress-ng-2864907 [012] dN.. 17006.430048: sched_stat_runtime: comm=stress-ng pid=2864907 runtime=859130 [ns] vruntime=9015202524211 [ns]
    stress-ng-2864907 [012] d... 17006.430048: sched_switch: prev_comm=stress-ng prev_pid=2864907 prev_prio=120 prev_state=R ==> next_comm=cyclictest next_pid=2866344 next_prio=19
    stress-ng-2864907 [012] d... 17006.430111: tlb_flush: pages:-1 reason:flush on task switch (0)
// 17006.430111 - 17006.430048 = 63 us

    cyclictest-2866344 [012] .... 17006.430112: kfree: call_site=__audit_syscall_exit+0x210/0x250 ptr=0000000000000000
    cyclictest-2866344 [012] .... 17006.430112: sys_exit: NR 115 = 0
    cyclictest-2866344 [012] .... 17006.430112: sys_clock_nanosleep -> 0x0
    cyclictest-2866344 [012] d... 17006.430113: user_enter:
    cyclictest-2866344 [012] d... 17006.430126: user_exit:
    cyclictest-2866344 [012] .... 17006.430126: sys_enter: NR 64 (4, ffffa451c4d0, 1f, 0, 3b, 0)
    cyclictest-2866344 [012] .... 17006.430126: sys_write(fd: 4, buf: ffffa451c4d0, count: 1f)
    cyclictest-2866344 [012] .... 17006.430129: tracing_mark_write: hit latency threshold (72 > 30)

The delay caused by ASID interference varies from several nanoseconds to
several microseconds, depending on the amount of concurrent contention.
With this patch series, the delay caused by ASID interference on the
isolated CPUs can be reduced.

Thanks.

> Note that I haven't reviewed the algorithm you are proposing in detail,
> only had a brief look.
> 


* Re: [PATCH 4/5] arm64: mm: Support ASID isolation feature
  2022-11-10  7:07     ` Yunfeng Ye
@ 2022-11-28 17:00       ` Catalin Marinas
  2022-11-29 12:26         ` Yunfeng Ye
  0 siblings, 1 reply; 11+ messages in thread
From: Catalin Marinas @ 2022-11-28 17:00 UTC (permalink / raw)
  To: Yunfeng Ye
  Cc: will, wangkefeng.wang, linux-arm-kernel, linux-kernel, linfeilong

On Thu, Nov 10, 2022 at 03:07:53PM +0800, Yunfeng Ye wrote:
> On 2022/11/9 20:43, Catalin Marinas wrote:
> > On Mon, Oct 17, 2022 at 04:32:02PM +0800, Yunfeng Ye wrote:
> >> After a rollover, the global generation will be flushed, which will
> >> cause the process mm->context.id on all CPUs do not match the
> >> generation. Thus, the process will compete for the global spinlock lock
> >> to reallocate a new ASID and refresh the TLBs of all CPUs on context
> >> switch. This will lead to the increase of scheduling delay and TLB miss.
> >>
> >> In some delay-sensitive scenarios, for example, part of CPUs are
> >> isolated, only a limited number of processes are deployed to run on the
> >> isolated CPUs. In this case, we do not want these key processes to be
> >> affected by the rollover of ASID.
> > 
> > Part of this commit log should also go in the cover letter and it would
> > help to back this up by some numbers, e.g. what percentage improvement
> > you get with this patchset by running hackbench on an isolated CPU.
> > 
> > In theory it looks like CPU isolation would benefit from this patchset
> > but we try not to touch this code often, so any modification should come
> > with proper justification, backed by numbers.
> > 
> Yes, CPU isolation will benefit from this patchset. We use cyclictest tool
> to test the maximum scheduling and interrupt delays, found that the
> sched_switch process takes several microseconds sometimes, The analysis
> result shows that the delay is caused by the ASID refresh.

Do you know whether it's predominantly the spinlock or the TLBI that's
causing this (or just a combination of the two)?

I was talking to Will and concluded we should try to reuse the ASID
pinning code that's already in that file rather than adding a new
bitmap. At a high level, a thread migrating to an isolated CPU can have
its ASID pinned. If context switching only happens between pinned ASIDs
on an isolated CPU, we may be able to avoid the lock even if the
generation rolled over on another CPU.

I think the tricky problem is when a pinned ASID task eventually dies,
possibly after migrating to another CPU. If we avoided the TLBI on
generation roll-over for the isolated CPU, it will have stale entries.
One option would be to broadcast a TLBI for the pinned ASID when the
task dies, though this would introduce some jitter. An alternative may
be to track whether a pinned ASID ever run on a CPU and do a local TLBI
for that ASID when a pinned thread is migrated.

All these need a lot more thinking and (formal) modelling. I have a TLA+
model but I haven't updated it to cover the pinned ASIDs. Or,
alternatively, make the current code stand-alone and get it through CBMC
(faking the spinlock as pthread mutexes and implementing some of the
atomics in plain C with __CPROVER_atomic_begin/end).

-- 
Catalin


* Re: [PATCH 4/5] arm64: mm: Support ASID isolation feature
  2022-11-28 17:00       ` Catalin Marinas
@ 2022-11-29 12:26         ` Yunfeng Ye
  0 siblings, 0 replies; 11+ messages in thread
From: Yunfeng Ye @ 2022-11-29 12:26 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: will, wangkefeng.wang, linux-arm-kernel, linux-kernel, linfeilong



On 2022/11/29 1:00, Catalin Marinas wrote:
> On Thu, Nov 10, 2022 at 03:07:53PM +0800, Yunfeng Ye wrote:
>> On 2022/11/9 20:43, Catalin Marinas wrote:
>>> On Mon, Oct 17, 2022 at 04:32:02PM +0800, Yunfeng Ye wrote:
>>>> After a rollover, the global generation will be flushed, which will
>>>> cause the process mm->context.id on all CPUs do not match the
>>>> generation. Thus, the process will compete for the global spinlock lock
>>>> to reallocate a new ASID and refresh the TLBs of all CPUs on context
>>>> switch. This will lead to the increase of scheduling delay and TLB miss.
>>>>
>>>> In some delay-sensitive scenarios, for example, part of CPUs are
>>>> isolated, only a limited number of processes are deployed to run on the
>>>> isolated CPUs. In this case, we do not want these key processes to be
>>>> affected by the rollover of ASID.
>>>
>>> Part of this commit log should also go in the cover letter and it would
>>> help to back this up by some numbers, e.g. what percentage improvement
>>> you get with this patchset by running hackbench on an isolated CPU.
>>>
>>> In theory it looks like CPU isolation would benefit from this patchset
>>> but we try not to touch this code often, so any modification should come
>>> with proper justification, backed by numbers.
>>>
>> Yes, CPU isolation will benefit from this patchset. We use cyclictest tool
>> to test the maximum scheduling and interrupt delays, found that the
>> sched_switch process takes several microseconds sometimes, The analysis
>> result shows that the delay is caused by the ASID refresh.
> 
> Do you know whether it's predominantly the spinlock or the TLBI that's
> causing this (or just a combination of the two)?
> 
I think the spinlock is the main factor, although I did not measure how
much time each of the two takes. Note that the TLBI is currently
performed under the spinlock, so its cost also adds to the time spent
holding the spinlock.

> I was talking to Will and concluded we should try to reuse the ASID
> pinning code that's already in that file rather than adding a new
> bitmap. At a high level, a thread migrating to an isolated CPU can have
At first I wanted to reuse the pinned ASID bitmap too, which is the same
idea as yours, but there is a difference between the pinned bitmap and
the isolation bitmap: the pinned bitmap is not changed on generation
rollover, while the isolation bitmap needs to be flushed.

The idea "broadcast a TLBI for the pinned ASID when the task dies" you
mentioned below maybe can reuse the pinned bitmap. I've considered this idea
too, I think this method is not as good as the current two bitmap method:

1. It introduces some TLBI jitter and may increase contention on the
spinlock when updating the pinned bitmap; we do not want that jitter on
the isolated CPUs.

2. Another disadvantage is that if only one pinned bitmap is used and a
large number of processes live in the isolation domain without exiting,
the available ASIDs may become insufficient. For example, how should we
handle more than 65536 processes running or sleeping on the isolated
CPUs?

> its ASID pinned. If context switching only happens between pinned ASIDs
> on an isolated CPU, we may be able to avoid the lock even if the
> generation rolled over on another CPU.
> 
> I think the tricky problem is when a pinned ASID task eventually dies,
> possibly after migrating to another CPU. If we avoided the TLBI on
> generation roll-over for the isolated CPU, it will have stale entries.
> One option would be to broadcast a TLBI for the pinned ASID when the
> task dies, though this would introduce some jitter. An alternative may
> be to track whether a pinned ASID ever run on a CPU and do a local TLBI
> for that ASID when a pinned thread is migrated.
> 
> All these need a lot more thinking and (formal) modelling. I have a TLA+
> model but I haven't updated it to cover the pinned ASIDs. Or,
> alternatively, make the current code stand-alone and get it through CBMC
> (faking the spinlock as pthread mutexes and implementing some of the
> atomics in plain C with __CPROVER_atomic_begin/end).
> 


* [PATCH 2/5] arm64: mm: Extract the processing of asid_generation
  2022-10-17  8:12 [PATCH 0/5] Support ASID Isolation mechanism y00318929
@ 2022-10-17  8:12 ` y00318929
  0 siblings, 0 replies; 11+ messages in thread
From: y00318929 @ 2022-10-17  8:12 UTC (permalink / raw)
  To: catalin.marinas, will, wangkefeng.wang, linux-arm-kernel,
	linux-kernel, yeyunfeng

From: Yunfeng Ye <yeyunfeng@huawei.com>

To prepare for supporting the ASID isolation feature, extract the
handling of asid_generation into helper functions. This makes it
convenient to modify the asid_generation logic in one place.

In addition, it is clearer to call flush_generation() from
flush_context().

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
---
 arch/arm64/mm/context.c | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 8549b5f30352..380c7b05c36b 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -102,14 +102,40 @@ static void set_reserved_asid_bits(void)
 		bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 }
 
-#define asid_gen_match(asid) \
-	(!(((asid) ^ atomic64_read(&asid_generation)) >> asid_bits))
+static void asid_generation_init(void)
+{
+	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+}
+
+static void flush_generation(void)
+{
+	/* We're out of ASIDs, so increment the global generation count */
+	atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+					&asid_generation);
+}
+
+static inline u64 asid_read_generation(void)
+{
+	return atomic64_read(&asid_generation);
+}
+
+static inline bool asid_match(u64 asid, u64 genid)
+{
+	return (!(((asid) ^ (genid)) >> asid_bits));
+}
+
+static inline bool asid_gen_match(u64 asid)
+{
+	return asid_match(asid, asid_read_generation());
+}
 
 static void flush_context(void)
 {
 	int i;
 	u64 asid;
 
+	flush_generation();
+
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	set_reserved_asid_bits();
 
@@ -163,7 +189,7 @@ static u64 new_context(struct mm_struct *mm)
 {
 	static u32 cur_idx = 1;
 	u64 asid = atomic64_read(&mm->context.id);
-	u64 generation = atomic64_read(&asid_generation);
+	u64 generation = asid_read_generation();
 
 	if (asid != 0) {
 		u64 newasid = asid2ctxid(ctxid2asid(asid), generation);
@@ -202,14 +228,12 @@ static u64 new_context(struct mm_struct *mm)
 	if (asid != NUM_USER_ASIDS)
 		goto set_asid;
 
-	/* We're out of ASIDs, so increment the global generation count */
-	generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
-						 &asid_generation);
 	flush_context();
 
 	/* We have more ASIDs than CPUs, so this will always succeed */
 	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 
+	generation = asid_read_generation();
 set_asid:
 	__set_bit(asid, asid_map);
 	cur_idx = asid;
@@ -405,7 +429,8 @@ arch_initcall(asids_update_limit);
 static int asids_init(void)
 {
 	asid_bits = get_cpu_asid_bits();
-	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+	asid_generation_init();
+
 	asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
 	if (!asid_map)
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
-- 
2.27.0



end of thread

Thread overview: 11+ messages
2022-10-17  8:31 [PATCH 0/5] Support ASID Isolation mechanism Yunfeng Ye
2022-10-17  8:31 ` [PATCH 1/5] arm64: mm: Define asid_bitmap structure for pinned_asid Yunfeng Ye
2022-10-17  8:32 ` [PATCH 2/5] arm64: mm: Extract the processing of asid_generation Yunfeng Ye
2022-10-17  8:32 ` [PATCH 3/5] arm64: mm: Use cpumask in flush_context() Yunfeng Ye
2022-10-17  8:32 ` [PATCH 4/5] arm64: mm: Support ASID isolation feature Yunfeng Ye
2022-11-09 12:43   ` Catalin Marinas
2022-11-10  7:07     ` Yunfeng Ye
2022-11-28 17:00       ` Catalin Marinas
2022-11-29 12:26         ` Yunfeng Ye
2022-10-17  8:32 ` [PATCH 5/5] arm64: mm: Add TLB flush trace on context switch Yunfeng Ye
  -- strict thread matches above, loose matches on Subject: below --
2022-10-17  8:12 [PATCH 0/5] Support ASID Isolation mechanism y00318929
2022-10-17  8:12 ` [PATCH 2/5] arm64: mm: Extract the processing of asid_generation y00318929
