All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
@ 2017-09-03 14:31 ` Chao Fan
  0 siblings, 0 replies; 15+ messages in thread
From: Chao Fan @ 2017-09-03 14:31 UTC (permalink / raw)
  To: linux-kernel, x86, linux-acpi
  Cc: hpa, tglx, mingo, keescook, bhe, arnd, dyoung, dave.jiang,
	lv.zheng, rjw, indou.takao, izumi.taku, yasu.isimatu, Chao Fan

KASLR should choose the memory region of immovable node to extract kernel.
So get ACPI SRAT table and store the memory region of movable node which
kaslr should avoid.

Signed-off-by: Chao Fan <fanc.fnst@cn.fujitsu.com>
---
v2: Add the support for bios
---
 arch/x86/boot/compressed/kaslr.c | 346 +++++++++++++++++++++++++++++++++++++++
 arch/x86/boot/compressed/misc.h  |  42 +++++
 2 files changed, 388 insertions(+)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 17818ba6906f..8f2b42dc3685 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -45,6 +45,12 @@
 #define STATIC
 #include <linux/decompress/mm.h>
 
+#include <linux/efi.h>
+#include <linux/acpi.h>
+#include <acpi/acpi.h>
+#include <linux/numa.h>
+#include <asm/efi.h>
+
 extern unsigned long get_cmd_line_ptr(void);
 
 /* Simplified build-specific string for starting entropy. */
@@ -94,6 +100,18 @@ static bool memmap_too_large;
 /* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
 unsigned long long mem_limit = ULLONG_MAX;
 
+/* Store the max numbers of acpi tables */
+#define ACPI_MAX_TABLES		128
+
+/* Store the movable memory */
+static struct {
+	u64 start;
+	u64 end;
+} movable_mem[MAX_NUMNODES*2];
+
+/* Store the num of movable mem affinity */
+static int num_movable_ma;
+
 
 enum mem_avoid_index {
 	MEM_AVOID_ZO_RANGE = 0,
@@ -257,6 +275,296 @@ static int handle_mem_memmap(void)
 	return 0;
 }
 
+/*
+ * Find the ACPI RSDP through the EFI configuration tables.  An ACPI 2.0
+ * entry is returned as soon as it is seen; an ACPI 1.0 entry is kept only
+ * as a fallback.  Returns true with the address stored in @rsdp_addr.
+ */
+static bool efi_find_rsdp_addr(acpi_physical_address *rsdp_addr)
+{
+	efi_system_table_t *systab;
+	bool find_rsdp = false;
+	bool efi_64 = false;
+	void *config_tables;
+	struct efi_info *e;
+	char *sig;
+	int size, i;
+
+#ifndef CONFIG_EFI
+	return false;
+#endif
+
+	e = &boot_params->efi_info;
+	sig = (char *)&e->efi_loader_signature;
+
+	if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4))
+		efi_64 = true;
+	else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4))
+		efi_64 = false;
+	else {
+		debug_putstr("Wrong efi loader signature.\n");
+		return false;
+	}
+
+	// Get systab from boot params
+#ifdef CONFIG_X86_32
+	if (e->efi_systab_hi || e->efi_memmap_hi) {
+		debug_putstr("Table located above 4GB, disabling EFI.\n");
+		return false;
+	}
+	systab = (efi_system_table_t *)e->efi_systab;
+#else
+	systab = (efi_system_table_t *)(e->efi_systab |
+			((__u64)e->efi_systab_hi<<32));
+#endif
+
+	// Get efi tables from systab
+	size = efi_64 ? sizeof(efi_config_table_64_t) :
+			sizeof(efi_config_table_32_t);
+
+	for (i = 0; i < systab->nr_tables; i++) {
+		efi_guid_t guid;
+		u64 table;
+
+		config_tables = (void *)(systab->tables + size * i);
+		if (efi_64) {
+			efi_config_table_64_t *tmp_table = config_tables;
+
+			guid = tmp_table->guid;
+			/* Keep all 64 bits so the check below is meaningful */
+			table = tmp_table->table;
+#ifndef CONFIG_64BIT
+			if (table >> 32) {
+				debug_putstr
+				("Table located above 4G, disabling EFI.\n");
+				return false;
+			}
+#endif
+		} else {
+			efi_config_table_32_t *tmp_table = config_tables;
+
+			guid = tmp_table->guid;
+			table = tmp_table->table;
+		}
+
+		// Get rsdp from efi tables
+		if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
+			*rsdp_addr = (acpi_physical_address)table;
+			return true;
+		}
+		if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
+			*rsdp_addr = (acpi_physical_address)table;
+			find_rsdp = true;
+		}
+	}
+	return find_rsdp;
+}
+
+static u8 checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8)(sum + *(buffer++));
+
+	return sum;
+}
+
+static u8 *scan_memory_for_rsdp(u8 *start_address, u32 length)
+{
+	struct acpi_table_rsdp *rsdp;
+	u8 *end_address;
+	u8 *mem_rover;
+
+	end_address = start_address + length;
+
+	for (mem_rover = start_address; mem_rover < end_address;
+	     mem_rover += ACPI_RSDP_SCAN_STEP) {
+		rsdp = ACPI_CAST_PTR(struct acpi_table_rsdp, mem_rover);
+		if (!ACPI_VALIDATE_RSDP_SIG(rsdp->signature))
+			continue;
+		if (checksum((u8 *) rsdp,
+		    ACPI_RSDP_CHECKSUM_LENGTH) != 0)
+			continue;
+		if ((rsdp->revision >= 2) && (checksum((u8 *)
+		    rsdp, ACPI_RSDP_XCHECKSUM_LENGTH) != 0))
+			continue;
+		return mem_rover;
+	}
+	return NULL;
+}
+
+static void bios_find_rsdp_addr(acpi_physical_address *rsdp_addr)
+{
+	struct acpi_table_rsdp *rsdp;
+	u32 physical_address;
+	u8 *table_ptr;
+	u8 *mem_rover;
+
+	table_ptr = (u8 *)ACPI_EBDA_PTR_LOCATION;
+	ACPI_MOVE_16_TO_32(&physical_address, table_ptr);
+	physical_address <<= 4;
+	table_ptr = (u8 *)(acpi_physical_address)physical_address;
+
+	if (physical_address > 0x400) {
+		mem_rover =
+		    scan_memory_for_rsdp(table_ptr, ACPI_EBDA_WINDOW_SIZE);
+
+		if (mem_rover) {
+			physical_address +=
+			    (u32) ACPI_PTR_DIFF(mem_rover, table_ptr);
+
+			*rsdp_addr = (acpi_physical_address)physical_address;
+			return;
+		}
+	}
+
+	table_ptr = (u8 *)ACPI_HI_RSDP_WINDOW_BASE;
+	mem_rover = scan_memory_for_rsdp(table_ptr, ACPI_HI_RSDP_WINDOW_SIZE);
+
+	if (mem_rover) {
+		physical_address = (u32)
+		    (ACPI_HI_RSDP_WINDOW_BASE +
+		     ACPI_PTR_DIFF(mem_rover, table_ptr));
+
+		*rsdp_addr = (acpi_physical_address)physical_address;
+
+		return;
+	}
+}
+
+static acpi_physical_address get_rsdp_addr(void)
+{
+	acpi_physical_address pa = 0;
+	bool status = false;
+
+	status = efi_find_rsdp_addr(&pa);
+
+	if (!status)
+		bios_find_rsdp_addr(&pa);
+
+	return pa;
+}
+
+/*
+ * Locate the SRAT by walking the ACPI root table referenced from @rsdp.
+ *
+ * The XSDT is used when the RSDP revision is above 1, an XSDT address is
+ * present and "acpi=rsdt" is not on the command line; otherwise the RSDT.
+ * Entries are examined in place, so the number of entries is not limited
+ * by a fixed-size on-stack descriptor array.
+ *
+ * Returns the SRAT header, or NULL if it cannot be found.
+ */
+static struct acpi_table_header*
+get_acpi_root_table(struct acpi_table_rsdp *rsdp)
+{
+	char *args = (char *)get_cmd_line_ptr();
+	acpi_physical_address acpi_table;
+	acpi_physical_address root_table;
+	struct acpi_table_header *th;
+	bool use_rsdt = false;
+	u32 table_entry_size;
+	u8 *table_entry;
+	u32 table_count;
+	u32 len, i;
+
+	// Get rsdt or xsdt from rsdp
+	if (strstr(args, "acpi=rsdt"))
+		use_rsdt = true;
+
+	if (!use_rsdt &&
+	    rsdp->xsdt_physical_address && rsdp->revision > 1) {
+		root_table = rsdp->xsdt_physical_address;
+		table_entry_size = ACPI_XSDT_ENTRY_SIZE;
+	} else {
+		root_table = rsdp->rsdt_physical_address;
+		table_entry_size = ACPI_RSDT_ENTRY_SIZE;
+	}
+	if (!root_table)
+		return NULL;
+
+	// Walk the entries that follow the root table header
+	th = (struct acpi_table_header *)root_table;
+	len = th->length;
+	/* A length shorter than the header would wrap the subtraction */
+	if (len < sizeof(struct acpi_table_header))
+		return NULL;
+	table_count = (u32)((len - sizeof(struct acpi_table_header)) /
+				table_entry_size);
+	table_entry = ACPI_ADD_PTR(u8, th, sizeof(struct acpi_table_header));
+
+	for (i = 0; i < table_count; i++, table_entry += table_entry_size) {
+		u64 address64;
+
+		if (table_entry_size == ACPI_RSDT_ENTRY_SIZE) {
+			acpi_table = ((acpi_physical_address)
+					(*ACPI_CAST_PTR(u32, table_entry)));
+		} else {
+			ACPI_MOVE_64_TO_64(&address64, table_entry);
+			acpi_table = (acpi_physical_address) address64;
+		}
+		if (!acpi_table)
+			continue;
+		th = (struct acpi_table_header *)acpi_table;
+		if (!strncmp(th->signature, "SRAT", 4))
+			return th;
+	}
+	return NULL;
+}
+
+/*
+ * Record hot-pluggable SRAT memory affinity (type 1) ranges in movable_mem[].
+ */
+static void mark_movable_mem(struct acpi_table_header *table_header)
+{
+	int i = 0, max = sizeof(movable_mem) / sizeof(movable_mem[0]);
+	struct acpi_subtable_header *asth;
+	struct acpi_srat_mem_affinity *ma;
+	unsigned long table_end, next;
+
+	table_end = (unsigned long)table_header + table_header->length;
+	next = (unsigned long)table_header + sizeof(struct acpi_table_srat);
+
+	while (i < max && next + sizeof(*asth) <= table_end) {
+		asth = (struct acpi_subtable_header *)next;
+		/* A bad length would loop forever or run past the table */
+		if (asth->length < sizeof(*asth) ||
+		    next + asth->length > table_end)
+			break;
+		ma = (struct acpi_srat_mem_affinity *)asth;
+		if (asth->type == 1 && asth->length >= sizeof(*ma) &&
+		    (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && ma->length) {
+			movable_mem[i].start = ma->base_address;
+			movable_mem[i].end = ma->base_address + ma->length - 1;
+			i++;
+		}
+		next += asth->length;
+	}
+	num_movable_ma = i;
+}
+
+static void handle_movable_node(void)
+{
+	char *args = (char *)get_cmd_line_ptr();
+	struct acpi_table_header *table_header;
+	acpi_physical_address rsdp;
+
+	if (!strstr(args, "movable_node"))
+		return;
+
+	rsdp = get_rsdp_addr();
+	if (!rsdp)
+		return;
+
+	table_header = get_acpi_root_table((struct acpi_table_rsdp *)rsdp);
+	if (!table_header)
+		return;
+
+	mark_movable_mem(table_header);
+}
+
 /*
  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
  * The mem_avoid array is used to store the ranges that need to be avoided
@@ -380,6 +688,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	/* Mark the memmap regions we need to avoid */
 	handle_mem_memmap();
 
+	/* Record the hot-pluggable memory regions KASLR must avoid */
+	handle_movable_node();
+
 #ifdef CONFIG_X86_VERBOSE_BOOTUP
 	/* Make sure video RAM can be used. */
 	add_identity_map(0, PMD_SIZE);
@@ -481,6 +792,36 @@ static unsigned long slots_fetch_random(void)
 	return 0;
 }
 
+/*
+ * Check whether @entry overlaps any hot-pluggable range recorded in
+ * movable_mem[].
+ *
+ * Two inclusive ranges [start, end] and [m.start, m.end] intersect exactly
+ * when start <= m.end and end >= m.start.  Testing only the two end points
+ * of @entry would miss a movable range lying entirely inside @entry, and
+ * the ranges are kept in SRAT order without being sorted, so every
+ * recorded range is examined.
+ *
+ * Returns 1 if @entry touches movable memory, 0 otherwise.
+ */
+static int check_movable_memory(struct mem_vector *entry)
+{
+	unsigned long long start = entry->start;
+	unsigned long long end;
+	int i;
+
+	if (!entry->size)
+		return 0;
+	end = start + entry->size - 1;
+
+	for (i = 0; i < num_movable_ma; i++) {
+		if (start <= movable_mem[i].end &&
+		    end >= movable_mem[i].start)
+			return 1;
+	}
+	return 0;
+}
+
 static void process_mem_region(struct mem_vector *entry,
 			       unsigned long minimum,
 			       unsigned long image_size)
@@ -502,6 +843,11 @@ static void process_mem_region(struct mem_vector *entry,
 	end = min(entry->size + entry->start, mem_limit);
 	if (entry->start >= end)
 		return;
+
+	/* Ignore the memory region of movable_node */
+	if (check_movable_memory(entry))
+		return;
+
 	cur_entry.start = entry->start;
 	cur_entry.size = end - entry->start;
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 766a5211f827..3d5f0b5ed13d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -109,3 +109,45 @@ static inline void console_init(void)
 #endif
 
 #endif
+
+#ifdef ACPI_BIG_ENDIAN
+#define ACPI_MOVE_64_TO_64(d, s) \
+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[7]; \
+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[6]; \
+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[5]; \
+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[4]; \
+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[3]; \
+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[2]; \
+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[1]; \
+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[0]; }
+#else
+#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
+#define ACPI_MOVE_64_TO_64(d, s) \
+{*(u64 *)(void *)(d) = *(u64 *)(void *)(s); }
+#else
+#define ACPI_MOVE_64_TO_64(d, s) \
+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[0]; \
+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[1]; \
+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[2]; \
+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[3]; \
+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[4]; \
+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[5]; \
+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[6]; \
+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[7]; }
+#endif
+#endif
+#ifdef ACPI_BIG_ENDIAN
+#define ACPI_MOVE_16_TO_32(d, s) \
+{(*(u32 *)(void *)(d)) = 0; \
+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[1]; \
+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[0]; }
+#else
+#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
+#define ACPI_MOVE_16_TO_32(d, s) \
+{*(u32 *)(void *)(d) = *(u16 *)(void *)(s); }
+#else
+#define ACPI_MOVE_16_TO_32(d, s) \
+{(*(u32 *)(void *)(d)) = 0; ACPI_MOVE_16_TO_16(d, s); }
+#endif
+#endif
+
-- 
2.13.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
@ 2017-09-03 14:31 ` Chao Fan
  0 siblings, 0 replies; 15+ messages in thread
From: Chao Fan @ 2017-09-03 14:31 UTC (permalink / raw)
  To: linux-kernel, x86, linux-acpi
  Cc: hpa, tglx, mingo, keescook, bhe, arnd, dyoung, dave.jiang,
	lv.zheng, rjw, indou.takao, izumi.taku, yasu.isimatu, Chao Fan

KASLR should choose the memory region of immovable node to extract kernel.
So get ACPI SRAT table and store the memory region of movable node which
kaslr should avoid.

Signed-off-by: Chao Fan <fanc.fnst@cn.fujitsu.com>
---
v2: Add the support for bios
---
 arch/x86/boot/compressed/kaslr.c | 346 +++++++++++++++++++++++++++++++++++++++
 arch/x86/boot/compressed/misc.h  |  42 +++++
 2 files changed, 388 insertions(+)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 17818ba6906f..8f2b42dc3685 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -45,6 +45,12 @@
 #define STATIC
 #include <linux/decompress/mm.h>
 
+#include <linux/efi.h>
+#include <linux/acpi.h>
+#include <acpi/acpi.h>
+#include <linux/numa.h>
+#include <asm/efi.h>
+
 extern unsigned long get_cmd_line_ptr(void);
 
 /* Simplified build-specific string for starting entropy. */
@@ -94,6 +100,18 @@ static bool memmap_too_large;
 /* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
 unsigned long long mem_limit = ULLONG_MAX;
 
+/* Store the max numbers of acpi tables */
+#define ACPI_MAX_TABLES		128
+
+/* Store the movable memory */
+static struct {
+	u64 start;
+	u64 end;
+} movable_mem[MAX_NUMNODES*2];
+
+/* Store the num of movable mem affinity */
+static int num_movable_ma;
+
 
 enum mem_avoid_index {
 	MEM_AVOID_ZO_RANGE = 0,
@@ -257,6 +275,296 @@ static int handle_mem_memmap(void)
 	return 0;
 }
 
+/*
+ * Find the ACPI RSDP through the EFI configuration tables.  An ACPI 2.0
+ * entry is returned as soon as it is seen; an ACPI 1.0 entry is kept only
+ * as a fallback.  Returns true with the address stored in @rsdp_addr.
+ */
+static bool efi_find_rsdp_addr(acpi_physical_address *rsdp_addr)
+{
+	efi_system_table_t *systab;
+	bool find_rsdp = false;
+	bool efi_64 = false;
+	void *config_tables;
+	struct efi_info *e;
+	char *sig;
+	int size, i;
+
+#ifndef CONFIG_EFI
+	return false;
+#endif
+
+	e = &boot_params->efi_info;
+	sig = (char *)&e->efi_loader_signature;
+
+	if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4))
+		efi_64 = true;
+	else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4))
+		efi_64 = false;
+	else {
+		debug_putstr("Wrong efi loader signature.\n");
+		return false;
+	}
+
+	// Get systab from boot params
+#ifdef CONFIG_X86_32
+	if (e->efi_systab_hi || e->efi_memmap_hi) {
+		debug_putstr("Table located above 4GB, disabling EFI.\n");
+		return false;
+	}
+	systab = (efi_system_table_t *)e->efi_systab;
+#else
+	systab = (efi_system_table_t *)(e->efi_systab |
+			((__u64)e->efi_systab_hi<<32));
+#endif
+
+	// Get efi tables from systab
+	size = efi_64 ? sizeof(efi_config_table_64_t) :
+			sizeof(efi_config_table_32_t);
+
+	for (i = 0; i < systab->nr_tables; i++) {
+		efi_guid_t guid;
+		u64 table;
+
+		config_tables = (void *)(systab->tables + size * i);
+		if (efi_64) {
+			efi_config_table_64_t *tmp_table = config_tables;
+
+			guid = tmp_table->guid;
+			/* Keep all 64 bits so the check below is meaningful */
+			table = tmp_table->table;
+#ifndef CONFIG_64BIT
+			if (table >> 32) {
+				debug_putstr
+				("Table located above 4G, disabling EFI.\n");
+				return false;
+			}
+#endif
+		} else {
+			efi_config_table_32_t *tmp_table = config_tables;
+
+			guid = tmp_table->guid;
+			table = tmp_table->table;
+		}
+
+		// Get rsdp from efi tables
+		if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
+			*rsdp_addr = (acpi_physical_address)table;
+			return true;
+		}
+		if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
+			*rsdp_addr = (acpi_physical_address)table;
+			find_rsdp = true;
+		}
+	}
+	return find_rsdp;
+}
+
+static u8 checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8)(sum + *(buffer++));
+
+	return sum;
+}
+
+static u8 *scan_memory_for_rsdp(u8 *start_address, u32 length)
+{
+	struct acpi_table_rsdp *rsdp;
+	u8 *end_address;
+	u8 *mem_rover;
+
+	end_address = start_address + length;
+
+	for (mem_rover = start_address; mem_rover < end_address;
+	     mem_rover += ACPI_RSDP_SCAN_STEP) {
+		rsdp = ACPI_CAST_PTR(struct acpi_table_rsdp, mem_rover);
+		if (!ACPI_VALIDATE_RSDP_SIG(rsdp->signature))
+			continue;
+		if (checksum((u8 *) rsdp,
+		    ACPI_RSDP_CHECKSUM_LENGTH) != 0)
+			continue;
+		if ((rsdp->revision >= 2) && (checksum((u8 *)
+		    rsdp, ACPI_RSDP_XCHECKSUM_LENGTH) != 0))
+			continue;
+		return mem_rover;
+	}
+	return NULL;
+}
+
+static void bios_find_rsdp_addr(acpi_physical_address *rsdp_addr)
+{
+	struct acpi_table_rsdp *rsdp;
+	u32 physical_address;
+	u8 *table_ptr;
+	u8 *mem_rover;
+
+	table_ptr = (u8 *)ACPI_EBDA_PTR_LOCATION;
+	ACPI_MOVE_16_TO_32(&physical_address, table_ptr);
+	physical_address <<= 4;
+	table_ptr = (u8 *)(acpi_physical_address)physical_address;
+
+	if (physical_address > 0x400) {
+		mem_rover =
+		    scan_memory_for_rsdp(table_ptr, ACPI_EBDA_WINDOW_SIZE);
+
+		if (mem_rover) {
+			physical_address +=
+			    (u32) ACPI_PTR_DIFF(mem_rover, table_ptr);
+
+			*rsdp_addr = (acpi_physical_address)physical_address;
+			return;
+		}
+	}
+
+	table_ptr = (u8 *)ACPI_HI_RSDP_WINDOW_BASE;
+	mem_rover = scan_memory_for_rsdp(table_ptr, ACPI_HI_RSDP_WINDOW_SIZE);
+
+	if (mem_rover) {
+		physical_address = (u32)
+		    (ACPI_HI_RSDP_WINDOW_BASE +
+		     ACPI_PTR_DIFF(mem_rover, table_ptr));
+
+		*rsdp_addr = (acpi_physical_address)physical_address;
+
+		return;
+	}
+}
+
+static acpi_physical_address get_rsdp_addr(void)
+{
+	acpi_physical_address pa = 0;
+	bool status = false;
+
+	status = efi_find_rsdp_addr(&pa);
+
+	if (!status)
+		bios_find_rsdp_addr(&pa);
+
+	return pa;
+}
+
+/*
+ * Locate the SRAT by walking the ACPI root table referenced from @rsdp.
+ *
+ * The XSDT is used when the RSDP revision is above 1, an XSDT address is
+ * present and "acpi=rsdt" is not on the command line; otherwise the RSDT.
+ * Entries are examined in place, so the number of entries is not limited
+ * by a fixed-size on-stack descriptor array.
+ *
+ * Returns the SRAT header, or NULL if it cannot be found.
+ */
+static struct acpi_table_header*
+get_acpi_root_table(struct acpi_table_rsdp *rsdp)
+{
+	char *args = (char *)get_cmd_line_ptr();
+	acpi_physical_address acpi_table;
+	acpi_physical_address root_table;
+	struct acpi_table_header *th;
+	bool use_rsdt = false;
+	u32 table_entry_size;
+	u8 *table_entry;
+	u32 table_count;
+	u32 len, i;
+
+	// Get rsdt or xsdt from rsdp
+	if (strstr(args, "acpi=rsdt"))
+		use_rsdt = true;
+
+	if (!use_rsdt &&
+	    rsdp->xsdt_physical_address && rsdp->revision > 1) {
+		root_table = rsdp->xsdt_physical_address;
+		table_entry_size = ACPI_XSDT_ENTRY_SIZE;
+	} else {
+		root_table = rsdp->rsdt_physical_address;
+		table_entry_size = ACPI_RSDT_ENTRY_SIZE;
+	}
+	if (!root_table)
+		return NULL;
+
+	// Walk the entries that follow the root table header
+	th = (struct acpi_table_header *)root_table;
+	len = th->length;
+	/* A length shorter than the header would wrap the subtraction */
+	if (len < sizeof(struct acpi_table_header))
+		return NULL;
+	table_count = (u32)((len - sizeof(struct acpi_table_header)) /
+				table_entry_size);
+	table_entry = ACPI_ADD_PTR(u8, th, sizeof(struct acpi_table_header));
+
+	for (i = 0; i < table_count; i++, table_entry += table_entry_size) {
+		u64 address64;
+
+		if (table_entry_size == ACPI_RSDT_ENTRY_SIZE) {
+			acpi_table = ((acpi_physical_address)
+					(*ACPI_CAST_PTR(u32, table_entry)));
+		} else {
+			ACPI_MOVE_64_TO_64(&address64, table_entry);
+			acpi_table = (acpi_physical_address) address64;
+		}
+		if (!acpi_table)
+			continue;
+		th = (struct acpi_table_header *)acpi_table;
+		if (!strncmp(th->signature, "SRAT", 4))
+			return th;
+	}
+	return NULL;
+}
+
+/*
+ * Record hot-pluggable SRAT memory affinity (type 1) ranges in movable_mem[].
+ */
+static void mark_movable_mem(struct acpi_table_header *table_header)
+{
+	int i = 0, max = sizeof(movable_mem) / sizeof(movable_mem[0]);
+	struct acpi_subtable_header *asth;
+	struct acpi_srat_mem_affinity *ma;
+	unsigned long table_end, next;
+
+	table_end = (unsigned long)table_header + table_header->length;
+	next = (unsigned long)table_header + sizeof(struct acpi_table_srat);
+
+	while (i < max && next + sizeof(*asth) <= table_end) {
+		asth = (struct acpi_subtable_header *)next;
+		/* A bad length would loop forever or run past the table */
+		if (asth->length < sizeof(*asth) ||
+		    next + asth->length > table_end)
+			break;
+		ma = (struct acpi_srat_mem_affinity *)asth;
+		if (asth->type == 1 && asth->length >= sizeof(*ma) &&
+		    (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && ma->length) {
+			movable_mem[i].start = ma->base_address;
+			movable_mem[i].end = ma->base_address + ma->length - 1;
+			i++;
+		}
+		next += asth->length;
+	}
+	num_movable_ma = i;
+}
+
+static void handle_movable_node(void)
+{
+	char *args = (char *)get_cmd_line_ptr();
+	struct acpi_table_header *table_header;
+	acpi_physical_address rsdp;
+
+	if (!strstr(args, "movable_node"))
+		return;
+
+	rsdp = get_rsdp_addr();
+	if (!rsdp)
+		return;
+
+	table_header = get_acpi_root_table((struct acpi_table_rsdp *)rsdp);
+	if (!table_header)
+		return;
+
+	mark_movable_mem(table_header);
+}
+
 /*
  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
  * The mem_avoid array is used to store the ranges that need to be avoided
@@ -380,6 +688,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	/* Mark the memmap regions we need to avoid */
 	handle_mem_memmap();
 
+	/* Record the hot-pluggable memory regions KASLR must avoid */
+	handle_movable_node();
+
 #ifdef CONFIG_X86_VERBOSE_BOOTUP
 	/* Make sure video RAM can be used. */
 	add_identity_map(0, PMD_SIZE);
@@ -481,6 +792,36 @@ static unsigned long slots_fetch_random(void)
 	return 0;
 }
 
+/*
+ * Check whether @entry overlaps any hot-pluggable range recorded in
+ * movable_mem[].
+ *
+ * Two inclusive ranges [start, end] and [m.start, m.end] intersect exactly
+ * when start <= m.end and end >= m.start.  Testing only the two end points
+ * of @entry would miss a movable range lying entirely inside @entry, and
+ * the ranges are kept in SRAT order without being sorted, so every
+ * recorded range is examined.
+ *
+ * Returns 1 if @entry touches movable memory, 0 otherwise.
+ */
+static int check_movable_memory(struct mem_vector *entry)
+{
+	unsigned long long start = entry->start;
+	unsigned long long end;
+	int i;
+
+	if (!entry->size)
+		return 0;
+	end = start + entry->size - 1;
+
+	for (i = 0; i < num_movable_ma; i++) {
+		if (start <= movable_mem[i].end &&
+		    end >= movable_mem[i].start)
+			return 1;
+	}
+	return 0;
+}
+
 static void process_mem_region(struct mem_vector *entry,
 			       unsigned long minimum,
 			       unsigned long image_size)
@@ -502,6 +843,11 @@ static void process_mem_region(struct mem_vector *entry,
 	end = min(entry->size + entry->start, mem_limit);
 	if (entry->start >= end)
 		return;
+
+	/* Ignore the memory region of movable_node */
+	if (check_movable_memory(entry))
+		return;
+
 	cur_entry.start = entry->start;
 	cur_entry.size = end - entry->start;
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 766a5211f827..3d5f0b5ed13d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -109,3 +109,45 @@ static inline void console_init(void)
 #endif
 
 #endif
+
+#ifdef ACPI_BIG_ENDIAN
+#define ACPI_MOVE_64_TO_64(d, s) \
+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[7]; \
+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[6]; \
+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[5]; \
+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[4]; \
+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[3]; \
+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[2]; \
+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[1]; \
+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[0]; }
+#else
+#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
+#define ACPI_MOVE_64_TO_64(d, s) \
+{*(u64 *)(void *)(d) = *(u64 *)(void *)(s); }
+#else
+#define ACPI_MOVE_64_TO_64(d, s) \
+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[0]; \
+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[1]; \
+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[2]; \
+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[3]; \
+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[4]; \
+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[5]; \
+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[6]; \
+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[7]; }
+#endif
+#endif
+#ifdef ACPI_BIG_ENDIAN
+#define ACPI_MOVE_16_TO_32(d, s) \
+{(*(u32 *)(void *)(d)) = 0; \
+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[1]; \
+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[0]; }
+#else
+#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
+#define ACPI_MOVE_16_TO_32(d, s) \
+{*(u32 *)(void *)(d) = *(u16 *)(void *)(s); }
+#else
+#define ACPI_MOVE_16_TO_32(d, s) \
+{(*(u32 *)(void *)(d)) = 0; ACPI_MOVE_16_TO_16(d, s); }
+#endif
+#endif
+
-- 
2.13.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
  2017-09-03 14:31 ` Chao Fan
  (?)
@ 2017-09-03 22:55 ` Rafael J. Wysocki
  2017-09-04  1:44     ` Chao Fan
  2017-09-04  2:26   ` Baoquan He
  -1 siblings, 2 replies; 15+ messages in thread
From: Rafael J. Wysocki @ 2017-09-03 22:55 UTC (permalink / raw)
  To: Chao Fan
  Cc: linux-kernel, x86, linux-acpi, hpa, tglx, mingo, keescook, bhe,
	arnd, dyoung, dave.jiang, lv.zheng, indou.takao, izumi.taku,
	yasu.isimatu

On Sunday, September 3, 2017 4:31:23 PM CEST Chao Fan wrote:
> KASLR should choose the memory region of immovable node to extract kernel.
> So get ACPI SRAT table and store the memory region of movable node which
> kaslr shold avoid.

Please elaborate.

This is far too little information on what problem you are trying to address
and why you are trying to address it in this particular way.

Thanks,
Rafael


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
  2017-09-03 22:55 ` Rafael J. Wysocki
@ 2017-09-04  1:44     ` Chao Fan
  2017-09-04  2:26   ` Baoquan He
  1 sibling, 0 replies; 15+ messages in thread
From: Chao Fan @ 2017-09-04  1:44 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-kernel, x86, linux-acpi, hpa, tglx, mingo, keescook, bhe,
	arnd, dyoung, dave.jiang, lv.zheng, indou.takao, izumi.taku,
	yasu.isimatu

On Mon, Sep 04, 2017 at 12:55:00AM +0200, Rafael J. Wysocki wrote:
>On Sunday, September 3, 2017 4:31:23 PM CEST Chao Fan wrote:
>> KASLR should choose the memory region of immovable node to extract kernel.
>> So get ACPI SRAT table and store the memory region of movable node which
>> kaslr shold avoid.
>
>Please elaborate.

Hi Rafael,

Sorry for that.
The problem is: in a machine, some numa nodes are hotpluggable, some are
not. The kernel should use the memory in unhotpluggable.
But when extracting kernel, kaslr may choose the memory in hotpluggable
or unhotpluggable node. The ACPI SRAT table can show the node is
hotpluggable or not. But the acpi_boot_table_init runs in setup_arch,
which is after extracting kernel. So I imitate the initialization in
acpi_boot_table_init to get the table before extracting kernel. And
mark the memory region in hotpluggable node to avoid kaslr extracting
kernel in these regions.

Thanks,
Chao Fan

>
>This is far too little information on what problem you are trying to address
>and why you are trying to address it in this particular way.
>
>Thanks,
>Rafael
>
>
>



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
@ 2017-09-04  1:44     ` Chao Fan
  0 siblings, 0 replies; 15+ messages in thread
From: Chao Fan @ 2017-09-04  1:44 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-kernel, x86, linux-acpi, hpa, tglx, mingo, keescook, bhe,
	arnd, dyoung, dave.jiang, lv.zheng, indou.takao, izumi.taku,
	yasu.isimatu

On Mon, Sep 04, 2017 at 12:55:00AM +0200, Rafael J. Wysocki wrote:
>On Sunday, September 3, 2017 4:31:23 PM CEST Chao Fan wrote:
>> KASLR should choose the memory region of immovable node to extract kernel.
>> So get ACPI SRAT table and store the memory region of movable node which
>> kaslr shold avoid.
>
>Please elaborate.

Hi Rafael,

Sorry for that.
The problem is: in a machine, some numa nodes are hotpluggable, some are
not. The kernel should use the memory in unhotpluggable.
But when extracting kernel, kaslr may choose the memory in hotpluggable
or unhotpluggable node. The ACPI SRAT table can show the node is
hotpluggable or not. But the acpi_boot_table_init runs in setup_arch,
which is after extracting kernel. So I imitate the initialization in
acpi_boot_table_init to get the table before extracting kernel. And
mark the memory region in hotpluggable node to avoid kaslr extracting
kernel in these regions.

Thanks,
Chao Fan

>
>This is far too little information on what problem you are trying to address
>and why you are trying to address it in this particular way.
>
>Thanks,
>Rafael
>
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
  2017-09-03 22:55 ` Rafael J. Wysocki
  2017-09-04  1:44     ` Chao Fan
@ 2017-09-04  2:26   ` Baoquan He
  2017-09-04  3:08       ` Chao Fan
  2017-09-04  8:17       ` Dou Liyang
  1 sibling, 2 replies; 15+ messages in thread
From: Baoquan He @ 2017-09-04  2:26 UTC (permalink / raw)
  To: Chao Fan, Rafael J. Wysocki
  Cc: linux-kernel, x86, linux-acpi, hpa, tglx, mingo, keescook, arnd,
	dyoung, dave.jiang, lv.zheng, indou.takao, izumi.taku,
	yasu.isimatu

On 09/04/17 at 12:55am, Rafael J. Wysocki wrote:
> On Sunday, September 3, 2017 4:31:23 PM CEST Chao Fan wrote:
> > KASLR should choose the memory region of immovable node to extract kernel.
> > So get ACPI SRAT table and store the memory region of movable node which
> > kaslr shold avoid.
> 
> Please elaborate.
> 
> This is far too little information on what problem you are trying to address
> and why you are trying to address it in this particular way.

Agree with Rafael.

Why don't you try specifying those regions in cmdline and process them
in kaslr.c? Your colleague, Liyang has tried this way, just he only
considered the region in the first node. In this way, you don't need to
touch ACPI tables with so many lines of code.

Thanks
Baoquan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
  2017-09-04  2:26   ` Baoquan He
@ 2017-09-04  3:08       ` Chao Fan
  2017-09-04  8:17       ` Dou Liyang
  1 sibling, 0 replies; 15+ messages in thread
From: Chao Fan @ 2017-09-04  3:08 UTC (permalink / raw)
  To: Baoquan He
  Cc: Rafael J. Wysocki, linux-kernel, x86, linux-acpi, hpa, tglx,
	mingo, keescook, arnd, dyoung, dave.jiang, lv.zheng, indou.takao,
	izumi.taku, yasu.isimatu

On Mon, Sep 04, 2017 at 10:26:19AM +0800, Baoquan He wrote:
>On 09/04/17 at 12:55am, Rafael J. Wysocki wrote:
>> On Sunday, September 3, 2017 4:31:23 PM CEST Chao Fan wrote:
>> > KASLR should choose the memory region of immovable node to extract kernel.
>> > So get ACPI SRAT table and store the memory region of movable node which
>> > kaslr shold avoid.
>> 
>> Please elaborate.
>> 
>> This is far too little information on what problem you are trying to address
>> and why you are trying to address it in this particular way.
>
>Agree with Rafael.
>
>Why don't you try specifying those regions in cmdline and process them
>in kaslr.c? Your colleague, Liyang has tried this way, just he only
>considered the region in the first node. In this way, you don't need to

Hi Baoquan,

Yes, but if the regions are not only in the first node, the only way to
get detailed information about the memory ranges and whether they are
hotpluggable is from the ACPI table. It takes many lines of code, but we
get more information.

Thanks,
Chao Fan

>touch ACPI tables with so many lines of code.
>
>Thanks
>Baoquan
>
>



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
@ 2017-09-04  3:08       ` Chao Fan
  0 siblings, 0 replies; 15+ messages in thread
From: Chao Fan @ 2017-09-04  3:08 UTC (permalink / raw)
  To: Baoquan He
  Cc: Rafael J. Wysocki, linux-kernel, x86, linux-acpi, hpa, tglx,
	mingo, keescook, arnd, dyoung, dave.jiang, lv.zheng, indou.takao,
	izumi.taku, yasu.isimatu

On Mon, Sep 04, 2017 at 10:26:19AM +0800, Baoquan He wrote:
>On 09/04/17 at 12:55am, Rafael J. Wysocki wrote:
>> On Sunday, September 3, 2017 4:31:23 PM CEST Chao Fan wrote:
>> > KASLR should choose the memory region of immovable node to extract kernel.
>> > So get ACPI SRAT table and store the memory region of movable node which
>> > kaslr shold avoid.
>> 
>> Please elaborate.
>> 
>> This is far too little information on what problem you are trying to address
>> and why you are trying to address it in this particular way.
>
>Agree with Rafael.
>
>Why don't you try specifying those regions in cmdline and process them
>in kaslr.c? Your colleague, Liyang has tried this way, just he only
>considered the region in the first node. In this way, you don't need to

Hi Baoquan,

Yes, but if the regions are not only in the first node, the only way to
get detailed information about the memory ranges and whether they are
hotpluggable is from the ACPI table. It takes many lines of code, but we
get more information.

Thanks,
Chao Fan

>touch ACPI tables with so many lines of code.
>
>Thanks
>Baoquan
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
  2017-09-04  2:26   ` Baoquan He
@ 2017-09-04  8:17       ` Dou Liyang
  2017-09-04  8:17       ` Dou Liyang
  1 sibling, 0 replies; 15+ messages in thread
From: Dou Liyang @ 2017-09-04  8:17 UTC (permalink / raw)
  To: Baoquan He, Chao Fan, Rafael J. Wysocki
  Cc: linux-kernel, x86, linux-acpi, hpa, tglx, mingo, keescook, arnd,
	dyoung, dave.jiang, lv.zheng, indou.takao, izumi.taku,
	yasu.isimatu

Hi Rafael, Baoquan, and Chao,

At 09/04/2017 10:26 AM, Baoquan He wrote:
> On 09/04/17 at 12:55am, Rafael J. Wysocki wrote:
>> On Sunday, September 3, 2017 4:31:23 PM CEST Chao Fan wrote:
>>> KASLR should choose the memory region of immovable node to extract kernel.
>>> So get ACPI SRAT table and store the memory region of movable node which
>>> kaslr shold avoid.
>>
>> Please elaborate.
>>
>> This is far too little information on what problem you are trying to address

The problem is:

On the x86 architecture, KASLR (Kernel Address Space Layout Randomization)
breaks memory hot-remove when it extracts the kernel into a
hot-removable memory region.

The reason is:

Linux cannot migrate kernel pages, so memory used by the kernel
cannot be hot-removed. Normally, the ACPI SRAT table records all
hotpluggable memory ranges, and Linux can use it to allocate memory
correctly.

But when the kernel is being extracted, the SRAT has not been parsed yet,
so KASLR doesn't know which memory ranges are hotpluggable and should be
avoided. As a result, KASLR may place the kernel in movable memory, which
then cannot be hot-removed.

The original solution is:

Using "mem=" and "movable_node" in the kernel command line.

eg:

node 0 size: 1024 MB  immovable
node 1 size: 1024 MB  movable
node 2 size: 1024 MB  movable
node 3 size: 1024 MB  movable

Setting "mem=1024M movable_node" restricts the kernel physical
address to [0,1024M] for KASLR and makes sure the memory in node 1,
node 2 and node 3 remains hot-removable.

But it has a problem:

If we boot up with all nodes present, we only see 1G of RAM; the kernel
is not able to see the whole 4G of system memory. That is not good for us,
so we want to extend the "movable_node" option to fix this problem.

The new ways in discussion are:

Method 1) As Chao's patch shows, parse the ACPI SRAT table before
extracting the kernel.

Method 2) Extend movable_node option for KASLR, eg "movable_node=1024M"
   https://lkml.org/lkml/2017/8/3/401

>> and why you are trying to address it in this particular way.
>
> Agree with Rafael.
>
> Why don't you try specifying those regions in cmdline and process them
> in kaslr.c? Your colleague, Liyang has tried this way, just he only
> considered the region in the first node. In this way, you don't need to

No, not just the first node. It is based on the assumption that immovable
nodes start from the lowest address. eg:

node 0 size: 1024 MB  immovable
node 1 size: 1024 MB  immovable
node 2 size: 1024 MB  movable
node 3 size: 1024 MB  movable

With the "movable_node=2048M" option on the command line, KASLR will
consider both node 0 and node 1 (the first two nodes).

Using method 2, "movable_node=1024M" can be regarded as "mem=1024M
movable_node", but without limiting the memory visible to the kernel.

The problem with method 2 is that:

Using method 2, KASLR may not be able to use all of the immovable
memory for extracting the kernel. eg:

node 0 size: 1024 MB  immovable
node 1 size: 1024 MB  movable
node 2 size: 1024 MB  movable
node 3 size: 1024 MB  immovable

With the "movable_node=1024M" option on the command line, KASLR cannot
use the node 3 memory.

I am looking for a solution to this, but have not found a good way yet.

Sometimes, I will remember that proverb:

   You cannot have your cake and eat it too. :-)

Thanks,
	dou.
> touch ACPI tables with so many lines of code.
>
> Thanks
> Baoquan
>
>
>



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
@ 2017-09-04  8:17       ` Dou Liyang
  0 siblings, 0 replies; 15+ messages in thread
From: Dou Liyang @ 2017-09-04  8:17 UTC (permalink / raw)
  To: Baoquan He, Chao Fan, Rafael J. Wysocki
  Cc: linux-kernel, x86, linux-acpi, hpa, tglx, mingo, keescook, arnd,
	dyoung, dave.jiang, lv.zheng, indou.takao, izumi.taku,
	yasu.isimatu

Hi Rafael, Baoquan, and Chao,

At 09/04/2017 10:26 AM, Baoquan He wrote:
> On 09/04/17 at 12:55am, Rafael J. Wysocki wrote:
>> On Sunday, September 3, 2017 4:31:23 PM CEST Chao Fan wrote:
>>> KASLR should choose the memory region of immovable node to extract kernel.
>>> So get ACPI SRAT table and store the memory region of movable node which
>>> kaslr shold avoid.
>>
>> Please elaborate.
>>
>> This is far too little information on what problem you are trying to address

The problem is:

On the x86 architecture, KASLR (Kernel Address Space Layout Randomization)
breaks memory hot-remove when it extracts the kernel into a
hot-removable memory region.

The reason is:

Linux cannot migrate kernel pages, so memory used by the kernel
cannot be hot-removed. Normally, the ACPI SRAT table records all
hotpluggable memory ranges, and Linux can use it to allocate memory
correctly.

But when the kernel is being extracted, the SRAT has not been parsed yet,
so KASLR doesn't know which memory ranges are hotpluggable and should be
avoided. As a result, KASLR may place the kernel in movable memory, which
then cannot be hot-removed.

The original solution is:

Using "mem=" and "movable_node" in the kernel command line.

eg:

node 0 size: 1024 MB  immovable
node 1 size: 1024 MB  movable
node 2 size: 1024 MB  movable
node 3 size: 1024 MB  movable

Setting "mem=1024M movable_node" restricts the kernel physical
address to [0,1024M] for KASLR and makes sure the memory in node 1,
node 2 and node 3 remains hot-removable.

But it has a problem:

If we boot up with all nodes present, we only see 1G of RAM; the kernel
is not able to see the whole 4G of system memory. That is not good for us,
so we want to extend the "movable_node" option to fix this problem.

The new ways in discussion are:

Method 1) As Chao's patch shows, parse the ACPI SRAT table before
extracting the kernel.

Method 2) Extend movable_node option for KASLR, eg "movable_node=1024M"
   https://lkml.org/lkml/2017/8/3/401

>> and why you are trying to address it in this particular way.
>
> Agree with Rafael.
>
> Why don't you try specifying those regions in cmdline and process them
> in kaslr.c? Your colleague, Liyang has tried this way, just he only
> considered the region in the first node. In this way, you don't need to

No, not just the first node. It is based on the assumption that immovable
nodes start from the lowest address. eg:

node 0 size: 1024 MB  immovable
node 1 size: 1024 MB  immovable
node 2 size: 1024 MB  movable
node 3 size: 1024 MB  movable

With the "movable_node=2048M" option on the command line, KASLR will
consider both node 0 and node 1 (the first two nodes).

Using method 2, "movable_node=1024M" can be regarded as "mem=1024M
movable_node", but without limiting the memory visible to the kernel.

The problem with method 2 is that:

Using method 2, KASLR may not be able to use all of the immovable
memory for extracting the kernel. eg:

node 0 size: 1024 MB  immovable
node 1 size: 1024 MB  movable
node 2 size: 1024 MB  movable
node 3 size: 1024 MB  immovable

With the "movable_node=1024M" option on the command line, KASLR cannot
use the node 3 memory.

I am looking for a solution to this, but have not found a good way yet.

Sometimes, I will remember that proverb:

   You cannot have your cake and eat it too. :-)

Thanks,
	dou.
> touch ACPI tables with so many lines of code.
>
> Thanks
> Baoquan
>
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
  2017-09-04  8:17       ` Dou Liyang
  (?)
@ 2017-09-04  8:39       ` Baoquan He
  2017-09-04  8:52           ` Dou Liyang
  2017-09-04  9:03           ` Chao Fan
  -1 siblings, 2 replies; 15+ messages in thread
From: Baoquan He @ 2017-09-04  8:39 UTC (permalink / raw)
  To: Dou Liyang
  Cc: Chao Fan, Rafael J. Wysocki, linux-kernel, x86, linux-acpi, hpa,
	tglx, mingo, keescook, arnd, dyoung, dave.jiang, lv.zheng,
	indou.takao, izumi.taku, yasu.isimatu

On 09/04/17 at 04:17pm, Dou Liyang wrote:
> With "movable_node=1024M" option in cmdline, KASLR will can't access
> the node3 memory.

So you have extended the movable_node option from taking no value to
taking a limit value; then why don't you go one step further and extend
it to movable_node=xxx@start? With this, you can eat the cake you have.

This is just my personal opinion; other people may have a better idea. But
digging into the ACPI tables to grab the SRAT table is really not a good idea.

Chao has spent time getting to know the SRAT table, so maybe he can try to
make a patch with the "movable_node=xxx@start" handling in kaslr.c. Let's
see what it looks like.

Thanks
Baoquan

> 
> I am looking for the solution of this. Not find a good way.
> 
> Sometimes, I will remember that proverb:
> 
>   You cannot have your cake and eat it too. :-)
> 
> Thanks,
> 	dou.
> > touch ACPI tables with so many lines of code.
> > 
> > Thanks
> > Baoquan
> > 
> > 
> > 
> 
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
  2017-09-04  8:39       ` Baoquan He
@ 2017-09-04  8:52           ` Dou Liyang
  2017-09-04  9:03           ` Chao Fan
  1 sibling, 0 replies; 15+ messages in thread
From: Dou Liyang @ 2017-09-04  8:52 UTC (permalink / raw)
  To: Baoquan He
  Cc: Chao Fan, Rafael J. Wysocki, linux-kernel, x86, linux-acpi, hpa,
	tglx, mingo, keescook, arnd, dyoung, dave.jiang, lv.zheng,
	indou.takao, izumi.taku, yasu.isimatu

Hi Baoquan,

At 09/04/2017 04:39 PM, Baoquan He wrote:
> On 09/04/17 at 04:17pm, Dou Liyang wrote:
>> With "movable_node=1024M" option in cmdline, KASLR will can't access
>> the node3 memory.
>
> So you have extended the movable_node option from no value specified to
> adding a limit value, then why don't you go one step further to extend
> it as movable_node=xxx@start. With this, you can eat the cake you have.
>

Haha, extending it as movable_node=xxx@start is my last choice. I
don't want this option to be as complex as memmap is.

> My personal opinion, could that other peopel have better idea. But dig
> into acpi tables to grab the srat table, that is really not a good idea.
>

I agree with you. That is why I sent method 2.

> Chao has spent time to know the srat table, maybe he can try to make a
> patch with the "movable_node=xxx@start" handling in kaslr.c, let's see
> what it looks like.
>

OK, go ahead, Chao.

Thanks,
	dou.


> Thanks
> Baoquan
>
>>
>> I am looking for the solution of this. Not find a good way.
>>
>> Sometimes, I will remember that proverb:
>>
>>   You cannot have your cake and eat it too. :-)
>>
>> Thanks,
>> 	dou.
>>> touch ACPI tables with so many lines of code.
>>>
>>> Thanks
>>> Baoquan
>>>
>>>
>>>
>>
>>
>
>
>



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
@ 2017-09-04  8:52           ` Dou Liyang
  0 siblings, 0 replies; 15+ messages in thread
From: Dou Liyang @ 2017-09-04  8:52 UTC (permalink / raw)
  To: Baoquan He
  Cc: Chao Fan, Rafael J. Wysocki, linux-kernel, x86, linux-acpi, hpa,
	tglx, mingo, keescook, arnd, dyoung, dave.jiang, lv.zheng,
	indou.takao, izumi.taku, yasu.isimatu

Hi Baoquan,

At 09/04/2017 04:39 PM, Baoquan He wrote:
> On 09/04/17 at 04:17pm, Dou Liyang wrote:
>> With "movable_node=1024M" option in cmdline, KASLR will can't access
>> the node3 memory.
>
> So you have extended the movable_node option from no value specified to
> adding a limit value, then why don't you go one step further to extend
> it as movable_node=xxx@start. With this, you can eat the cake you have.
>

Haha, extending it as movable_node=xxx@start is my last choice. I
don't want this option to be as complex as memmap is.

> My personal opinion, could that other peopel have better idea. But dig
> into acpi tables to grab the srat table, that is really not a good idea.
>

I agree with you. That is why I sent method 2.

> Chao has spent time to know the srat table, maybe he can try to make a
> patch with the "movable_node=xxx@start" handling in kaslr.c, let's see
> what it looks like.
>

OK, go ahead, Chao.

Thanks,
	dou.


> Thanks
> Baoquan
>
>>
>> I am looking for the solution of this. Not find a good way.
>>
>> Sometimes, I will remember that proverb:
>>
>>   You cannot have your cake and eat it too. :-)
>>
>> Thanks,
>> 	dou.
>>> touch ACPI tables with so many lines of code.
>>>
>>> Thanks
>>> Baoquan
>>>
>>>
>>>
>>
>>
>
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
  2017-09-04  8:39       ` Baoquan He
@ 2017-09-04  9:03           ` Chao Fan
  2017-09-04  9:03           ` Chao Fan
  1 sibling, 0 replies; 15+ messages in thread
From: Chao Fan @ 2017-09-04  9:03 UTC (permalink / raw)
  To: Baoquan He
  Cc: Dou Liyang, Rafael J. Wysocki, linux-kernel, x86, linux-acpi,
	hpa, tglx, mingo, keescook, arnd, dyoung, dave.jiang, lv.zheng,
	indou.takao, izumi.taku, yasu.isimatu

On Mon, Sep 04, 2017 at 04:39:14PM +0800, Baoquan He wrote:
>On 09/04/17 at 04:17pm, Dou Liyang wrote:
>> With "movable_node=1024M" option in cmdline, KASLR will can't access
>> the node3 memory.
>
>So you have extended the movable_node option from no value specified to
>adding a limit value, then why don't you go one step further to extend
>it as movable_node=xxx@start. With this, you can eat the cake you have.
>
>My personal opinion, could that other peopel have better idea. But dig
>into acpi tables to grab the srat table, that is really not a good idea.
>
>Chao has spent time to know the srat table, maybe he can try to make a
>patch with the "movable_node=xxx@start" handling in kaslr.c, let's see
>what it looks like.

Hi Bao

That means the user has to know the detailed contents of the SRAT
table, including the memory locations and lengths. I am not sure whether
it is elegant to leave it to users to fill in the parameter.
BTW, it may look like this: "movable_node=xxx@start,xxx@start,..."
I am also wondering whether anyone has a better solution.

Thanks,
Chao Fan

>
>Thanks
>Baoquan
>
>> 
>> I am looking for the solution of this. Not find a good way.
>> 
>> Sometimes, I will remember that proverb:
>> 
>>   You cannot have your cake and eat it too. :-)
>> 
>> Thanks,
>> 	dou.
>> > touch ACPI tables with so many lines of code.
>> > 
>> > Thanks
>> > Baoquan
>> > 
>> > 
>> > 
>> 
>> 
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory
@ 2017-09-04  9:03           ` Chao Fan
  0 siblings, 0 replies; 15+ messages in thread
From: Chao Fan @ 2017-09-04  9:03 UTC (permalink / raw)
  To: Baoquan He
  Cc: Dou Liyang, Rafael J. Wysocki, linux-kernel, x86, linux-acpi,
	hpa, tglx, mingo, keescook, arnd, dyoung, dave.jiang, lv.zheng,
	indou.takao, izumi.taku, yasu.isimatu

On Mon, Sep 04, 2017 at 04:39:14PM +0800, Baoquan He wrote:
>On 09/04/17 at 04:17pm, Dou Liyang wrote:
>> With "movable_node=1024M" option in cmdline, KASLR will can't access
>> the node3 memory.
>
>So you have extended the movable_node option from no value specified to
>adding a limit value, then why don't you go one step further to extend
>it as movable_node=xxx@start. With this, you can eat the cake you have.
>
>My personal opinion, could that other peopel have better idea. But dig
>into acpi tables to grab the srat table, that is really not a good idea.
>
>Chao has spent time to know the srat table, maybe he can try to make a
>patch with the "movable_node=xxx@start" handling in kaslr.c, let's see
>what it looks like.

Hi Bao

That means the user has to know the detailed contents of the SRAT
table, including the memory locations and lengths. I am not sure whether
it is elegant to leave it to users to fill in the parameter.
BTW, it may look like this: "movable_node=xxx@start,xxx@start,..."
I am also wondering whether anyone has a better solution.

Thanks,
Chao Fan

>
>Thanks
>Baoquan
>
>> 
>> I am looking for the solution of this. Not find a good way.
>> 
>> Sometimes, I will remember that proverb:
>> 
>>   You cannot have your cake and eat it too. :-)
>> 
>> Thanks,
>> 	dou.
>> > touch ACPI tables with so many lines of code.
>> > 
>> > Thanks
>> > Baoquan
>> > 
>> > 
>> > 
>> 
>> 
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-09-04  9:04 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-03 14:31 [PATCH v2] kaslr: get ACPI SRAT table to avoid movable memory Chao Fan
2017-09-03 14:31 ` Chao Fan
2017-09-03 22:55 ` Rafael J. Wysocki
2017-09-04  1:44   ` Chao Fan
2017-09-04  1:44     ` Chao Fan
2017-09-04  2:26   ` Baoquan He
2017-09-04  3:08     ` Chao Fan
2017-09-04  3:08       ` Chao Fan
2017-09-04  8:17     ` Dou Liyang
2017-09-04  8:17       ` Dou Liyang
2017-09-04  8:39       ` Baoquan He
2017-09-04  8:52         ` Dou Liyang
2017-09-04  8:52           ` Dou Liyang
2017-09-04  9:03         ` Chao Fan
2017-09-04  9:03           ` Chao Fan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.