All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit
@ 2012-11-16 23:04 Yinghai Lu
  2012-11-16 23:04 ` [PATCH 1/8] Add min/max macro Yinghai Lu
                   ` (8 more replies)
  0 siblings, 9 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Currently the kdump reserved region is limited to below 896M because of a
kexec limitation, and the bzImage also needs to stay under 4G.

kernel parts changes could be found at:
        git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git for-x86-boot

These patches are for kexec-tools, to load the bzImage and ramdisk high
according to the newly added boot header fields.

Yinghai Lu (8):
  Add min/max macro
  x86: add boot header member for version 2.12
  add mem64_min/max control
  Move out mem_min/max checking in locate_hole
  seperate checking 64bit mem range
  debug print out for add_buf
  x86: put ramdisk high for 64bit bzImage
  x86: put 64bit bzImage high

 include/x86/x86-linux.h           |   20 ++++++++++-
 kexec/add_buffer.c                |    9 ++++-
 kexec/arch/i386/kexec-bzImage.c   |   53 ++++++++++++++++++++++++++--
 kexec/arch/i386/x86-linux-setup.c |   32 ++++++++++++-----
 kexec/arch/ppc/include/types.h    |   12 ------
 kexec/kexec.c                     |   69 +++++++++++++++++++++++++++++-------
 kexec/kexec.h                     |   21 +++++++++++-
 7 files changed, 175 insertions(+), 41 deletions(-)

-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 1/8] Add min/max macro
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
@ 2012-11-16 23:04 ` Yinghai Lu
  2012-11-16 23:04 ` [PATCH 2/8] x86: add boot header member for version 2.12 Yinghai Lu
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/ppc/include/types.h |   12 ------------
 kexec/kexec.h                  |   12 ++++++++++++
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/kexec/arch/ppc/include/types.h b/kexec/arch/ppc/include/types.h
index 31393d1..3773cda 100644
--- a/kexec/arch/ppc/include/types.h
+++ b/kexec/arch/ppc/include/types.h
@@ -12,16 +12,4 @@ typedef short			s16;
 typedef int			s32;
 typedef long long		s64;
 
-#define min(x,y) ({ \
-	typeof(x) _x = (x);	\
-	typeof(y) _y = (y);	\
-	(void) (&_x == &_y);	\
-	_x < _y ? _x : _y; })
-
-#define max(x,y) ({ \
-	typeof(x) _x = (x);	\
-	typeof(y) _y = (y);	\
-	(void) (&_x == &_y);	\
-	_x > _y ? _x : _y; })
-
 #endif /* _TYPES_H_ */
diff --git a/kexec/kexec.h b/kexec/kexec.h
index 1f46bcb..35b0b10 100644
--- a/kexec/kexec.h
+++ b/kexec/kexec.h
@@ -100,6 +100,18 @@ do { \
 	} \
 } while(0)
 
+#define min(x,y) ({ \
+        typeof(x) _x = (x);     \
+        typeof(y) _y = (y);     \
+        (void) (&_x == &_y);    \
+        _x < _y ? _x : _y; })
+
+#define max(x,y) ({ \
+        typeof(x) _x = (x);     \
+        typeof(y) _y = (y);     \
+        (void) (&_x == &_y);    \
+        _x > _y ? _x : _y; })
+
 extern unsigned long long mem_min, mem_max;
 extern int kexec_debug;
 
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 2/8] x86: add boot header member for version 2.12
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  2012-11-16 23:04 ` [PATCH 1/8] Add min/max macro Yinghai Lu
@ 2012-11-16 23:04 ` Yinghai Lu
  2012-11-16 23:04 ` [PATCH 3/8] add mem64_min/max control Yinghai Lu
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

will use ext_ramdisk_image/size, and code64_start_offset

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 include/x86/x86-linux.h           |   20 +++++++++++++++++++-
 kexec/arch/i386/x86-linux-setup.c |    2 +-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 27af02b..9c8fbc9 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -178,7 +178,16 @@ struct x86_linux_param_header {
 	uint32_t cmdline_size;			/* 0x238 */
 	uint32_t hardware_subarch;		/* 0x23C */
 	uint64_t hardware_subarch_data;		/* 0x240 */
-	uint8_t  reserved16[0x290 - 0x248];	/* 0x248 */
+	uint32_t payload_offset;		/* 0x248 */
+	uint32_t payload_length;		/* 0x24C */
+	uint64_t setup_data;			/* 0x250 */
+	uint64_t pref_address;			/* 0x258 */
+	uint32_t init_size;			/* 0x260 */
+	uint32_t handover_offset;		/* 0x264 */
+	uint32_t ext_ramdisk_image;		/* 0x268 */
+	uint32_t ext_ramdisk_size;		/* 0x26C */
+	uint32_t code64_start_offset;		/* 0x270 */
+	uint8_t  reserved16[0x290 - 0x274];	/* 0x274 */
 	uint32_t edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];	/* 0x290 */
 #endif
 	struct 	e820entry e820_map[E820MAX];	/* 0x2d0 */
@@ -245,6 +254,15 @@ struct x86_linux_header {
 	uint32_t cmdline_size;                  /* 0x238 */
 	uint32_t hardware_subarch;              /* 0x23C */
 	uint64_t hardware_subarch_data;         /* 0x240 */
+	uint32_t payload_offset;		/* 0x248 */
+	uint32_t payload_length;		/* 0x24C */
+	uint64_t setup_data;			/* 0x250 */
+	uint64_t pref_address;			/* 0x258 */
+	uint32_t init_size;			/* 0x260 */
+	uint32_t handover_offset;		/* 0x264 */
+	uint32_t ext_ramdisk_image;		/* 0x268 */
+	uint32_t ext_ramdisk_size;		/* 0x26C */
+	uint32_t code64_start_offset;		/* 0x270 */
 #endif
 } PACKED;
 
diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
index b7ab8ea..53d9df9 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -41,7 +41,7 @@ void init_linux_parameters(struct x86_linux_param_header *real_mode)
 
 	/* Boot block magic */
 	memcpy(real_mode->header_magic, "HdrS", 4);
-	real_mode->protocol_version = 0x0206;
+	real_mode->protocol_version = 0x020C;
 	real_mode->initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
 	real_mode->cmdline_size = COMMAND_LINE_SIZE;
 }
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 3/8] add mem64_min/max control
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  2012-11-16 23:04 ` [PATCH 1/8] Add min/max macro Yinghai Lu
  2012-11-16 23:04 ` [PATCH 2/8] x86: add boot header member for version 2.12 Yinghai Lu
@ 2012-11-16 23:04 ` Yinghai Lu
  2012-11-17  6:18   ` Eric W. Biederman
  2012-11-16 23:04 ` [PATCH 4/8] Move out mem_min/max checking in locate_hole Yinghai Lu
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

So could limit range for 4g above buffers.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/kexec.c |   26 ++++++++++++++++++++++++++
 kexec/kexec.h |    9 ++++++++-
 2 files changed, 34 insertions(+), 1 deletions(-)

diff --git a/kexec/kexec.c b/kexec/kexec.c
index 8928be0..00f90f5 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -50,6 +50,8 @@
 
 unsigned long long mem_min = 0;
 unsigned long long mem_max = ULONG_MAX;
+unsigned long long mem64_min = 1ULL<32;
+unsigned long long mem64_max = ULLONG_MAX;
 static unsigned long kexec_flags = 0;
 int kexec_debug = 0;
 
@@ -900,6 +902,10 @@ void usage(void)
 	       "                      load code into.\n"
 	       "     --mem-max=<addr> Specify the highest memory address to\n"
 	       "                      load code into.\n"
+	       "     --mem64-min=<addr> Specify the lowest memory address \n"
+	       "                      above 4G to load code into.\n"
+	       "     --mem64-max=<addr> Specify the highest memory address \n"
+	       "                      above 4G to load code into.\n"
 	       "     --reuseinitrd    Reuse initrd from first boot.\n"
 	       "     --load-preserve-context Load the new kernel and preserve\n"
 	       "                      context of current kernel during kexec.\n"
@@ -1165,6 +1171,26 @@ int main(int argc, char *argv[])
 				return 1;
 			}
 			break;
+		case OPT_MEM64_MIN:
+			mem64_min = strtoul(optarg, &endptr, 0);
+			if (*endptr) {
+				fprintf(stderr,
+					"Bad option value in --mem64-min=%s\n",
+					optarg);
+				usage();
+				return 1;
+			}
+			break;
+		case OPT_MEM64_MAX:
+			mem64_max = strtoul(optarg, &endptr, 0);
+			if (*endptr) {
+				fprintf(stderr,
+					"Bad option value in --mem64-max=%s\n",
+					optarg);
+				usage();
+				return 1;
+			}
+			break;
 		case OPT_REUSE_INITRD:
 			do_reuse_initrd = 1;
 			break;
diff --git a/kexec/kexec.h b/kexec/kexec.h
index 35b0b10..2e0e5f2 100644
--- a/kexec/kexec.h
+++ b/kexec/kexec.h
@@ -113,6 +113,7 @@ do { \
         _x > _y ? _x : _y; })
 
 extern unsigned long long mem_min, mem_max;
+extern unsigned long long mem64_min, mem64_max;
 extern int kexec_debug;
 
 #define dbgprintf(...) \
@@ -211,7 +212,11 @@ extern int file_types;
 #define OPT_LOAD_PRESERVE_CONTEXT 259
 #define OPT_LOAD_JUMP_BACK_HELPER 260
 #define OPT_ENTRY		261
-#define OPT_MAX			262
+#define OPT_MEM64_MIN           262
+#define OPT_MEM64_MAX           263
+
+#define OPT_MAX			264
+
 #define KEXEC_OPTIONS \
 	{ "help",		0, 0, OPT_HELP }, \
 	{ "version",		0, 0, OPT_VERSION }, \
@@ -227,6 +232,8 @@ extern int file_types;
 	{ "load-panic",         0, 0, OPT_PANIC }, \
 	{ "mem-min",		1, 0, OPT_MEM_MIN }, \
 	{ "mem-max",		1, 0, OPT_MEM_MAX }, \
+	{ "mem64-min",		1, 0, OPT_MEM64_MIN }, \
+	{ "mem64-max",		1, 0, OPT_MEM64_MAX }, \
 	{ "reuseinitrd",	0, 0, OPT_REUSE_INITRD }, \
 	{ "debug",		0, 0, OPT_DEBUG }, \
 
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 4/8] Move out mem_min/max checking in locate_hole
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                   ` (2 preceding siblings ...)
  2012-11-16 23:04 ` [PATCH 3/8] add mem64_min/max control Yinghai Lu
@ 2012-11-16 23:04 ` Yinghai Lu
  2012-11-16 23:04 ` [PATCH 5/8] seperate checking 64bit mem range Yinghai Lu
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

The mem_min/mem_max check only needs to be done once, outside the loop.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/kexec.c |   27 +++++++++++++++++----------
 1 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/kexec/kexec.c b/kexec/kexec.c
index 00f90f5..b8d77bb 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -190,9 +190,9 @@ int sort_segments(struct kexec_info *info)
 	return 0;
 }
 
-unsigned long locate_hole(struct kexec_info *info,
-	unsigned long hole_size, unsigned long hole_align, 
-	unsigned long hole_min, unsigned long hole_max, 
+static unsigned long __locate_hole(struct kexec_info *info,
+	unsigned long hole_size, unsigned long hole_align,
+	unsigned long hole_min, unsigned long hole_max,
 	int hole_end)
 {
 	int i, j;
@@ -253,22 +253,16 @@ unsigned long locate_hole(struct kexec_info *info,
 		/* First filter the range start and end values
 		 * through the lens of mem_min, mem_max and hole_align.
 		 */
-		if (start < mem_min) {
-			start = mem_min;
-		}
 		if (start < hole_min) {
 			start = hole_min;
 		}
 		start = (start + hole_align - 1) &
 			~((unsigned long long)hole_align - 1);
-		if (end > mem_max) {
-			end = mem_max;
-		}
 		if (end > hole_max) {
 			end = hole_max;
 		}
 		/* Is this still a valid memory range? */
-		if ((start >= end) || (start >= mem_max) || (end <= mem_min)) {
+		if (start >= end) {
 			continue;
 		}
 		/* Is there enough space left so we can use it? */
@@ -297,6 +291,19 @@ unsigned long locate_hole(struct kexec_info *info,
 	return hole_base;
 }
 
+unsigned long locate_hole(struct kexec_info *info,
+	unsigned long hole_size, unsigned long hole_align, 
+	unsigned long hole_min, unsigned long hole_max, 
+	int hole_end)
+{
+	hole_min = max(hole_min, (unsigned long)mem_min);
+	hole_max = min(hole_max, (unsigned long)mem_max);
+
+	return __locate_hole(info, hole_size, hole_align,
+				 hole_min, hole_max, hole_end);
+
+}
+
 void add_segment_phys_virt(struct kexec_info *info,
 	const void *buf, size_t bufsz,
 	unsigned long base, size_t memsz, int phys)
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 5/8] seperate checking 64bit mem range
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                   ` (3 preceding siblings ...)
  2012-11-16 23:04 ` [PATCH 4/8] Move out mem_min/max checking in locate_hole Yinghai Lu
@ 2012-11-16 23:04 ` Yinghai Lu
  2012-11-16 23:04 ` [PATCH 6/8] debug print out for add_buf Yinghai Lu
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Use the right range according to the start value.

This avoids placing the range too high or too low.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/kexec.c |   16 ++++++++++++----
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/kexec/kexec.c b/kexec/kexec.c
index b8d77bb..aec3b9e 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -364,6 +364,7 @@ unsigned long add_buffer_phys_virt(struct kexec_info *info,
 	unsigned long base;
 	int result;
 	int pagesize;
+	unsigned long hole_min, hole_max;
 
 	result = sort_segments(info);
 	if (result < 0) {
@@ -374,11 +375,18 @@ unsigned long add_buffer_phys_virt(struct kexec_info *info,
 	pagesize = getpagesize();
 	memsz = (memsz + (pagesize - 1)) & ~(pagesize - 1);
 
-	base = locate_hole(info, memsz, buf_align, buf_min, buf_max, buf_end);
-	if (base == ULONG_MAX) {
-		die("locate_hole failed\n");
+	if (buf_min < (1ULL<<32)) {
+		hole_min = max(buf_min, (unsigned long)mem_min);
+		hole_max = min(buf_max, (unsigned long)mem_max);
+	} else {
+		hole_min = max(buf_min, (unsigned long)mem64_min);
+		hole_max = min(buf_max, (unsigned long)mem64_max);
 	}
-	
+
+	base = __locate_hole(info, memsz, buf_align, hole_min, hole_max, buf_end);
+	if (base == ULONG_MAX)
+		return 0;
+
 	add_segment_phys_virt(info, buf, bufsz, base, memsz, phys);
 	return base;
 }
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 6/8] debug print out for add_buf
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                   ` (4 preceding siblings ...)
  2012-11-16 23:04 ` [PATCH 5/8] seperate checking 64bit mem range Yinghai Lu
@ 2012-11-16 23:04 ` Yinghai Lu
  2012-11-16 23:04 ` [PATCH 7/8] x86: put ramdisk high for 64bit bzImage Yinghai Lu
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

This information is very important.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/add_buffer.c |    9 ++++++++-
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/kexec/add_buffer.c b/kexec/add_buffer.c
index 4d4a55f..0e3badf 100644
--- a/kexec/add_buffer.c
+++ b/kexec/add_buffer.c
@@ -9,6 +9,13 @@ unsigned long add_buffer(struct kexec_info *info,
 			 unsigned long buf_max,
 			 int buf_end)
 {
-	return add_buffer_virt(info, buf, bufsz, memsz, buf_align,
+	unsigned long base;
+
+	base = add_buffer_virt(info, buf, bufsz, memsz, buf_align,
 			       buf_min, buf_max, buf_end);
+
+	if (base)
+		printf("add_buffer: base:%lx size:%lx\n", base, bufsz);
+
+	return base;
 }
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 7/8] x86: put ramdisk high for 64bit bzImage
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                   ` (5 preceding siblings ...)
  2012-11-16 23:04 ` [PATCH 6/8] debug print out for add_buf Yinghai Lu
@ 2012-11-16 23:04 ` Yinghai Lu
  2012-11-16 23:04 ` [PATCH 8/8] x86: put 64bit bzImage high Yinghai Lu
  2012-11-19 21:00 ` [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Vivek Goyal
  8 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Only do that for a 64bit bzImage, and fall back to low memory if the high allocation fails.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/i386/x86-linux-setup.c |   30 ++++++++++++++++++++++--------
 1 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
index 53d9df9..b0e6119 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -69,20 +69,34 @@ void setup_linux_bootloader_parameters(
 	}
 
 	/* Load the initrd if we have one */
+	initrd_base = 0;
 	if (initrd_buf) {
-		initrd_base = add_buffer(info,
-			initrd_buf, initrd_size, initrd_size,
-			4096, INITRD_BASE, initrd_addr_max, -1);
+		if (real_mode->protocol_version >= 0x020c &&
+		    real_mode->code64_start_offset) {
+			initrd_base = add_buffer(info,
+				initrd_buf, initrd_size, initrd_size,
+				4096, 1UL<<32, ULONG_MAX, -1);
+			if (!initrd_base)
+				initrd_base = add_buffer(info,
+					initrd_buf, initrd_size, initrd_size,
+					4096, 1UL<<30, 1UL<<32, -1);
+		}
+		if (!initrd_base)
+			initrd_base = add_buffer(info,
+				initrd_buf, initrd_size, initrd_size,
+				4096, INITRD_BASE, initrd_addr_max, -1);
 		dbgprintf("Loaded initrd at 0x%lx size 0x%lx\n", initrd_base,
 			initrd_size);
-	} else {
-		initrd_base = 0;
+	} else
 		initrd_size = 0;
-	}
 
 	/* Ramdisk address and size */
-	real_mode->initrd_start = initrd_base;
-	real_mode->initrd_size  = initrd_size;
+	real_mode->initrd_start = initrd_base & 0xffffffff;
+	real_mode->initrd_size  = initrd_size & 0xffffffff;
+	if ((initrd_base + initrd_size) > (1ULL<<32)) {
+		real_mode->ext_ramdisk_image = initrd_base >> 32;
+		real_mode->ext_ramdisk_size  = initrd_size >> 32;
+	}
 
 	/* The location of the command line */
 	/* if (real_mode_base == 0x90000) { */
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 8/8] x86: put 64bit bzImage high
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                   ` (6 preceding siblings ...)
  2012-11-16 23:04 ` [PATCH 7/8] x86: put ramdisk high for 64bit bzImage Yinghai Lu
@ 2012-11-16 23:04 ` Yinghai Lu
  2012-11-17  6:33   ` Eric W. Biederman
  2012-11-19 21:00 ` [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Vivek Goyal
  8 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-16 23:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

also need to make sure pass right 64bit start address to go there directly later.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/i386/kexec-bzImage.c |   53 ++++++++++++++++++++++++++++++++++++--
 1 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 6998587..3e705ca 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -18,8 +18,10 @@
  */
 
 #define _GNU_SOURCE
+#include <stddef.h>
 #include <stdio.h>
 #include <string.h>
+#include <limits.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <sys/types.h>
@@ -35,6 +37,7 @@
 #include "../../kexec-elf.h"
 #include "../../kexec-syscall.h"
 #include "kexec-x86.h"
+#include "../x86_64/kexec-x86_64.h"
 #include "x86-linux-setup.h"
 #include "crashdump-x86.h"
 #include <arch/options.h>
@@ -111,12 +114,15 @@ int do_bzImage_load(struct kexec_info *info,
 	size_t size;
 	int kern16_size;
 	unsigned long setup_base, setup_size;
+	struct entry64_regs regs64;
 	struct entry32_regs regs32;
 	struct entry16_regs regs16;
 	unsigned int relocatable_kernel = 0;
 	unsigned long kernel32_load_addr;
 	char *modified_cmdline;
 	unsigned long cmdline_end;
+	unsigned long code64_start_offset = 0;
+	unsigned long kernel64_load_addr = 0;
 
 	/*
 	 * Find out about the file I am about to load.
@@ -154,6 +160,13 @@ int do_bzImage_load(struct kexec_info *info,
 		dbgprintf("bzImage is relocatable\n");
 	}
 
+	if (setup_header.protocol_version >= 0x020C) {
+		code64_start_offset = setup_header.code64_start_offset;
+		if (code64_start_offset)
+			dbgprintf("code64_start_offset: 0x%lx\n",
+					 code64_start_offset);
+	}
+
 	/* Can't use bzImage for crash dump purposes with real mode entry */
 	if((info->kexec_flags & KEXEC_ON_CRASH) && real_mode_entry) {
 		fprintf(stderr, "Can't use bzImage for crash dump purposes"
@@ -250,7 +263,26 @@ int do_bzImage_load(struct kexec_info *info,
 				kernel32_max_addr = real_mode->initrd_addr_max;
 		}
 
-		kernel32_load_addr = add_buffer(info, kernel + kern16_size,
+		if (!real_mode_entry && code64_start_offset) {
+			/* align to 1G to avoid cross the PUD_SIZE boundary */
+			kernel64_load_addr = add_buffer(
+						info, kernel + kern16_size,
+						size, size, 1UL<<30,
+						1UL<<32, ULONG_MAX,
+						-1);
+			if (!kernel64_load_addr)
+				kernel64_load_addr = add_buffer(
+						info, kernel + kern16_size,
+						size, size, 1UL<<30,
+						1UL<<30, 1UL<<32,
+						-1);
+			if (kernel64_load_addr)
+				kernel64_load_addr += code64_start_offset;
+		}
+
+		if (!kernel64_load_addr)
+			kernel32_load_addr = add_buffer(
+						info, kernel + kern16_size,
 						size, size, kern_align,
 						0x100000, kernel32_max_addr,
 						1);
@@ -260,8 +292,11 @@ int do_bzImage_load(struct kexec_info *info,
 		add_segment(info, kernel + kern16_size, size,
 				kernel32_load_addr, size);
 	}
-		
-	dbgprintf("Loaded 32bit kernel at 0x%lx\n", kernel32_load_addr);
+
+	if (kernel64_load_addr)
+		dbgprintf("Loaded 64bit kernel at 0x%lx\n", kernel64_load_addr);
+	else
+		dbgprintf("Loaded 32bit kernel at 0x%lx\n", kernel32_load_addr);
 
 	/* Tell the kernel what is going on */
 	setup_linux_bootloader_parameters(info, real_mode, setup_base,
@@ -271,6 +306,16 @@ int do_bzImage_load(struct kexec_info *info,
 	/* Get the initial register values */
 	elf_rel_get_symbol(&info->rhdr, "entry16_regs", &regs16, sizeof(regs16));
 	elf_rel_get_symbol(&info->rhdr, "entry32_regs", &regs32, sizeof(regs32));
+	if (kernel64_load_addr) {
+		elf_rel_get_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
+		regs64.rbx = 0;           /* Bootstrap processor */
+		regs64.rsi = setup_base;  /* Pointer to the parameters */
+		regs64.rip = kernel64_load_addr; /* the entry point */
+		regs64.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */
+		elf_rel_set_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
+
+		goto cmd_line;
+	}
 	/*
 
 	 * Initialize the 32bit start information.
@@ -320,6 +365,8 @@ int do_bzImage_load(struct kexec_info *info,
 	elf_rel_set_symbol(&info->rhdr, "entry16_regs", &regs16, sizeof(regs16));
 	elf_rel_set_symbol(&info->rhdr, "entry16_debug_regs", &regs16, sizeof(regs16));
 	elf_rel_set_symbol(&info->rhdr, "entry32_regs", &regs32, sizeof(regs32));
+
+cmd_line:
 	cmdline_end = setup_base + kern16_size + command_line_len - 1;
 	elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end,
 			   sizeof(unsigned long));
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-16 23:04 ` [PATCH 3/8] add mem64_min/max control Yinghai Lu
@ 2012-11-17  6:18   ` Eric W. Biederman
  2012-11-17  7:06     ` Yinghai Lu
  0 siblings, 1 reply; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-17  6:18 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> So could limit range for 4g above buffers.

What is wrong with mem-min and mem-max?  At this point in the patchset
it looks like you are introducing mem64-min and mem64-max as a hack to
avoid fixing mem-min and mem-max properly.

Eric
 

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 8/8] x86: put 64bit bzImage high
  2012-11-16 23:04 ` [PATCH 8/8] x86: put 64bit bzImage high Yinghai Lu
@ 2012-11-17  6:33   ` Eric W. Biederman
       [not found]     ` <CAE9FiQWJaT9yfdV0rgV-5rM=BR4eX8sr+a99g8Ggf-+YkD8qgQ@mail.gmail.com>
  0 siblings, 1 reply; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-17  6:33 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> also need to make sure pass right 64bit start address to go there
> directly later.

There are some silly things here but I think the clean way to handle
this is to create a kexec/arch/i386/kexec-bzImage64.c that handles
loading a bzImage with a 64bit entry point.

That should keep the code simpler and easier to read.  Although it will
probably introduce a bit more code.

Eric

> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> ---
>  kexec/arch/i386/kexec-bzImage.c |   53 ++++++++++++++++++++++++++++++++++++--
>  1 files changed, 50 insertions(+), 3 deletions(-)
>
> diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
> index 6998587..3e705ca 100644
> --- a/kexec/arch/i386/kexec-bzImage.c
> +++ b/kexec/arch/i386/kexec-bzImage.c
> @@ -18,8 +18,10 @@
>   */
>  
>  #define _GNU_SOURCE
> +#include <stddef.h>
>  #include <stdio.h>
>  #include <string.h>
> +#include <limits.h>
>  #include <stdlib.h>
>  #include <errno.h>
>  #include <sys/types.h>
> @@ -35,6 +37,7 @@
>  #include "../../kexec-elf.h"
>  #include "../../kexec-syscall.h"
>  #include "kexec-x86.h"
> +#include "../x86_64/kexec-x86_64.h"
>  #include "x86-linux-setup.h"
>  #include "crashdump-x86.h"
>  #include <arch/options.h>
> @@ -111,12 +114,15 @@ int do_bzImage_load(struct kexec_info *info,
>  	size_t size;
>  	int kern16_size;
>  	unsigned long setup_base, setup_size;
> +	struct entry64_regs regs64;
>  	struct entry32_regs regs32;
>  	struct entry16_regs regs16;
>  	unsigned int relocatable_kernel = 0;
>  	unsigned long kernel32_load_addr;
>  	char *modified_cmdline;
>  	unsigned long cmdline_end;
> +	unsigned long code64_start_offset = 0;
> +	unsigned long kernel64_load_addr = 0;
>  
>  	/*
>  	 * Find out about the file I am about to load.
> @@ -154,6 +160,13 @@ int do_bzImage_load(struct kexec_info *info,
>  		dbgprintf("bzImage is relocatable\n");
>  	}
>  
> +	if (setup_header.protocol_version >= 0x020C) {
> +		code64_start_offset = setup_header.code64_start_offset;
> +		if (code64_start_offset)
> +			dbgprintf("code64_start_offset: 0x%lx\n",
> +					 code64_start_offset);
> +	}
> +
>  	/* Can't use bzImage for crash dump purposes with real mode entry */
>  	if((info->kexec_flags & KEXEC_ON_CRASH) && real_mode_entry) {
>  		fprintf(stderr, "Can't use bzImage for crash dump purposes"
> @@ -250,7 +263,26 @@ int do_bzImage_load(struct kexec_info *info,
>  				kernel32_max_addr = real_mode->initrd_addr_max;
>  		}
>  
> -		kernel32_load_addr = add_buffer(info, kernel + kern16_size,
> +		if (!real_mode_entry && code64_start_offset) {
> +			/* align to 1G to avoid cross the PUD_SIZE boundary */
> +			kernel64_load_addr = add_buffer(
> +						info, kernel + kern16_size,
> +						size, size, 1UL<<30,
> +						1UL<<32, ULONG_MAX,
> +						-1);
> +			if (!kernel64_load_addr)
> +				kernel64_load_addr = add_buffer(
> +						info, kernel + kern16_size,
> +						size, size, 1UL<<30,
> +						1UL<<30, 1UL<<32,
> +						-1);
> +			if (kernel64_load_addr)
> +				kernel64_load_addr += code64_start_offset;
> +		}
> +
> +		if (!kernel64_load_addr)
> +			kernel32_load_addr = add_buffer(
> +						info, kernel + kern16_size,
>  						size, size, kern_align,
>  						0x100000, kernel32_max_addr,
>  						1);
> @@ -260,8 +292,11 @@ int do_bzImage_load(struct kexec_info *info,
>  		add_segment(info, kernel + kern16_size, size,
>  				kernel32_load_addr, size);
>  	}
> -		
> -	dbgprintf("Loaded 32bit kernel at 0x%lx\n", kernel32_load_addr);
> +
> +	if (kernel64_load_addr)
> +		dbgprintf("Loaded 64bit kernel at 0x%lx\n", kernel64_load_addr);
> +	else
> +		dbgprintf("Loaded 32bit kernel at 0x%lx\n", kernel32_load_addr);
>  
>  	/* Tell the kernel what is going on */
>  	setup_linux_bootloader_parameters(info, real_mode, setup_base,
> @@ -271,6 +306,16 @@ int do_bzImage_load(struct kexec_info *info,
>  	/* Get the initial register values */
>  	elf_rel_get_symbol(&info->rhdr, "entry16_regs", &regs16, sizeof(regs16));
>  	elf_rel_get_symbol(&info->rhdr, "entry32_regs", &regs32, sizeof(regs32));
> +	if (kernel64_load_addr) {
> +		elf_rel_get_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
> +		regs64.rbx = 0;           /* Bootstrap processor */
> +		regs64.rsi = setup_base;  /* Pointer to the parameters */
> +		regs64.rip = kernel64_load_addr; /* the entry point */
> +		regs64.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */
> +		elf_rel_set_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
> +
> +		goto cmd_line;
> +	}
>  	/*
>  
>  	 * Initialize the 32bit start information.
> @@ -320,6 +365,8 @@ int do_bzImage_load(struct kexec_info *info,
>  	elf_rel_set_symbol(&info->rhdr, "entry16_regs", &regs16, sizeof(regs16));
>  	elf_rel_set_symbol(&info->rhdr, "entry16_debug_regs", &regs16, sizeof(regs16));
>  	elf_rel_set_symbol(&info->rhdr, "entry32_regs", &regs32, sizeof(regs32));
> +
> +cmd_line:
>  	cmdline_end = setup_base + kern16_size + command_line_len - 1;
>  	elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end,
>  			   sizeof(unsigned long));

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-17  6:18   ` Eric W. Biederman
@ 2012-11-17  7:06     ` Yinghai Lu
  2012-11-17  8:25       ` Eric W. Biederman
  0 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-17  7:06 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Fri, Nov 16, 2012 at 10:18 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>
>> So could limit range for 4g above buffers.
>
> What is wrong with mem-min and mem-max?  At this point in the patchset
> it looks like you are introducing mem64-min and mem64-max as a hack to
> avoid fixing mem-min and mem-max properly.

if we set mem-min high, some buffers for purgatory and real_mode can
not be allocated properly.

mem64-min and mem64-max are used to limit the range for buffers that can
stay above 4G, for example to restrict them to a single range that
belongs to one node only.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-17  7:06     ` Yinghai Lu
@ 2012-11-17  8:25       ` Eric W. Biederman
  2012-11-17 20:04         ` Yinghai Lu
  0 siblings, 1 reply; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-17  8:25 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> On Fri, Nov 16, 2012 at 10:18 PM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>
>>> So could limit range for 4g above buffers.
>>
>> What is wrong with mem-min and mem-max?  At this point in the patchset
>> it looks like you are introducing mem64-min and mem64-max as a hack to
>> avoid fixing mem-min and mem-max properly.
>
> if we set mem-min high, some buffers for purgatory and real_mode can
> not be allocated properly.

Let's see.  For a 32bit kexec that is a fundamental limit, even if we
are booting a 64bit kernel.

For a 64bit kexec we have a 64bit purgatory so it should not be a
problem to relocate it higher.

Hmm.  I'm not certain about the real_mode bits.  Splitting out the 64bit
bzImage loader from the 32bit bzImage loader should allow a lot of the
legacy bits to be deleted.  Past that I think we are simply down in the
realm of needing a command line pointer that is 64bit instead of the
current 32bit one.  That we should be able to fix by fixing the boot protocol.

Since the real mode bits when loading a 64bit kernel are just a
parameter area there should be no fundamental reason for them to be
below 4G.

The code needs to default to loading the kernel in the non kdump case
at the address it was compiled to run at.  But for the rest I really
don't see why we can't load the kernel very high.

> mem64-min and mem64-max are used to limit range for buffer that could
> stay above 4g.
> like limit them one range belong to one node only.

Having the limits makes sense.  Requiring anything other than the low 1MB
magic below 4G seems wrong if we are going to go all of the way and push
the boot protocol as far as we can.

Eric

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 8/8] x86: put 64bit bzImage high
       [not found]     ` <CAE9FiQWJaT9yfdV0rgV-5rM=BR4eX8sr+a99g8Ggf-+YkD8qgQ@mail.gmail.com>
@ 2012-11-17  8:43       ` Eric W. Biederman
  0 siblings, 0 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-17  8:43 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> On Fri, Nov 16, 2012 at 10:33 PM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>
>>> also need to make sure pass right 64bit start address to go there
>>> directly later.
>>
>> There are some silly things here but I think the clean way to handle
>> this is to create a kexec/arch/i386/kexec-bzImage64.c that handles
>> loading a bzImage with a 64bit entry point.
>>
>> That should keep the code simpler and easier to read.  Although it will
>> probably introduce a bit more code.
>
> please check attached one with kexec-bzImage64.c

Yes that looks better.

So far I have just skimmed the code but this in particular is not
needed.  We are going in via the 64bit entry point so there is no
point in ever allocating the real mode buffer in the low 640k.

+	if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
+		/* If using bzImage for capture kernel, then we will not be
+		 * executing real mode code. setup segment can be loaded
+		 * anywhere as we will be just reading command line.
+		 */
+		setup_base = add_buffer(info, real_mode, setup_size, setup_size,
+			16, 0x3000, -1, -1);
+	} else {
+		/* Careful setup_base must be greater than 8K */
+		setup_base = add_buffer(info, real_mode, setup_size, setup_size,
+			16, 0x3000, 640*1024, 1);
+	}

That should be just:
+	/* No real mode code will be executing. setup segment can be loaded
+	 * anywhere as we will be just reading the command line.
+	 */
+	setup_base = add_buffer(info, real_mode, setup_size, setup_size,
+		16, 0x3000, -1, -1);

Eric

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-17  8:25       ` Eric W. Biederman
@ 2012-11-17 20:04         ` Yinghai Lu
  2012-11-17 20:41           ` H. Peter Anvin
  2012-11-18  0:44           ` Yinghai Lu
  0 siblings, 2 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-17 20:04 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 12:25 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>
>> On Fri, Nov 16, 2012 at 10:18 PM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>> Yinghai Lu <yinghai@kernel.org> writes:
>>>
>>>> So could limit range for 4g above buffers.
>>>
>>> What is wrong with mem-min and mem-max?  At this point in the patchset
>>> it looks like you are introducing mem64-min and mem64-max as a hack to
>>> avoid fixing mem-min and mem-max properly.
>>
>> if we set mem-min high, some buffers for purgatory and real_mode can
>> not be allocated properly.
>
> Let's see.  For a 32bit kexec that is a fundamental limit, even if we
> are booting a 64bit kernel.
>
> For a 64bit kexec we have a 64bit purgatory so it should not be a
> problem to relocate it higher.
>
> Hmm.  I'm not certain about the real_mode bits.  Splitting out the 64bit
> bzImage loader from the 32bit bzImage loader should allow a lot of the
> legacy bits to be deleted.  Past that I think we simply down in the real
> of needing a command line pointer that is 64bit instead of the current
> 32bit one.  That we should be able to fix by fixing the boot protocol.
>
> Since the real mode bits when loading a 64bit kernel are just a
> parameter area there should be no fundamental reason for them to be
> below 4G.
>
> The code needs to default to loading the kernel in the non kdump case
> at the address it was compiled to run at.  But for the rest I really
> don't see why we can't load the kernel very high.

Then for the setup data (boot_params) and the command line, we would have to
set up an extra identity mapping for them in the kernel's
arch/x86/kernel/head_64.S.

With my current patchset for the kernel and kexec, we only need to set up
an identity mapping for [_text, _end) in the kernel's
arch/x86/kernel/head_64.S.

Yinghai

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-17 20:04         ` Yinghai Lu
@ 2012-11-17 20:41           ` H. Peter Anvin
  2012-11-17 20:51             ` Yinghai Lu
  2012-11-18  0:44           ` Yinghai Lu
  1 sibling, 1 reply; 60+ messages in thread
From: H. Peter Anvin @ 2012-11-17 20:41 UTC (permalink / raw)
  To: Yinghai Lu, Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal

Sorry, with "real mode" are we talking about struct boot_params or the actual realmode trampoline?  The latter needs to be < 640K as a hw requirement.

Sorry, on my cellphone atm and can't directly check.

Yinghai Lu <yinghai@kernel.org> wrote:

>On Sat, Nov 17, 2012 at 12:25 AM, Eric W. Biederman
><ebiederm@xmission.com> wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>
>>> On Fri, Nov 16, 2012 at 10:18 PM, Eric W. Biederman
>>> <ebiederm@xmission.com> wrote:
>>>> Yinghai Lu <yinghai@kernel.org> writes:
>>>>
>>>>> So could limit range for 4g above buffers.
>>>>
>>>> What is wrong with mem-min and mem-max?  At this point in the
>patchset
>>>> it looks like you are introducing mem64-min and mem64-max as a hack
>to
>>>> avoid fixing mem-min and mem-max properly.
>>>
>>> if we set mem-min high, some buffers for purgatory and real_mode can
>>> not be allocated properly.
>>
>> Let's see.  For a 32bit kexec that is a fundamental limit, even if we
>> are booting a 64bit kernel.
>>
>> For a 64bit kexec we have a 64bit purgatory so it should not be a
>> problem to relocate it higher.
>>
>> Hmm.  I'm not certain about the real_mode bits.  Splitting out the
>64bit
>> bzImage loader from the 32bit bzImage loader should allow a lot of
>the
>> legacy bits to be deleted.  Past that I think we simply down in the
>real
>> of needing a command line pointer that is 64bit instead of the
>current
>> 32bit one.  That we should be able to fix by fixing the boot
>protocol.
>>
>> Since the real mode bits when loading a 64bit kernel are just a
>> parameter area there should be no fundamental reason for them to be
>> below 4G.
>>
>> The code needs to default to loading the kernel in the non kdump case
>> at the address it was compiled to run at.  But for the rest I really
>> don't see why we can't load the kernel very high.
>
>then for setup data (boot param) and command line, we have to set extra
>ident
>mapping for them in kernel arch/x86/kernel/head_64.S
>
>with my current patchset for kernel and kexec,  we only need to set
>ident mapping
>for [_text, _end) in kernel arch/x86/kernel/head_64.S
>
>Yinghai

-- 
Sent from my mobile phone. Please excuse brevity and lack of formatting.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-17 20:41           ` H. Peter Anvin
@ 2012-11-17 20:51             ` Yinghai Lu
  2012-11-17 20:54               ` H. Peter Anvin
  0 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-17 20:51 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Haren Myneni, Simon Horman, kexec, Eric W. Biederman, Vivek Goyal

On Sat, Nov 17, 2012 at 12:41 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Sorry, with "real mode" are we talking about struct boot_params or the actual realmode trampoline?  The latter needs to be < 640K as a hw requirement.

boot_params.

When kexec loads a bzImage64, it copies boot_params from the bzImage file,
updates it, and passes a pointer to it to
arch/x86/boot/compressed/head_64.S via %rsi.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-17 20:51             ` Yinghai Lu
@ 2012-11-17 20:54               ` H. Peter Anvin
  0 siblings, 0 replies; 60+ messages in thread
From: H. Peter Anvin @ 2012-11-17 20:54 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Haren Myneni, Simon Horman, kexec, Eric W. Biederman, Vivek Goyal

On 11/17/2012 12:51 PM, Yinghai Lu wrote:
> On Sat, Nov 17, 2012 at 12:41 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>> Sorry, with "real mode" are we talking about struct boot_params or the actual realmode trampoline?  The latter needs to be < 640K as a hw requirement.
>
> boot_params.
>
> when kexec bzImage64, kexec will copy that from bzImage file, and
> update it....and
> pass pointer to arch/x86/boot/compressed/head_64.c via %rsi
>

Right, that's fine, of course.

We should make sure there aren't any implicit 32-bit assumptions 
there... there really shouldn't need to be.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-17 20:04         ` Yinghai Lu
  2012-11-17 20:41           ` H. Peter Anvin
@ 2012-11-18  0:44           ` Yinghai Lu
  2012-11-18  4:34             ` H. Peter Anvin
  1 sibling, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  0:44 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 12:04 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Sat, Nov 17, 2012 at 12:25 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>
> then for setup data (boot param) and command line, we have to set extra ident
> mapping for them in kernel arch/x86/kernel/head_64.S
>

Found a solution for this one:
we don't need to set up an identity mapping for boot_params and the
command line in head_64.S,

because by then we have already reached arch/x86/kernel/head64.c and can
use early_ioremap to access them and copy them out.

Now, will check if can put purgatory above 4g.

Thanks

Yinghai

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  0:44           ` Yinghai Lu
@ 2012-11-18  4:34             ` H. Peter Anvin
  2012-11-18  4:47               ` Eric W. Biederman
  0 siblings, 1 reply; 60+ messages in thread
From: H. Peter Anvin @ 2012-11-18  4:34 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Haren Myneni, Simon Horman, kexec, Eric W. Biederman, Vivek Goyal

On 11/17/2012 04:44 PM, Yinghai Lu wrote:
> On Sat, Nov 17, 2012 at 12:04 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> On Sat, Nov 17, 2012 at 12:25 AM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>
>> then for setup data (boot param) and command line, we have to set extra ident
>> mapping for them in kernel arch/x86/kernel/head_64.S
>>
>
> find the solution for this one:
> We don't need to set ident map for boot_param and command line in head_64.S
>
> as we already get arch/x86/kernel/head64.c, and could use
> early_ioremap to access
> and copy them out.
>

I don't think that is feasible; we need access to this structure 
extremely early.  I think if you're using the 64-bit entry point it has 
to be an entry condition that those structures are mapped.  It simply 
becomes a precondition for using the 64-bit entry point.

	-hpa


-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  4:34             ` H. Peter Anvin
@ 2012-11-18  4:47               ` Eric W. Biederman
  2012-11-18  4:55                 ` H. Peter Anvin
  2012-11-18  4:56                 ` Yinghai Lu
  0 siblings, 2 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-18  4:47 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Haren Myneni, Simon Horman, Yinghai Lu, kexec, Vivek Goyal

"H. Peter Anvin" <hpa@zytor.com> writes:

> On 11/17/2012 04:44 PM, Yinghai Lu wrote:
>> On Sat, Nov 17, 2012 at 12:04 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>> On Sat, Nov 17, 2012 at 12:25 AM, Eric W. Biederman
>>> <ebiederm@xmission.com> wrote:
>>>
>>> then for setup data (boot param) and command line, we have to set extra ident
>>> mapping for them in kernel arch/x86/kernel/head_64.S
>>>
>>
>> find the solution for this one:
>> We don't need to set ident map for boot_param and command line in head_64.S
>>
>> as we already get arch/x86/kernel/head64.c, and could use
>> early_ioremap to access
>> and copy them out.
>>
>
> I don't think that is feasible; we need access to this structure
> extremely early.  I think if you're using the 64-bit entry point it
> has to be an entry condition that those structures are mapped.  It
> simply becomes a precondition for using the 64-bit entry point.

64bit purgatory coming from kexec should be running with a page table
that identity maps everything loaded by kexec and in practice all of
memory.

We should still be using that page table when we enter the new kernel.

However then the 64bit kernel switches to its own page tables.

So after that point we potentially need early_ioremap, because the
kernel's page tables may not have covered the boot_params.  Certainly we
need to do something to ensure the kernel's page table covers boot_params.

Eric

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  4:47               ` Eric W. Biederman
@ 2012-11-18  4:55                 ` H. Peter Anvin
  2012-11-18  5:00                   ` Eric W. Biederman
  2012-11-18  4:56                 ` Yinghai Lu
  1 sibling, 1 reply; 60+ messages in thread
From: H. Peter Anvin @ 2012-11-18  4:55 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, Yinghai Lu, kexec, Vivek Goyal

On 11/17/2012 08:47 PM, Eric W. Biederman wrote:
>>
>> I don't think that is feasible; we need access to this structure
>> extremely early.  I think if you're using the 64-bit entry point it
>> has to be an entry condition that those structures are mapped.  It
>> simply becomes a precondition for using the 64-bit entry point.
>
> 64bit purgatory coming from kexec should be running with a page table
> that identity maps everything loaded by kexec and in practice all of
> memory.
>
> We should still be using that page table when we enter the new kernel.
>
> However then the 64bit kernel switches to it's own page tables.
>
> So after that point we potentially need early_ioremap, because the
> kernel's page tables may not have covered the boot_params.  Certainly we
> need to do something to ensure the kernels page table covers boot_params.
>

The easy way to fix that is to do what we do on 32 bits: we copy it into 
the bss before we turn paging on (the 64-bit equivalent is before we 
switch to the new page tables.)  That way we know where it is, at least 
for the bounded-size data items.

	-hpa


-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  4:47               ` Eric W. Biederman
  2012-11-18  4:55                 ` H. Peter Anvin
@ 2012-11-18  4:56                 ` Yinghai Lu
  2012-11-18  5:20                   ` Eric W. Biederman
  1 sibling, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  4:56 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 8:47 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> "H. Peter Anvin" <hpa@zytor.com> writes:
>
>
> 64bit purgatory coming from kexec should be running with a page table
> that identity maps everything loaded by kexec and in practice all of
> memory.

There are lots of R_X86_64_32 and R_X86_64_32S relocations in the 64bit purgatory.

those come from global variables...could kill some by converting them static...

but still have some global string or ro data....

build one big file include all .S ?

>
> We should still be using that page table when we enter the new kernel.
>
> However then the 64bit kernel switches to it's own page tables.
>
> So after that point we potentially need early_ioremap, because the
> kernel's page tables may not have covered the boot_params.  Certainly we
> need to do something to ensure the kernels page table covers boot_params.

yes, I tested that and it works.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  4:55                 ` H. Peter Anvin
@ 2012-11-18  5:00                   ` Eric W. Biederman
  2012-11-18  5:14                     ` H. Peter Anvin
  0 siblings, 1 reply; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-18  5:00 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Haren Myneni, Simon Horman, Yinghai Lu, kexec, Vivek Goyal

"H. Peter Anvin" <hpa@zytor.com> writes:

> On 11/17/2012 08:47 PM, Eric W. Biederman wrote:
>>>
>>> I don't think that is feasible; we need access to this structure
>>> extremely early.  I think if you're using the 64-bit entry point it
>>> has to be an entry condition that those structures are mapped.  It
>>> simply becomes a precondition for using the 64-bit entry point.
>>
>> 64bit purgatory coming from kexec should be running with a page table
>> that identity maps everything loaded by kexec and in practice all of
>> memory.
>>
>> We should still be using that page table when we enter the new kernel.
>>
>> However then the 64bit kernel switches to it's own page tables.
>>
>> So after that point we potentially need early_ioremap, because the
>> kernel's page tables may not have covered the boot_params.  Certainly we
>> need to do something to ensure the kernels page table covers boot_params.
>>
>
> The easy way to fix that is to do what we do on 32 bits: we copy it
> into the bss before we turn paging on (the 64-bit equivalent is before
> we switch to the new page tables.)  That way we know where it is, at
> least for the bounded-size data items.

Whatever works.  It sounds like six of one half a dozen of the other to
me.  Last I looked early_ioremap works as soon as paging is enabled on
x86_64.

Eric

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  5:00                   ` Eric W. Biederman
@ 2012-11-18  5:14                     ` H. Peter Anvin
  0 siblings, 0 replies; 60+ messages in thread
From: H. Peter Anvin @ 2012-11-18  5:14 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, Yinghai Lu, kexec, Vivek Goyal

On 11/17/2012 09:00 PM, Eric W. Biederman wrote:
>>
>> The easy way to fix that is to do what we do on 32 bits: we copy it
>> into the bss before we turn paging on (the 64-bit equivalent is before
>> we switch to the new page tables.)  That way we know where it is, at
>> least for the bounded-size data items.
>
> Whatever works.  It sounds like six of one half a dozen of the other to
> me.  Last I looked early_ioremap works as soon as paging is enabled on
> x86_64.
>

Well, x86-64 means paging is enabled.  Presumably you mean "as soon as we
are on our own page tables".  I'm fine either way, *as long as* we
don't touch *any* memory outside text/data/bss/brk before we recover 
this content.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  4:56                 ` Yinghai Lu
@ 2012-11-18  5:20                   ` Eric W. Biederman
  2012-11-18  5:35                     ` Yinghai Lu
  2012-11-18  6:23                     ` H. Peter Anvin
  0 siblings, 2 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-18  5:20 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> On Sat, Nov 17, 2012 at 8:47 PM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> "H. Peter Anvin" <hpa@zytor.com> writes:
>>
>>
>> 64bit purgatory coming from kexec should be running with a page table
>> that identity maps everything loaded by kexec and in practice all of
>> memory.
>
> there is lots of R_X86_64_32 and R_X86_64_32S for 64bit purgatory.
>
> those come from global variables...could kill some by converting them static...
>
> but still have some global string or ro data....
>
> build one big file include all .S ?

For R_X86_64_32 and R_X86_64_32S the problem is that the instructions
are using absolute 32bit addresses.

It is probably overkill but we should be able to solve this
by adding "-mcmodel=large" to the build of purgatory.

Hopefully there are not 32bit or 16bit assembly routines getting linked
in and using problematic instructions.

Eric

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  5:20                   ` Eric W. Biederman
@ 2012-11-18  5:35                     ` Yinghai Lu
  2012-11-18  5:39                       ` Yinghai Lu
  2012-11-18  6:23                     ` H. Peter Anvin
  1 sibling, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  5:35 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 9:20 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>>
>> there is lots of R_X86_64_32 and R_X86_64_32S for 64bit purgatory.
>>
>> those come from global variables...could kill some by converting them static...
>>
>> but still have some global string or ro data....
>>
>> build one big file include all .S ?
>
> For R_x86_64_32 and R_x86_64_32S the problem is that the instructions
> are using absolute 32bit addresses.
>
> It is probably overkill but we should be able to solve this with
> by adding "-mcmodel=large" to the build of purgatory.

It kills some of them...

Some are still left... it looks like they come from the .S files.

yhlu@linux-siqj:~/xx/xx/utils/kexec-tools> readelf --relocs
purgatory/purgatory.ro | grep R_X86_64_32
0000000005e9  00020000000b R_X86_64_32S      0000000000000000 .rodata + c0
0000000005f0  00010000000b R_X86_64_32S      0000000000000000 .text + 5f6
00000000068e  00410000000b R_X86_64_32S      0000000000002008
jump_back_entry + 0
000000000695  00060000000b R_X86_64_32S      0000000000000000 .bss + 1000
000000000006  000b0000000a R_X86_64_32       0000000000000000 .debug_info + 0
000000000036  000b0000000a R_X86_64_32       0000000000000000 .debug_info + 24c
000000000066  000b0000000a R_X86_64_32       0000000000000000 .debug_info + 4c0
000000000096  000b0000000a R_X86_64_32       0000000000000000 .debug_info + 6d8
0000000000c6  000b0000000a R_X86_64_32       0000000000000000 .debug_info + 85b
000000000006  000c0000000a R_X86_64_32       0000000000000000 .debug_abbrev + 0
00000000000c  000e0000000a R_X86_64_32       0000000000000000 .debug_str + c0
000000000011  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 88
000000000015  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 0
000000000029  000d0000000a R_X86_64_32       0000000000000000 .debug_line + 0
000000000030  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 176
000000000037  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 137
000000000045  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 16d
00000000004a  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5b
000000000057  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 7a
00000000005e  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 48
000000000063  000e0000000a R_X86_64_32       0000000000000000 .debug_str + b7
000000000070  000e0000000a R_X86_64_32       0000000000000000 .debug_str + eb
000000000075  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 63
000000000082  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 9e
000000000087  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 29
000000000094  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 132
000000000099  000e0000000a R_X86_64_32       0000000000000000 .debug_str + b0
0000000000a6  000e0000000a R_X86_64_32       0000000000000000 .debug_str + f8
0000000000ad  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 124
0000000000ba  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 11e
0000000000c8  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 23
0000000000d6  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 141
000000000115  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 10f
000000000120  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 148
00000000013b  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 158
000000000147  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 42
000000000165  000e0000000a R_X86_64_32       0000000000000000 .debug_str + d6
000000000180  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 0
000000000193  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + ed
0000000001a2  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 110
0000000001a7  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 166
0000000001be  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 14f
0000000001da  000e0000000a R_X86_64_32       0000000000000000 .debug_str + cc
0000000001f1  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 185
000000000206  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 33
000000000213  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 6c
000000000220  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 33
000000000236  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 6c
000000000252  000c0000000a R_X86_64_32       0000000000000000
.debug_abbrev + 109
000000000258  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 20c
00000000025d  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 222
000000000261  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 188
000000000275  000d0000000a R_X86_64_32       0000000000000000 .debug_line + 13f
00000000027a  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 1c2
000000000297  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 1df
00000000029c  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 2a9
0000000002a8  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 218
0000000002b6  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 1ab
0000000002c4  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 248
0000000002d2  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 235
0000000002e3  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 1b5
0000000002ea  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 204
0000000002f7  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 29d
0000000002fe  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 278
00000000030c  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 294
000000000313  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 1d1
00000000031a  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 1f1
000000000320  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 2bf
000000000337  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 1d1
000000000340  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 287
00000000034a  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 30e
000000000359  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 344
00000000035e  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 282
000000000368  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 3f2
000000000375  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 428
00000000037a  00100000000a R_X86_64_32       0000000000000000 .debug_ranges + 0
000000000396  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 4ae
00000000039b  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 182
0000000003a5  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 54c
0000000003aa  00100000000a R_X86_64_32       0000000000000000 .debug_ranges + 40
0000000003bb  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 60f
0000000003c8  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 632
0000000003e7  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 699
0000000003ec  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 28e
0000000003f6  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 6cf
000000000406  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 243
00000000042e  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 25a
000000000434  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 2b7
00000000044b  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 6f2
000000000454  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 287
00000000045e  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 73f
00000000046d  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 762
000000000473  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 282
000000000484  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 271
00000000049b  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 785
0000000004aa  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 7d2
0000000004b0  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 282
0000000004c6  000c0000000a R_X86_64_32       0000000000000000
.debug_abbrev + 22b
0000000004cc  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 339
0000000004d1  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 351
0000000004d5  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 2c8
0000000004e9  000d0000000a R_X86_64_32       0000000000000000 .debug_line + 2c7
0000000004f0  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 330
0000000004f5  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 2f0
000000000502  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 303
00000000050f  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 315
00000000053a  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 808
000000000556  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 82b
000000000569  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 32b
00000000056f  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 364
00000000059a  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 862
0000000005bd  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 898
0000000005cb  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 8cf
0000000005da  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 34a
0000000005fd  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 2eb
000000000607  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 905
00000000062e  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 93b
00000000063b  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 972
00000000065b  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 31d
00000000066c  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 2f7
00000000068b  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 2fe
000000000698  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 345
0000000006d2  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 9a8
0000000006de  000c0000000a R_X86_64_32       0000000000000000
.debug_abbrev + 308
0000000006e4  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 3f8
0000000006e9  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 404
0000000006ed  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 37b
000000000701  000d0000000a R_X86_64_32       0000000000000000 .debug_line + 387
000000000708  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 47c
00000000070f  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 469
00000000071d  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 473
000000000722  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 43f
00000000072f  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 3bb
000000000736  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 3db
00000000073d  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 42d
000000000744  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 3c9
00000000074a  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 453
000000000761  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + 9df
000000000767  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 488
000000000783  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 39e
00000000079a  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + a53
00000000079f  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 3ee
0000000007ac  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 45e
0000000007b9  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4a2
0000000007c6  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 36b
0000000007d3  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 447
0000000007e8  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 43a
0000000007ed  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 3ee
000000000803  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 45e
000000000819  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4a2
00000000082f  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 36b
000000000845  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 447
000000000861  000c0000000a R_X86_64_32       0000000000000000
.debug_abbrev + 394
000000000867  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 568
00000000086c  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4ed
000000000870  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4c3
000000000884  000d0000000a R_X86_64_32       0000000000000000 .debug_line + 453
00000000088b  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 61f
000000000890  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4af
00000000089d  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 52c
0000000008ab  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 513
0000000008b2  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 53e
0000000008b9  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 58c
0000000008c0  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 628
0000000008c7  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 504
0000000008ce  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5f5
0000000008d5  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5a8
0000000008dc  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5e1
0000000008e1  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5bf
0000000008ec  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 55f
0000000008ff  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5db
00000000090d  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4b6
00000000091b  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4bc
00000000095a  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 609
000000000966  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 551
00000000097d  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + ac7
00000000099c  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 599
0000000009b3  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + b27
0000000009cb  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 50e
0000000009da  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5d5
0000000009e8  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5ef
000000000a80  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5c7
000000000a97  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + b87
000000000aae  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 521
000000000abc  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 639
000000000aca  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4ff
000000000ad8  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 527
000000000ae8  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 574
000000000aff  000f0000000a R_X86_64_32       0000000000000000 .debug_loc + be7
000000000b16  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 618
000000000b24  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 634
000000000b32  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 582
000000000b40  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 587
000000000b5c  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 4e6
000000000b81  000e0000000a R_X86_64_32       0000000000000000 .debug_str + 5fa


>
> Hopefully there are not 32bit or 16bit assembly routines getting linked
> in and using problemenatic instructions.

I commented out the 32bit/16bit related sources in the Makefile...

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  5:35                     ` Yinghai Lu
@ 2012-11-18  5:39                       ` Yinghai Lu
  2012-11-18  5:58                         ` Yinghai Lu
  2012-11-18  6:24                         ` [PATCH 3/8] add mem64_min/max control H. Peter Anvin
  0 siblings, 2 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  5:39 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 9:35 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Sat, Nov 17, 2012 at 9:20 PM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>>
>>> there is lots of R_X86_64_32 and R_X86_64_32S for 64bit purgatory.
>>>
>>> those come from global variables...could kill some by converting them static...
>>>
>>> but still have some global string or ro data....
>>>
>>> build one big file include all .S ?
>>
>> For R_x86_64_32 and R_x86_64_32S the problem is that the instructions
>> are using absolute 32bit addresses.
>>
>> It is probably overkill but we should be able to solve this with
>> by adding "-mcmodel=large" to the build of purgatory.
>
> it kill some...
>
> still have left.... looks they from .S
>
> yhlu@linux-siqj:~/xx/xx/utils/kexec-tools> readelf --relocs
> purgatory/purgatory.ro | grep R_X86_64_32
> 0000000005e9  00020000000b R_X86_64_32S      0000000000000000 .rodata + c0

It looks like the .S files did not get -mcmodel=large applied.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  5:39                       ` Yinghai Lu
@ 2012-11-18  5:58                         ` Yinghai Lu
  2012-11-18  6:11                           ` Eric W. Biederman
  2012-11-18  6:24                         ` [PATCH 3/8] add mem64_min/max control H. Peter Anvin
  1 sibling, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  5:58 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 9:39 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Sat, Nov 17, 2012 at 9:35 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> On Sat, Nov 17, 2012 at 9:20 PM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>> Yinghai Lu <yinghai@kernel.org> writes:
>>>>
>>>> there is lots of R_X86_64_32 and R_X86_64_32S for 64bit purgatory.
>>>>
>>>> those come from global variables...could kill some by converting them static...
>>>>
>>>> but still have some global string or ro data....
>>>>
>>>> build one big file include all .S ?
>>>
>>> For R_x86_64_32 and R_x86_64_32S the problem is that the instructions
>>> are using absolute 32bit addresses.
>>>
>>> It is probably overkill but we should be able to solve this with
>>> by adding "-mcmodel=large" to the build of purgatory.
>>
>> it kill some...
>>
>> still have left.... looks they from .S
>>
>> yhlu@linux-siqj:~/xx/xx/utils/kexec-tools> readelf --relocs
>> purgatory/purgatory.ro | grep R_X86_64_32
>> 0000000005e9  00020000000b R_X86_64_32S      0000000000000000 .rodata + c0
>
> looks like .S did not get that -mcmodel=large applied..

So -mcmodel=large only works with .c files, but has no effect on .S files?

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  5:58                         ` Yinghai Lu
@ 2012-11-18  6:11                           ` Eric W. Biederman
  2012-11-18  6:32                             ` Yinghai Lu
  2012-11-18  6:38                             ` Yinghai Lu
  0 siblings, 2 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-18  6:11 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> On Sat, Nov 17, 2012 at 9:39 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> On Sat, Nov 17, 2012 at 9:35 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>> On Sat, Nov 17, 2012 at 9:20 PM, Eric W. Biederman
>>> <ebiederm@xmission.com> wrote:
>>>> Yinghai Lu <yinghai@kernel.org> writes:
>>>>>
>>>>> there is lots of R_X86_64_32 and R_X86_64_32S for 64bit purgatory.
>>>>>
>>>>> those come from global variables...could kill some by converting them static...
>>>>>
>>>>> but still have some global string or ro data....
>>>>>
>>>>> build one big file include all .S ?
>>>>
>>>> For R_x86_64_32 and R_x86_64_32S the problem is that the instructions
>>>> are using absolute 32bit addresses.
>>>>
>>>> It is probably overkill but we should be able to solve this with
>>>> by adding "-mcmodel=large" to the build of purgatory.
>>>
>>> it kill some...
>>>
>>> still have left.... looks they from .S
>>>
>>> yhlu@linux-siqj:~/xx/xx/utils/kexec-tools> readelf --relocs
>>> purgatory/purgatory.ro | grep R_X86_64_32
>>> 0000000005e9  00020000000b R_X86_64_32S      0000000000000000 .rodata + c0
>>
>> looks like .S did not get that -mcmodel=large applied..
>
> so -mcmodel=large only work with .c, but does not have effects on .S ?

Yes, -mcmodel=large is about which instructions the compiler generates.  The
instructions used determine the relocations.

Just for playing with it, the following patch modifies things so that
purgatory is 64bit clean.

I don't know yet what to do with the 32bit and 16bit assembly.

Eric


diff --git a/purgatory/Makefile b/purgatory/Makefile
index ee1679c..e39adec 100644
--- a/purgatory/Makefile
+++ b/purgatory/Makefile
@@ -64,6 +64,7 @@ $(PURGATORY): $(PURGATORY_OBJS)
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
 
 #	$(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) --no-undefined -e purgatory_start -r -o $@ $(PURGATORY_OBJS) $(UTIL_LIB)
+	$(STRIP) --strip-debug $@
 
 echo::
 	@echo "PURGATORY_SRCS $(PURGATORY_SRCS)"
diff --git a/purgatory/arch/x86_64/Makefile b/purgatory/arch/x86_64/Makefile
index 22b4228..2a768c6 100644
--- a/purgatory/arch/x86_64/Makefile
+++ b/purgatory/arch/x86_64/Makefile
@@ -2,7 +2,7 @@
 # Purgatory x86_64
 #
 
-x86_64_PURGATORY_SRCS_native = purgatory/arch/x86_64/entry64-32.S
+#x86_64_PURGATORY_SRCS_native = purgatory/arch/x86_64/entry64-32.S
 x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/entry64.S
 x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/setup-x86_64.S
 x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/stack.S
@@ -16,9 +16,11 @@ dist += purgatory/arch/x86_64/Makefile $(x86_64_PURGATORY_SRCS_native) 	\
 	purgatory/arch/x86_64/purgatory-x86_64.h
 
 # Don't add sources in i386/ to dist, as i386/Makefile adds them
-x86_64_PURGATORY_SRCS +=  purgatory/arch/i386/entry32-16.S
-x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16-debug.S
+#x86_64_PURGATORY_SRCS +=  purgatory/arch/i386/entry32-16.S
+#x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16-debug.S
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/crashdump_backup.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/console-x86.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/vga.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/pic.c
+
+x86_64_PURGATORY_EXTRA_CFLAGS = -mcmodel=large
diff --git a/purgatory/arch/x86_64/entry64.S b/purgatory/arch/x86_64/entry64.S
index 666023c..e3223b7 100644
--- a/purgatory/arch/x86_64/entry64.S
+++ b/purgatory/arch/x86_64/entry64.S
@@ -37,9 +37,10 @@ entry64:
 	movl	%eax, %fs
 	movl	%eax, %gs
 
-	movq	$stack_init, %rsp
+	leaq	stack_init(%rip), %rsp
 	pushq	$0x10 /* CS */
-	pushq	$new_cs_exit
+	leaq	new_cs_exit(%rip), %rax
+	pushq	%rax
 	lretq
 new_cs_exit:
 
diff --git a/purgatory/arch/x86_64/setup-x86_64.S b/purgatory/arch/x86_64/setup-x86_64.S
index 74997fa..95572d8 100644
--- a/purgatory/arch/x86_64/setup-x86_64.S
+++ b/purgatory/arch/x86_64/setup-x86_64.S
@@ -42,10 +42,10 @@ purgatory_start:
 	/* In 64bit mode the code segment is meaningless */
 
 	movq	0(%rsp), %rax
-	movq	%rax, jump_back_entry
+	movq	%rax, jump_back_entry(%rip)
 
 	/* Setup a stack */
-	movq	$lstack_end, %rsp
+	leaq	lstack_end(%rip), %rsp
 
 	/* Call the C code */
 	call purgatory


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  5:20                   ` Eric W. Biederman
  2012-11-18  5:35                     ` Yinghai Lu
@ 2012-11-18  6:23                     ` H. Peter Anvin
  2012-11-18  6:44                       ` Eric W. Biederman
  1 sibling, 1 reply; 60+ messages in thread
From: H. Peter Anvin @ 2012-11-18  6:23 UTC (permalink / raw)
  To: ebiederm, Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal

I think you just need one of the PIC models, or the medium model.

ebiederm@xmission.com wrote:

>Yinghai Lu <yinghai@kernel.org> writes:
>
>> On Sat, Nov 17, 2012 at 8:47 PM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>> "H. Peter Anvin" <hpa@zytor.com> writes:
>>>
>>>
>>> 64bit purgatory coming from kexec should be running with a page
>table
>>> that identity maps everything loaded by kexec and in practice all of
>>> memory.
>>
>> there is lots of R_X86_64_32 and R_X86_64_32S for 64bit purgatory.
>>
>> those come from global variables...could kill some by converting them
>static...
>>
>> but still have some global string or ro data....
>>
>> build one big file include all .S ?
>
>For R_x86_64_32 and R_x86_64_32S the problem is that the instructions
>are using absolute 32bit addresses.
>
>It is probably overkill but we should be able to solve this with
>by adding "-mcmodel=large" to the build of purgatory.
>
>Hopefully there are not 32bit or 16bit assembly routines getting linked
>in and using problemenatic instructions.
>
>Eric

-- 
Sent from my mobile phone. Please excuse brevity and lack of formatting.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  5:39                       ` Yinghai Lu
  2012-11-18  5:58                         ` Yinghai Lu
@ 2012-11-18  6:24                         ` H. Peter Anvin
  1 sibling, 0 replies; 60+ messages in thread
From: H. Peter Anvin @ 2012-11-18  6:24 UTC (permalink / raw)
  To: Yinghai Lu, Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal

.S files have to be recoded to match the memory model!

Yinghai Lu <yinghai@kernel.org> wrote:

>On Sat, Nov 17, 2012 at 9:35 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> On Sat, Nov 17, 2012 at 9:20 PM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>> Yinghai Lu <yinghai@kernel.org> writes:
>>>>
>>>> there is lots of R_X86_64_32 and R_X86_64_32S for 64bit purgatory.
>>>>
>>>> those come from global variables...could kill some by converting
>them static...
>>>>
>>>> but still have some global string or ro data....
>>>>
>>>> build one big file include all .S ?
>>>
>>> For R_x86_64_32 and R_x86_64_32S the problem is that the
>instructions
>>> are using absolute 32bit addresses.
>>>
>>> It is probably overkill but we should be able to solve this with
>>> by adding "-mcmodel=large" to the build of purgatory.
>>
>> it kill some...
>>
>> still have left.... looks they from .S
>>
>> yhlu@linux-siqj:~/xx/xx/utils/kexec-tools> readelf --relocs
>> purgatory/purgatory.ro | grep R_X86_64_32
>> 0000000005e9  00020000000b R_X86_64_32S      0000000000000000 .rodata
>+ c0
>
>looks like .S did not get that -mcmodel=large applied..

-- 
Sent from my mobile phone. Please excuse brevity and lack of formatting.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  6:11                           ` Eric W. Biederman
@ 2012-11-18  6:32                             ` Yinghai Lu
  2012-11-18  6:38                             ` Yinghai Lu
  1 sibling, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  6:32 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 10:11 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>
>> On Sat, Nov 17, 2012 at 9:39 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>> On Sat, Nov 17, 2012 at 9:35 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>>> On Sat, Nov 17, 2012 at 9:20 PM, Eric W. Biederman
>>>> <ebiederm@xmission.com> wrote:
>>>>> Yinghai Lu <yinghai@kernel.org> writes:
>>>>>>
>>>>>> there is lots of R_X86_64_32 and R_X86_64_32S for 64bit purgatory.
>>>>>>
>>>>>> those come from global variables...could kill some by converting them static...
>>>>>>
>>>>>> but still have some global string or ro data....
>>>>>>
>>>>>> build one big file include all .S ?
>>>>>
>>>>> For R_x86_64_32 and R_x86_64_32S the problem is that the instructions
>>>>> are using absolute 32bit addresses.
>>>>>
>>>>> It is probably overkill but we should be able to solve this with
>>>>> by adding "-mcmodel=large" to the build of purgatory.
>>>>
>>>> it kill some...
>>>>
>>>> still have left.... looks they from .S
>>>>
>>>> yhlu@linux-siqj:~/xx/xx/utils/kexec-tools> readelf --relocs
>>>> purgatory/purgatory.ro | grep R_X86_64_32
>>>> 0000000005e9  00020000000b R_X86_64_32S      0000000000000000 .rodata + c0
>>>
>>> looks like .S did not get that -mcmodel=large applied..
>>
>> so -mcmodel=large only work with .c, but does not have effects on .S ?
>
> Yes it -mcmodel=large is about which instructions you generate.  The
> instructions used determine the relocates.
>
> Just for playing with it the following patch modifies things so
> purgatory is 64bit clean.
>
> I don't know yet what to do with the 32bit and 16bit assembly.
>
> Eric
>
>
> diff --git a/purgatory/Makefile b/purgatory/Makefile
> index ee1679c..e39adec 100644
> --- a/purgatory/Makefile
> +++ b/purgatory/Makefile
> @@ -64,6 +64,7 @@ $(PURGATORY): $(PURGATORY_OBJS)
>         $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
>
>  #      $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) --no-undefined -e purgatory_start -r -o $@ $(PURGATORY_OBJS) $(UTIL_LIB)
> +       $(STRIP) --strip-debug $@
>
>  echo::
>         @echo "PURGATORY_SRCS $(PURGATORY_SRCS)"
> diff --git a/purgatory/arch/x86_64/Makefile b/purgatory/arch/x86_64/Makefile
> index 22b4228..2a768c6 100644
> --- a/purgatory/arch/x86_64/Makefile
> +++ b/purgatory/arch/x86_64/Makefile
> @@ -2,7 +2,7 @@
>  # Purgatory x86_64
>  #
>
> -x86_64_PURGATORY_SRCS_native = purgatory/arch/x86_64/entry64-32.S
> +#x86_64_PURGATORY_SRCS_native = purgatory/arch/x86_64/entry64-32.S
>  x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/entry64.S
>  x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/setup-x86_64.S
>  x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/stack.S
> @@ -16,9 +16,11 @@ dist += purgatory/arch/x86_64/Makefile $(x86_64_PURGATORY_SRCS_native)       \
>         purgatory/arch/x86_64/purgatory-x86_64.h
>
>  # Don't add sources in i386/ to dist, as i386/Makefile adds them
> -x86_64_PURGATORY_SRCS +=  purgatory/arch/i386/entry32-16.S
> -x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16-debug.S
> +#x86_64_PURGATORY_SRCS +=  purgatory/arch/i386/entry32-16.S
> +#x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16-debug.S
>  x86_64_PURGATORY_SRCS += purgatory/arch/i386/crashdump_backup.c
>  x86_64_PURGATORY_SRCS += purgatory/arch/i386/console-x86.c
>  x86_64_PURGATORY_SRCS += purgatory/arch/i386/vga.c
>  x86_64_PURGATORY_SRCS += purgatory/arch/i386/pic.c
> +
> +x86_64_PURGATORY_EXTRA_CFLAGS = -mcmodel=large
> diff --git a/purgatory/arch/x86_64/entry64.S b/purgatory/arch/x86_64/entry64.S
> index 666023c..e3223b7 100644
> --- a/purgatory/arch/x86_64/entry64.S
> +++ b/purgatory/arch/x86_64/entry64.S
> @@ -37,9 +37,10 @@ entry64:
>         movl    %eax, %fs
>         movl    %eax, %gs
>
> -       movq    $stack_init, %rsp
> +       leaq    stack_init(%rip), %rsp
>         pushq   $0x10 /* CS */
> -       pushq   $new_cs_exit
> +       leaq    new_cs_exit(%rip), %rax
> +       pushq   %rax
>         lretq
>  new_cs_exit:
>
> diff --git a/purgatory/arch/x86_64/setup-x86_64.S b/purgatory/arch/x86_64/setup-x86_64.S
> index 74997fa..95572d8 100644
> --- a/purgatory/arch/x86_64/setup-x86_64.S
> +++ b/purgatory/arch/x86_64/setup-x86_64.S
> @@ -42,10 +42,10 @@ purgatory_start:
>         /* In 64bit mode the code segment is meaningless */
>
>         movq    0(%rsp), %rax
> -       movq    %rax, jump_back_entry
> +       movq    %rax, jump_back_entry(%rip)
>
>         /* Setup a stack */
> -       movq    $lstack_end, %rsp
> +       leaq    lstack_end(%rip), %rsp
>
>         /* Call the C code */
>         call purgatory
>

Great. it works. Thanks a lot for your patch...

10:~/k # sh kk
bzImage is relocatable
code64_start_offset: 0x200
add_buffer: base:19fff7000 size:70a0
sym: sha256_update info: 12 other: 00 shndx: 1 value: 4382 size: 158
sym: sha256_update value: 19fffb382 addr: 19fff7004
R_X86_64_64
sym: sha256_starts info: 12 other: 00 shndx: 1 value: bac size: 79
sym: sha256_starts value: 19fff7bac addr: 19fff700e
R_X86_64_64
sym: sha256_regions info: 11 other: 00 shndx: 8 value: 20 size: 100
sym: sha256_regions value: 19fffc120 addr: 19fff701a
R_X86_64_64
sym: sha256_regions info: 11 other: 00 shndx: 8 value: 20 size: 100
sym: sha256_regions value: 19fffc020 addr: 19fff7026
R_X86_64_64
sym: sha256_finish info: 12 other: 00 shndx: 1 value: 44da size: 34b
sym: sha256_finish value: 19fffb4da addr: 19fff705a
R_X86_64_64
sym: sha256_digest info: 11 other: 00 shndx: 8 value: 0 size: 20
sym: sha256_digest value: 19fffc000 addr: 19fff706b
R_X86_64_64
sym:     memcmp info: 12 other: 00 shndx: 1 value: 5a7 size: 24
sym: memcmp value: 19fff75a7 addr: 19fff707a
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb910 addr: 19fff7092
R_X86_64_64
sym:     printf info: 12 other: 00 shndx: 1 value: 4c7 size: a0
sym: printf value: 19fff74c7 addr: 19fff709e
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb930 addr: 19fff70ab
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb940 addr: 19fff70c4
R_X86_64_64
sym:     printf info: 12 other: 00 shndx: 1 value: 4c7 size: a0
sym: printf value: 19fff74c7 addr: 19fff70ce
R_X86_64_64
sym: sha256_digest info: 11 other: 00 shndx: 8 value: 0 size: 20
sym: sha256_digest value: 19fffc000 addr: 19fff70e3
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb946 addr: 19fff70ed
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb948 addr: 19fff70fb
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb940 addr: 19fff7113
R_X86_64_64
sym:     printf info: 12 other: 00 shndx: 1 value: 4c7 size: a0
sym: printf value: 19fff74c7 addr: 19fff7125
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb946 addr: 19fff712f
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb958 addr: 19fff7152
R_X86_64_64
sym:     printf info: 12 other: 00 shndx: 1 value: 4c7 size: a0
sym: printf value: 19fff74c7 addr: 19fff715e
R_X86_64_64
sym: setup_arch info: 12 other: 00 shndx: 1 value: 6a4 size: 3a
sym: setup_arch value: 19fff76a4 addr: 19fff716b
R_X86_64_64
sym: verify_sha256_digest info: 12 other: 00 shndx: 1 value: 0 size: 150
sym: verify_sha256_digest value: 19fff7000 addr: 19fff7177
R_X86_64_64
sym: post_verification_setup_arch info: 12 other: 00 shndx: 1 value:
716 size: 3c
sym: post_verification_setup_arch value: 19fff7716 addr: 19fff718a
R_X86_64_64
sym:    putchar info: 12 other: 00 shndx: 1 value: 83d size: 13e
sym: putchar value: 19fff783d addr: 19fff71a0
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb96a addr: 19fff72e6
R_X86_64_64
sym:   vsprintf info: 12 other: 00 shndx: 1 value: 194 size: 29d
sym: vsprintf value: 19fff7194 addr: 19fff74b5
R_X86_64_64
sym:   vsprintf info: 12 other: 00 shndx: 1 value: 194 size: 29d
sym: vsprintf value: 19fff7194 addr: 19fff7555
R_X86_64_64
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb8bc addr: 19fff75d3
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb8ec addr: 19fff75e9
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb82c addr: 19fff75fc
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb834 addr: 19fff7603
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb83c addr: 19fff760a
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb844 addr: 19fff7611
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb84c addr: 19fff7618
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb854 addr: 19fff761f
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb85c addr: 19fff7626
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb864 addr: 19fff762d
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb86c addr: 19fff7634
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb874 addr: 19fff763b
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb87c addr: 19fff7642
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb884 addr: 19fff7649
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb88c addr: 19fff7650
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb894 addr: 19fff7657
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb89c addr: 19fff765e
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb8a4 addr: 19fff7665
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb8ac addr: 19fff766b
R_X86_64_PC32
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb8ec addr: 19fff7673
R_X86_64_PC32
sym: jump_back_entry info: 11 other: 00 shndx: 8 value: 2008 size: 8
sym: jump_back_entry value: 19fffe004 addr: 19fff768d
R_X86_64_PC32
sym:       .bss info: 03 other: 00 shndx: 9 value: 0 size: 0
sym: .bss value: 19ffffffc addr: 19fff7694
R_X86_64_PC32
sym:  purgatory info: 12 other: 00 shndx: 1 value: 150 size: 44
sym: purgatory value: 19fff714c addr: 19fff7699
R_X86_64_PC32
sym:    entry64 info: 10 other: 00 shndx: 1 value: 5d0 size: 0
sym: entry64 value: 19fff75cc addr: 19fff769e
R_X86_64_PC32
sym:  reset_vga info: 11 other: 00 shndx: 8 value: 2012 size: 1
sym: reset_vga value: 19fffe012 addr: 19fff76a6
R_X86_64_64
sym: x86_reset_vga info: 12 other: 00 shndx: 1 value: 97c size: 1fb
sym: x86_reset_vga value: 19fff797c addr: 19fff76b6
R_X86_64_64
sym: legacy_pic info: 11 other: 00 shndx: 8 value: 2011 size: 1
sym: legacy_pic value: 19fffe011 addr: 19fff76c2
R_X86_64_64
sym: x86_setup_legacy_pic info: 12 other: 00 shndx: 1 value: b78 size: 31
sym: x86_setup_legacy_pic value: 19fff7b78 addr: 19fff76d2
R_X86_64_64
sym: cmdline_end info: 11 other: 00 shndx: 8 value: 2000 size: 8
sym: cmdline_end value: 19fffe000 addr: 19fff76e0
R_X86_64_64
sym: jump_back_entry info: 11 other: 00 shndx: 8 value: 2008 size: 8
sym: jump_back_entry value: 19fffe008 addr: 19fff76f2
R_X86_64_64
sym: .rodata.str1.1 info: 03 other: 00 shndx: 5 value: 0 size: 0
sym: .rodata.str1.1 value: 19fffb97b addr: 19fff76fc
R_X86_64_64
sym:    sprintf info: 12 other: 00 shndx: 1 value: 431 size: 96
sym: sprintf value: 19fff7431 addr: 19fff7709
R_X86_64_64
sym: panic_kernel info: 11 other: 00 shndx: 8 value: 2010 size: 1
sym: panic_kernel value: 19fffe010 addr: 19fff7718
R_X86_64_64
sym: crashdump_backup_memory info: 12 other: 00 shndx: 1 value: 754 size: 3e
sym: crashdump_backup_memory value: 19fff7754 addr: 19fff7729
R_X86_64_64
sym: jump_back_entry info: 11 other: 00 shndx: 8 value: 2008 size: 8
sym: jump_back_entry value: 19fffe008 addr: 19fff7735
R_X86_64_64
sym: x86_setup_jump_back_entry info: 12 other: 00 shndx: 1 value: 6de size: 38
sym: x86_setup_jump_back_entry value: 19fff76de addr: 19fff7746
R_X86_64_64
sym: backup_src_start info: 11 other: 00 shndx: 8 value: 2020 size: 8
sym: backup_src_start value: 19fffe020 addr: 19fff7756
R_X86_64_64
sym: backup_src_size info: 11 other: 00 shndx: 8 value: 2018 size: 8
sym: backup_src_size value: 19fffe018 addr: 19fff7763
R_X86_64_64
sym: backup_start info: 11 other: 00 shndx: 8 value: 2028 size: 8
sym: backup_start value: 19fffe028 addr: 19fff7775
R_X86_64_64
sym:     memcpy info: 12 other: 00 shndx: 1 value: 58f size: 18
sym: memcpy value: 19fff758f addr: 19fff7787
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe040 addr: 19fff7796
R_X86_64_64
sym: serial_base info: 11 other: 00 shndx: 8 value: 2034 size: 2
sym: serial_base value: 19fffe034 addr: 19fff77a5
R_X86_64_64
sym: serial_baud info: 11 other: 00 shndx: 8 value: 2030 size: 4
sym: serial_baud value: 19fffe030 addr: 19fff77dd
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe040 addr: 19fff7810
R_X86_64_64
sym: serial_base info: 11 other: 00 shndx: 8 value: 2034 size: 2
sym: serial_base value: 19fffe034 addr: 19fff781a
R_X86_64_64
sym: console_vga info: 11 other: 00 shndx: 8 value: 2037 size: 1
sym: console_vga value: 19fffe037 addr: 19fff783f
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe038 addr: 19fff7855
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe038 addr: 19fff78c2
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe03c addr: 19fff78d2
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe038 addr: 19fff78db
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe038 addr: 19fff78ee
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe03c addr: 19fff78fa
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe03c addr: 19fff791d
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe03c addr: 19fff7931
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe038 addr: 19fff793d
R_X86_64_64
sym: console_serial info: 11 other: 00 shndx: 8 value: 2036 size: 1
sym: console_serial value: 19fffe036 addr: 19fff7947
R_X86_64_64
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7794 addr: 19fff7960
R_X86_64_64
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7794 addr: 19fff796e
R_X86_64_64
sym:     memcpy info: 12 other: 00 shndx: 1 value: 58f size: 18
sym: memcpy value: 19fff758f addr: 19fffb43b
R_X86_64_64
sym: sha256_process info: 12 other: 00 shndx: 1 value: c25 size: 375d
sym: sha256_process value: 19fff7c25 addr: 19fffb459
R_X86_64_64
sym: sha256_process info: 12 other: 00 shndx: 1 value: c25 size: 375d
sym: sha256_process value: 19fff7c25 addr: 19fffb48d
R_X86_64_64
sym:     memcpy info: 12 other: 00 shndx: 1 value: 58f size: 18
sym: memcpy value: 19fff758f addr: 19fffb4cb
R_X86_64_64
sym:      .data info: 03 other: 00 shndx: 8 value: 0 size: 0
sym: .data value: 19fffe060 addr: 19fffb587
R_X86_64_64
sym: sha256_update info: 12 other: 00 shndx: 1 value: 4382 size: 158
sym: sha256_update value: 19fffb382 addr: 19fffb594
R_X86_64_64
sym: sha256_update info: 12 other: 00 shndx: 1 value: 4382 size: 158
sym: sha256_update value: 19fffb382 addr: 19fffb5b3
R_X86_64_64
sym:    entry32 info: 10 other: 00 shndx: 0 value: 0 size: 0
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb8c0 addr: 19fffb8c2
R_X86_64_64
sym:    .rodata info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata value: 19fffb8f0 addr: 19fffb8f2
R_X86_64_64
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7000 addr: 19fffb9b8
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7150 addr: 19fffb9f4
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7194 addr: 19fffba28
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7431 addr: 19fffba74
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff74c7 addr: 19fffba90
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7568 addr: 19fffbac8
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff757b addr: 19fffbadc
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff758f addr: 19fffbaf0
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff75a7 addr: 19fffbb04
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff76a4 addr: 19fffbb30
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff76de addr: 19fffbb50
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7716 addr: 19fffbb64
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7754 addr: 19fffbba0
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7794 addr: 19fffbbd0
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff783d addr: 19fffbbe4
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff797c addr: 19fffbc20
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7b78 addr: 19fffbc50
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7bac addr: 19fffbc80
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fff7c25 addr: 19fffbca0
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fffb382 addr: 19fffbcc0
R_X86_64_PC32
sym:      .text info: 03 other: 00 shndx: 1 value: 0 size: 0
sym: .text value: 19fffb4da addr: 19fffbce0
R_X86_64_PC32
Loaded purgatory at addr 0x19fff7000
add_buffer: base:19fff1000 size:44e0
Loaded setup data and command line at 0x19fff1000
add_buffer: base:180000000 size:917d50
Loaded 64bit kernel at 0x180000000
initrd_addr_max is 0x7fffffff
add_buffer: base:19d983000 size:266c368
Loaded initrd at 0x19d983000 size 0x266c368
cmd_line_ptr: 19fff5400
EDD raw data has length 30
Added 0 EDD MBR entries and 1 EDD entries.
10:~/k # kexec -e -d
[   56.119441] i2c i2c-0: shutdown
[   56.120652] psmouse serio1: shutdown
[   56.122606] atkbd serio0: shutdown
[   56.123847] i8042 i8042: shutdown
[   56.127562] sd 0:0:0:0: shutdown
[   56.129017] sd 0:0:0:0: [sda] Synchronizing SCSI cache
[   56.132681] pcspkr pcspkr: shutdown
[   56.133676] pnp 00:07: shutdown
[   56.134365] serial 00:06: shutdown
[   56.135158] pnp 00:05: shutdown
[   56.135918] pnp 00:04: shutdown
[   56.136838] i8042 aux 00:03: shutdown
[   56.138296] i8042 kbd 00:02: shutdown
[   56.139159] rtc_cmos 00:01: shutdown
[   56.140057] pnp 00:00: shutdown
[   56.141511] e1000 0000:00:03.0: shutdown
[   56.193472] pci 0000:00:02.0: shutdown
[   56.194498] piix4_smbus 0000:00:01.3: shutdown
[   56.195575] ata_piix 0000:00:01.1: shutdown
[   56.196684] pci 0000:00:01.0: shutdown
[   56.198051] pci 0000:00:00.0: shutdown
[   56.199296] PM: Calling mce_syscore_shutdown+0x0/0x50
[   56.200626] PM: Calling i8259A_shutdown+0x0/0x20
[   56.202037] Starting new kernel
early console in decompress_kernel
decompress_kernel:
  input: [0x18194a62e-0x18225a8a5], output: 0x180000000, heap:
[0x182261d40-0x182269d3f]

Decompressing Linux... xz... Parsing ELF... done.
Booting the kernel.
[    0.000000]    real_mode_data :      phys 000000019fff1000
[    0.000000]    real_mode_data :      virt ffff88019fff1000
[    0.000000]       boot_params : init virt ffffffff82e4d760
[    0.000000]       boot_params :      phys 0000000181e4d760
[    0.000000]       boot_params :      virt ffff880181e4d760
[    0.000000] boot_command_line : init virt ffffffff82d35000
[    0.000000] boot_command_line :      phys 0000000181d35000
[    0.000000] boot_command_line :      virt ffff880181d35000
[    0.000000] Kernel Layout:
[    0.000000]   .text: [0x180000000-0x18111d5ec]
[    0.000000] .rodata: [0x18111f000-0x181954fff]
[    0.000000]   .data: [0x181955000-0x181b0faff]
[    0.000000]   .init: [0x181b11000-0x181e38fff]
[    0.000000]    .bss: [0x181e47000-0x182bbafff]
[    0.000000]    .brk: [0x182bbb000-0x182bdffff]
[    0.000000] memblock_reserve: [0x180000000-0x182bbafff] TEXT DATA BSS
[    0.000000] memblock_reserve: [0x0009fc00-0x000fffff] * BIOS reserved
[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Linux version 3.7.0-rc6-yh-00043-g5616ffd-dirty
(yhlu@linux-siqj.site) (gcc version 4.6.2 (SUSE Linux) ) #656 SMP Sat
Nov 17 17:25:49 PST 2012
[    0.000000] memblock_reserve: [0x19d983000-0x19ffeffff] RAMDISK
[    0.000000] Command line: initcall_debug pci=routeirq debug
i8042.debug=1 apic=debug lpfc.lpfc_use_msi=2 ramdisk_size=262144
root=/dev/ram0 rw ip=dhcp console=uart8250,io,0x3f8,115200n8
[    0.000000] KERNEL supported cpus:
[    0.000000]   Intel GenuineIntel
[    0.000000]   AMD AuthenticAMD
[    0.000000]   Centaur CentaurHauls

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  6:11                           ` Eric W. Biederman
  2012-11-18  6:32                             ` Yinghai Lu
@ 2012-11-18  6:38                             ` Yinghai Lu
  2012-11-18  6:50                               ` Eric W. Biederman
  1 sibling, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  6:38 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 10:11 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>> so -mcmodel=large only work with .c, but does not have effects on .S ?
>
> Yes it -mcmodel=large is about which instructions you generate.  The
> instructions used determine the relocates.
>
> Just for playing with it the following patch modifies things so
> purgatory is 64bit clean.
>
> I don't know yet what to do with the 32bit and 16bit assembly.

so should we have two purgatories for x86_64?
like one for 64bit and one for 32bit/16bit?

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  6:23                     ` H. Peter Anvin
@ 2012-11-18  6:44                       ` Eric W. Biederman
  0 siblings, 0 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-18  6:44 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Haren Myneni, Simon Horman, Yinghai Lu, kexec, Vivek Goyal

"H. Peter Anvin" <hpa@zytor.com> writes:

> I think you just need one of the PIC models, or the medium model.

-fPIC has the unfortunate tendency to require a global offset table
 and still have relocations against .data so it is usually easier to
 avoid it.

That said it looks like what is left is to modify the .S files to
contain 64bit relocations or actually position independent code.

Is there any easy way to write position independent 32bit code that will
load all of the registers, and jump to known address? 

This is what the code does right now.

	.code32
	/* load the data segments */
	movl	$0x18, %eax	/* data segment */
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs

	/* Load the registers */
	movl	eax, %eax
	movl	ecx, %ecx
	movl	edx, %edx
	movl	esi, %esi
	movl	edi, %edi
	movl	esp, %esp
	movl	ebp, %ebp
	movl	ebx, %ebx

	/* Jump to the loaded image */
	jmpl	*(eip)

	.section ".rodata"
	.balign 4
entry32_regs:  
eax:	.long 0x00000000
ebx:	.long 0x00000000
ecx:	.long 0x00000000
edx:	.long 0x00000000
esi:	.long 0x00000000
edi:	.long 0x00000000
esp:	.long 0x00000000
ebp:	.long 0x00000000
eip:	.long entry16
	.size entry32_regs, . - entry32_regs

Eric

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  6:38                             ` Yinghai Lu
@ 2012-11-18  6:50                               ` Eric W. Biederman
  2012-11-18  6:53                                 ` Yinghai Lu
  0 siblings, 1 reply; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-18  6:50 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> On Sat, Nov 17, 2012 at 10:11 PM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>> so -mcmodel=large only work with .c, but does not have effects on .S ?
>>
>> Yes it -mcmodel=large is about which instructions you generate.  The
>> instructions used determine the relocates.
>>
>> Just for playing with it the following patch modifies things so
>> purgatory is 64bit clean.
>>
>> I don't know yet what to do with the 32bit and 16bit assembly.
>
> so we should two purgatory for x86_64 ?
> like one for 64bit and one for 32bit/16bit

The problem is that the current 32bit assembly code is not position
independent.  If we can rewrite that assembly code to be position
independent no relocs will be generated and we should be good.

The 16bit entry point code was already written as position independent
code so it should not cause problems.

Just having one piece of code to deal with (if we can figure it out)
looks to be the simpler and more maintainable solution.

Eric


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  6:50                               ` Eric W. Biederman
@ 2012-11-18  6:53                                 ` Yinghai Lu
  2012-11-18  7:18                                   ` Yinghai Lu
  0 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  6:53 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

On Sat, Nov 17, 2012 at 10:50 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
>>
>> so we should two purgatory for x86_64 ?
>> like one for 64bit and one for 32bit/16bit
>
> The problem is that the current 32bit assembly code is not possition
> independent.  If we can rewrite that assembly code to be position
> independent no relocs will be generated and we should be good.
>
> The 16bit entry point code was already written as position independent
> code so it should not cause problems.
>
> Just having one piece of code to deal with (if we can figure it out)
> looks to be the simpler and more maintainable solution.

sure.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  6:53                                 ` Yinghai Lu
@ 2012-11-18  7:18                                   ` Yinghai Lu
  2012-11-18 10:38                                     ` Eric W. Biederman
  0 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-18  7:18 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

[-- Attachment #1: Type: text/plain, Size: 918 bytes --]

On Sat, Nov 17, 2012 at 10:53 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Sat, Nov 17, 2012 at 10:50 PM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>>>
>>> so we should two purgatory for x86_64 ?
>>> like one for 64bit and one for 32bit/16bit
>>
>> The problem is that the current 32bit assembly code is not possition
>> independent.  If we can rewrite that assembly code to be position
>> independent no relocs will be generated and we should be good.
>>
>> The 16bit entry point code was already written as position independent
>> code so it should not cause problems.
>>
>> Just having one piece of code to deal with (if we can figure it out)
>> looks to be the simpler and more maintainable solution.
>
> sure.

just resent -v2 patches for kernel parts.

attached are the changes for kexec-tools, without putting the 64bit purgatory above 4G...

hope you can work out the 32bit position independent code.

Thanks

Yinghai

[-- Attachment #2: patches.kexec_2012_11_17.tar.bz2 --]
[-- Type: application/x-bzip2, Size: 8179 bytes --]

[-- Attachment #3: Type: text/plain, Size: 143 bytes --]

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/8] add mem64_min/max control
  2012-11-18  7:18                                   ` Yinghai Lu
@ 2012-11-18 10:38                                     ` Eric W. Biederman
  2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  0 siblings, 2 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-18 10:38 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> On Sat, Nov 17, 2012 at 10:53 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> On Sat, Nov 17, 2012 at 10:50 PM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>>>
>>>> so we should two purgatory for x86_64 ?
>>>> like one for 64bit and one for 32bit/16bit
>>>
>>> The problem is that the current 32bit assembly code is not position
>>> independent.  If we can rewrite that assembly code to be position
>>> independent no relocs will be generated and we should be good.
>>>
>>> The 16bit entry point code was already written as position independent
>>> code so it should not cause problems.
>>>
>>> Just having one piece of code to deal with (if we can figure it out)
>>> looks to be the simpler and more maintainable solution.
>>
>> sure.
>
> just resent -v2 patches for kernel parts.
>
> attached are changes for kexec-tools without putting 64 bit purgatory above 4g...
>
> hope you can work out 32bit position independent.

It looks like this is enough to fix purgatory.

I have tested this insofar as it doesn't generate relocs, but I
haven't had a chance to test whether the code actually works.

YH can you verify this boots both 32bit and 64bit kernels for you?

Thanks,
Eric

diff --git a/purgatory/Makefile b/purgatory/Makefile
index ee1679c..e39adec 100644
--- a/purgatory/Makefile
+++ b/purgatory/Makefile
@@ -64,6 +64,7 @@ $(PURGATORY): $(PURGATORY_OBJS)
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
 
 #	$(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) --no-undefined -e purgatory_start -r -o $@ $(PURGATORY_OBJS) $(UTIL_LIB)
+	$(STRIP) --strip-debug $@
 
 echo::
 	@echo "PURGATORY_SRCS $(PURGATORY_SRCS)"
diff --git a/purgatory/arch/i386/entry32-16-debug.S b/purgatory/arch/i386/entry32-16-debug.S
index 82b58ca..2f47607 100644
--- a/purgatory/arch/i386/entry32-16-debug.S
+++ b/purgatory/arch/i386/entry32-16-debug.S
@@ -29,15 +29,17 @@
 	.balign 16
 entry16_debug:
 	.code32
-	/* Compute where I am running at */
-	movl	$entry16_debug, %ebx	
+	/* Compute where I am running at (assumes esp valid) */
+	call	1f
+1:	popl	%ebx
 
 	/* Fixup my real mode segment */
 	movl	%ebx, %eax
 	shrl	$4, %eax
-	movw	%ax, 2 + realptr
+	movw	%ax, (2 + realptr - entry16_debug)(%ebx)
 
 	/* Fixup the gdt */
+	movl	%ebx, (0x02 + gdt - entry16_debug)(%ebx)
 	movl	%ebx, %eax
 	shll	$16, %eax
 	
@@ -49,19 +51,19 @@ entry16_debug:
 	andl	$0xff000000, %edx
 	orl	%edx, %ecx
 
-	orl	%eax, 0x08 + gdt
-	orl	%ecx, 0x0c + gdt
-	orl	%eax, 0x10 + gdt
-	orl	%ecx, 0x14 + gdt	
+	orl	%eax, (0x08 + gdt - entry16_debug)(%ebx)
+	orl	%ecx, (0x0c + gdt - entry16_debug)(%ebx)
+	orl	%eax, (0x10 + gdt - entry16_debug)(%ebx)
+	orl	%ecx, (0x14 + gdt - entry16_debug)(%ebx)
 	
 	
 DEBUG_CHAR('a')
 	/* Setup the classic BIOS interrupt table at 0x0 */
-	lidt	idtptr
+	lidt	(idtptr - entry16_debug)(%ebx)
 
 DEBUG_CHAR('b')
 	/* Provide us with 16bit segments that we can use */
-	lgdt	gdt
+	lgdt	(gdt - entry16_debug)(%ebx)
 
 DEBUG_CHAR('c')
 	/* Note we don't disable the a20 line, (this shouldn't be required)
@@ -160,7 +162,7 @@ idtptr:
 gdt:
 	/* 0x00 unusable segment so used as the gdt ptr */
 	.word gdt_end - gdt - 1
-	.long gdt
+	.long 0 /* gdt */
 	.word 0
 
 	/* 0x08 16 bit real mode code segment */
diff --git a/purgatory/arch/i386/entry32-16.S b/purgatory/arch/i386/entry32-16.S
index aaf1273..20a1ce6 100644
--- a/purgatory/arch/i386/entry32-16.S
+++ b/purgatory/arch/i386/entry32-16.S
@@ -24,15 +24,17 @@
 	.balign 16
 entry16:
 	.code32
-	/* Compute where I am running at */
-	movl	$entry16, %ebx
+	/* Compute where I am running at (assumes esp valid) */
+	call	1f
+1:	popl	%ebx
 
 	/* Fixup my real mode segment */
 	movl	%ebx, %eax
 	shrl	$4, %eax
-	movw	%ax, 2 + realptr
+	movw	%ax, (2 + realptr - entry16)(%ebx)
 
 	/* Fixup the gdt */
+	movl	%ebx, (0x02 + gdt - entry16)(%ebx)
 	movl	%ebx, %eax
 	shll	$16, %eax
 	
@@ -44,17 +46,17 @@ entry16:
 	andl	$0xff000000, %edx
 	orl	%edx, %ecx
 
-	orl	%eax, 0x08 + gdt
-	orl	%ecx, 0x0c + gdt
-	orl	%eax, 0x10 + gdt
-	orl	%ecx, 0x14 + gdt	
+	orl	%eax, (0x08 + gdt - entry16)(%ebx)
+	orl	%ecx, (0x0c + gdt - entry16)(%ebx)
+	orl	%eax, (0x10 + gdt - entry16)(%ebx)
+	orl	%ecx, (0x14 + gdt - entry16)(%ebx)
 	
 	
 	/* Setup the classic BIOS interrupt table at 0x0 */
-	lidt	idtptr
+	lidt	(idtptr - entry16)(%ebx)
 	
 	/* Provide us with 16bit segments that we can use */
-	lgdt	gdt
+	lgdt	(gdt - entry16)(%ebx)
 
 	/* Note we don't disable the a20 line, (this shouldn't be required)
 	 * The code to do it is in kexec_test and it is a real pain.
@@ -147,7 +149,7 @@ idtptr:
 gdt:
 	/* 0x00 unusable segment so used as the gdt ptr */
 	.word gdt_end - gdt - 1
-	.long gdt
+	.long 0 /* gdt */
 	.word 0
 
 	/* 0x08 16 bit real mode code segment */
diff --git a/purgatory/arch/x86_64/Makefile b/purgatory/arch/x86_64/Makefile
index 22b4228..7300937 100644
--- a/purgatory/arch/x86_64/Makefile
+++ b/purgatory/arch/x86_64/Makefile
@@ -16,9 +16,11 @@ dist += purgatory/arch/x86_64/Makefile $(x86_64_PURGATORY_SRCS_native) 	\
 	purgatory/arch/x86_64/purgatory-x86_64.h
 
 # Don't add sources in i386/ to dist, as i386/Makefile adds them
-x86_64_PURGATORY_SRCS +=  purgatory/arch/i386/entry32-16.S
+x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16.S
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16-debug.S
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/crashdump_backup.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/console-x86.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/vga.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/pic.c
+
+x86_64_PURGATORY_EXTRA_CFLAGS = -mcmodel=large
diff --git a/purgatory/arch/x86_64/entry64-32.S b/purgatory/arch/x86_64/entry64-32.S
index 66f8a85..0d394ad 100644
--- a/purgatory/arch/x86_64/entry64-32.S
+++ b/purgatory/arch/x86_64/entry64-32.S
@@ -24,13 +24,34 @@
 	.equ	CR0_PG,        0x80000000
 
 	.text
+	.balign 16
 	.globl entry32, entry32_regs
 entry32:
 	.code64
 
-	/* Setup a gdt that should that is generally usefully */
+	/* Setup the 4G offset of entry32 lm_exit code segment */
+	movq	$0x00CF9A000000ffff, %rax
+
+	leaq	entry32(%rip), %rbx	/* Low 24 bits */
+	andq	$0xffffff, %rbx
+	shlq	$16, %rbx
+	orq	%rbx, %rax
+
+	leaq	entry32(%rip), %rbx	/* High 8 bits */
+	movq	$0xff000000, %rdx
+	andq	%rdx, %rbx
+	shlq	$32, %rbx
+	orq	%rbx, %rax
+
+	movq	%rax, (gdt + 0x20)(%rip)
+
+	/* Setup a gdt that is generally usefully */
 	lgdt	gdt(%rip)
-		
+
+	/* Setup the far pointer to the entry point */
+	movl	eip(%rip), %eax
+	movl	%eax, entry32_addr(%rip)
+
 	/* Switch to 32bit compatiblity mode */
 	ljmp	*lm_exit_addr(%rip)
 lm_exit:
@@ -60,19 +81,19 @@ lm_exit:
 	movl	%eax, %gs
 
 	/* Load the registers */
-	movl	eax, %eax
-	movl	ecx, %ecx
-	movl	edx, %edx
-	movl	esi, %esi
-	movl	edi, %edi
-	movl	esp, %esp
-	movl	ebp, %ebp
-	movl	ebx, %ebx
+	movl	%cs:eax - entry32, %eax
+	movl	%cs:ecx - entry32, %ecx
+	movl	%cs:edx - entry32, %edx
+	movl	%cs:esi - entry32, %esi
+	movl	%cs:edi - entry32, %edi
+	movl	%cs:esp - entry32, %esp
+	movl	%cs:ebp - entry32, %ebp
+	movl	%cs:ebx - entry32, %ebx
 
 	/* Jump to the loaded image */
-	jmpl	*(eip)
+	jmpl	*%cs:entry32_addr - entry32
 
-	.section ".rodata"
+	.section ".data"
 	.balign 16
 gdt:	/* 0x00 unusable segment 
 	 * 0x08 unused
@@ -88,8 +109,8 @@ gdt:	/* 0x00 unusable segment
 	/* 0x18 4GB flat data segment */
 	.word	0xFFFF, 0x0000, 0x9200, 0x00CF
 
-	/* 0x20 dummy */
-	.word	0x0000, 0x0000, 0x0000, 0x000
+	/* 0x20 4GB flat code segment base at entry32 */
+	.word	0xFFFF, 0x0000, 0x9A00, 0x0CF
 	/* 0x28 dummy */
 	.word	0x0000, 0x0000, 0x0000, 0x000
 	/* 0x30 dummy */
@@ -115,9 +136,15 @@ gdt_end:
 	.section ".rodata"
 	.balign 4
 lm_exit_addr:
-	.long lm_exit
-	.long 0x10		
-	
+	.long lm_exit - entry32
+	.long 0x20
+
+	.section ".data"
+	.balign 4
+entry32_addr:
+	.long 0x00000000
+	.long 0x10
+
 	.section ".rodata"
 	.balign 4
 entry32_regs:  
@@ -129,6 +156,9 @@ esi:	.long 0x00000000
 edi:	.long 0x00000000
 esp:	.long 0x00000000
 ebp:	.long 0x00000000
-eip:	.long entry16
-	.size entry32_regs, . - entry32_regs
+eip:	.quad entry16	/* low 32 bits address
+			 * high 32bits zeros
+			 * uses 64bit reloc
+			 */
+	.size entry32_regs, (. - 4) - entry32_regs
 
diff --git a/purgatory/arch/x86_64/entry64.S b/purgatory/arch/x86_64/entry64.S
index 666023c..e3223b7 100644
--- a/purgatory/arch/x86_64/entry64.S
+++ b/purgatory/arch/x86_64/entry64.S
@@ -37,9 +37,10 @@ entry64:
 	movl	%eax, %fs
 	movl	%eax, %gs
 
-	movq	$stack_init, %rsp
+	leaq	stack_init(%rip), %rsp
 	pushq	$0x10 /* CS */
-	pushq	$new_cs_exit
+	leaq	new_cs_exit(%rip), %rax
+	pushq	%rax
 	lretq
 new_cs_exit:
 
diff --git a/purgatory/arch/x86_64/setup-x86_64.S b/purgatory/arch/x86_64/setup-x86_64.S
index 74997fa..95572d8 100644
--- a/purgatory/arch/x86_64/setup-x86_64.S
+++ b/purgatory/arch/x86_64/setup-x86_64.S
@@ -42,10 +42,10 @@ purgatory_start:
 	/* In 64bit mode the code segment is meaningless */
 
 	movq	0(%rsp), %rax
-	movq	%rax, jump_back_entry
+	movq	%rax, jump_back_entry(%rip)
 
 	/* Setup a stack */
-	movq	$lstack_end, %rsp
+	leaq	lstack_end(%rip), %rsp
 
 	/* Call the C code */
 	call purgatory


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit
  2012-11-18 10:38                                     ` Eric W. Biederman
@ 2012-11-19  3:02                                       ` Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 1/6] kexec, x86: add boot header member for version 2.12 Yinghai Lu
                                                           ` (5 more replies)
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  1 sibling, 6 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:02 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Currently the kdump reserved region is limited to under 896M because of a
limitation in kexec, and the bzImage also needs to stay under 4G.

The kernel-side changes can be found at:
        git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git for-x86-boot

These patches are for kexec-tools, to load the bzImage and ramdisk high
according to the newly added boot header fields.

-v2: remove mem64-min/mem64-max
     add purgatory changes from Eric, which make it relocatable
     put command line above 4g.

Eric W. Biederman (1):
  kexec, x86: Make x64_64 purgatory relocatable above 4G

Yinghai Lu (5):
  kexec, x86: add boot header member for version 2.12
  kexec: don't die during buffer finding
  kexec, x86: put ramdisk high for 64bit bzImage
  kexec, x86: set ext_cmd_line_ptr when boot_param is put high
  kexec, x86_64: put 64bit bzImage high

 include/x86/x86-linux.h                |   22 ++-
 kexec/arch/i386/x86-linux-setup.c      |   40 +++-
 kexec/arch/x86_64/Makefile             |    1 +
 kexec/arch/x86_64/kexec-bzImage64.c    |  316 ++++++++++++++++++++++++++++++++
 kexec/arch/x86_64/kexec-x86_64.c       |    1 +
 kexec/arch/x86_64/kexec-x86_64.h       |    5 +
 kexec/kexec.c                          |    7 +-
 purgatory/arch/i386/entry32-16-debug.S |   22 ++-
 purgatory/arch/i386/entry32-16.S       |   22 ++-
 purgatory/arch/x86_64/Makefile         |    4 +-
 purgatory/arch/x86_64/entry64-32.S     |   68 +++++--
 purgatory/arch/x86_64/entry64.S        |    5 +-
 purgatory/arch/x86_64/setup-x86_64.S   |    4 +-
 13 files changed, 458 insertions(+), 59 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 1/6] kexec, x86: add boot header member for version 2.12
  2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
@ 2012-11-19  3:02                                         ` Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 2/6] kexec: don't die during buffer finding Yinghai Lu
                                                           ` (4 subsequent siblings)
  5 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:02 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

will use ext_ramdisk_image/size, and code64_start_offset

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 include/x86/x86-linux.h           |   22 +++++++++++++++++++++-
 kexec/arch/i386/x86-linux-setup.c |    2 +-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 27af02b..71cd296 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -178,7 +178,17 @@ struct x86_linux_param_header {
 	uint32_t cmdline_size;			/* 0x238 */
 	uint32_t hardware_subarch;		/* 0x23C */
 	uint64_t hardware_subarch_data;		/* 0x240 */
-	uint8_t  reserved16[0x290 - 0x248];	/* 0x248 */
+	uint32_t payload_offset;		/* 0x248 */
+	uint32_t payload_length;		/* 0x24C */
+	uint64_t setup_data;			/* 0x250 */
+	uint64_t pref_address;			/* 0x258 */
+	uint32_t init_size;			/* 0x260 */
+	uint32_t handover_offset;		/* 0x264 */
+	uint32_t ext_ramdisk_image;		/* 0x268 */
+	uint32_t ext_ramdisk_size;		/* 0x26C */
+	uint32_t code64_start_offset;		/* 0x270 */
+	uint32_t ext_cmd_line_ptr;		/* 0x274 */
+	uint8_t  reserved16[0x290 - 0x278];	/* 0x278 */
 	uint32_t edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];	/* 0x290 */
 #endif
 	struct 	e820entry e820_map[E820MAX];	/* 0x2d0 */
@@ -245,6 +255,16 @@ struct x86_linux_header {
 	uint32_t cmdline_size;                  /* 0x238 */
 	uint32_t hardware_subarch;              /* 0x23C */
 	uint64_t hardware_subarch_data;         /* 0x240 */
+	uint32_t payload_offset;		/* 0x248 */
+	uint32_t payload_length;		/* 0x24C */
+	uint64_t setup_data;			/* 0x250 */
+	uint64_t pref_address;			/* 0x258 */
+	uint32_t init_size;			/* 0x260 */
+	uint32_t handover_offset;		/* 0x264 */
+	uint32_t ext_ramdisk_image;		/* 0x268 */
+	uint32_t ext_ramdisk_size;		/* 0x26C */
+	uint32_t code64_start_offset;		/* 0x270 */
+	uint32_t ext_cmd_line_ptr;		/* 0x274 */
 #endif
 } PACKED;
 
diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
index b7ab8ea..53d9df9 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -41,7 +41,7 @@ void init_linux_parameters(struct x86_linux_param_header *real_mode)
 
 	/* Boot block magic */
 	memcpy(real_mode->header_magic, "HdrS", 4);
-	real_mode->protocol_version = 0x0206;
+	real_mode->protocol_version = 0x020C;
 	real_mode->initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
 	real_mode->cmdline_size = COMMAND_LINE_SIZE;
 }
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 2/6] kexec: don't die during buffer finding
  2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 1/6] kexec, x86: add boot header member for version 2.12 Yinghai Lu
@ 2012-11-19  3:02                                         ` Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 3/6] kexec, x86: put ramdisk high for 64bit bzImage Yinghai Lu
                                                           ` (3 subsequent siblings)
  5 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:02 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Return 0 instead of dying, so that the caller can retry with new ranges.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/kexec.c |    7 +++----
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/kexec/kexec.c b/kexec/kexec.c
index 8928be0..0f8aec8 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -366,10 +366,9 @@ unsigned long add_buffer_phys_virt(struct kexec_info *info,
 	memsz = (memsz + (pagesize - 1)) & ~(pagesize - 1);
 
 	base = locate_hole(info, memsz, buf_align, buf_min, buf_max, buf_end);
-	if (base == ULONG_MAX) {
-		die("locate_hole failed\n");
-	}
-	
+	if (base == ULONG_MAX)
+		return 0;
+
 	add_segment_phys_virt(info, buf, bufsz, base, memsz, phys);
 	return base;
 }
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 3/6] kexec, x86: put ramdisk high for 64bit bzImage
  2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 1/6] kexec, x86: add boot header member for version 2.12 Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 2/6] kexec: don't die during buffer finding Yinghai Lu
@ 2012-11-19  3:02                                         ` Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high Yinghai Lu
                                                           ` (2 subsequent siblings)
  5 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:02 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Only do that for a 64bit bzImage, and fall back to low if we fail to get high.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/i386/x86-linux-setup.c |   30 ++++++++++++++++++++++--------
 1 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
index 53d9df9..b0e6119 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -69,20 +69,34 @@ void setup_linux_bootloader_parameters(
 	}
 
 	/* Load the initrd if we have one */
+	initrd_base = 0;
 	if (initrd_buf) {
-		initrd_base = add_buffer(info,
-			initrd_buf, initrd_size, initrd_size,
-			4096, INITRD_BASE, initrd_addr_max, -1);
+		if (real_mode->protocol_version >= 0x020c &&
+		    real_mode->code64_start_offset) {
+			initrd_base = add_buffer(info,
+				initrd_buf, initrd_size, initrd_size,
+				4096, 1UL<<32, ULONG_MAX, -1);
+			if (!initrd_base)
+				initrd_base = add_buffer(info,
+					initrd_buf, initrd_size, initrd_size,
+					4096, 1UL<<30, 1UL<<32, -1);
+		}
+		if (!initrd_base)
+			initrd_base = add_buffer(info,
+				initrd_buf, initrd_size, initrd_size,
+				4096, INITRD_BASE, initrd_addr_max, -1);
 		dbgprintf("Loaded initrd at 0x%lx size 0x%lx\n", initrd_base,
 			initrd_size);
-	} else {
-		initrd_base = 0;
+	} else
 		initrd_size = 0;
-	}
 
 	/* Ramdisk address and size */
-	real_mode->initrd_start = initrd_base;
-	real_mode->initrd_size  = initrd_size;
+	real_mode->initrd_start = initrd_base & 0xffffffff;
+	real_mode->initrd_size  = initrd_size & 0xffffffff;
+	if ((initrd_base + initrd_size) > (1ULL<<32)) {
+		real_mode->ext_ramdisk_image = initrd_base >> 32;
+		real_mode->ext_ramdisk_size  = initrd_size >> 32;
+	}
 
 	/* The location of the command line */
 	/* if (real_mode_base == 0x90000) { */
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high
  2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                                                           ` (2 preceding siblings ...)
  2012-11-19  3:02                                         ` [PATCH 3/6] kexec, x86: put ramdisk high for 64bit bzImage Yinghai Lu
@ 2012-11-19  3:02                                         ` Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 5/6] kexec, x86: Make x64_64 purgatory relocatable above 4G Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 6/6] kexec, x86_64: put 64bit bzImage high Yinghai Lu
  5 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:02 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

only do that for bzImage64, and it could have command line above 4g.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/i386/x86-linux-setup.c |    8 +++++++-
 1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
index b0e6119..c929166 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -105,7 +105,13 @@ void setup_linux_bootloader_parameters(
 		/* setup_move_size */
 	/* } */
 	if (real_mode->protocol_version >= 0x0202) {
-		real_mode->cmd_line_ptr = real_mode_base + cmdline_offset;
+		unsigned long cmd_line_ptr = real_mode_base + cmdline_offset;
+
+		real_mode->cmd_line_ptr = cmd_line_ptr & 0xffffffff;
+		if (real_mode->protocol_version >= 0x020c)
+			real_mode->ext_cmd_line_ptr = cmd_line_ptr >> 32;
+
+		printf("cmd_line_ptr: %lx\n", cmd_line_ptr);
 	}
 
 	/* Fill in the command line */
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 5/6] kexec, x86: Make x64_64 purgatory relocatable above 4G
  2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                                                           ` (3 preceding siblings ...)
  2012-11-19  3:02                                         ` [PATCH 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high Yinghai Lu
@ 2012-11-19  3:02                                         ` Yinghai Lu
  2012-11-19  3:02                                         ` [PATCH 6/6] kexec, x86_64: put 64bit bzImage high Yinghai Lu
  5 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:02 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

From: "Eric W. Biederman" <ebiederm@xmission.com>

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 purgatory/arch/i386/entry32-16-debug.S |   22 ++++++-----
 purgatory/arch/i386/entry32-16.S       |   22 ++++++-----
 purgatory/arch/x86_64/Makefile         |    4 +-
 purgatory/arch/x86_64/entry64-32.S     |   68 +++++++++++++++++++++++---------
 purgatory/arch/x86_64/entry64.S        |    5 +-
 purgatory/arch/x86_64/setup-x86_64.S   |    4 +-
 6 files changed, 81 insertions(+), 44 deletions(-)

diff --git a/purgatory/arch/i386/entry32-16-debug.S b/purgatory/arch/i386/entry32-16-debug.S
index 82b58ca..2f47607 100644
--- a/purgatory/arch/i386/entry32-16-debug.S
+++ b/purgatory/arch/i386/entry32-16-debug.S
@@ -29,15 +29,17 @@
 	.balign 16
 entry16_debug:
 	.code32
-	/* Compute where I am running at */
-	movl	$entry16_debug, %ebx	
+	/* Compute where I am running at (assumes esp valid) */
+	call	1f
+1:	popl	%ebx
 
 	/* Fixup my real mode segment */
 	movl	%ebx, %eax
 	shrl	$4, %eax
-	movw	%ax, 2 + realptr
+	movw	%ax, (2 + realptr - entry16_debug)(%ebx)
 
 	/* Fixup the gdt */
+	movl	%ebx, (0x02 + gdt - entry16_debug)(%ebx)
 	movl	%ebx, %eax
 	shll	$16, %eax
 	
@@ -49,19 +51,19 @@ entry16_debug:
 	andl	$0xff000000, %edx
 	orl	%edx, %ecx
 
-	orl	%eax, 0x08 + gdt
-	orl	%ecx, 0x0c + gdt
-	orl	%eax, 0x10 + gdt
-	orl	%ecx, 0x14 + gdt	
+	orl	%eax, (0x08 + gdt - entry16_debug)(%ebx)
+	orl	%ecx, (0x0c + gdt - entry16_debug)(%ebx)
+	orl	%eax, (0x10 + gdt - entry16_debug)(%ebx)
+	orl	%ecx, (0x14 + gdt - entry16_debug)(%ebx)
 	
 	
 DEBUG_CHAR('a')
 	/* Setup the classic BIOS interrupt table at 0x0 */
-	lidt	idtptr
+	lidt	(idtptr - entry16_debug)(%ebx)
 
 DEBUG_CHAR('b')
 	/* Provide us with 16bit segments that we can use */
-	lgdt	gdt
+	lgdt	(gdt - entry16_debug)(%ebx)
 
 DEBUG_CHAR('c')
 	/* Note we don't disable the a20 line, (this shouldn't be required)
@@ -160,7 +162,7 @@ idtptr:
 gdt:
 	/* 0x00 unusable segment so used as the gdt ptr */
 	.word gdt_end - gdt - 1
-	.long gdt
+	.long 0 /* gdt */
 	.word 0
 
 	/* 0x08 16 bit real mode code segment */
diff --git a/purgatory/arch/i386/entry32-16.S b/purgatory/arch/i386/entry32-16.S
index aaf1273..20a1ce6 100644
--- a/purgatory/arch/i386/entry32-16.S
+++ b/purgatory/arch/i386/entry32-16.S
@@ -24,15 +24,17 @@
 	.balign 16
 entry16:
 	.code32
-	/* Compute where I am running at */
-	movl	$entry16, %ebx
+	/* Compute where I am running at (assumes esp valid) */
+	call	1f
+1:	popl	%ebx
 
 	/* Fixup my real mode segment */
 	movl	%ebx, %eax
 	shrl	$4, %eax
-	movw	%ax, 2 + realptr
+	movw	%ax, (2 + realptr - entry16)(%ebx)
 
 	/* Fixup the gdt */
+	movl	%ebx, (0x02 + gdt - entry16)(%ebx)
 	movl	%ebx, %eax
 	shll	$16, %eax
 	
@@ -44,17 +46,17 @@ entry16:
 	andl	$0xff000000, %edx
 	orl	%edx, %ecx
 
-	orl	%eax, 0x08 + gdt
-	orl	%ecx, 0x0c + gdt
-	orl	%eax, 0x10 + gdt
-	orl	%ecx, 0x14 + gdt	
+	orl	%eax, (0x08 + gdt - entry16)(%ebx)
+	orl	%ecx, (0x0c + gdt - entry16)(%ebx)
+	orl	%eax, (0x10 + gdt - entry16)(%ebx)
+	orl	%ecx, (0x14 + gdt - entry16)(%ebx)
 	
 	
 	/* Setup the classic BIOS interrupt table at 0x0 */
-	lidt	idtptr
+	lidt	(idtptr - entry16)(%ebx)
 	
 	/* Provide us with 16bit segments that we can use */
-	lgdt	gdt
+	lgdt	(gdt - entry16)(%ebx)
 
 	/* Note we don't disable the a20 line, (this shouldn't be required)
 	 * The code to do it is in kexec_test and it is a real pain.
@@ -147,7 +149,7 @@ idtptr:
 gdt:
 	/* 0x00 unusable segment so used as the gdt ptr */
 	.word gdt_end - gdt - 1
-	.long gdt
+	.long 0 /* gdt */
 	.word 0
 
 	/* 0x08 16 bit real mode code segment */
diff --git a/purgatory/arch/x86_64/Makefile b/purgatory/arch/x86_64/Makefile
index 22b4228..7300937 100644
--- a/purgatory/arch/x86_64/Makefile
+++ b/purgatory/arch/x86_64/Makefile
@@ -16,9 +16,11 @@ dist += purgatory/arch/x86_64/Makefile $(x86_64_PURGATORY_SRCS_native) 	\
 	purgatory/arch/x86_64/purgatory-x86_64.h
 
 # Don't add sources in i386/ to dist, as i386/Makefile adds them
-x86_64_PURGATORY_SRCS +=  purgatory/arch/i386/entry32-16.S
+x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16.S
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16-debug.S
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/crashdump_backup.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/console-x86.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/vga.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/pic.c
+
+x86_64_PURGATORY_EXTRA_CFLAGS = -mcmodel=large
diff --git a/purgatory/arch/x86_64/entry64-32.S b/purgatory/arch/x86_64/entry64-32.S
index 66f8a85..f2b6377 100644
--- a/purgatory/arch/x86_64/entry64-32.S
+++ b/purgatory/arch/x86_64/entry64-32.S
@@ -24,13 +24,34 @@
 	.equ	CR0_PG,        0x80000000
 
 	.text
+	.balign 16
 	.globl entry32, entry32_regs
 entry32:
 	.code64
 
-	/* Setup a gdt that should that is generally usefully */
+	/* Setup the 4G offset of entry32 lm_exit code segment */
+	movq	$0x00CF9A000000ffff, %rax
+
+	leaq	entry32(%rip), %rbx	/* Low 24 bits */
+	andq	$0xffffff, %rbx
+	shlq	$16, %rbx
+	orq	%rbx, %rax
+
+	leaq	entry32(%rip), %rbx	/* High 8 bits */
+	movq	$0xff000000, %rdx
+	andq	%rdx, %rbx
+	shlq	$32, %rbx
+	orq	%rbx, %rax
+
+	movq	%rax, (gdt + 0x20)(%rip)
+
+	/* Setup a gdt that is generally usefully */
 	lgdt	gdt(%rip)
-		
+
+	/* Setup the far pointer to the entry point */
+	movl	eip(%rip), %eax
+	movl	%eax, entry32_addr(%rip)
+
 	/* Switch to 32bit compatiblity mode */
 	ljmp	*lm_exit_addr(%rip)
 lm_exit:
@@ -60,19 +81,19 @@ lm_exit:
 	movl	%eax, %gs
 
 	/* Load the registers */
-	movl	eax, %eax
-	movl	ecx, %ecx
-	movl	edx, %edx
-	movl	esi, %esi
-	movl	edi, %edi
-	movl	esp, %esp
-	movl	ebp, %ebp
-	movl	ebx, %ebx
+	movl	%cs:eax - entry32, %eax
+	movl	%cs:ecx - entry32, %ecx
+	movl	%cs:edx - entry32, %edx
+	movl	%cs:esi - entry32, %esi
+	movl	%cs:edi - entry32, %edi
+	movl	%cs:esp - entry32, %esp
+	movl	%cs:ebp - entry32, %ebp
+	movl	%cs:ebx - entry32, %ebx
 
 	/* Jump to the loaded image */
-	jmpl	*(eip)
+	ljmp	*%cs:entry32_addr - entry32
 
-	.section ".rodata"
+	.section ".data"
 	.balign 16
 gdt:	/* 0x00 unusable segment 
 	 * 0x08 unused
@@ -88,8 +109,8 @@ gdt:	/* 0x00 unusable segment
 	/* 0x18 4GB flat data segment */
 	.word	0xFFFF, 0x0000, 0x9200, 0x00CF
 
-	/* 0x20 dummy */
-	.word	0x0000, 0x0000, 0x0000, 0x000
+	/* 0x20 4GB flat code segment base at entry32 */
+	.word	0xFFFF, 0x0000, 0x9A00, 0x0CF
 	/* 0x28 dummy */
 	.word	0x0000, 0x0000, 0x0000, 0x000
 	/* 0x30 dummy */
@@ -115,9 +136,15 @@ gdt_end:
 	.section ".rodata"
 	.balign 4
 lm_exit_addr:
-	.long lm_exit
-	.long 0x10		
-	
+	.long lm_exit - entry32
+	.long 0x20
+
+	.section ".data"
+	.balign 4
+entry32_addr:
+	.long 0x00000000
+	.long 0x10
+
 	.section ".rodata"
 	.balign 4
 entry32_regs:  
@@ -129,6 +156,9 @@ esi:	.long 0x00000000
 edi:	.long 0x00000000
 esp:	.long 0x00000000
 ebp:	.long 0x00000000
-eip:	.long entry16
-	.size entry32_regs, . - entry32_regs
+eip:	.quad entry16	/* low 32 bits address
+			 * high 32bits zeros
+			 * uses 64bit reloc
+			 */
+	.size entry32_regs, (. - 4) - entry32_regs
 
diff --git a/purgatory/arch/x86_64/entry64.S b/purgatory/arch/x86_64/entry64.S
index 666023c..e3223b7 100644
--- a/purgatory/arch/x86_64/entry64.S
+++ b/purgatory/arch/x86_64/entry64.S
@@ -37,9 +37,10 @@ entry64:
 	movl	%eax, %fs
 	movl	%eax, %gs
 
-	movq	$stack_init, %rsp
+	leaq	stack_init(%rip), %rsp
 	pushq	$0x10 /* CS */
-	pushq	$new_cs_exit
+	leaq	new_cs_exit(%rip), %rax
+	pushq	%rax
 	lretq
 new_cs_exit:
 
diff --git a/purgatory/arch/x86_64/setup-x86_64.S b/purgatory/arch/x86_64/setup-x86_64.S
index 74997fa..95572d8 100644
--- a/purgatory/arch/x86_64/setup-x86_64.S
+++ b/purgatory/arch/x86_64/setup-x86_64.S
@@ -42,10 +42,10 @@ purgatory_start:
 	/* In 64bit mode the code segment is meaningless */
 
 	movq	0(%rsp), %rax
-	movq	%rax, jump_back_entry
+	movq	%rax, jump_back_entry(%rip)
 
 	/* Setup a stack */
-	movq	$lstack_end, %rsp
+	leaq	lstack_end(%rip), %rsp
 
 	/* Call the C code */
 	call purgatory
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 6/6] kexec, x86_64: put 64bit bzImage high
  2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                                                           ` (4 preceding siblings ...)
  2012-11-19  3:02                                         ` [PATCH 5/6] kexec, x86: Make x64_64 purgatory relocatable above 4G Yinghai Lu
@ 2012-11-19  3:02                                         ` Yinghai Lu
  5 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:02 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Need to make sure the right 64bit start address is passed, so that we can jump there directly later.

-v2: add kexec-bzImage64.c according to Eric.
-v3: don't need to keep purgatory under 2g after Eric's change to purgatory code.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/x86_64/Makefile          |    1 +
 kexec/arch/x86_64/kexec-bzImage64.c |  316 +++++++++++++++++++++++++++++++++++
 kexec/arch/x86_64/kexec-x86_64.c    |    1 +
 kexec/arch/x86_64/kexec-x86_64.h    |    5 +
 4 files changed, 323 insertions(+), 0 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile
index 405bdf5..1cf10f9 100644
--- a/kexec/arch/x86_64/Makefile
+++ b/kexec/arch/x86_64/Makefile
@@ -13,6 +13,7 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c
 x86_64_KEXEC_SRCS_native =  kexec/arch/x86_64/kexec-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c
+x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c
 
 x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native)
 
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c
new file mode 100644
index 0000000..6835ef6
--- /dev/null
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
@@ -0,0 +1,316 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2010  Eric Biederman (ebiederm@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include <boot/elf_boot.h>
+#include <ip_checksum.h>
+#include <x86/x86-linux.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "kexec-x86_64.h"
+#include "../i386/x86-linux-setup.h"
+#include "../i386/crashdump-x86.h"
+#include <arch/options.h>
+
+static const int probe_debug = 0;
+
+int bzImage64_probe(const char *buf, off_t len)
+{
+	const struct x86_linux_header *header;
+	if ((uintmax_t)len < (uintmax_t)(2 * 512)) {
+		if (probe_debug) {
+			fprintf(stderr, "File is too short to be a bzImage!\n");
+		}
+		return -1;
+	}
+	header = (const struct x86_linux_header *)buf;
+	if (memcmp(header->header_magic, "HdrS", 4) != 0) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a bzImage\n");
+		}
+		return -1;
+	}
+	if (header->boot_sector_magic != 0xAA55) {
+		if (probe_debug) {
+			fprintf(stderr, "No x86 boot sector present\n");
+		}
+		/* No x86 boot sector present */
+		return -1;
+	}
+	if (header->protocol_version < 0x020C) {
+		if (probe_debug) {
+			fprintf(stderr, "Must be at least protocol version 2.12\n");
+		}
+		/* Must be at least protocol version 2.12 */
+		return -1;
+	}
+	if ((header->loadflags & 1) == 0) {
+		if (probe_debug) {
+			fprintf(stderr, "zImage not a bzImage\n");
+		}
+		/* Not a bzImage */
+		return -1;
+	}
+	if (!header->code64_start_offset) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a bzImage64\n");
+		}
+		/* Must have non zero offset */
+		return -1;
+	}
+	if (!header->relocatable_kernel) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a relocatable bzImage64\n");
+		}
+		/* Must be relocatable */
+		return -1;
+	}
+	/* I've got a bzImage64 */
+	if (probe_debug) {
+		fprintf(stderr, "It's a relocatable bzImage64\n");
+	}
+	return 0;
+}
+
+void bzImage64_usage(void)
+{
+	printf(	"    --command-line=STRING Set the kernel command line to STRING.\n"
+		"    --append=STRING       Set the kernel command line to STRING.\n"
+		"    --reuse-cmdline       Use kernel command line from running system.\n"
+		"    --initrd=FILE         Use FILE as the kernel's initial ramdisk.\n"
+		"    --ramdisk=FILE        Use FILE as the kernel's initial ramdisk.\n"
+		);
+}
+
+static int do_bzImage64_load(struct kexec_info *info,
+	const char *kernel, off_t kernel_len,
+	const char *command_line, off_t command_line_len,
+	const char *initrd, off_t initrd_len)
+{
+	struct x86_linux_header setup_header;
+	struct x86_linux_param_header *real_mode;
+	int setup_sects;
+	size_t size;
+	int kern16_size;
+	unsigned long setup_base, setup_size;
+	struct entry64_regs regs64;
+	char *modified_cmdline;
+	unsigned long cmdline_end;
+	unsigned long code64_start_offset = 0;
+	unsigned long kernel64_load_addr = 0;
+
+	/*
+	 * Find out about the file I am about to load.
+	 */
+	if ((uintmax_t)kernel_len < (uintmax_t)(2 * 512))
+		return -1;
+
+	memcpy(&setup_header, kernel, sizeof(setup_header));
+	setup_sects = setup_header.setup_sects;
+	if (setup_sects == 0)
+		setup_sects = 4;
+
+	kern16_size = (setup_sects +1) *512;
+	if (kernel_len < kern16_size) {
+		fprintf(stderr, "BzImage truncated?\n");
+		return -1;
+	}
+
+	if ((uintmax_t)command_line_len > (uintmax_t)setup_header.cmdline_size) {
+		dbgprintf("Kernel command line too long for kernel!\n");
+		return -1;
+	}
+
+	dbgprintf("bzImage is relocatable\n");
+
+	code64_start_offset = setup_header.code64_start_offset;
+	dbgprintf("code64_start_offset: 0x%lx\n", code64_start_offset);
+
+	/* Need to append some command line parameters internally in case of
+	 * taking crash dumps.
+	 */
+	if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
+		modified_cmdline = xmalloc(COMMAND_LINE_SIZE);
+		memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE);
+		if (command_line) {
+			strncpy(modified_cmdline, command_line,
+					COMMAND_LINE_SIZE);
+			modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0';
+		}
+
+		/* If panic kernel is being loaded, additional segments need
+		 * to be created. load_crashdump_segments will take care of
+		 * loading the segments as high in memory as possible, hence
+		 * in turn as away as possible from kernel to avoid being
+		 * stomped by the kernel.
+		 */
+		if (load_crashdump_segments(info, modified_cmdline, -1, 0) < 0)
+			return -1;
+
+		/* Use new command line buffer */
+		command_line = modified_cmdline;
+		command_line_len = strlen(command_line) +1;
+	}
+
+	/* x86_64 purgatory could be anywhere */
+	elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
+					0x3000, -1, -1, 0);
+	dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr);
+	/* The argument/parameter segment */
+	setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
+	real_mode = xmalloc(setup_size);
+	memcpy(real_mode, kernel, kern16_size);
+
+	/* No real mode code will be executing. setup segment can be loaded
+	 * anywhere as we will be just reading command line.
+	 */
+	setup_base = add_buffer(info, real_mode, setup_size, setup_size,
+				16, 0x3000, -1, -1);
+
+	dbgprintf("Loaded setup data and command line at 0x%lx\n",
+			setup_base);
+
+	/* The main kernel segment */
+	size = kernel_len - kern16_size;
+
+	/* align to 1G to avoid crossing the PUD_SIZE boundary */
+	/* try above 4G at first */
+	kernel64_load_addr = add_buffer(info, kernel + kern16_size,
+					size, size, 1UL<<30,
+					1UL<<32, ULONG_MAX,
+					-1);
+
+	/* 1G to 4G */
+	if (!kernel64_load_addr)
+		kernel64_load_addr = add_buffer(info, kernel + kern16_size,
+						size, size, 1UL<<30,
+						1UL<<30, 1UL<<32,
+						-1);
+
+	/* under 1g */
+	if (!kernel64_load_addr)
+		kernel64_load_addr = add_buffer(info, kernel + kern16_size,
+						size, size,
+						real_mode->kernel_alignment,
+						0x100000, 1UL<<30,
+						1);
+	if (kernel64_load_addr)
+		dbgprintf("Loaded 64bit kernel at 0x%lx\n", kernel64_load_addr);
+	else
+		die("can not load bzImage64");
+
+	/* Tell the kernel what is going on */
+	setup_linux_bootloader_parameters(info, real_mode, setup_base,
+			kern16_size, command_line, command_line_len,
+			initrd, initrd_len);
+
+	elf_rel_get_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
+	regs64.rbx = 0;           /* Bootstrap processor */
+	regs64.rsi = setup_base;  /* Pointer to the parameters */
+	regs64.rip = kernel64_load_addr + code64_start_offset; /* the entry point */
+	regs64.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */
+	elf_rel_set_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
+
+	cmdline_end = setup_base + kern16_size + command_line_len - 1;
+	elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end,
+			   sizeof(unsigned long));
+
+	/* Fill in the information BIOS calls would normally provide. */
+	setup_linux_system_parameters(real_mode, info->kexec_flags);
+
+	return 0;
+}
+
+int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
+	struct kexec_info *info)
+{
+	char *command_line = NULL;
+	const char *ramdisk, *append = NULL;
+	char *ramdisk_buf;
+	off_t ramdisk_length;
+	int command_line_len;
+	int opt;
+	int result;
+
+	/* See options.h -- add any more there, too. */
+	static const struct option options[] = {
+		KEXEC_ARCH_OPTIONS
+		{ "command-line",	1, 0, OPT_APPEND },
+		{ "append",		1, 0, OPT_APPEND },
+		{ "reuse-cmdline",	0, 0, OPT_REUSE_CMDLINE },
+		{ "initrd",		1, 0, OPT_RAMDISK },
+		{ "ramdisk",		1, 0, OPT_RAMDISK },
+		{ 0,			0, 0, 0 },
+	};
+	static const char short_options[] = KEXEC_ARCH_OPT_STR "d";
+
+	ramdisk = 0;
+	ramdisk_length = 0;
+	while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) {
+		switch(opt) {
+		default:
+			/* Ignore core options */
+			if (opt < OPT_ARCH_MAX) {
+				break;
+			}
+		case '?':
+			usage();
+			return -1;
+			break;
+		case OPT_APPEND:
+			append = optarg;
+			break;
+		case OPT_REUSE_CMDLINE:
+			command_line = get_command_line();
+			break;
+		case OPT_RAMDISK:
+			ramdisk = optarg;
+			break;
+		}
+	}
+	command_line = concat_cmdline(command_line, append);
+	command_line_len = 0;
+	if (command_line) {
+		command_line_len = strlen(command_line) +1;
+	}
+	ramdisk_buf = 0;
+	if (ramdisk) {
+		ramdisk_buf = slurp_file(ramdisk, &ramdisk_length);
+	}
+	result = do_bzImage64_load(info,
+		buf, len,
+		command_line, command_line_len,
+		ramdisk_buf, ramdisk_length);
+
+	free(command_line);
+	return result;
+}
diff --git a/kexec/arch/x86_64/kexec-x86_64.c b/kexec/arch/x86_64/kexec-x86_64.c
index 6c42c32..5c23e01 100644
--- a/kexec/arch/x86_64/kexec-x86_64.c
+++ b/kexec/arch/x86_64/kexec-x86_64.c
@@ -37,6 +37,7 @@ struct file_type file_type[] = {
 	{ "multiboot-x86", multiboot_x86_probe, multiboot_x86_load,
 	  multiboot_x86_usage },
 	{ "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage },
+	{ "bzImage64", bzImage64_probe, bzImage64_load, bzImage64_usage },
 	{ "bzImage", bzImage_probe, bzImage_load, bzImage_usage },
 	{ "beoboot-x86", beoboot_probe, beoboot_load, beoboot_usage },
 	{ "nbi-x86", nbi_probe, nbi_load, nbi_usage },
diff --git a/kexec/arch/x86_64/kexec-x86_64.h b/kexec/arch/x86_64/kexec-x86_64.h
index a97cd71..b820ae8 100644
--- a/kexec/arch/x86_64/kexec-x86_64.h
+++ b/kexec/arch/x86_64/kexec-x86_64.h
@@ -28,4 +28,9 @@ int elf_x86_64_load(int argc, char **argv, const char *buf, off_t len,
 	struct kexec_info *info);
 void elf_x86_64_usage(void);
 
+int bzImage64_probe(const char *buf, off_t len);
+int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
+        struct kexec_info *info);
+void bzImage64_usage(void);
+
 #endif /* KEXEC_X86_64_H */
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit
  2012-11-18 10:38                                     ` Eric W. Biederman
  2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
@ 2012-11-19  3:04                                       ` Yinghai Lu
  2012-11-19  3:04                                         ` [PATCH v2 1/6] kexec, x86: add boot header member for version 2.12 Yinghai Lu
                                                           ` (6 more replies)
  1 sibling, 7 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Currently the kdump reserved region is limited to under 896M, because kexec has
that limitation, and the bzImage also needs to stay under 4G.

kernel parts changes could be found at:
        git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git for-x86-boot

These patches are for kexec-tools, to load the bzImage and ramdisk high according
to the newly added boot header fields.

-v2: remove mem64-min/mem64-max
     add purgatory changes from Eric, which make it relocatable
     put command line above 4g.

Eric W. Biederman (1):
  kexec, x86: Make x64_64 purgatory relocatable above 4G

Yinghai Lu (5):
  kexec, x86: add boot header member for version 2.12
  kexec: don't die during buffer finding
  kexec, x86: put ramdisk high for 64bit bzImage
  kexec, x86: set ext_cmd_line_ptr when boot_param is put high
  kexec, x86_64: put 64bit bzImage high

 include/x86/x86-linux.h                |   22 ++-
 kexec/arch/i386/x86-linux-setup.c      |   40 +++-
 kexec/arch/x86_64/Makefile             |    1 +
 kexec/arch/x86_64/kexec-bzImage64.c    |  316 ++++++++++++++++++++++++++++++++
 kexec/arch/x86_64/kexec-x86_64.c       |    1 +
 kexec/arch/x86_64/kexec-x86_64.h       |    5 +
 kexec/kexec.c                          |    7 +-
 purgatory/arch/i386/entry32-16-debug.S |   22 ++-
 purgatory/arch/i386/entry32-16.S       |   22 ++-
 purgatory/arch/x86_64/Makefile         |    4 +-
 purgatory/arch/x86_64/entry64-32.S     |   68 +++++--
 purgatory/arch/x86_64/entry64.S        |    5 +-
 purgatory/arch/x86_64/setup-x86_64.S   |    4 +-
 13 files changed, 458 insertions(+), 59 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH v2 1/6] kexec, x86: add boot header member for version 2.12
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
@ 2012-11-19  3:04                                         ` Yinghai Lu
  2012-11-19  3:04                                         ` [PATCH v2 2/6] kexec: don't die during buffer finding Yinghai Lu
                                                           ` (5 subsequent siblings)
  6 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Later patches will use ext_ramdisk_image/size and code64_start_offset.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 include/x86/x86-linux.h           |   22 +++++++++++++++++++++-
 kexec/arch/i386/x86-linux-setup.c |    2 +-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 27af02b..71cd296 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -178,7 +178,17 @@ struct x86_linux_param_header {
 	uint32_t cmdline_size;			/* 0x238 */
 	uint32_t hardware_subarch;		/* 0x23C */
 	uint64_t hardware_subarch_data;		/* 0x240 */
-	uint8_t  reserved16[0x290 - 0x248];	/* 0x248 */
+	uint32_t payload_offset;		/* 0x248 */
+	uint32_t payload_length;		/* 0x24C */
+	uint64_t setup_data;			/* 0x250 */
+	uint64_t pref_address;			/* 0x258 */
+	uint32_t init_size;			/* 0x260 */
+	uint32_t handover_offset;		/* 0x264 */
+	uint32_t ext_ramdisk_image;		/* 0x268 */
+	uint32_t ext_ramdisk_size;		/* 0x26C */
+	uint32_t code64_start_offset;		/* 0x270 */
+	uint32_t ext_cmd_line_ptr;		/* 0x274 */
+	uint8_t  reserved16[0x290 - 0x278];	/* 0x278 */
 	uint32_t edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];	/* 0x290 */
 #endif
 	struct 	e820entry e820_map[E820MAX];	/* 0x2d0 */
@@ -245,6 +255,16 @@ struct x86_linux_header {
 	uint32_t cmdline_size;                  /* 0x238 */
 	uint32_t hardware_subarch;              /* 0x23C */
 	uint64_t hardware_subarch_data;         /* 0x240 */
+	uint32_t payload_offset;		/* 0x248 */
+	uint32_t payload_length;		/* 0x24C */
+	uint64_t setup_data;			/* 0x250 */
+	uint64_t pref_address;			/* 0x258 */
+	uint32_t init_size;			/* 0x260 */
+	uint32_t handover_offset;		/* 0x264 */
+	uint32_t ext_ramdisk_image;		/* 0x268 */
+	uint32_t ext_ramdisk_size;		/* 0x26C */
+	uint32_t code64_start_offset;		/* 0x270 */
+	uint32_t ext_cmd_line_ptr;		/* 0x274 */
 #endif
 } PACKED;
 
diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
index b7ab8ea..53d9df9 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -41,7 +41,7 @@ void init_linux_parameters(struct x86_linux_param_header *real_mode)
 
 	/* Boot block magic */
 	memcpy(real_mode->header_magic, "HdrS", 4);
-	real_mode->protocol_version = 0x0206;
+	real_mode->protocol_version = 0x020C;
 	real_mode->initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
 	real_mode->cmdline_size = COMMAND_LINE_SIZE;
 }
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH v2 2/6] kexec: don't die during buffer finding
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  2012-11-19  3:04                                         ` [PATCH v2 1/6] kexec, x86: add boot header member for version 2.12 Yinghai Lu
@ 2012-11-19  3:04                                         ` Yinghai Lu
  2012-11-19 17:05                                           ` Eric W. Biederman
  2012-11-19  3:04                                         ` [PATCH v2 3/6] kexec, x86: put ramdisk high for 64bit bzImage Yinghai Lu
                                                           ` (4 subsequent siblings)
  6 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

add_buffer_phys_virt() could return 0 when locate_hole() fails, and let the
caller retry with new ranges.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/kexec.c |    7 +++----
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/kexec/kexec.c b/kexec/kexec.c
index 8928be0..0f8aec8 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -366,10 +366,9 @@ unsigned long add_buffer_phys_virt(struct kexec_info *info,
 	memsz = (memsz + (pagesize - 1)) & ~(pagesize - 1);
 
 	base = locate_hole(info, memsz, buf_align, buf_min, buf_max, buf_end);
-	if (base == ULONG_MAX) {
-		die("locate_hole failed\n");
-	}
-	
+	if (base == ULONG_MAX)
+		return 0;
+
 	add_segment_phys_virt(info, buf, bufsz, base, memsz, phys);
 	return base;
 }
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH v2 3/6] kexec, x86: put ramdisk high for 64bit bzImage
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
  2012-11-19  3:04                                         ` [PATCH v2 1/6] kexec, x86: add boot header member for version 2.12 Yinghai Lu
  2012-11-19  3:04                                         ` [PATCH v2 2/6] kexec: don't die during buffer finding Yinghai Lu
@ 2012-11-19  3:04                                         ` Yinghai Lu
  2012-11-19 17:20                                           ` Eric W. Biederman
  2012-11-19  3:04                                         ` [PATCH v2 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high Yinghai Lu
                                                           ` (3 subsequent siblings)
  6 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Only do that for a 64bit bzImage, and fall back to a low address if we fail to get a high one.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/i386/x86-linux-setup.c |   30 ++++++++++++++++++++++--------
 1 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
index 53d9df9..b0e6119 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -69,20 +69,34 @@ void setup_linux_bootloader_parameters(
 	}
 
 	/* Load the initrd if we have one */
+	initrd_base = 0;
 	if (initrd_buf) {
-		initrd_base = add_buffer(info,
-			initrd_buf, initrd_size, initrd_size,
-			4096, INITRD_BASE, initrd_addr_max, -1);
+		if (real_mode->protocol_version >= 0x020c &&
+		    real_mode->code64_start_offset) {
+			initrd_base = add_buffer(info,
+				initrd_buf, initrd_size, initrd_size,
+				4096, 1UL<<32, ULONG_MAX, -1);
+			if (!initrd_base)
+				initrd_base = add_buffer(info,
+					initrd_buf, initrd_size, initrd_size,
+					4096, 1UL<<30, 1UL<<32, -1);
+		}
+		if (!initrd_base)
+			initrd_base = add_buffer(info,
+				initrd_buf, initrd_size, initrd_size,
+				4096, INITRD_BASE, initrd_addr_max, -1);
 		dbgprintf("Loaded initrd at 0x%lx size 0x%lx\n", initrd_base,
 			initrd_size);
-	} else {
-		initrd_base = 0;
+	} else
 		initrd_size = 0;
-	}
 
 	/* Ramdisk address and size */
-	real_mode->initrd_start = initrd_base;
-	real_mode->initrd_size  = initrd_size;
+	real_mode->initrd_start = initrd_base & 0xffffffff;
+	real_mode->initrd_size  = initrd_size & 0xffffffff;
+	if ((initrd_base + initrd_size) > (1ULL<<32)) {
+		real_mode->ext_ramdisk_image = initrd_base >> 32;
+		real_mode->ext_ramdisk_size  = initrd_size >> 32;
+	}
 
 	/* The location of the command line */
 	/* if (real_mode_base == 0x90000) { */
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH v2 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                                                           ` (2 preceding siblings ...)
  2012-11-19  3:04                                         ` [PATCH v2 3/6] kexec, x86: put ramdisk high for 64bit bzImage Yinghai Lu
@ 2012-11-19  3:04                                         ` Yinghai Lu
  2012-11-19 17:22                                           ` Eric W. Biederman
  2012-11-19  3:04                                         ` [PATCH v2 5/6] kexec, x86: Make x64_64 purgatory relocatable above 4G Yinghai Lu
                                                           ` (2 subsequent siblings)
  6 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

Only do that for bzImage64, since it could have its command line above 4g.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/i386/x86-linux-setup.c |    8 +++++++-
 1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
index b0e6119..c929166 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -105,7 +105,13 @@ void setup_linux_bootloader_parameters(
 		/* setup_move_size */
 	/* } */
 	if (real_mode->protocol_version >= 0x0202) {
-		real_mode->cmd_line_ptr = real_mode_base + cmdline_offset;
+		unsigned long cmd_line_ptr = real_mode_base + cmdline_offset;
+
+		real_mode->cmd_line_ptr = cmd_line_ptr & 0xffffffff;
+		if (real_mode->protocol_version >= 0x020c)
+			real_mode->ext_cmd_line_ptr = cmd_line_ptr >> 32;
+
+		printf("cmd_line_ptr: %lx\n", cmd_line_ptr);
 	}
 
 	/* Fill in the command line */
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH v2 5/6] kexec, x86: Make x64_64 purgatory relocatable above 4G
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                                                           ` (3 preceding siblings ...)
  2012-11-19  3:04                                         ` [PATCH v2 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high Yinghai Lu
@ 2012-11-19  3:04                                         ` Yinghai Lu
  2012-11-19  3:04                                         ` [PATCH v2 6/6] kexec, x86_64: put 64bit bzImage high Yinghai Lu
  2012-11-19 17:04                                         ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Eric W. Biederman
  6 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

From: "Eric W. Biederman" <ebiederm@xmission.com>

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 purgatory/arch/i386/entry32-16-debug.S |   22 ++++++-----
 purgatory/arch/i386/entry32-16.S       |   22 ++++++-----
 purgatory/arch/x86_64/Makefile         |    4 +-
 purgatory/arch/x86_64/entry64-32.S     |   68 +++++++++++++++++++++++---------
 purgatory/arch/x86_64/entry64.S        |    5 +-
 purgatory/arch/x86_64/setup-x86_64.S   |    4 +-
 6 files changed, 81 insertions(+), 44 deletions(-)

diff --git a/purgatory/arch/i386/entry32-16-debug.S b/purgatory/arch/i386/entry32-16-debug.S
index 82b58ca..2f47607 100644
--- a/purgatory/arch/i386/entry32-16-debug.S
+++ b/purgatory/arch/i386/entry32-16-debug.S
@@ -29,15 +29,17 @@
 	.balign 16
 entry16_debug:
 	.code32
-	/* Compute where I am running at */
-	movl	$entry16_debug, %ebx	
+	/* Compute where I am running at (assumes esp valid) */
+	call	1f
+1:	popl	%ebx
 
 	/* Fixup my real mode segment */
 	movl	%ebx, %eax
 	shrl	$4, %eax
-	movw	%ax, 2 + realptr
+	movw	%ax, (2 + realptr - entry16_debug)(%ebx)
 
 	/* Fixup the gdt */
+	movl	%ebx, (0x02 + gdt - entry16_debug)(%ebx)
 	movl	%ebx, %eax
 	shll	$16, %eax
 	
@@ -49,19 +51,19 @@ entry16_debug:
 	andl	$0xff000000, %edx
 	orl	%edx, %ecx
 
-	orl	%eax, 0x08 + gdt
-	orl	%ecx, 0x0c + gdt
-	orl	%eax, 0x10 + gdt
-	orl	%ecx, 0x14 + gdt	
+	orl	%eax, (0x08 + gdt - entry16_debug)(%ebx)
+	orl	%ecx, (0x0c + gdt - entry16_debug)(%ebx)
+	orl	%eax, (0x10 + gdt - entry16_debug)(%ebx)
+	orl	%ecx, (0x14 + gdt - entry16_debug)(%ebx)
 	
 	
 DEBUG_CHAR('a')
 	/* Setup the classic BIOS interrupt table at 0x0 */
-	lidt	idtptr
+	lidt	(idtptr - entry16_debug)(%ebx)
 
 DEBUG_CHAR('b')
 	/* Provide us with 16bit segments that we can use */
-	lgdt	gdt
+	lgdt	(gdt - entry16_debug)(%ebx)
 
 DEBUG_CHAR('c')
 	/* Note we don't disable the a20 line, (this shouldn't be required)
@@ -160,7 +162,7 @@ idtptr:
 gdt:
 	/* 0x00 unusable segment so used as the gdt ptr */
 	.word gdt_end - gdt - 1
-	.long gdt
+	.long 0 /* gdt */
 	.word 0
 
 	/* 0x08 16 bit real mode code segment */
diff --git a/purgatory/arch/i386/entry32-16.S b/purgatory/arch/i386/entry32-16.S
index aaf1273..20a1ce6 100644
--- a/purgatory/arch/i386/entry32-16.S
+++ b/purgatory/arch/i386/entry32-16.S
@@ -24,15 +24,17 @@
 	.balign 16
 entry16:
 	.code32
-	/* Compute where I am running at */
-	movl	$entry16, %ebx
+	/* Compute where I am running at (assumes esp valid) */
+	call	1f
+1:	popl	%ebx
 
 	/* Fixup my real mode segment */
 	movl	%ebx, %eax
 	shrl	$4, %eax
-	movw	%ax, 2 + realptr
+	movw	%ax, (2 + realptr - entry16)(%ebx)
 
 	/* Fixup the gdt */
+	movl	%ebx, (0x02 + gdt - entry16)(%ebx)
 	movl	%ebx, %eax
 	shll	$16, %eax
 	
@@ -44,17 +46,17 @@ entry16:
 	andl	$0xff000000, %edx
 	orl	%edx, %ecx
 
-	orl	%eax, 0x08 + gdt
-	orl	%ecx, 0x0c + gdt
-	orl	%eax, 0x10 + gdt
-	orl	%ecx, 0x14 + gdt	
+	orl	%eax, (0x08 + gdt - entry16)(%ebx)
+	orl	%ecx, (0x0c + gdt - entry16)(%ebx)
+	orl	%eax, (0x10 + gdt - entry16)(%ebx)
+	orl	%ecx, (0x14 + gdt - entry16)(%ebx)
 	
 	
 	/* Setup the classic BIOS interrupt table at 0x0 */
-	lidt	idtptr
+	lidt	(idtptr - entry16)(%ebx)
 	
 	/* Provide us with 16bit segments that we can use */
-	lgdt	gdt
+	lgdt	(gdt - entry16)(%ebx)
 
 	/* Note we don't disable the a20 line, (this shouldn't be required)
 	 * The code to do it is in kexec_test and it is a real pain.
@@ -147,7 +149,7 @@ idtptr:
 gdt:
 	/* 0x00 unusable segment so used as the gdt ptr */
 	.word gdt_end - gdt - 1
-	.long gdt
+	.long 0 /* gdt */
 	.word 0
 
 	/* 0x08 16 bit real mode code segment */
diff --git a/purgatory/arch/x86_64/Makefile b/purgatory/arch/x86_64/Makefile
index 22b4228..7300937 100644
--- a/purgatory/arch/x86_64/Makefile
+++ b/purgatory/arch/x86_64/Makefile
@@ -16,9 +16,11 @@ dist += purgatory/arch/x86_64/Makefile $(x86_64_PURGATORY_SRCS_native) 	\
 	purgatory/arch/x86_64/purgatory-x86_64.h
 
 # Don't add sources in i386/ to dist, as i386/Makefile adds them
-x86_64_PURGATORY_SRCS +=  purgatory/arch/i386/entry32-16.S
+x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16.S
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/entry32-16-debug.S
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/crashdump_backup.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/console-x86.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/vga.c
 x86_64_PURGATORY_SRCS += purgatory/arch/i386/pic.c
+
+x86_64_PURGATORY_EXTRA_CFLAGS = -mcmodel=large
diff --git a/purgatory/arch/x86_64/entry64-32.S b/purgatory/arch/x86_64/entry64-32.S
index 66f8a85..f2b6377 100644
--- a/purgatory/arch/x86_64/entry64-32.S
+++ b/purgatory/arch/x86_64/entry64-32.S
@@ -24,13 +24,34 @@
 	.equ	CR0_PG,        0x80000000
 
 	.text
+	.balign 16
 	.globl entry32, entry32_regs
 entry32:
 	.code64
 
-	/* Setup a gdt that should that is generally usefully */
+	/* Setup the 4G offset of entry32 lm_exit code segment */
+	movq	$0x00CF9A000000ffff, %rax
+
+	leaq	entry32(%rip), %rbx	/* Low 24 bits */
+	andq	$0xffffff, %rbx
+	shlq	$16, %rbx
+	orq	%rbx, %rax
+
+	leaq	entry32(%rip), %rbx	/* High 8 bits */
+	movq	$0xff000000, %rdx
+	andq	%rdx, %rbx
+	shlq	$32, %rbx
+	orq	%rbx, %rax
+
+	movq	%rax, (gdt + 0x20)(%rip)
+
+	/* Setup a gdt that is generally useful */
 	lgdt	gdt(%rip)
-		
+
+	/* Setup the far pointer to the entry point */
+	movl	eip(%rip), %eax
+	movl	%eax, entry32_addr(%rip)
+
 	/* Switch to 32bit compatiblity mode */
 	ljmp	*lm_exit_addr(%rip)
 lm_exit:
@@ -60,19 +81,19 @@ lm_exit:
 	movl	%eax, %gs
 
 	/* Load the registers */
-	movl	eax, %eax
-	movl	ecx, %ecx
-	movl	edx, %edx
-	movl	esi, %esi
-	movl	edi, %edi
-	movl	esp, %esp
-	movl	ebp, %ebp
-	movl	ebx, %ebx
+	movl	%cs:eax - entry32, %eax
+	movl	%cs:ecx - entry32, %ecx
+	movl	%cs:edx - entry32, %edx
+	movl	%cs:esi - entry32, %esi
+	movl	%cs:edi - entry32, %edi
+	movl	%cs:esp - entry32, %esp
+	movl	%cs:ebp - entry32, %ebp
+	movl	%cs:ebx - entry32, %ebx
 
 	/* Jump to the loaded image */
-	jmpl	*(eip)
+	ljmp	*%cs:entry32_addr - entry32
 
-	.section ".rodata"
+	.section ".data"
 	.balign 16
 gdt:	/* 0x00 unusable segment 
 	 * 0x08 unused
@@ -88,8 +109,8 @@ gdt:	/* 0x00 unusable segment
 	/* 0x18 4GB flat data segment */
 	.word	0xFFFF, 0x0000, 0x9200, 0x00CF
 
-	/* 0x20 dummy */
-	.word	0x0000, 0x0000, 0x0000, 0x000
+	/* 0x20 4GB flat code segment base at entry32 */
+	.word	0xFFFF, 0x0000, 0x9A00, 0x0CF
 	/* 0x28 dummy */
 	.word	0x0000, 0x0000, 0x0000, 0x000
 	/* 0x30 dummy */
@@ -115,9 +136,15 @@ gdt_end:
 	.section ".rodata"
 	.balign 4
 lm_exit_addr:
-	.long lm_exit
-	.long 0x10		
-	
+	.long lm_exit - entry32
+	.long 0x20
+
+	.section ".data"
+	.balign 4
+entry32_addr:
+	.long 0x00000000
+	.long 0x10
+
 	.section ".rodata"
 	.balign 4
 entry32_regs:  
@@ -129,6 +156,9 @@ esi:	.long 0x00000000
 edi:	.long 0x00000000
 esp:	.long 0x00000000
 ebp:	.long 0x00000000
-eip:	.long entry16
-	.size entry32_regs, . - entry32_regs
+eip:	.quad entry16	/* low 32 bits address
+			 * high 32bits zeros
+			 * uses 64bit reloc
+			 */
+	.size entry32_regs, (. - 4) - entry32_regs
 
diff --git a/purgatory/arch/x86_64/entry64.S b/purgatory/arch/x86_64/entry64.S
index 666023c..e3223b7 100644
--- a/purgatory/arch/x86_64/entry64.S
+++ b/purgatory/arch/x86_64/entry64.S
@@ -37,9 +37,10 @@ entry64:
 	movl	%eax, %fs
 	movl	%eax, %gs
 
-	movq	$stack_init, %rsp
+	leaq	stack_init(%rip), %rsp
 	pushq	$0x10 /* CS */
-	pushq	$new_cs_exit
+	leaq	new_cs_exit(%rip), %rax
+	pushq	%rax
 	lretq
 new_cs_exit:
 
diff --git a/purgatory/arch/x86_64/setup-x86_64.S b/purgatory/arch/x86_64/setup-x86_64.S
index 74997fa..95572d8 100644
--- a/purgatory/arch/x86_64/setup-x86_64.S
+++ b/purgatory/arch/x86_64/setup-x86_64.S
@@ -42,10 +42,10 @@ purgatory_start:
 	/* In 64bit mode the code segment is meaningless */
 
 	movq	0(%rsp), %rax
-	movq	%rax, jump_back_entry
+	movq	%rax, jump_back_entry(%rip)
 
 	/* Setup a stack */
-	movq	$lstack_end, %rsp
+	leaq	lstack_end(%rip), %rsp
 
 	/* Call the C code */
 	call purgatory
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH v2 6/6] kexec, x86_64: put 64bit bzImage high
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                                                           ` (4 preceding siblings ...)
  2012-11-19  3:04                                         ` [PATCH v2 5/6] kexec, x86: Make x64_64 purgatory relocatable above 4G Yinghai Lu
@ 2012-11-19  3:04                                         ` Yinghai Lu
  2012-11-19 17:28                                           ` Eric W. Biederman
  2012-11-19 17:04                                         ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Eric W. Biederman
  6 siblings, 1 reply; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19  3:04 UTC (permalink / raw)
  To: Simon Horman, H. Peter Anvin, Vivek Goyal, Haren Myneni,
	Eric W. Biederman
  Cc: Yinghai Lu, kexec

need to make sure pass right 64bit start address to go there directly later.

-v2: add kexec-bzImage64.c according to Eric.
-v3: don't need to purgatory under 2g after Eric's change to purgatory code.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 kexec/arch/x86_64/Makefile          |    1 +
 kexec/arch/x86_64/kexec-bzImage64.c |  316 +++++++++++++++++++++++++++++++++++
 kexec/arch/x86_64/kexec-x86_64.c    |    1 +
 kexec/arch/x86_64/kexec-x86_64.h    |    5 +
 4 files changed, 323 insertions(+), 0 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile
index 405bdf5..1cf10f9 100644
--- a/kexec/arch/x86_64/Makefile
+++ b/kexec/arch/x86_64/Makefile
@@ -13,6 +13,7 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c
 x86_64_KEXEC_SRCS_native =  kexec/arch/x86_64/kexec-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c
+x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c
 
 x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native)
 
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c
new file mode 100644
index 0000000..6835ef6
--- /dev/null
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
@@ -0,0 +1,316 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2010  Eric Biederman (ebiederm@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include <boot/elf_boot.h>
+#include <ip_checksum.h>
+#include <x86/x86-linux.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "kexec-x86_64.h"
+#include "../i386/x86-linux-setup.h"
+#include "../i386/crashdump-x86.h"
+#include <arch/options.h>
+
+static const int probe_debug = 0;
+
+int bzImage64_probe(const char *buf, off_t len)
+{
+	const struct x86_linux_header *header;
+	if ((uintmax_t)len < (uintmax_t)(2 * 512)) {
+		if (probe_debug) {
+			fprintf(stderr, "File is too short to be a bzImage!\n");
+		}
+		return -1;
+	}
+	header = (const struct x86_linux_header *)buf;
+	if (memcmp(header->header_magic, "HdrS", 4) != 0) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a bzImage\n");
+		}
+		return -1;
+	}
+	if (header->boot_sector_magic != 0xAA55) {
+		if (probe_debug) {
+			fprintf(stderr, "No x86 boot sector present\n");
+		}
+		/* No x86 boot sector present */
+		return -1;
+	}
+	if (header->protocol_version < 0x020C) {
+		if (probe_debug) {
+			fprintf(stderr, "Must be at least protocol version 2.12\n");
+		}
+		/* Must be at least protocol version 2.12 */
+		return -1;
+	}
+	if ((header->loadflags & 1) == 0) {
+		if (probe_debug) {
+			fprintf(stderr, "zImage not a bzImage\n");
+		}
+		/* Not a bzImage */
+		return -1;
+	}
+	if (!header->code64_start_offset) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a bzImage64\n");
+		}
+		/* Must have non zero offset */
+		return -1;
+	}
+	if (!header->relocatable_kernel) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a relocatable bzImage64\n");
+		}
+		/* Must be relocatable */
+		return -1;
+	}
+	/* I've got a bzImage64 */
+	if (probe_debug) {
+		fprintf(stderr, "It's a relocatable bzImage64\n");
+	}
+	return 0;
+}
+
+void bzImage64_usage(void)
+{
+	printf(	"    --command-line=STRING Set the kernel command line to STRING.\n"
+		"    --append=STRING       Set the kernel command line to STRING.\n"
+		"    --reuse-cmdline       Use kernel command line from running system.\n"
+		"    --initrd=FILE         Use FILE as the kernel's initial ramdisk.\n"
+		"    --ramdisk=FILE        Use FILE as the kernel's initial ramdisk.\n"
+		);
+}
+
+static int do_bzImage64_load(struct kexec_info *info,
+	const char *kernel, off_t kernel_len,
+	const char *command_line, off_t command_line_len,
+	const char *initrd, off_t initrd_len)
+{
+	struct x86_linux_header setup_header;
+	struct x86_linux_param_header *real_mode;
+	int setup_sects;
+	size_t size;
+	int kern16_size;
+	unsigned long setup_base, setup_size;
+	struct entry64_regs regs64;
+	char *modified_cmdline;
+	unsigned long cmdline_end;
+	unsigned long code64_start_offset = 0;
+	unsigned long kernel64_load_addr = 0;
+
+	/*
+	 * Find out about the file I am about to load.
+	 */
+	if ((uintmax_t)kernel_len < (uintmax_t)(2 * 512))
+		return -1;
+
+	memcpy(&setup_header, kernel, sizeof(setup_header));
+	setup_sects = setup_header.setup_sects;
+	if (setup_sects == 0)
+		setup_sects = 4;
+
+	kern16_size = (setup_sects +1) *512;
+	if (kernel_len < kern16_size) {
+		fprintf(stderr, "BzImage truncated?\n");
+		return -1;
+	}
+
+	if ((uintmax_t)command_line_len > (uintmax_t)setup_header.cmdline_size) {
+		dbgprintf("Kernel command line too long for kernel!\n");
+		return -1;
+	}
+
+	dbgprintf("bzImage is relocatable\n");
+
+	code64_start_offset = setup_header.code64_start_offset;
+	dbgprintf("code64_start_offset: 0x%lx\n", code64_start_offset);
+
+	/* Need to append some command line parameters internally in case of
+	 * taking crash dumps.
+	 */
+	if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
+		modified_cmdline = xmalloc(COMMAND_LINE_SIZE);
+		memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE);
+		if (command_line) {
+			strncpy(modified_cmdline, command_line,
+					COMMAND_LINE_SIZE);
+			modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0';
+		}
+
+		/* If panic kernel is being loaded, additional segments need
+		 * to be created. load_crashdump_segments will take care of
+		 * loading the segments as high in memory as possible, hence
+		 * in turn as away as possible from kernel to avoid being
+		 * stomped by the kernel.
+		 */
+		if (load_crashdump_segments(info, modified_cmdline, -1, 0) < 0)
+			return -1;
+
+		/* Use new command line buffer */
+		command_line = modified_cmdline;
+		command_line_len = strlen(command_line) +1;
+	}
+
+	/* x86_64 purgatory could be anywhere */
+	elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
+					0x3000, -1, -1, 0);
+	dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr);
+	/* The argument/parameter segment */
+	setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
+	real_mode = xmalloc(setup_size);
+	memcpy(real_mode, kernel, kern16_size);
+
+	/* No real mode code will be executing. setup segment can be loaded
+	 * anywhere as we will be just reading command line.
+	 */
+	setup_base = add_buffer(info, real_mode, setup_size, setup_size,
+				16, 0x3000, -1, -1);
+
+	dbgprintf("Loaded setup data and command line at 0x%lx\n",
+			setup_base);
+
+	/* The main kernel segment */
+	size = kernel_len - kern16_size;
+
+	/* align to 1G to avoid cross the PUD_SIZE boundary */
+	/* try above 4G at first */
+	kernel64_load_addr = add_buffer(info, kernel + kern16_size,
+					size, size, 1UL<<30,
+					1UL<<32, ULONG_MAX,
+					-1);
+
+	/* 1G to 4G */
+	if (!kernel64_load_addr)
+		kernel64_load_addr = add_buffer(info, kernel + kern16_size,
+						size, size, 1UL<<30,
+						1UL<<30, 1UL<<32,
+						-1);
+
+	/* under 1g */
+	if (!kernel64_load_addr)
+		kernel64_load_addr = add_buffer(info, kernel + kern16_size,
+						size, size,
+						real_mode->kernel_alignment,
+						0x100000, 1UL<<30,
+						1);
+	if (kernel64_load_addr)
+		dbgprintf("Loaded 64bit kernel at 0x%lx\n", kernel64_load_addr);
+	else
+		die("can not load bzImage64");
+
+	/* Tell the kernel what is going on */
+	setup_linux_bootloader_parameters(info, real_mode, setup_base,
+			kern16_size, command_line, command_line_len,
+			initrd, initrd_len);
+
+	elf_rel_get_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
+	regs64.rbx = 0;           /* Bootstrap processor */
+	regs64.rsi = setup_base;  /* Pointer to the parameters */
+	regs64.rip = kernel64_load_addr + code64_start_offset; /* the entry point */
+	regs64.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */
+	elf_rel_set_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
+
+	cmdline_end = setup_base + kern16_size + command_line_len - 1;
+	elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end,
+			   sizeof(unsigned long));
+
+	/* Fill in the information BIOS calls would normally provide. */
+	setup_linux_system_parameters(real_mode, info->kexec_flags);
+
+	return 0;
+}
+
+int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
+	struct kexec_info *info)
+{
+	char *command_line = NULL;
+	const char *ramdisk, *append = NULL;
+	char *ramdisk_buf;
+	off_t ramdisk_length;
+	int command_line_len;
+	int opt;
+	int result;
+
+	/* See options.h -- add any more there, too. */
+	static const struct option options[] = {
+		KEXEC_ARCH_OPTIONS
+		{ "command-line",	1, 0, OPT_APPEND },
+		{ "append",		1, 0, OPT_APPEND },
+		{ "reuse-cmdline",	0, 0, OPT_REUSE_CMDLINE },
+		{ "initrd",		1, 0, OPT_RAMDISK },
+		{ "ramdisk",		1, 0, OPT_RAMDISK },
+		{ 0,			0, 0, 0 },
+	};
+	static const char short_options[] = KEXEC_ARCH_OPT_STR "d";
+
+	ramdisk = 0;
+	ramdisk_length = 0;
+	while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) {
+		switch(opt) {
+		default:
+			/* Ignore core options */
+			if (opt < OPT_ARCH_MAX) {
+				break;
+			}
+		case '?':
+			usage();
+			return -1;
+			break;
+		case OPT_APPEND:
+			append = optarg;
+			break;
+		case OPT_REUSE_CMDLINE:
+			command_line = get_command_line();
+			break;
+		case OPT_RAMDISK:
+			ramdisk = optarg;
+			break;
+		}
+	}
+	command_line = concat_cmdline(command_line, append);
+	command_line_len = 0;
+	if (command_line) {
+		command_line_len = strlen(command_line) +1;
+	}
+	ramdisk_buf = 0;
+	if (ramdisk) {
+		ramdisk_buf = slurp_file(ramdisk, &ramdisk_length);
+	}
+	result = do_bzImage64_load(info,
+		buf, len,
+		command_line, command_line_len,
+		ramdisk_buf, ramdisk_length);
+
+	free(command_line);
+	return result;
+}
diff --git a/kexec/arch/x86_64/kexec-x86_64.c b/kexec/arch/x86_64/kexec-x86_64.c
index 6c42c32..5c23e01 100644
--- a/kexec/arch/x86_64/kexec-x86_64.c
+++ b/kexec/arch/x86_64/kexec-x86_64.c
@@ -37,6 +37,7 @@ struct file_type file_type[] = {
 	{ "multiboot-x86", multiboot_x86_probe, multiboot_x86_load,
 	  multiboot_x86_usage },
 	{ "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage },
+	{ "bzImage64", bzImage64_probe, bzImage64_load, bzImage64_usage },
 	{ "bzImage", bzImage_probe, bzImage_load, bzImage_usage },
 	{ "beoboot-x86", beoboot_probe, beoboot_load, beoboot_usage },
 	{ "nbi-x86", nbi_probe, nbi_load, nbi_usage },
diff --git a/kexec/arch/x86_64/kexec-x86_64.h b/kexec/arch/x86_64/kexec-x86_64.h
index a97cd71..b820ae8 100644
--- a/kexec/arch/x86_64/kexec-x86_64.h
+++ b/kexec/arch/x86_64/kexec-x86_64.h
@@ -28,4 +28,9 @@ int elf_x86_64_load(int argc, char **argv, const char *buf, off_t len,
 	struct kexec_info *info);
 void elf_x86_64_usage(void);
 
+int bzImage64_probe(const char *buf, off_t len);
+int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
+        struct kexec_info *info);
+void bzImage64_usage(void);
+
 #endif /* KEXEC_X86_64_H */
-- 
1.7.7


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit
  2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                                                           ` (5 preceding siblings ...)
  2012-11-19  3:04                                         ` [PATCH v2 6/6] kexec, x86_64: put 64bit bzImage high Yinghai Lu
@ 2012-11-19 17:04                                         ` Eric W. Biederman
  6 siblings, 0 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-19 17:04 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> Now we have limit kdump reserved under 896M, because kexec has the limitation.
> and also bzImage need to stay under 4g.

So this patchset is looking a lot better.  I still have some nits to
pick with it.

Eric

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v2 2/6] kexec: don't die during buffer finding
  2012-11-19  3:04                                         ` [PATCH v2 2/6] kexec: don't die during buffer finding Yinghai Lu
@ 2012-11-19 17:05                                           ` Eric W. Biederman
  0 siblings, 0 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-19 17:05 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> could return 0, and let the caller retry with new ranges.

And in 3 lines of code you have broken all callers of
add_buffer_phys_virt.

If you are going to remove the die here you need to push it into all of
the callers that don't have any other error handling.

Eric

> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> ---
>  kexec/kexec.c |    7 +++----
>  1 files changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/kexec/kexec.c b/kexec/kexec.c
> index 8928be0..0f8aec8 100644
> --- a/kexec/kexec.c
> +++ b/kexec/kexec.c
> @@ -366,10 +366,9 @@ unsigned long add_buffer_phys_virt(struct kexec_info *info,
>  	memsz = (memsz + (pagesize - 1)) & ~(pagesize - 1);
>  
>  	base = locate_hole(info, memsz, buf_align, buf_min, buf_max, buf_end);
> -	if (base == ULONG_MAX) {
> -		die("locate_hole failed\n");
> -	}
> -	
> +	if (base == ULONG_MAX)
> +		return 0;
> +
>  	add_segment_phys_virt(info, buf, bufsz, base, memsz, phys);
>  	return base;
>  }

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v2 3/6] kexec, x86: put ramdisk high for 64bit bzImage
  2012-11-19  3:04                                         ` [PATCH v2 3/6] kexec, x86: put ramdisk high for 64bit bzImage Yinghai Lu
@ 2012-11-19 17:20                                           ` Eric W. Biederman
  0 siblings, 0 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-19 17:20 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> only do that for 64bit bzImage, and will fall back to low if fail to
> get high.

The way you have modified the code is silly.

You should be able to do this with one add_buffer call, which makes
breaking all of the add_buffer callers unnecessary.
You just need to supply a larger range to add_buffer.

You can get a wider range to add_buffer by modifying the initialization
of initrd_addr_max to account for boot protocol 0x020c.

> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> ---
>  kexec/arch/i386/x86-linux-setup.c |   30 ++++++++++++++++++++++--------
>  1 files changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
> index 53d9df9..b0e6119 100644
> --- a/kexec/arch/i386/x86-linux-setup.c
> +++ b/kexec/arch/i386/x86-linux-setup.c
> @@ -69,20 +69,34 @@ void setup_linux_bootloader_parameters(
>  	}
>  
>  	/* Load the initrd if we have one */
> +	initrd_base = 0;
>  	if (initrd_buf) {
> -		initrd_base = add_buffer(info,
> -			initrd_buf, initrd_size, initrd_size,
> -			4096, INITRD_BASE, initrd_addr_max, -1);
> +		if (real_mode->protocol_version >= 0x020c &&
> +		    real_mode->code64_start_offset) {
> +			initrd_base = add_buffer(info,
> +				initrd_buf, initrd_size, initrd_size,
> +				4096, 1UL<<32, ULONG_MAX, -1);
> +			if (!initrd_base)
> +				initrd_base = add_buffer(info,
> +					initrd_buf, initrd_size, initrd_size,
> +					4096, 1UL<<30, 1UL<<32, -1);



> +		}
> +		if (!initrd_base)
> +			initrd_base = add_buffer(info,
> +				initrd_buf, initrd_size, initrd_size,
> +				4096, INITRD_BASE, initrd_addr_max, -1);
>  		dbgprintf("Loaded initrd at 0x%lx size 0x%lx\n", initrd_base,
>  			initrd_size);
> -	} else {
> -		initrd_base = 0;
> +	} else
>  		initrd_size = 0;
> -	}
>  
>  	/* Ramdisk address and size */
> -	real_mode->initrd_start = initrd_base;
> -	real_mode->initrd_size  = initrd_size;
> +	real_mode->initrd_start = initrd_base & 0xffffffff;
> +	real_mode->initrd_size  = initrd_size & 0xffffffff;
> +	if ((initrd_base + initrd_size) > (1ULL<<32)) {
> +		real_mode->ext_ramdisk_image = initrd_base >> 32;
> +		real_mode->ext_ramdisk_size  = initrd_size >> 32;
> +	}

And this needs to compile as 32bit, where 1ULL<<32 doesn't work.  So I
suggest you make the code look like:

+	if (real_mode->protocol_version >= 0x020c &&
+	    (initrd_base & 0xffffffffUL) != initrd_base)
+		real_mode->ext_ramdisk_image = initrd_base >> 32;
+
+	if (real_mode->protocol_version >= 0x020c &&
+	    (initrd_size & 0xffffffffUL) != initrd_size)
+		real_mode->ext_ramdisk_size  = initrd_size >> 32;

Eric


>  	/* The location of the command line */
>  	/* if (real_mode_base == 0x90000) { */

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v2 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high
  2012-11-19  3:04                                         ` [PATCH v2 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high Yinghai Lu
@ 2012-11-19 17:22                                           ` Eric W. Biederman
  0 siblings, 0 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-19 17:22 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> only do that for bzImage64, and it could have command line above 4g.

Just a small nit.
>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> ---
>  kexec/arch/i386/x86-linux-setup.c |    8 +++++++-
>  1 files changed, 7 insertions(+), 1 deletions(-)
>
> diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c
> index b0e6119..c929166 100644
> --- a/kexec/arch/i386/x86-linux-setup.c
> +++ b/kexec/arch/i386/x86-linux-setup.c
> @@ -105,7 +105,13 @@ void setup_linux_bootloader_parameters(
>  		/* setup_move_size */
>  	/* } */
>  	if (real_mode->protocol_version >= 0x0202) {
> -		real_mode->cmd_line_ptr = real_mode_base + cmdline_offset;
> +		unsigned long cmd_line_ptr = real_mode_base + cmdline_offset;
> +
> +		real_mode->cmd_line_ptr = cmd_line_ptr & 0xffffffff;
> +		if (real_mode->protocol_version >= 0x020c)

It probably makes sense to guard this with a test to see if the
cmd_line_ptr value exceeds 4G before doing the >> 32 in case a 32bit
kernel uses boot protocol 0x020c and we have a 32bit kexec.

+		if ((real_mode->protocol_version >= 0x020c) &&
+		    ((cmd_line_ptr & 0xffffffffUL) != cmd_line_ptr))
		
> +			real_mode->ext_cmd_line_ptr = cmd_line_ptr >> 32;
> +
> +		printf("cmd_line_ptr: %lx\n", cmd_line_ptr);
>  	}
>  
>  	/* Fill in the command line */

Eric

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v2 6/6] kexec, x86_64: put 64bit bzImage high
  2012-11-19  3:04                                         ` [PATCH v2 6/6] kexec, x86_64: put 64bit bzImage high Yinghai Lu
@ 2012-11-19 17:28                                           ` Eric W. Biederman
  0 siblings, 0 replies; 60+ messages in thread
From: Eric W. Biederman @ 2012-11-19 17:28 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Haren Myneni, Simon Horman, kexec, Vivek Goyal, H. Peter Anvin

Yinghai Lu <yinghai@kernel.org> writes:

> need to make sure pass right 64bit start address to go there directly
> later.

Instead of playing games with the definition of add_buffer and calling
add_buffer multiple times, you can call locate_hole directly to find
the hole you want to load the kernel in.  Once you have found it, call
add_buffer with very tight min and max values that you know will work.

Eric

> -v2: add kexec-bzImage64.c according to Eric.
> -v3: don't need to purgatory under 2g after Eric's change to purgatory code.
>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> ---
>  kexec/arch/x86_64/Makefile          |    1 +
>  kexec/arch/x86_64/kexec-bzImage64.c |  316 +++++++++++++++++++++++++++++++++++
>  kexec/arch/x86_64/kexec-x86_64.c    |    1 +
>  kexec/arch/x86_64/kexec-x86_64.h    |    5 +
>  4 files changed, 323 insertions(+), 0 deletions(-)
>  create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c
>
> diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile
> index 405bdf5..1cf10f9 100644
> --- a/kexec/arch/x86_64/Makefile
> +++ b/kexec/arch/x86_64/Makefile
> @@ -13,6 +13,7 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c
>  x86_64_KEXEC_SRCS_native =  kexec/arch/x86_64/kexec-x86_64.c
>  x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c
>  x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c
> +x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c
>  
>  x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native)
>  
> diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c
> new file mode 100644
> index 0000000..6835ef6
> --- /dev/null
> +++ b/kexec/arch/x86_64/kexec-bzImage64.c
> @@ -0,0 +1,316 @@
> +/*
> + * kexec: Linux boots Linux
> + *
> + * Copyright (C) 2003-2010  Eric Biederman (ebiederm@xmission.com)
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation (version 2 of the License).
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
> + */
> +
> +#define _GNU_SOURCE
> +#include <stddef.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <limits.h>
> +#include <stdlib.h>
> +#include <errno.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <getopt.h>
> +#include <elf.h>
> +#include <boot/elf_boot.h>
> +#include <ip_checksum.h>
> +#include <x86/x86-linux.h>
> +#include "../../kexec.h"
> +#include "../../kexec-elf.h"
> +#include "../../kexec-syscall.h"
> +#include "kexec-x86_64.h"
> +#include "../i386/x86-linux-setup.h"
> +#include "../i386/crashdump-x86.h"
> +#include <arch/options.h>
> +
> +static const int probe_debug = 0;
> +
> +int bzImage64_probe(const char *buf, off_t len)
> +{
> +	const struct x86_linux_header *header;
> +	if ((uintmax_t)len < (uintmax_t)(2 * 512)) {
> +		if (probe_debug) {
> +			fprintf(stderr, "File is too short to be a bzImage!\n");
> +		}
> +		return -1;
> +	}
> +	header = (const struct x86_linux_header *)buf;
> +	if (memcmp(header->header_magic, "HdrS", 4) != 0) {
> +		if (probe_debug) {
> +			fprintf(stderr, "Not a bzImage\n");
> +		}
> +		return -1;
> +	}
> +	if (header->boot_sector_magic != 0xAA55) {
> +		if (probe_debug) {
> +			fprintf(stderr, "No x86 boot sector present\n");
> +		}
> +		/* No x86 boot sector present */
> +		return -1;
> +	}
> +	if (header->protocol_version < 0x020C) {
> +		if (probe_debug) {
> +			fprintf(stderr, "Must be at least protocol version 2.12\n");
> +		}
> +		/* Must be at least protocol version 2.12 */
> +		return -1;
> +	}
> +	if ((header->loadflags & 1) == 0) {
> +		if (probe_debug) {
> +			fprintf(stderr, "zImage not a bzImage\n");
> +		}
> +		/* Not a bzImage */
> +		return -1;
> +	}
> +	if (!header->code64_start_offset) {
> +		if (probe_debug) {
> +			fprintf(stderr, "Not a bzImage64\n");
> +		}
> +		/* Must have non zero offset */
> +		return -1;
> +	}
> +	if (!header->relocatable_kernel) {
> +		if (probe_debug) {
> +			fprintf(stderr, "Not a relocatable bzImage64\n");
> +		}
> +		/* Must be relocatable */
> +		return -1;
> +	}
> +	/* I've got a bzImage64 */
> +	if (probe_debug) {
> +		fprintf(stderr, "It's a relocatable bzImage64\n");
> +	}
> +	return 0;
> +}
> +
> +void bzImage64_usage(void)
> +{
> +	printf(	"    --command-line=STRING Set the kernel command line to STRING.\n"
> +		"    --append=STRING       Set the kernel command line to STRING.\n"
> +		"    --reuse-cmdline       Use kernel command line from running system.\n"
> +		"    --initrd=FILE         Use FILE as the kernel's initial ramdisk.\n"
> +		"    --ramdisk=FILE        Use FILE as the kernel's initial ramdisk.\n"
> +		);
> +}
> +
> +static int do_bzImage64_load(struct kexec_info *info,
> +	const char *kernel, off_t kernel_len,
> +	const char *command_line, off_t command_line_len,
> +	const char *initrd, off_t initrd_len)
> +{
> +	struct x86_linux_header setup_header;
> +	struct x86_linux_param_header *real_mode;
> +	int setup_sects;
> +	size_t size;
> +	int kern16_size;
> +	unsigned long setup_base, setup_size;
> +	struct entry64_regs regs64;
> +	char *modified_cmdline;
> +	unsigned long cmdline_end;
> +	unsigned long code64_start_offset = 0;
> +	unsigned long kernel64_load_addr = 0;
> +
> +	/*
> +	 * Find out about the file I am about to load.
> +	 */
> +	if ((uintmax_t)kernel_len < (uintmax_t)(2 * 512))
> +		return -1;
> +
> +	memcpy(&setup_header, kernel, sizeof(setup_header));
> +	setup_sects = setup_header.setup_sects;
> +	if (setup_sects == 0)
> +		setup_sects = 4;
> +
> +	kern16_size = (setup_sects +1) *512;
> +	if (kernel_len < kern16_size) {
> +		fprintf(stderr, "BzImage truncated?\n");
> +		return -1;
> +	}
> +
> +	if ((uintmax_t)command_line_len > (uintmax_t)setup_header.cmdline_size) {
> +		dbgprintf("Kernel command line too long for kernel!\n");
> +		return -1;
> +	}
> +
> +	dbgprintf("bzImage is relocatable\n");
> +
> +	code64_start_offset = setup_header.code64_start_offset;
> +	dbgprintf("code64_start_offset: 0x%lx\n", code64_start_offset);
> +
> +	/* Need to append some command line parameters internally in case of
> +	 * taking crash dumps.
> +	 */
> +	if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
> +		modified_cmdline = xmalloc(COMMAND_LINE_SIZE);
> +		memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE);
> +		if (command_line) {
> +			strncpy(modified_cmdline, command_line,
> +					COMMAND_LINE_SIZE);
> +			modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0';
> +		}
> +
> +		/* If panic kernel is being loaded, additional segments need
> +		 * to be created. load_crashdump_segments will take care of
> +		 * loading the segments as high in memory as possible, hence
> +		 * in turn as away as possible from kernel to avoid being
> +		 * stomped by the kernel.
> +		 */
> +		if (load_crashdump_segments(info, modified_cmdline, -1, 0) < 0)
> +			return -1;
> +
> +		/* Use new command line buffer */
> +		command_line = modified_cmdline;
> +		command_line_len = strlen(command_line) +1;
> +	}
> +
> +	/* x86_64 purgatory could be anywhere */
> +	elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
> +					0x3000, -1, -1, 0);
> +	dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr);
> +	/* The argument/parameter segment */
> +	setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
> +	real_mode = xmalloc(setup_size);
> +	memcpy(real_mode, kernel, kern16_size);
> +
> +	/* No real mode code will be executing. setup segment can be loaded
> +	 * anywhere as we will be just reading command line.
> +	 */
> +	setup_base = add_buffer(info, real_mode, setup_size, setup_size,
> +				16, 0x3000, -1, -1);
> +
> +	dbgprintf("Loaded setup data and command line at 0x%lx\n",
> +			setup_base);
> +
> +	/* The main kernel segment */
> +	size = kernel_len - kern16_size;
> +
> +	/* align to 1G to avoid cross the PUD_SIZE boundary */
> +	/* try above 4G at first */
> +	kernel64_load_addr = add_buffer(info, kernel + kern16_size,
> +					size, size, 1UL<<30,
> +					1UL<<32, ULONG_MAX,
> +					-1);
> +
> +	/* 1G to 4G */
> +	if (!kernel64_load_addr)
> +		kernel64_load_addr = add_buffer(info, kernel + kern16_size,
> +						size, size, 1UL<<30,
> +						1UL<<30, 1UL<<32,
> +						-1);
> +
> +	/* under 1g */
> +	if (!kernel64_load_addr)
> +		kernel64_load_addr = add_buffer(info, kernel + kern16_size,
> +						size, size,
> +						real_mode->kernel_alignment,
> +						0x100000, 1UL<<30,
> +						1);
> +	if (kernel64_load_addr)
> +		dbgprintf("Loaded 64bit kernel at 0x%lx\n", kernel64_load_addr);
> +	else
> +		die("can not load bzImage64");
> +
> +	/* Tell the kernel what is going on */
> +	setup_linux_bootloader_parameters(info, real_mode, setup_base,
> +			kern16_size, command_line, command_line_len,
> +			initrd, initrd_len);
> +
> +	elf_rel_get_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
> +	regs64.rbx = 0;           /* Bootstrap processor */
> +	regs64.rsi = setup_base;  /* Pointer to the parameters */
> +	regs64.rip = kernel64_load_addr + code64_start_offset; /* the entry point */
> +	regs64.rsp = elf_rel_get_addr(&info->rhdr, "stack_end"); /* Stack, unused */
> +	elf_rel_set_symbol(&info->rhdr, "entry64_regs", &regs64, sizeof(regs64));
> +
> +	cmdline_end = setup_base + kern16_size + command_line_len - 1;
> +	elf_rel_set_symbol(&info->rhdr, "cmdline_end", &cmdline_end,
> +			   sizeof(unsigned long));
> +
> +	/* Fill in the information BIOS calls would normally provide. */
> +	setup_linux_system_parameters(real_mode, info->kexec_flags);
> +
> +	return 0;
> +}
> +
> +int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
> +	struct kexec_info *info)
> +{
> +	char *command_line = NULL;
> +	const char *ramdisk, *append = NULL;
> +	char *ramdisk_buf;
> +	off_t ramdisk_length;
> +	int command_line_len;
> +	int opt;
> +	int result;
> +
> +	/* See options.h -- add any more there, too. */
> +	static const struct option options[] = {
> +		KEXEC_ARCH_OPTIONS
> +		{ "command-line",	1, 0, OPT_APPEND },
> +		{ "append",		1, 0, OPT_APPEND },
> +		{ "reuse-cmdline",	0, 0, OPT_REUSE_CMDLINE },
> +		{ "initrd",		1, 0, OPT_RAMDISK },
> +		{ "ramdisk",		1, 0, OPT_RAMDISK },
> +		{ 0,			0, 0, 0 },
> +	};
> +	static const char short_options[] = KEXEC_ARCH_OPT_STR "d";
> +
> +	ramdisk = 0;
> +	ramdisk_length = 0;
> +	while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) {
> +		switch(opt) {
> +		default:
> +			/* Ignore core options */
> +			if (opt < OPT_ARCH_MAX) {
> +				break;
> +			}
> +		case '?':
> +			usage();
> +			return -1;
> +			break;
> +		case OPT_APPEND:
> +			append = optarg;
> +			break;
> +		case OPT_REUSE_CMDLINE:
> +			command_line = get_command_line();
> +			break;
> +		case OPT_RAMDISK:
> +			ramdisk = optarg;
> +			break;
> +		}
> +	}
> +	command_line = concat_cmdline(command_line, append);
> +	command_line_len = 0;
> +	if (command_line) {
> +		command_line_len = strlen(command_line) +1;
> +	}
> +	ramdisk_buf = 0;
> +	if (ramdisk) {
> +		ramdisk_buf = slurp_file(ramdisk, &ramdisk_length);
> +	}
> +	result = do_bzImage64_load(info,
> +		buf, len,
> +		command_line, command_line_len,
> +		ramdisk_buf, ramdisk_length);
> +
> +	free(command_line);
> +	return result;
> +}
> diff --git a/kexec/arch/x86_64/kexec-x86_64.c b/kexec/arch/x86_64/kexec-x86_64.c
> index 6c42c32..5c23e01 100644
> --- a/kexec/arch/x86_64/kexec-x86_64.c
> +++ b/kexec/arch/x86_64/kexec-x86_64.c
> @@ -37,6 +37,7 @@ struct file_type file_type[] = {
>  	{ "multiboot-x86", multiboot_x86_probe, multiboot_x86_load,
>  	  multiboot_x86_usage },
>  	{ "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage },
> +	{ "bzImage64", bzImage64_probe, bzImage64_load, bzImage64_usage },
>  	{ "bzImage", bzImage_probe, bzImage_load, bzImage_usage },
>  	{ "beoboot-x86", beoboot_probe, beoboot_load, beoboot_usage },
>  	{ "nbi-x86", nbi_probe, nbi_load, nbi_usage },
> diff --git a/kexec/arch/x86_64/kexec-x86_64.h b/kexec/arch/x86_64/kexec-x86_64.h
> index a97cd71..b820ae8 100644
> --- a/kexec/arch/x86_64/kexec-x86_64.h
> +++ b/kexec/arch/x86_64/kexec-x86_64.h
> @@ -28,4 +28,9 @@ int elf_x86_64_load(int argc, char **argv, const char *buf, off_t len,
>  	struct kexec_info *info);
>  void elf_x86_64_usage(void);
>  
> +int bzImage64_probe(const char *buf, off_t len);
> +int bzImage64_load(int argc, char **argv, const char *buf, off_t len,
> +        struct kexec_info *info);
> +void bzImage64_usage(void);
> +
>  #endif /* KEXEC_X86_64_H */

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit
  2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
                   ` (7 preceding siblings ...)
  2012-11-16 23:04 ` [PATCH 8/8] x86: put 64bit bzImage high Yinghai Lu
@ 2012-11-19 21:00 ` Vivek Goyal
  2012-11-19 22:34   ` Yinghai Lu
  8 siblings, 1 reply; 60+ messages in thread
From: Vivek Goyal @ 2012-11-19 21:00 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Haren Myneni, Simon Horman, kexec, Eric W. Biederman, H. Peter Anvin

On Fri, Nov 16, 2012 at 03:04:19PM -0800, Yinghai Lu wrote:
> Now we have limit kdump reserved under 896M, because kexec has the limitation.
> and also bzImage need to stay under 4g.
> 
> kernel parts changes could be found at:
>         git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git for-x86-boot
> 
> here patches are for kexec tools to load bzImage and ramdisk high according
> to new added boot header fields.

So does this allow me to load other segments also above 4G? (purgatory,
setup/bootparams, command line etc). I remember that purgatory has
some 32bit relocations so had to be loaded below 2G. 

Thanks
Vivek

> 
> Yinghai Lu (8):
>   Add min/max macro
>   x86: add boot header member for version 2.12
>   add mem64_min/max control
>   Move out mem_min/max checking in locate_hole
>   seperate checking 64bit mem range
>   debug print out for add_buf
>   x86: put ramdisk high for 64bit bzImage
>   x86: put 64bit bzImage high
> 
>  include/x86/x86-linux.h           |   20 ++++++++++-
>  kexec/add_buffer.c                |    9 ++++-
>  kexec/arch/i386/kexec-bzImage.c   |   53 ++++++++++++++++++++++++++--
>  kexec/arch/i386/x86-linux-setup.c |   32 ++++++++++++-----
>  kexec/arch/ppc/include/types.h    |   12 ------
>  kexec/kexec.c                     |   69 +++++++++++++++++++++++++++++-------
>  kexec/kexec.h                     |   21 +++++++++++-
>  7 files changed, 175 insertions(+), 41 deletions(-)
> 
> -- 
> 1.7.7

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit
  2012-11-19 21:00 ` [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Vivek Goyal
@ 2012-11-19 22:34   ` Yinghai Lu
  0 siblings, 0 replies; 60+ messages in thread
From: Yinghai Lu @ 2012-11-19 22:34 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Haren Myneni, Simon Horman, kexec, Eric W. Biederman, H. Peter Anvin

On Mon, Nov 19, 2012 at 1:00 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
>> here patches are for kexec tools to load bzImage and ramdisk high according
>> to new added boot header fields.
>
> So does this allow me to load other segments also above 4G? (purgatory,
> setup/bootparams, command line etc). I remember that purgatory has
> some 32bit relocations so had to be loaded below 2G.

for 64bit bzImage, yes.

Eric updated the purgatory code to remove the restriction that it cannot be loaded above 4G ...

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 60+ messages in thread

end of thread, other threads:[~2012-11-19 22:34 UTC | newest]

Thread overview: 60+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-11-16 23:04 [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
2012-11-16 23:04 ` [PATCH 1/8] Add min/max macro Yinghai Lu
2012-11-16 23:04 ` [PATCH 2/8] x86: add boot header member for version 2.12 Yinghai Lu
2012-11-16 23:04 ` [PATCH 3/8] add mem64_min/max control Yinghai Lu
2012-11-17  6:18   ` Eric W. Biederman
2012-11-17  7:06     ` Yinghai Lu
2012-11-17  8:25       ` Eric W. Biederman
2012-11-17 20:04         ` Yinghai Lu
2012-11-17 20:41           ` H. Peter Anvin
2012-11-17 20:51             ` Yinghai Lu
2012-11-17 20:54               ` H. Peter Anvin
2012-11-18  0:44           ` Yinghai Lu
2012-11-18  4:34             ` H. Peter Anvin
2012-11-18  4:47               ` Eric W. Biederman
2012-11-18  4:55                 ` H. Peter Anvin
2012-11-18  5:00                   ` Eric W. Biederman
2012-11-18  5:14                     ` H. Peter Anvin
2012-11-18  4:56                 ` Yinghai Lu
2012-11-18  5:20                   ` Eric W. Biederman
2012-11-18  5:35                     ` Yinghai Lu
2012-11-18  5:39                       ` Yinghai Lu
2012-11-18  5:58                         ` Yinghai Lu
2012-11-18  6:11                           ` Eric W. Biederman
2012-11-18  6:32                             ` Yinghai Lu
2012-11-18  6:38                             ` Yinghai Lu
2012-11-18  6:50                               ` Eric W. Biederman
2012-11-18  6:53                                 ` Yinghai Lu
2012-11-18  7:18                                   ` Yinghai Lu
2012-11-18 10:38                                     ` Eric W. Biederman
2012-11-19  3:02                                       ` [PATCH 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
2012-11-19  3:02                                         ` [PATCH 1/6] kexec, x86: add boot header member for version 2.12 Yinghai Lu
2012-11-19  3:02                                         ` [PATCH 2/6] kexec: don't die during buffer finding Yinghai Lu
2012-11-19  3:02                                         ` [PATCH 3/6] kexec, x86: put ramdisk high for 64bit bzImage Yinghai Lu
2012-11-19  3:02                                         ` [PATCH 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high Yinghai Lu
2012-11-19  3:02                                         ` [PATCH 5/6] kexec, x86: Make x64_64 purgatory relocatable above 4G Yinghai Lu
2012-11-19  3:02                                         ` [PATCH 6/6] kexec, x86_64: put 64bit bzImage high Yinghai Lu
2012-11-19  3:04                                       ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Yinghai Lu
2012-11-19  3:04                                         ` [PATCH v2 1/6] kexec, x86: add boot header member for version 2.12 Yinghai Lu
2012-11-19  3:04                                         ` [PATCH v2 2/6] kexec: don't die during buffer finding Yinghai Lu
2012-11-19 17:05                                           ` Eric W. Biederman
2012-11-19  3:04                                         ` [PATCH v2 3/6] kexec, x86: put ramdisk high for 64bit bzImage Yinghai Lu
2012-11-19 17:20                                           ` Eric W. Biederman
2012-11-19  3:04                                         ` [PATCH v2 4/6] kexec, x86: set ext_cmd_line_ptr when boot_param is put high Yinghai Lu
2012-11-19 17:22                                           ` Eric W. Biederman
2012-11-19  3:04                                         ` [PATCH v2 5/6] kexec, x86: Make x64_64 purgatory relocatable above 4G Yinghai Lu
2012-11-19  3:04                                         ` [PATCH v2 6/6] kexec, x86_64: put 64bit bzImage high Yinghai Lu
2012-11-19 17:28                                           ` Eric W. Biederman
2012-11-19 17:04                                         ` [PATCH v2 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit Eric W. Biederman
2012-11-18  6:24                         ` [PATCH 3/8] add mem64_min/max control H. Peter Anvin
2012-11-18  6:23                     ` H. Peter Anvin
2012-11-18  6:44                       ` Eric W. Biederman
2012-11-16 23:04 ` [PATCH 4/8] Move out mem_min/max checking in locate_hole Yinghai Lu
2012-11-16 23:04 ` [PATCH 5/8] seperate checking 64bit mem range Yinghai Lu
2012-11-16 23:04 ` [PATCH 6/8] debug print out for add_buf Yinghai Lu
2012-11-16 23:04 ` [PATCH 7/8] x86: put ramdisk high for 64bit bzImage Yinghai Lu
2012-11-16 23:04 ` [PATCH 8/8] x86: put 64bit bzImage high Yinghai Lu
2012-11-17  6:33   ` Eric W. Biederman
     [not found]     ` <CAE9FiQWJaT9yfdV0rgV-5rM=BR4eX8sr+a99g8Ggf-+YkD8qgQ@mail.gmail.com>
2012-11-17  8:43       ` Eric W. Biederman
2012-11-19 21:00 ` [PATCH 0/8] kexec: put bzImage and ramdisk above 4G for x86 64bit Vivek Goyal
2012-11-19 22:34   ` Yinghai Lu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.