All of lore.kernel.org
 help / color / mirror / Atom feed
From: <Stuart_Hayes@Dell.com>
To: <ak@suse.de>
Cc: <riel@redhat.com>, <andrea@suse.de>, <linux-kernel@vger.kernel.org>
Subject: RE: page allocation/attributes question (i386/x86_64 specific)
Date: Fri, 1 Jul 2005 15:33:33 -0500	[thread overview]
Message-ID: <B1939BC11A23AE47A0DBE89A37CB26B4074359@ausx3mps305.aus.amer.dell.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 5598 bytes --]

> 
> So, if I understand correctly what's going on in x86_64, your fix
> wouldn't be applicable to i386.  In x86_64, every large page has a
> correct "ref_prot" that is the normal setting for that page... but in
> i386, the kernel text area does not--it should ideally be split into
> small pages all the time if there are both kernel code & free pages
> residing in the same 2M area.     
> 
> Stuart

(This isn't a submission--I'm just posting this for comments.)

Right now, any large page that overlaps the range from PAGE_OFFSET to
__init_end is initially mapped as a large, executable page, even though
part of this area contains data & free pages.  The patch below adds a
"cleanup_nx_in_kerneltext()" function, called at the end of
free_initmem(), which changes the pages in this area--except for the
range from "_text" to "_etext"--to PAGE_KERNEL (i.e., non-executable).

This does result in two large pages being permanently split into small
pages (individual PTEs), but all the non-code regions will be
non-executable, and change_page_attr() will work correctly.

What do you think of this?  I have tested this on 2.6.12.

(I've attached the patch as a file, too, since my mail server can't be
convinced not to wrap text; the inline copy below is line-wrapped.)

Stuart

-----


diff -purN --exclude='*.o' --exclude='*.cmd'
linux-2.6.12grep/arch/i386/mm/init.c linux-2.6.12/arch/i386/mm/init.c
--- linux-2.6.12grep/arch/i386/mm/init.c	2005-07-01
15:09:27.000000000 -0500
+++ linux-2.6.12/arch/i386/mm/init.c	2005-07-01 15:13:06.000000000
-0500
@@ -666,6 +666,30 @@ static int noinline do_test_wp_bit(void)
 	return flag;
 }
 
+extern int change_page_attr_perm(struct page *, int, pgprot_t);
+
+/*
+ * In kernel_physical_mapping_init(), any big pages that contained
kernel text area were
+ * set up as big executable pages.  This function should be called when
the initmem
+ * is freed, to correctly set up the executable & non-executable pages
in this area.
+ */
+static void cleanup_nx_in_kerneltext(void)
+{
+	unsigned long from, to;
+
+	if (!nx_enabled) return;
+
+	from = PAGE_OFFSET;
+	to = (unsigned long)_text & PAGE_MASK;
+	for (; from<to; from += PAGE_SIZE)
+		change_page_attr_perm(virt_to_page(from), 1,
PAGE_KERNEL); 
+	
+	from = ((unsigned long)_etext + PAGE_SIZE - 1) & PAGE_MASK;
+	to = ((unsigned long)__init_end + LARGE_PAGE_SIZE) &
LARGE_PAGE_MASK;
+	for (; from<to; from += PAGE_SIZE)
+		change_page_attr_perm(virt_to_page(from), 1,
PAGE_KERNEL); 
+}
+
 void free_initmem(void)
 {
 	unsigned long addr;
@@ -679,6 +703,8 @@ void free_initmem(void)
 		totalram_pages++;
 	}
 	printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n",
(__init_end - __init_begin) >> 10);
+
+	cleanup_nx_in_kerneltext();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff -purN --exclude='*.o' --exclude='*.cmd'
linux-2.6.12grep/arch/i386/mm/pageattr.c
linux-2.6.12/arch/i386/mm/pageattr.c
--- linux-2.6.12grep/arch/i386/mm/pageattr.c	2005-07-01
15:09:08.000000000 -0500
+++ linux-2.6.12/arch/i386/mm/pageattr.c	2005-07-01
14:56:06.000000000 -0500
@@ -35,7 +35,7 @@ pte_t *lookup_address(unsigned long addr
         return pte_offset_kernel(pmd, address);
 } 
 
-static struct page *split_large_page(unsigned long address, pgprot_t
prot)
+static struct page *split_large_page(unsigned long address, pgprot_t
prot, pgprot_t ref_prot)
 { 
 	int i; 
 	unsigned long addr;
@@ -53,7 +53,7 @@ static struct page *split_large_page(uns
 	pbase = (pte_t *)page_address(base);
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
 		pbase[i] = pfn_pte(addr >> PAGE_SHIFT, 
-				   addr == address ? prot :
PAGE_KERNEL);
+				   addr == address ? prot : ref_prot);
 	}
 	return base;
 } 
@@ -122,7 +122,7 @@ __change_page_attr(struct page *page, pg
 		if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
 			set_pte_atomic(kpte, mk_pte(page, prot)); 
 		} else {
-			struct page *split = split_large_page(address,
prot); 
+			struct page *split = split_large_page(address,
prot, PAGE_KERNEL); 
 			if (!split)
 				return -ENOMEM;
 			set_pmd_pte(kpte,address,mk_pte(split,
PAGE_KERNEL));
@@ -152,6 +152,38 @@ __change_page_attr(struct page *page, pg
 	return 0;
 } 
 
+static int __change_page_attr_perm (struct page *page, pgprot_t prot)
+{ 
+	pte_t *kpte; 
+	unsigned long address;
+	struct page *kpte_page;
+
+	BUG_ON(PageHighMem(page));
+	address = (unsigned long)page_address(page);
+
+	kpte = lookup_address(address);
+	if (!kpte)
+		return -EINVAL;
+	kpte_page = virt_to_page(kpte);
+
+	if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
+		set_pte_atomic(kpte, mk_pte(page, prot)); 
+	} else {
+		pgprot_t ref_prot;
+
+		if ((pte_val(*kpte) & _PAGE_NX))
+			ref_prot = PAGE_KERNEL;
+		else
+			ref_prot = PAGE_KERNEL_EXEC;
+		kpte_page = split_large_page(address, prot, ref_prot);
+		if (!kpte_page)
+			return -ENOMEM;
+		set_pmd_pte(kpte,address,mk_pte(kpte_page, ref_prot));
+	}	
+	SetPageReserved(kpte_page);
+	return 0;
+} 
+
 static inline void flush_map(void)
 {
 	on_each_cpu(flush_kernel_map, NULL, 1, 1);
@@ -186,6 +218,22 @@ int change_page_attr(struct page *page, 
 	return err;
 }
 
+int change_page_attr_perm(struct page *page, int numpages, pgprot_t
prot)
+{
+	int err = 0; 
+	int i; 
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpa_lock, flags);
+	for (i = 0; i < numpages; i++, page++) { 
+		err = __change_page_attr_perm(page, prot);
+		if (err) 
+			break; 
+	} 	
+	spin_unlock_irqrestore(&cpa_lock, flags);
+	return err;
+}
+
 void global_flush_tlb(void)
 { 
 	LIST_HEAD(l);

[-- Attachment #2: pass1.patch --]
[-- Type: application/octet-stream, Size: 4194 bytes --]

diff -purN --exclude='*.o' --exclude='*.cmd' linux-2.6.12grep/arch/i386/mm/init.c linux-2.6.12/arch/i386/mm/init.c
--- linux-2.6.12grep/arch/i386/mm/init.c	2005-07-01 15:09:27.000000000 -0500
+++ linux-2.6.12/arch/i386/mm/init.c	2005-07-01 15:13:06.000000000 -0500
@@ -666,6 +666,30 @@ static int noinline do_test_wp_bit(void)
 	return flag;
 }
 
+extern int change_page_attr_perm(struct page *, int, pgprot_t);
+
+/*
+ * In kernel_physical_mapping_init(), any big pages that contained kernel text area were
+ * set up as big executable pages.  This function should be called when the initmem
+ * is freed, to correctly set up the executable & non-executable pages in this area.
+ */
+static void cleanup_nx_in_kerneltext(void)
+{
+	unsigned long from, to;
+
+	if (!nx_enabled) return;
+
+	from = PAGE_OFFSET;
+	to = (unsigned long)_text & PAGE_MASK;
+	for (; from<to; from += PAGE_SIZE)
+		change_page_attr_perm(virt_to_page(from), 1, PAGE_KERNEL); 
+	
+	from = ((unsigned long)_etext + PAGE_SIZE - 1) & PAGE_MASK;
+	to = ((unsigned long)__init_end + LARGE_PAGE_SIZE) & LARGE_PAGE_MASK;
+	for (; from<to; from += PAGE_SIZE)
+		change_page_attr_perm(virt_to_page(from), 1, PAGE_KERNEL); 
+}
+
 void free_initmem(void)
 {
 	unsigned long addr;
@@ -679,6 +703,8 @@ void free_initmem(void)
 		totalram_pages++;
 	}
 	printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
+
+	cleanup_nx_in_kerneltext();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff -purN --exclude='*.o' --exclude='*.cmd' linux-2.6.12grep/arch/i386/mm/pageattr.c linux-2.6.12/arch/i386/mm/pageattr.c
--- linux-2.6.12grep/arch/i386/mm/pageattr.c	2005-07-01 15:09:08.000000000 -0500
+++ linux-2.6.12/arch/i386/mm/pageattr.c	2005-07-01 14:56:06.000000000 -0500
@@ -35,7 +35,7 @@ pte_t *lookup_address(unsigned long addr
         return pte_offset_kernel(pmd, address);
 } 
 
-static struct page *split_large_page(unsigned long address, pgprot_t prot)
+static struct page *split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 { 
 	int i; 
 	unsigned long addr;
@@ -53,7 +53,7 @@ static struct page *split_large_page(uns
 	pbase = (pte_t *)page_address(base);
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
 		pbase[i] = pfn_pte(addr >> PAGE_SHIFT, 
-				   addr == address ? prot : PAGE_KERNEL);
+				   addr == address ? prot : ref_prot);
 	}
 	return base;
 } 
@@ -122,7 +122,7 @@ __change_page_attr(struct page *page, pg
 		if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
 			set_pte_atomic(kpte, mk_pte(page, prot)); 
 		} else {
-			struct page *split = split_large_page(address, prot); 
+			struct page *split = split_large_page(address, prot, PAGE_KERNEL); 
 			if (!split)
 				return -ENOMEM;
 			set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
@@ -152,6 +152,38 @@ __change_page_attr(struct page *page, pg
 	return 0;
 } 
 
+static int __change_page_attr_perm (struct page *page, pgprot_t prot)
+{ 
+	pte_t *kpte; 
+	unsigned long address;
+	struct page *kpte_page;
+
+	BUG_ON(PageHighMem(page));
+	address = (unsigned long)page_address(page);
+
+	kpte = lookup_address(address);
+	if (!kpte)
+		return -EINVAL;
+	kpte_page = virt_to_page(kpte);
+
+	if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
+		set_pte_atomic(kpte, mk_pte(page, prot)); 
+	} else {
+		pgprot_t ref_prot;
+
+		if ((pte_val(*kpte) & _PAGE_NX))
+			ref_prot = PAGE_KERNEL;
+		else
+			ref_prot = PAGE_KERNEL_EXEC;
+		kpte_page = split_large_page(address, prot, ref_prot);
+		if (!kpte_page)
+			return -ENOMEM;
+		set_pmd_pte(kpte,address,mk_pte(kpte_page, ref_prot));
+	}	
+	SetPageReserved(kpte_page);
+	return 0;
+} 
+
 static inline void flush_map(void)
 {
 	on_each_cpu(flush_kernel_map, NULL, 1, 1);
@@ -186,6 +218,22 @@ int change_page_attr(struct page *page, 
 	return err;
 }
 
+int change_page_attr_perm(struct page *page, int numpages, pgprot_t prot)
+{
+	int err = 0; 
+	int i; 
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpa_lock, flags);
+	for (i = 0; i < numpages; i++, page++) { 
+		err = __change_page_attr_perm(page, prot);
+		if (err) 
+			break; 
+	} 	
+	spin_unlock_irqrestore(&cpa_lock, flags);
+	return err;
+}
+
 void global_flush_tlb(void)
 { 
 	LIST_HEAD(l);

             reply	other threads:[~2005-07-01 20:38 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-07-01 20:33 Stuart_Hayes [this message]
  -- strict thread matches above, loose matches on Subject: below --
2005-07-20 19:24 page allocation/attributes question (i386/x86_64 specific) Stuart_Hayes
2005-07-20 15:06 Stuart_Hayes
2005-07-20 20:45 ` Ingo Molnar
2005-07-20 14:51 Stuart_Hayes
2005-07-20 15:00 ` Ingo Molnar
2005-07-19 21:20 Stuart Hayes
2005-07-19 22:04 ` Ingo Molnar
2005-07-07 17:21 Stuart_Hayes
2005-07-05 21:35 Stuart_Hayes
2005-07-05 22:00 ` randy_dunlap
2005-07-05 20:02 Stuart_Hayes
2005-07-05 21:27 ` randy_dunlap
2005-07-07 14:33 ` Andi Kleen
2005-06-30 19:11 Stuart_Hayes
2005-06-30 17:14 Stuart_Hayes
2005-06-30 16:56 Stuart_Hayes
2005-06-30 17:34 ` Arjan van de Ven
2005-06-29 17:20 Stuart_Hayes
2005-06-28 20:16 Stuart_Hayes
2005-06-30 16:15 ` Andi Kleen
2005-06-24 18:20 Stuart_Hayes
2005-06-25  2:28 ` Andi Kleen
2005-06-22 17:11 Stuart_Hayes
2005-06-22 23:54 ` Rik Van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=B1939BC11A23AE47A0DBE89A37CB26B4074359@ausx3mps305.aus.amer.dell.com \
    --to=stuart_hayes@dell.com \
    --cc=ak@suse.de \
    --cc=andrea@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.