From: David Gibson <david@gibson.dropbear.id.au>
To: Andrew Morton <akpm@digeo.com>
Cc: David Miller <davem@redhat.com>,
	Paul Mackerras <paulus@samba.org>,
	linux-kernel@vger.kernel.org,
	linuxppc-embedded@lists.linuxppc.org
Subject: Re: [PATCH,RFC] Add gfp_mask to get_vm_area()
Date: Thu, 3 Oct 2002 14:39:48 +1000
Message-ID: <20021003043948.GN1102@zax>
In-Reply-To: <20021001053417.GW10265@zax>

On Tue, Oct 01, 2002 at 03:34:17PM +1000, David Gibson wrote:
> 
> On Mon, Sep 30, 2002 at 10:08:00PM -0700, Andrew Morton wrote:
> >
> > David Gibson wrote:
> > >
> > > Dave, please consider this patch.  It renames get_vm_area() to
> > > __get_vm_area() and adds a gfp_mask parameter which is passed on to
> > > kmalloc().  get_vm_area(size,flags) is then defined as
> > > __get_vm_area(size,flags,GFP_KERNEL) to avoid messing with existing
> > > callers.
> > >
> > > We need this in order to sanely make pci_alloc_consistent() (and other
> > > consistent allocation functions) obey the DMA-mapping.txt rules on PPC
> > > embedded machines (specifically the requirement that it be callable
> > > from interrupt context).
> > >
> >
> > I can look after that for you.  But I'd prefer that you just add the
> > extra gfp_flags argument to get_vm_area() and update the 16 callers.
> >
> > You cannot call get_vm_area() from interrupt context at present;
> > it does write_lock(&vmlist_lock) unsafely.
> 
> Oh crap, I'm an idiot.  I've even seen prototype patches for this one
> that changed the write_lock() to write_lock_irq(). Duh.
> 
> > It would be a bit sad to make vmlist_lock interrupt-safe for this.  Is
> > there no alternative?
> 
> I don't see an easy one: PPC 4xx has a non-coherent cache, so we have to
> mark consistent memory non-cacheable.  We want to make the normal
> lowmem mapping use large page TLB entries, so we can't frob the
> attribute bits on the pages in place.  That means we need to create a
> new, non-cacheable mapping for the physical RAM we allocate, which in
> turn means allocating a chunk of kernel virtual memory.

Well, here is an updated patch which should make the vmlist_lock
interrupt-safe.  It makes __get_vm_area() and remove_vm_area()
callable from interrupt context.  If you can think of some way of
avoiding doing that, I'd love to hear it - I agree this would be
rather sad.
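
To make the intended use concrete, the caller I have in mind would look
roughly like the sketch below.  This is illustrative only, not part of
the patch: ppc4xx_consistent_alloc() and ppc4xx_map_uncached() are
made-up names, error handling is abbreviated, and the arch-specific
step of installing the no-cache PTEs is waved over.

	/* Hypothetical sketch of a 4xx consistent allocator built on the
	 * new __get_vm_area().  Not part of this patch. */
	void *ppc4xx_consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
	{
		int order = get_order(size);
		struct page *page;
		struct vm_struct *area;

		page = alloc_pages(gfp, order);
		if (!page)
			return NULL;

		/* Reserve kernel virtual space.  gfp may be GFP_ATOMIC,
		 * which is why __get_vm_area() must neither sleep nor
		 * take vmlist_lock in an interrupt-unsafe way. */
		area = __get_vm_area(size, VM_ALLOC, gfp);
		if (!area) {
			__free_pages(page, order);
			return NULL;
		}

		*dma_handle = virt_to_phys(page_address(page));

		/* Arch-specific step (illustrative name): install uncacheable
		 * PTEs (_PAGE_NO_CACHE | _PAGE_GUARDED on 4xx) for area->addr
		 * covering the pages just allocated. */
		ppc4xx_map_uncached((unsigned long)area->addr, *dma_handle, size);

		return area->addr;
	}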

Thoughts?

Incidentally, DaveM, what's the rationale for requiring the consistent
allocation functions to be callable from interrupt context?  It's not
immediately obvious to me.
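
For review, the interface change itself boils down to the fragment
below (again illustrative, not lifted from the patch): existing callers
keep compiling unchanged because get_vm_area() becomes a macro wrapping
the GFP_KERNEL case, and only code that may run in interrupt context
needs the three-argument form.

	/* existing callers, unchanged: */
	area = get_vm_area(size, VM_IOREMAP);
		/* expands to __get_vm_area(size, VM_IOREMAP, GFP_KERNEL) */

	/* new-style caller that may run in interrupt context: */
	area = __get_vm_area(size, VM_ALLOC, GFP_ATOMIC);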

diff -urN /home/dgibson/kernel/linuxppc-2.5/fs/proc/kcore.c linux-bluefish/fs/proc/kcore.c
--- /home/dgibson/kernel/linuxppc-2.5/fs/proc/kcore.c	2002-10-01 10:17:33.000000000 +1000
+++ linux-bluefish/fs/proc/kcore.c	2002-10-03 14:25:32.000000000 +1000
@@ -318,10 +318,10 @@
 	int num_vma;
 	unsigned long start;
 
-	read_lock(&vmlist_lock);
+	read_lock_irq(&vmlist_lock);
 	proc_root_kcore->size = size = get_kcore_size(&num_vma, &elf_buflen);
 	if (buflen == 0 || *fpos >= size) {
-		read_unlock(&vmlist_lock);
+		read_unlock_irq(&vmlist_lock);
 		return 0;
 	}
 
@@ -338,12 +338,12 @@
 			tsz = buflen;
 		elf_buf = kmalloc(elf_buflen, GFP_ATOMIC);
 		if (!elf_buf) {
-			read_unlock(&vmlist_lock);
+			read_unlock_irq(&vmlist_lock);
 			return -ENOMEM;
 		}
 		memset(elf_buf, 0, elf_buflen);
 		elf_kcore_store_hdr(elf_buf, num_vma, elf_buflen);
-		read_unlock(&vmlist_lock);
+		read_unlock_irq(&vmlist_lock);
 		if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
 			kfree(elf_buf);
 			return -EFAULT;
@@ -358,7 +358,7 @@
 		if (buflen == 0)
 			return acc;
 	} else
-		read_unlock(&vmlist_lock);
+		read_unlock_irq(&vmlist_lock);
 
 	/* where page 0 not mapped, write zeros into buffer */
 #if defined (__i386__) || defined (__mc68000__) || defined(__x86_64__)
@@ -403,7 +403,7 @@
 				return -ENOMEM;
 			memset(elf_buf, 0, tsz);
 
-			read_lock(&vmlist_lock);
+			read_lock_irq(&vmlist_lock);
 			for (m=vmlist; m && cursize; m=m->next) {
 				unsigned long vmstart;
 				unsigned long vmsize;
@@ -431,7 +431,7 @@
 				memcpy(elf_buf + (vmstart - start),
 					(char *)vmstart, vmsize);
 			}
-			read_unlock(&vmlist_lock);
+			read_unlock_irq(&vmlist_lock);
 			if (copy_to_user(buffer, elf_buf, tsz)) {
 				kfree(elf_buf);
 				return -EFAULT;
diff -urN /home/dgibson/kernel/linuxppc-2.5/include/linux/vmalloc.h linux-bluefish/include/linux/vmalloc.h
--- /home/dgibson/kernel/linuxppc-2.5/include/linux/vmalloc.h	2002-09-20 14:36:15.000000000 +1000
+++ linux-bluefish/include/linux/vmalloc.h	2002-10-01 14:29:10.000000000 +1000
@@ -32,7 +32,8 @@
 /*
  *	Lowlevel-APIs (not for driver use!)
  */
-extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
+extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, int gfp_mask);
+#define get_vm_area(size, flags) __get_vm_area(size, flags, GFP_KERNEL)
 extern struct vm_struct *remove_vm_area(void *addr);
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page ***pages);
diff -urN /home/dgibson/kernel/linuxppc-2.5/mm/vmalloc.c linux-bluefish/mm/vmalloc.c
--- /home/dgibson/kernel/linuxppc-2.5/mm/vmalloc.c	2002-09-20 14:36:26.000000000 +1000
+++ linux-bluefish/mm/vmalloc.c	2002-10-03 14:27:40.000000000 +1000
@@ -181,21 +181,23 @@
 
 
 /**
- *	get_vm_area  -  reserve a contingous kernel virtual area
+ *	__get_vm_area  -  reserve a contiguous kernel virtual area
  *
  *	@size:		size of the area
  *	@flags:		%VM_IOREMAP for I/O mappings or VM_ALLOC
+ *	@gfp_mask:	gfp flags to pass to kmalloc()
  *
  *	Search an area of @size in the kernel virtual mapping area,
  *	and reserved it for out purposes.  Returns the area descriptor
  *	on success or %NULL on failure.
  */
-struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
+struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, int gfp_mask)
 {
 	struct vm_struct **p, *tmp, *area;
 	unsigned long addr = VMALLOC_START;
+	unsigned long irq_flags;
 
-	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	area = kmalloc(sizeof(*area), gfp_mask);
 	if (unlikely(!area))
 		return NULL;
 
@@ -204,7 +206,7 @@
 	 */
 	size += PAGE_SIZE;
 
-	write_lock(&vmlist_lock);
+	write_lock_irqsave(&vmlist_lock, irq_flags);
 	for (p = &vmlist; (tmp = *p) ;p = &tmp->next) {
 		if ((size + addr) < addr)
 			goto out;
@@ -225,12 +227,12 @@
 	area->pages = NULL;
 	area->nr_pages = 0;
 	area->phys_addr = 0;
-	write_unlock(&vmlist_lock);
+	write_unlock_irqrestore(&vmlist_lock, irq_flags);
 
 	return area;
 
 out:
-	write_unlock(&vmlist_lock);
+	write_unlock_irqrestore(&vmlist_lock, irq_flags);
 	kfree(area);
 	return NULL;
 }
@@ -247,18 +249,19 @@
 struct vm_struct *remove_vm_area(void *addr)
 {
 	struct vm_struct **p, *tmp;
+	unsigned long flags;
 
-	write_lock(&vmlist_lock);
+	write_lock_irqsave(&vmlist_lock, flags);
 	for (p = &vmlist ; (tmp = *p) ;p = &tmp->next) {
 		 if (tmp->addr == addr)
 			 goto found;
 	}
-	write_unlock(&vmlist_lock);
+	write_unlock_irqrestore(&vmlist_lock, flags);
 	return NULL;
 
 found:
 	*p = tmp->next;
-	write_unlock(&vmlist_lock);
+	write_unlock_irqrestore(&vmlist_lock, flags);
 	return tmp;
 }
 
@@ -452,7 +455,7 @@
 	if ((unsigned long) addr + count < count)
 		count = -(unsigned long) addr;
 
-	read_lock(&vmlist_lock);
+	read_lock_irq(&vmlist_lock);
 	for (tmp = vmlist; tmp; tmp = tmp->next) {
 		vaddr = (char *) tmp->addr;
 		if (addr >= vaddr + tmp->size - PAGE_SIZE)
@@ -476,7 +479,7 @@
 		} while (--n > 0);
 	}
 finished:
-	read_unlock(&vmlist_lock);
+	read_unlock_irq(&vmlist_lock);
 	return buf - buf_start;
 }
 
@@ -490,7 +493,7 @@
 	if ((unsigned long) addr + count < count)
 		count = -(unsigned long) addr;
 
-	read_lock(&vmlist_lock);
+	read_lock_irq(&vmlist_lock);
 	for (tmp = vmlist; tmp; tmp = tmp->next) {
 		vaddr = (char *) tmp->addr;
 		if (addr >= vaddr + tmp->size - PAGE_SIZE)
@@ -513,6 +516,6 @@
 		} while (--n > 0);
 	}
 finished:
-	read_unlock(&vmlist_lock);
+	read_unlock_irq(&vmlist_lock);
 	return buf - buf_start;
 }


-- 
David Gibson			| For every complex problem there is a
david@gibson.dropbear.id.au	| solution which is simple, neat and
				| wrong.
http://www.ozlabs.org/people/dgibson
