From: Christoph Hellwig <hch@caldera.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH] kiobuf/rawio fixes for 2.4.0-test10-pre6
Date: Fri, 27 Oct 2000 22:21:43 +0200 [thread overview]
Message-ID: <20001027222143.A8059@caldera.de> (raw)
Ok, forgot to Cc linux-kernel ...
Please Cc Linus on reply.
----- Forwarded message from Christoph Hellwig <hch@caldera.de> -----
Date: Fri, 27 Oct 2000 22:03:54 +0200
From: Christoph Hellwig <hch@caldera.de>
To: Linus Torvalds <torvalds@transmeta.com>
Subject: [PATCH] kiobuf/rawio fixes for 2.4.0-test10-pre6
X-Mailer: Mutt 1.0i
Hi Linus,
Stephen Tweedie's last kiobuf patchset contained a lot of bug fixes
besides new features. These bug-fixes are not yet merged in 2.4.0.
This patch contains forward-ports of the following fixes
(quote from his 00README):
01-mapfix.diff
map_user_kiobuf() retries failed maps to cover a race in which
the swapper steals a page before the kiobuf has grabbed and
locked it.
02-iocount.diff
Kanoj Sarcar's fixes to allow kiobufs to work properly over
fork(), even on threaded applications.
04-eiofix.diff
Fix to return -EIO instead of 0 if a raw I/O read or write
encounters an error in the first block.
06-enxio.diff
Return ENXIO on read/write at or beyond the end of the device
for raw I/O
Please apply.
Christoph
--
Always remember that you are unique. Just like everyone else.
diff -uNr --exclude-from=dontdiff linux.orig/drivers/char/raw.c linux/drivers/char/raw.c
--- linux.orig/drivers/char/raw.c Thu Oct 19 13:21:24 2000
+++ linux/drivers/char/raw.c Tue Oct 24 13:25:47 2000
@@ -277,8 +277,11 @@
if ((*offp & sector_mask) || (size & sector_mask))
return -EINVAL;
- if ((*offp >> sector_bits) > limit)
+ if ((*offp >> sector_bits) > limit) {
+ if (size)
+ return -ENXIO;
return 0;
+ }
/*
* We'll just use one kiobuf
diff -uNr --exclude-from=dontdiff linux.orig/fs/buffer.c linux/fs/buffer.c
--- linux.orig/fs/buffer.c Tue Oct 24 13:15:49 2000
+++ linux/fs/buffer.c Tue Oct 24 13:26:31 2000
@@ -1924,6 +1924,8 @@
spin_unlock(&unused_list_lock);
+ if (!iosize)
+ return -EIO;
return iosize;
}
diff -uNr --exclude-from=dontdiff linux.orig/include/linux/mm.h linux/include/linux/mm.h
--- linux.orig/include/linux/mm.h Tue Oct 24 13:15:56 2000
+++ linux/include/linux/mm.h Tue Oct 24 14:41:46 2000
@@ -157,8 +157,9 @@
wait_queue_head_t wait;
struct page **pprev_hash;
struct buffer_head * buffers;
- void *virtual; /* non-NULL if kmapped */
+ void *virtual; /* non-NULL if kmapped */
struct zone_struct *zone;
+ atomic_t rawcount; /* count of raw io in progress */
} mem_map_t;
#define get_page(p) atomic_inc(&(p)->count)
diff -uNr --exclude-from=dontdiff linux.orig/mm/memory.c linux/mm/memory.c
--- linux.orig/mm/memory.c Tue Oct 24 13:15:58 2000
+++ linux/mm/memory.c Tue Oct 24 16:09:22 2000
@@ -138,6 +138,30 @@
check_pgt_cache();
}
+/*
+ * Establish a new mapping:
+ * - flush the old one
+ * - update the page tables
+ * - inform the TLB about the new one
+ */
+static inline void establish_pte(struct vm_area_struct * vma, unsigned long address,
+ pte_t *page_table, pte_t entry)
+{
+ flush_tlb_page(vma, address);
+ set_pte(page_table, entry);
+ update_mmu_cache(vma, address, entry);
+}
+
+static inline void break_cow(struct vm_area_struct * vma, struct page * old_page,
+ struct page * new_page, unsigned long address,
+ pte_t *page_table)
+{
+ copy_cow_page(old_page,new_page,address);
+ flush_page_to_ram(new_page);
+ flush_cache_page(vma, address);
+ establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
+}
+
#define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t))
#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t))
@@ -227,6 +251,22 @@
/* If it's a COW mapping, write protect it both in the parent and the child */
if (cow) {
+ /* Rawio in progress? */
+ if (atomic_read(&ptepage->rawcount)) {
+ /*
+ * If pte is dirty, its a private page,
+ * rawio was initiated by a clone.
+ * For dmain operation, need to break
+ * cow.
+ */
+ if (pte_dirty(pte)) {
+ struct page * new_page = alloc_page(GFP_HIGHUSER);
+ if (!new_page)
+ goto nomem;
+ break_cow(vma, ptepage, new_page, address, dst_pte);
+ goto cont_copy_pte_range;
+ }
+ }
ptep_clear_wrprotect(src_pte);
pte = *src_pte;
}
@@ -382,9 +422,12 @@
/*
- * Do a quick page-table lookup for a single page.
+ * Do a quick page-table lookup for a single page. We have already verified
+ * access type, and done a fault in. But, kswapd might have stolen the page
+ * in the meantime. Return an indication of whether we should retry the fault
+ * in. Writability test is superfluous but conservative.
*/
-static struct page * follow_page(unsigned long address)
+static struct page * follow_page(unsigned long address, int writeacc, int * ret)
{
pgd_t *pgd;
pmd_t *pmd;
@@ -393,10 +436,15 @@
pmd = pmd_offset(pgd, address);
if (pmd) {
pte_t * pte = pte_offset(pmd, address);
- if (pte && pte_present(*pte))
+ if (pte && pte_present(*pte)) {
+ if (writeacc && !pte_write(*pte))
+ goto retry;
return pte_page(*pte);
+ }
}
-
+
+retry:
+ *ret = 1;
return NULL;
}
@@ -428,7 +476,8 @@
struct page * map;
int i;
int datain = (rw == READ);
-
+ int failed;
+
/* Make sure the iobuf is not already mapped somewhere. */
if (iobuf->nr_pages)
return -EINVAL;
@@ -467,23 +516,28 @@
}
if (((datain) && (!(vma->vm_flags & VM_WRITE))) ||
(!(vma->vm_flags & VM_READ))) {
- err = -EACCES;
goto out_unlock;
}
}
+
+faultin:
if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0)
goto out_unlock;
spin_lock(&mm->page_table_lock);
- map = follow_page(ptr);
- if (!map) {
+ map = follow_page(ptr, datain, &failed);
+ if (failed) {
+ /*
+ * Page got stolen before we could lock it down.
+ * Retry.
+ */
spin_unlock(&mm->page_table_lock);
- dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
- goto out_unlock;
+ goto faultin;
}
map = get_page_map(map);
- if (map)
+ if (map) {
atomic_inc(&map->count);
- else
+ atomic_inc(&map->rawcount);
+ } else
printk (KERN_INFO "Mapped page missing [%d]\n", i);
spin_unlock(&mm->page_table_lock);
iobuf->maplist[i] = map;
@@ -519,6 +573,7 @@
if (map) {
if (iobuf->locked)
UnlockPage(map);
+ atomic_dec(&map->rawcount);
__free_page(map);
}
}
@@ -771,28 +826,6 @@
} while (from && (from < end));
flush_tlb_range(current->mm, beg, end);
return error;
-}
-
-/*
- * Establish a new mapping:
- * - flush the old one
- * - update the page tables
- * - inform the TLB about the new one
- */
-static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
-{
- flush_tlb_page(vma, address);
- set_pte(page_table, entry);
- update_mmu_cache(vma, address, entry);
-}
-
-static inline void break_cow(struct vm_area_struct * vma, struct page * old_page, struct page * new_page, unsigned long address,
- pte_t *page_table)
-{
- copy_cow_page(old_page,new_page,address);
- flush_page_to_ram(new_page);
- flush_cache_page(vma, address);
- establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
}
/*
diff -uNr --exclude-from=dontdiff linux.orig/mm/page_alloc.c linux/mm/page_alloc.c
--- linux.orig/mm/page_alloc.c Tue Oct 24 13:15:58 2000
+++ linux/mm/page_alloc.c Tue Oct 24 13:36:44 2000
@@ -98,6 +98,8 @@
BUG();
if (PageInactiveClean(page))
BUG();
+ if (atomic_read(&page->rawcount))
+ BUG();
page->flags &= ~(1<<PG_referenced);
page->age = PAGE_AGE_START;
@@ -819,6 +821,7 @@
*/
for (p = lmem_map; p < lmem_map + totalpages; p++) {
set_page_count(p, 0);
+ atomic_set(&(p)->rawcount, 0);
SetPageReserved(p);
init_waitqueue_head(&p->wait);
memlist_init(&p->list);
----- End forwarded message -----
--
Always remember that you are unique. Just like everyone else.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/
next reply other threads:[~2000-10-27 20:22 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2000-10-27 20:21 Christoph Hellwig [this message]
[not found] ` <200010272123.OAA21478@penguin.transmeta.com>
2000-10-30 11:45 ` [PATCH] kiobuf/rawio fixes for 2.4.0-test10-pre6 Christoph Hellwig
2000-10-30 17:19 ` Jeff Garzik
2000-10-30 18:17 ` Christoph Hellwig
2000-10-30 18:56 ` Jeff Garzik
2000-10-30 19:44 ` Christoph Hellwig
2000-10-30 20:08 ` Jeff Garzik
2000-10-30 20:32 ` Christoph Hellwig
2000-10-30 21:51 ` Jeff Garzik
2000-11-01 13:32 ` Stephen C. Tweedie
2000-10-31 2:08 ` Andrea Arcangeli
2000-11-01 11:16 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20001027222143.A8059@caldera.de \
--to=hch@caldera.de \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).