>From 6c333a1a5a577672f4ea0114e0fc430531097788 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Tue, 26 Apr 2011 20:48:13 +0200
Subject: [PATCH 2/3] ext4: Rewrite ext4_page_mkwrite() to return locked page

ext4_page_mkwrite() does not return the page locked. This makes it hard
to avoid races with the filesystem freezing code (so that we don't leave
a writeable page on a frozen fs) or the writeback code (so that we allow
the page to be stable during writeback).

Also the current code uses i_alloc_sem to avoid races with truncate but
that seems to be the wrong locking order according to the lock ordering
documented in mm/rmap.c.

[review: declare handle and get_block as pointers, call
 ext4_journal_stop() instead of the non-existent ext4_journal_end(), and
 stop the handle before bailing out of the journalled-data error path]

Signed-off-by: Jan Kara
---
 fs/ext4/inode.c |  110 ++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 72 insertions(+), 38 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f2fa5e8..377fed0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5788,40 +5788,61 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
 	return !buffer_mapped(bh);
 }
 
+/*
+ * walk_page_buffers() callback used by ext4_page_mkwrite(): clean buffers
+ * are skipped, dirty ones are passed to ext4_handle_dirty_metadata().
+ */
+static int ext4_journalled_fault_fn(handle_t *handle, struct buffer_head *bh)
+{
+	if (!buffer_dirty(bh))
+		return 0;
+	return ext4_handle_dirty_metadata(handle, NULL, bh);
+}
+
+/*
+ * Handle a page_mkwrite fault on vmf->page. Negative errnos are translated
+ * to VM_FAULT_OOM / VM_FAULT_SIGBUS; VM_FAULT_LOCKED leaves the page locked.
+ */
 int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
 	loff_t size;
 	unsigned long len;
-	int ret = -EINVAL;
-	void *fsdata;
+	int ret;
 	struct file *file = vma->vm_file;
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
+	handle_t *handle;
+	get_block_t *get_block;
+	int retries = 0;
 
-	/*
-	 * Get i_alloc_sem to stop truncates messing with the inode. We cannot
-	 * get i_mutex because we are already holding mmap_sem.
-	 */
-	down_read(&inode->i_alloc_sem);
+	/* Delalloc case is easy... */
+	if (test_opt(inode->i_sb, DELALLOC) &&
+	    !ext4_should_journal_data(inode) &&
+	    !ext4_nonda_switch(inode->i_sb)) {
+		do {
+			ret = __block_page_mkwrite(vma, vmf,
+						   ext4_da_get_block_prep);
+		} while (ret == -ENOSPC &&
+		       ext4_should_retry_alloc(inode->i_sb, &retries));
+		goto out_ret;
+	}
+
+	lock_page(page);
 	size = i_size_read(inode);
-	if (page->mapping != mapping || size <= page_offset(page)
-	    || !PageUptodate(page)) {
-		/* page got truncated from under us? */
-		goto out_unlock;
+	/* Page got truncated from under us? */
+	if (page->mapping != mapping || page_offset(page) > size) {
+		unlock_page(page);
+		ret = VM_FAULT_NOPAGE;
+		goto out;
 	}
-	ret = 0;
-	if (PageMappedToDisk(page))
-		goto out_unlock;
 
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
 	else
 		len = PAGE_CACHE_SIZE;
-
-	lock_page(page);
 	/*
-	 * return if we have all the buffers mapped. This avoid
+	 * Return if we have all the buffers mapped. This avoids
 	 * the need to call write_begin/write_end which does a
 	 * journal_start/journal_stop which can block and take
 	 * long time
@@ -5829,30 +5850,43 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (page_has_buffers(page)) {
 		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
 					ext4_bh_unmapped)) {
-			unlock_page(page);
-			goto out_unlock;
+			ret = VM_FAULT_LOCKED;
+			goto out;
 		}
 	}
 	unlock_page(page);
-	/*
-	 * OK, we need to fill the hole... Do write_begin write_end
-	 * to do block allocation/reservation.We are not holding
-	 * inode.i__mutex here. That allow * parallel write_begin,
-	 * write_end call. lock_page prevent this from happening
-	 * on the same page though
-	 */
-	ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
-			len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
-	if (ret < 0)
-		goto out_unlock;
-	ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
-			len, len, page, fsdata);
-	if (ret < 0)
-		goto out_unlock;
-	ret = 0;
-out_unlock:
-	if (ret)
+	/* OK, we need to fill the hole... */
+	if (ext4_should_dioread_nolock(inode))
+		get_block = ext4_get_block_write;
+	else
+		get_block = ext4_get_block;
+retry:
+	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+	if (IS_ERR(handle)) {
 		ret = VM_FAULT_SIGBUS;
-	up_read(&inode->i_alloc_sem);
+		goto out;
+	}
+	ret = __block_page_mkwrite(vma, vmf, get_block);
+	if (ret == VM_FAULT_LOCKED && ext4_should_journal_data(inode)) {
+		if (walk_page_buffers(handle, page_buffers(page), 0,
+			  PAGE_CACHE_SIZE, NULL, ext4_journalled_fault_fn)) {
+			unlock_page(page);
+			ret = VM_FAULT_SIGBUS;
+			ext4_journal_stop(handle);
+			goto out;
+		}
+		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+	}
+	ext4_journal_stop(handle);
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+out_ret:
+	if (ret < 0) {
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else
+			ret = VM_FAULT_SIGBUS;
+	}
+out:
 	return ret;
 }
-- 
1.7.1