From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx1.redhat.com ([209.132.183.28]:50556 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727730AbeIMVCj (ORCPT ); Thu, 13 Sep 2018 17:02:39 -0400 Subject: [PATCH 07/10] afs: Use ITER_MAPPING for writing From: David Howells To: viro@zeniv.linux.org.uk Cc: dhowells@redhat.com, linux-fsdevel@vger.kernel.org, linux-afs@lists.infradead.org, linux-kernel@vger.kernel.org Date: Thu, 13 Sep 2018 16:52:32 +0100 Message-ID: <153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk> In-Reply-To: <153685389564.14766.11306559824641824935.stgit@warthog.procyon.org.uk> References: <153685389564.14766.11306559824641824935.stgit@warthog.procyon.org.uk> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: linux-fsdevel-owner@vger.kernel.org List-ID: Use a single ITER_MAPPING iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells --- fs/afs/file.c | 4 -- fs/afs/fsclient.c | 40 +++++------------- fs/afs/internal.h | 15 ++----- fs/afs/rxrpc.c | 99 +++++++------------------------------------- fs/afs/write.c | 84 +++++++++++++++++++++---------------- include/trace/events/afs.h | 51 ++++++++--------------- 6 files changed, 100 insertions(+), 193 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index e887a9b24f4f..36ba263db749 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -195,11 +195,9 @@ static void afs_readpages_page_done(const struct iov_iter *iter, struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); struct afs_read *req = container_of(iter, struct afs_read, iter); - SetPageUptodate(page); - if (0 && afs_vnode_cache(vnode)) SetPageFsCache(page); - unlock_page(page); + page_endio(page, false, 0); put_page(page); req->done_pages++; } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index f0cef8e7b1af..971378801489 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1230,10 +1230,7 @@ static const struct afs_call_type afs_RXFSStoreData64 = { /* * store a set of pages to a very large file */ -static int afs_fs_store_data64(struct afs_fs_cursor *fc, - struct address_space *mapping, - pgoff_t first, pgoff_t last, - unsigned offset, unsigned to, +static int afs_fs_store_data64(struct afs_fs_cursor *fc, struct iov_iter *iter, loff_t size, loff_t pos, loff_t i_size) { struct afs_vnode *vnode = fc->vnode; @@ -1251,13 +1248,10 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, return -ENOMEM; call->key = fc->key; - call->mapping = mapping; + call->write_iter = iter; + call->write_first = pos >> PAGE_SHIFT; + call->write_last = (pos + size - 1) >> PAGE_SHIFT; call->reply[0] = vnode; - call->first = first; - call->last = last; - call->first_offset = offset; - call->last_to = to; - call->send_pages = true; call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ @@ -1286,26 +1280,20 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, } /* - * store a set of pages + * Write data to a file on the server. */ -int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, - pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) +int afs_fs_store_data(struct afs_fs_cursor *fc, struct iov_iter *iter, loff_t pos) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); - loff_t size, pos, i_size; + loff_t size, i_size; __be32 *bp; _enter(",%x,{%x:%u},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - size = (loff_t)to - (loff_t)offset; - if (first != last) - size += (loff_t)(last - first) << PAGE_SHIFT; - pos = (loff_t)first << PAGE_SHIFT; - pos += offset; + size = iov_iter_count(iter); i_size = i_size_read(&vnode->vfs_inode); if (pos + size > i_size) @@ -1316,8 +1304,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, (unsigned long long) i_size); if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) - return afs_fs_store_data64(fc, mapping, first, last, offset, to, - size, pos, i_size); + return afs_fs_store_data64(fc, iter, size, pos, i_size); call = afs_alloc_flat_call(net, &afs_RXFSStoreData, (4 + 6 + 3) * 4, @@ -1326,13 +1313,10 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, return -ENOMEM; call->key = fc->key; - call->mapping = mapping; + call->write_iter = iter; + call->write_first = pos >> PAGE_SHIFT; + call->write_last = (pos + size - 1) >> PAGE_SHIFT; call->reply[0] = vnode; - call->first = first; - call->last = last; - call->first_offset = offset; - call->last_to = to; - call->send_pages = true; call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 997ab8350dfe..7c7598856b96 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -98,14 +98,15 @@ struct afs_call { struct address_space *mapping; /* Pages being written from */ struct iov_iter iter; /* Buffer iterator */ struct iov_iter *_iter; /* Iterator currently in use */ + struct iov_iter *write_iter; /* Iterator defining write to be made */ union { /* Convenience for ->iter */ struct kvec kvec[1]; struct bio_vec bvec[1]; }; void *buffer; /* reply receive buffer */ void *reply[4]; /* Where to put the reply */ - pgoff_t first; /* first page in mapping to deal with */ - pgoff_t last; /* last page in mapping to deal with */ + pgoff_t write_first; /* First page of write */ + pgoff_t write_last; /* Last page of write */ atomic_t usage; enum afs_call_state state; spinlock_t state_lock; @@ -113,15 +114,10 @@ struct afs_call { u32 abort_code; /* Remote abort ID or 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ - unsigned first_offset; /* offset into mapping[first] */ unsigned int cb_break; /* cb_break + cb_s_break before the call */ - union { - unsigned last_to; /* amount of mapping[last] */ - unsigned count2; /* count used in unmarshalling */ - }; + unsigned count2; /* count used in unmarshalling */ unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ - bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ bool ret_reply0; /* T if should return reply[0] on success */ @@ -799,8 +795,7 @@ extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u6 struct afs_fid *, struct afs_file_status *); extern int afs_fs_rename(struct afs_fs_cursor *, const char *, struct afs_vnode *, const char *, u64, u64); -extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *, - pgoff_t, pgoff_t, unsigned, unsigned); +extern int afs_fs_store_data(struct afs_fs_cursor *, struct iov_iter *, loff_t); extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 966e30f30cbb..6138c12c74a3 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -258,39 +258,6 @@ void afs_flat_call_destructor(struct afs_call *call) call->buffer = NULL; } -#define AFS_BVEC_MAX 8 - -/* - * Load the given bvec with the next few pages. - */ -static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, - struct bio_vec *bv, pgoff_t first, pgoff_t last, - unsigned offset) -{ - struct page *pages[AFS_BVEC_MAX]; - unsigned int nr, n, i, to, bytes = 0; - - nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX); - n = find_get_pages_contig(call->mapping, first, nr, pages); - ASSERTCMP(n, ==, nr); - - msg->msg_flags |= MSG_MORE; - for (i = 0; i < nr; i++) { - to = PAGE_SIZE; - if (first + i >= last) { - to = call->last_to; - msg->msg_flags &= ~MSG_MORE; - } - bv[i].bv_page = pages[i]; - bv[i].bv_len = to - offset; - bv[i].bv_offset = offset; - bytes += to - offset; - offset = 0; - } - - iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes); -} - /* * Advance the AFS call state when the RxRPC call ends the transmit phase. */ @@ -303,41 +270,6 @@ static void afs_notify_end_request_tx(struct sock *sock, afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY); } -/* - * attach the data from a bunch of pages on an inode to a call - */ -static int afs_send_pages(struct afs_call *call, struct msghdr *msg) -{ - struct bio_vec bv[AFS_BVEC_MAX]; - unsigned int bytes, nr, loop, offset; - pgoff_t first = call->first, last = call->last; - int ret; - - offset = call->first_offset; - call->first_offset = 0; - - do { - afs_load_bvec(call, msg, bv, first, last, offset); - trace_afs_send_pages(call, msg, first, last, offset); - - offset = 0; - bytes = msg->msg_iter.count; - nr = msg->msg_iter.nr_segs; - - ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, msg, - bytes, afs_notify_end_request_tx); - for (loop = 0; loop < nr; loop++) - put_page(bv[loop].bv_page); - if (ret < 0) - break; - - first += nr; - } while (first <= last); - - trace_afs_sent_pages(call, call->first, last, first, ret); - return ret; -} - /* * initiate a call */ @@ -367,19 +299,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, * after the initial fixed part. */ tx_total_len = call->request_size; - if (call->send_pages) { - if (call->last == call->first) { - tx_total_len += call->last_to - call->first_offset; - } else { - /* It looks mathematically like you should be able to - * combine the following lines with the ones above, but - * unsigned arithmetic is fun when it wraps... - */ - tx_total_len += PAGE_SIZE - call->first_offset; - tx_total_len += call->last_to; - tx_total_len += (call->last - call->first - 1) * PAGE_SIZE; - } - } + if (call->write_iter) + tx_total_len += iov_iter_count(call->write_iter); /* create a call */ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, @@ -406,7 +327,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0); + msg.msg_flags = MSG_WAITALL | (call->write_iter ? MSG_MORE : 0); ret = rxrpc_kernel_send_data(call->net->socket, rxcall, &msg, call->request_size, @@ -414,8 +335,18 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, if (ret < 0) goto error_do_abort; - if (call->send_pages) { - ret = afs_send_pages(call, &msg); + if (call->write_iter) { + msg.msg_iter = *call->write_iter; + msg.msg_flags &= ~MSG_MORE; + trace_afs_send_data(call, &msg); + + ret = rxrpc_kernel_send_data(call->net->socket, + call->rxcall, &msg, + iov_iter_count(&msg.msg_iter), + afs_notify_end_request_tx); + *call->write_iter = msg.msg_iter; + + trace_afs_sent_data(call, &msg, ret); if (ret < 0) goto error_do_abort; } diff --git a/fs/afs/write.c b/fs/afs/write.c index d07e7f29f50a..9f035386b9c7 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -302,22 +302,21 @@ static void afs_redirty_pages(struct writeback_control *wbc, /* * write to a file */ -static int afs_store_data(struct address_space *mapping, - pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) +static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, + loff_t pos) { - struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct afs_fs_cursor fc; struct afs_wb_key *wbk = NULL; struct list_head *p; + loff_t count = iov_iter_count(iter); int ret = -ENOKEY, ret2; - _enter("%s{%x:%u.%u},%lx,%lx,%x,%x", + _enter("%s{%x:%u.%u},%llx,%llx", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, - first, last, offset, to); + count, pos); spin_lock(&vnode->wb_lock); p = vnode->wb_keys.next; @@ -350,7 +349,7 @@ static int afs_store_data(struct address_space *mapping, if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_store_data(&fc, mapping, first, last, offset, to); + afs_fs_store_data(&fc, iter, pos); } afs_check_for_remote_deletion(&fc, fc.vnode); @@ -361,9 +360,7 @@ static int afs_store_data(struct address_space *mapping, switch (ret) { case 0: afs_stat_v(vnode, n_stores); - atomic_long_add((last * PAGE_SIZE + to) - - (first * PAGE_SIZE + offset), - &afs_v2net(vnode)->n_store_bytes); + atomic_long_add(count, &afs_v2net(vnode)->n_store_bytes); break; case -EACCES: case -EPERM: @@ -393,10 +390,12 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, pgoff_t final_page) { struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct iov_iter iter; struct page *pages[8], *page; unsigned long count, priv; unsigned n, offset, to, f, t; pgoff_t start, first, last; + loff_t a, b; int loop, ret; _enter(",%lx", primary_page->index); @@ -496,10 +495,17 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, first = primary_page->index; last = first + count - 1; - _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); - ret = afs_store_data(mapping, first, last, offset, to); + a = first; + a <<= PAGE_SHIFT; + a += offset; + b = last; + b <<= PAGE_SHIFT; + b += to; + iov_iter_mapping(&iter, WRITE, mapping, a, b - a); + + ret = afs_store_data(vnode, &iter, a); switch (ret) { case 0: ret = count; @@ -668,36 +674,35 @@ int afs_writepages(struct address_space *mapping, */ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) { - struct pagevec pv; + struct radix_tree_iter iter; + struct address_space *mapping = vnode->vfs_inode.i_mapping; unsigned long priv; - unsigned count, loop; - pgoff_t first = call->first, last = call->last; + pgoff_t cursor = call->write_first, last = call->write_last; + void __rcu **slot; _enter("{%x:%u},{%lx-%lx}", - vnode->fid.vid, vnode->fid.vnode, first, last); + vnode->fid.vid, vnode->fid.vnode, cursor, last); - pagevec_init(&pv); + rcu_read_lock(); - do { - _debug("done %lx-%lx", first, last); + radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, cursor) { + struct page *page = radix_tree_deref_slot(slot); - count = last - first + 1; - if (count > PAGEVEC_SIZE) - count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, - first, count, pv.pages); - ASSERTCMP(pv.nr, ==, count); + ASSERT(page); + ASSERT(PageWriteback(page)); - for (loop = 0; loop < count; loop++) { - priv = page_private(pv.pages[loop]); - trace_afs_page_dirty(vnode, tracepoint_string("clear"), - pv.pages[loop]->index, priv); - set_page_private(pv.pages[loop], 0); - end_page_writeback(pv.pages[loop]); - } - first += count; - __pagevec_release(&pv); - } while (first <= last); + priv = page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("clear"), + page->index, priv); + set_page_private(page, 0); + page_endio(page, true, 0); + + if (cursor >= last) + break; + cursor++; + } + + rcu_read_unlock(); afs_prune_wb_keys(vnode); _leave(""); @@ -829,6 +834,8 @@ int afs_launder_page(struct page *page) { struct address_space *mapping = page->mapping; struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct iov_iter iter; + struct bio_vec bv[1]; unsigned long priv; unsigned int f, t; int ret = 0; @@ -844,9 +851,14 @@ int afs_launder_page(struct page *page) t = priv >> AFS_PRIV_SHIFT; } + bv[0].bv_page = page; + bv[0].bv_offset = f; + bv[0].bv_len = t - f; + iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len); + trace_afs_page_dirty(vnode, tracepoint_string("launder"), page->index, priv); - ret = afs_store_data(mapping, page->index, page->index, t, f); + ret = afs_store_data(vnode, &iter, (loff_t)page->index << PAGE_SHIFT); } trace_afs_page_dirty(vnode, tracepoint_string("laundered"), diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 5e0f8dcede26..9c44245987b3 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -392,65 +392,52 @@ TRACE_EVENT(afs_call_done, __entry->rx_call) ); -TRACE_EVENT(afs_send_pages, - TP_PROTO(struct afs_call *call, struct msghdr *msg, - pgoff_t first, pgoff_t last, unsigned int offset), +TRACE_EVENT(afs_send_data, + TP_PROTO(struct afs_call *call, struct msghdr *msg), - TP_ARGS(call, msg, first, last, offset), + TP_ARGS(call, msg), TP_STRUCT__entry( __field(unsigned int, call ) - __field(pgoff_t, first ) - __field(pgoff_t, last ) - __field(unsigned int, nr ) - __field(unsigned int, bytes ) - __field(unsigned int, offset ) __field(unsigned int, flags ) + __field(loff_t, offset ) + __field(loff_t, count ) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->first = first; - __entry->last = last; - __entry->nr = msg->msg_iter.nr_segs; - __entry->bytes = msg->msg_iter.count; - __entry->offset = offset; __entry->flags = msg->msg_flags; + __entry->offset = msg->msg_iter.iov_offset; + __entry->count = iov_iter_count(&msg->msg_iter); ), - TP_printk(" c=%08x %lx-%lx-%lx b=%x o=%x f=%x", - __entry->call, - __entry->first, __entry->first + __entry->nr - 1, __entry->last, - __entry->bytes, __entry->offset, + TP_printk(" c=%08x o=%llx c=%llx f=%x", + __entry->call, __entry->offset, __entry->count, __entry->flags) ); -TRACE_EVENT(afs_sent_pages, - TP_PROTO(struct afs_call *call, pgoff_t first, pgoff_t last, - pgoff_t cursor, int ret), +TRACE_EVENT(afs_sent_data, + TP_PROTO(struct afs_call *call, struct msghdr *msg, int ret), - TP_ARGS(call, first, last, cursor, ret), + TP_ARGS(call, msg, ret), TP_STRUCT__entry( __field(unsigned int, call ) - __field(pgoff_t, first ) - __field(pgoff_t, last ) - __field(pgoff_t, cursor ) __field(int, ret ) + __field(loff_t, offset ) + __field(loff_t, count ) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->first = first; - __entry->last = last; - __entry->cursor = cursor; __entry->ret = ret; + __entry->offset = msg->msg_iter.iov_offset; + __entry->count = iov_iter_count(&msg->msg_iter); ), - TP_printk(" c=%08x %lx-%lx c=%lx r=%d", - __entry->call, - __entry->first, __entry->last, - __entry->cursor, __entry->ret) + TP_printk(" c=%08x o=%llx c=%llx r=%d", + __entry->call, __entry->offset, __entry->count, + __entry->ret) ); TRACE_EVENT(afs_dir_check_failed,