From: Andreas Gruenbacher <agruenba@redhat.com>
To: cluster-devel@redhat.com, Christoph Hellwig <hch@lst.de>
Cc: "Bob Peterson" <rpeterso@redhat.com>, "Jan Kara" <jack@suse.cz>,
"Dave Chinner" <david@fromorbit.com>,
"Ross Lagerwall" <ross.lagerwall@citrix.com>,
"Mark Syms" <Mark.Syms@citrix.com>,
"Edwin Török" <edvin.torok@citrix.com>,
linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
"Andreas Gruenbacher" <agruenba@redhat.com>
Subject: [PATCH v5 3/3] gfs2: Fix iomap write page reclaim deadlock
Date: Fri, 26 Apr 2019 15:11:27 +0200 [thread overview]
Message-ID: <20190426131127.19164-3-agruenba@redhat.com> (raw)
In-Reply-To: <20190426131127.19164-1-agruenba@redhat.com>
Since commit 64bc06bb32ee ("gfs2: iomap buffered write support"), gfs2 is doing
buffered writes by starting a transaction in iomap_begin, writing a range of
pages, and ending that transaction in iomap_end. This approach suffers from
two problems:
(1) Any allocations necessary for the write are done in iomap_begin, so when
the data aren't journaled, there is no need for keeping the transaction open
until iomap_end.
(2) Transactions keep the gfs2 log flush lock held. When
iomap_file_buffered_write calls balance_dirty_pages, this can end up calling
gfs2_write_inode, which will try to flush the log. This requires taking the
log flush lock which is already held, resulting in a deadlock.
Fix both of these issues by not keeping transactions open from iomap_begin to
iomap_end. Instead, start a small transaction in page_prepare and end it in
page_done when necessary.
Reported-by: Edwin Török <edvin.torok@citrix.com>
Fixes: 64bc06bb32ee ("gfs2: iomap buffered write support")
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
fs/gfs2/aops.c | 14 ++++++---
fs/gfs2/bmap.c | 83 +++++++++++++++++++++++++-------------------------
2 files changed, 52 insertions(+), 45 deletions(-)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 05dd78f4b2b3..6210d4429d84 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -649,7 +649,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
*/
void adjust_fs_space(struct inode *inode)
{
- struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
@@ -657,10 +657,13 @@ void adjust_fs_space(struct inode *inode)
struct buffer_head *m_bh, *l_bh;
u64 fs_total, new_free;
+ if (gfs2_trans_begin(sdp, 2 * RES_STATFS, 0) != 0)
+ return;
+
/* Total up the file system space, according to the latest rindex. */
fs_total = gfs2_ri_total(sdp);
if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
- return;
+ goto out;
spin_lock(&sdp->sd_statfs_spin);
gfs2_statfs_change_in(m_sc, m_bh->b_data +
@@ -675,11 +678,14 @@ void adjust_fs_space(struct inode *inode)
gfs2_statfs_change(sdp, new_free, new_free, 0);
if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
- goto out;
+ goto out2;
update_statfs(sdp, m_bh, l_bh);
brelse(l_bh);
-out:
+out2:
brelse(m_bh);
+out:
+ sdp->sd_rindex_uptodate = 0;
+ gfs2_trans_end(sdp);
}
/**
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6b980703bae7..27c82f4aaf32 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -994,7 +994,9 @@ static void gfs2_write_unlock(struct inode *inode)
static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
unsigned len, struct iomap *iomap)
{
- return 0;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+ return gfs2_trans_begin(sdp, RES_DINODE + (len >> inode->i_blkbits), 0);
}
static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
@@ -1002,9 +1004,11 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
struct iomap *iomap)
{
struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
- if (page)
+ if (page && !gfs2_is_stuffed(ip))
gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
+ gfs2_trans_end(sdp);
}
static const struct iomap_page_ops gfs2_iomap_page_ops = {
@@ -1064,31 +1068,45 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
if (alloc_required)
rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
- ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits);
- if (ret)
- goto out_trans_fail;
+ if (unstuff || iomap->type == IOMAP_HOLE) {
+ struct gfs2_trans *tr;
- if (unstuff) {
- ret = gfs2_unstuff_dinode(ip, NULL);
- if (ret)
- goto out_trans_end;
- release_metapath(mp);
- ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
- flags, iomap, mp);
+ ret = gfs2_trans_begin(sdp, rblocks,
+ iomap->length >> inode->i_blkbits);
if (ret)
- goto out_trans_end;
- }
+ goto out_trans_fail;
- if (iomap->type == IOMAP_HOLE) {
- ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
- if (ret) {
- gfs2_trans_end(sdp);
- gfs2_inplace_release(ip);
- punch_hole(ip, iomap->offset, iomap->length);
- goto out_qunlock;
+ if (unstuff) {
+ ret = gfs2_unstuff_dinode(ip, NULL);
+ if (ret)
+ goto out_trans_end;
+ release_metapath(mp);
+ ret = gfs2_iomap_get(inode, iomap->offset,
+ iomap->length, flags, iomap, mp);
+ if (ret)
+ goto out_trans_end;
}
+
+ if (iomap->type == IOMAP_HOLE) {
+ ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
+ if (ret) {
+ gfs2_trans_end(sdp);
+ gfs2_inplace_release(ip);
+ punch_hole(ip, iomap->offset, iomap->length);
+ goto out_qunlock;
+ }
+ }
+
+ tr = current->journal_info;
+ if (tr->tr_num_buf_new)
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ else
+ gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[0]);
+
+ gfs2_trans_end(sdp);
}
- if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
+
+ if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
iomap->page_ops = &gfs2_iomap_page_ops;
return 0;
@@ -1128,10 +1146,6 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
iomap->type != IOMAP_MAPPED)
ret = -ENOTBLK;
}
- if (!ret) {
- get_bh(mp.mp_bh[0]);
- iomap->private = mp.mp_bh[0];
- }
release_metapath(&mp);
trace_gfs2_iomap_end(ip, iomap, ret);
return ret;
@@ -1142,27 +1156,16 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_trans *tr = current->journal_info;
- struct buffer_head *dibh = iomap->private;
if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
goto out;
- if (iomap->type != IOMAP_INLINE) {
+ if (!gfs2_is_stuffed(ip))
gfs2_ordered_add_inode(ip);
- if (tr->tr_num_buf_new)
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- else
- gfs2_trans_add_meta(ip->i_gl, dibh);
- }
-
- if (inode == sdp->sd_rindex) {
+ if (inode == sdp->sd_rindex)
adjust_fs_space(inode);
- sdp->sd_rindex_uptodate = 0;
- }
- gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
if (length != written && (iomap->flags & IOMAP_F_NEW)) {
@@ -1182,8 +1185,6 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
gfs2_write_unlock(inode);
out:
- if (dibh)
- brelse(dibh);
return 0;
}
--
2.20.1
next prev parent reply other threads:[~2019-04-26 13:11 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-26 13:11 [PATCH v5 1/3] iomap: Fix use-after-free error in page_done callback Andreas Gruenbacher
2019-04-26 13:11 ` [PATCH v5 2/3] iomap: Add a page_prepare callback Andreas Gruenbacher
2019-04-26 14:22 ` Christoph Hellwig
2019-04-26 13:11 ` Andreas Gruenbacher [this message]
2019-04-27 6:17 ` [PATCH v5 1/3] iomap: Fix use-after-free error in page_done callback Christoph Hellwig
2019-04-28 19:20 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190426131127.19164-3-agruenba@redhat.com \
--to=agruenba@redhat.com \
--cc=Mark.Syms@citrix.com \
--cc=cluster-devel@redhat.com \
--cc=david@fromorbit.com \
--cc=edvin.torok@citrix.com \
--cc=hch@lst.de \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ross.lagerwall@citrix.com \
--cc=rpeterso@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).