From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from aserp1040.oracle.com ([141.146.126.69]:38177 "EHLO aserp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752906AbcI1SsG (ORCPT ); Wed, 28 Sep 2016 14:48:06 -0400 Date: Wed, 28 Sep 2016 11:47:56 -0700 From: "Darrick J. Wong" Subject: Re: [PATCH 10/63] xfs: create refcount update intent log items Message-ID: <20160928184756.GS14092@birch.djwong.org> References: <147503120985.30303.14151302091684456858.stgit@birch.djwong.org> <147503127360.30303.13509008550712587655.stgit@birch.djwong.org> <20160928162017.GE8852@bfoster.bfoster> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20160928162017.GE8852@bfoster.bfoster> Sender: linux-xfs-owner@vger.kernel.org List-ID: List-Id: xfs To: Brian Foster Cc: david@fromorbit.com, linux-xfs@vger.kernel.org On Wed, Sep 28, 2016 at 12:20:18PM -0400, Brian Foster wrote: > On Tue, Sep 27, 2016 at 07:54:33PM -0700, Darrick J. Wong wrote: > > Create refcount update intent/done log items to record redo > > information in the log. Because we need to roll transactions between > > updating the bmbt mapping and updating the reverse mapping, we also > > have to track the status of the metadata updates that will be recorded > > in the post-roll transactions, just in case we crash before committing > > the final transaction. This mechanism enables log recovery to finish > > what was already started. > > > > Signed-off-by: Darrick J. Wong > > --- > > fs/xfs/Makefile | 1 > > fs/xfs/libxfs/xfs_log_format.h | 59 ++++++ > > fs/xfs/xfs_refcount_item.c | 406 ++++++++++++++++++++++++++++++++++++++++ > > fs/xfs/xfs_refcount_item.h | 102 ++++++++++ > > fs/xfs/xfs_super.c | 18 ++ > > 5 files changed, 584 insertions(+), 2 deletions(-) > > create mode 100644 fs/xfs/xfs_refcount_item.c > > create mode 100644 fs/xfs/xfs_refcount_item.h > > > > > ... > > diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c > > new file mode 100644 > > index 0000000..ac52b02 > > --- /dev/null > > +++ b/fs/xfs/xfs_refcount_item.c > > @@ -0,0 +1,406 @@ > ... > > +/* > > + * This is called to fill in the vector of log iovecs for the > > + * given cud log item. We use only 1 iovec, and we point that > > + * at the cud_log_format structure embedded in the cud item. > > + * It is at this point that we assert that all of the extent > > + * slots in the cud item have been filled. > > + */ > > +STATIC void > > +xfs_cud_item_format( > > + struct xfs_log_item *lip, > > + struct xfs_log_vec *lv) > > +{ > > + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); > > + struct xfs_log_iovec *vecp = NULL; > > + > > + cudp->cud_format.cud_type = XFS_LI_CUD; > > + cudp->cud_format.cud_size = 1; > > + > > + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, > > + sizeof(struct xfs_rud_log_format)); > > They're the same size, but: xfs_cud_log_format Yikes, good catch! --D > > Brian > > > +} > > + > > +/* > > + * Pinning has no meaning for an cud item, so just return. > > + */ > > +STATIC void > > +xfs_cud_item_pin( > > + struct xfs_log_item *lip) > > +{ > > +} > > + > > +/* > > + * Since pinning has no meaning for an cud item, unpinning does > > + * not either. > > + */ > > +STATIC void > > +xfs_cud_item_unpin( > > + struct xfs_log_item *lip, > > + int remove) > > +{ > > +} > > + > > +/* > > + * There isn't much you can do to push on an cud item. It is simply stuck > > + * waiting for the log to be flushed to disk. > > + */ > > +STATIC uint > > +xfs_cud_item_push( > > + struct xfs_log_item *lip, > > + struct list_head *buffer_list) > > +{ > > + return XFS_ITEM_PINNED; > > +} > > + > > +/* > > + * The CUD is either committed or aborted if the transaction is cancelled. If > > + * the transaction is cancelled, drop our reference to the CUI and free the > > + * CUD. > > + */ > > +STATIC void > > +xfs_cud_item_unlock( > > + struct xfs_log_item *lip) > > +{ > > + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); > > + > > + if (lip->li_flags & XFS_LI_ABORTED) { > > + xfs_cui_release(cudp->cud_cuip); > > + kmem_zone_free(xfs_cud_zone, cudp); > > + } > > +} > > + > > +/* > > + * When the cud item is committed to disk, all we need to do is delete our > > + * reference to our partner cui item and then free ourselves. Since we're > > + * freeing ourselves we must return -1 to keep the transaction code from > > + * further referencing this item. > > + */ > > +STATIC xfs_lsn_t > > +xfs_cud_item_committed( > > + struct xfs_log_item *lip, > > + xfs_lsn_t lsn) > > +{ > > + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); > > + > > + /* > > + * Drop the CUI reference regardless of whether the CUD has been > > + * aborted. Once the CUD transaction is constructed, it is the sole > > + * responsibility of the CUD to release the CUI (even if the CUI is > > + * aborted due to log I/O error). > > + */ > > + xfs_cui_release(cudp->cud_cuip); > > + kmem_zone_free(xfs_cud_zone, cudp); > > + > > + return (xfs_lsn_t)-1; > > +} > > + > > +/* > > + * The CUD dependency tracking op doesn't do squat. It can't because > > + * it doesn't know where the free extent is coming from. The dependency > > + * tracking has to be handled by the "enclosing" metadata object. For > > + * example, for inodes, the inode is locked throughout the extent freeing > > + * so the dependency should be recorded there. > > + */ > > +STATIC void > > +xfs_cud_item_committing( > > + struct xfs_log_item *lip, > > + xfs_lsn_t lsn) > > +{ > > +} > > + > > +/* > > + * This is the ops vector shared by all cud log items. > > + */ > > +static const struct xfs_item_ops xfs_cud_item_ops = { > > + .iop_size = xfs_cud_item_size, > > + .iop_format = xfs_cud_item_format, > > + .iop_pin = xfs_cud_item_pin, > > + .iop_unpin = xfs_cud_item_unpin, > > + .iop_unlock = xfs_cud_item_unlock, > > + .iop_committed = xfs_cud_item_committed, > > + .iop_push = xfs_cud_item_push, > > + .iop_committing = xfs_cud_item_committing, > > +}; > > + > > +/* > > + * Allocate and initialize an cud item with the given number of extents. > > + */ > > +struct xfs_cud_log_item * > > +xfs_cud_init( > > + struct xfs_mount *mp, > > + struct xfs_cui_log_item *cuip) > > + > > +{ > > + struct xfs_cud_log_item *cudp; > > + > > + cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); > > + xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); > > + cudp->cud_cuip = cuip; > > + cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; > > + > > + return cudp; > > +} > > diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h > > new file mode 100644 > > index 0000000..7b8f56b > > --- /dev/null > > +++ b/fs/xfs/xfs_refcount_item.h > > @@ -0,0 +1,102 @@ > > +/* > > + * Copyright (C) 2016 Oracle. All Rights Reserved. > > + * > > + * Author: Darrick J. Wong > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU General Public License > > + * as published by the Free Software Foundation; either version 2 > > + * of the License, or (at your option) any later version. > > + * > > + * This program is distributed in the hope that it would be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with this program; if not, write the Free Software Foundation, > > + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. > > + */ > > +#ifndef __XFS_REFCOUNT_ITEM_H__ > > +#define __XFS_REFCOUNT_ITEM_H__ > > + > > +/* > > + * There are (currently) two pairs of refcount btree redo item types: > > + * increase and decrease. The log items for these are CUI (refcount > > + * update intent) and CUD (refcount update done). The redo item type > > + * is encoded in the flags field of each xfs_map_extent. > > + * > > + * *I items should be recorded in the *first* of a series of rolled > > + * transactions, and the *D items should be recorded in the same > > + * transaction that records the associated refcountbt updates. > > + * > > + * Should the system crash after the commit of the first transaction > > + * but before the commit of the final transaction in a series, log > > + * recovery will use the redo information recorded by the intent items > > + * to replay the refcountbt metadata updates. > > + */ > > + > > +/* kernel only CUI/CUD definitions */ > > + > > +struct xfs_mount; > > +struct kmem_zone; > > + > > +/* > > + * Max number of extents in fast allocation path. > > + */ > > +#define XFS_CUI_MAX_FAST_EXTENTS 16 > > + > > +/* > > + * Define CUI flag bits. Manipulated by set/clear/test_bit operators. > > + */ > > +#define XFS_CUI_RECOVERED 1 > > + > > +/* > > + * This is the "refcount update intent" log item. It is used to log > > + * the fact that some reverse mappings need to change. It is used in > > + * conjunction with the "refcount update done" log item described > > + * below. > > + * > > + * These log items follow the same rules as struct xfs_efi_log_item; > > + * see the comments about that structure (in xfs_extfree_item.h) for > > + * more details. > > + */ > > +struct xfs_cui_log_item { > > + struct xfs_log_item cui_item; > > + atomic_t cui_refcount; > > + atomic_t cui_next_extent; > > + unsigned long cui_flags; /* misc flags */ > > + struct xfs_cui_log_format cui_format; > > +}; > > + > > +static inline size_t > > +xfs_cui_log_item_sizeof( > > + unsigned int nr) > > +{ > > + return offsetof(struct xfs_cui_log_item, cui_format) + > > + xfs_cui_log_format_sizeof(nr); > > +} > > + > > +/* > > + * This is the "refcount update done" log item. It is used to log the > > + * fact that some refcountbt updates mentioned in an earlier cui item > > + * have been performed. > > + */ > > +struct xfs_cud_log_item { > > + struct xfs_log_item cud_item; > > + struct xfs_cui_log_item *cud_cuip; > > + struct xfs_cud_log_format cud_format; > > +}; > > + > > +extern struct kmem_zone *xfs_cui_zone; > > +extern struct kmem_zone *xfs_cud_zone; > > + > > +struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint); > > +struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, > > + struct xfs_cui_log_item *); > > +int xfs_cui_copy_format(struct xfs_log_iovec *buf, > > + struct xfs_cui_log_format *dst_cui_fmt); > > +void xfs_cui_item_free(struct xfs_cui_log_item *); > > +void xfs_cui_release(struct xfs_cui_log_item *); > > + > > +#endif /* __XFS_REFCOUNT_ITEM_H__ */ > > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c > > index 2d092f9..abe69c6 100644 > > --- a/fs/xfs/xfs_super.c > > +++ b/fs/xfs/xfs_super.c > > @@ -47,6 +47,7 @@ > > #include "xfs_sysfs.h" > > #include "xfs_ondisk.h" > > #include "xfs_rmap_item.h" > > +#include "xfs_refcount_item.h" > > > > #include > > #include > > @@ -1788,8 +1789,23 @@ xfs_init_zones(void) > > if (!xfs_rui_zone) > > goto out_destroy_rud_zone; > > > > + xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item), > > + "xfs_cud_item"); > > + if (!xfs_cud_zone) > > + goto out_destroy_rui_zone; > > + > > + xfs_cui_zone = kmem_zone_init( > > + xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), > > + "xfs_cui_item"); > > + if (!xfs_cui_zone) > > + goto out_destroy_cud_zone; > > + > > return 0; > > > > + out_destroy_cud_zone: > > + kmem_zone_destroy(xfs_cud_zone); > > + out_destroy_rui_zone: > > + kmem_zone_destroy(xfs_rui_zone); > > out_destroy_rud_zone: > > kmem_zone_destroy(xfs_rud_zone); > > out_destroy_icreate_zone: > > @@ -1832,6 +1848,8 @@ xfs_destroy_zones(void) > > * destroy caches. > > */ > > rcu_barrier(); > > + kmem_zone_destroy(xfs_cui_zone); > > + kmem_zone_destroy(xfs_cud_zone); > > kmem_zone_destroy(xfs_rui_zone); > > kmem_zone_destroy(xfs_rud_zone); > > kmem_zone_destroy(xfs_icreate_zone); > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html