diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index bcbb66150838..02a80984aa05 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -254,6 +254,20 @@ xfs_buf_item_format( vecp++; nvecs = 1; + /* + * If it is an inode buffer, transfer the in-memory state to the + * format flags and clear the in-memory state. We do not transfer + * this state if the inode buffer allocation has not yet been committed + * to the log as setting the XFS_BLI_INODE_BUF flag will prevent + * correct replay of the inode allocation. + */ + if (bip->bli_flags & XFS_BLI_INODE_BUF) { + if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && + xfs_log_item_in_current_chkpt(&bip->bli_item))) + bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; + bip->bli_flags &= ~XFS_BLI_INODE_BUF; + } + if (bip->bli_flags & XFS_BLI_STALE) { /* * The buffer is stale, so all we need to log diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 8cbb82b1d95c..f20bb472d582 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -69,6 +69,7 @@ typedef struct xfs_buf_log_format { #define XFS_BLI_LOGGED 0x08 #define XFS_BLI_INODE_ALLOC_BUF 0x10 #define XFS_BLI_STALE_INODE 0x20 +#define XFS_BLI_INODE_BUF 0x40 #define XFS_BLI_FLAGS \ { XFS_BLI_HOLD, "HOLD" }, \ @@ -76,7 +77,8 @@ typedef struct xfs_buf_log_format { { XFS_BLI_STALE, "STALE" }, \ { XFS_BLI_LOGGED, "LOGGED" }, \ { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ - { XFS_BLI_STALE_INODE, "STALE_INODE" } + { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ + { XFS_BLI_INODE_BUF, "INODE_BUF" } #ifdef __KERNEL__ diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 4a0c57432e8f..04c78e642cc8 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -198,6 +198,7 @@ xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_log_vec *log_vector, xfs_lsn_t *commit_lsn, int flags); +bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); #endif diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 9b21f80f31ce..bb17cc044bf3 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -199,6 +199,15 @@ xlog_cil_insert( list_move_tail(&item->li_cil, &cil->xc_cil); ctx->nvecs += diff_iovecs; + /* + * If this is the first time the item is being committed to the CIL, + * store the sequence number on the log item so we can tell + * in future commits whether this is the first checkpoint the item is + * being committed into. + */ + if (!item->li_seq) + item->li_seq = ctx->sequence; + /* * Now transfer enough transaction reservation to the context ticket * for the checkpoint. The context ticket is special - the unit @@ -325,6 +334,10 @@ xlog_cil_free_logvec( * For more specific information about the order of operations in * xfs_log_commit_cil() please refer to the comments in * xfs_trans_commit_iclog(). + * + * Called with the context lock already held in read mode to lock out + * background commit, returns without it held once background commits are + * allowed again. */ int xfs_log_commit_cil( @@ -678,3 +691,35 @@ restart: spin_unlock(&cil->xc_cil_lock); return commit_lsn; } + +/* + * Check if the current log item was first committed in this sequence. + * We can't rely on just the log item being in the CIL, we have to check + * the recorded commit sequence number. + * + * Note: for this to be used in a non-racy manner, it has to be called with + * CIL flushing locked out. As a result, it should only be used during the + * transaction commit process when deciding what to format into the item. + */ +bool +xfs_log_item_in_current_chkpt( + struct xfs_log_item *lip) +{ + struct xfs_cil_ctx *ctx; + + if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG)) + return false; + if (list_empty(&lip->li_cil)) + return false; + + ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; + + /* + * li_seq is written on the first commit of a log item to record the + * first checkpoint it is written to. Hence if it is different to the + * current sequence, we're in a new checkpoint. + */ + if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0) + return false; + return true; +} diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index b1ea20c66b3e..8c69e7824f68 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -835,6 +835,7 @@ typedef struct xfs_log_item { /* delayed logging */ struct list_head li_cil; /* CIL pointers */ struct xfs_log_vec *li_lv; /* active log vector */ + xfs_lsn_t li_seq; /* CIL commit seq */ } xfs_log_item_t; #define XFS_LI_IN_AIL 0x1 diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 3390c3e7441b..63d81a22f4fd 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -792,7 +792,7 @@ xfs_trans_binval( XFS_BUF_UNDELAYWRITE(bp); XFS_BUF_STALE(bp); bip->bli_flags |= XFS_BLI_STALE; - bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY); + bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; bip->bli_format.blf_flags |= XFS_BLF_CANCEL; memset((char *)(bip->bli_format.blf_data_map), 0, @@ -802,16 +802,16 @@ xfs_trans_binval( } /* - * This call is used to indicate that the buffer contains on-disk - * inodes which must be handled specially during recovery. They - * require special handling because only the di_next_unlinked from - * the inodes in the buffer should be recovered. The rest of the - * data in the buffer is logged via the inodes themselves. + * This call is used to indicate that the buffer contains on-disk inodes which + * must be handled specially during recovery. They require special handling + * because only the di_next_unlinked from the inodes in the buffer should be + * recovered. The rest of the data in the buffer is logged via the inodes + * themselves. * - * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log - * format structure so that we'll know what to do at recovery time. + * All we do is set the XFS_BLI_INODE_BUF flag in the items flags so it can be + * transferred to the buffer's log format structure so that we'll know what to + * do at recovery time. */ -/* ARGSUSED */ void xfs_trans_inode_buf( xfs_trans_t *tp, @@ -826,7 +826,7 @@ xfs_trans_inode_buf( bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); - bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; + bip->bli_flags |= XFS_BLI_INODE_BUF; } /*