Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix kconfig typo and extra whitespace
  ext4: fix build failure without procfs
  ext4: add an option to control error handling on file data
  jbd2: don't dirty original metadata buffer on abort
  ext4: add checks for errors from jbd2
  jbd2: fix error handling for checkpoint io
  jbd2: abort when failed to log metadata buffers
This commit is contained in:
Linus Torvalds 2008-10-12 16:10:29 -07:00
commit 3280fb3139
10 changed files with 131 additions and 32 deletions

View file

@ -223,6 +223,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error.
errors=continue Keep going on a filesystem error.
errors=panic Panic and halt the machine if an error occurs.
data_err=ignore(*) Just print an error message if an error occurs
in a file data buffer in ordered mode.
data_err=abort Abort the journal if an error occurs in a file
data buffer in ordered mode.
grpid Give objects the same group ID as their creator.
bsdgroups

View file

@ -170,8 +170,8 @@ config EXT4DEV_COMPAT
help
Starting with 2.6.28, the name of the ext4 filesystem was
renamed from ext4dev to ext4. Unfortunately there are some
lagecy userspace programs (such as klibc's fstype) have
"ext4dev" hardcoded.
legacy userspace programs (such as klibc's fstype) have
"ext4dev" hardcoded.
To enable backwards compatibility so that systems that are
still expecting to mount ext4 filesystems using ext4dev,

View file

@ -540,6 +540,8 @@ do { \
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt

View file

@ -192,7 +192,7 @@ setversion_out:
case EXT4_IOC_GROUP_EXTEND: {
ext4_fsblk_t n_blocks_count;
struct super_block *sb = inode->i_sb;
int err;
int err, err2;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@ -206,8 +206,10 @@ setversion_out:
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
jbd2_journal_flush(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err == 0)
err = err2;
mnt_drop_write(filp->f_path.mnt);
return err;
@ -215,7 +217,7 @@ setversion_out:
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
struct super_block *sb = inode->i_sb;
int err;
int err, err2;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@ -230,8 +232,10 @@ setversion_out:
err = ext4_group_add(sb, &input);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
jbd2_journal_flush(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err == 0)
err = err2;
mnt_drop_write(filp->f_path.mnt);
return err;

View file

@ -507,7 +507,8 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
jbd2_journal_destroy(sbi->s_journal);
if (jbd2_journal_destroy(sbi->s_journal) < 0)
ext4_abort(sb, __func__, "Couldn't clean up the journal");
sbi->s_journal = NULL;
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@ -777,6 +778,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_printf(seq, ",inode_readahead_blks=%u",
sbi->s_inode_readahead_blks);
if (test_opt(sb, DATA_ERR_ABORT))
seq_puts(seq, ",data_err=abort");
ext4_show_quota_options(seq, sb);
return 0;
}
@ -906,6 +910,7 @@ enum {
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@ -952,6 +957,8 @@ static match_table_t tokens = {
{Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"},
{Opt_data_err_abort, "data_err=abort"},
{Opt_data_err_ignore, "data_err=ignore"},
{Opt_offusrjquota, "usrjquota="},
{Opt_usrjquota, "usrjquota=%s"},
{Opt_offgrpjquota, "grpjquota="},
@ -1186,6 +1193,12 @@ static int parse_options(char *options, struct super_block *sb,
sbi->s_mount_opt |= data_opt;
}
break;
case Opt_data_err_abort:
set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
break;
case Opt_data_err_ignore:
clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
break;
#ifdef CONFIG_QUOTA
case Opt_usrjquota:
qtype = USRQUOTA;
@ -2218,6 +2231,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
#ifdef CONFIG_PROC_FS
if (ext4_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
@ -2225,6 +2239,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
&ext4_ui_proc_fops,
&sbi->s_inode_readahead_blks);
#endif
bgl_lock_init(&sbi->s_blockgroup_lock);
@ -2534,6 +2549,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JBD2_BARRIER;
else
journal->j_flags &= ~JBD2_BARRIER;
if (test_opt(sb, DATA_ERR_ABORT))
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
else
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
spin_unlock(&journal->j_state_lock);
}
@ -2853,7 +2872,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
journal_t *journal = EXT4_SB(sb)->s_journal;
jbd2_journal_lock_updates(journal);
jbd2_journal_flush(journal);
if (jbd2_journal_flush(journal) < 0)
goto out;
lock_super(sb);
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
sb->s_flags & MS_RDONLY) {
@ -2862,6 +2883,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
ext4_commit_super(sb, es, 1);
}
unlock_super(sb);
out:
jbd2_journal_unlock_updates(journal);
}
@ -2962,7 +2985,13 @@ static void ext4_write_super_lockfs(struct super_block *sb)
/* Now we set up the journal barrier. */
jbd2_journal_lock_updates(journal);
jbd2_journal_flush(journal);
/*
* We don't want to clear needs_recovery flag when we failed
* to flush the journal.
*/
if (jbd2_journal_flush(journal) < 0)
return;
/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@ -3402,8 +3431,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* otherwise be livelocked...
*/
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
jbd2_journal_flush(EXT4_SB(sb)->s_journal);
err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err) {
path_put(&nd.path);
return err;
}
}
err = vfs_quota_on_path(sb, type, format_id, &nd.path);

View file

@ -94,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
int ret = 0;
struct buffer_head *bh = jh2bh(jh);
if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
@ -176,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
* buffers. Note that we take the buffers in the opposite ordering
* from the one in which they were submitted for IO.
*
* Return 0 on success, and return <0 if some buffers have failed
* to be written out.
*
* Called with j_list_lock held.
*/
static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
{
struct journal_head *jh;
struct buffer_head *bh;
tid_t this_tid;
int released = 0;
int ret = 0;
this_tid = transaction->t_tid;
restart:
/* Did somebody clean up the transaction in the meanwhile? */
if (journal->j_checkpoint_transactions != transaction ||
transaction->t_tid != this_tid)
return;
return ret;
while (!released && transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
@ -210,6 +215,9 @@ restart:
spin_lock(&journal->j_list_lock);
goto restart;
}
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
/*
* Now in whatever state the buffer currently is, we know that
* it has been written out and so we can drop it from the list
@ -219,6 +227,8 @@ restart:
jbd2_journal_remove_journal_head(bh);
__brelse(bh);
}
return ret;
}
#define NR_BATCH 64
@ -242,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
* Try to flush one buffer from the checkpoint list to disk.
*
* Return 1 if something happened which requires us to abort the current
* scan of the checkpoint list.
* scan of the checkpoint list. Return <0 if the buffer has failed to
* be written out.
*
* Called with j_list_lock held and drops it if 1 is returned
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@ -274,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
jbd2_log_wait_commit(journal, tid);
ret = 1;
} else if (!buffer_dirty(bh)) {
ret = 1;
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
@ -281,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
__brelse(bh);
ret = 1;
} else {
/*
* Important: we are about to write the buffer, and
@ -314,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
* to disk. We submit larger chunks of data at once.
*
* The journal should be locked before calling this function.
* Called with j_checkpoint_mutex held.
*/
int jbd2_log_do_checkpoint(journal_t *journal)
{
@ -339,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
* OK, we need to start writing disk blocks. Take one transaction
* and write it.
*/
result = 0;
spin_lock(&journal->j_list_lock);
if (!journal->j_checkpoint_transactions)
goto out;
@ -357,7 +372,7 @@ restart:
int batch_count = 0;
struct buffer_head *bhs[NR_BATCH];
struct journal_head *jh;
int retry = 0;
int retry = 0, err;
while (!retry && transaction->t_checkpoint_list) {
struct buffer_head *bh;
@ -371,6 +386,8 @@ restart:
}
retry = __process_buffer(journal, jh, bhs, &batch_count,
transaction);
if (retry < 0 && !result)
result = retry;
if (!retry && (need_resched() ||
spin_needbreak(&journal->j_list_lock))) {
spin_unlock(&journal->j_list_lock);
@ -395,14 +412,18 @@ restart:
* Now we have cleaned up the first transaction's checkpoint
* list. Let's clean up the second one
*/
__wait_cp_io(journal, transaction);
err = __wait_cp_io(journal, transaction);
if (!result)
result = err;
}
out:
spin_unlock(&journal->j_list_lock);
result = jbd2_cleanup_journal_tail(journal);
if (result < 0)
return result;
return 0;
jbd2_journal_abort(journal, result);
else
result = jbd2_cleanup_journal_tail(journal);
return (result < 0) ? result : 0;
}
/*
@ -418,8 +439,9 @@ out:
* This is the only part of the journaling code which really needs to be
* aware of transaction aborts. Checkpointing involves writing to the
* main filesystem area rather than to the journal, so it can proceed
* even in abort state, but we must not update the journal superblock if
* we have an abort error outstanding.
* even in abort state, but we must not update the super block if
* checkpointing may have failed. Otherwise, we would lose some metadata
* buffers which should be written-back to the filesystem.
*/
int jbd2_cleanup_journal_tail(journal_t *journal)
@ -428,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
tid_t first_tid;
unsigned long blocknr, freed;
if (is_journal_aborted(journal))
return 1;
/* OK, work out the oldest transaction remaining in the log, and
* the log block it starts at.
*

View file

@ -504,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jh = commit_transaction->t_buffers;
/* If we're in abort mode, we just un-journal the buffer and
release it for background writing. */
release it. */
if (is_journal_aborted(journal)) {
clear_buffer_jbddirty(jh2bh(jh));
JBUFFER_TRACE(jh, "journal is aborting: refile");
jbd2_journal_refile_buffer(journal, jh);
/* If that was the last one, we need to clean up
@ -683,6 +684,8 @@ start_journal_io:
printk(KERN_WARNING
"JBD2: Detected IO errors while flushing file data "
"on %s\n", journal->j_devname);
if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
jbd2_journal_abort(journal, err);
err = 0;
}
@ -783,6 +786,9 @@ wait_for_iobuf:
/* AKPM: bforget here */
}
if (err)
jbd2_journal_abort(journal, err);
jbd_debug(3, "JBD: commit phase 5\n");
if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
@ -881,6 +887,8 @@ restart_loop:
if (buffer_jbddirty(bh)) {
JBUFFER_TRACE(jh, "add to new checkpointing trans");
__jbd2_journal_insert_checkpoint(jh, commit_transaction);
if (is_journal_aborted(journal))
clear_buffer_jbddirty(bh);
JBUFFER_TRACE(jh, "refile for checkpoint writeback");
__jbd2_journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);

View file

@ -1451,9 +1451,12 @@ recovery_error:
*
* Release a journal_t structure once it is no longer in use by the
* journaled object.
* Return <0 if we couldn't clean up the journal.
*/
void jbd2_journal_destroy(journal_t *journal)
int jbd2_journal_destroy(journal_t *journal)
{
int err = 0;
/* Wait for the commit thread to wake up and die. */
journal_kill_thread(journal);
@ -1476,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal)
J_ASSERT(journal->j_checkpoint_transactions == NULL);
spin_unlock(&journal->j_list_lock);
/* We can now mark the journal as empty. */
journal->j_tail = 0;
journal->j_tail_sequence = ++journal->j_transaction_sequence;
if (journal->j_sb_buffer) {
jbd2_journal_update_superblock(journal, 1);
if (!is_journal_aborted(journal)) {
/* We can now mark the journal as empty. */
journal->j_tail = 0;
journal->j_tail_sequence =
++journal->j_transaction_sequence;
jbd2_journal_update_superblock(journal, 1);
} else {
err = -EIO;
}
brelse(journal->j_sb_buffer);
}
@ -1492,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal)
jbd2_journal_destroy_revoke(journal);
kfree(journal->j_wbuf);
kfree(journal);
return err;
}
@ -1717,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal)
spin_lock(&journal->j_list_lock);
while (!err && journal->j_checkpoint_transactions != NULL) {
spin_unlock(&journal->j_list_lock);
mutex_lock(&journal->j_checkpoint_mutex);
err = jbd2_log_do_checkpoint(journal);
mutex_unlock(&journal->j_checkpoint_mutex);
spin_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
if (is_journal_aborted(journal))
return -EIO;
jbd2_cleanup_journal_tail(journal);
/* Finally, mark the journal as really needing no recovery.
@ -1742,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal)
J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
spin_unlock(&journal->j_state_lock);
return err;
return 0;
}
/**

View file

@ -225,7 +225,7 @@ do { \
*/
int jbd2_journal_recover(journal_t *journal)
{
int err;
int err, err2;
journal_superblock_t * sb;
struct recovery_info info;
@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal)
journal->j_transaction_sequence = ++info.end_transaction;
jbd2_journal_clear_revoke(journal);
sync_blockdev(journal->j_fs_dev);
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
return err;
}

View file

@ -967,6 +967,9 @@ struct journal_s
#define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */
#define JBD2_LOADED 0x010 /* The journal superblock has been loaded */
#define JBD2_BARRIER 0x020 /* Use IDE barriers */
#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
* data write error in ordered
* mode */
/*
* Function declarations for the journaling transaction and buffer
@ -1060,7 +1063,7 @@ extern void jbd2_journal_clear_features
(journal_t *, unsigned long, unsigned long, unsigned long);
extern int jbd2_journal_create (journal_t *);
extern int jbd2_journal_load (journal_t *journal);
extern void jbd2_journal_destroy (journal_t *);
extern int jbd2_journal_destroy (journal_t *);
extern int jbd2_journal_recover (journal_t *journal);
extern int jbd2_journal_wipe (journal_t *, int);
extern int jbd2_journal_skip_recovery (journal_t *);