Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: avoid delayed metadata items during commits
  btrfs: fix uninitialized return value
  btrfs: fix wrong reservation when doing delayed inode operations
  btrfs: Remove unused sysfs code
  btrfs: fix dereference of ERR_PTR value
  Btrfs: fix relocation races
  Btrfs: set no_trans_join after trying to expand the transaction
  Btrfs: protect the pending_snapshots list with trans_lock
  Btrfs: fix path leakage on subvol deletion
  Btrfs: drop the delalloc_bytes check in shrink_delalloc
  Btrfs: check the return value from set_anon_super
This commit is contained in:
Linus Torvalds 2011-06-20 08:58:53 -07:00
commit 90a800de0a
11 changed files with 174 additions and 189 deletions

View file

@ -967,6 +967,12 @@ struct btrfs_fs_info {
struct srcu_struct subvol_srcu;
spinlock_t trans_lock;
/*
* the reloc mutex goes with the trans lock, it is taken
* during commit to protect us from the relocation code
*/
struct mutex reloc_mutex;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
@ -1172,6 +1178,14 @@ struct btrfs_root {
u32 type;
u64 highest_objectid;
/* btrfs_record_root_in_trans is a multi-step process,
* and it can race with the balancing code. But the
* race is very small, and only the first time the root
* is added to each transaction. So in_trans_setup
* is used to tell us when more checks are required
*/
unsigned long in_trans_setup;
int ref_cows;
int track_dirty;
int in_radix;
@ -1181,7 +1195,6 @@ struct btrfs_root {
struct btrfs_key defrag_max;
int defrag_running;
char *name;
int in_sysfs;
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;

View file

@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
item->data_len = data_len;
item->ins_or_del = 0;
item->bytes_reserved = 0;
item->block_rsv = NULL;
item->delayed_node = NULL;
atomic_set(&item->refs, 1);
}
@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
if (!ret) {
if (!ret)
item->bytes_reserved = num_bytes;
item->block_rsv = dst_rsv;
}
return ret;
}
@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
struct btrfs_block_rsv *rsv;
if (!item->bytes_reserved)
return;
btrfs_block_rsv_release(root, item->block_rsv,
rsv = &root->fs_info->global_block_rsv;
btrfs_block_rsv_release(root, rsv,
item->bytes_reserved);
}
@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret = 0;
path = btrfs_alloc_path();
@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->global_block_rsv;
delayed_root = btrfs_get_delayed_root(root);
curr_node = btrfs_first_delayed_node(delayed_root);
@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
}
btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
}
@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret;
path = btrfs_alloc_path();
@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &node->root->fs_info->global_block_rsv;
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
}
@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
unsigned long nr = 0;
int need_requeue = 0;
int ret;
@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
if (IS_ERR(trans))
goto free_path;
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->global_block_rsv;
ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, root,
@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
nr = trans->blocks_used;
trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
__btrfs_btree_balance_dirty(root, nr);
free_path:
@ -1222,6 +1237,13 @@ again:
return 0;
}
void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
delayed_root = btrfs_get_delayed_root(root);
WARN_ON(btrfs_first_delayed_node(delayed_root));
}
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;

View file

@ -75,7 +75,6 @@ struct btrfs_delayed_item {
struct list_head tree_list; /* used for batch insert/delete items */
struct list_head readdir_list; /* used for readdir items */
u64 bytes_reserved;
struct btrfs_block_rsv *block_rsv;
struct btrfs_delayed_node *delayed_node;
atomic_t refs;
int ins_or_del;
@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
/* for init */
int __init btrfs_delayed_inode_init(void);
void btrfs_delayed_inode_exit(void);
/* for debugging */
void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
#endif

View file

@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->last_trans = 0;
root->highest_objectid = 0;
root->name = NULL;
root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
@ -1300,19 +1299,21 @@ again:
return root;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
if (!root->free_ino_ctl)
goto fail;
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
GFP_NOFS);
if (!root->free_ino_pinned)
if (!root->free_ino_pinned || !root->free_ino_ctl) {
ret = -ENOMEM;
goto fail;
}
btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);
set_anon_super(&root->anon_super, NULL);
ret = set_anon_super(&root->anon_super, NULL);
if (ret)
goto fail;
if (btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;

View file

@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (reserved == 0)
return 0;
/* nothing to shrink - nothing to reclaim */
if (root->fs_info->delalloc_bytes == 0)
return 0;
max_reclaim = min(reserved, to_reclaim);
while (loops < 1024) {

View file

@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
btrfs_free_path(path);
return 0;
}

View file

@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
BUG_ON(ret);
spin_lock(&root->fs_info->trans_lock);
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
spin_unlock(&root->fs_info->trans_lock);
if (async_transid) {
*async_transid = trans->transid;
ret = btrfs_commit_transaction_async(trans,

View file

@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
int ret;
if (!root->reloc_root)
return 0;
goto out;
reloc_root = root->reloc_root;
root_item = &reloc_root->root_item;
@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&reloc_root->root_key, root_item);
BUG_ON(ret);
out:
return 0;
}
@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
u64 num_bytes = 0;
int ret;
spin_lock(&root->fs_info->trans_lock);
mutex_lock(&root->fs_info->reloc_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
rc->merging_rsv_size += rc->nodes_relocated * 2;
spin_unlock(&root->fs_info->trans_lock);
mutex_unlock(&root->fs_info->reloc_mutex);
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
int ret;
again:
root = rc->extent_root;
spin_lock(&root->fs_info->trans_lock);
/*
* this serializes us with btrfs_record_root_in_transaction,
* we have to make sure nobody is in the middle of
* adding their roots to the list while we are
* doing this splice
*/
mutex_lock(&root->fs_info->reloc_mutex);
list_splice_init(&rc->reloc_roots, &reloc_roots);
spin_unlock(&root->fs_info->trans_lock);
mutex_unlock(&root->fs_info->reloc_mutex);
while (!list_empty(&reloc_roots)) {
found = 1;
@ -3590,17 +3600,19 @@ next:
static void set_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
spin_lock(&fs_info->trans_lock);
mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = rc;
spin_unlock(&fs_info->trans_lock);
mutex_unlock(&fs_info->reloc_mutex);
}
static void unset_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
spin_lock(&fs_info->trans_lock);
mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = NULL;
spin_unlock(&fs_info->trans_lock);
mutex_unlock(&fs_info->reloc_mutex);
}
static int check_extent_flags(u64 flags)

View file

@ -28,152 +28,6 @@
#include "disk-io.h"
#include "transaction.h"
static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)btrfs_root_used(&root->root_item));
}
static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)btrfs_root_limit(&root->root_item));
}
static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
}
static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
}
static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
}
/* this is for root attrs (subvols/snapshots) */
struct btrfs_root_attr {
struct attribute attr;
ssize_t (*show)(struct btrfs_root *, char *);
ssize_t (*store)(struct btrfs_root *, const char *, size_t);
};
#define ROOT_ATTR(name, mode, show, store) \
static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
show, store)
ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
static struct attribute *btrfs_root_attrs[] = {
&btrfs_root_attr_blocks_used.attr,
&btrfs_root_attr_block_limit.attr,
NULL,
};
/* this is for super attrs (actual full fs) */
struct btrfs_super_attr {
struct attribute attr;
ssize_t (*show)(struct btrfs_fs_info *, char *);
ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
};
#define SUPER_ATTR(name, mode, show, store) \
static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
show, store)
SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
static struct attribute *btrfs_super_attrs[] = {
&btrfs_super_attr_blocks_used.attr,
&btrfs_super_attr_total_blocks.attr,
&btrfs_super_attr_blocksize.attr,
NULL,
};
static ssize_t btrfs_super_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
super_kobj);
struct btrfs_super_attr *a = container_of(attr,
struct btrfs_super_attr,
attr);
return a->show ? a->show(fs, buf) : 0;
}
static ssize_t btrfs_super_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
{
struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
super_kobj);
struct btrfs_super_attr *a = container_of(attr,
struct btrfs_super_attr,
attr);
return a->store ? a->store(fs, buf, len) : 0;
}
static ssize_t btrfs_root_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct btrfs_root *root = container_of(kobj, struct btrfs_root,
root_kobj);
struct btrfs_root_attr *a = container_of(attr,
struct btrfs_root_attr,
attr);
return a->show ? a->show(root, buf) : 0;
}
static ssize_t btrfs_root_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
{
struct btrfs_root *root = container_of(kobj, struct btrfs_root,
root_kobj);
struct btrfs_root_attr *a = container_of(attr,
struct btrfs_root_attr,
attr);
return a->store ? a->store(root, buf, len) : 0;
}
static void btrfs_super_release(struct kobject *kobj)
{
struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
super_kobj);
complete(&fs->kobj_unregister);
}
static void btrfs_root_release(struct kobject *kobj)
{
struct btrfs_root *root = container_of(kobj, struct btrfs_root,
root_kobj);
complete(&root->kobj_unregister);
}
static const struct sysfs_ops btrfs_super_attr_ops = {
.show = btrfs_super_attr_show,
.store = btrfs_super_attr_store,
};
static const struct sysfs_ops btrfs_root_attr_ops = {
.show = btrfs_root_attr_show,
.store = btrfs_root_attr_store,
};
/* /sys/fs/btrfs/ entry */
static struct kset *btrfs_kset;

View file

@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
* to make sure the old root from before we joined the transaction is deleted
* when the transaction commits
*/
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (root->ref_cows && root->last_trans < trans->transid) {
WARN_ON(root == root->fs_info->extent_root);
WARN_ON(root->commit_root != root->node);
/*
* see below for in_trans_setup usage rules
* we have the reloc mutex held now, so there
* is only one writer in this function
*/
root->in_trans_setup = 1;
/* make sure readers find in_trans_setup before
* they find our root->last_trans update
*/
smp_wmb();
spin_lock(&root->fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid) {
spin_unlock(&root->fs_info->fs_roots_radix_lock);
return 0;
}
root->last_trans = trans->transid;
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&root->fs_info->fs_roots_radix_lock);
root->last_trans = trans->transid;
/* this is pretty tricky. We don't want to
* take the relocation lock in btrfs_record_root_in_trans
* unless we're really doing the first setup for this root in
* this transaction.
*
* Normally we'd use root->last_trans as a flag to decide
* if we want to take the expensive mutex.
*
* But, we have to set root->last_trans before we
* init the relocation root, otherwise, we trip over warnings
* in ctree.c. The solution used here is to flag ourselves
* with root->in_trans_setup. When this is 1, we're still
* fixing up the reloc trees and everyone must wait.
*
* When this is zero, they can trust root->last_trans and fly
* through btrfs_record_root_in_trans without having to take the
* lock. smp_wmb() makes sure that all the writes above are
* done before we pop in the zero below
*/
btrfs_init_reloc_root(trans, root);
smp_wmb();
root->in_trans_setup = 0;
}
return 0;
}
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (!root->ref_cows)
return 0;
/*
* see record_root_in_trans for comments about in_trans_setup usage
* and barriers
*/
smp_rmb();
if (root->last_trans == trans->transid &&
!root->in_trans_setup)
return 0;
mutex_lock(&root->fs_info->reloc_mutex);
record_root_in_trans(trans, root);
mutex_unlock(&root->fs_info->reloc_mutex);
return 0;
}
/* wait for commit against the current transaction to become unblocked
* when this is done, it is safe to start a new transaction, but the current
* transaction might not be fully on disk.
@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
parent = dget_parent(dentry);
parent_inode = parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
btrfs_record_root_in_trans(trans, parent_root);
record_root_in_trans(trans, parent_root);
/*
* insert the directory item
@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
btrfs_record_root_in_trans(trans, root);
/*
* pull in the delayed directory update
* and the delayed inode item
* otherwise we corrupt the FS during
* snapshot
*/
ret = btrfs_run_delayed_items(trans, root);
BUG_ON(ret);
record_root_in_trans(trans, root);
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
btrfs_check_and_init_root_item(new_root_item);
@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
int ret;
list_for_each_entry(pending, head, list) {
/*
* We must deal with the delayed items before creating
* snapshots, or we will create a snapthot with inconsistent
* information.
*/
ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
BUG_ON(ret);
ret = create_pending_snapshot(trans, fs_info, pending);
BUG_ON(ret);
}
@ -1241,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
schedule_timeout(1);
finish_wait(&cur_trans->writer_wait, &wait);
spin_lock(&root->fs_info->trans_lock);
root->fs_info->trans_no_join = 1;
spin_unlock(&root->fs_info->trans_lock);
} while (atomic_read(&cur_trans->num_writers) > 1 ||
(should_grow && cur_trans->num_joined != joined));
ret = create_pending_snapshots(trans, root->fs_info);
BUG_ON(ret);
/*
* Ok now we need to make sure to block out any other joins while we
* commit the transaction. We could have started a join before setting
* no_join so make sure to wait for num_writers to == 1 again.
*/
spin_lock(&root->fs_info->trans_lock);
root->fs_info->trans_no_join = 1;
spin_unlock(&root->fs_info->trans_lock);
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
/*
* the reloc mutex makes sure that we stop
* the balancing code from coming in and moving
* extents around in the middle of the commit
*/
mutex_lock(&root->fs_info->reloc_mutex);
ret = btrfs_run_delayed_items(trans, root);
BUG_ON(ret);
ret = create_pending_snapshots(trans, root->fs_info);
BUG_ON(ret);
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
/*
* make sure none of the code above managed to slip in a
* delayed item
*/
btrfs_assert_delayed_root_empty(root);
WARN_ON(cur_trans != trans->transaction);
btrfs_scrub_pause(root);
@ -1312,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
root->fs_info->trans_no_join = 0;
spin_unlock(&root->fs_info->trans_lock);
mutex_unlock(&root->fs_info->reloc_mutex);
wake_up(&root->fs_info->transaction_wait);

View file

@ -3177,7 +3177,7 @@ again:
tmp_key.offset = (u64)-1;
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
BUG_ON(!wc.replay_dest);
BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
wc.replay_dest->log_root = log;
btrfs_record_root_in_trans(trans, wc.replay_dest);