Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (39 commits)
  Btrfs: deal with errors from updating the tree log
  Btrfs: allow subvol deletion by unprivileged user with -o user_subvol_rm_allowed
  Btrfs: make SNAP_DESTROY async
  Btrfs: add SNAP_CREATE_ASYNC ioctl
  Btrfs: add START_SYNC, WAIT_SYNC ioctls
  Btrfs: async transaction commit
  Btrfs: fix deadlock in btrfs_commit_transaction
  Btrfs: fix lockdep warning on clone ioctl
  Btrfs: fix clone ioctl where range is adjacent to extent
  Btrfs: fix delalloc checks in clone ioctl
  Btrfs: drop unused variable in block_alloc_rsv
  Btrfs: cleanup warnings from gcc 4.6 (nonbugs)
  Btrfs: Fix variables set but not read (bugs found by gcc 4.6)
  Btrfs: Use ERR_CAST helpers
  Btrfs: use memdup_user helpers
  Btrfs: fix raid code for removing missing drives
  Btrfs: Switch the extent buffer rbtree into a radix tree
  Btrfs: restructure try_release_extent_buffer()
  Btrfs: use the flusher threads for delalloc throttling
  Btrfs: tune the chunk allocation to 5% of the FS as metadata
  ...

Fix up trivial conflicts in fs/btrfs/super.c and fs/fs-writeback.c, and
remove use of INIT_RCU_HEAD in fs/btrfs/extent_io.c (that init macro was
useless and removed in commit 5e8067adfd: "rcu head remove init")
This commit is contained in:
Linus Torvalds 2010-10-30 09:05:48 -07:00
commit 925d169f5b
27 changed files with 2411 additions and 525 deletions

View file

@ -163,7 +163,6 @@ fail:
*/
static void end_compressed_bio_read(struct bio *bio, int err)
{
struct extent_io_tree *tree;
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
struct page *page;
@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
tree = &BTRFS_I(inode)->io_tree;
ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
cb->start,
cb->orig_bio->bi_io_vec,

View file

@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid)
{
struct extent_buffer *cow;
u32 nritems;
int ret = 0;
int level;
struct btrfs_disk_key disk_key;
@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
level = btrfs_header_level(buf);
nritems = btrfs_header_nritems(buf);
if (level == 0)
btrfs_item_key(buf, &disk_key, 0);
else
@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
int wret;
int pslot;
int orig_slot = path->slots[level];
int err_on_enospc = 0;
u64 orig_ptr;
if (level == 0)
@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
return 0;
if (btrfs_header_nritems(mid) < 2)
err_on_enospc = 1;
btrfs_header_nritems(mid);
left = read_node_slot(root, parent, pslot - 1);
if (left) {
@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
wret = push_node_left(trans, root, left, mid, 1);
if (wret < 0)
ret = wret;
if (btrfs_header_nritems(mid) < 2)
err_on_enospc = 1;
btrfs_header_nritems(mid);
}
/*
@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
int wret;
int pslot;
int orig_slot = path->slots[level];
u64 orig_ptr;
if (level == 0)
return 1;
mid = path->nodes[level];
WARN_ON(btrfs_header_generation(mid) != trans->transid);
orig_ptr = btrfs_node_blockptr(mid, orig_slot);
if (level < BTRFS_MAX_LEVEL - 1)
parent = path->nodes[level + 1];
@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
blocksize = btrfs_level_size(root, level - 1);
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
/*
* we found an up to date block without sleeping, return
* right away
*/
*eb_ret = tmp;
return 0;
if (tmp) {
if (btrfs_buffer_uptodate(tmp, 0)) {
if (btrfs_buffer_uptodate(tmp, gen)) {
/*
* we found an up to date block without
* sleeping, return
* right away
*/
*eb_ret = tmp;
return 0;
}
/* the pages were up to date, but we failed
* the generation number check. Do a full
* read for the generation number that is correct.
* We must do this without dropping locks so
* we can trust our generation number
*/
free_extent_buffer(tmp);
tmp = read_tree_block(root, blocknr, blocksize, gen);
if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
*eb_ret = tmp;
return 0;
}
free_extent_buffer(tmp);
btrfs_release_path(NULL, p);
return -EIO;
}
}
/*
@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
btrfs_unlock_up_safe(p, level + 1);
btrfs_set_path_blocking(p);
if (tmp)
free_extent_buffer(tmp);
free_extent_buffer(tmp);
if (p->reada)
reada_for_search(root, p, level, slot, key->objectid);
@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
int slot;
int i;
int push_space = 0;
int push_items = 0;
@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
u32 this_item_size;
u32 old_left_item_size;
slot = path->slots[1];
if (empty)
nr = min(right_nritems, max_slot);
else
@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
{
int ret = 0;
int slot;
int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
unsigned int size_diff;
int i;
slot_orig = path->slots[0];
leaf = path->nodes[0];
slot = path->slots[0];
@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
{
int ret = 0;
int slot;
int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
unsigned int old_size;
int i;
slot_orig = path->slots[0];
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_key *cpu_key, u32 *data_size,
int nr)
{
struct extent_buffer *leaf;
int ret = 0;
int slot;
int i;
@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
leaf = path->nodes[0];
slot = path->slots[0];
BUG_ON(slot < 0);

View file

@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
*/
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
/* For storing free space cache */
#define BTRFS_FREE_SPACE_OBJECTID -11ULL
/* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
@ -265,6 +268,22 @@ struct btrfs_chunk {
/* additional stripes go here */
} __attribute__ ((__packed__));
#define BTRFS_FREE_SPACE_EXTENT 1
#define BTRFS_FREE_SPACE_BITMAP 2
struct btrfs_free_space_entry {
__le64 offset;
__le64 bytes;
u8 type;
} __attribute__ ((__packed__));
struct btrfs_free_space_header {
struct btrfs_disk_key location;
__le64 generation;
__le64 num_entries;
__le64 num_bitmaps;
} __attribute__ ((__packed__));
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
BUG_ON(num_stripes == 0);
@ -365,8 +384,10 @@ struct btrfs_super_block {
char label[BTRFS_LABEL_SIZE];
__le64 cache_generation;
/* future expansion */
__le64 reserved[32];
__le64 reserved[31];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
} __attribute__ ((__packed__));
@ -375,13 +396,15 @@ struct btrfs_super_block {
* ones specified below then we will fail to mount
*/
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
/*
* A leaf is full of items. offset and size tell us where to find
@ -675,7 +698,8 @@ struct btrfs_block_group_item {
struct btrfs_space_info {
u64 flags;
u64 total_bytes; /* total bytes in the space */
u64 total_bytes; /* total bytes in the space,
this doesn't take mirrors into account */
u64 bytes_used; /* total bytes used,
this does't take mirrors into account */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
@ -687,6 +711,8 @@ struct btrfs_space_info {
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 disk_used; /* total bytes used on disk */
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
int full; /* indicates that we cannot allocate any more
chunks for this space */
@ -750,6 +776,14 @@ enum btrfs_caching_type {
BTRFS_CACHE_FINISHED = 2,
};
enum btrfs_disk_cache_state {
BTRFS_DC_WRITTEN = 0,
BTRFS_DC_ERROR = 1,
BTRFS_DC_CLEAR = 2,
BTRFS_DC_SETUP = 3,
BTRFS_DC_NEED_WRITE = 4,
};
struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
struct btrfs_fs_info *fs_info;
struct inode *inode;
spinlock_t lock;
u64 pinned;
u64 reserved;
@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
int extents_thresh;
int free_extents;
int total_bitmaps;
int ro;
int dirty;
int ro:1;
int dirty:1;
int iref:1;
int disk_cache_state;
/* cache tracking stuff */
int cached;
@ -863,6 +901,7 @@ struct btrfs_fs_info {
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
wait_queue_head_t transaction_wait;
wait_queue_head_t transaction_blocked_wait;
wait_queue_head_t async_submit_wait;
struct btrfs_super_block super_copy;
@ -949,6 +988,7 @@ struct btrfs_fs_info {
struct btrfs_workers endio_meta_workers;
struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers;
struct btrfs_workers endio_freespace_worker;
struct btrfs_workers submit_workers;
/*
* fixup workers take dirty pages that didn't properly go through
@ -1192,6 +1232,9 @@ struct btrfs_root {
#define BTRFS_MOUNT_NOSSD (1 << 9)
#define BTRFS_MOUNT_DISCARD (1 << 10)
#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
write_eb_member(eb, item, struct btrfs_dir_item, location, key);
}
BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
num_entries, 64);
BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
num_bitmaps, 64);
BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
generation, 64);
static inline void btrfs_free_space_key(struct extent_buffer *eb,
struct btrfs_free_space_header *h,
struct btrfs_disk_key *key)
{
read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
}
static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
struct btrfs_free_space_header *h,
struct btrfs_disk_key *key)
{
write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
}
/* struct btrfs_disk_key */
BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
objectid, 64);
@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
incompat_flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
csum_type, 16);
BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
cache_generation, 64);
static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
{
@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
return file->f_path.dentry;
}
static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
{
return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
}
/* extent-tree.c */
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items, int *retries);
int num_items);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int *retries);
u64 num_bytes);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
int sync);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping,
@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
int btrfs_prealloc_file_range_trans(struct inode *inode,
struct btrfs_trans_handle *trans, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */

View file

@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
ret = btrfs_truncate_item(trans, root, path,
item_len - sub_item_len, 1);
}
return 0;
return ret;
}

View file

@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 found_start;
int found_level;
unsigned long len;
struct extent_buffer *eb;
int ret;
@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(1);
goto err;
}
found_level = btrfs_header_level(eb);
csum_tree_block(root, eb, 0);
err:
free_extent_buffer(eb);
@ -481,9 +478,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
end_io_wq->work.flags = 0;
if (bio->bi_rw & REQ_WRITE) {
if (end_io_wq->metadata)
if (end_io_wq->metadata == 1)
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
else if (end_io_wq->metadata == 2)
btrfs_queue_worker(&fs_info->endio_freespace_worker,
&end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
@ -497,6 +497,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
}
}
/*
* For the metadata arg you want
*
* 0 - if data
* 1 - if normal metadta
* 2 - if writing to the free space cache area
*/
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata)
{
@ -533,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
static void run_one_async_start(struct btrfs_work *work)
{
struct btrfs_fs_info *fs_info;
struct async_submit_bio *async;
async = container_of(work, struct async_submit_bio, work);
fs_info = BTRFS_I(async->inode)->root->fs_info;
async->submit_bio_start(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
@ -850,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u32 blocksize, u64 parent_transid)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = root->fs_info->btree_inode;
struct extent_io_tree *io_tree;
int ret;
io_tree = &BTRFS_I(btree_inode)->io_tree;
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
if (!buf)
return NULL;
@ -1377,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio)
u64 start = 0;
struct page *page;
struct extent_io_tree *io_tree = NULL;
struct btrfs_fs_info *info = NULL;
struct bio_vec *bvec;
int i;
int ret;
@ -1396,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio)
buf_len = page->private >> 2;
start = page_offset(page) + bvec->bv_offset;
io_tree = &BTRFS_I(page->mapping->host)->io_tree;
info = BTRFS_I(page->mapping->host)->root->fs_info;
}
/* are we fully contained in this bio? */
if (buf_len <= length)
@ -1680,12 +1679,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh)
goto fail_iput;
@ -1775,6 +1774,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
fs_info->thread_pool_size,
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1, &fs_info->generic_worker);
/*
* endios are largely parallel and should have a very
@ -1795,6 +1796,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@ -1993,6 +1995,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
btrfs_orphan_cleanup(fs_info->fs_root);
btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
}
@ -2035,6 +2038,7 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@ -2410,6 +2414,7 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
btrfs_put_block_group_cache(fs_info);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
@ -2456,6 +2461,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_close_devices(fs_info->fs_devices);

File diff suppressed because it is too large Load diff

View file

@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping, gfp_t mask)
{
tree->state = RB_ROOT;
tree->buffer = RB_ROOT;
INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
return ret;
}
static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
u64 offset, struct rb_node *node)
{
struct rb_root *root = &tree->buffer;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct extent_buffer *eb;
while (*p) {
parent = *p;
eb = rb_entry(parent, struct extent_buffer, rb_node);
if (offset < eb->start)
p = &(*p)->rb_left;
else if (offset > eb->start)
p = &(*p)->rb_right;
else
return eb;
}
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
}
static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
u64 offset)
{
struct rb_root *root = &tree->buffer;
struct rb_node *n = root->rb_node;
struct extent_buffer *eb;
while (n) {
eb = rb_entry(n, struct extent_buffer, rb_node);
if (offset < eb->start)
n = n->rb_left;
else if (offset > eb->start)
n = n->rb_right;
else
return eb;
}
return NULL;
}
static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
struct extent_state *other)
{
@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
u64 start;
u64 end;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
bio->bi_private = NULL;
@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
u64 unlock_start;
sector_t sector;
struct extent_state *cached_state = NULL;
struct extent_map *em;
@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1);
unlock_start = page_end + 1;
goto done;
}
@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
page_end, NULL, 1);
unlock_start = page_end + 1;
break;
}
em = epd->get_extent(inode, page, pg_offset, cur,
@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
cur += iosize;
pg_offset += iosize;
unlock_start = cur;
continue;
}
/* leave this out until we have a page_mkwrite call */
@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
int range_whole = 0;
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
} else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
scanned = 1;
}
retry:
@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
NULL, 1,
end_bio_extent_preparewrite, 0,
0, 0);
if (ret && !err)
err = ret;
iocount++;
block_start = block_start + iosize;
} else {
@ -3104,6 +3053,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
/*
* Helper for releasing extent buffer page.
*/
static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
unsigned long start_idx)
{
unsigned long index;
struct page *page;
if (!eb->first_page)
return;
index = num_extent_pages(eb->start, eb->len);
if (start_idx >= index)
return;
do {
index--;
page = extent_buffer_page(eb, index);
if (page)
page_cache_release(page);
} while (index != start_idx);
}
/*
* Helper for releasing the extent buffer.
*/
static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
{
btrfs_release_extent_buffer_page(eb, 0);
__free_extent_buffer(eb);
}
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len,
struct page *page0,
@ -3117,16 +3099,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
struct page *p;
struct address_space *mapping = tree->mapping;
int uptodate = 1;
int ret;
spin_lock(&tree->buffer_lock);
eb = buffer_search(tree, start);
if (eb) {
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
rcu_read_lock();
eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
mark_page_accessed(eb->first_page);
return eb;
}
spin_unlock(&tree->buffer_lock);
rcu_read_unlock();
eb = __alloc_extent_buffer(tree, start, len, mask);
if (!eb)
@ -3165,26 +3147,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (ret)
goto free_eb;
spin_lock(&tree->buffer_lock);
exists = buffer_tree_insert(tree, start, &eb->rb_node);
if (exists) {
ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
if (ret == -EEXIST) {
exists = radix_tree_lookup(&tree->buffer,
start >> PAGE_CACHE_SHIFT);
/* add one reference for the caller */
atomic_inc(&exists->refs);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
goto free_eb;
}
/* add one reference for the tree */
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
return eb;
free_eb:
if (!atomic_dec_and_test(&eb->refs))
return exists;
for (index = 1; index < i; index++)
page_cache_release(extent_buffer_page(eb, index));
page_cache_release(extent_buffer_page(eb, 0));
__free_extent_buffer(eb);
btrfs_release_extent_buffer(eb);
return exists;
}
@ -3194,16 +3181,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
{
struct extent_buffer *eb;
spin_lock(&tree->buffer_lock);
eb = buffer_search(tree, start);
if (eb)
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
if (eb)
rcu_read_lock();
eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
mark_page_accessed(eb->first_page);
return eb;
}
rcu_read_unlock();
return eb;
return NULL;
}
void free_extent_buffer(struct extent_buffer *eb)
@ -3833,34 +3820,45 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
struct extent_buffer *eb =
container_of(head, struct extent_buffer, rcu_head);
btrfs_release_extent_buffer(eb);
}
int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
{
u64 start = page_offset(page);
struct extent_buffer *eb;
int ret = 1;
unsigned long i;
unsigned long num_pages;
spin_lock(&tree->buffer_lock);
eb = buffer_search(tree, start);
eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
if (!eb)
goto out;
if (atomic_read(&eb->refs) > 1) {
ret = 0;
goto out;
}
if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
ret = 0;
goto out;
}
/* at this point we can safely release the extent buffer */
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++)
page_cache_release(extent_buffer_page(eb, i));
rb_erase(&eb->rb_node, &tree->buffer);
__free_extent_buffer(eb);
/*
* set @eb->refs to 0 if it is already 1, and then release the @eb.
* Or go back.
*/
if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
ret = 0;
goto out;
}
radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
out:
spin_unlock(&tree->buffer_lock);
/* at this point we can safely release the extent buffer */
if (atomic_read(&eb->refs) == 0)
call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
return ret;
}

View file

@ -85,7 +85,7 @@ struct extent_io_ops {
struct extent_io_tree {
struct rb_root state;
struct rb_root buffer;
struct radix_tree_root buffer;
struct address_space *mapping;
u64 dirty_bytes;
spinlock_t lock;
@ -123,7 +123,7 @@ struct extent_buffer {
unsigned long bflags;
atomic_t refs;
struct list_head leak_list;
struct rb_node rb_node;
struct rcu_head rcu_head;
/* the spinlock is used to protect most operations */
spinlock_t lock;

View file

@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
goto out;
}
if (IS_ERR(rb_node)) {
em = ERR_PTR(PTR_ERR(rb_node));
em = ERR_CAST(rb_node);
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
goto out;
}
if (IS_ERR(rb_node)) {
em = ERR_PTR(PTR_ERR(rb_node));
em = ERR_CAST(rb_node);
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);

View file

@ -23,10 +23,761 @@
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
static void recalculate_thresholds(struct btrfs_block_group_cache
*block_group);
static int link_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *info);
struct inode *lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_key location;
struct btrfs_disk_key disk_key;
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
struct inode *inode = NULL;
int ret;
spin_lock(&block_group->lock);
if (block_group->inode)
inode = igrab(block_group->inode);
spin_unlock(&block_group->lock);
if (inode)
return inode;
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = block_group->key.objectid;
key.type = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0) {
btrfs_release_path(root, path);
return ERR_PTR(-ENOENT);
}
leaf = path->nodes[0];
header = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_header);
btrfs_free_space_key(leaf, header, &disk_key);
btrfs_disk_key_to_cpu(&location, &disk_key);
btrfs_release_path(root, path);
inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
if (!inode)
return ERR_PTR(-ENOENT);
if (IS_ERR(inode))
return inode;
if (is_bad_inode(inode)) {
iput(inode);
return ERR_PTR(-ENOENT);
}
spin_lock(&block_group->lock);
if (!root->fs_info->closing) {
block_group->inode = igrab(inode);
block_group->iref = 1;
}
spin_unlock(&block_group->lock);
return inode;
}
int create_free_space_inode(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_disk_key disk_key;
struct btrfs_free_space_header *header;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
u64 objectid;
int ret;
ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
if (ret < 0)
return ret;
ret = btrfs_insert_empty_inode(trans, root, path, objectid);
if (ret)
return ret;
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
btrfs_item_key(leaf, &disk_key, path->slots[0]);
memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
sizeof(*inode_item));
btrfs_set_inode_generation(leaf, inode_item, trans->transid);
btrfs_set_inode_size(leaf, inode_item, 0);
btrfs_set_inode_nbytes(leaf, inode_item, 0);
btrfs_set_inode_uid(leaf, inode_item, 0);
btrfs_set_inode_gid(leaf, inode_item, 0);
btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
btrfs_set_inode_nlink(leaf, inode_item, 1);
btrfs_set_inode_transid(leaf, inode_item, trans->transid);
btrfs_set_inode_block_group(leaf, inode_item,
block_group->key.objectid);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(root, path);
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = block_group->key.objectid;
key.type = 0;
ret = btrfs_insert_empty_item(trans, root, path, &key,
sizeof(struct btrfs_free_space_header));
if (ret < 0) {
btrfs_release_path(root, path);
return ret;
}
leaf = path->nodes[0];
header = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_header);
memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
btrfs_set_free_space_key(leaf, header, &disk_key);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(root, path);
return 0;
}
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct inode *inode)
{
loff_t oldsize;
int ret = 0;
trans->block_rsv = root->orphan_block_rsv;
ret = btrfs_block_rsv_check(trans, root,
root->orphan_block_rsv,
0, 5);
if (ret)
return ret;
oldsize = i_size_read(inode);
btrfs_i_size_write(inode, 0);
truncate_pagecache(inode, oldsize, 0);
/*
* We don't need an orphan item because truncating the free space cache
* will never be split across transactions.
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);
if (ret) {
WARN_ON(1);
return ret;
}
return btrfs_update_inode(trans, root, inode);
}
static int readahead_cache(struct inode *inode)
{
struct file_ra_state *ra;
unsigned long last_index;
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
return -ENOMEM;
file_ra_state_init(ra, inode->i_mapping);
last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
kfree(ra);
return 0;
}
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
struct btrfs_root *root = fs_info->tree_root;
struct inode *inode;
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
struct page *page;
struct btrfs_path *path;
u32 *checksums = NULL, *crc;
char *disk_crcs = NULL;
struct btrfs_key key;
struct list_head bitmaps;
u64 num_entries;
u64 num_bitmaps;
u64 generation;
u32 cur_crc = ~(u32)0;
pgoff_t index = 0;
unsigned long first_page_offset;
int num_checksums;
int ret = 0;
/*
* If we're unmounting then just return, since this does a search on the
* normal root and not the commit root and we could deadlock.
*/
smp_mb();
if (fs_info->closing)
return 0;
/*
* If this block group has been marked to be cleared for one reason or
* another then we can't trust the on disk cache, so just return.
*/
spin_lock(&block_group->lock);
if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
spin_unlock(&block_group->lock);
return 0;
}
spin_unlock(&block_group->lock);
INIT_LIST_HEAD(&bitmaps);
path = btrfs_alloc_path();
if (!path)
return 0;
inode = lookup_free_space_inode(root, block_group, path);
if (IS_ERR(inode)) {
btrfs_free_path(path);
return 0;
}
/* Nothing in the space cache, goodbye */
if (!i_size_read(inode)) {
btrfs_free_path(path);
goto out;
}
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = block_group->key.objectid;
key.type = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret) {
btrfs_free_path(path);
goto out;
}
leaf = path->nodes[0];
header = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_header);
num_entries = btrfs_free_space_entries(leaf, header);
num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
generation = btrfs_free_space_generation(leaf, header);
btrfs_free_path(path);
if (BTRFS_I(inode)->generation != generation) {
printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
" not match free space cache generation (%llu) for "
"block group %llu\n",
(unsigned long long)BTRFS_I(inode)->generation,
(unsigned long long)generation,
(unsigned long long)block_group->key.objectid);
goto out;
}
if (!num_entries)
goto out;
/* Setup everything for doing checksumming */
num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
if (!checksums)
goto out;
first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
if (!disk_crcs)
goto out;
ret = readahead_cache(inode);
if (ret) {
ret = 0;
goto out;
}
while (1) {
struct btrfs_free_space_entry *entry;
struct btrfs_free_space *e;
void *addr;
unsigned long offset = 0;
unsigned long start_offset = 0;
int need_loop = 0;
if (!num_entries && !num_bitmaps)
break;
if (index == 0) {
start_offset = first_page_offset;
offset = start_offset;
}
page = grab_cache_page(inode->i_mapping, index);
if (!page) {
ret = 0;
goto free_cache;
}
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
printk(KERN_ERR "btrfs: error reading free "
"space cache: %llu\n",
(unsigned long long)
block_group->key.objectid);
goto free_cache;
}
}
addr = kmap(page);
if (index == 0) {
u64 *gen;
memcpy(disk_crcs, addr, first_page_offset);
gen = addr + (sizeof(u32) * num_checksums);
if (*gen != BTRFS_I(inode)->generation) {
printk(KERN_ERR "btrfs: space cache generation"
" (%llu) does not match inode (%llu) "
"for block group %llu\n",
(unsigned long long)*gen,
(unsigned long long)
BTRFS_I(inode)->generation,
(unsigned long long)
block_group->key.objectid);
kunmap(page);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
crc = (u32 *)disk_crcs;
}
entry = addr + start_offset;
/* First lets check our crc before we do anything fun */
cur_crc = ~(u32)0;
cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
PAGE_CACHE_SIZE - start_offset);
btrfs_csum_final(cur_crc, (char *)&cur_crc);
if (cur_crc != *crc) {
printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
"block group %llu\n", index,
(unsigned long long)block_group->key.objectid);
kunmap(page);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
crc++;
while (1) {
if (!num_entries)
break;
need_loop = 1;
e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
if (!e) {
kunmap(page);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
e->offset = le64_to_cpu(entry->offset);
e->bytes = le64_to_cpu(entry->bytes);
if (!e->bytes) {
kunmap(page);
kfree(e);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
spin_lock(&block_group->tree_lock);
ret = link_free_space(block_group, e);
spin_unlock(&block_group->tree_lock);
BUG_ON(ret);
} else {
e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!e->bitmap) {
kunmap(page);
kfree(e);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
spin_lock(&block_group->tree_lock);
ret = link_free_space(block_group, e);
block_group->total_bitmaps++;
recalculate_thresholds(block_group);
spin_unlock(&block_group->tree_lock);
list_add_tail(&e->list, &bitmaps);
}
num_entries--;
offset += sizeof(struct btrfs_free_space_entry);
if (offset + sizeof(struct btrfs_free_space_entry) >=
PAGE_CACHE_SIZE)
break;
entry++;
}
/*
* We read an entry out of this page, we need to move on to the
* next page.
*/
if (need_loop) {
kunmap(page);
goto next;
}
/*
* We add the bitmaps at the end of the entries in order that
* the bitmap entries are added to the cache.
*/
e = list_entry(bitmaps.next, struct btrfs_free_space, list);
list_del_init(&e->list);
memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
kunmap(page);
num_bitmaps--;
next:
unlock_page(page);
page_cache_release(page);
index++;
}
ret = 1;
out:
kfree(checksums);
kfree(disk_crcs);
iput(inode);
return ret;
free_cache:
/* This cache is bogus, make sure it gets cleared */
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_CLEAR;
spin_unlock(&block_group->lock);
btrfs_remove_free_space_cache(block_group);
goto out;
}
int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
struct inode *inode;
struct rb_node *node;
struct list_head *pos, *n;
struct page *page;
struct extent_state *cached_state = NULL;
struct list_head bitmap_list;
struct btrfs_key key;
u64 bytes = 0;
u32 *crc, *checksums;
pgoff_t index = 0, last_index = 0;
unsigned long first_page_offset;
int num_checksums;
int entries = 0;
int bitmaps = 0;
int ret = 0;
root = root->fs_info->tree_root;
INIT_LIST_HEAD(&bitmap_list);
spin_lock(&block_group->lock);
if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
spin_unlock(&block_group->lock);
return 0;
}
spin_unlock(&block_group->lock);
inode = lookup_free_space_inode(root, block_group, path);
if (IS_ERR(inode))
return 0;
if (!i_size_read(inode)) {
iput(inode);
return 0;
}
last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
filemap_write_and_wait(inode->i_mapping);
btrfs_wait_ordered_range(inode, inode->i_size &
~(root->sectorsize - 1), (u64)-1);
/* We need a checksum per page. */
num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
if (!crc) {
iput(inode);
return 0;
}
/* Since the first page has all of our checksums and our generation we
* need to calculate the offset into the page that we can start writing
* our entries.
*/
first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
node = rb_first(&block_group->free_space_offset);
if (!node)
goto out_free;
/*
* Lock all pages first so we can lock the extent safely.
*
* NOTE: Because we hold the ref the entire time we're going to write to
* the page find_get_page should never fail, so we don't do a check
* after find_get_page at this point. Just putting this here so people
* know and don't freak out.
*/
while (index <= last_index) {
page = grab_cache_page(inode->i_mapping, index);
if (!page) {
pgoff_t i = 0;
while (i < index) {
page = find_get_page(inode->i_mapping, i);
unlock_page(page);
page_cache_release(page);
page_cache_release(page);
i++;
}
goto out_free;
}
index++;
}
index = 0;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
0, &cached_state, GFP_NOFS);
/* Write out the extent entries */
do {
struct btrfs_free_space_entry *entry;
void *addr;
unsigned long offset = 0;
unsigned long start_offset = 0;
if (index == 0) {
start_offset = first_page_offset;
offset = start_offset;
}
page = find_get_page(inode->i_mapping, index);
addr = kmap(page);
entry = addr + start_offset;
memset(addr, 0, PAGE_CACHE_SIZE);
while (1) {
struct btrfs_free_space *e;
e = rb_entry(node, struct btrfs_free_space, offset_index);
entries++;
entry->offset = cpu_to_le64(e->offset);
entry->bytes = cpu_to_le64(e->bytes);
if (e->bitmap) {
entry->type = BTRFS_FREE_SPACE_BITMAP;
list_add_tail(&e->list, &bitmap_list);
bitmaps++;
} else {
entry->type = BTRFS_FREE_SPACE_EXTENT;
}
node = rb_next(node);
if (!node)
break;
offset += sizeof(struct btrfs_free_space_entry);
if (offset + sizeof(struct btrfs_free_space_entry) >=
PAGE_CACHE_SIZE)
break;
entry++;
}
*crc = ~(u32)0;
*crc = btrfs_csum_data(root, addr + start_offset, *crc,
PAGE_CACHE_SIZE - start_offset);
kunmap(page);
btrfs_csum_final(*crc, (char *)crc);
crc++;
bytes += PAGE_CACHE_SIZE;
ClearPageChecked(page);
set_page_extent_mapped(page);
SetPageUptodate(page);
set_page_dirty(page);
/*
* We need to release our reference we got for grab_cache_page,
* except for the first page which will hold our checksums, we
* do that below.
*/
if (index != 0) {
unlock_page(page);
page_cache_release(page);
}
page_cache_release(page);
index++;
} while (node);
/* Write out the bitmaps */
list_for_each_safe(pos, n, &bitmap_list) {
void *addr;
struct btrfs_free_space *entry =
list_entry(pos, struct btrfs_free_space, list);
page = find_get_page(inode->i_mapping, index);
addr = kmap(page);
memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
*crc = ~(u32)0;
*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
kunmap(page);
btrfs_csum_final(*crc, (char *)crc);
crc++;
bytes += PAGE_CACHE_SIZE;
ClearPageChecked(page);
set_page_extent_mapped(page);
SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
page_cache_release(page);
list_del_init(&entry->list);
index++;
}
/* Zero out the rest of the pages just to make sure */
while (index <= last_index) {
void *addr;
page = find_get_page(inode->i_mapping, index);
addr = kmap(page);
memset(addr, 0, PAGE_CACHE_SIZE);
kunmap(page);
ClearPageChecked(page);
set_page_extent_mapped(page);
SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
page_cache_release(page);
bytes += PAGE_CACHE_SIZE;
index++;
}
btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
/* Write the checksums and trans id to the first page */
{
void *addr;
u64 *gen;
page = find_get_page(inode->i_mapping, 0);
addr = kmap(page);
memcpy(addr, checksums, sizeof(u32) * num_checksums);
gen = addr + (sizeof(u32) * num_checksums);
*gen = trans->transid;
kunmap(page);
ClearPageChecked(page);
set_page_extent_mapped(page);
SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
page_cache_release(page);
}
BTRFS_I(inode)->generation = trans->transid;
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state, GFP_NOFS);
filemap_write_and_wait(inode->i_mapping);
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = block_group->key.objectid;
key.type = 0;
ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
if (ret < 0) {
ret = 0;
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
goto out_free;
}
leaf = path->nodes[0];
if (ret > 0) {
struct btrfs_key found_key;
BUG_ON(!path->slots[0]);
path->slots[0]--;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
found_key.offset != block_group->key.objectid) {
ret = 0;
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL,
GFP_NOFS);
btrfs_release_path(root, path);
goto out_free;
}
}
header = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_header);
btrfs_set_free_space_entries(leaf, header, entries);
btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
btrfs_set_free_space_generation(leaf, header, trans->transid);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(root, path);
ret = 1;
out_free:
if (ret == 0) {
invalidate_inode_pages2_range(inode->i_mapping, 0, index);
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_ERROR;
spin_unlock(&block_group->lock);
BTRFS_I(inode)->generation = 0;
}
kfree(checksums);
btrfs_update_inode(trans, root, inode);
iput(inode);
return ret;
}
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
u64 offset)
{

View file

@ -27,6 +27,24 @@ struct btrfs_free_space {
struct list_head list;
};
struct inode *lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path);
int create_free_space_inode(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,

View file

@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
u64 num_bytes;
u64 orig_start;
u64 disk_num_bytes;
u64 blocksize = root->sectorsize;
u64 actual_end;
u64 isize = i_size_read(inode);
@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
int i;
int will_compress;
orig_start = start;
actual_end = min_t(u64, isize, end + 1);
again:
will_compress = 0;
@ -371,7 +367,6 @@ again:
total_compressed = min(total_compressed, max_uncompressed);
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
total_in = 0;
ret = 0;
@ -467,7 +462,6 @@ again:
if (total_compressed >= total_in) {
will_compress = 0;
} else {
disk_num_bytes = total_compressed;
num_bytes = total_in;
}
}
@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
u64 disk_num_bytes;
u64 cur_alloc_size;
u64 blocksize = root->sectorsize;
u64 actual_end;
u64 isize = i_size_read(inode);
struct btrfs_key ins;
struct extent_map *em;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
BUG_ON(root == root->fs_info->tree_root);
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
actual_end = min_t(u64, isize, end + 1);
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
int type;
int nocow;
int check_prev = 1;
bool nolock = false;
path = btrfs_alloc_path();
BUG_ON(!path);
trans = btrfs_join_transaction(root, 1);
if (root == root->fs_info->tree_root) {
nolock = true;
trans = btrfs_join_transaction_nolock(root, 1);
} else {
trans = btrfs_join_transaction(root, 1);
}
BUG_ON(!trans);
cow_start = (u64)-1;
@ -1211,8 +1208,13 @@ out_check:
BUG_ON(ret);
}
ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
if (nolock) {
ret = btrfs_end_transaction_nolock(trans, root);
BUG_ON(ret);
} else {
ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
}
btrfs_free_path(path);
return 0;
}
@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
int do_list = (root->root_key.objectid !=
BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
root->fs_info->delalloc_bytes += len;
if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
}
@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
int do_list = (root->root_key.objectid !=
BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& do_list)
btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->delalloc_bytes -= len;
BTRFS_I(inode)->delalloc_bytes -= len;
if (BTRFS_I(inode)->delalloc_bytes == 0 &&
if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
}
@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
if (map_length < length + size)
return 1;
return 0;
return ret;
}
/*
@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
if (root == root->fs_info->tree_root)
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
else
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
if (!(rw & REQ_WRITE)) {
@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct extent_state *cached_state = NULL;
int compressed = 0;
int ret;
bool nolock = false;
ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1);
@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0;
BUG_ON(!ordered_extent);
nolock = (root == root->fs_info->tree_root);
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
trans = btrfs_join_transaction(root, 1);
if (nolock)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state, GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
if (nolock)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->len);
BUG_ON(ret);
} else {
BUG_ON(root == root->fs_info->tree_root);
ret = insert_reserved_file_extent(trans, inode,
ordered_extent->file_offset,
ordered_extent->start,
@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
btrfs_delalloc_release_metadata(inode, ordered_extent->len);
if (trans)
btrfs_end_transaction(trans, root);
if (nolock) {
if (trans)
btrfs_end_transaction_nolock(trans, root);
} else {
btrfs_delalloc_release_metadata(inode, ordered_extent->len);
if (trans)
btrfs_end_transaction(trans, root);
}
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_item *item;
struct btrfs_key key, found_key;
struct btrfs_trans_handle *trans;
struct inode *inode;
@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
/* pull out the item */
leaf = path->nodes[0];
item = btrfs_item_nr(leaf, path->slots[0]);
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* make sure the item matches what we want */
@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
dir, index);
BUG_ON(ret);
if (ret == -ENOENT)
ret = 0;
err:
btrfs_free_path(path);
if (ret)
@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
{
struct extent_buffer *eb;
int level;
int ret;
u64 refs = 1;
int uninitialized_var(ret);
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
if (refs > 1)
return 1;
}
return 0;
return ret; /* XXX callers? */
}
/*
@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
if (root->ref_cows)
if (root->ref_cows || root == root->fs_info->tree_root)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
path = btrfs_alloc_path();
@ -3344,7 +3370,8 @@ delete:
} else {
break;
}
if (found_extent && root->ref_cows) {
if (found_extent && (root->ref_cows ||
root == root->fs_info->tree_root)) {
btrfs_set_path_blocking(path);
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
int ret;
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
root == root->fs_info->tree_root))
goto no_delete;
if (is_bad_inode(inode)) {
@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
}
spin_unlock(&root->inode_lock);
if (empty && btrfs_root_refs(&root->root_item) == 0) {
/*
* Free space cache has inodes in the tree root, but the tree root has a
* root_refs of 0, so this could end up dropping the tree root as a
* snapshot, so we need the extra !root->fs_info->tree_root check to
* make sure we don't drop it.
*/
if (empty && btrfs_root_refs(&root->root_item) == 0 &&
root != root->fs_info->tree_root) {
synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret = 0;
bool nolock = false;
if (BTRFS_I(inode)->dummy_inode)
return 0;
smp_mb();
nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
if (wbc->sync_mode == WB_SYNC_ALL) {
trans = btrfs_join_transaction(root, 1);
if (nolock)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
ret = btrfs_commit_transaction(trans, root);
if (nolock)
ret = btrfs_end_transaction_nolock(trans, root);
else
ret = btrfs_commit_transaction(trans, root);
}
return ret;
}
@ -5645,7 +5690,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_dio_private *dip;
struct bio_vec *bvec = bio->bi_io_vec;
u64 start;
int skip_sum;
int write = rw & REQ_WRITE;
int ret = 0;
@ -5671,7 +5715,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
dip->inode = inode;
dip->logical_offset = file_offset;
start = dip->logical_offset;
dip->bytes = 0;
do {
dip->bytes += bvec->bv_len;
@ -6308,6 +6351,21 @@ void btrfs_destroy_inode(struct inode *inode)
spin_unlock(&root->fs_info->ordered_extent_lock);
}
if (root == root->fs_info->tree_root) {
struct btrfs_block_group_cache *block_group;
block_group = btrfs_lookup_block_group(root->fs_info,
BTRFS_I(inode)->block_group);
if (block_group && block_group->inode == inode) {
spin_lock(&block_group->lock);
block_group->inode = NULL;
spin_unlock(&block_group->lock);
btrfs_put_block_group(block_group);
} else if (block_group) {
btrfs_put_block_group(block_group);
}
}
spin_lock(&root->orphan_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@ -6340,7 +6398,8 @@ int btrfs_drop_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0)
if (btrfs_root_refs(&root->root_item) == 0 &&
root != root->fs_info->tree_root)
return 1;
else
return generic_drop_inode(inode);
@ -6609,7 +6668,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return 0;
}
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
int sync)
{
struct btrfs_inode *binode;
struct inode *inode = NULL;
@ -6631,7 +6691,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
write_inode_now(inode, 0);
if (sync) {
filemap_write_and_wait(inode->i_mapping);
/*
* We have to do this because compression doesn't
* actually set PG_writeback until it submits the pages
* for IO, which happens in an async thread, so we could
* race and not actually wait for any writeback pages
* because they've not been submitted yet. Technically
* this could still be the case for the ordered stuff
* since the async thread may not have started to do its
* work yet. If this becomes the case then we need to
* figure out a way to make sure that in writepage we
* wait for any async pages to be submitted before
* returning so that fdatawait does what its supposed to
* do.
*/
btrfs_wait_ordered_range(inode, 0, (u64)-1);
} else {
filemap_flush(inode->i_mapping);
}
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
@ -6757,27 +6836,33 @@ out_unlock:
return err;
}
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint)
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint,
struct btrfs_trans_handle *trans)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 cur_offset = start;
int ret = 0;
bool own_trans = true;
if (trans)
own_trans = false;
while (num_bytes > 0) {
trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
break;
if (own_trans) {
trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
break;
}
}
ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
0, *alloc_hint, (u64)-1, &ins, 1);
if (ret) {
btrfs_end_transaction(trans, root);
if (own_trans)
btrfs_end_transaction(trans, root);
break;
}
@ -6810,11 +6895,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
btrfs_end_transaction(trans, root);
if (own_trans)
btrfs_end_transaction(trans, root);
}
return ret;
}
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint)
{
return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
min_size, actual_len, alloc_hint,
NULL);
}
int btrfs_prealloc_file_range_trans(struct inode *inode,
struct btrfs_trans_handle *trans, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint)
{
return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
min_size, actual_len, alloc_hint, trans);
}
static long btrfs_fallocate(struct inode *inode, int mode,
loff_t offset, loff_t len)
{

View file

@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
char *name, int namelen)
char *name, int namelen,
u64 *async_transid)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
err = btrfs_commit_transaction(trans, root);
if (async_transid) {
*async_transid = trans->transid;
err = btrfs_commit_transaction_async(trans, root, 1);
} else {
err = btrfs_commit_transaction(trans, root);
}
if (err && !ret)
ret = err;
return ret;
}
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen, u64 *async_transid)
{
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
if (async_transid) {
*async_transid = trans->transid;
ret = btrfs_commit_transaction_async(trans,
root->fs_info->extent_root, 1);
} else {
ret = btrfs_commit_transaction(trans,
root->fs_info->extent_root);
}
BUG_ON(ret);
ret = pending_snapshot->error;
@ -395,6 +409,76 @@ fail:
return ret;
}
/* copy of check_sticky in fs/namei.c()
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
*/
static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
{
uid_t fsuid = current_fsuid();
if (!(dir->i_mode & S_ISVTX))
return 0;
if (inode->i_uid == fsuid)
return 0;
if (dir->i_uid == fsuid)
return 0;
return !capable(CAP_FOWNER);
}
/* copy of may_delete in fs/namei.c()
* Check whether we can remove a link victim from directory dir, check
* whether the type of victim is right.
* 1. We can't do it if dir is read-only (done in permission())
* 2. We should have write and exec permissions on dir
* 3. We can't remove anything from append-only dir
* 4. We can't do anything with immutable dir (done in permission())
* 5. If the sticky bit on dir is set we should either
* a. be owner of dir, or
* b. be owner of victim, or
* c. have CAP_FOWNER capability
* 6. If the victim is append-only or immutable we can't do antyhing with
* links pointing to it.
* 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
* 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
* 9. We can't remove a root or mountpoint.
* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
{
int error;
if (!victim->d_inode)
return -ENOENT;
BUG_ON(victim->d_parent->d_inode != dir);
audit_inode_child(victim, dir);
error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
if (btrfs_check_sticky(dir, victim->d_inode)||
IS_APPEND(victim->d_inode)||
IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
return -EPERM;
if (isdir) {
if (!S_ISDIR(victim->d_inode->i_mode))
return -ENOTDIR;
if (IS_ROOT(victim))
return -EBUSY;
} else if (S_ISDIR(victim->d_inode->i_mode))
return -EISDIR;
if (IS_DEADDIR(dir))
return -ENOENT;
if (victim->d_flags & DCACHE_NFSFS_RENAMED)
return -EBUSY;
return 0;
}
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
*/
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src)
struct btrfs_root *snap_src,
u64 *async_transid)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_up_read;
if (snap_src) {
error = create_snapshot(snap_src, dentry);
error = create_snapshot(snap_src, dentry,
name, namelen, async_transid);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
name, namelen);
name, namelen, async_transid);
}
if (!error)
fsnotify_mkdir(dir, dentry);
@ -708,7 +794,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
char *sizestr;
char *devstr = NULL;
int ret = 0;
int namelen;
int mod = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
@ -722,7 +807,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
namelen = strlen(vol_args->name);
mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
@ -801,11 +885,13 @@ out_unlock:
return ret;
}
static noinline int btrfs_ioctl_snap_create(struct file *file,
void __user *arg, int subvol)
static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
char *name,
unsigned long fd,
int subvol,
u64 *transid)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct btrfs_ioctl_vol_args *vol_args;
struct file *src_file;
int namelen;
int ret = 0;
@ -813,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
namelen = strlen(vol_args->name);
if (strchr(vol_args->name, '/')) {
namelen = strlen(name);
if (strchr(name, '/')) {
ret = -EINVAL;
goto out;
}
if (subvol) {
ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
NULL);
ret = btrfs_mksubvol(&file->f_path, name, namelen,
NULL, transid);
} else {
struct inode *src_inode;
src_file = fget(vol_args->fd);
src_file = fget(fd);
if (!src_file) {
ret = -EINVAL;
goto out;
@ -843,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
fput(src_file);
goto out;
}
ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
BTRFS_I(src_inode)->root);
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
transid);
fput(src_file);
}
out:
return ret;
}
static noinline int btrfs_ioctl_snap_create(struct file *file,
void __user *arg, int subvol,
int async)
{
struct btrfs_ioctl_vol_args *vol_args = NULL;
struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
char *name;
u64 fd;
u64 transid = 0;
int ret;
if (async) {
async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
if (IS_ERR(async_vol_args))
return PTR_ERR(async_vol_args);
name = async_vol_args->name;
fd = async_vol_args->fd;
async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
} else {
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
name = vol_args->name;
fd = vol_args->fd;
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
}
ret = btrfs_ioctl_snap_create_transid(file, name, fd,
subvol, &transid);
if (!ret && async) {
if (copy_to_user(arg +
offsetof(struct btrfs_ioctl_async_vol_args,
transid), &transid, sizeof(transid)))
return -EFAULT;
}
kfree(vol_args);
kfree(async_vol_args);
return ret;
}
@ -1073,14 +1198,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
args = kmalloc(sizeof(*args), GFP_KERNEL);
if (!args)
return -ENOMEM;
args = memdup_user(argp, sizeof(*args));
if (IS_ERR(args))
return PTR_ERR(args);
if (copy_from_user(args, argp, sizeof(*args))) {
kfree(args);
return -EFAULT;
}
inode = fdentry(file)->d_inode;
ret = search_ioctl(inode, args);
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@ -1188,14 +1309,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
args = kmalloc(sizeof(*args), GFP_KERNEL);
if (!args)
return -ENOMEM;
args = memdup_user(argp, sizeof(*args));
if (IS_ERR(args))
return PTR_ERR(args);
if (copy_from_user(args, argp, sizeof(*args))) {
kfree(args);
return -EFAULT;
}
inode = fdentry(file)->d_inode;
if (args->treeid == 0)
@ -1227,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int ret;
int err = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@ -1259,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
}
inode = dentry->d_inode;
dest = BTRFS_I(inode)->root;
if (!capable(CAP_SYS_ADMIN)){
/*
* Regular user. Only allow this with a special mount
* option, when the user has write+exec access to the
* subvol root, and when rmdir(2) would have been
* allowed.
*
* Note that this is _not_ check that the subvol is
* empty or doesn't contain data that we wouldn't
* otherwise be able to delete.
*
* Users who want to delete empty subvols should try
* rmdir(2).
*/
err = -EPERM;
if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
goto out_dput;
/*
* Do not allow deletion if the parent dir is the same
* as the dir to be deleted. That means the ioctl
* must be called on the dentry referencing the root
* of the subvol, not a random directory contained
* within it.
*/
err = -EINVAL;
if (root == dest)
goto out_dput;
err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
if (err)
goto out_dput;
/* check if subvolume may be deleted by a non-root user */
err = btrfs_may_delete(dir, dentry, 1);
if (err)
goto out_dput;
}
if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out_dput;
}
dest = BTRFS_I(inode)->root;
mutex_lock(&inode->i_mutex);
err = d_invalidate(dentry);
if (err)
@ -1304,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
BUG_ON(ret);
}
ret = btrfs_commit_transaction(trans, root);
ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
inode->i_flags |= S_DEAD;
out_up_write:
@ -1502,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
path->reada = 2;
if (inode < src) {
mutex_lock(&inode->i_mutex);
mutex_lock(&src->i_mutex);
mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock(&src->i_mutex);
mutex_lock(&inode->i_mutex);
mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
}
/* determine range to clone */
@ -1530,13 +1682,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
ordered = btrfs_lookup_first_ordered_extent(src, off+len);
if (!ordered &&
!test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
EXTENT_DELALLOC, 0, NULL))
break;
unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
if (ordered)
btrfs_put_ordered_extent(ordered);
btrfs_wait_ordered_range(src, off, off+len);
btrfs_wait_ordered_range(src, off, len);
}
/* clone data */
@ -1605,7 +1759,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
}
btrfs_release_path(root, path);
if (key.offset + datal < off ||
if (key.offset + datal <= off ||
key.offset >= off+len)
goto next;
@ -1879,6 +2033,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
return 0;
}
static void get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space)
{
struct btrfs_block_group_cache *block_group;
space->total_bytes = 0;
space->used_bytes = 0;
space->flags = 0;
list_for_each_entry(block_group, groups_list, list) {
space->flags = block_group->flags;
space->total_bytes += block_group->key.offset;
space->used_bytes +=
btrfs_block_group_used(&block_group->item);
}
}
long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_space_args space_args;
@ -1887,27 +2057,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
struct btrfs_ioctl_space_info *dest_orig;
struct btrfs_ioctl_space_info *user_dest;
struct btrfs_space_info *info;
u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
int num_types = 4;
int alloc_size;
int ret = 0;
int slot_count = 0;
int i, c;
if (copy_from_user(&space_args,
(struct btrfs_ioctl_space_args __user *)arg,
sizeof(space_args)))
return -EFAULT;
/* first we count slots */
rcu_read_lock();
list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
slot_count++;
rcu_read_unlock();
for (i = 0; i < num_types; i++) {
struct btrfs_space_info *tmp;
info = NULL;
rcu_read_lock();
list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
list) {
if (tmp->flags == types[i]) {
info = tmp;
break;
}
}
rcu_read_unlock();
if (!info)
continue;
down_read(&info->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
if (!list_empty(&info->block_groups[c]))
slot_count++;
}
up_read(&info->groups_sem);
}
/* space_slots == 0 means they are asking for a count */
if (space_args.space_slots == 0) {
space_args.total_spaces = slot_count;
goto out;
}
slot_count = min_t(int, space_args.space_slots, slot_count);
alloc_size = sizeof(*dest) * slot_count;
/* we generally have at most 6 or so space infos, one for each raid
* level. So, a whole page should be more than enough for everyone
*/
@ -1921,27 +2120,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
dest_orig = dest;
/* now we have a buffer to copy into */
rcu_read_lock();
list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
/* make sure we don't copy more than we allocated
* in our buffer
*/
if (slot_count == 0)
break;
slot_count--;
for (i = 0; i < num_types; i++) {
struct btrfs_space_info *tmp;
/* make sure userland has enough room in their buffer */
if (space_args.total_spaces >= space_args.space_slots)
break;
info = NULL;
rcu_read_lock();
list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
list) {
if (tmp->flags == types[i]) {
info = tmp;
break;
}
}
rcu_read_unlock();
space.flags = info->flags;
space.total_bytes = info->total_bytes;
space.used_bytes = info->bytes_used;
memcpy(dest, &space, sizeof(space));
dest++;
space_args.total_spaces++;
if (!info)
continue;
down_read(&info->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
if (!list_empty(&info->block_groups[c])) {
get_block_group_info(&info->block_groups[c],
&space);
memcpy(dest, &space, sizeof(space));
dest++;
space_args.total_spaces++;
}
}
up_read(&info->groups_sem);
}
rcu_read_unlock();
user_dest = (struct btrfs_ioctl_space_info *)
(arg + sizeof(struct btrfs_ioctl_space_args));
@ -1984,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
return 0;
}
static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
{
struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
struct btrfs_trans_handle *trans;
u64 transid;
trans = btrfs_start_transaction(root, 0);
transid = trans->transid;
btrfs_commit_transaction_async(trans, root, 0);
if (argp)
if (copy_to_user(argp, &transid, sizeof(transid)))
return -EFAULT;
return 0;
}
static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
{
struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
u64 transid;
if (argp) {
if (copy_from_user(&transid, argp, sizeof(transid)))
return -EFAULT;
} else {
transid = 0; /* current trans */
}
return btrfs_wait_for_commit(root, transid);
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@ -1998,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
case BTRFS_IOC_SNAP_CREATE:
return btrfs_ioctl_snap_create(file, argp, 0);
return btrfs_ioctl_snap_create(file, argp, 0, 0);
case BTRFS_IOC_SNAP_CREATE_ASYNC:
return btrfs_ioctl_snap_create(file, argp, 0, 1);
case BTRFS_IOC_SUBVOL_CREATE:
return btrfs_ioctl_snap_create(file, argp, 1);
return btrfs_ioctl_snap_create(file, argp, 1, 0);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
@ -2034,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SYNC:
btrfs_sync_fs(file->f_dentry->d_sb, 1);
return 0;
case BTRFS_IOC_START_SYNC:
return btrfs_ioctl_start_sync(file, argp);
case BTRFS_IOC_WAIT_SYNC:
return btrfs_ioctl_wait_sync(file, argp);
}
return -ENOTTY;

View file

@ -22,14 +22,21 @@
#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_VOL_NAME_MAX 255
#define BTRFS_PATH_NAME_MAX 4087
/* this should be 4k */
#define BTRFS_PATH_NAME_MAX 4087
struct btrfs_ioctl_vol_args {
__s64 fd;
char name[BTRFS_PATH_NAME_MAX + 1];
};
#define BTRFS_SNAPSHOT_NAME_MAX 4079
struct btrfs_ioctl_async_vol_args {
__s64 fd;
__u64 transid;
char name[BTRFS_SNAPSHOT_NAME_MAX + 1];
};
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
@ -178,4 +185,8 @@ struct btrfs_ioctl_space_args {
#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
struct btrfs_ioctl_space_args)
#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_async_vol_args)
#endif

View file

@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
u64 end;
u64 orig_end;
u64 wait_end;
struct btrfs_ordered_extent *ordered;
int found;
@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
if (orig_end > INT_LIMIT(loff_t))
orig_end = INT_LIMIT(loff_t);
}
wait_end = orig_end;
again:
/* start IO across the range first to instantiate any delalloc
* extents

View file

@ -29,6 +29,7 @@
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"
#include "free-space-cache.h"
/*
* backref_node, mapping_node and tree_block start with this
@ -178,8 +179,6 @@ struct reloc_control {
u64 search_start;
u64 extents_found;
int block_rsv_retries;
unsigned int stage:8;
unsigned int create_reloc_tree:1;
unsigned int merge_reloc_tree:1;
@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
LIST_HEAD(reloc_roots);
u64 num_bytes = 0;
int ret;
int retries = 0;
mutex_lock(&root->fs_info->trans_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@ -2143,7 +2141,7 @@ again:
if (!err) {
num_bytes = rc->merging_rsv_size;
ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
num_bytes, &retries);
num_bytes);
if (ret)
err = ret;
}
@ -2155,7 +2153,6 @@ again:
btrfs_end_transaction(trans, rc->extent_root);
btrfs_block_rsv_release(rc->extent_root,
rc->block_rsv, num_bytes);
retries = 0;
goto again;
}
}
@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
num_bytes = calcu_metadata_size(rc, node, 1) * 2;
trans->block_rsv = rc->block_rsv;
ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
&rc->block_rsv_retries);
ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
if (ret) {
if (ret == -EAGAIN)
rc->commit_transaction = 1;
return ret;
}
rc->block_rsv_retries = 0;
return 0;
}
@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
ret = get_ref_objectid_v0(rc, path, extent_key,
&ref_owner, NULL);
if (ret < 0)
return ret;
BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
level = (int)ref_owner;
/* FIXME: get real generation */
@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
return ret;
}
static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
struct inode *inode, u64 ino)
{
struct btrfs_key key;
struct btrfs_path *path;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_trans_handle *trans;
unsigned long nr;
int ret = 0;
if (inode)
goto truncate;
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, root, NULL);
if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
if (inode && !IS_ERR(inode))
iput(inode);
return -ENOENT;
}
truncate:
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
trans = btrfs_join_transaction(root, 0);
if (IS_ERR(trans)) {
btrfs_free_path(path);
goto out;
}
ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
btrfs_free_path(path);
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
out:
iput(inode);
return ret;
}
/*
* helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
* this function scans fs tree to find blocks reference the data extent
@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
int counted;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ref_root = btrfs_extent_data_ref_root(leaf, ref);
ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
ref_count = btrfs_extent_data_ref_count(leaf, ref);
/*
* This is an extent belonging to the free space cache, lets just delete
* it and redo the search.
*/
if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
ret = delete_block_group_cache(rc->extent_root->fs_info,
NULL, ref_objectid);
if (ret != -ENOENT)
return ret;
ret = 0;
}
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
root = read_fs_root(rc->extent_root->fs_info, ref_root);
if (IS_ERR(root)) {
err = PTR_ERR(root);
@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
* is no reservation in transaction handle.
*/
ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
rc->extent_root->nodesize * 256,
&rc->block_rsv_retries);
rc->extent_root->nodesize * 256);
if (ret)
return ret;
@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
rc->extents_found = 0;
rc->nodes_relocated = 0;
rc->merging_rsv_size = 0;
rc->block_rsv_retries = 0;
rc->create_reloc_tree = 1;
set_reloc_control(rc);
@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
{
struct btrfs_fs_info *fs_info = extent_root->fs_info;
struct reloc_control *rc;
struct inode *inode;
struct btrfs_path *path;
int ret;
int rw = 0;
int err = 0;
@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
rw = 1;
}
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
goto out;
}
inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
path);
btrfs_free_path(path);
if (!IS_ERR(inode))
ret = delete_block_group_cache(fs_info, inode, 0);
else
ret = PTR_ERR(inode);
if (ret && ret != -ENOENT) {
err = ret;
goto out;
}
rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
if (IS_ERR(rc->data_inode)) {
err = PTR_ERR(rc->data_inode);
@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
btrfs_add_ordered_sum(inode, ordered, sums);
}
btrfs_put_ordered_extent(ordered);
return 0;
return ret;
}
void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,

View file

@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
{
struct btrfs_root *dead_root;
struct btrfs_item *item;
struct btrfs_root_item *ri;
struct btrfs_key key;
struct btrfs_key found_key;
@ -214,7 +213,6 @@ again:
nritems = btrfs_header_nritems(leaf);
slot = path->slots[0];
}
item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &key, slot);
if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
goto next;

View file

@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
ret = close_ctree(root);
sb->s_fs_info = NULL;
(void)ret; /* FIXME: need to fix VFS to return error? */
}
enum {
@ -68,7 +70,8 @@ enum {
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
Opt_discard, Opt_err,
Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
Opt_user_subvol_rm_allowed,
};
static match_table_t tokens = {
@ -92,6 +95,9 @@ static match_table_t tokens = {
{Opt_flushoncommit, "flushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
{Opt_discard, "discard"},
{Opt_space_cache, "space_cache"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
{Opt_err, NULL},
};
@ -235,6 +241,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_discard:
btrfs_set_opt(info->mount_opt, DISCARD);
break;
case Opt_space_cache:
printk(KERN_INFO "btrfs: enabling disk space caching\n");
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
case Opt_clear_cache:
printk(KERN_INFO "btrfs: force clearing of disk cache\n");
btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
break;
case Opt_user_subvol_rm_allowed:
btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@ -380,7 +396,7 @@ static struct dentry *get_default_root(struct super_block *sb,
find_root:
new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
if (IS_ERR(new_root))
return ERR_PTR(PTR_ERR(new_root));
return ERR_CAST(new_root);
if (btrfs_root_refs(&new_root->root_item) == 0)
return ERR_PTR(-ENOENT);
@ -436,7 +452,6 @@ static int btrfs_fill_super(struct super_block *sb,
{
struct inode *inode;
struct dentry *root_dentry;
struct btrfs_super_block *disk_super;
struct btrfs_root *tree_root;
struct btrfs_key key;
int err;
@ -458,7 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
return PTR_ERR(tree_root);
}
sb->s_fs_info = tree_root;
disk_super = &tree_root->fs_info->super_copy;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
@ -571,7 +585,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0;
int found = 0;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
@ -607,7 +620,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
goto error_close_devices;
}
found = 1;
btrfs_close_devices(fs_devices);
} else {
char b[BDEVNAME_SIZE];
@ -629,7 +641,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if (IS_ERR(root)) {
error = PTR_ERR(root);
deactivate_locked_super(s);
goto error;
goto error_free_subvol_name;
}
/* if they gave us a subvolume name bind mount into that */
if (strcmp(subvol_name, ".")) {
@ -643,14 +655,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
deactivate_locked_super(s);
error = PTR_ERR(new_root);
dput(root);
goto error_close_devices;
goto error_free_subvol_name;
}
if (!new_root->d_inode) {
dput(root);
dput(new_root);
deactivate_locked_super(s);
error = -ENXIO;
goto error_close_devices;
goto error_free_subvol_name;
}
dput(root);
root = new_root;
@ -665,7 +677,6 @@ error_close_devices:
btrfs_close_devices(fs_devices);
error_free_subvol_name:
kfree(subvol_name);
error:
return ERR_PTR(error);
}
@ -713,18 +724,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct list_head *head = &root->fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
u64 total_used_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)root->fs_info->fsid;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list)
list_for_each_entry_rcu(found, head, list) {
if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
BTRFS_BLOCK_GROUP_SYSTEM))
total_used_data += found->disk_total;
else
total_used_data += found->disk_used;
total_used += found->disk_used;
}
rcu_read_unlock();
buf->f_namelen = BTRFS_NAME_LEN;
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
buf->f_bfree = buf->f_blocks - (total_used >> bits);
buf->f_bavail = buf->f_bfree;
buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;

View file

@ -163,6 +163,7 @@ enum btrfs_trans_type {
TRANS_START,
TRANS_JOIN,
TRANS_USERSPACE,
TRANS_JOIN_NOLOCK,
};
static int may_wait_transaction(struct btrfs_root *root, int type)
@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
int retries = 0;
int ret;
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
return ERR_PTR(-ENOMEM);
mutex_lock(&root->fs_info->trans_mutex);
if (type != TRANS_JOIN_NOLOCK)
mutex_lock(&root->fs_info->trans_mutex);
if (may_wait_transaction(root, type))
wait_current_trans(root);
@ -195,7 +196,8 @@ again:
cur_trans = root->fs_info->running_transaction;
cur_trans->use_count++;
mutex_unlock(&root->fs_info->trans_mutex);
if (type != TRANS_JOIN_NOLOCK)
mutex_unlock(&root->fs_info->trans_mutex);
h->transid = cur_trans->transid;
h->transaction = cur_trans;
@ -212,8 +214,7 @@ again:
}
if (num_items > 0) {
ret = btrfs_trans_reserve_metadata(h, root, num_items,
&retries);
ret = btrfs_trans_reserve_metadata(h, root, num_items);
if (ret == -EAGAIN) {
btrfs_commit_transaction(h, root);
goto again;
@ -224,9 +225,11 @@ again:
}
}
mutex_lock(&root->fs_info->trans_mutex);
if (type != TRANS_JOIN_NOLOCK)
mutex_lock(&root->fs_info->trans_mutex);
record_root_in_trans(h, root);
mutex_unlock(&root->fs_info->trans_mutex);
if (type != TRANS_JOIN_NOLOCK)
mutex_unlock(&root->fs_info->trans_mutex);
if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h;
@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
return start_transaction(root, 0, TRANS_JOIN);
}
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
int num_blocks)
{
return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
}
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks)
{
@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
return 0;
}
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
struct btrfs_transaction *cur_trans = NULL, *t;
int ret;
mutex_lock(&root->fs_info->trans_mutex);
ret = 0;
if (transid) {
if (transid <= root->fs_info->last_trans_committed)
goto out_unlock;
/* find specified transaction */
list_for_each_entry(t, &root->fs_info->trans_list, list) {
if (t->transid == transid) {
cur_trans = t;
break;
}
if (t->transid > transid)
break;
}
ret = -EINVAL;
if (!cur_trans)
goto out_unlock; /* bad transid */
} else {
/* find newest transaction that is committing | committed */
list_for_each_entry_reverse(t, &root->fs_info->trans_list,
list) {
if (t->in_commit) {
if (t->commit_done)
goto out_unlock;
cur_trans = t;
break;
}
}
if (!cur_trans)
goto out_unlock; /* nothing committing|committed */
}
cur_trans->use_count++;
mutex_unlock(&root->fs_info->trans_mutex);
wait_for_commit(root, cur_trans);
mutex_lock(&root->fs_info->trans_mutex);
put_transaction(cur_trans);
ret = 0;
out_unlock:
mutex_unlock(&root->fs_info->trans_mutex);
return ret;
}
#if 0
/*
* rate limit against the drop_snapshot code. This helps to slow down new
@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
}
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int throttle)
struct btrfs_root *root, int throttle, int lock)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *info = root->fs_info;
@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
if (!root->fs_info->open_ioctl_trans &&
if (lock && !root->fs_info->open_ioctl_trans &&
should_end_transaction(trans, root))
trans->transaction->blocked = 1;
if (cur_trans->blocked && !cur_trans->in_commit) {
if (lock && cur_trans->blocked && !cur_trans->in_commit) {
if (throttle)
return btrfs_commit_transaction(trans, root);
else
wake_up_process(info->transaction_kthread);
}
mutex_lock(&info->trans_mutex);
if (lock)
mutex_lock(&info->trans_mutex);
WARN_ON(cur_trans != info->running_transaction);
WARN_ON(cur_trans->num_writers < 1);
cur_trans->num_writers--;
smp_mb();
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
put_transaction(cur_trans);
mutex_unlock(&info->trans_mutex);
if (lock)
mutex_unlock(&info->trans_mutex);
if (current->journal_info == trans)
current->journal_info = NULL;
@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
return __btrfs_end_transaction(trans, root, 0);
return __btrfs_end_transaction(trans, root, 0, 1);
}
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
return __btrfs_end_transaction(trans, root, 1);
return __btrfs_end_transaction(trans, root, 1, 1);
}
int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
return __btrfs_end_transaction(trans, root, 0, 0);
}
/*
@ -836,7 +906,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct extent_buffer *tmp;
struct extent_buffer *old;
int ret;
int retries = 0;
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
@ -858,7 +927,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (to_reserve > 0) {
ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
to_reserve, &retries);
to_reserve);
if (ret) {
pending->error = ret;
goto fail;
@ -966,6 +1035,8 @@ static void update_super_roots(struct btrfs_root *root)
super->root = root_item->bytenr;
super->generation = root_item->generation;
super->root_level = root_item->level;
if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
super->cache_generation = root_item->generation;
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@ -988,11 +1059,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
return ret;
}
/*
* wait for the current transaction commit to start and block subsequent
* transaction joins
*/
static void wait_current_trans_commit_start(struct btrfs_root *root,
struct btrfs_transaction *trans)
{
DEFINE_WAIT(wait);
if (trans->in_commit)
return;
while (1) {
prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
TASK_UNINTERRUPTIBLE);
if (trans->in_commit) {
finish_wait(&root->fs_info->transaction_blocked_wait,
&wait);
break;
}
mutex_unlock(&root->fs_info->trans_mutex);
schedule();
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
}
}
/*
* wait for the current transaction to start and then become unblocked.
* caller holds ref.
*/
static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
struct btrfs_transaction *trans)
{
DEFINE_WAIT(wait);
if (trans->commit_done || (trans->in_commit && !trans->blocked))
return;
while (1) {
prepare_to_wait(&root->fs_info->transaction_wait, &wait,
TASK_UNINTERRUPTIBLE);
if (trans->commit_done ||
(trans->in_commit && !trans->blocked)) {
finish_wait(&root->fs_info->transaction_wait,
&wait);
break;
}
mutex_unlock(&root->fs_info->trans_mutex);
schedule();
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&root->fs_info->transaction_wait,
&wait);
}
}
/*
* commit transactions asynchronously. once btrfs_commit_transaction_async
* returns, any subsequent transaction will not be allowed to join.
*/
struct btrfs_async_commit {
struct btrfs_trans_handle *newtrans;
struct btrfs_root *root;
struct delayed_work work;
};
static void do_async_commit(struct work_struct *work)
{
struct btrfs_async_commit *ac =
container_of(work, struct btrfs_async_commit, work.work);
btrfs_commit_transaction(ac->newtrans, ac->root);
kfree(ac);
}
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int wait_for_unblock)
{
struct btrfs_async_commit *ac;
struct btrfs_transaction *cur_trans;
ac = kmalloc(sizeof(*ac), GFP_NOFS);
BUG_ON(!ac);
INIT_DELAYED_WORK(&ac->work, do_async_commit);
ac->root = root;
ac->newtrans = btrfs_join_transaction(root, 0);
/* take transaction reference */
mutex_lock(&root->fs_info->trans_mutex);
cur_trans = trans->transaction;
cur_trans->use_count++;
mutex_unlock(&root->fs_info->trans_mutex);
btrfs_end_transaction(trans, root);
schedule_delayed_work(&ac->work, 0);
/* wait for transaction to start and unblock */
mutex_lock(&root->fs_info->trans_mutex);
if (wait_for_unblock)
wait_current_trans_commit_start_and_unblock(root, cur_trans);
else
wait_current_trans_commit_start(root, cur_trans);
put_transaction(cur_trans);
mutex_unlock(&root->fs_info->trans_mutex);
return 0;
}
/*
* btrfs_transaction state sequence:
* in_commit = 0, blocked = 0 (initial)
* in_commit = 1, blocked = 1
* blocked = 0
* commit_done = 1
*/
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
unsigned long joined = 0;
unsigned long timeout = 1;
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL;
DEFINE_WAIT(wait);
@ -1039,6 +1226,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
trans->transaction->in_commit = 1;
trans->transaction->blocked = 1;
wake_up(&root->fs_info->transaction_blocked_wait);
if (cur_trans->list.prev != &root->fs_info->trans_list) {
prev_trans = list_entry(cur_trans->list.prev,
struct btrfs_transaction, list);
@ -1063,11 +1252,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
snap_pending = 1;
WARN_ON(cur_trans != trans->transaction);
if (cur_trans->num_writers > 1)
timeout = MAX_SCHEDULE_TIMEOUT;
else if (should_grow)
timeout = 1;
mutex_unlock(&root->fs_info->trans_mutex);
if (flush_on_commit || snap_pending) {
@ -1089,8 +1273,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
TASK_UNINTERRUPTIBLE);
smp_mb();
if (cur_trans->num_writers > 1 || should_grow)
schedule_timeout(timeout);
if (cur_trans->num_writers > 1)
schedule_timeout(MAX_SCHEDULE_TIMEOUT);
else if (should_grow)
schedule_timeout(1);
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait);

View file

@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
int num_blocks);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
int num_blocks);
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks);
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int wait_for_unblock);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,

View file

@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int ret = 0;
int wret;
int level;
int orig_level;
int is_extent = 0;
int next_key_ret = 0;
u64 last_ret = 0;
@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
return -ENOMEM;
level = btrfs_header_level(root->node);
orig_level = level;
if (level == 0)
goto out;

View file

@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
{
struct inode *dir;
int ret;
struct btrfs_key location;
struct btrfs_inode_ref *ref;
struct btrfs_dir_item *di;
struct inode *inode;
@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
unsigned long ref_ptr;
unsigned long ref_end;
location.objectid = key->objectid;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
/*
* it is possible that we didn't log all the parent directories
* for a given inode. If we don't find the dir, just don't
@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_path *path;
struct btrfs_root *root = wc->replay_dest;
struct btrfs_key key;
u32 item_size;
int level;
int i;
int ret;
@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
item_size = btrfs_item_size_nr(eb, i);
/* inode keys are done during the first stage */
if (key.type == BTRFS_INODE_ITEM_KEY &&
@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
u64 root_owner;
u64 root_gen;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level];
root_owner = btrfs_header_owner(parent);
root_gen = btrfs_header_generation(parent);
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
u64 root_owner;
u64 root_gen;
int i;
int slot;
int ret;
@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
slot = path->slots[i];
if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
struct extent_buffer *node;
node = path->nodes[i];
path->slots[i]++;
*level = i;
WARN_ON(*level == 0);
@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level + 1];
root_owner = btrfs_header_owner(parent);
root_gen = btrfs_header_generation(parent);
wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
if (wc->free) {
@ -2273,7 +2260,7 @@ fail:
}
btrfs_end_log_trans(root);
return 0;
return err;
}
/* see comments for btrfs_del_dir_entries_in_log */
@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src = NULL;
u32 size;
int err = 0;
int ret;
int nritems;
@ -2793,7 +2779,6 @@ again:
break;
src = path->nodes[0];
size = btrfs_item_size_nr(src, path->slots[0]);
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
ins_nr++;
goto next_slot;

View file

@ -1898,7 +1898,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
u64 size_to_free;
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_chunk *chunk;
struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
struct btrfs_trans_handle *trans;
struct btrfs_key found_key;
@ -1962,9 +1961,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
if (found_key.objectid != key.objectid)
break;
chunk = btrfs_item_ptr(path->nodes[0],
path->slots[0],
struct btrfs_chunk);
/* chunk zero is special */
if (found_key.offset == 0)
break;
@ -3031,8 +3027,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
}
bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
dev = multi->stripes[dev_nr].dev;
BUG_ON(rw == WRITE && !dev->writeable);
if (dev && dev->bdev) {
if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
bio->bi_bdev = dev->bdev;
if (async_submit)
schedule_bio(root, dev, rw, bio);

View file

@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct btrfs_item *item;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
int ret = 0, slot, advance;
@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
advance = 1;
item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* check to make sure this item is what we want */

View file

@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
int nr_pages = 0;
struct page *in_page = NULL;
struct page *out_page = NULL;
int out_written = 0;
int in_read = 0;
unsigned long bytes_left;
*out_pages = 0;
@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
out_written = 0;
in_read = 0;
while (workspace->def_strm.total_in < len) {
ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
if (ret != Z_OK) {

View file

@ -1081,30 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
}
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* writeback_inodes_sb_nr - writeback dirty inodes from given super_block
* @sb: the superblock
* @nr: the number of pages to write
*
* Start writeback on some inodes on this super_block. No guarantees are made
* on how many (if any) will be written, and this function does not wait
* for IO completion of submitted IO. The number of pages submitted is
* returned.
* for IO completion of submitted IO.
*/
void writeback_inodes_sb(struct super_block *sb)
void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
{
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
.done = &done,
.nr_pages = nr,
};
WARN_ON(!rwsem_is_locked(&sb->s_umount));
work.nr_pages = get_nr_dirty_pages();
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
}
EXPORT_SYMBOL(writeback_inodes_sb_nr);
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock
*
* Start writeback on some inodes on this super_block. No guarantees are made
* on how many (if any) will be written, and this function does not wait
* for IO completion of submitted IO.
*/
void writeback_inodes_sb(struct super_block *sb)
{
return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
}
EXPORT_SYMBOL(writeback_inodes_sb);
/**
@ -1126,6 +1138,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
}
EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
/**
* writeback_inodes_sb_if_idle - start writeback if none underway
* @sb: the superblock
* @nr: the number of pages to write
*
* Invoke writeback_inodes_sb if no writeback is currently underway.
* Returns 1 if writeback was started, 0 if not.
*/
int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
unsigned long nr)
{
if (!writeback_in_progress(sb->s_bdi)) {
down_read(&sb->s_umount);
writeback_inodes_sb_nr(sb, nr);
up_read(&sb->s_umount);
return 1;
} else
return 0;
}
EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
/**
* sync_inodes_sb - sync sb inode pages
* @sb: the superblock

View file

@ -58,7 +58,9 @@ struct writeback_control {
struct bdi_writeback;
int inode_wait(void *);
void writeback_inodes_sb(struct super_block *);
void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
int writeback_inodes_sb_if_idle(struct super_block *);
int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
void sync_inodes_sb(struct super_block *);
void writeback_inodes_wb(struct bdi_writeback *wb,
struct writeback_control *wbc);