Btrfs: Add data=ordered support

This forces file data extents down to disk along with the metadata that
references them.  The current implementation is fairly simple: it just
writes out all of the dirty pages in an inode before the commit.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason 2008-01-08 15:46:30 -05:00
parent e4204dedbb
commit dc17ff8f11
13 changed files with 387 additions and 28 deletions

View file

@ -5,7 +5,7 @@ obj-m := btrfs.o
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o bit-radix.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o acl.o
extent_map.o sysfs.o struct-funcs.o xattr.o acl.o ordered-data.o
#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
# root-tree.o dir-item.o hash.o file-item.o inode-item.o \

View file

@ -29,6 +29,7 @@ struct btrfs_inode {
struct extent_map_tree extent_tree;
struct inode vfs_inode;
u64 ordered_trans;
/*
* transid of the trans_handle that last modified this inode
*/

View file

@ -221,7 +221,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret)
{
u64 search_start;
u64 header_trans;
int ret;
if (trans->transaction != root->fs_info->running_transaction) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->running_transaction->transid);
@ -232,7 +234,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
root->fs_info->generation);
WARN_ON(1);
}
if (btrfs_header_generation(buf) == trans->transid) {
header_trans = btrfs_header_generation(buf);
if (header_trans == trans->transid) {
*cow_ret = buf;
return 0;
}

View file

@ -16,8 +16,8 @@
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS__
#define __BTRFS__
#ifndef __BTRFS_CTREE__
#define __BTRFS_CTREE__
#include <linux/version.h>
#include <linux/mm.h>
@ -363,7 +363,6 @@ struct btrfs_root {
struct inode *inode;
struct kobject root_kobj;
struct completion kobj_unregister;
struct rw_semaphore snap_sem;
u64 objectid;
u64 last_trans;
@ -1142,6 +1141,8 @@ void btrfs_destroy_cachep(void);
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
struct btrfs_root *root);
struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
u64 root_objectid);
int btrfs_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to);
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,

View file

@ -406,7 +406,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
memset(&root->root_kobj, 0, sizeof(root->root_kobj));
init_completion(&root->kobj_unregister);
init_rwsem(&root->snap_sem);
root->defrag_running = 0;
root->defrag_level = 0;
root->root_key.objectid = objectid;
@ -498,6 +497,21 @@ insert:
return root;
}
/*
 * Map a root objectid to its in-memory btrfs_root.
 *
 * The tree root and the extent root are kept directly in fs_info; every
 * other objectid is looked up in fs_info->fs_roots_radix.  Returns
 * whatever radix_tree_lookup() yields for ids that are not one of the
 * two special roots.
 */
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
					u64 root_objectid)
{
	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
		return fs_info->tree_root;

	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
		return fs_info->extent_root;

	return radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)root_objectid);
}
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
struct btrfs_key *location)
{

View file

@ -34,6 +34,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
u64 root_objectid);
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *location,
const char *name, int namelen);

View file

@ -1195,7 +1195,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
if (btrfs_buffer_uptodate(buf)) {
u64 transid =
root->fs_info->running_transaction->transid;
if (btrfs_header_generation(buf) == transid) {
u64 header_transid =
btrfs_header_generation(buf);
if (header_transid == transid) {
free_extent_buffer(buf);
return 1;
}

View file

@ -34,6 +34,7 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ordered-data.h"
#include "ioctl.h"
#include "print-tree.h"
@ -329,6 +330,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
root->fs_info->delalloc_bytes += (end_of_last_block + 1 -
start_pos) - existing_delalloc;
spin_unlock(&root->fs_info->delalloc_lock);
btrfs_add_ordered_inode(inode);
} else {
u64 aligned_end;
/* step one, delete the existing extents in this range */
@ -724,8 +726,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
down_read(&BTRFS_I(inode)->root->snap_sem);
mutex_lock(&inode->i_mutex);
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + count) >> PAGE_CACHE_SHIFT;
@ -804,7 +804,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
}
out:
mutex_unlock(&inode->i_mutex);
up_read(&BTRFS_I(inode)->root->snap_sem);
out_nolock:
kfree(pages);

View file

@ -135,6 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
}
btrfs_add_ordered_inode(inode);
out:
btrfs_end_transaction(trans, root);
return ret;
@ -367,8 +368,8 @@ void btrfs_read_locked_inode(struct inode *inode)
path = btrfs_alloc_path();
BUG_ON(!path);
mutex_lock(&root->fs_info->fs_mutex);
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
if (ret)
goto make_bad;
@ -898,7 +899,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
if ((offset & (blocksize - 1)) == 0)
goto out;
down_read(&root->snap_sem);
ret = -ENOMEM;
page = grab_cache_page(mapping, index);
if (!page)
@ -917,7 +917,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
unlock_page(page);
page_cache_release(page);
up_read(&BTRFS_I(inode)->root->snap_sem);
out:
return ret;
}
@ -1146,6 +1145,19 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
args->root == BTRFS_I(inode)->root);
}
/*
 * Look up an inode by (root objectid, inode objectid) via ilookup5().
 *
 * The root is resolved with btrfs_lookup_fs_root(); if that yields no
 * root, NULL is returned without calling ilookup5().  Otherwise the
 * result of ilookup5() with btrfs_find_actor is returned as-is.
 */
struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
			    u64 root_objectid)
{
	struct btrfs_iget_args args;
	struct btrfs_root *fs_root;

	fs_root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
	if (!fs_root)
		return NULL;

	args.ino = objectid;
	args.root = fs_root;
	return ilookup5(s, objectid, btrfs_find_actor, &args);
}
struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
struct btrfs_root *root)
{
@ -1336,7 +1348,6 @@ read_dir_items:
d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location);
over = filldir(dirent, name_ptr, name_len,
found_key.offset,
location.objectid,
@ -2054,7 +2065,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
ret = -EINVAL;
down_read(&BTRFS_I(inode)->root->snap_sem);
lock_page(page);
wait_on_page_writeback(page);
size = i_size_read(inode);
@ -2075,7 +2085,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
ret = btrfs_cow_one_page(inode, page, end);
out_unlock:
up_read(&BTRFS_I(inode)->root->snap_sem);
unlock_page(page);
out:
return ret;
@ -2118,7 +2127,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name,
struct btrfs_root_item root_item;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
struct btrfs_root *new_root;
struct btrfs_root *new_root = root;
struct inode *inode;
struct inode *dir;
int ret;
@ -2230,7 +2239,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name,
goto fail;
fail:
nr = trans->blocks_used;
err = btrfs_commit_transaction(trans, root);
err = btrfs_commit_transaction(trans, new_root);
if (err && !ret)
ret = err;
fail_commit:
@ -2253,10 +2262,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
if (!root->ref_cows)
return -EINVAL;
down_write(&root->snap_sem);
freeze_bdev(root->fs_info->sb->s_bdev);
thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb);
mutex_lock(&root->fs_info->fs_mutex);
ret = btrfs_check_free_space(root, 1, 0);
if (ret)
@ -2264,6 +2269,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
err = btrfs_commit_transaction(trans, root);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_update_inode(trans, root, root->inode);
if (ret)
@ -2272,9 +2280,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
0, &objectid);
if (ret)
goto fail;
memcpy(&new_root_item, &root->root_item,
goto fail; memcpy(&new_root_item, &root->root_item,
sizeof(new_root_item));
key.objectid = objectid;
@ -2285,12 +2291,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
free_extent_buffer(tmp);
/* write the ordered inodes to force all delayed allocations to
* be filled. Once this is done, we can copy the root
*/
mutex_lock(&root->fs_info->trans_mutex);
btrfs_write_ordered_inodes(trans, root);
mutex_unlock(&root->fs_info->trans_mutex);
btrfs_copy_root(trans, root, root->node, &tmp, objectid);
btrfs_set_root_bytenr(&new_root_item, tmp->start);
btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp));
ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
&new_root_item);
printk("new root %Lu node %Lu\n", objectid, tmp->start);
free_extent_buffer(tmp);
if (ret)
goto fail;
@ -2321,7 +2335,6 @@ fail:
ret = err;
fail_unlock:
mutex_unlock(&root->fs_info->fs_mutex);
up_write(&root->snap_sem);
btrfs_btree_balance_dirty(root, nr);
return ret;
}
@ -2608,6 +2621,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
if (!ei)
return NULL;
ei->last_trans = 0;
ei->ordered_trans = 0;
return &ei->vfs_inode;
}

221
fs/btrfs/ordered-data.c Normal file
View file

@ -0,0 +1,221 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/gfp.h>
#include <linux/slab.h>
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
/*
 * One node of the per-transaction ordered-inode rbtree.  Entries are
 * keyed by (root_objectid, objectid), compared in that order.
 */
struct tree_entry {
	/* objectid of the root the inode belongs to (primary sort key) */
	u64 root_objectid;
	/* inode number within that root (secondary sort key) */
	u64 objectid;
	/* linkage into the rbtree */
	struct rb_node rb_node;
};
/*
 * Compare the key (root_objectid, objectid) against an existing entry.
 * root_objectid is the primary key, objectid breaks ties.
 *
 * Returns 1 if the key sorts after the entry, -1 if it sorts before the
 * entry, and 0 if both components are equal.
 */
static int comp_entry(struct tree_entry *entry, u64 root_objectid,
		      u64 objectid)
{
	if (root_objectid != entry->root_objectid)
		return root_objectid < entry->root_objectid ? -1 : 1;

	if (objectid != entry->objectid)
		return objectid < entry->objectid ? -1 : 1;

	return 0;
}
/*
 * Link @node into the rbtree at the position for (root_objectid, objectid).
 *
 * Returns NULL when the node was inserted.  If an entry with the same key
 * is already present, nothing is modified and that existing rb_node is
 * returned instead.
 */
static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
				   u64 objectid, struct rb_node *node)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;

	while (*link) {
		struct tree_entry *cur;
		int cmp;

		parent = *link;
		cur = rb_entry(parent, struct tree_entry, rb_node);
		cmp = comp_entry(cur, root_objectid, objectid);
		if (cmp == 0)
			return parent;

		/* smaller keys go left, larger keys go right */
		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(node, parent, link);
	rb_insert_color(node, root);
	return NULL;
}
/*
 * Search the rbtree for the exact key (root_objectid, objectid).
 *
 * Returns the matching rb_node, or NULL if there is no exact match.
 * On a miss, and only if @prev_ret is non-NULL, *prev_ret is set to the
 * first node whose key is greater than the one searched for (NULL when
 * no such node exists).  *prev_ret is left untouched on an exact match.
 */
static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
				     u64 objectid, struct rb_node **prev_ret)
{
	struct rb_node *cur = root->rb_node;
	struct rb_node *last = NULL;

	while (cur) {
		struct tree_entry *e = rb_entry(cur, struct tree_entry,
						rb_node);
		int cmp = comp_entry(e, root_objectid, objectid);

		if (cmp == 0)
			return cur;

		/* remember the last node visited before falling off the tree */
		last = cur;
		cur = cmp < 0 ? cur->rb_left : cur->rb_right;
	}

	if (!prev_ret)
		return NULL;

	/* step forward until we reach a node that sorts after the key */
	while (last) {
		struct tree_entry *e = rb_entry(last, struct tree_entry,
						rb_node);

		if (comp_entry(e, root_objectid, objectid) < 0)
			break;
		last = rb_next(last);
	}

	*prev_ret = last;
	return NULL;
}
/*
 * Return the node matching (root_objectid, objectid) exactly, or failing
 * that the node __tree_search() reports through its prev_ret argument
 * (which may be NULL).
 */
static inline struct rb_node *tree_search(struct rb_root *root,
					  u64 root_objectid, u64 objectid)
{
	struct rb_node *next;
	struct rb_node *exact;

	exact = __tree_search(root, root_objectid, objectid, &next);
	return exact ? exact : next;
}
/*
 * Queue @inode in the running transaction's ordered-inode tree.
 *
 * Nothing is done when the inode's ordered_trans is already at or past
 * the running transid, or when an entry for (root, ino) already exists.
 *
 * Returns 0 on success (including the "already present" cases) and
 * -ENOMEM if the tree entry cannot be allocated.
 */
int btrfs_add_ordered_inode(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 root_objectid = root->root_key.objectid;
	u64 transid = root->fs_info->running_transaction->transid;
	struct btrfs_ordered_inode_tree *tree;
	struct tree_entry *new_entry;
	struct rb_node *found;

	/* ordered_trans is stamped with transid each time we insert below */
	if (BTRFS_I(inode)->ordered_trans >= transid)
		return 0;

	tree = &root->fs_info->running_transaction->ordered_inode_tree;

	/* cheap check under the read lock before allocating anything */
	read_lock(&tree->lock);
	found = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL);
	read_unlock(&tree->lock);
	if (found)
		return 0;

	new_entry = kmalloc(sizeof(*new_entry), GFP_NOFS);
	if (!new_entry)
		return -ENOMEM;
	new_entry->root_objectid = root_objectid;
	new_entry->objectid = inode->i_ino;

	write_lock(&tree->lock);
	found = tree_insert(&tree->tree, root_objectid,
			    inode->i_ino, &new_entry->rb_node);
	BTRFS_I(inode)->ordered_trans = transid;
	write_unlock(&tree->lock);

	/* an entry for this key was already there; ours was never linked */
	if (found)
		kfree(new_entry);
	return 0;
}
/*
 * Find the first entry in @tree whose key is strictly greater than
 * (*root_objectid, *objectid).
 *
 * On success the key of that entry is stored back through @root_objectid
 * and @objectid and 1 is returned.  If no such entry exists, 0 is returned
 * and both values are left unchanged.  The tree itself is not modified,
 * so only the read side of tree->lock is taken.
 */
int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
				   u64 *root_objectid, u64 *objectid)
{
	struct tree_entry *entry;
	struct rb_node *node;
	int found = 0;

	read_lock(&tree->lock);
	node = tree_search(&tree->tree, *root_objectid, *objectid);
	while (node) {
		entry = rb_entry(node, struct tree_entry, rb_node);
		/* comp_entry() < 0 means the search key sorts before entry */
		if (comp_entry(entry, *root_objectid, *objectid) < 0) {
			*root_objectid = entry->root_objectid;
			*objectid = entry->objectid;
			found = 1;
			break;
		}
		node = rb_next(node);
	}
	read_unlock(&tree->lock);
	return found;
}
/*
 * Find the first entry in @tree whose key is strictly greater than
 * (*root_objectid, *objectid), remove it from the tree and free it.
 *
 * On success the removed entry's key is stored back through
 * @root_objectid and @objectid and 1 is returned.  Returns 0, leaving
 * both values and the tree unchanged, when there is no such entry.
 */
int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
				       u64 *root_objectid, u64 *objectid)
{
	struct tree_entry *victim = NULL;
	struct rb_node *cur;

	write_lock(&tree->lock);
	for (cur = tree_search(&tree->tree, *root_objectid, *objectid);
	     cur; cur = rb_next(cur)) {
		struct tree_entry *e = rb_entry(cur, struct tree_entry,
						rb_node);

		/* stop at the first entry that sorts after the search key */
		if (comp_entry(e, *root_objectid, *objectid) < 0) {
			victim = e;
			break;
		}
	}

	if (!victim) {
		write_unlock(&tree->lock);
		return 0;
	}

	*root_objectid = victim->root_objectid;
	*objectid = victim->objectid;
	rb_erase(&victim->rb_node, &tree->tree);
	write_unlock(&tree->lock);

	/* entry is unlinked, safe to free outside the lock */
	kfree(victim);
	return 1;
}

39
fs/btrfs/ordered-data.h Normal file
View file

@ -0,0 +1,39 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __BTRFS_ORDERED_DATA__
#define __BTRFS_ORDERED_DATA__
/*
 * Set of inodes queued for ordered data writeback, kept as an rbtree.
 * One of these is embedded in each btrfs_transaction.
 */
struct btrfs_ordered_inode_tree {
	/* taken by the ordered-data helpers around every access to tree */
	rwlock_t lock;
	/* root of the rbtree holding the queued entries */
	struct rb_root tree;
};
/*
 * Prepare an ordered-inode tree for first use: start with an empty
 * rbtree and an initialised rwlock.
 */
static inline void
btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
{
	t->tree.rb_node = NULL;
	rwlock_init(&t->lock);
}
int btrfs_add_ordered_inode(struct inode *inode);
int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
u64 *root_objectid, u64 *objectid);
int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
u64 *root_objectid, u64 *objectid);
#endif

View file

@ -67,6 +67,7 @@ static int join_transaction(struct btrfs_root *root)
cur_trans->commit_done = 0;
cur_trans->start_time = get_seconds();
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
extent_map_tree_init(&cur_trans->dirty_pages,
root->fs_info->btree_inode->i_mapping,
GFP_NOFS);
@ -473,6 +474,60 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
return ret;
}
/*
 * Write back the data of one inode taken from the ordered-inode tree.
 *
 * trans_mutex and fs_mutex are dropped while the inode is looked up and
 * written, then re-taken (fs_mutex first) before returning.  Only regular
 * files are written; an inode that ilookup no longer finds is skipped.
 *
 * @wait: non-zero uses filemap_write_and_wait(), zero only starts the
 *        writeback with filemap_fdatawrite().
 *
 * Returns 0 or the error reported by the filemap call.
 */
static int write_one_ordered_inode(struct btrfs_root *root, u64 root_objectid,
				   u64 objectid, int wait)
{
	struct inode *inode;
	int err = 0;

	mutex_unlock(&root->fs_info->trans_mutex);
	mutex_unlock(&root->fs_info->fs_mutex);

	inode = btrfs_ilookup(root->fs_info->sb, objectid, root_objectid);
	if (inode) {
		if (S_ISREG(inode->i_mode)) {
			if (wait)
				err = filemap_write_and_wait(inode->i_mapping);
			else
				err = filemap_fdatawrite(inode->i_mapping);
		}
		iput(inode);
	}

	mutex_lock(&root->fs_info->fs_mutex);
	mutex_lock(&root->fs_info->trans_mutex);
	return err;
}

/*
 * Flush the data of every inode queued in the transaction's ordered-inode
 * tree.
 *
 * Pass one walks the tree in key order and starts writeback on each inode
 * without waiting.  Pass two repeatedly removes the first remaining entry
 * and writes and waits on that inode, until the tree is empty.
 *
 * Called with fs_mutex and trans_mutex held; both are released around
 * each inode's writeback and held again on return.
 *
 * Both passes always run to completion.  Returns 0 if every writeback
 * call succeeded, otherwise the first error encountered.
 */
int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	u64 root_objectid = 0;
	u64 objectid = 0;
	u64 transid = trans->transid;
	int first_err = 0;
	int err;

	printk("write ordered trans %Lu\n", transid);

	/* pass one: kick off IO, advancing the cursor through the tree */
	while (btrfs_find_first_ordered_inode(&cur_trans->ordered_inode_tree,
					      &root_objectid, &objectid)) {
		err = write_one_ordered_inode(root, root_objectid,
					      objectid, 0);
		if (err && !first_err)
			first_err = err;
	}

	/* pass two: drain the tree from the start, waiting on each inode */
	while (1) {
		root_objectid = 0;
		objectid = 0;
		if (!btrfs_find_del_first_ordered_inode(
					&cur_trans->ordered_inode_tree,
					&root_objectid, &objectid))
			break;

		err = write_one_ordered_inode(root, root_objectid,
					      objectid, 1);
		if (err && !first_err)
			first_err = err;
	}

	printk("done write ordered trans %Lu\n", transid);
	return first_err;
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@ -550,10 +605,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_lock(&root->fs_info->fs_mutex);
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait);
ret = btrfs_write_ordered_inodes(trans, root);
} while (cur_trans->num_writers > 1 ||
(cur_trans->num_joined != joined));
WARN_ON(cur_trans != trans->transaction);
ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
&dirty_fs_roots);
BUG_ON(ret);

View file

@ -16,9 +16,10 @@
* Boston, MA 021110-1307, USA.
*/
#ifndef __TRANSACTION__
#define __TRANSACTION__
#ifndef __BTRFS_TRANSACTION__
#define __BTRFS_TRANSACTION__
#include "btrfs_inode.h"
#include "ordered-data.h"
struct btrfs_transaction {
u64 transid;
@ -30,6 +31,7 @@ struct btrfs_transaction {
struct list_head list;
struct extent_map_tree dirty_pages;
unsigned long start_time;
struct btrfs_ordered_inode_tree ordered_inode_tree;
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
};
@ -90,4 +92,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
#endif