Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  fs: simplify iget & friends
  fs: pull inode->i_lock up out of writeback_single_inode
  fs: rename inode_lock to inode_hash_lock
  fs: move i_wb_list out from under inode_lock
  fs: move i_sb_list out from under inode_lock
  fs: remove inode_lock from iput_final and prune_icache
  fs: Lock the inode LRU list separately
  fs: factor inode disposal
  fs: protect inode->i_state with inode->i_lock
  autofs4: Do not potentially dereference NULL pointer returned by fget() in autofs_dev_ioctl_setpipefd()
  autofs4 - remove autofs4_lock
  autofs4 - fix d_manage() return on rcu-walk
  autofs4 - fix autofs4_expire_indirect() traversal
  autofs4 - fix dentry leak in autofs4_expire_direct()
  autofs4 - reinstate last used update on access
  vfs - check non-mountpoint dentry might block in __follow_mount_rcu()
Linus Torvalds 2011-03-24 19:01:30 -07:00
commit d39dd11c3e
27 changed files with 622 additions and 543 deletions

Documentation/filesystems/Locking

@@ -128,7 +128,7 @@ alloc_inode:
 destroy_inode:
 dirty_inode:				(must not sleep)
 write_inode:
-drop_inode:				!!!inode_lock!!!
+drop_inode:				!!!inode->i_lock!!!
 evict_inode:
 put_super:		write
 write_super:		read

Documentation/filesystems/porting

@@ -298,11 +298,14 @@ be used instead. It gets called whenever the inode is evicted, whether it has
 remaining links or not. Caller does *not* evict the pagecache or inode-associated
 metadata buffers; getting rid of those is responsibility of method, as it had
 been for ->delete_inode().
-	->drop_inode() returns int now; it's called on final iput() with inode_lock
-held and it returns true if filesystems wants the inode to be dropped. As before,
-generic_drop_inode() is still the default and it's been updated appropriately.
-generic_delete_inode() is also alive and it consists simply of return 1. Note that
-all actual eviction work is done by caller after ->drop_inode() returns.
+
+	->drop_inode() returns int now; it's called on final iput() with
+inode->i_lock held and it returns true if filesystems wants the inode to be
+dropped. As before, generic_drop_inode() is still the default and it's been
+updated appropriately. generic_delete_inode() is also alive and it consists
+simply of return 1. Note that all actual eviction work is done by caller after
+->drop_inode() returns.
+
 	clear_inode() is gone; use end_writeback() instead. As before, it must
 be called exactly once on each call of ->evict_inode() (as it used to be for
 each call of ->delete_inode()). Unlike before, if you are using inode-associated
@@ -395,6 +398,9 @@ Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set,
 so the i_size should not change when hole punching, even when puching the end of
 a file off.
 
+--
+[mandatory]
+
 --
 [mandatory]
 	->get_sb() is gone. Switch to use of ->mount(). Typically it's just
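
[Illustration, not part of the commit: a minimal sketch of a ->drop_inode()
method under the new rules, mirroring what generic_drop_inode() does at this
point in time; the function name is hypothetical. It runs on final iput()
with inode->i_lock held, must not sleep, and returns true to have the inode
evicted immediately:

	/* called with inode->i_lock held; must not sleep */
	static int example_drop_inode(struct inode *inode)
	{
		/* evict unlinked or unhashed inodes right away */
		return !inode->i_nlink || inode_unhashed(inode);
	}

All actual eviction work is still done by the caller after it returns.]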

Documentation/filesystems/vfs.txt

@@ -254,7 +254,7 @@ or bottom half).
 	should be synchronous or not, not all filesystems check this flag.
 
   drop_inode: called when the last access to the inode is dropped,
-	with the inode_lock spinlock held.
+	with the inode->i_lock spinlock held.
 
 	This method should be either NULL (normal UNIX filesystem
 	semantics) or "generic_delete_inode" (for filesystems that do not

fs/autofs4/autofs_i.h

@@ -61,8 +61,6 @@ do {							\
 		current->pid, __func__, ##args);	\
 } while (0)
 
-extern spinlock_t autofs4_lock;
-
 /* Unified info structure. This is pointed to by both the dentry and
    inode structures. Each file in the filesystem has an instance of this
    structure. It holds a reference to the dentry, so dentries are never

fs/autofs4/dev-ioctl.c

@@ -372,6 +372,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 		return -EBUSY;
 	} else {
 		struct file *pipe = fget(pipefd);
+		if (!pipe) {
+			err = -EBADF;
+			goto out;
+		}
 		if (!pipe->f_op || !pipe->f_op->write) {
 			err = -EPIPE;
 			fput(pipe);
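
[Illustration, not part of the commit: the pattern the fix above enforces.
fget() returns NULL for an invalid descriptor rather than an ERR_PTR, so the
result must be checked before any dereference, and the reference it takes
must be dropped with fput() on every error path. The helper name is
hypothetical:

	static int example_use_pipe_fd(int pipefd)
	{
		struct file *pipe = fget(pipefd);

		if (!pipe)
			return -EBADF;	/* bad or closed fd */
		if (!pipe->f_op || !pipe->f_op->write) {
			fput(pipe);	/* drop the reference on error */
			return -EPIPE;
		}
		/* ... use pipe ... */
		fput(pipe);
		return 0;
	}
]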

fs/autofs4/expire.c

@@ -86,19 +86,71 @@ done:
 	return status;
 }
 
+/*
+ * Calculate and dget next entry in the subdirs list under root.
+ */
+static struct dentry *get_next_positive_subdir(struct dentry *prev,
+						struct dentry *root)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
+	struct list_head *next;
+	struct dentry *p, *q;
+
+	spin_lock(&sbi->lookup_lock);
+
+	if (prev == NULL) {
+		spin_lock(&root->d_lock);
+		prev = dget_dlock(root);
+		next = prev->d_subdirs.next;
+		p = prev;
+		goto start;
+	}
+
+	p = prev;
+	spin_lock(&p->d_lock);
+again:
+	next = p->d_u.d_child.next;
+start:
+	if (next == &root->d_subdirs) {
+		spin_unlock(&p->d_lock);
+		spin_unlock(&sbi->lookup_lock);
+		dput(prev);
+		return NULL;
+	}
+
+	q = list_entry(next, struct dentry, d_u.d_child);
+
+	spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
+	/* Negative dentry - try next */
+	if (!simple_positive(q)) {
+		spin_unlock(&p->d_lock);
+		p = q;
+		goto again;
+	}
+	dget_dlock(q);
+	spin_unlock(&q->d_lock);
+	spin_unlock(&p->d_lock);
+	spin_unlock(&sbi->lookup_lock);
+
+	dput(prev);
+
+	return q;
+}
+
 /*
  * Calculate and dget next entry in top down tree traversal.
  */
 static struct dentry *get_next_positive_dentry(struct dentry *prev,
 						struct dentry *root)
 {
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
 	struct list_head *next;
 	struct dentry *p, *ret;
 
 	if (prev == NULL)
 		return dget(root);
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 relock:
 	p = prev;
 	spin_lock(&p->d_lock);
@@ -110,7 +162,7 @@ again:
 	if (p == root) {
 		spin_unlock(&p->d_lock);
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->lookup_lock);
 		dput(prev);
 		return NULL;
 	}
@@ -140,7 +192,7 @@ again:
 	dget_dlock(ret);
 	spin_unlock(&ret->d_lock);
 	spin_unlock(&p->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	dput(prev);
 
@@ -290,11 +342,8 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(root);
 	/* No point expiring a pending mount */
-	if (ino->flags & AUTOFS_INF_PENDING) {
-		spin_unlock(&sbi->fs_lock);
-		return NULL;
-	}
-	managed_dentry_set_transit(root);
+	if (ino->flags & AUTOFS_INF_PENDING)
+		goto out;
 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
 		struct autofs_info *ino = autofs4_dentry_ino(root);
 		ino->flags |= AUTOFS_INF_EXPIRING;
@@ -302,7 +351,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 		spin_unlock(&sbi->fs_lock);
 		return root;
 	}
-	managed_dentry_clear_transit(root);
+out:
 	spin_unlock(&sbi->fs_lock);
 	dput(root);
 
@@ -336,13 +385,12 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	timeout = sbi->exp_timeout;
 
 	dentry = NULL;
-	while ((dentry = get_next_positive_dentry(dentry, root))) {
+	while ((dentry = get_next_positive_subdir(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
 		/* No point expiring a pending mount */
 		if (ino->flags & AUTOFS_INF_PENDING)
-			goto cont;
-		managed_dentry_set_transit(dentry);
+			goto next;
 
 		/*
 		 * Case 1: (i) indirect mount or top level pseudo direct mount
@@ -402,8 +450,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			}
 		}
 next:
-		managed_dentry_clear_transit(dentry);
-cont:
 		spin_unlock(&sbi->fs_lock);
 	}
 	return NULL;
@@ -415,13 +461,13 @@ found:
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 	spin_lock(&expired->d_parent->d_lock);
 	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 	spin_unlock(&expired->d_lock);
 	spin_unlock(&expired->d_parent->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 	return expired;
 }
 
@@ -484,8 +530,6 @@ int autofs4_expire_run(struct super_block *sb,
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(dentry);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
-	if (!d_unhashed(dentry))
-		managed_dentry_clear_transit(dentry);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
@@ -513,9 +557,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 		spin_lock(&sbi->fs_lock);
 		ino->flags &= ~AUTOFS_INF_EXPIRING;
 		spin_lock(&dentry->d_lock);
-		if (ret)
-			__managed_dentry_clear_transit(dentry);
-		else {
+		if (!ret) {
 			if ((IS_ROOT(dentry) ||
 			    (autofs_type_indirect(sbi->type) &&
 			     IS_ROOT(dentry->d_parent))) &&

fs/autofs4/root.c

@@ -23,8 +23,6 @@
 
 #include "autofs_i.h"
 
-DEFINE_SPINLOCK(autofs4_lock);
-
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -125,15 +123,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	 * autofs file system so just let the libfs routines handle
 	 * it.
 	 */
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->lookup_lock);
 		return -ENOENT;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 out:
 	return dcache_dir_open(inode, file);
@@ -171,7 +169,6 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->active_list;
 	list_for_each(p, head) {
@@ -204,14 +201,12 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 			dget_dlock(active);
 			spin_unlock(&active->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&autofs4_lock);
 			return active;
 		}
 next:
 		spin_unlock(&active->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -226,7 +221,6 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->expiring_list;
 	list_for_each(p, head) {
@@ -259,14 +253,12 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 			dget_dlock(expiring);
 			spin_unlock(&expiring->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&autofs4_lock);
 			return expiring;
 		}
 next:
 		spin_unlock(&expiring->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -275,17 +267,16 @@ static int autofs4_mount_wait(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
-	int status;
+	int status = 0;
 
 	if (ino->flags & AUTOFS_INF_PENDING) {
 		DPRINTK("waiting for mount name=%.*s",
 			dentry->d_name.len, dentry->d_name.name);
 		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
 		DPRINTK("mount wait done status=%d", status);
-		ino->last_used = jiffies;
-		return status;
 	}
-	return 0;
+	ino->last_used = jiffies;
+	return status;
 }
 
 static int do_expire_wait(struct dentry *dentry)
@@ -319,9 +310,12 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
 	 */
 	if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
 		struct dentry *parent = dentry->d_parent;
+		struct autofs_info *ino;
 		struct dentry *new = d_lookup(parent, &dentry->d_name);
 		if (!new)
 			return NULL;
+		ino = autofs4_dentry_ino(new);
+		ino->last_used = jiffies;
 		dput(path->dentry);
 		path->dentry = new;
 	}
@@ -338,18 +332,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 	DPRINTK("dentry=%p %.*s",
 		dentry, dentry->d_name.len, dentry->d_name.name);
 
-	/*
-	 * Someone may have manually umounted this or it was a submount
-	 * that has gone away.
-	 */
-	spin_lock(&dentry->d_lock);
-	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
-		if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
-		    (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
-			__managed_dentry_set_transit(path->dentry);
-	}
-	spin_unlock(&dentry->d_lock);
-
 	/* The daemon never triggers a mount. */
 	if (autofs4_oz_mode(sbi))
 		return NULL;
@@ -418,18 +400,17 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 done:
 	if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
 		/*
-		 * Any needed mounting has been completed and the path updated
-		 * so turn this into a normal dentry so we don't continually
-		 * call ->d_automount() and ->d_manage().
-		 */
-		spin_lock(&dentry->d_lock);
-		__managed_dentry_clear_transit(dentry);
-		/*
+		 * Any needed mounting has been completed and the path
+		 * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
+		 * call ->d_automount() on rootless multi-mounts since
+		 * it can lead to an incorrect ELOOP error return.
+		 *
 		 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
 		 * symlinks as in all other cases the dentry will be covered by
 		 * an actual mount so ->d_automount() won't be called during
 		 * the follow.
 		 */
+		spin_lock(&dentry->d_lock);
 		if ((!d_mountpoint(dentry) &&
 		    !list_empty(&dentry->d_subdirs)) ||
 		    (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
@@ -455,6 +436,8 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 
 	/* The daemon never waits. */
 	if (autofs4_oz_mode(sbi)) {
+		if (rcu_walk)
+			return 0;
 		if (!d_mountpoint(dentry))
 			return -EISDIR;
 		return 0;
@@ -612,12 +595,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
 	dir->i_mtime = CURRENT_TIME;
 
-	spin_lock(&autofs4_lock);
-	autofs4_add_expiring(dentry);
+	spin_lock(&sbi->lookup_lock);
+	__autofs4_add_expiring(dentry);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	return 0;
 }
@@ -686,20 +669,17 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&sbi->lookup_lock);
-		spin_unlock(&autofs4_lock);
 		return -ENOTEMPTY;
 	}
 	__autofs4_add_expiring(dentry);
-	spin_unlock(&sbi->lookup_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	if (sbi->version < 5)
 		autofs_clear_leaf_automount_flags(dentry);

fs/autofs4/waitq.c

@@ -197,12 +197,12 @@ rename_retry:
 	seq = read_seqbegin(&rename_lock);
 	rcu_read_lock();
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->fs_lock);
 	for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
 		len += tmp->d_name.len + 1;
 
 	if (!len || --len > NAME_MAX) {
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->fs_lock);
 		rcu_read_unlock();
 		if (read_seqretry(&rename_lock, seq))
 			goto rename_retry;
@@ -218,7 +218,7 @@ rename_retry:
 		p -= tmp->d_name.len;
 		strncpy(p, tmp->d_name.name, tmp->d_name.len);
 	}
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->fs_lock);
 	rcu_read_unlock();
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;

fs/block_dev.c

@@ -55,11 +55,13 @@ EXPORT_SYMBOL(I_BDEV);
 static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	inode->i_data.backing_dev_info = dst;
 	if (inode->i_state & I_DIRTY)
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 static sector_t max_block(struct block_device *bdev)

fs/buffer.c

@@ -1138,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * mapping->tree_lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {

fs/drop_caches.c

@@ -8,6 +8,7 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
@@ -16,20 +17,23 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
 	struct inode *inode, *toput_inode = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
-			continue;
-		if (inode->i_mapping->nrpages == 0)
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    (inode->i_mapping->nrpages == 0)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(toput_inode);
 }
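
[Illustration, not part of the commit: the s_inodes traversal pattern used
above, written out as a stand-alone sketch with a hypothetical name. The
inode is pinned under inode->i_lock, both locks are dropped before any
blocking work, and iput() of the previously pinned inode is deferred so it
never runs under inode_sb_list_lock:

	static void example_walk_sb_inodes(struct super_block *sb)
	{
		struct inode *inode, *toput_inode = NULL;

		spin_lock(&inode_sb_list_lock);
		list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
			spin_lock(&inode->i_lock);
			/* skip inodes that are going away or not yet set up */
			if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
				spin_unlock(&inode->i_lock);
				continue;
			}
			__iget(inode);			/* pin the inode */
			spin_unlock(&inode->i_lock);
			spin_unlock(&inode_sb_list_lock);	/* may block now */

			/* ... blocking per-inode work goes here ... */

			iput(toput_inode);	/* previous pin; no locks held */
			toput_inode = inode;
			spin_lock(&inode_sb_list_lock);
		}
		spin_unlock(&inode_sb_list_lock);
		iput(toput_inode);	/* drop the final pin */
	}
]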

fs/fs-writeback.c

@@ -175,6 +175,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
 	spin_unlock_bh(&bdi->wb_lock);
 }
 
+/*
+ * Remove the inode from the writeback list it is on.
+ */
+void inode_wb_list_del(struct inode *inode)
+{
+	spin_lock(&inode_wb_list_lock);
+	list_del_init(&inode->i_wb_list);
+	spin_unlock(&inode_wb_list_lock);
+}
+
+
 /*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode)
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
+	assert_spin_locked(&inode_wb_list_lock);
 	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;
 
@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode)
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
+	assert_spin_locked(&inode_wb_list_lock);
 	list_move(&inode->i_wb_list, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
 {
 	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
+	 * Prevent speculative execution through
+	 * spin_unlock(&inode_wb_list_lock);
 	 */
+
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_SYNC);
 }
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
  */
 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
+	assert_spin_locked(&inode_wb_list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
@@ -306,25 +322,25 @@ static void inode_wait_for_writeback(struct inode *inode)
 	wait_queue_head_t *wqh;
 
 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 	while (inode->i_state & I_SYNC) {
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_wb_list_lock);
 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
+		spin_lock(&inode->i_lock);
 	}
 }
 
 /*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ * Write out an inode's dirty pages.  Called under inode_wb_list_lock and
+ * inode->i_lock.  Either the caller has an active reference on the inode or
+ * the inode has I_WILL_FREE set.
  *
  * If `wait' is set, wait on the writeout.
  *
  * The whole writeout design is quite complex and fragile.  We want to avoid
  * starvation of particular inodes when others are being redirtied, prevent
  * livelocks, etc.
- *
- * Called under inode_lock.
  */
 static int
 writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -333,6 +349,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	unsigned dirty;
 	int ret;
 
+	assert_spin_locked(&inode_wb_list_lock);
+	assert_spin_locked(&inode->i_lock);
+
 	if (!atomic_read(&inode->i_count))
 		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
 	else
@@ -363,7 +382,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	/* Set I_SYNC, reset I_DIRTY_PAGES */
 	inode->i_state |= I_SYNC;
 	inode->i_state &= ~I_DIRTY_PAGES;
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 
 	ret = do_writepages(mapping, wbc);
 
@@ -383,10 +403,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 * due to delalloc, clear dirty metadata flags right before
 	 * write_inode()
 	 */
-	spin_lock(&inode_lock);
+	spin_lock(&inode->i_lock);
 	dirty = inode->i_state & I_DIRTY;
 	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		int err = write_inode(inode, wbc);
@@ -394,7 +414,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			ret = err;
 	}
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
 		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -506,7 +527,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		 * kind does not need peridic writeout yet, and for the latter
 		 * kind writeout is handled by the freer.
 		 */
+		spin_lock(&inode->i_lock);
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+			spin_unlock(&inode->i_lock);
 			requeue_io(inode);
 			continue;
 		}
@@ -515,10 +538,13 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
 		 */
-		if (inode_dirtied_after(inode, wbc->wb_start))
+		if (inode_dirtied_after(inode, wbc->wb_start)) {
+			spin_unlock(&inode->i_lock);
 			return 1;
+		}
 
 		__iget(inode);
+
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
 		if (wbc->pages_skipped != pages_skipped) {
@@ -528,10 +554,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 			 */
 			redirty_tail(inode);
 		}
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_wb_list_lock);
 		iput(inode);
 		cond_resched();
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
 			return 1;
@@ -550,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 	if (!wbc->wb_start)
 		wbc->wb_start = jiffies; /* livelock avoidance */
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 
@@ -568,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 		if (ret)
 			break;
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 	/* Leave any unwritten inodes on b_io */
 }
 
@@ -577,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
 {
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 	writeback_sb_inodes(sb, wb, wbc, true);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 /*
@@ -720,13 +747,15 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 * become available for writeback. Otherwise
 		 * we'll just busyloop.
 		 */
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		if (!list_empty(&wb->b_more_io))  {
 			inode = wb_inode(wb->b_more_io.prev);
 			trace_wbc_writeback_wait(&wbc, wb->bdi);
+			spin_lock(&inode->i_lock);
 			inode_wait_for_writeback(inode);
+			spin_unlock(&inode->i_lock);
 		}
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 	}
 
 	return wrote;
@@ -992,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 {
 	struct super_block *sb = inode->i_sb;
 	struct backing_dev_info *bdi = NULL;
-	bool wakeup_bdi = false;
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -1016,7 +1044,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	if (unlikely(block_dump))
 		block_dump___mark_inode_dirty(inode);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode->i_lock);
 	if ((inode->i_state & flags) != flags) {
 		const int was_dirty = inode->i_state & I_DIRTY;
 
@@ -1028,7 +1056,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 * superblock list, based upon its state.
 		 */
 		if (inode->i_state & I_SYNC)
-			goto out;
+			goto out_unlock_inode;
 
 		/*
 		 * Only add valid (hashed) inodes to the superblock's
@@ -1036,16 +1064,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 */
 		if (!S_ISBLK(inode->i_mode)) {
 			if (inode_unhashed(inode))
-				goto out;
+				goto out_unlock_inode;
 		}
 		if (inode->i_state & I_FREEING)
-			goto out;
+			goto out_unlock_inode;
 
 		/*
 		 * If the inode was already on b_dirty/b_io/b_more_io, don't
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
+			bool wakeup_bdi = false;
 			bdi = inode_to_bdi(inode);
 
 			if (bdi_cap_writeback_dirty(bdi)) {
@@ -1062,15 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 					wakeup_bdi = true;
 			}
 
+			spin_unlock(&inode->i_lock);
+			spin_lock(&inode_wb_list_lock);
 			inode->dirtied_when = jiffies;
 			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+			spin_unlock(&inode_wb_list_lock);
+
+			if (wakeup_bdi)
+				bdi_wakeup_thread_delayed(bdi);
+			return;
 		}
 	}
-out:
-	spin_unlock(&inode_lock);
-
-	if (wakeup_bdi)
-		bdi_wakeup_thread_delayed(bdi);
+out_unlock_inode:
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
@@ -1101,7 +1135,7 @@ static void wait_sb_inodes(struct super_block *sb)
 	 */
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 
 	/*
 	 * Data integrity sync. Must wait for all pages under writeback,
@@ -1111,22 +1145,25 @@ static void wait_sb_inodes(struct super_block *sb)
 	 * we still have to wait for that writeout.
 	 */
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		struct address_space *mapping;
+		struct address_space *mapping = inode->i_mapping;
 
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
-			continue;
-		mapping = inode->i_mapping;
-		if (mapping->nrpages == 0)
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    (mapping->nrpages == 0)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
 		/*
-		 * We hold a reference to 'inode' so it couldn't have
-		 * been removed from s_inodes list while we dropped the
-		 * inode_lock. We cannot iput the inode now as we can
-		 * be holding the last reference and we cannot iput it
-		 * under inode_lock. So we keep the reference and iput
-		 * it later.
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock.  We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
 		 */
 		iput(old_inode);
 		old_inode = inode;
@@ -1135,9 +1172,9 @@ static void wait_sb_inodes(struct super_block *sb)
 
 		cond_resched();
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 }
 
@@ -1271,9 +1308,11 @@ int write_inode_now(struct inode *inode, int sync)
 	wbc.nr_to_write = 0;
 
 	might_sleep();
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	ret = writeback_single_inode(inode, &wbc);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 	if (sync)
 		inode_sync_wait(inode);
 	return ret;
@@ -1295,9 +1334,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int ret;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	ret = writeback_single_inode(inode, wbc);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 	return ret;
 }
 EXPORT_SYMBOL(sync_inode);
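
[Illustration, not part of the commit text, distilled from the changes above:
the lock nesting this series establishes for the writeback path. The list
lock is taken first, then the per-inode lock, exactly as write_inode_now()
and sync_inode() now do:

	spin_lock(&inode_wb_list_lock);	/* guards b_dirty/b_io/b_more_io */
	spin_lock(&inode->i_lock);	/* guards inode->i_state */
	ret = writeback_single_inode(inode, &wbc);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_wb_list_lock);

writeback_single_inode() itself drops and retakes both locks, in the same
order, around the actual do_writepages() call.]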

fs/inode.c (diff suppressed because it is too large)

fs/internal.h

@@ -125,6 +125,13 @@ extern long do_handle_open(int mountdirfd,
 /*
  * inode.c
  */
+extern spinlock_t inode_sb_list_lock;
+
+/*
+ * fs-writeback.c
+ */
+extern void inode_wb_list_del(struct inode *inode);
+
 extern int get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *, bool);

fs/logfs/inode.c

@@ -293,7 +293,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-/* called with inode_lock held */
+/* called with inode->i_lock held */
 static int logfs_drop_inode(struct inode *inode)
 {
 	struct logfs_super *super = logfs_super(inode->i_sb);

fs/namei.c

@@ -992,6 +992,12 @@ int follow_down_one(struct path *path)
 	return 0;
 }
 
+static inline bool managed_dentry_might_block(struct dentry *dentry)
+{
+	return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
+		dentry->d_op->d_manage(dentry, true) < 0);
+}
+
 /*
  * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
  * meet a managed dentry and we're not walking to "..". True is returned to
@@ -1000,19 +1006,26 @@ int follow_down_one(struct path *path)
 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 			       struct inode **inode, bool reverse_transit)
 {
-	while (d_mountpoint(path->dentry)) {
+	for (;;) {
 		struct vfsmount *mounted;
-		if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
-		    !reverse_transit &&
-		    path->dentry->d_op->d_manage(path->dentry, true) < 0)
+		/*
+		 * Don't forget we might have a non-mountpoint managed dentry
+		 * that wants to block transit.
+		 */
+		*inode = path->dentry->d_inode;
+		if (!reverse_transit &&
+		    unlikely(managed_dentry_might_block(path->dentry)))
 			return false;
+
+		if (!d_mountpoint(path->dentry))
+			break;
+
 		mounted = __lookup_mnt(path->mnt, path->dentry, 1);
 		if (!mounted)
 			break;
 		path->mnt = mounted;
 		path->dentry = mounted->mnt_root;
 		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
-		*inode = path->dentry->d_inode;
 	}
 
 	if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))

fs/notify/inode_mark.c

@@ -22,13 +22,14 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
-#include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+#include "../internal.h"
+
 /*
  * Recalculate the mask of events relevant to a given inode locked.
  */
@@ -237,15 +238,14 @@ out:
  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
  * @list: list of inodes being unmounted (sb->s_inodes)
  *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
+ * Called during unmount with no locks held, so needs to be safe against
+ * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
  */
 void fsnotify_unmount_inodes(struct list_head *list)
 {
 	struct inode *inode, *next_i, *need_iput = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
 		struct inode *need_iput_tmp;
 
@@ -254,8 +254,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		 * I_WILL_FREE, or I_NEW which is fine because by that point
 		 * the inode cannot have any associated watches.
 		 */
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 
 		/*
 		 * If i_count is zero, the inode cannot have any watches and
@@ -263,8 +266,10 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		 * evict all inodes with zero i_count from icache which is
 		 * unnecessarily violent and may in fact be illegal to do.
 		 */
-		if (!atomic_read(&inode->i_count))
+		if (!atomic_read(&inode->i_count)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 
 		need_iput_tmp = need_iput;
 		need_iput = NULL;
@@ -274,22 +279,25 @@ void fsnotify_unmount_inodes(struct list_head *list)
 			__iget(inode);
 		else
 			need_iput_tmp = NULL;
+		spin_unlock(&inode->i_lock);
 
 		/* In case the dropping of a reference would nuke next_i. */
 		if ((&next_i->i_sb_list != list) &&
-		    atomic_read(&next_i->i_count) &&
-		    !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
-			__iget(next_i);
-			need_iput = next_i;
+		    atomic_read(&next_i->i_count)) {
+			spin_lock(&next_i->i_lock);
+			if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
+				__iget(next_i);
+				need_iput = next_i;
+			}
+			spin_unlock(&next_i->i_lock);
 		}
 
 		/*
-		 * We can safely drop inode_lock here because we hold
+		 * We can safely drop inode_sb_list_lock here because we hold
 		 * references on both inode and next_i.  Also no new inodes
-		 * will be added since the umount has begun.  Finally,
-		 * iprune_mutex keeps shrink_icache_memory() away.
+		 * will be added since the umount has begun.
 		 */
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
 
 		if (need_iput_tmp)
 			iput(need_iput_tmp);
@@ -301,7 +309,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		iput(inode);
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 }

fs/notify/mark.c

@@ -91,7 +91,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/srcu.h>
-#include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>

fs/notify/vfsmount_mark.c

@@ -23,7 +23,6 @@
 #include <linux/mount.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
-#include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>

fs/ntfs/inode.c

@@ -54,7 +54,7 @@
  *
  * Return 1 if the attributes match and 0 if not.
  *
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the inode->i_lock spin lock held so it is not
  * allowed to sleep.
  */
 int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
 *
 * Return 0 on success and -errno on error.
 *
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the inode->i_lock spin lock held so it is not
 * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
 */
 static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)

fs/quota/dquot.c

@@ -76,7 +76,7 @@
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#include "../internal.h" /* ugh */
 
 #include <asm/uaccess.h>
 
@@ -900,33 +900,38 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	int reserved = 0;
 #endif
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    !atomic_read(&inode->i_writecount) ||
+		    !dqinit_needed(inode, type)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 #ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
 			reserved = 1;
 #endif
-		if (!atomic_read(&inode->i_writecount))
-			continue;
-		if (!dqinit_needed(inode, type))
-			continue;
-
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
 
 		iput(old_inode);
 		__dquot_initialize(inode, type);
-		/* We hold a reference to 'inode' so it couldn't have been
-		 * removed from s_inodes list while we dropped the inode_lock.
-		 * We cannot iput the inode now as we can be holding the last
-		 * reference and we cannot iput it under inode_lock. So we
-		 * keep the reference and iput it later. */
+
+		/*
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock We cannot iput the inode now as we can be
+		 * holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
+		 */
 		old_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1007,7 +1012,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 	struct inode *inode;
 	int reserved = 0;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		/*
 		 * We have to scan also I_NEW inodes because they can already
@@ -1021,7 +1026,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 			remove_inode_dquot_ref(inode, type, tofree_head);
 		}
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"

include/linux/fs.h

@@ -1636,7 +1636,7 @@ struct super_operations {
 };
 
 /*
- * Inode state bits.  Protected by inode_lock.
+ * Inode state bits.  Protected by inode->i_lock
  *
  * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
  * I_DIRTY_DATASYNC and I_DIRTY_PAGES.

include/linux/quotaops.h

@@ -277,7 +277,7 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
 		/*
 		 * Mark inode fully dirty. Since we are allocating blocks, inode
 		 * would become fully dirty soon anyway and it reportedly
-		 * reduces inode_lock contention.
+		 * reduces lock contention.
 		 */
 		mark_inode_dirty(inode);
 	}

include/linux/writeback.h

@@ -9,7 +9,7 @@
 
 struct backing_dev_info;
 
-extern spinlock_t inode_lock;
+extern spinlock_t inode_wb_list_lock;
 
 /*
  * fs/fs-writeback.c

mm/backing-dev.c

@@ -67,14 +67,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 	struct inode *inode;
 
 	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
 		nr_dirty++;
 	list_for_each_entry(inode, &wb->b_io, i_wb_list)
 		nr_io++;
 	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
 		nr_more_io++;
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 
 	global_dirty_limits(&background_thresh, &dirty_thresh);
 	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
@@ -676,11 +676,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	if (bdi_has_dirty_io(bdi)) {
 		struct bdi_writeback *dst = &default_backing_dev_info.wb;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
 		list_splice(&bdi->wb.b_io, &dst->b_io);
 		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 	}
 
 	bdi_unregister(bdi);

mm/filemap.c

@@ -80,8 +80,8 @@
  *  ->i_mutex
  *    ->i_alloc_sem             (various)
  *
- *  ->inode_lock
- *    ->sb_lock			(fs/fs-writeback.c)
+ *  inode_wb_list_lock
+ *    sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
  *
  *  ->i_mmap_lock
@@ -98,8 +98,10 @@
  *    ->zone.lru_lock		(check_pte_range->isolate_lru_page)
  *    ->private_lock		(page_remove_rmap->set_page_dirty)
  *    ->tree_lock		(page_remove_rmap->set_page_dirty)
- *    ->inode_lock		(page_remove_rmap->set_page_dirty)
- *    ->inode_lock		(zap_pte_range->set_page_dirty)
+ *    inode_wb_list_lock	(page_remove_rmap->set_page_dirty)
+ *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
+ *    inode_wb_list_lock	(zap_pte_range->set_page_dirty)
+ *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
  *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
  *
  * (code doesn't rely on that order, so you could switch it around)

mm/rmap.c

@@ -31,11 +31,12 @@
 *             swap_lock (in swap_duplicate, swap_info_get)
 *               mmlist_lock (in mmput, drain_mmlist and others)
 *               mapping->private_lock (in __set_page_dirty_buffers)
- *               inode_lock (in set_page_dirty's __mark_inode_dirty)
+ *               inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+ *               inode_wb_list_lock (in set_page_dirty's __mark_inode_dirty)
 *                 sb_lock (within inode_lock in fs/fs-writeback.c)
 *                 mapping->tree_lock (widely used, in set_page_dirty,
 *                           in arch-dependent flush_dcache_mmap_lock,
- *                           within inode_lock in __sync_single_inode)
+ *                           within inode_wb_list_lock in __sync_single_inode)
 *
 * (code doesn't rely on that order so it could be switched around)
 * ->tasklist_lock