fs: symlink write_begin allocation context fix

With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened.  They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim.  This bug could
cause filesystem deadlocks.

The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called.  It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock.  The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.

Add a new flag for write_begin, AOP_FLAG_NOFS.  Filesystems can now act on
this flag in their write_begin function.  Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).

This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in its write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg.  ocfs2_alloc_write_ctxt, for a
random example).

[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@kernel.org>		[2.6.28.x]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
  untouched to the grab_cache_page_write_begin() function.  That
  just simplifies everybody, and may even allow future expansion of the
  logic.   - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Nick Piggin 2009-01-04 12:00:53 -08:00 committed by Linus Torvalds
parent e687d691cb
commit 54566b2c15
22 changed files with 49 additions and 36 deletions

View file

@ -628,7 +628,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
} }
index = pos >> PAGE_CACHE_SHIFT; index = pos >> PAGE_CACHE_SHIFT;
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
*pagep = page; *pagep = page;

View file

@ -144,7 +144,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
candidate->state = AFS_WBACK_PENDING; candidate->state = AFS_WBACK_PENDING;
init_waitqueue_head(&candidate->waitq); init_waitqueue_head(&candidate->waitq);
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) { if (!page) {
kfree(candidate); kfree(candidate);
return -ENOMEM; return -ENOMEM;

View file

@ -1996,7 +1996,7 @@ int block_write_begin(struct file *file, struct address_space *mapping,
page = *pagep; page = *pagep;
if (page == NULL) { if (page == NULL) {
ownpage = 1; ownpage = 1;
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) { if (!page) {
status = -ENOMEM; status = -ENOMEM;
goto out; goto out;
@ -2502,7 +2502,7 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
from = pos & (PAGE_CACHE_SIZE - 1); from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len; to = from + len;
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
*pagep = page; *pagep = page;

View file

@ -2074,7 +2074,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) { if (!page) {
rc = -ENOMEM; rc = -ENOMEM;
goto out; goto out;

View file

@ -288,7 +288,7 @@ static int ecryptfs_write_begin(struct file *file,
loff_t prev_page_end_size; loff_t prev_page_end_size;
int rc = 0; int rc = 0;
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
*pagep = page; *pagep = page;

View file

@ -1161,7 +1161,7 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
to = from + len; to = from + len;
retry: retry:
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
*pagep = page; *pagep = page;

View file

@ -2175,8 +2175,7 @@ retry:
* We have a transaction open. All is sweetness. It also sets * We have a transaction open. All is sweetness. It also sets
* i_size in generic_commit_write(). * i_size in generic_commit_write().
*/ */
err = __page_symlink(inode, symname, l, err = __page_symlink(inode, symname, l, 1);
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) { if (err) {
drop_nlink(inode); drop_nlink(inode);
unlock_new_inode(inode); unlock_new_inode(inode);

View file

@ -1346,7 +1346,7 @@ retry:
goto out; goto out;
} }
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) { if (!page) {
ext4_journal_stop(handle); ext4_journal_stop(handle);
ret = -ENOMEM; ret = -ENOMEM;
@ -2550,7 +2550,7 @@ retry:
goto out; goto out;
} }
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) { if (!page) {
ext4_journal_stop(handle); ext4_journal_stop(handle);
ret = -ENOMEM; ret = -ENOMEM;

View file

@ -2212,8 +2212,7 @@ retry:
* We have a transaction open. All is sweetness. It also sets * We have a transaction open. All is sweetness. It also sets
* i_size in generic_commit_write(). * i_size in generic_commit_write().
*/ */
err = __page_symlink(inode, symname, l, err = __page_symlink(inode, symname, l, 1);
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) { if (err) {
clear_nlink(inode); clear_nlink(inode);
unlock_new_inode(inode); unlock_new_inode(inode);

View file

@ -646,7 +646,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
{ {
pgoff_t index = pos >> PAGE_CACHE_SHIFT; pgoff_t index = pos >> PAGE_CACHE_SHIFT;
*pagep = __grab_cache_page(mapping, index); *pagep = grab_cache_page_write_begin(mapping, index, flags);
if (!*pagep) if (!*pagep)
return -ENOMEM; return -ENOMEM;
return 0; return 0;
@ -779,7 +779,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
break; break;
err = -ENOMEM; err = -ENOMEM;
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, 0);
if (!page) if (!page)
break; break;

View file

@ -675,7 +675,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
goto out_trans_fail; goto out_trans_fail;
error = -ENOMEM; error = -ENOMEM;
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
*pagep = page; *pagep = page;
if (unlikely(!page)) if (unlikely(!page))
goto out_endtrans; goto out_endtrans;

View file

@ -501,7 +501,7 @@ int hostfs_write_begin(struct file *file, struct address_space *mapping,
{ {
pgoff_t index = pos >> PAGE_CACHE_SHIFT; pgoff_t index = pos >> PAGE_CACHE_SHIFT;
*pagep = __grab_cache_page(mapping, index); *pagep = grab_cache_page_write_begin(mapping, index, flags);
if (!*pagep) if (!*pagep)
return -ENOMEM; return -ENOMEM;
return 0; return 0;

View file

@ -132,7 +132,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
uint32_t pageofs = index << PAGE_CACHE_SHIFT; uint32_t pageofs = index << PAGE_CACHE_SHIFT;
int ret = 0; int ret = 0;
pg = __grab_cache_page(mapping, index); pg = grab_cache_page_write_begin(mapping, index, flags);
if (!pg) if (!pg)
return -ENOMEM; return -ENOMEM;
*pagep = pg; *pagep = pg;

View file

@ -360,7 +360,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
index = pos >> PAGE_CACHE_SHIFT; index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1); from = pos & (PAGE_CACHE_SIZE - 1);
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;

View file

@ -2817,18 +2817,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
} }
} }
int __page_symlink(struct inode *inode, const char *symname, int len, /*
gfp_t gfp_mask) * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
*/
int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
{ {
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
struct page *page; struct page *page;
void *fsdata; void *fsdata;
int err; int err;
char *kaddr; char *kaddr;
unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
if (nofs)
flags |= AOP_FLAG_NOFS;
retry: retry:
err = pagecache_write_begin(NULL, mapping, 0, len-1, err = pagecache_write_begin(NULL, mapping, 0, len-1,
AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); flags, &page, &fsdata);
if (err) if (err)
goto fail; goto fail;
@ -2852,7 +2857,7 @@ fail:
int page_symlink(struct inode *inode, const char *symname, int len) int page_symlink(struct inode *inode, const char *symname, int len)
{ {
return __page_symlink(inode, symname, len, return __page_symlink(inode, symname, len,
mapping_gfp_mask(inode->i_mapping)); !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
} }
const struct inode_operations page_symlink_inode_operations = { const struct inode_operations page_symlink_inode_operations = {

View file

@ -354,7 +354,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
file->f_path.dentry->d_name.name, file->f_path.dentry->d_name.name,
mapping->host->i_ino, len, (long long) pos); mapping->host->i_ino, len, (long long) pos);
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
*pagep = page; *pagep = page;

View file

@ -2561,7 +2561,7 @@ static int reiserfs_write_begin(struct file *file,
} }
index = pos >> PAGE_CACHE_SHIFT; index = pos >> PAGE_CACHE_SHIFT;
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
*pagep = page; *pagep = page;

View file

@ -297,7 +297,7 @@ static int smb_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata) struct page **pagep, void **fsdata)
{ {
pgoff_t index = pos >> PAGE_CACHE_SHIFT; pgoff_t index = pos >> PAGE_CACHE_SHIFT;
*pagep = __grab_cache_page(mapping, index); *pagep = grab_cache_page_write_begin(mapping, index, flags);
if (!*pagep) if (!*pagep)
return -ENOMEM; return -ENOMEM;
return 0; return 0;

View file

@ -219,7 +219,8 @@ static void release_existing_page_budget(struct ubifs_info *c)
} }
static int write_begin_slow(struct address_space *mapping, static int write_begin_slow(struct address_space *mapping,
loff_t pos, unsigned len, struct page **pagep) loff_t pos, unsigned len, struct page **pagep,
unsigned flags)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_info *c = inode->i_sb->s_fs_info;
@ -247,7 +248,7 @@ static int write_begin_slow(struct address_space *mapping,
if (unlikely(err)) if (unlikely(err))
return err; return err;
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (unlikely(!page)) { if (unlikely(!page)) {
ubifs_release_budget(c, &req); ubifs_release_budget(c, &req);
return -ENOMEM; return -ENOMEM;
@ -438,7 +439,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
return -EROFS; return -EROFS;
/* Try out the fast-path part first */ /* Try out the fast-path part first */
page = __grab_cache_page(mapping, index); page = grab_cache_page_write_begin(mapping, index, flags);
if (unlikely(!page)) if (unlikely(!page))
return -ENOMEM; return -ENOMEM;
@ -483,7 +484,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
return write_begin_slow(mapping, pos, len, pagep); return write_begin_slow(mapping, pos, len, pagep, flags);
} }
/* /*

View file

@ -423,6 +423,9 @@ enum positive_aop_returns {
#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct
* helper code (eg buffer layer)
* to clear GFP_FS from alloc */
/* /*
* oh the beauties of C type declarations. * oh the beauties of C type declarations.
@ -2035,7 +2038,7 @@ extern int page_readlink(struct dentry *, char __user *, int);
extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void *page_follow_link_light(struct dentry *, struct nameidata *);
extern void page_put_link(struct dentry *, struct nameidata *, void *); extern void page_put_link(struct dentry *, struct nameidata *, void *);
extern int __page_symlink(struct inode *inode, const char *symname, int len, extern int __page_symlink(struct inode *inode, const char *symname, int len,
gfp_t gfp_mask); int nofs);
extern int page_symlink(struct inode *inode, const char *symname, int len); extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations; extern const struct inode_operations page_symlink_inode_operations;
extern int generic_readlink(struct dentry *, char __user *, int); extern int generic_readlink(struct dentry *, char __user *, int);

View file

@ -241,7 +241,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
int tag, unsigned int nr_pages, struct page **pages); int tag, unsigned int nr_pages, struct page **pages);
struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index); struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags);
/* /*
* Returns locked page at given index in given cache, creating it if needed. * Returns locked page at given index in given cache, creating it if needed.

View file

@ -2140,19 +2140,24 @@ EXPORT_SYMBOL(generic_file_direct_write);
* Find or create a page at the given pagecache position. Return the locked * Find or create a page at the given pagecache position. Return the locked
* page. This function is specifically for buffered writes. * page. This function is specifically for buffered writes.
*/ */
struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index) struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags)
{ {
int status; int status;
struct page *page; struct page *page;
gfp_t gfp_notmask = 0;
if (flags & AOP_FLAG_NOFS)
gfp_notmask = __GFP_FS;
repeat: repeat:
page = find_lock_page(mapping, index); page = find_lock_page(mapping, index);
if (likely(page)) if (likely(page))
return page; return page;
page = page_cache_alloc(mapping); page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
if (!page) if (!page)
return NULL; return NULL;
status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); status = add_to_page_cache_lru(page, mapping, index,
GFP_KERNEL & ~gfp_notmask);
if (unlikely(status)) { if (unlikely(status)) {
page_cache_release(page); page_cache_release(page);
if (status == -EEXIST) if (status == -EEXIST)
@ -2161,7 +2166,7 @@ repeat:
} }
return page; return page;
} }
EXPORT_SYMBOL(__grab_cache_page); EXPORT_SYMBOL(grab_cache_page_write_begin);
static ssize_t generic_perform_write(struct file *file, static ssize_t generic_perform_write(struct file *file,
struct iov_iter *i, loff_t pos) struct iov_iter *i, loff_t pos)