fasync: RCU and fine grained locking

kill_fasync() uses a central rwlock, a candidate for RCU conversion, to
avoid cache-line ping-pongs on SMP.

fasync_remove_entry() and fasync_add_entry() can disable IRQs for a short
section only, instead of during the whole list scan.

Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and
fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync()
doesn't need its own implementation and can use fasync_helper(), to
reduce code size and complexity.

We can remove __kill_fasync() direct use in net/socket.c, and rename it
to kill_fasync_rcu().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2010-04-14 09:55:35 +00:00 committed by David S. Miller
parent e5700aff14
commit 989a297920
3 changed files with 59 additions and 92 deletions

View File

@ -614,9 +614,15 @@ int send_sigurg(struct fown_struct *fown)
return ret; return ret;
} }
static DEFINE_RWLOCK(fasync_lock); static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly; static struct kmem_cache *fasync_cache __read_mostly;
static void fasync_free_rcu(struct rcu_head *head)
{
kmem_cache_free(fasync_cache,
container_of(head, struct fasync_struct, fa_rcu));
}
/* /*
* Remove a fasync entry. If successfully removed, return * Remove a fasync entry. If successfully removed, return
* positive and clear the FASYNC flag. If no entry exists, * positive and clear the FASYNC flag. If no entry exists,
@ -625,8 +631,6 @@ static struct kmem_cache *fasync_cache __read_mostly;
* NOTE! It is very important that the FASYNC flag always * NOTE! It is very important that the FASYNC flag always
* match the state "is the filp on a fasync list". * match the state "is the filp on a fasync list".
* *
* We always take the 'filp->f_lock', in since fasync_lock
* needs to be irq-safe.
*/ */
static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{ {
@ -634,17 +638,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
int result = 0; int result = 0;
spin_lock(&filp->f_lock); spin_lock(&filp->f_lock);
write_lock_irq(&fasync_lock); spin_lock(&fasync_lock);
for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
if (fa->fa_file != filp) if (fa->fa_file != filp)
continue; continue;
spin_lock_irq(&fa->fa_lock);
fa->fa_file = NULL;
spin_unlock_irq(&fa->fa_lock);
*fp = fa->fa_next; *fp = fa->fa_next;
kmem_cache_free(fasync_cache, fa); call_rcu(&fa->fa_rcu, fasync_free_rcu);
filp->f_flags &= ~FASYNC; filp->f_flags &= ~FASYNC;
result = 1; result = 1;
break; break;
} }
write_unlock_irq(&fasync_lock); spin_unlock(&fasync_lock);
spin_unlock(&filp->f_lock); spin_unlock(&filp->f_lock);
return result; return result;
} }
@ -666,25 +675,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
return -ENOMEM; return -ENOMEM;
spin_lock(&filp->f_lock); spin_lock(&filp->f_lock);
write_lock_irq(&fasync_lock); spin_lock(&fasync_lock);
for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
if (fa->fa_file != filp) if (fa->fa_file != filp)
continue; continue;
spin_lock_irq(&fa->fa_lock);
fa->fa_fd = fd; fa->fa_fd = fd;
spin_unlock_irq(&fa->fa_lock);
kmem_cache_free(fasync_cache, new); kmem_cache_free(fasync_cache, new);
goto out; goto out;
} }
spin_lock_init(&new->fa_lock);
new->magic = FASYNC_MAGIC; new->magic = FASYNC_MAGIC;
new->fa_file = filp; new->fa_file = filp;
new->fa_fd = fd; new->fa_fd = fd;
new->fa_next = *fapp; new->fa_next = *fapp;
*fapp = new; rcu_assign_pointer(*fapp, new);
result = 1; result = 1;
filp->f_flags |= FASYNC; filp->f_flags |= FASYNC;
out: out:
write_unlock_irq(&fasync_lock); spin_unlock(&fasync_lock);
spin_unlock(&filp->f_lock); spin_unlock(&filp->f_lock);
return result; return result;
} }
@ -704,37 +718,41 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
EXPORT_SYMBOL(fasync_helper); EXPORT_SYMBOL(fasync_helper);
void __kill_fasync(struct fasync_struct *fa, int sig, int band) /*
* rcu_read_lock() is held
*/
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{ {
while (fa) { while (fa) {
struct fown_struct * fown; struct fown_struct *fown;
if (fa->magic != FASYNC_MAGIC) { if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in " printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n"); "fasync_struct!\n");
return; return;
} }
fown = &fa->fa_file->f_owner; spin_lock(&fa->fa_lock);
/* Don't send SIGURG to processes which have not set a if (fa->fa_file) {
queued signum: SIGURG has its own default signalling fown = &fa->fa_file->f_owner;
mechanism. */ /* Don't send SIGURG to processes which have not set a
if (!(sig == SIGURG && fown->signum == 0)) queued signum: SIGURG has its own default signalling
send_sigio(fown, fa->fa_fd, band); mechanism. */
fa = fa->fa_next; if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
spin_unlock(&fa->fa_lock);
fa = rcu_dereference(fa->fa_next);
} }
} }
EXPORT_SYMBOL(__kill_fasync);
void kill_fasync(struct fasync_struct **fp, int sig, int band) void kill_fasync(struct fasync_struct **fp, int sig, int band)
{ {
/* First a quick test without locking: usually /* First a quick test without locking: usually
* the list is empty. * the list is empty.
*/ */
if (*fp) { if (*fp) {
read_lock(&fasync_lock); rcu_read_lock();
/* reread *fp after obtaining the lock */ kill_fasync_rcu(rcu_dereference(*fp), sig, band);
__kill_fasync(*fp, sig, band); rcu_read_unlock();
read_unlock(&fasync_lock);
} }
} }
EXPORT_SYMBOL(kill_fasync); EXPORT_SYMBOL(kill_fasync);

View File

@ -1280,10 +1280,12 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
struct fasync_struct { struct fasync_struct {
int magic; spinlock_t fa_lock;
int fa_fd; int magic;
struct fasync_struct *fa_next; /* singly linked list */ int fa_fd;
struct file *fa_file; struct fasync_struct *fa_next; /* singly linked list */
struct file *fa_file;
struct rcu_head fa_rcu;
}; };
#define FASYNC_MAGIC 0x4601 #define FASYNC_MAGIC 0x4601
@ -1292,8 +1294,6 @@ struct fasync_struct {
extern int fasync_helper(int, struct file *, int, struct fasync_struct **); extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
/* can be called from interrupts */ /* can be called from interrupts */
extern void kill_fasync(struct fasync_struct **, int, int); extern void kill_fasync(struct fasync_struct **, int, int);
/* only for net: no internal synchronization */
extern void __kill_fasync(struct fasync_struct *, int, int);
extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
extern int f_setown(struct file *filp, unsigned long arg, int force); extern int f_setown(struct file *filp, unsigned long arg, int force);

View File

@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp)
* 1. fasync_list is modified only under process context socket lock * 1. fasync_list is modified only under process context socket lock
* i.e. under semaphore. * i.e. under semaphore.
* 2. fasync_list is used under read_lock(&sk->sk_callback_lock) * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
* or under socket lock. * or under socket lock
* 3. fasync_list can be used from softirq context, so that
* modification under socket lock have to be enhanced with
* write_lock_bh(&sk->sk_callback_lock).
* --ANK (990710)
*/ */
static int sock_fasync(int fd, struct file *filp, int on) static int sock_fasync(int fd, struct file *filp, int on)
{ {
struct fasync_struct *fa, *fna = NULL, **prev; struct socket *sock = filp->private_data;
struct socket *sock; struct sock *sk = sock->sk;
struct sock *sk;
if (on) { if (sk == NULL)
fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
if (fna == NULL)
return -ENOMEM;
}
sock = filp->private_data;
sk = sock->sk;
if (sk == NULL) {
kfree(fna);
return -EINVAL; return -EINVAL;
}
lock_sock(sk); lock_sock(sk);
spin_lock(&filp->f_lock); fasync_helper(fd, filp, on, &sock->fasync_list);
if (on)
filp->f_flags |= FASYNC; if (!sock->fasync_list)
sock_reset_flag(sk, SOCK_FASYNC);
else else
filp->f_flags &= ~FASYNC;
spin_unlock(&filp->f_lock);
prev = &(sock->fasync_list);
for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
if (fa->fa_file == filp)
break;
if (on) {
if (fa != NULL) {
write_lock_bh(&sk->sk_callback_lock);
fa->fa_fd = fd;
write_unlock_bh(&sk->sk_callback_lock);
kfree(fna);
goto out;
}
fna->fa_file = filp;
fna->fa_fd = fd;
fna->magic = FASYNC_MAGIC;
fna->fa_next = sock->fasync_list;
write_lock_bh(&sk->sk_callback_lock);
sock->fasync_list = fna;
sock_set_flag(sk, SOCK_FASYNC); sock_set_flag(sk, SOCK_FASYNC);
write_unlock_bh(&sk->sk_callback_lock);
} else {
if (fa != NULL) {
write_lock_bh(&sk->sk_callback_lock);
*prev = fa->fa_next;
if (!sock->fasync_list)
sock_reset_flag(sk, SOCK_FASYNC);
write_unlock_bh(&sk->sk_callback_lock);
kfree(fa);
}
}
out: release_sock(sk);
release_sock(sock->sk);
return 0; return 0;
} }
@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band)
/* fall through */ /* fall through */
case SOCK_WAKE_IO: case SOCK_WAKE_IO:
call_kill: call_kill:
__kill_fasync(sock->fasync_list, SIGIO, band); kill_fasync(&sock->fasync_list, SIGIO, band);
break; break;
case SOCK_WAKE_URG: case SOCK_WAKE_URG:
__kill_fasync(sock->fasync_list, SIGURG, band); kill_fasync(&sock->fasync_list, SIGURG, band);
} }
return 0; return 0;
} }