From e501d0553a7580fcc6654d7f58a5f061d31d00af Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 8 Jul 2010 12:14:41 +0300 Subject: [PATCH 01/22] Bluetooth: Check L2CAP pending status before sending connect request Due to race condition in L2CAP state machine L2CAP Connection Request may be sent twice for SDP with the same source channel id. Problems reported connecting to Apple products, some carkit, Blackberry phones. ... 2010-06-07 21:18:03.651031 < ACL data: handle 1 flags 0x02 dlen 12 L2CAP(s): Connect req: psm 1 scid 0x0040 2010-06-07 21:18:03.653473 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2010-06-07 21:18:03.653808 > HCI Event: Auth Complete (0x06) plen 3 status 0x00 handle 1 2010-06-07 21:18:03.653869 < ACL data: handle 1 flags 0x02 dlen 12 L2CAP(s): Connect req: psm 1 scid 0x0040 ... Patch uses L2CAP_CONF_CONNECT_PEND flag to mark that L2CAP Connection Request has been sent already. Modified version of patch from Ville Tervo. Signed-off-by: Andrei Emeltchenko Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 1b682a5aa061..cf3c4073a8a6 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -401,6 +401,11 @@ static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) l2cap_send_sframe(pi, control); } +static inline int __l2cap_no_conn_pending(struct sock *sk) +{ + return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND); +} + static void l2cap_do_start(struct sock *sk) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; @@ -409,12 +414,13 @@ static void l2cap_do_start(struct sock *sk) if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) return; - if (l2cap_check_security(sk)) { + if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) { struct l2cap_conn_req req; req.scid = cpu_to_le16(l2cap_pi(sk)->scid); req.psm = l2cap_pi(sk)->psm; 
l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); @@ -464,12 +470,14 @@ static void l2cap_conn_start(struct l2cap_conn *conn) } if (sk->sk_state == BT_CONNECT) { - if (l2cap_check_security(sk)) { + if (l2cap_check_security(sk) && + __l2cap_no_conn_pending(sk)) { struct l2cap_conn_req req; req.scid = cpu_to_le16(l2cap_pi(sk)->scid); req.psm = l2cap_pi(sk)->psm; l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); @@ -2912,7 +2920,6 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd l2cap_pi(sk)->ident = 0; l2cap_pi(sk)->dcid = dcid; l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, @@ -4404,6 +4411,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) req.psm = l2cap_pi(sk)->psm; l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); From da213f41cdd445d3df468f8fe7f24fe4f4c48508 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 18 Jun 2010 11:08:56 +0300 Subject: [PATCH 02/22] Bluetooth: Reset the security level after an authentication failure When authentication fails for a connection the assumed security level should be set back to BT_SECURITY_LOW so that subsequent connect attempts over the same link don't falsely assume that security is adequate enough. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6c57fc71c7e2..786b5de0bac4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1049,6 +1049,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn) { if (!ev->status) conn->link_mode |= HCI_LM_AUTH; + else + conn->sec_level = BT_SECURITY_LOW; clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); From 045309820afe047920a50de25634dab46a1e851d Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Tue, 15 Jun 2010 15:56:05 +0300 Subject: [PATCH 03/22] Bluetooth: Update sec_level/auth_type for already existing connections Update auth level for already existing connections if it is lower than required by new connection. Signed-off-by: Ville Tervo Reviewed-by: Emeltchenko Andrei Signed-off-by: Luciano Coelho Signed-off-by: Andrei Emeltchenko Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b10e3cdb08f8..800b6b9fbbae 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -358,6 +358,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 acl->sec_level = sec_level; acl->auth_type = auth_type; hci_acl_connect(acl); + } else { + if (acl->sec_level < sec_level) + acl->sec_level = sec_level; + if (acl->auth_type < auth_type) + acl->auth_type = auth_type; } if (type == ACL_LINK) From 635f081541edef7644073f9be50ee5bf7c57ce63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 7 Jul 2010 20:56:53 +0000 Subject: [PATCH 04/22] Phonet: fix skb leak in pipe endpoint accept() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 94d72e85a475..b2a3ae6cad78 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -698,6 +698,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) newsk = NULL; goto out; } + kfree_skb(oskb); sock_hold(sk); pep_sk(newsk)->listener = sk; From 17c99297212a2d1b1779a08caf4b0d83a85545df Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sun, 11 Jul 2010 17:10:09 -0700 Subject: [PATCH 05/22] r8169: incorrect identifier for a 8168dp Merge error. See CFG_METHOD_8 (0x3c800000 + 0x00300000) since version 8.002.00 of Realtek's driver. Signed-off-by: Francois Romieu Cc: Hayes Signed-off-by: David S. Miller --- drivers/net/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 96b6cfbf0a3a..cdc6a5c2e70d 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1316,7 +1316,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, { 0x7c800000, 0x28000000, RTL_GIGA_MAC_VER_26 }, /* 8168C family. */ - { 0x7cf00000, 0x3ca00000, RTL_GIGA_MAC_VER_24 }, + { 0x7cf00000, 0x3cb00000, RTL_GIGA_MAC_VER_24 }, { 0x7cf00000, 0x3c900000, RTL_GIGA_MAC_VER_23 }, { 0x7cf00000, 0x3c800000, RTL_GIGA_MAC_VER_18 }, { 0x7c800000, 0x3c800000, RTL_GIGA_MAC_VER_24 }, From 70c2efa5a32a7d38e66224844032160317fa7887 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 9 Jul 2010 15:33:25 +0000 Subject: [PATCH 06/22] act_nat: not all of the ICMP packets need an IP header payload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit not all of the ICMP packets need an IP header payload, so we check the length of the skbs only when the packets should have an IP header payload. Based upon analysis and initial patch by Rodrigo Partearroyo González. 
Signed-off-by: Changli Gao Acked-by: Herbert Xu ---- net/sched/act_nat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) Signed-off-by: David S. Miller --- net/sched/act_nat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 570949417f38..724553e8ed7b 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -205,7 +205,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, { struct icmphdr *icmph; - if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + if (!pskb_may_pull(skb, ihl + sizeof(*icmph))) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); @@ -215,6 +215,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, (icmph->type != ICMP_PARAMETERPROB)) break; + if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + goto drop; + iph = (void *)(icmph + 1); if (egress) addr = iph->daddr; From 336a283b9cbe47748ccd68fd8c5158f67cee644b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 12 Jul 2010 20:03:42 -0700 Subject: [PATCH 07/22] dsa: Fix Kconfig dependencies. Based upon a report by Randy Dunlap. DSA needs PHYLIB, but PHYLIB needs NET_ETHERNET. So, in order to select PHYLIB we have to make DSA depend upon NET_ETHERNET. Signed-off-by: David S. 
Miller --- net/dsa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index c51b55400dc5..11201784d29a 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -1,7 +1,7 @@ menuconfig NET_DSA bool "Distributed Switch Architecture support" default n - depends on EXPERIMENTAL && !S390 + depends on EXPERIMENTAL && NET_ETHERNET && !S390 select PHYLIB ---help--- This allows you to use hardware switch chips that use From ab83a38958ae7e419f18fabe9b2954a6087bfe0d Mon Sep 17 00:00:00 2001 From: Ken Kawasaki Date: Sat, 10 Jul 2010 01:18:13 +0000 Subject: [PATCH 08/22] axnet_cs: use spin_lock_irqsave in ax_interrupt Use spin_lock_irqsave instead of spin_lock in ax_interrupt because the interrupt handler can also be invoked from ei_watchdog. Signed-off-by: Ken Kawasaki Signed-off-by: David S. Miller --- drivers/net/pcmcia/axnet_cs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 5b3dfb4ab279..33525bf2a3d3 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -1168,6 +1168,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id) int interrupts, nr_serviced = 0, i; struct ei_device *ei_local; int handled = 0; + unsigned long flags; e8390_base = dev->base_addr; ei_local = netdev_priv(dev); @@ -1176,7 +1177,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id) * Protect the irq test too. 
*/ - spin_lock(&ei_local->page_lock); + spin_lock_irqsave(&ei_local->page_lock, flags); if (ei_local->irqlock) { @@ -1188,7 +1189,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id) dev->name, inb_p(e8390_base + EN0_ISR), inb_p(e8390_base + EN0_IMR)); #endif - spin_unlock(&ei_local->page_lock); + spin_unlock_irqrestore(&ei_local->page_lock, flags); return IRQ_NONE; } @@ -1261,7 +1262,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id) ei_local->irqlock = 0; outb_p(ENISR_ALL, e8390_base + EN0_IMR); - spin_unlock(&ei_local->page_lock); + spin_unlock_irqrestore(&ei_local->page_lock, flags); return IRQ_RETVAL(handled); } From 0f4da2d77e1bf424ac36424081afc22cbfc3ff2b Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 13 Jul 2010 14:06:32 -0400 Subject: [PATCH 09/22] hostap_pci: set dev->base_addr during probe "hostap: Protect against initialization interrupt" (which reinstated "wireless: hostap, fix oops due to early probing interrupt") reintroduced Bug 16111. This is because hostap_pci wasn't setting dev->base_addr, which is now checked in prism2_interrupt. As a result, initialization was failing for PCI-based hostap devices. This corrects that oversight. Signed-off-by: John W. 
Linville --- drivers/net/wireless/hostap/hostap_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/hostap/hostap_pci.c b/drivers/net/wireless/hostap/hostap_pci.c index d24dc7dc0723..972a9c3af39e 100644 --- a/drivers/net/wireless/hostap/hostap_pci.c +++ b/drivers/net/wireless/hostap/hostap_pci.c @@ -330,6 +330,7 @@ static int prism2_pci_probe(struct pci_dev *pdev, dev->irq = pdev->irq; hw_priv->mem_start = mem; + dev->base_addr = (unsigned long) mem; prism2_pci_cor_sreset(local); From d809ec895505e6f35fb1965f0946381ab4eaa474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 12 Jul 2010 21:29:42 +0000 Subject: [PATCH 10/22] xfrm: do not assume that template resolving always returns xfrms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xfrm_resolve_and_create_bundle() assumed that, if policies indicated presence of xfrms, bundle template resolution would always return some xfrms. This is not true for 'use' level policies which can result in no xfrm's being applied if there is no suitable xfrm states. This fixes a crash by this incorrect assumption. Reported-by: George Spelvin Bisected-by: George Spelvin Tested-by: George Spelvin Signed-off-by: Timo Teräs Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index af1c173be4ad..a7ec5a8a2380 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1594,8 +1594,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); - if (err < 0) { - if (err != -EAGAIN) + if (err <= 0) { + if (err != 0 && err != -EAGAIN) XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); return ERR_PTR(err); } @@ -1678,6 +1678,13 @@ xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, goto make_dummy_bundle; dst_hold(&xdst->u.dst); return oldflo; + } else if (new_xdst == NULL) { + num_xfrms = 0; + if (oldflo == NULL) + goto make_dummy_bundle; + xdst->num_xfrms = 0; + dst_hold(&xdst->u.dst); + return oldflo; } /* Kill the previous bundle */ @@ -1760,6 +1767,10 @@ restart: xfrm_pols_put(pols, num_pols); err = PTR_ERR(xdst); goto dropdst; + } else if (xdst == NULL) { + num_xfrms = 0; + drop_pols = num_pols; + goto no_transform; } spin_lock_bh(&xfrm_policy_sk_bundle_lock); From 3a047bf87b1b6f69c62ab9fb28072c639cb7e2fa Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 12 Jul 2010 21:00:12 +0000 Subject: [PATCH 11/22] rfs: call sock_rps_record_flow() in tcp_splice_read() rfs: call sock_rps_record_flow() in tcp_splice_read() call sock_rps_record_flow() in tcp_splice_read(), so the applications using splice(2) or sendfile(2) can utilize RFS. Signed-off-by: Changli Gao ---- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6596b4feeddc..65afeaec15b7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -608,6 +608,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ssize_t spliced; int ret; + sock_rps_record_flow(sk); /* * We can't seek on a socket input */ From 87fd308cfc6b2e880bf717a740bd5c58d2aed10c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Jul 2010 05:24:20 +0000 Subject: [PATCH 12/22] net: skb_tx_hash() fix relative to skb_orphan_try() commit fc6055a5ba31e2 (net: Introduce skb_orphan_try()) added early orphaning of skbs. This unfortunately added a performance regression in skb_tx_hash() in case of stacked devices (bonding, vlans, ...) Since skb->sk is now NULL, we cannot access sk->sk_hash anymore to spread tx packets to multiple NIC queues on multiqueue devices. skb_tx_hash() in this case only uses skb->protocol, same value for all flows. skb_orphan_try() can copy sk->sk_hash into skb->rxhash and skb_tx_hash() can use this saved sk_hash value to compute its internal hash value. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 723a34710ad4..4b05fdf762ab 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1911,8 +1911,16 @@ static int dev_gso_segment(struct sk_buff *skb) */ static inline void skb_orphan_try(struct sk_buff *skb) { - if (!skb_tx(skb)->flags) + struct sock *sk = skb->sk; + + if (sk && !skb_tx(skb)->flags) { + /* skb_tx_hash() wont be able to get sk. 
+ * We copy sk_hash into skb->rxhash + */ + if (!skb->rxhash) + skb->rxhash = sk->sk_hash; skb_orphan(skb); + } } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -1998,8 +2006,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = (__force u16) skb->protocol; - + hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); From 91a72a70594e5212c97705ca6a694bd307f7a26b Mon Sep 17 00:00:00 2001 From: Doug Kehn Date: Wed, 14 Jul 2010 18:02:16 -0700 Subject: [PATCH 13/22] net/core: neighbour update Oops When configuring DMVPN (GRE + openNHRP) and a GRE remote address is configured a kernel Oops is observed. The observed Oops is caused by a NULL header_ops pointer (neigh->dev->header_ops) in neigh_update_hhs() when void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) = neigh->dev->header_ops->cache_update; is executed. The dev associated with the NULL header_ops is the GRE interface. This patch guards against the possibility that header_ops is NULL. This Oops was first observed in kernel version 2.6.26.8. Signed-off-by: Doug Kehn Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6ba1c0eece03..a4e0a7482c2b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -949,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) - = neigh->dev->header_ops->cache_update; + = NULL; + + if (neigh->dev->header_ops) + update = neigh->dev->header_ops->cache_update; if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { From b0f77d0eae0c58a5a9691a067ada112ceeae2d00 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 14 Jul 2010 20:50:29 -0700 Subject: [PATCH 14/22] net: fix problem in reading sock TX queue Fix problem in reading the tx_queue recorded in a socket. In dev_pick_tx, the TX queue is read by doing a check with sk_tx_queue_recorded on the socket, followed by a sk_tx_queue_get. The problem is that there is no mutual exclusion across these calls in the socket so it is possible that the queue in the sock can be invalidated after sk_tx_queue_recorded is called so that sk_tx_queue_get returns -1, which sets 65535 in queue_index and thus dev_pick_tx returns 65536 which is a bogus queue and can cause crash in dev_queue_xmit. We fix this by only calling sk_tx_queue_get which does the proper checks. The interface is that sk_tx_queue_get returns the TX queue if the sock argument is non-NULL and TX queue is recorded, else it returns -1. sk_tx_queue_recorded is no longer used so it can be completely removed. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/sock.h | 7 +------ net/core/dev.c | 7 +++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 731150d52799..0a691ea7654a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1224,12 +1224,7 @@ static inline void sk_tx_queue_clear(struct sock *sk) static inline int sk_tx_queue_get(const struct sock *sk) { - return sk->sk_tx_queue_mapping; -} - -static inline bool sk_tx_queue_recorded(const struct sock *sk) -{ - return (sk && sk->sk_tx_queue_mapping >= 0); + return sk ? sk->sk_tx_queue_mapping : -1; } static inline void sk_set_socket(struct sock *sk, struct socket *sock) diff --git a/net/core/dev.c b/net/core/dev.c index 4b05fdf762ab..0ea10f849be8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2029,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { - u16 queue_index; + int queue_index; struct sock *sk = skb->sk; - if (sk_tx_queue_recorded(sk)) { - queue_index = sk_tx_queue_get(sk); - } else { + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) { From 1680e9063ea28099a1efa8ca11cee069cc7a9bc3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 15 Jul 2010 15:19:12 +0300 Subject: [PATCH 15/22] vhost-net: avoid flush under lock We flush under vq mutex when changing backends. This creates a deadlock as workqueue being flushed needs this lock as well. https://bugzilla.redhat.com/show_bug.cgi?id=612421 Drop the vq mutex before flush: we have the device mutex which is sufficient to prevent another ioctl from touching the vq. Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2406377a6e5e..2764e0fbf29b 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -534,11 +534,16 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) rcu_assign_pointer(vq->private_data, sock); vhost_net_enable_vq(n, vq); done: + mutex_unlock(&vq->mutex); + if (oldsock) { vhost_net_flush_vq(n, index); fput(oldsock->file); } + mutex_unlock(&n->dev.mutex); + return 0; + err_vq: mutex_unlock(&vq->mutex); err: From e40dbc51fbcc3281bb52ecf0f5bec693d36e2aea Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 15 Jul 2010 13:22:33 +0000 Subject: [PATCH 16/22] ipmr: Don't leak memory if fib lookup fails. This was detected using two mcast router tables. The pimreg for the second interface did not have a specific mrule, so packets received by it were handled by the default table, which had nothing configured. This caused the ipmr_fib_lookup to fail, causing the memory leak. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 757f25eb9b4b..7f6273506eea 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -442,8 +442,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) int err; err = ipmr_fib_lookup(net, &fl, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -1728,8 +1730,10 @@ int ip_mr_input(struct sk_buff *skb) goto dont_forward; err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } if (!local) { if (IPCB(skb)->opt.router_alert) { From 95c0ec6a97ae82d39a6e13fc01aa76861a4a76d0 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Thu, 24 Jun 2010 17:10:25 +0300 Subject: [PATCH 17/22] vhost: avoid pr_err on condition guest can trigger Guest can trigger packet truncation by posting a very short buffer and disabling buffer merging. Convert pr_err to pr_debug to avoid log from filling up when this happens. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2764e0fbf29b..2f6185c845e0 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -177,8 +177,8 @@ static void handle_tx(struct vhost_net *net) break; } if (err != len) - pr_err("Truncated TX packet: " - " len %d != %zd\n", err, len); + pr_debug("Truncated TX packet: " + " len %d != %zd\n", err, len); vhost_add_used_and_signal(&net->dev, vq, head, 0); total_len += len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { @@ -275,8 +275,8 @@ static void handle_rx(struct vhost_net *net) } /* TODO: Should check and handle checksum. */ if (err > len) { - pr_err("Discarded truncated rx packet: " - " len %d > %zd\n", err, len); + pr_debug("Discarded truncated rx packet: " + " len %d > %zd\n", err, len); vhost_discard_vq_desc(vq); continue; } From 9acd56d3f2a05191ee369cbdd8c37dd547aa19b8 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 16 Jul 2010 09:50:10 -0700 Subject: [PATCH 18/22] rt2x00: Fix lockdep warning in rt2x00lib_probe_dev() The rt2x00dev->intf_work workqueue is never initialized when a driver is probed for a non-existent device (in this case rt2500usb). On such a path we call rt2x00lib_remove_dev() to free any resources initialized during the probe before we use INIT_WORK to initialize the workqueue. This causes lockdep to get confused since the lock used in the workqueue hasn't been initialized yet but is now being acquired during cancel_work_sync() called by rt2x00lib_remove_dev(). Fix this by initializing the workqueue first before we attempt to probe the device. 
This should make lockdep happy and avoid breaking any assumptions about how the library cleans up after a probe fails. phy0 -> rt2x00lib_probe_dev: Error - Failed to allocate device. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. Pid: 2027, comm: modprobe Not tainted 2.6.35-rc5+ #60 Call Trace: [] register_lock_class+0x152/0x31f [] ? usb_control_msg+0xd5/0x111 [] __lock_acquire+0xce/0xcf4 [] ? trace_hardirqs_off+0xd/0xf [] ? _raw_spin_unlock_irqrestore+0x33/0x41 [] lock_acquire+0xd1/0xf7 [] ? __cancel_work_timer+0x99/0x17e [] __cancel_work_timer+0xd0/0x17e [] ? __cancel_work_timer+0x99/0x17e [] cancel_work_sync+0xb/0xd [] rt2x00lib_remove_dev+0x25/0xb0 [rt2x00lib] [] rt2x00lib_probe_dev+0x380/0x3ed [rt2x00lib] [] ? __raw_spin_lock_init+0x31/0x52 [] ? T.676+0xe/0x10 [rt2x00usb] [] rt2x00usb_probe+0x121/0x15e [rt2x00usb] [] usb_probe_interface+0x151/0x19e [] driver_probe_device+0xa7/0x136 [] __driver_attach+0x4a/0x66 [] ? __driver_attach+0x0/0x66 [] bus_for_each_dev+0x54/0x89 [] driver_attach+0x19/0x1b [] bus_add_driver+0xb4/0x204 [] driver_register+0x98/0x109 [] usb_register_driver+0xb2/0x173 [] ? rt2500usb_init+0x0/0x20 [rt2500usb] [] rt2500usb_init+0x1e/0x20 [rt2500usb] [] do_one_initcall+0x6d/0x17a [] sys_init_module+0x9c/0x1e0 [] system_call_fastpath+0x16/0x1b Signed-off-by: Stephen Boyd Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 3ae468c4d760..f20d3eeeea7f 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -853,6 +853,11 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) BIT(NL80211_IFTYPE_MESH_POINT) | BIT(NL80211_IFTYPE_WDS); + /* + * Initialize configuration work. 
+ */ + INIT_WORK(&rt2x00dev->intf_work, rt2x00lib_intf_scheduled); + /* * Let the driver probe the device to detect the capabilities. */ @@ -862,11 +867,6 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) goto exit; } - /* - * Initialize configuration work. - */ - INIT_WORK(&rt2x00dev->intf_work, rt2x00lib_intf_scheduled); - /* * Allocate queue array. */ From ee2e6114de3bdb1c34f3910b690f990483e981ab Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Fri, 16 Jul 2010 04:57:25 +0000 Subject: [PATCH 19/22] ibmveth: lost IRQ while closing/opening device leads to service loss The order of freeing the IRQ and freeing the device in firmware in ibmveth_close can cause the adapter to become unusable after a subsequent ibmveth_open. Only a reboot of the OS will make the network device usable again. This is seen when cycling the adapter up and down while there is network activity. There is a window where an IRQ will be left unserviced (H_EOI will not be called). The solution is to make a VIO_IRQ_DISABLE h_call, free the device with firmware, and then call free_irq. Signed-off-by: Robert Jennings Signed-off-by: David S. 
Miller --- drivers/net/ibmveth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 7acb3edc47ef..2602852cc55a 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -677,7 +677,7 @@ static int ibmveth_close(struct net_device *netdev) if (!adapter->pool_config) netif_stop_queue(netdev); - free_irq(netdev->irq, netdev); + h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); do { lpar_rc = h_free_logical_lan(adapter->vdev->unit_address); @@ -689,6 +689,8 @@ static int ibmveth_close(struct net_device *netdev) lpar_rc); } + free_irq(netdev->irq, netdev); + adapter->rx_no_buffer = *(u64*)(((char*)adapter->buffer_list_addr) + 4096 - 8); ibmveth_cleanup(adapter); From d9a9dc66eb8a8fd85c8546247e7e1d6023d0eb0f Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Fri, 16 Jul 2010 00:38:44 +0000 Subject: [PATCH 20/22] IPv6: fix CoA check in RH2 input handler (mip6_rthdr_input()) The input handler for Type 2 Routing Header (mip6_rthdr_input()) checks if the CoA in the packet matches the CoA in the XFRM state. Current check is buggy: it compares the address in the Type 2 Routing Header, i.e. the HoA, against the expected CoA in the state. The comparison should be made against the address in the destination field of the IPv6 header. The bug remained unnoticed because the main (and possibly only current) user of the code (UMIP MIPv6 Daemon) initializes the XFRM state with the unspecified address, i.e. explicitly allows everything. Yoshifuji-san, can you ack that one? Signed-off-by: Arnaud Ebalard Signed-off-by: David S. 
Miller --- net/ipv6/mip6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 2794b6002836..d6e9599d0705 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -347,11 +347,12 @@ static const struct xfrm_type mip6_destopt_type = static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) { + struct ipv6hdr *iph = ipv6_hdr(skb); struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; int err = rt2->rt_hdr.nexthdr; spin_lock(&x->lock); - if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && + if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) err = -ENOENT; spin_unlock(&x->lock); From 45e77d314585869dfe43c82679f7e08c9b35b898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 19 Jul 2010 01:16:18 +0000 Subject: [PATCH 21/22] tcp: fix crash in tcp_xmit_retransmit_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It can happen that there are no packets in queue while calling tcp_xmit_retransmit_queue(). tcp_write_queue_head() then returns NULL and that gets deref'ed to get sacked into a local var. There is no work to do if no packets are outstanding so we just exit early. This oops was introduced by 08ebd1721ab8fd (tcp: remove tp->lost_out guard to make joining diff nicer). Signed-off-by: Ilpo Järvinen Reported-by: Lennart Schulte Tested-by: Lennart Schulte Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4ed957f201a..7ed9dc1042d1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2208,6 +2208,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) int mib_idx; int fwd_rexmitting = 0; + if (!tp->packets_out) + return; + if (!tp->lost_out) tp->retransmit_high = tp->snd_una; From 573201f36fd9c7c6d5218cdcd9948cee700b277d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Jul 2010 19:26:45 +0000 Subject: [PATCH 22/22] bridge: Partially disable netpoll support The new netpoll code in bridging contains use-after-free bugs that are non-trivial to fix. This patch fixes this by removing the code that uses skbs after they're freed. As a consequence, this means that we can no longer call bridge from the netpoll path, so this patch also removes the controller function in order to disable netpoll. Signed-off-by: Herbert Xu Thanks, Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 9 --------- net/bridge/br_forward.c | 23 +---------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eedf2c94820e..753fc4221f3c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -217,14 +217,6 @@ static bool br_devices_support_netpoll(struct net_bridge *br) return count != 0 && ret; } -static void br_poll_controller(struct net_device *br_dev) -{ - struct netpoll *np = br_dev->npinfo->netpoll; - - if (np->real_dev != br_dev) - netpoll_poll_dev(np->real_dev); -} - void br_netpoll_cleanup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -295,7 +287,6 @@ static const struct net_device_ops br_netdev_ops = { .ndo_do_ioctl = br_dev_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_cleanup = br_netpoll_cleanup, - .ndo_poll_controller = br_poll_controller, #endif }; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index a4e72a89e4ff..595da45f9088 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -50,14 +50,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) kfree_skb(skb); else { skb_push(skb, ETH_HLEN); - -#ifdef CONFIG_NET_POLL_CONTROLLER - if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) { - netpoll_send_skb(skb->dev->npinfo->netpoll, skb); - skb->dev->priv_flags &= ~IFF_IN_NETPOLL; - } else -#endif - dev_queue_xmit(skb); + dev_queue_xmit(skb); } } @@ -73,23 +66,9 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { -#ifdef CONFIG_NET_POLL_CONTROLLER - struct net_bridge *br = to->br; - if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) { - struct netpoll *np; - to->dev->npinfo = skb->dev->npinfo; - np = skb->dev->npinfo->netpoll; - np->real_dev = np->dev = to->dev; - to->dev->priv_flags |= IFF_IN_NETPOLL; - } -#endif skb->dev = to->dev; NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, 
br_forward_finish); -#ifdef CONFIG_NET_POLL_CONTROLLER - if (skb->dev->npinfo) - skb->dev->npinfo->netpoll->dev = br->dev; -#endif } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)