From c0cfe7faa12f189ef1024fce5a710791d0062355 Mon Sep 17 00:00:00 2001 From: "G. Liakhovetski" Date: Fri, 8 Jun 2007 19:15:17 -0700 Subject: [PATCH 01/17] [IrDA]: Fix Rx/Tx path race. From: G. Liakhovetski We need to switch to NRM _before_ sending the final packet otherwise we might hit a race condition where we get the first packet from the peer while we're still in LAP_XMIT_P. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/net/irda/irlap.h | 17 +++++++++++++++++ net/irda/irlap_event.c | 18 ------------------ net/irda/irlap_frame.c | 3 +++ 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index f0248fb8e196..a3d370efb903 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -289,4 +289,21 @@ static inline void irlap_clear_disconnect(struct irlap_cb *self) self->disconnect_pending = FALSE; } +/* + * Function irlap_next_state (self, state) + * + * Switches state and provides debug information + * + */ +static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state) +{ + /* + if (!self || self->magic != LAP_MAGIC) + return; + + IRDA_DEBUG(4, "next LAP state = %s\n", irlap_state[state]); + */ + self->state = state; +} + #endif diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index 0b02073ffdf3..71c805506933 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -316,23 +316,6 @@ void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, } } -/* - * Function irlap_next_state (self, state) - * - * Switches state and provides debug information - * - */ -static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state) -{ - /* - if (!self || self->magic != LAP_MAGIC) - return; - - IRDA_DEBUG(4, "next LAP state = %s\n", irlap_state[state]); - */ - self->state = state; -} - /* * Function irlap_state_ndm (event, skb, frame) * @@ -1086,7 +1069,6 @@ static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT 
event, } else { /* Final packet of window */ irlap_send_data_primary_poll(self, skb); - irlap_next_state(self, LAP_NRM_P); /* * Make sure state machine does not try to send diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 3c5a68e36414..3013c49ab975 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -798,16 +798,19 @@ void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb) self->vs = (self->vs + 1) % 8; self->ack_required = FALSE; + irlap_next_state(self, LAP_NRM_P); irlap_send_i_frame(self, tx_skb, CMD_FRAME); } else { IRDA_DEBUG(4, "%s(), sending unreliable frame\n", __FUNCTION__); if (self->ack_required) { irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME); + irlap_next_state(self, LAP_NRM_P); irlap_send_rr_frame(self, CMD_FRAME); self->ack_required = FALSE; } else { skb->data[1] |= PF_BIT; + irlap_next_state(self, LAP_NRM_P); irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME); } } From b7e773b869f49bbd69e9dad76b34d3552627fac5 Mon Sep 17 00:00:00 2001 From: "G. Liakhovetski" Date: Fri, 8 Jun 2007 19:15:56 -0700 Subject: [PATCH 02/17] [IrDA]: f-timer reloading when sending rejected frames. Jean II was right: you have to re-charge the final timer when resending rejected frames. Otherwise it triggers at a wrong time and can break the currently running communication. Reproducible under rt-preempt. Signed-off-by: G. Liakhovetski Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- net/irda/irlap_event.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index 71c805506933..a8b8873aa263 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -1418,14 +1418,14 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event, */ self->remote_busy = FALSE; + /* Stop final timer */ + del_timer(&self->final_timer); + /* * Nr as expected? 
*/ ret = irlap_validate_nr_received(self, info->nr); if (ret == NR_EXPECTED) { - /* Stop final timer */ - del_timer(&self->final_timer); - /* Update Nr received */ irlap_update_nr_received(self, info->nr); @@ -1457,14 +1457,12 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event, /* Resend rejected frames */ irlap_resend_rejected_frames(self, CMD_FRAME); - - /* Final timer ??? Jean II */ + irlap_start_final_timer(self, self->final_timeout * 2); irlap_next_state(self, LAP_NRM_P); } else if (ret == NR_INVALID) { IRDA_DEBUG(1, "%s(), Received RR with " "invalid nr !\n", __FUNCTION__); - del_timer(&self->final_timer); irlap_next_state(self, LAP_RESET_WAIT); From c9aca9da026036306aa00a928f6acb4b94eb3c33 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Mon, 4 Jun 2007 00:06:51 +0200 Subject: [PATCH 03/17] [PATCH] cfg80211: fix signed macaddress in sysfs Fix signedness mixup making mac addresses show up strangely (like 00:11:22:33:44:ffffffaa) in /sys/class/ieee80211/*/macaddress. Signed-off-by: David Lamparter Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 3ebae1442963..88aaacd9f822 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -33,7 +33,7 @@ static ssize_t _show_permaddr(struct device *dev, struct device_attribute *attr, char *buf) { - char *addr = dev_to_rdev(dev)->wiphy.perm_addr; + unsigned char *addr = dev_to_rdev(dev)->wiphy.perm_addr; return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); From 0107136c04290ddd765adc568fe7a335d355d17e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Jun 2007 08:07:13 +0200 Subject: [PATCH 04/17] [PATCH] mac80211: fix debugfs tx power reduction output This patch fixes a typo in mac80211's debugfs.c. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index bb6c0feb2d48..476c8486f789 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -112,7 +112,7 @@ DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", local->wep_iv & 0xffffff); DEBUGFS_READONLY_FILE(tx_power_reduction, 20, "%d.%d dBm", local->hw.conf.tx_power_reduction / 10, - local->hw.conf.tx_power_reduction & 10); + local->hw.conf.tx_power_reduction % 10); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : ""); From 14042cbefce4af12f7ca35d2604686154d803291 Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Fri, 8 Jun 2007 15:31:13 +0200 Subject: [PATCH 05/17] [PATCH] mac80211: Don't stop tx queue on master device while scanning. mac80211 stops the tx queues during scans. This is wrong with respect to the master device tx queue, since stopping it prevents any probes from being sent during the scan. Instead, they accumulate in the queue and are only sent after the scan is finished, which is obviously wrong. Signed-off-by: Mattias Nissler Signed-off-by: John W. Linville --- net/mac80211/ieee80211_sta.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index 9f30ae4c2ab3..91b545c144c1 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -2592,11 +2592,17 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) read_lock(&local->sub_if_lock); list_for_each_entry(sdata, &local->sub_if_list, list) { + + /* No need to wake the master device. 
*/ + if (sdata->dev == local->mdev) + continue; + if (sdata->type == IEEE80211_IF_TYPE_STA) { if (sdata->u.sta.associated) ieee80211_send_nullfunc(local, sdata, 0); ieee80211_sta_timer((unsigned long)sdata); } + netif_wake_queue(sdata->dev); } read_unlock(&local->sub_if_lock); @@ -2738,6 +2744,12 @@ static int ieee80211_sta_start_scan(struct net_device *dev, read_lock(&local->sub_if_lock); list_for_each_entry(sdata, &local->sub_if_list, list) { + + /* Don't stop the master interface, otherwise we can't transmit + * probes! */ + if (sdata->dev == local->mdev) + continue; + netif_stop_queue(sdata->dev); if (sdata->type == IEEE80211_IF_TYPE_STA && sdata->u.sta.associated) From 606f585e363527da9feaed79465132c0c661fd9e Mon Sep 17 00:00:00 2001 From: Konstantin Sharlaimov Date: Tue, 12 Jun 2007 14:16:59 -0700 Subject: [PATCH 06/17] [PPP_MPPE]: Fix "osize too small" check. Prevent mppe_decompress() from generating "osize too small" errors when checking for output buffer size. When receiving a packet of mru size the output buffer for decrypted data is 1 byte too small since mppe_decompress() tries to account for possible PFC, however later in code it is assumed no PFC. Adjusting the check prevented these errors from occurring. Signed-off-by: Konstantin Sharlaimov Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/net/ppp_mppe.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index d5bdd2574659..5ae80bbe2edc 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -493,14 +493,14 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, /* * Make sure we have enough room to decrypt the packet. - * Note that for our test we only subtract 1 byte whereas in - * mppe_compress() we added 2 bytes (+MPPE_OVHD); - * this is to account for possible PFC. 
+ * To account for possible PFC we should only subtract 1 + * byte whereas in mppe_compress() we added 2 bytes (+MPPE_OVHD); + * However, we assume no PFC, thus subtracting 2 bytes. */ - if (osize < isize - MPPE_OVHD - 1) { + if (osize < isize - MPPE_OVHD - 2) { printk(KERN_DEBUG "mppe_decompress[%d]: osize too small! " "(have: %d need: %d)\n", state->unit, - osize, isize - MPPE_OVHD - 1); + osize, isize - MPPE_OVHD - 2); return DECOMP_ERROR; } osize = isize - MPPE_OVHD - 2; /* assume no PFC */ From 3d7dbeac58d0669c37e35a3b91bb41c0146395ce Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 12 Jun 2007 14:36:42 -0700 Subject: [PATCH 07/17] [TCP]: Disable TSO if MD5SIG is enabled. Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 97e294e82679..354721d67f69 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -878,6 +878,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, kfree(newkey); return -ENOMEM; } + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } if (tcp_alloc_md5sig_pool() == NULL) { kfree(newkey); @@ -1007,7 +1008,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, return -EINVAL; tp->md5sig_info = p; - + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4f06a51ad4fd..193d9d60bb7a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -590,6 +590,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, kfree(newkey); return -ENOMEM; } + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } tcp_alloc_md5sig_pool(); if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) { @@ -724,6 +725,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return -ENOMEM; tp->md5sig_info = p; + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } 
newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); From af15cc7b858c7653443ab64db2e41b69506450ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 12 Jun 2007 16:16:44 -0700 Subject: [PATCH 08/17] [TCP]: Fix left_out setting during FRTO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without FRTO, the tcp_try_to_open is never called with lost_out > 0 (see tcp_time_to_recover). However, when FRTO is enabled, the !tp->lost condition is not used until end of FRTO because that way TCP avoids premature entry to fast recovery during FRTO. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 74683d81c3f1..ed4a1bda822d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2037,7 +2037,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); - tp->left_out = tp->sacked_out; + tcp_sync_left_out(tp); if (tp->retrans_out == 0) tp->retrans_stamp = 0; From 66e1e3b20cbbf99da63e6c1af0fc6d39c2ed099a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Jun 2007 01:03:53 -0700 Subject: [PATCH 09/17] [TCP]: Set initial_ssthresh default to zero in Cubic and BIC. Because of the current default of 100, Cubic and BIC perform very poorly compared to standard Reno. In the worst case, this change makes Cubic and BIC as aggressive as Reno. So this change should be very safe. Signed-off-by: David S. 
Miller --- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cubic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 281c9f913257..dd9ef65ad3ff 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -29,7 +29,7 @@ static int fast_convergence = 1; static int max_increment = 16; static int low_window = 14; static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ -static int initial_ssthresh = 100; +static int initial_ssthresh; static int smooth_part = 20; module_param(fast_convergence, int, 0644); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 14224487b16b..ebfaac2f9f46 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -29,7 +29,7 @@ static int fast_convergence __read_mostly = 1; static int max_increment __read_mostly = 16; static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ -static int initial_ssthresh __read_mostly = 100; +static int initial_ssthresh __read_mostly; static int bic_scale __read_mostly = 41; static int tcp_friendliness __read_mostly = 1; From d570ee490fb18220262cfe41284d7aede797ed4f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 15 May 2007 16:32:39 -0400 Subject: [PATCH 10/17] [SCTP]: Correctly set daddr for IPv6 sockets during peeloff During peeloff of AF_INET6 socket, the inet6_sk(sk)->daddr wasn't set correctly since the code was assuming IPv4 only. Now we use a correct call to set the destination address. 
Signed-off-by: Vlad Yasevich Acked-by: Sridhar Samudrala --- net/sctp/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4dcdabf56473..d370c945a757 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3550,6 +3550,7 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc, struct sock *sk = asoc->base.sk; struct socket *sock; struct inet_sock *inetsk; + struct sctp_af *af; int err = 0; /* An association cannot be branched off from an already peeled-off @@ -3571,8 +3572,9 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc, /* Make peeled-off sockets more like 1-1 accepted sockets. * Set the daddr and initialize id to something more random */ + af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family); + af->to_sk_daddr(&asoc->peer.primary_addr, sk); inetsk = inet_sk(sock->sk); - inetsk->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; inetsk->id = asoc->next_tsn ^ jiffies; *sockp = sock; From 8b35805693e1915829355723537f99f1b8bc9cc0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 15 May 2007 17:14:58 -0400 Subject: [PATCH 11/17] [SCTP]: Allow unspecified port in sctp_bindx() Allow sctp_bindx() to accept multiple addresses with an unspecified port. In this case, all addresses inherit the first bound port. We still catch full mis-matches. Signed-off-by: Vlad Yasevich Acked-by: Sridhar Samudrala --- net/sctp/socket.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d370c945a757..a5b6e559451e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -333,12 +333,19 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (!sp->pf->bind_verify(sp, addr)) return -EADDRNOTAVAIL; - /* We must either be unbound, or bind to the same port. 
*/ - if (bp->port && (snum != bp->port)) { - SCTP_DEBUG_PRINTK("sctp_do_bind:" + /* We must either be unbound, or bind to the same port. + * It's OK to allow 0 ports if we are already bound. + * We'll just inhert an already bound port in this case + */ + if (bp->port) { + if (!snum) + snum = bp->port; + else if (snum != bp->port) { + SCTP_DEBUG_PRINTK("sctp_do_bind:" " New port %d does not match existing port " "%d.\n", snum, bp->port); - return -EINVAL; + return -EINVAL; + } } if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) From fe979ac169970b3d12facd6565766735862395c5 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 23 May 2007 11:11:37 -0400 Subject: [PATCH 12/17] [SCTP] Fix leak in sctp_getsockopt_local_addrs when copy_to_user fails If the copy_to_user or copy_user calls fail in sctp_getsockopt_local_addrs(), the function should free locally allocated storage before returning error. Spotted by Coverity. Signed-off-by: Vlad Yasevich Acked-by: Sridhar Samudrala --- net/sctp/socket.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a5b6e559451e..45510c46c223 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4352,11 +4352,12 @@ copy_getaddrs: err = -EFAULT; goto error; } - if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) - return -EFAULT; + if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) { + err = -EFAULT; + goto error; + } if (put_user(bytes_copied, optlen)) - return -EFAULT; - + err = -EFAULT; error: kfree(addrs); return err; From c910b47e1811b3f8b184108c48de3d7af3e2999b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 7 Jun 2007 13:47:03 -0400 Subject: [PATCH 13/17] [SCTP] Update pmtu handling to be similar to tcp Introduce new function sctp_transport_update_pmtu that updates the transports and destination caches view of the path mtu. 
Signed-off-by: Vlad Yasevich Acked-by: Sridhar Samudrala --- include/net/sctp/structs.h | 1 + net/sctp/input.c | 16 ++------------- net/sctp/transport.c | 41 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5e81984b8478..dc0e70cb0f8b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1006,6 +1006,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32); void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); +void sctp_transport_update_pmtu(struct sctp_transport *, u32); /* This is the structure we use to queue packets as they come into diff --git a/net/sctp/input.c b/net/sctp/input.c index 885109fb3dda..45d6a644cf06 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -371,20 +371,8 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; if (t->param_flags & SPP_PMTUD_ENABLE) { - if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", - __FUNCTION__, pmtu, - SCTP_DEFAULT_MINSEGMENT); - /* Use default minimum segment size and disable - * pmtu discovery on this transport. - */ - t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - t->param_flags = (t->param_flags & ~SPP_PMTUD) | - SPP_PMTUD_DISABLE; - } else { - t->pathmtu = pmtu; - } + /* Update transports view of the MTU */ + sctp_transport_update_pmtu(t, pmtu); /* Update association pmtu. 
*/ sctp_assoc_sync_pmtu(asoc); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 961df275d5b9..e14c271cf28b 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -241,6 +241,47 @@ void sctp_transport_pmtu(struct sctp_transport *transport) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } +/* this is a complete rip-off from __sk_dst_check + * the cookie is always 0 since this is how it's used in the + * pmtu code + */ +static struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) +{ + struct dst_entry *dst = t->dst; + + if (dst && dst->obsolete && dst->ops->check(dst, 0) == NULL) { + dst_release(t->dst); + t->dst = NULL; + return NULL; + } + + return dst; +} + +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +{ + struct dst_entry *dst; + + if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { + printk(KERN_WARNING "%s: Reported pmtu %d too low, " + "using default minimum of %d\n", + __FUNCTION__, pmtu, + SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment size and disable + * pmtu discovery on this transport. + */ + t->pathmtu = SCTP_DEFAULT_MINSEGMENT; + t->param_flags = (t->param_flags & ~SPP_PMTUD) | + SPP_PMTUD_DISABLE; + } else { + t->pathmtu = pmtu; + } + + dst = sctp_transport_dst_check(t); + if (dst) + dst->ops->update_pmtu(dst, pmtu); +} + /* Caches the dst entry and source address for a transport's destination * address. */ From 8a4794914f9cf2681235ec2311e189fe307c28c7 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 7 Jun 2007 14:21:05 -0400 Subject: [PATCH 14/17] [SCTP] Flag a pmtu change request Currently, if the socket is owned by the user, we drop the ICMP message. As a result SCTP forgets that path MTU changed and never adjusts its estimate. This causes all subsequent packets to be fragmented. With this patch, we'll flag the association that it needs to update its estimate based on the already updated routing information. 
Signed-off-by: Vlad Yasevich Acked-by: Sridhar Samudrala --- include/net/sctp/sctp.h | 7 +++++++ include/net/sctp/structs.h | 6 ++++++ net/sctp/associola.c | 4 ++++ net/sctp/input.c | 8 +++++++- net/sctp/socket.c | 3 +++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index dda72bf5b9b4..16baef4dab7e 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -503,6 +503,13 @@ static inline int sctp_frag_point(const struct sctp_sock *sp, int pmtu) return frag; } +static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) +{ + + sctp_assoc_sync_pmtu(asoc); + asoc->pmtu_pending = 0; +} + /* Walk through a list of TLV parameters. Don't trust the * individual parameter lengths and instead depend on * the chunk length to indicate when to stop. Make sure diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index dc0e70cb0f8b..ee4559b11302 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -912,6 +912,9 @@ struct sctp_transport { */ __u16 pathmaxrxt; + /* is the Path MTU update pending on this tranport */ + __u8 pmtu_pending; + /* PMTU : The current known path MTU. */ __u32 pathmtu; @@ -1566,6 +1569,9 @@ struct sctp_association { */ __u16 pathmaxrxt; + /* Flag that path mtu update is pending */ + __u8 pmtu_pending; + /* Association : The smallest PMTU discovered for all of the * PMTU : peer's transport addresses. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index df94e3cdfba3..498edb0cd4e5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1231,6 +1231,10 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. 
*/ list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); + if (t->pmtu_pending && t->dst) { + sctp_transport_update_pmtu(t, dst_mtu(t->dst)); + t->pmtu_pending = 0; + } if (!pmtu || (t->pathmtu < pmtu)) pmtu = t->pathmtu; } diff --git a/net/sctp/input.c b/net/sctp/input.c index 45d6a644cf06..d57ff7f3c576 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -367,9 +367,15 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (sock_owned_by_user(sk) || !t || (t->pathmtu == pmtu)) + if (!t || (t->pathmtu == pmtu)) return; + if (sock_owned_by_user(sk)) { + asoc->pmtu_pending = 1; + t->pmtu_pending = 1; + return; + } + if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ sctp_transport_update_pmtu(t, pmtu); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 45510c46c223..6edaaa009d62 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1662,6 +1662,9 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_free; } + if (asoc->pmtu_pending) + sctp_assoc_pending_pmtu(asoc); + /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D * does not specify what this error is, but this looks like From 06ad391919b2078ec2e012f0593014b88e7a6c4e Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 12 Jun 2007 15:26:22 -0400 Subject: [PATCH 15/17] [SCTP] Don't disable PMTU discovery when mtu is small Right now, when we receive a mtu estimate smaller than the minimum threshold in the ICMP message, we disable the path mtu discovery on the transport. This leads to the never increasing sctp fragmentation point even when the real path mtu has increased. 
Signed-off-by: Vlad Yasevich --- net/sctp/transport.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e14c271cf28b..5f467c914f80 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -271,8 +271,6 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) * pmtu discovery on this transport. */ t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - t->param_flags = (t->param_flags & ~SPP_PMTUD) | - SPP_PMTUD_DISABLE; } else { t->pathmtu = pmtu; } From d7ea5b91fad553e445bbe5d958b6a7b16222c092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 14 Jun 2007 12:58:26 -0700 Subject: [PATCH 16/17] [TCP]: Add missing break to TCP option parsing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This flaw does not affect any behavior (currently). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ed4a1bda822d..d6d0f9b6cdc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2932,6 +2932,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, opt_rx->sack_ok) { TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; } + break; #ifdef CONFIG_TCP_MD5SIG case TCPOPT_MD5SIG: /* From 74235a25c673f80147c1f975304888e8212a14d5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Jun 2007 13:02:55 -0700 Subject: [PATCH 17/17] [IPV6] addrconf: Fix IPv6 on tuntap tunnels The recent patch that added ipv6_hwtype is broken on tuntap tunnels. Indeed, it's broken on any device that does not pass the ipv6_hwtype test. The reason is that the original test only applies to autoconfiguration, not IPv6 support. IPv6 support is allowed on any device. 
In fact, even with the ipv6_hwtype patch applied you can still add IPv6 addresses to any interface that doesn't pass the ipv6_hwtype test provided that they have a sufficiently large MTU. This is a serious problem because come deregistration time these devices won't be cleaned up properly. I've gone back and looked at the rationale for the patch. It appears that the real problem is that we were creating IPv6 devices even if the MTU was too small. So here's a patch which fixes that and reverts the ipv6_hwtype stuff. Thanks to Kanru Chen for reporting this issue. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5a5f8bd4597a..f96ed76d8fa4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2154,6 +2154,15 @@ static void addrconf_dev_config(struct net_device *dev) ASSERT_RTNL(); + if ((dev->type != ARPHRD_ETHER) && + (dev->type != ARPHRD_FDDI) && + (dev->type != ARPHRD_IEEE802_TR) && + (dev->type != ARPHRD_ARCNET) && + (dev->type != ARPHRD_INFINIBAND)) { + /* Alas, we support only Ethernet autoconfiguration. 
*/ + return; + } + idev = addrconf_add_dev(dev); if (idev == NULL) return; @@ -2241,36 +2250,16 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) ip6_tnl_add_linklocal(idev); } -static int ipv6_hwtype(struct net_device *dev) -{ - if ((dev->type == ARPHRD_ETHER) || - (dev->type == ARPHRD_LOOPBACK) || - (dev->type == ARPHRD_SIT) || - (dev->type == ARPHRD_TUNNEL6) || - (dev->type == ARPHRD_FDDI) || - (dev->type == ARPHRD_IEEE802_TR) || - (dev->type == ARPHRD_ARCNET) || - (dev->type == ARPHRD_INFINIBAND)) - return 1; - - return 0; -} - static int addrconf_notify(struct notifier_block *this, unsigned long event, void * data) { struct net_device *dev = (struct net_device *) data; - struct inet6_dev *idev; + struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; - if (!ipv6_hwtype(dev)) - return NOTIFY_OK; - - idev = __in6_dev_get(dev); - switch(event) { case NETDEV_REGISTER: - if (!idev) { + if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); if (!idev) printk(KERN_WARNING "IPv6: add_dev failed for %s\n",