Merge branch 'lineage-18.0' into followmsi-11

This commit is contained in:
followmsi 2020-12-06 20:31:44 +01:00
commit 5b7eb403ff
89 changed files with 1891 additions and 613 deletions

View file

@ -130,17 +130,6 @@ somaxconn - INTEGER
Defaults to 128. See also tcp_max_syn_backlog for additional tuning
for TCP sockets.
tcp_abc - INTEGER
Controls Appropriate Byte Count (ABC) defined in RFC3465.
ABC is a way of increasing congestion window (cwnd) more slowly
in response to partial acknowledgments.
Possible values are:
0 increase cwnd once per acknowledgment (no ABC)
1 increase cwnd once per acknowledgment of full sized segment
2 allow increase cwnd by two if acknowledgment is
of two segments to compensate for delayed acknowledgments.
Default: 0 (off)
tcp_abort_on_overflow - BOOLEAN
If listening service is too slow to accept new connections,
reset them. Default state is FALSE. It means that if overflow
@ -469,6 +458,15 @@ tcp_syn_retries - INTEGER
tcp_timestamps - BOOLEAN
Enable timestamps as defined in RFC1323.
tcp_min_tso_segs - INTEGER
Minimal number of segments per TSO frame.
Since linux-3.12, TCP does an automatic sizing of TSO frames,
depending on flow rate, instead of filling 64Kbytes packets.
For specific usages, it's possible to force TCP to build big
TSO frames. Note that TCP stack might split too big TSO packets
if available window is too small.
Default: 2
tcp_tso_win_divisor - INTEGER
This allows control over what percentage of the congestion window
can be consumed by a single TSO frame.
@ -549,6 +547,17 @@ tcp_challenge_ack_limit - INTEGER
in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
Default: 100
tcp_limit_output_bytes - INTEGER
Controls TCP Small Queue limit per tcp socket.
TCP bulk sender tends to increase packets in flight until it
gets loss notifications. With SNDBUF autotuning, this can
result in a large amount of packets queued in qdisc/device
on the local machine, hurting latency of other flows, for
typical pfifo_fast qdiscs.
tcp_limit_output_bytes limits the number of bytes on qdisc
or device to reduce artificial RTT/cwnd and reduce bufferbloat.
Default: 131072
UDP variables:
udp_mem - vector of 3 INTEGERs: min, pressure, max

View file

@ -76,6 +76,8 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#ifdef __KERNEL__
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here.

View file

@ -69,4 +69,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_SOCKET_H */

View file

@ -69,4 +69,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* __ASM_AVR32_SOCKET_H */

View file

@ -71,6 +71,8 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_SOCKET_H */

View file

@ -69,5 +69,7 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_SOCKET_H */

View file

@ -69,4 +69,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_SOCKET_H */

View file

@ -78,4 +78,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_IA64_SOCKET_H */

View file

@ -69,4 +69,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_M32R_SOCKET_H */

View file

@ -69,4 +69,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_SOCKET_H */

View file

@ -89,6 +89,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#ifdef __KERNEL__
/** sock_type - Socket types

View file

@ -69,4 +69,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_SOCKET_H */

View file

@ -68,6 +68,7 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 0x4024
#define SO_MAX_PACING_RATE 0x4025
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here.

View file

@ -76,4 +76,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_POWERPC_SOCKET_H */

View file

@ -77,4 +77,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* _ASM_SOCKET_H */

View file

@ -65,6 +65,7 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 0x0027
#define SO_MAX_PACING_RATE 0x0028
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001

View file

@ -80,4 +80,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 47
#endif /* _XTENSA_SOCKET_H */

View file

@ -100,10 +100,16 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev)
static int dummy_dev_init(struct net_device *dev)
{
int i;
dev->dstats = alloc_percpu(struct pcpu_dstats);
if (!dev->dstats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct pcpu_dstats *dstats;
dstats = per_cpu_ptr(dev->dstats, i);
u64_stats_init(&dstats->syncp);
}
return 0;
}

View file

@ -347,10 +347,10 @@ be_get_ethtool_stats(struct net_device *netdev,
struct be_rx_stats *stats = rx_stats(rxo);
do {
start = u64_stats_fetch_begin_bh(&stats->sync);
start = u64_stats_fetch_begin_irq(&stats->sync);
data[base] = stats->rx_bytes;
data[base + 1] = stats->rx_pkts;
} while (u64_stats_fetch_retry_bh(&stats->sync, start));
} while (u64_stats_fetch_retry_irq(&stats->sync, start));
for (i = 2; i < ETHTOOL_RXSTATS_NUM; i++) {
p = (u8 *)stats + et_rx_stats[i].offset;
@ -363,19 +363,19 @@ be_get_ethtool_stats(struct net_device *netdev,
struct be_tx_stats *stats = tx_stats(txo);
do {
start = u64_stats_fetch_begin_bh(&stats->sync_compl);
start = u64_stats_fetch_begin_irq(&stats->sync_compl);
data[base] = stats->tx_compl;
} while (u64_stats_fetch_retry_bh(&stats->sync_compl, start));
} while (u64_stats_fetch_retry_irq(&stats->sync_compl, start));
do {
start = u64_stats_fetch_begin_bh(&stats->sync);
start = u64_stats_fetch_begin_irq(&stats->sync);
for (i = 1; i < ETHTOOL_TXSTATS_NUM; i++) {
p = (u8 *)stats + et_tx_stats[i].offset;
data[base + i] =
(et_tx_stats[i].size == sizeof(u64)) ?
*(u64 *)p : *(u32 *)p;
}
} while (u64_stats_fetch_retry_bh(&stats->sync, start));
} while (u64_stats_fetch_retry_irq(&stats->sync, start));
base += ETHTOOL_TXSTATS_NUM;
}
}

View file

@ -445,10 +445,10 @@ static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
for_all_rx_queues(adapter, rxo, i) {
const struct be_rx_stats *rx_stats = rx_stats(rxo);
do {
start = u64_stats_fetch_begin_bh(&rx_stats->sync);
start = u64_stats_fetch_begin_irq(&rx_stats->sync);
pkts = rx_stats(rxo)->rx_pkts;
bytes = rx_stats(rxo)->rx_bytes;
} while (u64_stats_fetch_retry_bh(&rx_stats->sync, start));
} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
stats->rx_packets += pkts;
stats->rx_bytes += bytes;
stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
@ -459,10 +459,10 @@ static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
for_all_tx_queues(adapter, txo, i) {
const struct be_tx_stats *tx_stats = tx_stats(txo);
do {
start = u64_stats_fetch_begin_bh(&tx_stats->sync);
start = u64_stats_fetch_begin_irq(&tx_stats->sync);
pkts = tx_stats(txo)->tx_pkts;
bytes = tx_stats(txo)->tx_bytes;
} while (u64_stats_fetch_retry_bh(&tx_stats->sync, start));
} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
stats->tx_packets += pkts;
stats->tx_bytes += bytes;
}
@ -1086,9 +1086,9 @@ static void be_eqd_update(struct be_adapter *adapter, struct be_eq_obj *eqo)
return;
do {
start = u64_stats_fetch_begin_bh(&stats->sync);
start = u64_stats_fetch_begin_irq(&stats->sync);
pkts = stats->rx_pkts;
} while (u64_stats_fetch_retry_bh(&stats->sync, start));
} while (u64_stats_fetch_retry_irq(&stats->sync, start));
stats->rx_pps = (unsigned long)(pkts - stats->rx_pkts_prev) / (delta / HZ);
stats->rx_pkts_prev = pkts;
@ -1823,6 +1823,9 @@ static int be_tx_cqs_create(struct be_adapter *adapter)
if (status)
return status;
u64_stats_init(&txo->stats.sync);
u64_stats_init(&txo->stats.sync_compl);
/* If num_evt_qs is less than num_tx_qs, then more than
* one txq share an eq
*/
@ -1888,6 +1891,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter)
if (rc)
return rc;
u64_stats_init(&rxo->stats.sync);
eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
if (rc)

View file

@ -2094,15 +2094,15 @@ static void igb_get_ethtool_stats(struct net_device *netdev,
ring = adapter->tx_ring[j];
do {
start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
data[i] = ring->tx_stats.packets;
data[i+1] = ring->tx_stats.bytes;
data[i+2] = ring->tx_stats.restart_queue;
} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
do {
start = u64_stats_fetch_begin_bh(&ring->tx_syncp2);
start = u64_stats_fetch_begin_irq(&ring->tx_syncp2);
restart2 = ring->tx_stats.restart_queue2;
} while (u64_stats_fetch_retry_bh(&ring->tx_syncp2, start));
} while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start));
data[i+2] += restart2;
i += IGB_TX_QUEUE_STATS_LEN;
@ -2110,13 +2110,13 @@ static void igb_get_ethtool_stats(struct net_device *netdev,
for (j = 0; j < adapter->num_rx_queues; j++) {
ring = adapter->rx_ring[j];
do {
start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
data[i] = ring->rx_stats.packets;
data[i+1] = ring->rx_stats.bytes;
data[i+2] = ring->rx_stats.drops;
data[i+3] = ring->rx_stats.csum_err;
data[i+4] = ring->rx_stats.alloc_failed;
} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
i += IGB_RX_QUEUE_STATS_LEN;
}
spin_unlock(&adapter->stats64_lock);

View file

@ -728,6 +728,8 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
/* For 82575, context index must be unique per ring. */
if (adapter->hw.mac.type == e1000_82575)
set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
u64_stats_init(&ring->tx_syncp);
u64_stats_init(&ring->tx_syncp2);
adapter->tx_ring[i] = ring;
}
/* Restore the adapter's original node */
@ -759,6 +761,7 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
if (adapter->hw.mac.type == e1000_i350)
set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
u64_stats_init(&ring->rx_syncp);
adapter->rx_ring[i] = ring;
}
/* Restore the adapter's original node */
@ -4658,10 +4661,10 @@ void igb_update_stats(struct igb_adapter *adapter,
}
do {
start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
_bytes = ring->rx_stats.bytes;
_packets = ring->rx_stats.packets;
} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
bytes += _bytes;
packets += _packets;
}
@ -4674,10 +4677,10 @@ void igb_update_stats(struct igb_adapter *adapter,
for (i = 0; i < adapter->num_tx_queues; i++) {
struct igb_ring *ring = adapter->tx_ring[i];
do {
start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
_bytes = ring->tx_stats.bytes;
_packets = ring->tx_stats.packets;
} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
bytes += _bytes;
packets += _packets;
}

View file

@ -1093,10 +1093,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
}
do {
start = u64_stats_fetch_begin_bh(&ring->syncp);
start = u64_stats_fetch_begin_irq(&ring->syncp);
data[i] = ring->stats.packets;
data[i+1] = ring->stats.bytes;
} while (u64_stats_fetch_retry_bh(&ring->syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
i += 2;
}
for (j = 0; j < IXGBE_NUM_RX_QUEUES; j++) {
@ -1109,10 +1109,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
}
do {
start = u64_stats_fetch_begin_bh(&ring->syncp);
start = u64_stats_fetch_begin_irq(&ring->syncp);
data[i] = ring->stats.packets;
data[i+1] = ring->stats.bytes;
} while (u64_stats_fetch_retry_bh(&ring->syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
i += 2;
}

View file

@ -4497,6 +4497,8 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
if (!tx_ring->tx_buffer_info)
goto err;
u64_stats_init(&tx_ring->syncp);
/* round up to nearest 4K */
tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
tx_ring->size = ALIGN(tx_ring->size, 4096);
@ -4573,6 +4575,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
if (!rx_ring->rx_buffer_info)
goto err;
u64_stats_init(&rx_ring->syncp);
/* Round up to nearest 4K */
rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
@ -6480,10 +6484,10 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev,
if (ring) {
do {
start = u64_stats_fetch_begin_bh(&ring->syncp);
start = u64_stats_fetch_begin_irq(&ring->syncp);
packets = ring->stats.packets;
bytes = ring->stats.bytes;
} while (u64_stats_fetch_retry_bh(&ring->syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
stats->rx_packets += packets;
stats->rx_bytes += bytes;
}
@ -6496,10 +6500,10 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev,
if (ring) {
do {
start = u64_stats_fetch_begin_bh(&ring->syncp);
start = u64_stats_fetch_begin_irq(&ring->syncp);
packets = ring->stats.packets;
bytes = ring->stats.bytes;
} while (u64_stats_fetch_retry_bh(&ring->syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
stats->tx_packets += packets;
stats->tx_bytes += bytes;
}

View file

@ -3231,10 +3231,10 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev,
for (i = 0; i < adapter->num_rx_queues; i++) {
ring = &adapter->rx_ring[i];
do {
start = u64_stats_fetch_begin_bh(&ring->syncp);
start = u64_stats_fetch_begin_irq(&ring->syncp);
bytes = ring->total_bytes;
packets = ring->total_packets;
} while (u64_stats_fetch_retry_bh(&ring->syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
stats->rx_bytes += bytes;
stats->rx_packets += packets;
}
@ -3242,10 +3242,10 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev,
for (i = 0; i < adapter->num_tx_queues; i++) {
ring = &adapter->tx_ring[i];
do {
start = u64_stats_fetch_begin_bh(&ring->syncp);
start = u64_stats_fetch_begin_irq(&ring->syncp);
bytes = ring->total_bytes;
packets = ring->total_packets;
} while (u64_stats_fetch_retry_bh(&ring->syncp, start));
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
stats->tx_bytes += bytes;
stats->tx_packets += packets;
}

View file

@ -3892,19 +3892,19 @@ static struct rtnl_link_stats64 *sky2_get_stats(struct net_device *dev,
u64 _bytes, _packets;
do {
start = u64_stats_fetch_begin_bh(&sky2->rx_stats.syncp);
start = u64_stats_fetch_begin_irq(&sky2->rx_stats.syncp);
_bytes = sky2->rx_stats.bytes;
_packets = sky2->rx_stats.packets;
} while (u64_stats_fetch_retry_bh(&sky2->rx_stats.syncp, start));
} while (u64_stats_fetch_retry_irq(&sky2->rx_stats.syncp, start));
stats->rx_packets = _packets;
stats->rx_bytes = _bytes;
do {
start = u64_stats_fetch_begin_bh(&sky2->tx_stats.syncp);
start = u64_stats_fetch_begin_irq(&sky2->tx_stats.syncp);
_bytes = sky2->tx_stats.bytes;
_packets = sky2->tx_stats.packets;
} while (u64_stats_fetch_retry_bh(&sky2->tx_stats.syncp, start));
} while (u64_stats_fetch_retry_irq(&sky2->tx_stats.syncp, start));
stats->tx_packets = _packets;
stats->tx_bytes = _bytes;
@ -4750,6 +4750,9 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw,
sky2->hw = hw;
sky2->msg_enable = netif_msg_init(debug, default_msg);
u64_stats_init(&sky2->tx_stats.syncp);
u64_stats_init(&sky2->rx_stats.syncp);
/* Auto speed and flow control */
sky2->flags = SKY2_FLAG_AUTO_SPEED | SKY2_FLAG_AUTO_PAUSE;
if (hw->chip_id != CHIP_ID_YUKON_XL)

View file

@ -2073,6 +2073,10 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
vdev->config.tx_steering_type;
vpath->fifo.ndev = vdev->ndev;
vpath->fifo.pdev = vdev->pdev;
u64_stats_init(&vpath->fifo.stats.syncp);
u64_stats_init(&vpath->ring.stats.syncp);
if (vdev->config.tx_steering_type)
vpath->fifo.txq =
netdev_get_tx_queue(vdev->ndev, i);

View file

@ -1755,19 +1755,19 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
/* software stats */
do {
syncp_start = u64_stats_fetch_begin_bh(&np->swstats_rx_syncp);
syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp);
storage->rx_packets = np->stat_rx_packets;
storage->rx_bytes = np->stat_rx_bytes;
storage->rx_dropped = np->stat_rx_dropped;
storage->rx_missed_errors = np->stat_rx_missed_errors;
} while (u64_stats_fetch_retry_bh(&np->swstats_rx_syncp, syncp_start));
} while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start));
do {
syncp_start = u64_stats_fetch_begin_bh(&np->swstats_tx_syncp);
syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp);
storage->tx_packets = np->stat_tx_packets;
storage->tx_bytes = np->stat_tx_bytes;
storage->tx_dropped = np->stat_tx_dropped;
} while (u64_stats_fetch_retry_bh(&np->swstats_tx_syncp, syncp_start));
} while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start));
/* If the nic supports hw counters then retrieve latest values */
if (np->driver_data & DEV_HAS_STATISTICS_V123) {
@ -5544,6 +5544,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
spin_lock_init(&np->lock);
spin_lock_init(&np->hwstats_lock);
SET_NETDEV_DEV(dev, &pci_dev->dev);
u64_stats_init(&np->swstats_rx_syncp);
u64_stats_init(&np->swstats_tx_syncp);
init_timer(&np->oom_kick);
np->oom_kick.data = (unsigned long) dev;

View file

@ -828,6 +828,9 @@ retry:
/* enable PCI bus-mastering */
pci_set_master (pdev);
u64_stats_init(&tp->rx_stats.syncp);
u64_stats_init(&tp->tx_stats.syncp);
if (use_io) {
ioaddr = pci_iomap(pdev, 0, 0);
if (!ioaddr) {
@ -2547,16 +2550,16 @@ rtl8139_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
netdev_stats_to_stats64(stats, &dev->stats);
do {
start = u64_stats_fetch_begin_bh(&tp->rx_stats.syncp);
start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp);
stats->rx_packets = tp->rx_stats.packets;
stats->rx_bytes = tp->rx_stats.bytes;
} while (u64_stats_fetch_retry_bh(&tp->rx_stats.syncp, start));
} while (u64_stats_fetch_retry_irq(&tp->rx_stats.syncp, start));
do {
start = u64_stats_fetch_begin_bh(&tp->tx_stats.syncp);
start = u64_stats_fetch_begin_irq(&tp->tx_stats.syncp);
stats->tx_packets = tp->tx_stats.packets;
stats->tx_bytes = tp->tx_stats.bytes;
} while (u64_stats_fetch_retry_bh(&tp->tx_stats.syncp, start));
} while (u64_stats_fetch_retry_irq(&tp->tx_stats.syncp, start));
return stats;
}

View file

@ -5790,17 +5790,17 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
rtl8169_rx_missed(dev, ioaddr);
do {
start = u64_stats_fetch_begin_bh(&tp->rx_stats.syncp);
start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp);
stats->rx_packets = tp->rx_stats.packets;
stats->rx_bytes = tp->rx_stats.bytes;
} while (u64_stats_fetch_retry_bh(&tp->rx_stats.syncp, start));
} while (u64_stats_fetch_retry_irq(&tp->rx_stats.syncp, start));
do {
start = u64_stats_fetch_begin_bh(&tp->tx_stats.syncp);
start = u64_stats_fetch_begin_irq(&tp->tx_stats.syncp);
stats->tx_packets = tp->tx_stats.packets;
stats->tx_bytes = tp->tx_stats.bytes;
} while (u64_stats_fetch_retry_bh(&tp->tx_stats.syncp, start));
} while (u64_stats_fetch_retry_irq(&tp->tx_stats.syncp, start));
stats->rx_dropped = dev->stats.rx_dropped;
stats->tx_dropped = dev->stats.tx_dropped;

View file

@ -1093,6 +1093,8 @@ static void tile_net_register(void *dev_ptr)
info->egress_timer.data = (long)info;
info->egress_timer.function = tile_net_handle_egress_timer;
u64_stats_init(&info->stats.syncp);
priv->cpu[my_cpu] = info;
/*

View file

@ -975,6 +975,9 @@ static int __devinit rhine_init_one(struct pci_dev *pdev,
dev->base_addr = (unsigned long)ioaddr;
rp->base = ioaddr;
u64_stats_init(&rp->tx_stats.syncp);
u64_stats_init(&rp->rx_stats.syncp);
/* Get chip registers into a sane state */
rhine_power_init(dev);
rhine_hw_init(dev, pioaddr);

View file

@ -136,18 +136,18 @@ static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev,
unsigned int start;
do {
start = u64_stats_fetch_begin_bh(&dp->rsync);
start = u64_stats_fetch_begin_irq(&dp->rsync);
stats->rx_packets = dp->rx_packets;
stats->rx_bytes = dp->rx_bytes;
} while (u64_stats_fetch_retry_bh(&dp->rsync, start));
} while (u64_stats_fetch_retry_irq(&dp->rsync, start));
do {
start = u64_stats_fetch_begin_bh(&dp->tsync);
start = u64_stats_fetch_begin_irq(&dp->tsync);
stats->tx_packets = dp->tx_packets;
stats->tx_bytes = dp->tx_bytes;
} while (u64_stats_fetch_retry_bh(&dp->tsync, start));
} while (u64_stats_fetch_retry_irq(&dp->tsync, start));
stats->rx_dropped = dev->stats.rx_dropped;
stats->tx_dropped = dev->stats.tx_dropped;
@ -264,6 +264,7 @@ MODULE_PARM_DESC(numifbs, "Number of ifb devices");
static int __init ifb_init_one(int index)
{
struct net_device *dev_ifb;
struct ifb_private *dp;
int err;
dev_ifb = alloc_netdev(sizeof(struct ifb_private),
@ -272,6 +273,10 @@ static int __init ifb_init_one(int index)
if (!dev_ifb)
return -ENOMEM;
dp = netdev_priv(dev_ifb);
u64_stats_init(&dp->rsync);
u64_stats_init(&dp->tsync);
dev_ifb->rtnl_link_ops = &ifb_link_ops;
err = register_netdevice(dev_ifb);
if (err < 0)

View file

@ -137,10 +137,16 @@ static const struct ethtool_ops loopback_ethtool_ops = {
static int loopback_dev_init(struct net_device *dev)
{
int i;
dev->lstats = alloc_percpu(struct pcpu_lstats);
if (!dev->lstats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct pcpu_lstats *lb_stats;
lb_stats = per_cpu_ptr(dev->lstats, i);
u64_stats_init(&lb_stats->syncp);
}
return 0;
}

View file

@ -452,6 +452,7 @@ static int macvlan_init(struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
const struct net_device *lowerdev = vlan->lowerdev;
int i;
dev->state = (dev->state & ~MACVLAN_STATE_MASK) |
(lowerdev->state & MACVLAN_STATE_MASK);
@ -468,6 +469,12 @@ static int macvlan_init(struct net_device *dev)
if (!vlan->pcpu_stats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct macvlan_pcpu_stats *mvlstats;
mvlstats = per_cpu_ptr(vlan->pcpu_stats, i);
u64_stats_init(&mvlstats->syncp);
}
return 0;
}
@ -498,13 +505,13 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
for_each_possible_cpu(i) {
p = per_cpu_ptr(vlan->pcpu_stats, i);
do {
start = u64_stats_fetch_begin_bh(&p->syncp);
start = u64_stats_fetch_begin_irq(&p->syncp);
rx_packets = p->rx_packets;
rx_bytes = p->rx_bytes;
rx_multicast = p->rx_multicast;
tx_packets = p->tx_packets;
tx_bytes = p->tx_bytes;
} while (u64_stats_fetch_retry_bh(&p->syncp, start));
} while (u64_stats_fetch_retry_irq(&p->syncp, start));
stats->rx_packets += rx_packets;
stats->rx_bytes += rx_bytes;

View file

@ -747,6 +747,12 @@ static int team_init(struct net_device *dev)
if (!team->pcpu_stats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct team_pcpu_stats *team_stats;
team_stats = per_cpu_ptr(team->pcpu_stats, i);
u64_stats_init(&team_stats->syncp);
}
for (i = 0; i < TEAM_PORT_HASHENTRIES; i++)
INIT_HLIST_HEAD(&team->port_hlist[i]);
INIT_LIST_HEAD(&team->port_list);
@ -925,13 +931,13 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
for_each_possible_cpu(i) {
p = per_cpu_ptr(team->pcpu_stats, i);
do {
start = u64_stats_fetch_begin_bh(&p->syncp);
start = u64_stats_fetch_begin_irq(&p->syncp);
rx_packets = p->rx_packets;
rx_bytes = p->rx_bytes;
rx_multicast = p->rx_multicast;
tx_packets = p->tx_packets;
tx_bytes = p->tx_bytes;
} while (u64_stats_fetch_retry_bh(&p->syncp, start));
} while (u64_stats_fetch_retry_irq(&p->syncp, start));
stats->rx_packets += rx_packets;
stats->rx_bytes += rx_bytes;

View file

@ -171,13 +171,13 @@ static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
unsigned int start;
do {
start = u64_stats_fetch_begin_bh(&stats->syncp);
start = u64_stats_fetch_begin_irq(&stats->syncp);
rx_packets = stats->rx_packets;
tx_packets = stats->tx_packets;
rx_bytes = stats->rx_bytes;
tx_bytes = stats->tx_bytes;
rx_dropped = stats->rx_dropped;
} while (u64_stats_fetch_retry_bh(&stats->syncp, start));
} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
tot->rx_packets += rx_packets;
tot->tx_packets += tx_packets;
tot->rx_bytes += rx_bytes;
@ -230,6 +230,7 @@ static int veth_dev_init(struct net_device *dev)
{
struct veth_net_stats __percpu *stats;
struct veth_priv *priv;
int i;
stats = alloc_percpu(struct veth_net_stats);
if (stats == NULL)
@ -237,6 +238,13 @@ static int veth_dev_init(struct net_device *dev)
priv = netdev_priv(dev);
priv->stats = stats;
for_each_possible_cpu(i) {
struct pcpu_vstats *veth_stats;
veth_stats = per_cpu_ptr(dev->vstats, i);
u64_stats_init(&veth_stats->syncp);
}
return 0;
}

View file

@ -1077,6 +1077,13 @@ static int virtnet_probe(struct virtio_device *vdev)
if (vi->stats == NULL)
goto free;
for_each_possible_cpu(i) {
struct virtnet_stats *virtnet_stats;
virtnet_stats = per_cpu_ptr(vi->stats, i);
u64_stats_init(&virtnet_stats->tx_syncp);
u64_stats_init(&virtnet_stats->rx_syncp);
}
INIT_DELAYED_WORK(&vi->refill, refill_work);
sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));

View file

@ -1076,13 +1076,13 @@ static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
unsigned int start;
do {
start = u64_stats_fetch_begin_bh(&stats->syncp);
start = u64_stats_fetch_begin_irq(&stats->syncp);
rx_packets = stats->rx_packets;
tx_packets = stats->tx_packets;
rx_bytes = stats->rx_bytes;
tx_bytes = stats->tx_bytes;
} while (u64_stats_fetch_retry_bh(&stats->syncp, start));
} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
tot->rx_packets += rx_packets;
tot->tx_packets += tx_packets;
@ -1314,6 +1314,12 @@ static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev
if (np->stats == NULL)
goto exit;
for_each_possible_cpu(i) {
struct netfront_stats *xen_nf_stats;
xen_nf_stats = per_cpu_ptr(np->stats, i);
u64_stats_init(&xen_nf_stats->syncp);
}
/* Initialise tx_skbs as a free chain containing every entry. */
np->tx_skb_freelist = 0;
for (i = 0; i < NET_TX_RING_SIZE; i++) {

View file

@ -72,4 +72,6 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
#define SO_MAX_PACING_RATE 44
#endif /* __ASM_GENERIC_SOCKET_H */

View file

@ -268,6 +268,7 @@ enum
LINUX_MIB_XFRMOUTPOLDEAD, /* XfrmOutPolDead */
LINUX_MIB_XFRMOUTPOLERROR, /* XfrmOutPolError */
LINUX_MIB_XFRMFWDHDRERROR, /* XfrmFwdHdrError*/
LINUX_MIB_XFRMOUTSTATEINVALID, /* XfrmOutStateInvalid */
__LINUX_MIB_XFRMMAX
};

View file

@ -168,6 +168,13 @@ struct tcp_info {
__u32 tcpi_rcv_space;
__u32 tcpi_total_retrans;
__u64 tcpi_pacing_rate;
__u64 tcpi_max_pacing_rate;
__u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
__u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
__u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */
__u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */
};
/* for TCP_MD5SIG socket option */
@ -314,16 +321,34 @@ struct tcp_sock {
* read the code and the spec side by side (and laugh ...)
* See RFC793 and RFC1122. The RFC writes these in capitals.
*/
u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
* sum(delta(rcv_nxt)), or how many bytes
* were received.
*/
u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
* total number of segments in.
*/
u32 rcv_nxt; /* What we want to receive next */
u32 copied_seq; /* Head of yet unread data */
u32 rcv_wup; /* rcv_nxt on last window update sent */
u32 snd_nxt; /* Next sequence we send */
u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
* The total number of segments sent.
*/
u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
* sum(delta(snd_una)), or how many bytes
* were acked.
*/
struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */
u32 snd_una; /* First byte we want an ack for */
u32 snd_sml; /* Last byte of the most recently transmitted small packet */
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
unsigned long tsq_flags;
/* Data for direct copy to user */
struct {
struct sk_buff_head prequeue;
@ -398,7 +423,6 @@ struct tcp_sock {
u32 sacked_out; /* SACK'd packets */
u32 fackets_out; /* FACK'd packets */
u32 tso_deferred;
u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */
/* from STCP, retrans queue hinting */
struct sk_buff* lost_skb_hint;
@ -475,6 +499,12 @@ struct tcp_sock {
struct tcp_cookie_values *cookie_values;
};
/*
 * Flag identifiers for the tsq_flags word of struct tcp_sock.
 * NOTE(review): presumably used as bit numbers with the bitops helpers -
 * confirm against the users of tsq_flags.
 */
enum tsq_flags {
	TSQ_THROTTLED	= 0,
	TSQ_QUEUED	= 1,
	TSQ_OWNED	= 2,	/* tcp_tasklet_func() found socket was locked */
};
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
return (struct tcp_sock *)sk;

View file

@ -27,8 +27,8 @@
* (On UP, there is no seqcount_t protection, a reader allowing interrupts could
* read partial values)
*
* 7) For softirq uses, readers can use u64_stats_fetch_begin_bh() and
* u64_stats_fetch_retry_bh() helpers
* 7) For irq and softirq uses, readers can use u64_stats_fetch_begin_irq() and
* u64_stats_fetch_retry_irq() helpers
*
* Usage :
*
@ -67,6 +67,13 @@ struct u64_stats_sync {
#endif
};
/*
 * u64_stats_init() - initialise a struct u64_stats_sync before first use.
 * @syncp: pointer to the struct u64_stats_sync to initialise
 *
 * The sequence counter is only used on 32bit SMP builds, so this expands
 * to nothing everywhere else. The argument is parenthesised and
 * dereferenced as a pointer, so both "&obj->syncp" expressions and plain
 * pointer variables are accepted.
 */
#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
# define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
#else
# define u64_stats_init(syncp) do { } while (0)
#endif
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
@ -107,31 +114,31 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
}
/*
* In case softirq handlers can update u64 counters, readers can use following helpers
* In case irq handlers can update u64 counters, readers can use following helpers
* - SMP 32bit arches use seqcount protection, irq safe.
* - UP 32bit must disable BH.
* - UP 32bit must disable irqs.
* - 64bit have no problem atomically reading u64 values, irq safe.
*/
static inline unsigned int u64_stats_fetch_begin_bh(const struct u64_stats_sync *syncp)
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
return read_seqcount_begin(&syncp->seq);
#else
#if BITS_PER_LONG==32
local_bh_disable();
local_irq_disable();
#endif
return 0;
#endif
}
static inline bool u64_stats_fetch_retry_bh(const struct u64_stats_sync *syncp,
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
unsigned int start)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
return read_seqcount_retry(&syncp->seq, start);
#else
#if BITS_PER_LONG==32
local_bh_enable();
local_irq_enable();
#endif
return false;
#endif

View file

@ -297,6 +297,7 @@ enum xfrm_attr_type_t {
XFRMA_MARK, /* struct xfrm_mark */
XFRMA_TFCPAD, /* __u32 */
XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */
XFRMA_SA_EXTRA_FLAGS, /* __u32 */
__XFRMA_MAX
#define XFRMA_MAX (__XFRMA_MAX - 1)
@ -367,6 +368,8 @@ struct xfrm_usersa_info {
#define XFRM_STATE_ESN 128
};
#define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1
struct xfrm_usersa_id {
xfrm_address_t daddr;
__be32 spi;

View file

@ -61,11 +61,24 @@ struct dst_entry {
#define DST_NOPEER 0x0040
#define DST_FAKE_RTABLE 0x0080
#define DST_XFRM_TUNNEL 0x0100
#define DST_XFRM_QUEUE 0x0200
unsigned short pending_confirm;
short error;
/* A non-zero value of dst->obsolete forces by-hand validation
* of the route entry. Positive values are set by the generic
* dst layer to indicate that the entry has been forcefully
* destroyed.
*
* Negative values are used by the implementation layer code to
* force invocation of the dst_ops->check() method.
*/
short obsolete;
#define DST_OBSOLETE_NONE 0
#define DST_OBSOLETE_DEAD 2
#define DST_OBSOLETE_FORCE_CHK -1
unsigned short header_len; /* more space at head required */
unsigned short trailer_len; /* space to reserve at tail */
#ifdef CONFIG_IP_ROUTE_CLASSID
@ -380,7 +393,7 @@ extern struct dst_entry *dst_destroy(struct dst_entry *dst);
static inline void dst_free(struct dst_entry *dst)
{
if (dst->obsolete > 1)
if (dst->obsolete > 0)
return;
if (!atomic_read(&dst->__refcnt)) {
dst = dst_destroy(dst);

View file

@ -54,8 +54,10 @@ struct ip_tunnel_prl_entry {
\
err = ip_local_out(skb); \
if (likely(net_xmit_eval(err) == 0)) { \
u64_stats_update_begin(&(stats1)->syncp); \
(stats1)->tx_bytes += pkt_len; \
(stats1)->tx_packets++; \
u64_stats_update_end(&(stats1)->syncp); \
} else { \
(stats2)->tx_errors++; \
(stats2)->tx_aborted_errors++; \

View file

@ -208,6 +208,8 @@ struct cg_proto;
* @sk_wmem_queued: persistent queue size
* @sk_forward_alloc: space allocated forward
* @sk_allocation: allocation mode
* @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
* @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
* @sk_sndbuf: size of send buffer in bytes
* @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@ -333,6 +335,8 @@ struct sock {
kmemcheck_bitfield_end(flags);
int sk_wmem_queued;
gfp_t sk_allocation;
u32 sk_pacing_rate; /* bytes per second */
u32 sk_max_pacing_rate;
netdev_features_t sk_route_caps;
netdev_features_t sk_route_nocaps;
int sk_gso_type;
@ -857,6 +861,8 @@ struct proto {
int (*backlog_rcv) (struct sock *sk,
struct sk_buff *skb);
void (*release_cb)(struct sock *sk);
/* Keeping track of sk's, looking them up, and port selection methods. */
void (*hash)(struct sock *sk);
void (*unhash)(struct sock *sk);

View file

@ -245,7 +245,6 @@ extern int sysctl_tcp_dma_copybreak;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_abc;
extern int sysctl_tcp_mtu_probing;
extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
@ -254,6 +253,8 @@ extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_cookie_size;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_min_tso_segs;
extern int sysctl_tcp_default_init_rwnd;
/* sysctl variables for controlling various tcp parameters */
@ -328,6 +329,8 @@ extern struct proto tcp_prot;
extern void tcp_init_mem(struct net *net);
extern void tcp_tasklet_init(void);
extern void tcp_v4_err(struct sk_buff *skb, u32);
extern void tcp_shutdown (struct sock *sk, int how);
@ -341,6 +344,8 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size);
extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
extern void tcp_release_cb(struct sock *sk);
extern void tcp_wfree(struct sk_buff *skb);
extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, unsigned int len);
@ -542,7 +547,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
}
/* tcp.c */
extern void tcp_get_info(const struct sock *, struct tcp_info *);
void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,

View file

@ -162,6 +162,7 @@ struct xfrm_state {
xfrm_address_t saddr;
int header_len;
int trailer_len;
u32 extra_flags;
} props;
struct xfrm_lifetime_cfg lft;
@ -213,6 +214,9 @@ struct xfrm_state {
struct xfrm_lifetime_cur curlft;
struct tasklet_hrtimer mtimer;
/* used to fix curlft->add_time when changing date */
long saved_tmo;
/* Last used time */
unsigned long lastused;
@ -238,6 +242,7 @@ static inline struct net *xs_net(struct xfrm_state *x)
/* xflags - make enum if more show up */
#define XFRM_TIME_DEFER 1
#define XFRM_SOFT_EXPIRE 2
enum {
XFRM_STATE_VOID,
@ -495,6 +500,12 @@ struct xfrm_policy_walk {
u32 seq;
};
/*
 * Queueing state attached to a policy: struct xfrm_policy embeds one of
 * these as its "polq" member.
 * NOTE(review): the names suggest packets are parked here until the timer
 * fires and they are retried or dropped - confirm against the policy code,
 * which is not visible from this header.
 */
struct xfrm_policy_queue {
/* list head of held sk_buffs */
struct sk_buff_head hold_queue;
/* timer associated with the hold queue */
struct timer_list hold_timer;
/* timeout value for hold_timer; NOTE(review): presumably in jiffies - confirm */
unsigned long timeout;
};
struct xfrm_policy {
#ifdef CONFIG_NET_NS
struct net *xp_net;
@ -516,6 +527,7 @@ struct xfrm_policy {
struct xfrm_lifetime_cfg lft;
struct xfrm_lifetime_cur curlft;
struct xfrm_policy_walk_entry walk;
struct xfrm_policy_queue polq;
u8 type;
u8 action;
u8 flags;
@ -570,7 +582,7 @@ struct xfrm_mgr {
struct list_head list;
char *id;
int (*notify)(struct xfrm_state *x, const struct km_event *c);
int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir);
int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp);
struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir);
int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
int (*notify_policy)(struct xfrm_policy *x, int dir, const struct km_event *c);
@ -889,15 +901,15 @@ __be16 xfrm_flowi_dport(const struct flowi *fl, const union flowi_uli *uli)
return port;
}
extern int xfrm_selector_match(const struct xfrm_selector *sel,
const struct flowi *fl,
unsigned short family);
extern bool xfrm_selector_match(const struct xfrm_selector *sel,
const struct flowi *fl,
unsigned short family);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
/* If neither has a context --> match
* Otherwise, both must have a context and the sids, doi, alg must match
*/
static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
{
return ((!s1 && !s2) ||
(s1 && s2 &&
@ -906,9 +918,9 @@ static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ct
(s1->ctx_alg == s2->ctx_alg)));
}
#else
static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
{
return 1;
return true;
}
#endif
@ -1149,6 +1161,8 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
}
}
extern void xfrm_garbage_collect(struct net *net);
#else
static inline void xfrm_sk_free_policy(struct sock *sk) {}
@ -1183,6 +1197,9 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
{
return 1;
}
/*
 * No-op stand-in for xfrm_garbage_collect(): takes the namespace pointer
 * and does nothing, so callers need no conditional compilation of their own.
 * NOTE(review): this appears to sit in the #else branch that pairs with the
 * extern declaration of the real function - confirm which config option
 * guards it.
 */
static inline void xfrm_garbage_collect(struct net *net)
{
}
#endif
static __inline__
@ -1317,6 +1334,7 @@ struct xfrm_algo_desc {
char *name;
char *compat;
u8 available:1;
u8 pfkey_supported:1;
union {
struct xfrm_algo_aead_info aead;
struct xfrm_algo_auth_info auth;
@ -1556,8 +1574,8 @@ extern void xfrm_input_init(void);
extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq);
extern void xfrm_probe_algs(void);
extern int xfrm_count_auth_supported(void);
extern int xfrm_count_enc_supported(void);
extern int xfrm_count_pfkey_auth_supported(void);
extern int xfrm_count_pfkey_enc_supported(void);
extern struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx);
extern struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx);
extern struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id);
@ -1569,17 +1587,23 @@ extern struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe);
extern struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len,
int probe);
static inline int xfrm_addr_cmp(const xfrm_address_t *a,
const xfrm_address_t *b,
int family)
/*
 * Compare two xfrm addresses as IPv6 addresses.
 *
 * Both arguments are reinterpreted as struct in6_addr and handed to
 * ipv6_addr_equal(); the result of that call is returned unchanged.
 */
static inline bool xfrm6_addr_equal(const xfrm_address_t *a,
				    const xfrm_address_t *b)
{
	const struct in6_addr *lhs = (const struct in6_addr *)a;
	const struct in6_addr *rhs = (const struct in6_addr *)b;

	return ipv6_addr_equal(lhs, rhs);
}
static inline bool xfrm_addr_equal(const xfrm_address_t *a,
const xfrm_address_t *b,
sa_family_t family)
{
switch (family) {
default:
case AF_INET:
return (__force u32)a->a4 - (__force u32)b->a4;
return ((__force u32)a->a4 ^ (__force u32)b->a4) == 0;
case AF_INET6:
return ipv6_addr_cmp((const struct in6_addr *)a,
(const struct in6_addr *)b);
return xfrm6_addr_equal(a, b);
}
}
@ -1685,12 +1709,11 @@ static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m)
static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
{
if (m->m | m->v)
NLA_PUT(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m);
return 0;
int ret = 0;
nla_put_failure:
return -1;
if (m->m | m->v)
ret = nla_put(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m);
return ret;
}
#endif /* _NET_XFRM_H */

View file

@ -388,7 +388,6 @@ static const struct bin_table bin_net_ipv4_table[] = {
{ CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" },
{ CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" },
{ CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" },
{ CTL_INT, NET_TCP_ABC, "tcp_abc" },
{ CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" },
{ CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" },
{ CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" },

View file

@ -550,7 +550,7 @@ static const struct net_device_ops vlan_netdev_ops;
static int vlan_dev_init(struct net_device *dev)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
int subclass = 0;
int subclass = 0, i;
netif_carrier_off(dev);
@ -602,6 +602,13 @@ static int vlan_dev_init(struct net_device *dev)
if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct vlan_pcpu_stats *vlan_stat;
vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
u64_stats_init(&vlan_stat->syncp);
}
return 0;
}
@ -667,13 +674,13 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
do {
start = u64_stats_fetch_begin_bh(&p->syncp);
start = u64_stats_fetch_begin_irq(&p->syncp);
rxpackets = p->rx_packets;
rxbytes = p->rx_bytes;
rxmulticast = p->rx_multicast;
txpackets = p->tx_packets;
txbytes = p->tx_bytes;
} while (u64_stats_fetch_retry_bh(&p->syncp, start));
} while (u64_stats_fetch_retry_irq(&p->syncp, start));
stats->rx_packets += rxpackets;
stats->rx_bytes += rxbytes;

View file

@ -80,11 +80,18 @@ out:
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
int i;
br->stats = alloc_percpu(struct br_cpu_netstats);
if (!br->stats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct br_cpu_netstats *br_dev_stats;
br_dev_stats = per_cpu_ptr(br->stats, i);
u64_stats_init(&br_dev_stats->syncp);
}
return 0;
}

View file

@ -94,7 +94,7 @@ loop:
* But we do not have state "obsoleted, but
* referenced by parent", so it is right.
*/
if (dst->obsolete > 1)
if (dst->obsolete > 0)
continue;
___dst_free(dst);
@ -203,7 +203,7 @@ static void ___dst_free(struct dst_entry *dst)
*/
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
dst->input = dst->output = dst_discard;
dst->obsolete = 2;
dst->obsolete = DST_OBSOLETE_DEAD;
}
void __dst_free(struct dst_entry *dst)

View file

@ -792,6 +792,12 @@ set_rcvbuf:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
case SO_MAX_PACING_RATE:
sk->sk_max_pacing_rate = val;
sk->sk_pacing_rate = min(sk->sk_pacing_rate,
sk->sk_max_pacing_rate);
break;
default:
ret = -ENOPROTOOPT;
break;
@ -1032,6 +1038,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_NOFCS:
v.val = !!sock_flag(sk, SOCK_NOFCS);
break;
case SO_MAX_PACING_RATE:
v.val = sk->sk_max_pacing_rate;
break;
default:
return -ENOPROTOOPT;
}
@ -2109,6 +2120,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_stamp = ktime_set(-1L, 0);
sk->sk_max_pacing_rate = ~0U;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@ -2145,6 +2158,10 @@ void release_sock(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_backlog.tail)
__release_sock(sk);
if (sk->sk_prot->release_cb)
sk->sk_prot->release_cb(sk);
sk->sk_lock.owned = 0;
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);

View file

@ -1143,7 +1143,7 @@ make_route:
if (dev_out->flags & IFF_LOOPBACK)
flags |= RTCF_LOCAL;
rt = dst_alloc(&dn_dst_ops, dev_out, 1, 0, DST_HOST);
rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST);
if (rt == NULL)
goto e_nobufs;
@ -1413,7 +1413,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
}
make_route:
rt = dst_alloc(&dn_dst_ops, out_dev, 0, 0, DST_HOST);
rt = dst_alloc(&dn_dst_ops, out_dev, 0, DST_OBSOLETE_NONE, DST_HOST);
if (rt == NULL)
goto e_nobufs;

View file

@ -1504,9 +1504,9 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
bhptr = per_cpu_ptr(mib[0], cpu);
syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
do {
start = u64_stats_fetch_begin_bh(syncp);
start = u64_stats_fetch_begin_irq(syncp);
v = *(((u64 *) bhptr) + offt);
} while (u64_stats_fetch_retry_bh(syncp, start));
} while (u64_stats_fetch_retry_irq(syncp, start));
res += v;
}
@ -1521,6 +1521,7 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
ptr[0] = __alloc_percpu(mibsize, align);
if (!ptr[0])
return -ENOMEM;
#if SNMP_ARRAY_SZ == 2
ptr[1] = __alloc_percpu(mibsize, align);
if (!ptr[1]) {
@ -1581,6 +1582,8 @@ static const struct net_protocol icmp_protocol = {
static __net_init int ipv4_mib_init_net(struct net *net)
{
int i;
if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
sizeof(struct tcp_mib),
__alignof__(struct tcp_mib)) < 0)
@ -1589,6 +1592,17 @@ static __net_init int ipv4_mib_init_net(struct net *net)
sizeof(struct ipstats_mib),
__alignof__(struct ipstats_mib)) < 0)
goto err_ip_mib;
for_each_possible_cpu(i) {
struct ipstats_mib *af_inet_stats;
af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i);
u64_stats_init(&af_inet_stats->syncp);
#if SNMP_ARRAY_SZ == 2
af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i);
u64_stats_init(&af_inet_stats->syncp);
#endif
}
if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
sizeof(struct linux_mib),
__alignof__(struct linux_mib)) < 0)

View file

@ -169,30 +169,49 @@ struct ipgre_net {
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
unsigned long rx_packets;
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
} __attribute__((aligned(4*sizeof(unsigned long))));
u64 rx_packets;
u64 rx_bytes;
u64 tx_packets;
u64 tx_bytes;
struct u64_stats_sync syncp;
};
static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
struct pcpu_tstats sum = { 0 };
int i;
for_each_possible_cpu(i) {
const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
unsigned int start;
sum.rx_packets += tstats->rx_packets;
sum.rx_bytes += tstats->rx_bytes;
sum.tx_packets += tstats->tx_packets;
sum.tx_bytes += tstats->tx_bytes;
do {
start = u64_stats_fetch_begin_irq(&tstats->syncp);
rx_packets = tstats->rx_packets;
tx_packets = tstats->tx_packets;
rx_bytes = tstats->rx_bytes;
tx_bytes = tstats->tx_bytes;
} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
tot->rx_packets += rx_packets;
tot->tx_packets += tx_packets;
tot->rx_bytes += rx_bytes;
tot->tx_bytes += tx_bytes;
}
dev->stats.rx_packets = sum.rx_packets;
dev->stats.rx_bytes = sum.rx_bytes;
dev->stats.tx_packets = sum.tx_packets;
dev->stats.tx_bytes = sum.tx_bytes;
return &dev->stats;
tot->multicast = dev->stats.multicast;
tot->rx_crc_errors = dev->stats.rx_crc_errors;
tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
tot->rx_length_errors = dev->stats.rx_length_errors;
tot->rx_errors = dev->stats.rx_errors;
tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
tot->tx_dropped = dev->stats.tx_dropped;
tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
tot->tx_errors = dev->stats.tx_errors;
return tot;
}
/* Given src, dst and key, find appropriate for input tunnel. */
@ -672,8 +691,10 @@ static int ipgre_rcv(struct sk_buff *skb)
}
tstats = this_cpu_ptr(tunnel->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
__skb_tunnel_rx(skb, tunnel->dev);
@ -1253,7 +1274,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
.ndo_start_xmit = ipgre_tunnel_xmit,
.ndo_do_ioctl = ipgre_tunnel_ioctl,
.ndo_change_mtu = ipgre_tunnel_change_mtu,
.ndo_get_stats = ipgre_get_stats,
.ndo_get_stats64 = ipgre_get_stats64,
};
static void ipgre_dev_free(struct net_device *dev)
@ -1507,7 +1528,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ipgre_tunnel_change_mtu,
.ndo_get_stats = ipgre_get_stats,
.ndo_get_stats64 = ipgre_get_stats64,
};
static void ipgre_tap_setup(struct net_device *dev)

View file

@ -63,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t->props.mode = x->props.mode;
t->props.saddr.a4 = x->props.saddr.a4;
t->props.flags = x->props.flags;
t->props.extra_flags = x->props.extra_flags;
memcpy(&t->mark, &x->mark, sizeof(t->mark));
if (xfrm_init_state(t))

View file

@ -144,30 +144,45 @@ static void ipip_dev_free(struct net_device *dev);
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
unsigned long rx_packets;
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
} __attribute__((aligned(4*sizeof(unsigned long))));
u64 rx_packets;
u64 rx_bytes;
u64 tx_packets;
u64 tx_bytes;
struct u64_stats_sync syncp;
};
static struct net_device_stats *ipip_get_stats(struct net_device *dev)
static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
struct pcpu_tstats sum = { 0 };
int i;
for_each_possible_cpu(i) {
const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
unsigned int start;
sum.rx_packets += tstats->rx_packets;
sum.rx_bytes += tstats->rx_bytes;
sum.tx_packets += tstats->tx_packets;
sum.tx_bytes += tstats->tx_bytes;
do {
start = u64_stats_fetch_begin_irq(&tstats->syncp);
rx_packets = tstats->rx_packets;
tx_packets = tstats->tx_packets;
rx_bytes = tstats->rx_bytes;
tx_bytes = tstats->tx_bytes;
} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
tot->rx_packets += rx_packets;
tot->tx_packets += tx_packets;
tot->rx_bytes += rx_bytes;
tot->tx_bytes += tx_bytes;
}
dev->stats.rx_packets = sum.rx_packets;
dev->stats.rx_bytes = sum.rx_bytes;
dev->stats.tx_packets = sum.tx_packets;
dev->stats.tx_bytes = sum.tx_bytes;
return &dev->stats;
tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
tot->tx_dropped = dev->stats.tx_dropped;
tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
tot->tx_errors = dev->stats.tx_errors;
tot->collisions = dev->stats.collisions;
return tot;
}
static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
@ -404,8 +419,10 @@ static int ipip_rcv(struct sk_buff *skb)
skb->pkt_type = PACKET_HOST;
tstats = this_cpu_ptr(tunnel->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
__skb_tunnel_rx(skb, tunnel->dev);
@ -730,7 +747,7 @@ static const struct net_device_ops ipip_netdev_ops = {
.ndo_start_xmit = ipip_tunnel_xmit,
.ndo_do_ioctl = ipip_tunnel_ioctl,
.ndo_change_mtu = ipip_tunnel_change_mtu,
.ndo_get_stats = ipip_get_stats,
.ndo_get_stats64 = ipip_get_stats64,
};
static void ipip_dev_free(struct net_device *dev)

View file

@ -2038,7 +2038,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
static struct rtable *rt_dst_alloc(struct net_device *dev,
bool nopolicy, bool noxfrm)
{
return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
DST_HOST |
(nopolicy ? DST_NOPOLICY : 0) |
(noxfrm ? DST_NOXFRM : 0));
@ -2923,9 +2923,10 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0);
struct rtable *ort = (struct rtable *) dst_orig;
struct rtable *rt;
rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
if (rt) {
struct dst_entry *new = &rt->dst;

View file

@ -26,7 +26,9 @@
#include <net/ping.h>
#include <net/tcp_memcontrol.h>
static int zero = 0;
static int one = 1;
static int gso_max_segs = GSO_MAX_SEGS;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@ -587,13 +589,6 @@ static struct ctl_table ipv4_table[] = {
.maxlen = TCP_CA_NAME_MAX,
.proc_handler = proc_tcp_congestion_control,
},
{
.procname = "tcp_abc",
.data = &sysctl_tcp_abc,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_mtu_probing",
.data = &sysctl_tcp_mtu_probing,
@ -622,6 +617,13 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_limit_output_bytes",
.data = &sysctl_tcp_limit_output_bytes,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#ifdef CONFIG_NET_DMA
{
.procname = "tcp_dma_copybreak",
@ -715,6 +717,15 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_tcp_default_init_rwnd
},
{
.procname = "tcp_min_tso_segs",
.data = &sysctl_tcp_min_tso_segs,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &gso_max_segs,
},
{
.procname = "udp_mem",
.data = &sysctl_udp_mem,

View file

@ -285,6 +285,8 @@
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
int sysctl_tcp_min_tso_segs __read_mostly = 2;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@ -740,10 +742,24 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
xmit_size_goal = mss_now;
if (large_allowed && sk_can_gso(sk)) {
xmit_size_goal = ((sk->sk_gso_max_size - 1) -
inet_csk(sk)->icsk_af_ops->net_header_len -
inet_csk(sk)->icsk_ext_hdr_len -
tp->tcp_header_len);
u32 gso_size, hlen;
/* Maybe we should/could use sk->sk_prot->max_header here ? */
hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
inet_csk(sk)->icsk_ext_hdr_len +
tp->tcp_header_len;
/* Goal is to send at least one packet per ms,
* not one big TSO packet every 100 ms.
* This preserves ACK clocking and is consistent
* with tcp_tso_should_defer() heuristic.
*/
gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
gso_size = max_t(u32, gso_size,
sysctl_tcp_min_tso_segs * mss_now);
xmit_size_goal = min_t(u32, gso_size,
sk->sk_gso_max_size - 1 - hlen);
xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
@ -2144,7 +2160,6 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->packets_out = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_cnt = 0;
tp->bytes_acked = 0;
tp->window_clamp = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
@ -2483,11 +2498,12 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
#endif
/* Return information about state of tcp endpoint in API format. */
void tcp_get_info(const struct sock *sk, struct tcp_info *info)
void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
memset(info, 0, sizeof(*info));
@ -2546,6 +2562,19 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_total_retrans = tp->total_retrans;
info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ?
sk->sk_pacing_rate : ~0ULL;
info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
sk->sk_max_pacing_rate : ~0ULL;
do {
start = u64_stats_fetch_begin_irq(&tp->syncp);
info->tcpi_bytes_acked = tp->bytes_acked;
info->tcpi_bytes_received = tp->bytes_received;
} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
info->tcpi_segs_out = tp->segs_out;
info->tcpi_segs_in = tp->segs_in;
if (sk->sk_socket) {
struct file *filep = sk->sk_socket->file;
if (filep)
@ -2718,12 +2747,16 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int sum_truesize = 0;
struct tcphdr *th;
unsigned thlen;
unsigned int seq;
__be32 delta;
unsigned int oldlen;
unsigned int mss;
struct sk_buff *gso_skb = skb;
__sum16 newcheck;
bool ooo_okay, copy_destructor;
if (!pskb_may_pull(skb, sizeof(*th)))
goto out;
@ -2762,27 +2795,42 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
goto out;
}
copy_destructor = gso_skb->destructor == tcp_wfree;
ooo_okay = gso_skb->ooo_okay;
/* All segments but the first should have ooo_okay cleared */
skb->ooo_okay = 0;
segs = skb_segment(skb, features);
if (IS_ERR(segs))
goto out;
/* Only first segment might have ooo_okay set */
segs->ooo_okay = ooo_okay;
delta = htonl(oldlen + (thlen + mss));
skb = segs;
th = tcp_hdr(skb);
seq = ntohl(th->seq);
newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
do {
th->fin = th->psh = 0;
th->check = newcheck;
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
th->check =
csum_fold(csum_partial(skb_transport_header(skb),
thlen, skb->csum));
seq += mss;
if (copy_destructor) {
skb->destructor = gso_skb->destructor;
skb->sk = gso_skb->sk;
sum_truesize += skb->truesize;
}
skb = skb->next;
th = tcp_hdr(skb);
@ -2790,6 +2838,19 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
th->cwr = 0;
} while (skb->next);
/* Following permits TCP Small Queues to work well with GSO :
* The callback to TCP stack will be called at the time last frag
* is freed at TX completion, and not right now when gso_skb
* is freed by GSO engine
*/
if (copy_destructor) {
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
sum_truesize += skb->truesize;
atomic_add(sum_truesize - gso_skb->truesize,
&skb->sk->sk_wmem_alloc);
}
delta = htonl(oldlen + (skb->tail - skb->transport_header) +
skb->data_len);
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
@ -3411,6 +3472,7 @@ void __init tcp_init(void)
tcp_secret_primary = &tcp_secret_one;
tcp_secret_retiring = &tcp_secret_two;
tcp_secret_secondary = &tcp_secret_two;
tcp_tasklet_init();
}
static int tcp_is_local(struct net *net, __be32 addr) {

View file

@ -310,28 +310,11 @@ void tcp_slow_start(struct tcp_sock *tp)
{
int cnt; /* increase in packets */
/* RFC3465: ABC Slow start
* Increase only after a full MSS of bytes is acked
*
* TCP sender SHOULD increase cwnd by the number of
* previously unacknowledged bytes ACKed by each incoming
* acknowledgment, provided the increase is not more than L
*/
if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
return;
if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */
else
cnt = tp->snd_cwnd; /* exponential increase */
/* RFC3465: ABC
* We MAY increase by 2 if discovered delayed ack
*/
if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
cnt <<= 1;
tp->bytes_acked = 0;
tp->snd_cwnd_cnt += cnt;
while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
tp->snd_cwnd_cnt -= tp->snd_cwnd;
@ -371,20 +354,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
/* In "safe" area, increase. */
if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp);
/* In dangerous area, increase slowly. */
else if (sysctl_tcp_abc) {
/* RFC3465: Appropriate Byte Count
* increase once for each full cwnd acked
*/
if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
}
} else {
else
tcp_cong_avoid_ai(tp, tp->snd_cwnd);
}
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);

View file

@ -101,7 +101,6 @@ int sysctl_tcp_nometrics_save __read_mostly;
int sysctl_tcp_thin_dupack __read_mostly;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;
int sysctl_tcp_default_init_rwnd __read_mostly = TCP_DEFAULT_INIT_RCVWND;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
@ -701,6 +700,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
}
}
/* Refresh sk->sk_pacing_rate so that TSO packets can be sized properly.
 * Note: the TCP stack does not implement pacing itself yet.
 * The FQ packet scheduler can be used to get cheap but effective TCP
 * pacing, smoothing the burst on large writes when the number of packets
 * in flight is significantly lower than cwnd (or rwin).
 */
static void tcp_update_pacing_rate(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u64 bytes_per_sec;

	/* Aim for 200 % of the current rate (mss * cwnd / srtt). */
	bytes_per_sec = (u64)tp->mss_cache * 2 * (HZ << 3);
	bytes_per_sec *= max(tp->snd_cwnd, tp->packets_out);

	/* Small srtt correction: the minimum srtt is 8 (1 jiffy << 3), so
	 * for values that small stay conservative and skip the division,
	 * i.e. assume srtt = 1 (125 us instead of 1.25 ms).
	 * Usec resolution will probably be needed in the future.
	 * Skipping the division also covers srtt == 0, the case where
	 * tcp_rtt_estimator() has not been called yet.
	 */
	if (tp->srtt > 8 + 2)
		do_div(bytes_per_sec, tp->srtt);

	/* Never exceed the cap stored in sk_max_pacing_rate. */
	sk->sk_pacing_rate = min_t(u64, bytes_per_sec, sk->sk_max_pacing_rate);
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
* routine referred to above.
*/
@ -850,7 +877,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
const struct inet_connection_sock *icsk = inet_csk(sk);
tp->prior_ssthresh = 0;
tp->bytes_acked = 0;
if (icsk->icsk_ca_state < TCP_CA_CWR) {
tp->undo_marker = 0;
if (set_ssthresh)
@ -2210,7 +2236,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->frto_counter = 0;
tp->bytes_acked = 0;
tp->reordering = min_t(unsigned int, tp->reordering,
sysctl_tcp_reordering);
@ -2259,7 +2284,6 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->bytes_acked = 0;
tcp_clear_retrans_partial(tp);
if (tcp_is_reno(tp))
@ -3175,7 +3199,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
TCP_ECN_queue_cwr(tp);
}
tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
tp->prior_cwnd = tp->snd_cwnd;
tp->prr_delivered = 0;
@ -3511,6 +3534,28 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
}
/* Advance tp->snd_una to @ack and account the newly acknowledged bytes
 * in tp->bytes_acked. The 64-bit counter is modified inside a
 * u64_stats update section on tp->syncp.
 */
static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
{
	/* 32-bit sequence arithmetic: distance from the old snd_una to ack */
	const u32 newly_acked = ack - tp->snd_una;

	u64_stats_update_begin(&tp->syncp);
	tp->bytes_acked += newly_acked;
	u64_stats_update_end(&tp->syncp);

	tp->snd_una = ack;
}
/* Advance tp->rcv_nxt to @seq and account the newly received bytes
 * in tp->bytes_received. The 64-bit counter is modified inside a
 * u64_stats update section on tp->syncp.
 */
static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
{
	/* 32-bit sequence arithmetic: distance from the old rcv_nxt to seq */
	const u32 newly_received = seq - tp->rcv_nxt;

	u64_stats_update_begin(&tp->syncp);
	tp->bytes_received += newly_received;
	u64_stats_update_end(&tp->syncp);

	tp->rcv_nxt = seq;
}
/* Update our send window.
*
* Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@ -3546,7 +3591,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
}
}
tp->snd_una = ack;
tcp_snd_una_update(tp, ack);
return flag;
}
@ -3558,7 +3603,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
{
tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
tp->snd_cwnd_cnt = 0;
tp->bytes_acked = 0;
TCP_ECN_queue_cwr(tp);
tcp_moderate_cwnd(tp);
}
@ -3749,7 +3793,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
u32 prior_in_flight;
u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
u32 prior_fackets;
int prior_packets = tp->packets_out;
int prior_sacked = tp->sacked_out;
@ -3778,15 +3822,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, prior_snd_una))
flag |= FLAG_SND_UNA_ADVANCED;
if (sysctl_tcp_abc) {
if (icsk->icsk_ca_state < TCP_CA_CWR)
tp->bytes_acked += ack - prior_snd_una;
else if (icsk->icsk_ca_state == TCP_CA_Loss)
/* we assume just one segment left network */
tp->bytes_acked += min(ack - prior_snd_una,
tp->mss_cache);
}
prior_fackets = tp->fackets_out;
prior_in_flight = tcp_packets_in_flight(tp);
@ -3802,7 +3837,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* Note, we use the fact that SND.UNA>=SND.WL2.
*/
tcp_update_wl(tp, ack_seq);
tp->snd_una = ack;
tcp_snd_una_update(tp, ack);
flag |= FLAG_WIN_UPDATE;
tcp_ca_event(sk, CA_EVENT_FAST_ACK);
@ -3864,6 +3899,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (dst)
dst_confirm(dst);
}
if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
tcp_update_pacing_rate(sk);
return 1;
no_queue:
@ -4474,7 +4512,7 @@ static void tcp_ofo_queue(struct sock *sk)
__skb_unlink(skb, &tp->out_of_order_queue);
__skb_queue_tail(&sk->sk_receive_queue, skb);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (tcp_hdr(skb)->fin)
tcp_fin(sk);
}
@ -4685,7 +4723,7 @@ queue_and_out:
skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (skb->len)
tcp_event_data_recv(sk, skb);
if (th->fin)
@ -5528,7 +5566,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_rcv_rtt_measure_ts(sk, skb);
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
}
if (copied_early)
@ -5558,7 +5596,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
__skb_pull(skb, tcp_header_len);
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
}
tcp_event_data_recv(sk, skb);

View file

@ -1734,6 +1734,7 @@ process:
}
bh_lock_sock_nested(sk);
tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
@ -1890,6 +1891,7 @@ static int tcp_v4_init_sock(struct sock *sk)
skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
INIT_LIST_HEAD(&tp->tsq_node);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
@ -1907,6 +1909,7 @@ static int tcp_v4_init_sock(struct sock *sk)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
u64_stats_init(&tp->syncp);
tp->reordering = sysctl_tcp_reordering;
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
@ -2636,6 +2639,7 @@ struct proto tcp_prot = {
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,

View file

@ -464,12 +464,14 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
newtp->segs_in = 0;
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up =
treq->snt_isn + 1 + tcp_s_data_size(oldtp);
tcp_prequeue_init(newtp);
INIT_LIST_HEAD(&newtp->tsq_node);
tcp_init_wl(newtp, treq->rcv_isn);
@ -490,7 +492,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
*/
newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
newtp->bytes_acked = 0;
newtp->frto_counter = 0;
newtp->frto_highmark = 0;

View file

@ -48,6 +48,9 @@ int sysctl_tcp_retrans_collapse __read_mostly = 1;
*/
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
/* Default TSQ limit of two TSO segments */
int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty.
@ -63,6 +66,8 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
@ -781,6 +786,140 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
return size;
}
/* TCP SMALL QUEUES (TSQ)
*
* TSQ goal is to keep small amount of skbs per tcp flow in tx queues (qdisc+dev)
* to reduce RTT and bufferbloat.
* We do this using a special skb destructor (tcp_wfree).
*
* It's important that tcp_wfree() can be replaced by sock_wfree() in the event
* the skb needs to be reallocated in a driver.
* The invariant is that skb->truesize is subtracted from sk->sk_wmem_alloc.
*
* Since transmit from skb destructor is forbidden, we use a tasklet
* to process all sockets that eventually need to send more skbs.
* We use one tasklet per cpu, with its own queue of sockets.
*/
/* Per-cpu TSQ state: a tasklet plus the list of sockets it has to service. */
struct tsq_tasklet {
/* tasklet whose handler is tcp_tasklet_func() (see tcp_tasklet_init()) */
struct tasklet_struct tasklet;
/* sockets are linked here through tcp_sock::tsq_node by tcp_wfree() */
struct list_head head; /* queue of tcp sockets */
};
/* one tsq_tasklet instance per cpu */
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
/*
* One tasklet per cpu tries to send more skbs.
* We run in tasklet context but need to disable irqs when
* transferring tsq->head because tcp_wfree() might
* interrupt us (non NAPI drivers)
*
* @data: the struct tsq_tasklet pointer, cast to unsigned long
* (this is what tcp_tasklet_init() registers).
*/
static void tcp_tasklet_func(unsigned long data)
{
struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
LIST_HEAD(list);
unsigned long flags;
struct list_head *q, *n;
struct tcp_sock *tp;
struct sock *sk;
/* Take the whole pending queue onto a private list with irqs off,
* leaving tsq->head empty for new tcp_wfree() insertions.
*/
local_irq_save(flags);
list_splice_init(&tsq->head, &list);
local_irq_restore(flags);
/* _safe variant: each entry is unlinked while we iterate */
list_for_each_safe(q, n, &list) {
tp = list_entry(q, struct tcp_sock, tsq_node);
list_del(&tp->tsq_node);
/* NOTE(review): this cast presumes struct sock sits at the start of
* struct tcp_sock; the layout is not visible here - confirm.
*/
sk = (struct sock *)tp;
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
/* transmit only in states listed in this mask */
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
TCPF_CLOSING | TCPF_CLOSE_WAIT))
tcp_write_xmit(sk,
tcp_current_mss(sk),
0, 0,
GFP_ATOMIC);
} else {
/* defer the work to tcp_release_cb() */
set_bit(TSQ_OWNED, &tp->tsq_flags);
}
bh_unlock_sock(sk);
/* socket may be queued again by tcp_wfree() from here on */
clear_bit(TSQ_QUEUED, &tp->tsq_flags);
/* drop the reference that tcp_wfree() kept when it queued the socket */
sk_free(sk);
}
}
/**
 * tcp_release_cb - TCP hook run by release_sock()
 * @sk: socket being released
 *
 * Invoked from release_sock() so the protocol can finish work before
 * the socket is released. If TSQ_OWNED was set (the TSQ tasklet found
 * the socket owned by the user and deferred its transmit), the flag is
 * consumed here and, when the connection state allows it, queued
 * segments are pushed out.
 */
void tcp_release_cb(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* connection states in which the deferred transmit is attempted */
	const int xmit_states = TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
				TCPF_CLOSING | TCPF_CLOSE_WAIT;

	/* nothing was deferred to us */
	if (!test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags))
		return;

	if (!((1 << sk->sk_state) & xmit_states))
		return;

	tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
}
EXPORT_SYMBOL(tcp_release_cb);
/* Prepare the TSQ tasklet of every possible cpu: start with an empty
 * socket queue and register tcp_tasklet_func() as the handler, handing
 * it the per-cpu structure as its argument.
 */
void __init tcp_tasklet_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct tsq_tasklet *percpu_tsq = &per_cpu(tsq_tasklet, cpu);

		INIT_LIST_HEAD(&percpu_tsq->head);
		tasklet_init(&percpu_tsq->tasklet, tcp_tasklet_func,
			     (unsigned long)percpu_tsq);
	}
}
/*
* Write buffer destructor automatically called from kfree_skb.
* We can't xmit new skbs from this context, as we might already
* hold qdisc lock.
*
* If the socket was marked TSQ_THROTTLED and is not already queued,
* it is put on this cpu's TSQ list and the tasklet is scheduled;
* otherwise the skb is handed to sock_wfree().
*/
void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
/* TSQ_THROTTLED is consumed first; TSQ_QUEUED is set only if it was
* clear, so the socket is never added to a list twice.
*/
if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
!test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
unsigned long flags;
struct tsq_tasklet *tsq;
/* Keep a ref on socket.
* This last ref will be released in tcp_tasklet_func()
*/
/* subtract all but one unit of this skb's truesize */
atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
/* queue this socket to tasklet queue */
local_irq_save(flags);
tsq = &__get_cpu_var(tsq_tasklet);
list_add(&tp->tsq_node, &tsq->head);
tasklet_schedule(&tsq->tasklet);
local_irq_restore(flags);
} else {
/* no tasklet work needed: plain write-buffer release */
sock_wfree(skb);
}
}
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
@ -844,7 +983,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
skb_set_owner_w(skb, sk);
skb_orphan(skb);
skb->sk = sk;
skb->destructor = tcp_wfree;
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */
th = tcp_hdr(skb);
@ -902,6 +1045,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
tcp_skb_pcount(skb));
tp->segs_out += tcp_skb_pcount(skb);
err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
if (likely(err <= 0))
return err;
@ -1562,7 +1707,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
/* If a full-sized TSO skb can be sent, do it. */
if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
sk->sk_gso_max_segs * tp->mss_cache))
tp->xmit_size_goal_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@ -1788,6 +1933,24 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
/* TCP Small Queues :
* Control number of packets in qdisc/devices to two packets / or ~1 ms.
* This allows for :
* - better RTT estimation and ACK scheduling
* - faster recovery
* - high rates
* Alas, some drivers / subsystems require a fair amount
* of queued bytes to ensure line rate.
* One example is wifi aggregation (802.11 AMPDU)
*/
limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
sk->sk_pacing_rate >> 10);
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
break;
}
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,

View file

@ -44,8 +44,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
/* DS disclosed */
top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
top_iph->tos = 0;
else
top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
XFRM_MODE_SKB_CB(skb)->tos);
flags = x->props.flags;

View file

@ -290,10 +290,24 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
static int snmp6_alloc_dev(struct inet6_dev *idev)
{
int i;
if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
sizeof(struct ipstats_mib),
__alignof__(struct ipstats_mib)) < 0)
goto err_ip;
for_each_possible_cpu(i) {
struct ipstats_mib *addrconf_stats;
addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i);
u64_stats_init(&addrconf_stats->syncp);
#if SNMP_ARRAY_SZ == 2
addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i);
u64_stats_init(&addrconf_stats->syncp);
#endif
}
idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),
GFP_KERNEL);
if (!idev->stats.icmpv6dev)

View file

@ -1018,6 +1018,8 @@ static void ipv6_packet_cleanup(void)
static int __net_init ipv6_init_mibs(struct net *net)
{
int i;
if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
sizeof(struct udp_mib),
__alignof__(struct udp_mib)) < 0)
@ -1030,6 +1032,18 @@ static int __net_init ipv6_init_mibs(struct net *net)
sizeof(struct ipstats_mib),
__alignof__(struct ipstats_mib)) < 0)
goto err_ip_mib;
for_each_possible_cpu(i) {
struct ipstats_mib *af_inet6_stats;
af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i);
u64_stats_init(&af_inet6_stats->syncp);
#if SNMP_ARRAY_SZ == 2
af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i);
u64_stats_init(&af_inet6_stats->syncp);
#endif
}
if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
sizeof(struct icmpv6_mib),
__alignof__(struct icmpv6_mib)) < 0)

View file

@ -98,16 +98,25 @@ struct pcpu_tstats {
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
struct pcpu_tstats sum = { 0 };
struct pcpu_tstats tmp, sum = { 0 };
int i;
for_each_possible_cpu(i) {
unsigned int start;
const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
sum.rx_packets += tstats->rx_packets;
sum.rx_bytes += tstats->rx_bytes;
sum.tx_packets += tstats->tx_packets;
sum.tx_bytes += tstats->tx_bytes;
do {
start = u64_stats_fetch_begin_irq(&tstats->syncp);
tmp.rx_packets = tstats->rx_packets;
tmp.rx_bytes = tstats->rx_bytes;
tmp.tx_packets = tstats->tx_packets;
tmp.tx_bytes = tstats->tx_bytes;
} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
sum.rx_packets += tmp.rx_packets;
sum.rx_bytes += tmp.rx_bytes;
sum.tx_packets += tmp.tx_packets;
sum.tx_bytes += tmp.tx_bytes;
}
dev->stats.rx_packets = sum.rx_packets;
dev->stats.rx_bytes = sum.rx_bytes;
@ -761,8 +770,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
tstats = this_cpu_ptr(t->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
__skb_tunnel_rx(skb, t->dev);
@ -1408,11 +1419,18 @@ static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
/* Records @dev in the tunnel private data and sets up dev->tstats.
* Returns 0 on success, -ENOMEM if the per-cpu allocation fails.
*/
struct ip6_tnl *t = netdev_priv(dev);
int i;
t->dev = dev;
dev->tstats = alloc_percpu(struct pcpu_tstats);
if (!dev->tstats)
return -ENOMEM;
/* initialise the u64_stats sync object of every possible cpu's counters */
for_each_possible_cpu(i) {
struct pcpu_tstats *ip6_tnl_stats;
ip6_tnl_stats = per_cpu_ptr(dev->tstats, i);
u64_stats_init(&ip6_tnl_stats->syncp);
}
return 0;
}

View file

@ -263,7 +263,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
struct net_device *dev,
int flags)
{
struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
struct rt6_info *rt = dst_alloc(ops, dev, 0, DST_OBSOLETE_NONE, flags);
if (rt)
memset(&rt->rt6i_table, 0,
@ -981,7 +981,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
struct dst_entry *new = NULL;
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
if (rt) {
memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));

View file

@ -87,31 +87,47 @@ struct sit_net {
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
unsigned long rx_packets;
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
} __attribute__((aligned(4*sizeof(unsigned long))));
u64 rx_packets;
u64 rx_bytes;
u64 tx_packets;
u64 tx_bytes;
struct u64_stats_sync syncp;
};
static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
struct pcpu_tstats sum = { 0 };
int i;
for_each_possible_cpu(i) {
const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
unsigned int start;
sum.rx_packets += tstats->rx_packets;
sum.rx_bytes += tstats->rx_bytes;
sum.tx_packets += tstats->tx_packets;
sum.tx_bytes += tstats->tx_bytes;
do {
start = u64_stats_fetch_begin_irq(&tstats->syncp);
rx_packets = tstats->rx_packets;
tx_packets = tstats->tx_packets;
rx_bytes = tstats->rx_bytes;
tx_bytes = tstats->tx_bytes;
} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
tot->rx_packets += rx_packets;
tot->tx_packets += tx_packets;
tot->rx_bytes += rx_bytes;
tot->tx_bytes += tx_bytes;
}
dev->stats.rx_packets = sum.rx_packets;
dev->stats.rx_bytes = sum.rx_bytes;
dev->stats.tx_packets = sum.tx_packets;
dev->stats.tx_bytes = sum.tx_bytes;
return &dev->stats;
tot->rx_errors = dev->stats.rx_errors;
tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
tot->tx_dropped = dev->stats.tx_dropped;
tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
tot->tx_errors = dev->stats.tx_errors;
return tot;
}
/*
* Must be invoked with rcu_read_lock
*/
@ -588,8 +604,10 @@ static int ipip6_rcv(struct sk_buff *skb)
}
tstats = this_cpu_ptr(tunnel->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
__skb_tunnel_rx(skb, tunnel->dev);
@ -1126,7 +1144,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_start_xmit = ipip6_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
.ndo_change_mtu = ipip6_tunnel_change_mtu,
.ndo_get_stats = ipip6_get_stats,
.ndo_get_stats64= ipip6_get_stats64,
};
static void ipip6_dev_free(struct net_device *dev)
@ -1154,6 +1172,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
static int ipip6_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int i;
tunnel->dev = dev;
@ -1165,6 +1184,12 @@ static int ipip6_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct pcpu_tstats *ipip6_tunnel_stats;
ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i);
u64_stats_init(&ipip6_tunnel_stats->syncp);
}
return 0;
}
@ -1174,6 +1199,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
struct iphdr *iph = &tunnel->parms.iph;
struct net *net = dev_net(dev);
struct sit_net *sitn = net_generic(net, sit_net_id);
int i;
tunnel->dev = dev;
strcpy(tunnel->parms.name, dev->name);
@ -1186,6 +1212,13 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
dev->tstats = alloc_percpu(struct pcpu_tstats);
if (!dev->tstats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct pcpu_tstats *ipip6_fb_stats;
ipip6_fb_stats = per_cpu_ptr(dev->tstats, i);
u64_stats_init(&ipip6_fb_stats->syncp);
}
dev_hold(dev);
rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
return 0;

View file

@ -1660,6 +1660,7 @@ process:
}
bh_lock_sock_nested(sk);
tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
@ -1855,6 +1856,7 @@ static int tcp_v6_init_sock(struct sock *sk)
skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
INIT_LIST_HEAD(&tp->tsq_node);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
@ -2122,6 +2124,7 @@ struct proto tcpv6_prot = {
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,

View file

@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
sizeof(top_iph->flow_lbl));
top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
dsfield = XFRM_MODE_SKB_CB(skb)->tos;
dsfield = INET_ECN_encapsulate(dsfield, dsfield);
if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
dsfield = 0;
else
dsfield = XFRM_MODE_SKB_CB(skb)->tos;
dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
if (x->props.flags & XFRM_STATE_NOECN)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);

View file

@ -762,7 +762,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
}
/* identity & sensitivity */
if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family))
if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family))
size += sizeof(struct sadb_address) + sockaddr_size;
if (add_keys) {
@ -816,18 +816,21 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
sa->sadb_sa_auth = 0;
if (x->aalg) {
struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
sa->sadb_sa_auth = a ? a->desc.sadb_alg_id : 0;
sa->sadb_sa_auth = (a && a->pfkey_supported) ?
a->desc.sadb_alg_id : 0;
}
sa->sadb_sa_encrypt = 0;
BUG_ON(x->ealg && x->calg);
if (x->ealg) {
struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0);
sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
sa->sadb_sa_encrypt = (a && a->pfkey_supported) ?
a->desc.sadb_alg_id : 0;
}
/* KAME compatible: sadb_sa_encrypt is overloaded with calg id */
if (x->calg) {
struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0);
sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
sa->sadb_sa_encrypt = (a && a->pfkey_supported) ?
a->desc.sadb_alg_id : 0;
}
sa->sadb_sa_flags = 0;
@ -909,8 +912,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
if (!addr->sadb_address_prefixlen)
BUG();
if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr,
x->props.family)) {
if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr,
x->props.family)) {
addr = (struct sadb_address*) skb_put(skb,
sizeof(struct sadb_address)+sockaddr_size);
addr->sadb_address_len =
@ -1138,7 +1141,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
if (sa->sadb_sa_auth) {
int keysize = 0;
struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
if (!a) {
if (!a || !a->pfkey_supported) {
err = -ENOSYS;
goto out;
}
@ -1160,7 +1163,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
if (sa->sadb_sa_encrypt) {
if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) {
struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt);
if (!a) {
if (!a || !a->pfkey_supported) {
err = -ENOSYS;
goto out;
}
@ -1172,7 +1175,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
} else {
int keysize = 0;
struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt);
if (!a) {
if (!a || !a->pfkey_supported) {
err = -ENOSYS;
goto out;
}
@ -1321,7 +1324,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
if (hdr->sadb_msg_seq) {
x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) {
if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
xfrm_state_put(x);
x = NULL;
}
@ -1578,13 +1581,13 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
struct sadb_msg *hdr;
int len, auth_len, enc_len, i;
auth_len = xfrm_count_auth_supported();
auth_len = xfrm_count_pfkey_auth_supported();
if (auth_len) {
auth_len *= sizeof(struct sadb_alg);
auth_len += sizeof(struct sadb_supported);
}
enc_len = xfrm_count_enc_supported();
enc_len = xfrm_count_pfkey_enc_supported();
if (enc_len) {
enc_len *= sizeof(struct sadb_alg);
enc_len += sizeof(struct sadb_supported);
@ -1615,6 +1618,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
if (!aalg)
break;
if (!aalg->pfkey_supported)
continue;
if (aalg->available)
*ap++ = aalg->desc;
}
@ -1634,6 +1639,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
if (!ealg)
break;
if (!ealg->pfkey_supported)
continue;
if (ealg->available)
*ap++ = ealg->desc;
}
@ -2360,6 +2367,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
out:
xfrm_pol_put(xp);
if (err == 0)
xfrm_garbage_collect(net);
return err;
}
@ -2609,6 +2618,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
out:
xfrm_pol_put(xp);
if (delete && err == 0)
xfrm_garbage_collect(net);
return err;
}
@ -2826,6 +2837,8 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
if (!aalg)
break;
if (!aalg->pfkey_supported)
continue;
if (aalg_tmpl_set(t, aalg) && aalg->available)
sz += sizeof(struct sadb_comb);
}
@ -2841,6 +2854,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg)
break;
if (!ealg->pfkey_supported)
continue;
if (!(ealg_tmpl_set(t, ealg) && ealg->available))
continue;
@ -2849,6 +2865,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg)
break;
if (!aalg->pfkey_supported)
continue;
if (aalg_tmpl_set(t, aalg) && aalg->available)
sz += sizeof(struct sadb_comb);
}
@ -2872,6 +2891,9 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
if (!aalg)
break;
if (!aalg->pfkey_supported)
continue;
if (aalg_tmpl_set(t, aalg) && aalg->available) {
struct sadb_comb *c;
c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
@ -2904,6 +2926,9 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
if (!ealg)
break;
if (!ealg->pfkey_supported)
continue;
if (!(ealg_tmpl_set(t, ealg) && ealg->available))
continue;
@ -2912,6 +2937,8 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
if (!aalg)
break;
if (!aalg->pfkey_supported)
continue;
if (!(aalg_tmpl_set(t, aalg) && aalg->available))
continue;
c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
@ -3028,7 +3055,7 @@ static u32 get_acqseq(void)
return res;
}
static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir)
static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
{
struct sk_buff *skb;
struct sadb_msg *hdr;
@ -3109,7 +3136,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
pol->sadb_x_policy_dir = dir+1;
pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1;
pol->sadb_x_policy_reserved = 0;
pol->sadb_x_policy_id = xp->index;
pol->sadb_x_policy_priority = xp->priority;

View file

@ -842,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
struct ip_vs_dest **dest_p)
{
struct ip_vs_dest *dest;
unsigned atype;
unsigned int atype, i;
EnterFunction(2);
@ -869,6 +869,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
if (!dest->stats.cpustats)
goto err_alloc;
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *ip_vs_dest_stats;
ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
u64_stats_init(&ip_vs_dest_stats->syncp);
}
dest->af = svc->af;
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
@ -1130,7 +1136,7 @@ static int
ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
struct ip_vs_service **svc_p)
{
int ret = 0;
int ret = 0, i;
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
@ -1174,6 +1180,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
if (!svc->stats.cpustats)
goto out_err;
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *ip_vs_stats;
ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
u64_stats_init(&ip_vs_stats->syncp);
}
/* I'm the first user of the service */
atomic_set(&svc->usecnt, 0);
atomic_set(&svc->refcnt, 0);
@ -2131,10 +2143,10 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
__u64 inbytes, outbytes;
do {
start = u64_stats_fetch_begin_bh(&u->syncp);
start = u64_stats_fetch_begin_irq(&u->syncp);
inbytes = u->ustats.inbytes;
outbytes = u->ustats.outbytes;
} while (u64_stats_fetch_retry_bh(&u->syncp, start));
} while (u64_stats_fetch_retry_irq(&u->syncp, start));
seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
i, u->ustats.conns, u->ustats.inpkts,
@ -3625,7 +3637,7 @@ static void ip_vs_genl_unregister(void)
#ifdef CONFIG_SYSCTL
int __net_init ip_vs_control_net_init_sysctl(struct net *net)
{
int idx;
int i, idx;
struct netns_ipvs *ipvs = net_ipvs(net);
struct ctl_table *tbl;
@ -3725,6 +3737,12 @@ int __net_init ip_vs_control_net_init(struct net *net)
if (!ipvs->tot_stats.cpustats)
return -ENOMEM;
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *ipvs_tot_stats;
ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
u64_stats_init(&ipvs_tot_stats->syncp);
}
spin_lock_init(&ipvs->tot_stats.lock);
proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);

View file

@ -722,9 +722,9 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
do {
start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
start = u64_stats_fetch_begin_irq(&percpu_stats->sync);
local_stats = *percpu_stats;
} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
} while (u64_stats_fetch_retry_irq(&percpu_stats->sync, start));
stats->n_hit += local_stats.n_hit;
stats->n_missed += local_stats.n_missed;
@ -1261,6 +1261,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_table;
}
for_each_possible_cpu(i) {
struct dp_stats_percpu *dpath_stats;
dpath_stats = per_cpu_ptr(dp->stats_percpu, i);
u64_stats_init(&dpath_stats->sync);
}
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL;

View file

@ -109,6 +109,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
{
struct vport *vport;
size_t alloc_size;
int i;
alloc_size = sizeof(struct vport);
if (priv_size) {
@ -131,6 +132,13 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
return ERR_PTR(-ENOMEM);
}
for_each_possible_cpu(i) {
struct pcpu_tstats *vport_stats;
vport_stats = per_cpu_ptr(vport->percpu_stats, i);
u64_stats_init(&vport_stats->syncp);
}
spin_lock_init(&vport->stats_lock);
return vport;
@ -265,9 +273,9 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
do {
start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
start = u64_stats_fetch_begin_irq(&percpu_stats->sync);
local_stats = *percpu_stats;
} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
} while (u64_stats_fetch_retry_irq(&percpu_stats->sync, start));
stats->rx_bytes += local_stats.rx_bytes;
stats->rx_packets += local_stats.rx_packets;

View file

@ -214,7 +214,7 @@ void sctp_transport_set_owner(struct sctp_transport *transport,
void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
{
/* If we don't have a fresh route, look one up */
if (!transport->dst || transport->dst->obsolete > 1) {
if (!transport->dst || transport->dst->obsolete) {
dst_release(transport->dst);
transport->af_specific->get_dst(transport, &transport->saddr,
&transport->fl, sk);

View file

@ -38,6 +38,8 @@ static struct xfrm_algo_desc aead_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8,
.sadb_alg_ivlen = 8,
@ -54,6 +56,8 @@ static struct xfrm_algo_desc aead_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12,
.sadb_alg_ivlen = 8,
@ -70,6 +74,8 @@ static struct xfrm_algo_desc aead_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16,
.sadb_alg_ivlen = 8,
@ -86,6 +92,8 @@ static struct xfrm_algo_desc aead_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8,
.sadb_alg_ivlen = 8,
@ -102,6 +110,8 @@ static struct xfrm_algo_desc aead_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12,
.sadb_alg_ivlen = 8,
@ -118,6 +128,8 @@ static struct xfrm_algo_desc aead_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16,
.sadb_alg_ivlen = 8,
@ -134,6 +146,8 @@ static struct xfrm_algo_desc aead_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC,
.sadb_alg_ivlen = 8,
@ -154,6 +168,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_AALG_NULL,
.sadb_alg_ivlen = 0,
@ -172,6 +188,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_AALG_MD5HMAC,
.sadb_alg_ivlen = 0,
@ -190,6 +208,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_AALG_SHA1HMAC,
.sadb_alg_ivlen = 0,
@ -208,6 +228,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
.sadb_alg_ivlen = 0,
@ -225,6 +247,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_AALG_SHA2_384HMAC,
.sadb_alg_ivlen = 0,
@ -242,6 +266,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_AALG_SHA2_512HMAC,
.sadb_alg_ivlen = 0,
@ -260,6 +286,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,
.sadb_alg_ivlen = 0,
@ -277,6 +305,8 @@ static struct xfrm_algo_desc aalg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC,
.sadb_alg_ivlen = 0,
@ -298,6 +328,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_EALG_NULL,
.sadb_alg_ivlen = 0,
@ -316,6 +348,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_EALG_DESCBC,
.sadb_alg_ivlen = 8,
@ -334,6 +368,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_EALG_3DESCBC,
.sadb_alg_ivlen = 8,
@ -352,6 +388,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_CASTCBC,
.sadb_alg_ivlen = 8,
@ -370,6 +408,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
.sadb_alg_ivlen = 8,
@ -388,6 +428,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_AESCBC,
.sadb_alg_ivlen = 8,
@ -406,6 +448,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_SERPENTCBC,
.sadb_alg_ivlen = 8,
@ -424,6 +468,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_CAMELLIACBC,
.sadb_alg_ivlen = 8,
@ -442,6 +488,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_TWOFISHCBC,
.sadb_alg_ivlen = 8,
@ -459,6 +507,8 @@ static struct xfrm_algo_desc ealg_list[] = {
}
},
.pfkey_supported = 1,
.desc = {
.sadb_alg_id = SADB_X_EALG_AESCTR,
.sadb_alg_ivlen = 8,
@ -476,6 +526,7 @@ static struct xfrm_algo_desc calg_list[] = {
.threshold = 90,
}
},
.pfkey_supported = 1,
.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }
},
{
@ -485,6 +536,7 @@ static struct xfrm_algo_desc calg_list[] = {
.threshold = 90,
}
},
.pfkey_supported = 1,
.desc = { .sadb_alg_id = SADB_X_CALG_LZS }
},
{
@ -494,6 +546,7 @@ static struct xfrm_algo_desc calg_list[] = {
.threshold = 50,
}
},
.pfkey_supported = 1,
.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }
},
};
@ -703,8 +756,7 @@ void xfrm_probe_algs(void)
}
for (i = 0; i < ealg_entries(); i++) {
status = crypto_has_blkcipher(ealg_list[i].name, 0,
CRYPTO_ALG_ASYNC);
status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0);
if (ealg_list[i].available != status)
ealg_list[i].available = status;
}
@ -718,27 +770,27 @@ void xfrm_probe_algs(void)
}
EXPORT_SYMBOL_GPL(xfrm_probe_algs);
int xfrm_count_auth_supported(void)
int xfrm_count_pfkey_auth_supported(void)
{
int i, n;
for (i = 0, n = 0; i < aalg_entries(); i++)
if (aalg_list[i].available)
if (aalg_list[i].available && aalg_list[i].pfkey_supported)
n++;
return n;
}
EXPORT_SYMBOL_GPL(xfrm_count_auth_supported);
EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported);
int xfrm_count_enc_supported(void)
int xfrm_count_pfkey_enc_supported(void)
{
int i, n;
for (i = 0, n = 0; i < ealg_entries(); i++)
if (ealg_list[i].available)
if (ealg_list[i].available && ealg_list[i].pfkey_supported)
n++;
return n;
}
EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported);
#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)

View file

@ -276,18 +276,16 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name)
struct crypto_comp * __percpu *tfms;
int cpu;
/* This can be any valid CPU ID so we don't need locking. */
cpu = raw_smp_processor_id();
list_for_each_entry(pos, &ipcomp_tfms_list, list) {
struct crypto_comp *tfm;
tfms = pos->tfms;
tfm = *per_cpu_ptr(tfms, cpu);
/* This can be any valid CPU ID so we don't need locking. */
tfm = __this_cpu_read(*pos->tfms);
if (!strcmp(crypto_comp_name(tfm), alg_name)) {
pos->users++;
return tfms;
return pos->tfms;
}
}

View file

@ -61,6 +61,13 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
}
spin_lock_bh(&x->lock);
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
err = -EINVAL;
goto error;
}
err = xfrm_state_check_expire(x);
if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);

View file

@ -34,6 +34,10 @@
#include "xfrm_hash.h"
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN 100
DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);
@ -51,12 +55,12 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(unsigned long arg);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir);
static inline int
static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
const struct flowi4 *fl4 = &fl->u.ip4;
@ -69,7 +73,7 @@ __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
}
static inline int
static inline bool
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
const struct flowi6 *fl6 = &fl->u.ip6;
@ -82,8 +86,8 @@ __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
}
int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
unsigned short family)
bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
unsigned short family)
{
switch (family) {
case AF_INET:
@ -91,7 +95,7 @@ int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
case AF_INET6:
return __xfrm6_selector_match(sel, fl);
}
return 0;
return false;
}
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
@ -269,8 +273,11 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
atomic_set(&policy->refcnt, 1);
skb_queue_head_init(&policy->polq.hold_queue);
setup_timer(&policy->timer, xfrm_policy_timer,
(unsigned long)policy);
setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
(unsigned long)policy);
policy->flo.ops = &xfrm_policy_fc_ops;
}
return policy;
@ -291,6 +298,16 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
}
EXPORT_SYMBOL(xfrm_policy_destroy);
/* Empty @list completely: each dequeued packet has the reference on its
 * device dropped and is then freed.
 */
static void xfrm_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *held;

	for (held = skb_dequeue(list); held != NULL; held = skb_dequeue(list)) {
		dev_put(held->dev);
		kfree_skb(held);
	}
}
/* Rule must be locked. Release descentant resources, announce
* entry dead. The rule must be unlinked from lists to the moment.
*/
@ -301,6 +318,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
atomic_inc(&policy->genid);
del_timer(&policy->polq.hold_timer);
xfrm_queue_purge(&policy->polq.hold_queue);
if (del_timer(&policy->timer))
xfrm_pol_put(policy);
@ -544,6 +564,46 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s
return 0;
}
static void xfrm_policy_requeue(struct xfrm_policy *old,
struct xfrm_policy *new)
{
struct xfrm_policy_queue *pq = &old->polq;
struct sk_buff_head list;
__skb_queue_head_init(&list);
spin_lock_bh(&pq->hold_queue.lock);
skb_queue_splice_init(&pq->hold_queue, &list);
del_timer(&pq->hold_timer);
spin_unlock_bh(&pq->hold_queue.lock);
if (skb_queue_empty(&list))
return;
pq = &new->polq;
spin_lock_bh(&pq->hold_queue.lock);
skb_queue_splice(&list, &pq->hold_queue);
pq->timeout = XFRM_QUEUE_TMO_MIN;
mod_timer(&pq->hold_timer, jiffies);
spin_unlock_bh(&pq->hold_queue.lock);
}
/* Decide whether @policy and @pol are to be treated as carrying the same
 * mark.
 *
 * They match when value and mask are both identical, or when @policy's
 * masked mark value, filtered through @pol's mask, equals @pol's mark
 * value and both policies have the same priority.
 */
static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	bool identical = policy->mark.v == pol->mark.v &&
			 policy->mark.m == pol->mark.m;
	u32 masked;

	if (identical)
		return true;

	masked = policy->mark.v & policy->mark.m;

	return (masked & pol->mark.m) == pol->mark.v &&
	       policy->priority == pol->priority;
}
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
struct net *net = xp_net(policy);
@ -551,7 +611,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct xfrm_policy *delpol;
struct hlist_head *chain;
struct hlist_node *entry, *newpos;
u32 mark = policy->mark.v & policy->mark.m;
write_lock_bh(&xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
@ -560,7 +619,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
hlist_for_each_entry(pol, entry, chain, bydst) {
if (pol->type == policy->type &&
!selector_cmp(&pol->selector, &policy->selector) &&
(mark & pol->mark.m) == pol->mark.v &&
xfrm_policy_mark_match(policy, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
if (excl) {
@ -584,8 +643,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_pol_hold(policy);
net->xfrm.policy_count[dir]++;
atomic_inc(&flow_cache_genid);
if (delpol)
if (delpol) {
xfrm_policy_requeue(delpol, policy);
__xfrm_policy_unlink(delpol, dir);
}
policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
policy->curlft.add_time = get_seconds();
@ -877,7 +938,8 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
u8 type, u16 family, int dir)
{
const struct xfrm_selector *sel = &pol->selector;
int match, ret = -ESRCH;
int ret = -ESRCH;
bool match;
if (pol->family != family ||
(fl->flowi_mark & pol->mark.m) != pol->mark.v ||
@ -962,6 +1024,24 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}
static int flow_to_policy_dir(int dir)
{
if (XFRM_POLICY_IN == FLOW_DIR_IN &&
XFRM_POLICY_OUT == FLOW_DIR_OUT &&
XFRM_POLICY_FWD == FLOW_DIR_FWD)
return dir;
switch (dir) {
default:
case FLOW_DIR_IN:
return XFRM_POLICY_IN;
case FLOW_DIR_OUT:
return XFRM_POLICY_OUT;
case FLOW_DIR_FWD:
return XFRM_POLICY_FWD;
}
}
static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
u8 dir, struct flow_cache_object *old_obj, void *ctx)
@ -971,7 +1051,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
if (old_obj)
xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
pol = __xfrm_policy_lookup(net, fl, family, dir);
pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
if (IS_ERR_OR_NULL(pol))
return ERR_CAST(pol);
@ -1006,8 +1086,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
read_lock_bh(&xfrm_policy_lock);
if ((pol = sk->sk_policy[dir]) != NULL) {
int match = xfrm_selector_match(&pol->selector, fl,
sk->sk_family);
bool match = xfrm_selector_match(&pol->selector, fl,
sk->sk_family);
int err = 0;
if (match) {
@ -1095,11 +1175,15 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir);
__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
}
if (old_pol)
if (old_pol) {
if (pol)
xfrm_policy_requeue(old_pol, pol);
/* Unlinking succeeds always. This is the only function
* allowed to delete or replace socket policy.
*/
__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
}
write_unlock_bh(&xfrm_policy_lock);
if (old_pol) {
@ -1290,6 +1374,8 @@ static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *f
* It means we need to try again resolving. */
if (xdst->num_xfrms > 0)
return NULL;
} else if (dst->flags & DST_XFRM_QUEUE) {
return NULL;
} else {
/* Real bundle */
if (stale_bundle(dst))
@ -1348,7 +1434,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
default:
BUG();
}
xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
if (likely(xdst)) {
memset(&xdst->u.rt6.rt6i_table, 0,
@ -1474,7 +1560,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst1->xfrm = xfrm[i];
xdst->xfrm_genid = xfrm[i]->genid;
dst1->obsolete = -1;
dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
dst1->flags |= DST_HOST;
dst1->lastuse = now;
@ -1653,6 +1739,171 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
return xdst;
}
/* Hold-timer callback of a policy's packet queue.
 *
 * @arg: the owning struct xfrm_policy, cast to unsigned long.
 *
 * The first held packet is used as a probe: its flow is decoded and a
 * fresh xfrm_lookup() is done on its path.
 *  - Lookup error: the whole hold queue is purged.
 *  - Lookup still yields a DST_XFRM_QUEUE dst: the timeout is doubled
 *    and the timer re-armed, unless the timeout already reached
 *    XFRM_QUEUE_TMO_MAX, in which case the queue is purged.
 *  - Otherwise: every held packet is looked up again individually and
 *    sent through dst_output(); packets whose lookup fails are dropped.
 *
 * The queue may be empty by the time the timer runs (another path can
 * splice or purge it), so an empty queue is treated as "nothing to do"
 * instead of dereferencing the NULL returned by skb_peek().
 */
static void xfrm_policy_queue_process(unsigned long arg)
{
	struct sk_buff *skb;
	struct sock *sk;
	struct dst_entry *dst;
	struct net_device *dev;
	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
	struct xfrm_policy_queue *pq = &pol->polq;
	struct flowi fl;
	struct sk_buff_head list;

	spin_lock(&pq->hold_queue.lock);
	skb = skb_peek(&pq->hold_queue);
	if (!skb) {
		/* Nothing is held any more; do not touch a NULL skb. */
		spin_unlock(&pq->hold_queue.lock);
		return;
	}
	dst = skb_dst(skb);
	sk = skb->sk;
	xfrm_decode_session(skb, &fl, dst->ops->family);
	spin_unlock(&pq->hold_queue.lock);

	/* Probe lookup using the first packet's flow. */
	dst_hold(dst->path);
	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
			  sk, 0);
	if (IS_ERR(dst))
		goto purge_queue;

	if (dst->flags & DST_XFRM_QUEUE) {
		/* Still only a queueing dst: back off or give up. */
		dst_release(dst);

		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
			goto purge_queue;

		pq->timeout = pq->timeout << 1;
		mod_timer(&pq->hold_timer, jiffies + pq->timeout);
		return;
	}

	dst_release(dst);

	__skb_queue_head_init(&list);

	/* Take the held packets private so they can be sent unlocked. */
	spin_lock(&pq->hold_queue.lock);
	pq->timeout = 0;
	skb_queue_splice_init(&pq->hold_queue, &list);
	spin_unlock(&pq->hold_queue.lock);

	while (!skb_queue_empty(&list)) {
		skb = __skb_dequeue(&list);

		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
		dst_hold(skb_dst(skb)->path);
		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
				  &fl, skb->sk, 0);
		if (IS_ERR(dst)) {
			dev_put(skb->dev);
			kfree_skb(skb);
			continue;
		}

		nf_reset(skb);
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

		/* skb must not be touched after dst_output(); save dev first. */
		dev = skb->dev;
		dst_output(skb);
		dev_put(dev);
	}

	return;

purge_queue:
	pq->timeout = 0;
	xfrm_queue_purge(&pq->hold_queue);
}
/* Output handler of a queueing (dummy) bundle: park @skb on the hold
 * queue of the bundle's first policy instead of transmitting it.
 *
 * Returns 0 when the packet was queued, or -EAGAIN (after freeing the
 * packet) when the hold queue already holds more than
 * XFRM_MAX_QUEUE_LEN packets.
 */
static int xdst_queue_output(struct sk_buff *skb)
{
	struct xfrm_dst *bundle = (struct xfrm_dst *)skb_dst(skb);
	struct xfrm_policy_queue *pq = &bundle->pols[0]->polq;
	unsigned long fire_at;

	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);
	dev_hold(skb->dev);

	spin_lock_bh(&pq->hold_queue.lock);

	if (pq->timeout == 0)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	fire_at = jiffies + pq->timeout;

	/* A timer that was already pending keeps its earlier expiry. */
	if (del_timer(&pq->hold_timer) &&
	    time_before(pq->hold_timer.expires, fire_at))
		fire_at = pq->hold_timer.expires;

	__skb_queue_tail(&pq->hold_queue, skb);
	mod_timer(&pq->hold_timer, fire_at);

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}
/* Allocate a dummy bundle for @family.
 *
 * The freshly allocated xfrm_dst is returned as-is when larval drop is
 * enabled, when @num_xfrms is not positive, or when the flow is flagged
 * FLOWI_FLAG_CAN_SLEEP. Otherwise it is turned into a queueing bundle on
 * top of @dst: flagged DST_XFRM_QUEUE, input discarded, output routed to
 * xdst_queue_output().
 *
 * Returns the xfrm_dst, or an ERR_PTR() on allocation failure, when
 * @dst has no device (-ENODEV), or when xfrm_fill_dst() fails.
 */
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct dst_entry *dst,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
	struct dst_entry *dummy;
	struct net_device *dev;
	int err;

	if (IS_ERR(xdst))
		return xdst;

	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 ||
	    (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP))
		return xdst;

	dummy = &xdst->u.dst;

	/* One reference for ->route ... */
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dummy, dst);

	dummy->obsolete = DST_OBSOLETE_FORCE_CHK;
	dummy->flags |= DST_HOST | DST_XFRM_QUEUE;
	dummy->lastuse = jiffies;

	dummy->input = dst_discard;
	dummy->output = xdst_queue_output;

	/* ... and one for ->child / ->path. */
	dst_hold(dst);
	dummy->child = dst;
	dummy->path = dst;

	xfrm_init_path((struct xfrm_dst *)dummy, dst, 0);

	dev = dst->dev;
	if (!dev) {
		err = -ENODEV;
		goto free_dst;
	}

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto free_dst;

	return xdst;

free_dst:
	dst_release(dummy);
	return ERR_PTR(err);
}
static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
struct flow_cache_object *oldflo, void *ctx)
@ -1686,7 +1937,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
* previous cache entry */
if (xdst == NULL) {
num_pols = 1;
pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
pols[0] = __xfrm_policy_lookup(net, fl, family,
flow_to_policy_dir(dir));
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
@ -1731,7 +1983,7 @@ make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate:
* either because the policy blocks, has no transformations or
* we could not build template (no xfrm_states).*/
xdst = xfrm_alloc_dst(net, family);
xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
return ERR_CAST(xdst);
@ -2219,12 +2471,13 @@ EXPORT_SYMBOL(__xfrm_route_forward);
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
* to "-1" to force all XFRM destinations to get validated by
* dst_ops->check on every use. We do this because when a
* normal route referenced by an XFRM dst is obsoleted we do
* not go looking around for all parent referencing XFRM dsts
* so that we can invalidate them. It is just too much work.
* Instead we make the checks here on every use. For example:
* to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
* get validated by dst_ops->check on every use. We do this
* because when a normal route referenced by an XFRM dst is
* obsoleted we do not go looking around for all parent
* referencing XFRM dsts so that we can invalidate them. It
* is just too much work. Instead we make the checks here on
* every use. For example:
*
* XFRM dst A --> IPv4 dst X
*
@ -2234,9 +2487,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
* stale_bundle() check.
*
* When a policy's bundle is pruned, we dst_free() the XFRM
* dst which causes it's ->obsolete field to be set to a
* positive non-zero integer. If an XFRM dst has been pruned
* like this, we want to force a new route lookup.
* dst which causes it's ->obsolete field to be set to
* DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like
* this, we want to force a new route lookup.
*/
if (dst->obsolete < 0 && !stale_bundle(dst))
return dst;
@ -2291,11 +2544,12 @@ static void __xfrm_garbage_collect(struct net *net)
}
}
static void xfrm_garbage_collect(struct net *net)
void xfrm_garbage_collect(struct net *net)
{
flow_cache_flush();
__xfrm_garbage_collect(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
static void xfrm_garbage_collect_deferred(struct net *net)
{
@ -2338,6 +2592,9 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
(dst->dev && !netif_running(dst->dev)))
return 0;
if (dst->flags & DST_XFRM_QUEUE)
return 1;
last = NULL;
do {
@ -2645,7 +2902,7 @@ static void xfrm_policy_fini(struct net *net)
WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
htab = &net->xfrm.policy_bydst[dir];
sz = (htab->hmask + 1);
sz = (htab->hmask + 1) * sizeof(struct hlist_head);
WARN_ON(!hlist_empty(htab->table));
xfrm_hash_free(htab->table, sz);
}
@ -2770,25 +3027,25 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif
#ifdef CONFIG_XFRM_MIGRATE
static int xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
const struct xfrm_selector *sel_tgt)
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
const struct xfrm_selector *sel_tgt)
{
if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
if (sel_tgt->family == sel_cmp->family &&
xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
sel_cmp->family) == 0 &&
xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
sel_cmp->family) == 0 &&
xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
sel_cmp->family) &&
xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
sel_cmp->family) &&
sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
return 1;
return true;
}
} else {
if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
return 1;
return true;
}
}
return 0;
return false;
}
static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel,
@ -2836,10 +3093,10 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm
switch (t->mode) {
case XFRM_MODE_TUNNEL:
case XFRM_MODE_BEET:
if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
m->old_family) == 0 &&
xfrm_addr_cmp(&t->saddr, &m->old_saddr,
m->old_family) == 0) {
if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
m->old_family) &&
xfrm_addr_equal(&t->saddr, &m->old_saddr,
m->old_family)) {
match = 1;
}
break;
@ -2905,10 +3162,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
return -EINVAL;
for (i = 0; i < num_migrate; i++) {
if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
m[i].old_family) == 0) &&
(xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
m[i].old_family) == 0))
if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
m[i].old_family) &&
xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
m[i].old_family))
return -EINVAL;
if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
xfrm_addr_any(&m[i].new_saddr, m[i].new_family))

View file

@ -43,6 +43,7 @@ static const struct snmp_mib xfrm_mib_list[] = {
SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD),
SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),
SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR),
SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
SNMP_MIB_SENTINEL
};

View file

@ -242,11 +242,13 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
u32 diff;
struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
u32 seq = ntohl(net_seq);
u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
u32 pos;
if (!replay_esn->replay_window)
return;
pos = (replay_esn->seq - 1) % replay_esn->replay_window;
if (seq > replay_esn->seq) {
diff = seq - replay_esn->seq;
@ -332,6 +334,70 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
x->xflags &= ~XFRM_TIME_DEFER;
}
/* Send a replay-state notification (XFRM_MSG_NEWAE) for an ESN state.
 *
 * A notification goes out when
 *  1. one of the sequence numbers was updated and the inbound or
 *     outbound counter moved by at least x->replay_maxdiff since the
 *     last notification (the replay timer is re-armed as well), or
 *  2. x->replay_maxage elapsed since the last update and the replay
 *     state changed in the meantime.
 *
 * The state structure must be locked!
 */
static void xfrm_replay_notify_esn(struct xfrm_state *x, int event)
{
	struct xfrm_replay_state_esn *cur = x->replay_esn;
	struct xfrm_replay_state_esn *prev = x->preplay_esn;
	struct km_event c;
	u32 seq_diff, oseq_diff;

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		if (!x->replay_maxdiff)
			break;

		/* Differing high halves: compute the distance across the
		 * 32-bit wrap of the low half.
		 */
		seq_diff = (cur->seq_hi == prev->seq_hi) ?
			   cur->seq - prev->seq :
			   ~prev->seq + cur->seq + 1;

		oseq_diff = (cur->oseq_hi == prev->oseq_hi) ?
			    cur->oseq - prev->oseq :
			    ~prev->oseq + cur->oseq + 1;

		if (seq_diff >= x->replay_maxdiff ||
		    oseq_diff >= x->replay_maxdiff)
			break;

		/* Below the threshold: only report if a timeout was deferred. */
		if (!(x->xflags & XFRM_TIME_DEFER))
			return;

		event = XFRM_REPLAY_TIMEOUT;
		break;

	case XFRM_REPLAY_TIMEOUT:
		if (!memcmp(cur, prev, xfrm_replay_state_esn_len(cur))) {
			/* Nothing changed; remember that a timeout is owed. */
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}
		break;
	}

	/* Snapshot the state that is about to be reported. */
	memcpy(prev, cur, xfrm_replay_state_esn_len(cur));

	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		x->xflags &= ~XFRM_TIME_DEFER;
}
static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb)
{
int err = 0;
@ -508,7 +574,7 @@ static struct xfrm_replay xfrm_replay_esn = {
.advance = xfrm_replay_advance_esn,
.check = xfrm_replay_check_esn,
.recheck = xfrm_replay_recheck_esn,
.notify = xfrm_replay_notify_bmp,
.notify = xfrm_replay_notify_esn,
.overflow = xfrm_replay_overflow_esn,
};
@ -521,13 +587,12 @@ int xfrm_init_replay(struct xfrm_state *x)
replay_esn->bmp_len * sizeof(__u32) * 8)
return -EINVAL;
if ((x->props.flags & XFRM_STATE_ESN) && replay_esn->replay_window == 0)
return -EINVAL;
if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn)
x->repl = &xfrm_replay_esn;
else
x->repl = &xfrm_replay_bmp;
if (x->props.flags & XFRM_STATE_ESN) {
if (replay_esn->replay_window == 0)
return -EINVAL;
x->repl = &xfrm_replay_esn;
} else
x->repl = &xfrm_replay_bmp;
} else
x->repl = &xfrm_replay_legacy;

View file

@ -158,8 +158,8 @@ out_unlock:
mutex_unlock(&hash_resize_mutex);
}
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
static DEFINE_SPINLOCK(xfrm_state_gc_lock);
@ -168,58 +168,45 @@ int __xfrm_state_delete(struct xfrm_state *x);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
{
struct xfrm_state_afinfo *afinfo;
if (unlikely(family >= NPROTO))
return NULL;
write_lock_bh(&xfrm_state_afinfo_lock);
afinfo = xfrm_state_afinfo[family];
if (unlikely(!afinfo))
write_unlock_bh(&xfrm_state_afinfo_lock);
return afinfo;
}
static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo)
__releases(xfrm_state_afinfo_lock)
{
write_unlock_bh(&xfrm_state_afinfo_lock);
}
static DEFINE_SPINLOCK(xfrm_type_lock);
int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
const struct xfrm_type **typemap;
int err = 0;
if (unlikely(afinfo == NULL))
return -EAFNOSUPPORT;
typemap = afinfo->type_map;
spin_lock_bh(&xfrm_type_lock);
if (likely(typemap[type->proto] == NULL))
typemap[type->proto] = type;
else
err = -EEXIST;
xfrm_state_unlock_afinfo(afinfo);
spin_unlock_bh(&xfrm_type_lock);
xfrm_state_put_afinfo(afinfo);
return err;
}
EXPORT_SYMBOL(xfrm_register_type);
int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
const struct xfrm_type **typemap;
int err = 0;
if (unlikely(afinfo == NULL))
return -EAFNOSUPPORT;
typemap = afinfo->type_map;
spin_lock_bh(&xfrm_type_lock);
if (unlikely(typemap[type->proto] != type))
err = -ENOENT;
else
typemap[type->proto] = NULL;
xfrm_state_unlock_afinfo(afinfo);
spin_unlock_bh(&xfrm_type_lock);
xfrm_state_put_afinfo(afinfo);
return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);
@ -256,6 +243,7 @@ static void xfrm_put_type(const struct xfrm_type *type)
module_put(type->owner);
}
static DEFINE_SPINLOCK(xfrm_mode_lock);
int xfrm_register_mode(struct xfrm_mode *mode, int family)
{
struct xfrm_state_afinfo *afinfo;
@ -265,12 +253,13 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)
if (unlikely(mode->encap >= XFRM_MODE_MAX))
return -EINVAL;
afinfo = xfrm_state_lock_afinfo(family);
afinfo = xfrm_state_get_afinfo(family);
if (unlikely(afinfo == NULL))
return -EAFNOSUPPORT;
err = -EEXIST;
modemap = afinfo->mode_map;
spin_lock_bh(&xfrm_mode_lock);
if (modemap[mode->encap])
goto out;
@ -283,7 +272,8 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)
err = 0;
out:
xfrm_state_unlock_afinfo(afinfo);
spin_unlock_bh(&xfrm_mode_lock);
xfrm_state_put_afinfo(afinfo);
return err;
}
EXPORT_SYMBOL(xfrm_register_mode);
@ -297,19 +287,21 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
if (unlikely(mode->encap >= XFRM_MODE_MAX))
return -EINVAL;
afinfo = xfrm_state_lock_afinfo(family);
afinfo = xfrm_state_get_afinfo(family);
if (unlikely(afinfo == NULL))
return -EAFNOSUPPORT;
err = -ENOENT;
modemap = afinfo->mode_map;
spin_lock_bh(&xfrm_mode_lock);
if (likely(modemap[mode->encap] == mode)) {
modemap[mode->encap] = NULL;
module_put(mode->afinfo->owner);
err = 0;
}
xfrm_state_unlock_afinfo(afinfo);
spin_unlock_bh(&xfrm_mode_lock);
xfrm_state_put_afinfo(afinfo);
return err;
}
EXPORT_SYMBOL(xfrm_unregister_mode);
@ -415,8 +407,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
if (x->lft.hard_add_expires_seconds) {
long tmo = x->lft.hard_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0)
goto expired;
if (tmo <= 0) {
if (x->xflags & XFRM_SOFT_EXPIRE) {
/* enter hard expire without soft expire first?!
* setting a new date could trigger this.
* workarbound: fix x->curflt.add_time by below:
*/
x->curlft.add_time = now - x->saved_tmo - 1;
tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
} else
goto expired;
}
if (tmo < next)
next = tmo;
}
@ -433,10 +434,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
if (x->lft.soft_add_expires_seconds) {
long tmo = x->lft.soft_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0)
if (tmo <= 0) {
warn = 1;
else if (tmo < next)
x->xflags &= ~XFRM_SOFT_EXPIRE;
} else if (tmo < next) {
next = tmo;
x->xflags |= XFRM_SOFT_EXPIRE;
x->saved_tmo = tmo;
}
}
if (x->lft.soft_use_expires_seconds) {
long tmo = x->lft.soft_use_expires_seconds +
@ -686,7 +691,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
if (x->props.family != family ||
x->id.spi != spi ||
x->id.proto != proto ||
xfrm_addr_cmp(&x->id.daddr, daddr, family))
!xfrm_addr_equal(&x->id.daddr, daddr, family))
continue;
if ((mark & x->mark.m) != x->mark.v)
@ -710,8 +715,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) {
if (x->props.family != family ||
x->id.proto != proto ||
xfrm_addr_cmp(&x->id.daddr, daddr, family) ||
xfrm_addr_cmp(&x->props.saddr, saddr, family))
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
!xfrm_addr_equal(&x->props.saddr, saddr, family))
continue;
if ((mark & x->mark.m) != x->mark.v)
@ -976,8 +981,8 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
if (x->props.family == family &&
x->props.reqid == reqid &&
(mark & x->mark.m) == x->mark.v &&
!xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
!xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
x->genid++;
}
}
@ -1011,8 +1016,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
x->id.spi != 0 ||
x->id.proto != proto ||
(mark & x->mark.m) != x->mark.v ||
xfrm_addr_cmp(&x->id.daddr, daddr, family) ||
xfrm_addr_cmp(&x->props.saddr, saddr, family))
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
!xfrm_addr_equal(&x->props.saddr, saddr, family))
continue;
xfrm_state_hold(x);
@ -1095,7 +1100,7 @@ int xfrm_state_add(struct xfrm_state *x)
if (use_spi && x->km.seq) {
x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
if (x1 && ((x1->id.proto != x->id.proto) ||
xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
!xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
to_put = x1;
x1 = NULL;
}
@ -1190,6 +1195,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
goto error;
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
x->curlft.add_time = orig->curlft.add_time;
x->km.state = orig->km.state;
@ -1221,10 +1227,10 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
continue;
if (m->reqid && x->props.reqid != m->reqid)
continue;
if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
m->old_family) ||
xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
m->old_family))
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
m->old_family))
continue;
xfrm_state_hold(x);
return x;
@ -1236,10 +1242,10 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
m->old_family) ||
xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
m->old_family))
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
m->old_family))
continue;
xfrm_state_hold(x);
return x;
@ -1264,7 +1270,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
/* add state */
if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
/* a care is needed when the destination address of the
state is to be updated as it is a part of triplet */
xfrm_state_insert(xc);
@ -1357,9 +1363,6 @@ int xfrm_state_check_expire(struct xfrm_state *x)
if (!x->curlft.use_time)
x->curlft.use_time = get_seconds();
if (x->km.state != XFRM_STATE_VALID)
return -EINVAL;
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {
x->km.state = XFRM_STATE_EXPIRED;
@ -1635,27 +1638,26 @@ static void xfrm_replay_timer_handler(unsigned long data)
}
static LIST_HEAD(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
{
struct xfrm_mgr *km;
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list)
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list)
if (km->notify_policy)
km->notify_policy(xp, dir, c);
read_unlock(&xfrm_km_lock);
rcu_read_unlock();
}
void km_state_notify(struct xfrm_state *x, const struct km_event *c)
{
struct xfrm_mgr *km;
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list)
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list)
if (km->notify)
km->notify(x, c);
read_unlock(&xfrm_km_lock);
rcu_read_unlock();
}
EXPORT_SYMBOL(km_policy_notify);
@ -1685,13 +1687,13 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
int err = -EINVAL, acqret;
struct xfrm_mgr *km;
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) {
acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
acqret = km->acquire(x, t, pol);
if (!acqret)
err = acqret;
}
read_unlock(&xfrm_km_lock);
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(km_query);
@ -1701,14 +1703,14 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
int err = -EINVAL;
struct xfrm_mgr *km;
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) {
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
if (km->new_mapping)
err = km->new_mapping(x, ipaddr, sport);
if (!err)
break;
}
read_unlock(&xfrm_km_lock);
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(km_new_mapping);
@ -1737,15 +1739,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
int ret;
struct xfrm_mgr *km;
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) {
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
if (km->migrate) {
ret = km->migrate(sel, dir, type, m, num_migrate, k);
if (!ret)
err = ret;
}
}
read_unlock(&xfrm_km_lock);
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(km_migrate);
@ -1757,15 +1759,15 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
int ret;
struct xfrm_mgr *km;
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) {
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
if (km->report) {
ret = km->report(net, proto, sel, addr);
if (!ret)
err = ret;
}
}
read_unlock(&xfrm_km_lock);
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(km_report);
@ -1789,14 +1791,14 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
goto out;
err = -EINVAL;
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) {
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
pol = km->compile_policy(sk, optname, data,
optlen, &err);
if (err >= 0)
break;
}
read_unlock(&xfrm_km_lock);
rcu_read_unlock();
if (err >= 0) {
xfrm_sk_policy_insert(sk, err, pol);
@ -1810,20 +1812,23 @@ out:
}
EXPORT_SYMBOL(xfrm_user_policy);
static DEFINE_SPINLOCK(xfrm_km_lock);
int xfrm_register_km(struct xfrm_mgr *km)
{
write_lock_bh(&xfrm_km_lock);
list_add_tail(&km->list, &xfrm_km_list);
write_unlock_bh(&xfrm_km_lock);
spin_lock_bh(&xfrm_km_lock);
list_add_tail_rcu(&km->list, &xfrm_km_list);
spin_unlock_bh(&xfrm_km_lock);
return 0;
}
EXPORT_SYMBOL(xfrm_register_km);
int xfrm_unregister_km(struct xfrm_mgr *km)
{
write_lock_bh(&xfrm_km_lock);
list_del(&km->list);
write_unlock_bh(&xfrm_km_lock);
spin_lock_bh(&xfrm_km_lock);
list_del_rcu(&km->list);
spin_unlock_bh(&xfrm_km_lock);
synchronize_rcu();
return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
@ -1835,12 +1840,12 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
return -EINVAL;
if (unlikely(afinfo->family >= NPROTO))
return -EAFNOSUPPORT;
write_lock_bh(&xfrm_state_afinfo_lock);
spin_lock_bh(&xfrm_state_afinfo_lock);
if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
err = -ENOBUFS;
else
xfrm_state_afinfo[afinfo->family] = afinfo;
write_unlock_bh(&xfrm_state_afinfo_lock);
rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
spin_unlock_bh(&xfrm_state_afinfo_lock);
return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);
@ -1852,14 +1857,15 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
return -EINVAL;
if (unlikely(afinfo->family >= NPROTO))
return -EAFNOSUPPORT;
write_lock_bh(&xfrm_state_afinfo_lock);
spin_lock_bh(&xfrm_state_afinfo_lock);
if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
err = -EINVAL;
else
xfrm_state_afinfo[afinfo->family] = NULL;
RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
}
write_unlock_bh(&xfrm_state_afinfo_lock);
spin_unlock_bh(&xfrm_state_afinfo_lock);
synchronize_rcu();
return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
@ -1869,17 +1875,16 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
struct xfrm_state_afinfo *afinfo;
if (unlikely(family >= NPROTO))
return NULL;
read_lock(&xfrm_state_afinfo_lock);
afinfo = xfrm_state_afinfo[family];
rcu_read_lock();
afinfo = rcu_dereference(xfrm_state_afinfo[family]);
if (unlikely(!afinfo))
read_unlock(&xfrm_state_afinfo_lock);
rcu_read_unlock();
return afinfo;
}
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
__releases(xfrm_state_afinfo_lock)
{
read_unlock(&xfrm_state_afinfo_lock);
rcu_read_unlock();
}
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */

View file

@ -522,6 +522,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
copy_from_user_state(x, p);
if (attrs[XFRMA_SA_EXTRA_FLAGS])
x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
if ((err = attach_aead(&x->aead, &x->props.ealgo,
attrs[XFRMA_ALG_AEAD])))
goto error;
@ -782,48 +785,74 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
struct xfrm_usersa_info *p,
struct sk_buff *skb)
{
int ret = 0;
copy_to_user_state(x, p);
if (x->coaddr)
NLA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
if (x->lastused)
NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused);
if (x->aead)
NLA_PUT(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead);
if (x->aalg) {
if (copy_to_user_auth(x->aalg, skb))
goto nla_put_failure;
NLA_PUT(skb, XFRMA_ALG_AUTH_TRUNC,
xfrm_alg_auth_len(x->aalg), x->aalg);
if (x->props.extra_flags) {
ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
x->props.extra_flags);
if (ret)
goto out;
}
if (x->ealg)
NLA_PUT(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg);
if (x->calg)
NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
if (x->encap)
NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
if (x->tfcpad)
NLA_PUT_U32(skb, XFRMA_TFCPAD, x->tfcpad);
if (xfrm_mark_put(skb, &x->mark))
goto nla_put_failure;
if (x->replay_esn)
NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL,
xfrm_replay_state_esn_len(x->replay_esn), x->replay_esn);
if (x->security && copy_sec_ctx(x->security, skb) < 0)
goto nla_put_failure;
return 0;
nla_put_failure:
return -EMSGSIZE;
if (x->coaddr) {
ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
if (ret)
goto out;
}
if (x->lastused) {
ret = nla_put_u64(skb, XFRMA_LASTUSED, x->lastused);
if (ret)
goto out;
}
if (x->aead) {
ret = nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead);
if (ret)
goto out;
}
if (x->aalg) {
ret = copy_to_user_auth(x->aalg, skb);
if (!ret)
ret = nla_put(skb, XFRMA_ALG_AUTH_TRUNC,
xfrm_alg_auth_len(x->aalg), x->aalg);
if (ret)
goto out;
}
if (x->ealg) {
ret = nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg);
if (ret)
goto out;
}
if (x->calg) {
ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
if (ret)
goto out;
}
if (x->encap) {
ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
if (ret)
goto out;
}
if (x->tfcpad) {
ret = nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad);
if (ret)
goto out;
}
ret = xfrm_mark_put(skb, &x->mark);
if (ret)
goto out;
if (x->replay_esn) {
ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL,
xfrm_replay_state_esn_len(x->replay_esn),
x->replay_esn);
if (ret)
goto out;
}
if (x->security)
ret = copy_sec_ctx(x->security, skb);
out:
return ret;
}
static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
@ -843,15 +872,12 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
p = nlmsg_data(nlh);
err = copy_to_user_state_extra(x, p, skb);
if (err)
goto nla_put_failure;
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
return err;
}
static int xfrm_dump_sa_done(struct netlink_callback *cb)
@ -924,6 +950,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
struct xfrmu_spdinfo spc;
struct xfrmu_spdhinfo sph;
struct nlmsghdr *nlh;
int err;
u32 *f;
nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
@ -942,14 +969,15 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
sph.spdhcnt = si.spdhcnt;
sph.spdhmcnt = si.spdhmcnt;
NLA_PUT(skb, XFRMA_SPD_INFO, sizeof(spc), &spc);
NLA_PUT(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph);
err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc);
if (!err)
err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
@ -984,6 +1012,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net,
struct xfrmk_sadinfo si;
struct xfrmu_sadhinfo sh;
struct nlmsghdr *nlh;
int err;
u32 *f;
nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
@ -997,14 +1026,15 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net,
sh.sadhmcnt = si.sadhmcnt;
sh.sadhcnt = si.sadhcnt;
NLA_PUT_U32(skb, XFRMA_SAD_CNT, si.sadcnt);
NLA_PUT(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh);
err = nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt);
if (!err)
err = nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
@ -1099,7 +1129,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
mark = xfrm_mark_get(attrs, &m);
if (p->info.seq) {
x = xfrm_find_acq_byseq(net, mark, p->info.seq);
if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) {
if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
xfrm_state_put(x);
x = NULL;
}
@ -1459,9 +1489,8 @@ static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buf
static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
{
if (xp->security) {
if (xp->security)
return copy_sec_ctx(xp->security, skb);
}
return 0;
}
static inline size_t userpolicy_type_attrsize(void)
@ -1497,6 +1526,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
struct sk_buff *in_skb = sp->in_skb;
struct sk_buff *skb = sp->out_skb;
struct nlmsghdr *nlh;
int err;
nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq,
XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags);
@ -1505,22 +1535,19 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
p = nlmsg_data(nlh);
copy_to_user_policy(xp, p, dir);
if (copy_to_user_tmpl(xp, skb) < 0)
goto nlmsg_failure;
if (copy_to_user_sec_ctx(xp, skb))
goto nlmsg_failure;
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
if (xfrm_mark_put(skb, &xp->mark))
goto nla_put_failure;
err = copy_to_user_tmpl(xp, skb);
if (!err)
err = copy_to_user_sec_ctx(xp, skb);
if (!err)
err = copy_to_user_policy_type(xp->type, skb);
if (!err)
err = xfrm_mark_put(skb, &xp->mark);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
nlmsg_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static int xfrm_dump_policy_done(struct netlink_callback *cb)
@ -1661,6 +1688,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
out:
xfrm_pol_put(xp);
if (delete && err == 0)
xfrm_garbage_collect(net);
return err;
}
@ -1710,6 +1739,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
{
struct xfrm_aevent_id *id;
struct nlmsghdr *nlh;
int err;
nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0);
if (nlh == NULL)
@ -1724,30 +1754,40 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
id->reqid = x->props.reqid;
id->flags = c->data.aevent;
if (x->replay_esn)
NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL,
xfrm_replay_state_esn_len(x->replay_esn),
x->replay_esn);
else
NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay);
if (x->replay_esn) {
err = nla_put(skb, XFRMA_REPLAY_ESN_VAL,
xfrm_replay_state_esn_len(x->replay_esn),
x->replay_esn);
} else {
err = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay),
&x->replay);
}
if (err)
goto out_cancel;
err = nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft);
if (err)
goto out_cancel;
NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft);
if (id->flags & XFRM_AE_RTHR)
NLA_PUT_U32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff);
if (id->flags & XFRM_AE_ETHR)
NLA_PUT_U32(skb, XFRMA_ETIMER_THRESH,
x->replay_maxage * 10 / HZ);
if (xfrm_mark_put(skb, &x->mark))
goto nla_put_failure;
if (id->flags & XFRM_AE_RTHR) {
err = nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff);
if (err)
goto out_cancel;
}
if (id->flags & XFRM_AE_ETHR) {
err = nla_put_u32(skb, XFRMA_ETIMER_THRESH,
x->replay_maxage * 10 / HZ);
if (err)
goto out_cancel;
}
err = xfrm_mark_put(skb, &x->mark);
if (err)
goto out_cancel;
return nlmsg_end(skb, nlh);
nla_put_failure:
out_cancel:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
return err;
}
static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
@ -2171,7 +2211,7 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m,
const struct xfrm_migrate *mp;
struct xfrm_userpolicy_id *pol_id;
struct nlmsghdr *nlh;
int i;
int i, err;
nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0);
if (nlh == NULL)
@ -2183,21 +2223,25 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m,
memcpy(&pol_id->sel, sel, sizeof(pol_id->sel));
pol_id->dir = dir;
if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0))
goto nlmsg_failure;
if (copy_to_user_policy_type(type, skb) < 0)
goto nlmsg_failure;
if (k != NULL) {
err = copy_to_user_kmaddress(k, skb);
if (err)
goto out_cancel;
}
err = copy_to_user_policy_type(type, skb);
if (err)
goto out_cancel;
for (i = 0, mp = m ; i < num_migrate; i++, mp++) {
if (copy_to_user_migrate(mp, skb) < 0)
goto nlmsg_failure;
err = copy_to_user_migrate(mp, skb);
if (err)
goto out_cancel;
}
return nlmsg_end(skb, nlh);
nlmsg_failure:
out_cancel:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
return err;
}
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
@ -2277,9 +2321,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) },
[XFRMA_TFCPAD] = { .type = NLA_U32 },
[XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
[XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
};
static struct xfrm_link {
static const struct xfrm_link {
int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
@ -2313,7 +2358,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *attrs[XFRMA_MAX+1];
struct xfrm_link *link;
const struct xfrm_link *link;
int type, err;
type = nlh->nlmsg_type;
@ -2370,6 +2415,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
{
struct xfrm_user_expire *ue;
struct nlmsghdr *nlh;
int err;
nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0);
if (nlh == NULL)
@ -2379,13 +2425,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
copy_to_user_state(x, &ue->state);
ue->hard = (c->data.hard != 0) ? 1 : 0;
if (xfrm_mark_put(skb, &x->mark))
goto nla_put_failure;
err = xfrm_mark_put(skb, &x->mark);
if (err)
return err;
return nlmsg_end(skb, nlh);
nla_put_failure:
return -EMSGSIZE;
}
static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
@ -2471,6 +2515,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
x->security->ctx_len);
if (x->coaddr)
l += nla_total_size(sizeof(*x->coaddr));
if (x->props.extra_flags)
l += nla_total_size(sizeof(x->props.extra_flags));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size(sizeof(u64));
@ -2486,7 +2532,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
struct nlmsghdr *nlh;
struct sk_buff *skb;
int len = xfrm_sa_len(x);
int headlen;
int headlen, err;
headlen = sizeof(*p);
if (c->event == XFRM_MSG_DELSA) {
@ -2501,8 +2547,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
return -ENOMEM;
nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0);
err = -EMSGSIZE;
if (nlh == NULL)
goto nla_put_failure;
goto out_free_skb;
p = nlmsg_data(nlh);
if (c->event == XFRM_MSG_DELSA) {
@ -2515,24 +2562,23 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
id->proto = x->id.proto;
attr = nla_reserve(skb, XFRMA_SA, sizeof(*p));
err = -EMSGSIZE;
if (attr == NULL)
goto nla_put_failure;
goto out_free_skb;
p = nla_data(attr);
}
if (copy_to_user_state_extra(x, p, skb))
goto nla_put_failure;
err = copy_to_user_state_extra(x, p, skb);
if (err)
goto out_free_skb;
nlmsg_end(skb, nlh);
return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
nla_put_failure:
/* Somebody screwed up with xfrm_sa_len! */
WARN_ON(1);
out_free_skb:
kfree_skb(skb);
return -1;
return err;
}
static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c)
@ -2570,12 +2616,12 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,
}
static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_tmpl *xt, struct xfrm_policy *xp,
int dir)
struct xfrm_tmpl *xt, struct xfrm_policy *xp)
{
__u32 seq = xfrm_get_acqseq();
struct xfrm_user_acquire *ua;
struct nlmsghdr *nlh;
__u32 seq = xfrm_get_acqseq();
int err;
nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0);
if (nlh == NULL)
@ -2585,31 +2631,29 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
memcpy(&ua->id, &x->id, sizeof(ua->id));
memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
memcpy(&ua->sel, &x->sel, sizeof(ua->sel));
copy_to_user_policy(xp, &ua->policy, dir);
copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT);
ua->aalgos = xt->aalgos;
ua->ealgos = xt->ealgos;
ua->calgos = xt->calgos;
ua->seq = x->km.seq = seq;
if (copy_to_user_tmpl(xp, skb) < 0)
goto nlmsg_failure;
if (copy_to_user_state_sec_ctx(x, skb))
goto nlmsg_failure;
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
if (xfrm_mark_put(skb, &xp->mark))
goto nla_put_failure;
err = copy_to_user_tmpl(xp, skb);
if (!err)
err = copy_to_user_state_sec_ctx(x, skb);
if (!err)
err = copy_to_user_policy_type(xp->type, skb);
if (!err)
err = xfrm_mark_put(skb, &xp->mark);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
struct xfrm_policy *xp, int dir)
struct xfrm_policy *xp)
{
struct net *net = xs_net(x);
struct sk_buff *skb;
@ -2618,7 +2662,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
if (skb == NULL)
return -ENOMEM;
if (build_acquire(skb, x, xt, xp, dir) < 0)
if (build_acquire(skb, x, xt, xp) < 0)
BUG();
return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
@ -2697,8 +2741,9 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
int dir, const struct km_event *c)
{
struct xfrm_user_polexpire *upe;
struct nlmsghdr *nlh;
int hard = c->data.hard;
struct nlmsghdr *nlh;
int err;
nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0);
if (nlh == NULL)
@ -2706,22 +2751,20 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
upe = nlmsg_data(nlh);
copy_to_user_policy(xp, &upe->pol, dir);
if (copy_to_user_tmpl(xp, skb) < 0)
goto nlmsg_failure;
if (copy_to_user_sec_ctx(xp, skb))
goto nlmsg_failure;
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
if (xfrm_mark_put(skb, &xp->mark))
goto nla_put_failure;
err = copy_to_user_tmpl(xp, skb);
if (!err)
err = copy_to_user_sec_ctx(xp, skb);
if (!err)
err = copy_to_user_policy_type(xp->type, skb);
if (!err)
err = xfrm_mark_put(skb, &xp->mark);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
upe->hard = !!hard;
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
@ -2741,13 +2784,13 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
{
int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
struct net *net = xp_net(xp);
struct xfrm_userpolicy_info *p;
struct xfrm_userpolicy_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
int headlen;
int headlen, err;
headlen = sizeof(*p);
if (c->event == XFRM_MSG_DELPOLICY) {
@ -2763,8 +2806,9 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
return -ENOMEM;
nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0);
err = -EMSGSIZE;
if (nlh == NULL)
goto nlmsg_failure;
goto out_free_skb;
p = nlmsg_data(nlh);
if (c->event == XFRM_MSG_DELPOLICY) {
@ -2779,29 +2823,29 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
memcpy(&id->sel, &xp->selector, sizeof(id->sel));
attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p));
err = -EMSGSIZE;
if (attr == NULL)
goto nlmsg_failure;
goto out_free_skb;
p = nla_data(attr);
}
copy_to_user_policy(xp, p, dir);
if (copy_to_user_tmpl(xp, skb) < 0)
goto nlmsg_failure;
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
if (xfrm_mark_put(skb, &xp->mark))
goto nla_put_failure;
err = copy_to_user_tmpl(xp, skb);
if (!err)
err = copy_to_user_policy_type(xp->type, skb);
if (!err)
err = xfrm_mark_put(skb, &xp->mark);
if (err)
goto out_free_skb;
nlmsg_end(skb, nlh);
return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
nla_put_failure:
nlmsg_failure:
out_free_skb:
kfree_skb(skb);
return -1;
return err;
}
static int xfrm_notify_policy_flush(const struct km_event *c)
@ -2809,24 +2853,27 @@ static int xfrm_notify_policy_flush(const struct km_event *c)
struct net *net = c->net;
struct nlmsghdr *nlh;
struct sk_buff *skb;
int err;
skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0);
err = -EMSGSIZE;
if (nlh == NULL)
goto nlmsg_failure;
if (copy_to_user_policy_type(c->data.type, skb) < 0)
goto nlmsg_failure;
goto out_free_skb;
err = copy_to_user_policy_type(c->data.type, skb);
if (err)
goto out_free_skb;
nlmsg_end(skb, nlh);
return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
nlmsg_failure:
out_free_skb:
kfree_skb(skb);
return -1;
return err;
}
static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
@ -2869,14 +2916,14 @@ static int build_report(struct sk_buff *skb, u8 proto,
ur->proto = proto;
memcpy(&ur->sel, sel, sizeof(ur->sel));
if (addr)
NLA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
if (addr) {
int err = nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
}
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static int xfrm_send_report(struct net *net, u8 proto,

153
scripts/decode_stacktrace.sh Executable file
View file

@ -0,0 +1,153 @@
#!/bin/bash
# (c) 2014, Sasha Levin <sasha.levin@oracle.com>
#set -x

# Rewrites the "symbol+offset/length" entries of a stack trace read from
# stdin as "symbol (file:line)".
#
# Arguments:
#   $1  vmlinux image used to look up symbols that carry no module tag
#   $2  base path prefix to remove from the reported file names
#   $3  (optional) directory searched for <module>.ko files

# Compare the argument count numerically: inside [[ ]] the '<' operator is a
# string comparison, so an argument count of 10 or more would sort before "2".
if [[ $# -lt 2 ]]; then
	echo "Usage:"
	echo " $0 [vmlinux] [base path] [modules path]"
	exit 1
fi

vmlinux=$1
basepath=$2
modpath=$3

# cache holds memoized nm and addr2line results, keyed by "module,name" and
# "module,address"; modcache maps a module name to the .ko file found for it.
declare -A cache
declare -A modcache
# Rewrite the global $symbol from "name+offset/length" (optionally wrapped in
# parentheses) to "name (file:line ...)".
#
# Reads the globals $module, $vmlinux, $modpath, $basepath and
# $CROSS_COMPILE, and memoizes lookups in the global arrays $cache and
# $modcache.
#
# When the symbol cannot be decoded, $symbol keeps its offset/length form:
#   - untouched if the module's object file cannot be located;
#   - with only the surrounding parentheses removed if nm finds no base
#     address or addr2line reports "??:0".
parse_symbol() {
	# The structure of symbol at this point is:
	#   ([name]+[offset]/[total length])
	#
	# For example:
	#   do_basic_setup+0x9c/0xbf

	local objfile
	if [[ $module == "" ]] ; then
		objfile=$vmlinux
	elif [[ "${modcache[$module]+isset}" == "isset" ]]; then
		objfile=${modcache[$module]}
	else
		[[ $modpath == "" ]] && return
		objfile=$(find "$modpath" -name "$module.ko" -print -quit)
		[[ $objfile == "" ]] && return
		modcache[$module]=$objfile
	fi

	# Remove the englobing parenthesis
	symbol=${symbol#\(}
	symbol=${symbol%\)}

	# Strip the symbol name so that we could look it up
	local name=${symbol%+*}

	# Use 'nm vmlinux' to figure out the base address of said symbol.
	# It's actually faster to call it every time than to load it
	# all into bash.
	local base_addr
	if [[ "${cache[$module,$name]+isset}" == "isset" ]]; then
		base_addr=${cache[$module,$name]}
	else
		base_addr=$(nm "$objfile" | grep -i ' t ' | awk "/ $name\$/ {print \$1}" | head -n1)
		# An empty result is cached too, so nm is not run again for
		# a symbol it already failed to find.
		cache[$module,$name]="$base_addr"
	fi

	# Without a base address the expression built below would be the
	# malformed "0x+<offset>"; keep the undecoded symbol instead.
	if [[ $base_addr == "" ]]; then
		return
	fi

	# Let's start doing the math to get the exact address into the
	# symbol. First, strip out the symbol total length.
	local expr=${symbol%/*}

	# Now, replace the symbol name with the base address we found
	# before. The name is quoted so it is matched literally.
	expr=${expr/"$name"/0x$base_addr}

	# Evaluate it to find the actual address
	expr=$((expr))
	local address
	address=$(printf "%x\n" "$expr")

	# Pass it to addr2line to get filename and line number
	# Could get more than one result
	local code
	if [[ "${cache[$module,$address]+isset}" == "isset" ]]; then
		code=${cache[$module,$address]}
	else
		code=$(${CROSS_COMPILE}addr2line -i -e "$objfile" "$address")
		cache[$module,$address]=$code
	fi

	# addr2line doesn't return a proper error code if it fails, so
	# we detect it using the value it prints so that we could preserve
	# the offset/size into the function and bail out
	if [[ $code == "??:0" ]]; then
		return
	fi

	# Strip out the base of the path. '^' is not an anchor in a bash
	# substitution pattern, so match the base path itself (literally) in
	# every reported location; inlined frames yield several of them.
	if [[ -n $basepath ]]; then
		code=${code//"$basepath"/}
	fi

	# In the case of inlines, move everything to same line
	code=${code//$'\n'/' '}

	# Replace old address with pretty line numbers
	symbol="$name ($code)"
}
# Pipe one "Code:" line of the trace ($1) into the decodecode script located
# in the same directory as this script.
decode_code() {
	local scripts
	scripts=$(dirname "${BASH_SOURCE[0]}")

	# The path is quoted so that a script directory containing whitespace
	# is still passed as a single command word.
	echo "$1" | "$scripts/decodecode"
}
# Decode one trace line ($1) and print it.
#
# The line is split into words; "[<address>]" words are dropped and a
# "[" + "timestamp]" word pair is merged into one padded word. The trailing
# word is taken as the symbol, or as the "[module]" tag with the symbol right
# before it. The globals $symbol and $module are set, parse_symbol is called
# to rewrite $symbol, and the remaining words are printed followed by
# "$symbol $module".
handle_line() {
	local words
	local i

	# Tokenize
	read -r -a words <<<"$1"

	# Remove hex numbers. Do it ourselves until it happens in the
	# kernel

	# We need to know the index of the last element before we
	# remove elements because arrays are sparse
	local last=$(( ${#words[@]} - 1 ))

	# The arguments to unset are quoted throughout: unquoted, "words[0]"
	# is a glob pattern that can expand to a file name such as "words0",
	# in which case the array element would not be removed.
	for i in "${!words[@]}"; do
		# Remove the address
		if [[ ${words[$i]} =~ \[\<([^]]+)\>\] ]]; then
			unset "words[$i]"
		fi

		# Format timestamps with tabs
		if [[ ${words[$i]} == \[ && ${words[$i+1]} == *\] ]]; then
			unset "words[$i]"
			words[$i+1]=$(printf "[%13s\n" "${words[$i+1]}")
		fi
	done

	if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then
		module=${words[$last]}
		module=${module#\[}
		module=${module%\]}
		symbol=${words[$last-1]}
		unset "words[$last-1]"
	else
		# The symbol is the last element, process it
		symbol=${words[$last]}
		module=
	fi

	unset "words[$last]"
	parse_symbol # modifies $symbol

	# Add up the line number to the symbol
	echo "${words[@]}" "$symbol $module"
}
# Main loop: filter stdin line by line.
#   - lines holding a "[<address>]" token or a "symbol+0x../0x.." token go to
#     handle_line;
#   - lines containing "Code:" go to decode_code;
#   - everything else is printed back.
#
# read -r keeps backslashes in the input intact, and the "|| [[ -n $line ]]"
# clause also processes a final line that lacks a trailing newline.
while read -r line || [[ -n $line ]]; do
	# Let's see if we have an address in the line
	if [[ $line =~ \[\<([^]]+)\>\] ]] ||
	   [[ $line =~ [^+\ ]+\+0x[0-9a-f]+/0x[0-9a-f]+ ]]; then
		# Translate address to line numbers
		handle_line "$line"
	# Is it a code line?
	elif [[ $line == *Code:* ]]; then
		decode_code "$line"
	else
		# Nothing special in this line, show it as is. printf is used
		# because echo would treat a line such as "-n" as an option.
		printf '%s\n' "$line"
	fi
done