Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/bwh/sfc-next

Ben Hutchings says:

====================
1. Make EEH recovery work when using legacy interrupts, from Alexandre
   Rames.  (The all-ones detection idea is sketched in C after this
   message.)

2. Enable accelerated RFS for VLAN-tagged flows, from Andy Lutomirski.
   (The VLAN tag-peeling idea is likewise sketched below.)

3. Improve performance for non-TCP (and particularly UDP) traffic, which
   regressed in 3.10 when we switched to always allocating paged RX
   buffers.  Partly by Jon Cooper.

4. Some minor bug fixes to IOMMU detection, timestamping capabilities,
   and IRQ cleanup on the probe failure path.

I've dropped the RX skb cache, which improved some benchmarks but
perhaps needs some reworking to be more generally useful.
====================
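As an aside, the detection trick behind item 1: once EEH isolates a faulted PCI device, every MMIO read returns all ones, so an interrupt handler can treat an impossible all-ones status value as "the device is gone" and mask its own level-triggered IRQ before the line storms. Below is a minimal, self-contained C sketch of that check only; read_isr(), the flag, and the printed "recovery" step are invented stand-ins, not driver code (the real logic is the efx_legacy_interrupt() hunk in nic.c further down).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for an MMIO read of an interrupt status register; here it
 * simulates a device that EEH has just isolated. */
static uint32_t read_isr(void)
{
	return 0xFFFFFFFFu;
}

static bool irq_disabled;	/* mirrors efx->eeh_disabled_legacy_irq */

static void handle_legacy_irq(void)
{
	uint32_t isr = read_isr();

	/* An all-ones read can only mean a dead or isolated device.
	 * Mask the legacy IRQ ourselves; the EEH core would do it too
	 * late and the level-triggered line would keep firing. */
	if (isr == 0xFFFFFFFFu && !irq_disabled) {
		irq_disabled = true;
		puts("all-ones read: masking legacy IRQ, scheduling recovery");
		return;
	}

	/* ...normal event-queue processing would follow here... */
}

int main(void)
{
	handle_legacy_irq();
	return 0;
}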
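Likewise for item 2: the NIC's filters can match an IP 5-tuple but not a VLAN tag plus 5-tuple, so a tagged frame is steered by checking the encapsulated EtherType and stepping over the 4-byte 802.1Q header before parsing the inner IP header. A user-space C sketch of that offset calculation follows; ip_header_offset() is an invented helper, not the driver's API (the in-kernel version is the efx_filter_rfs() change in filter.c below).

#include <arpa/inet.h>	/* htons() */
#include <stdint.h>

#define ETH_P_IP	0x0800
#define ETH_P_8021Q	0x8100

struct vlan_hdr {
	uint16_t h_vlan_TCI;			/* priority + VLAN ID */
	uint16_t h_vlan_encapsulated_proto;	/* inner EtherType */
};

/* Return the offset of the IPv4 header within 'data', or -1 if the
 * frame is neither plain nor VLAN-tagged IPv4 and cannot be steered.
 * 'proto' is the outer EtherType (network byte order), 'nhoff' the
 * network-header offset reported by the stack. */
static int ip_header_offset(const uint8_t *data, uint16_t proto, int nhoff)
{
	if (proto == htons(ETH_P_8021Q)) {
		const struct vlan_hdr *vh =
			(const struct vlan_hdr *)(data + nhoff);

		if (vh->h_vlan_encapsulated_proto != htons(ETH_P_IP))
			return -1;	/* e.g. IPv6 in VLAN: not supported */
		return nhoff + (int)sizeof(struct vlan_hdr);
	}
	return proto == htons(ETH_P_IP) ? nhoff : -1;
}

int main(void)
{
	/* A VLAN tag with TCI 0 and inner EtherType 0x0800 (IPv4). */
	const uint8_t tag[4] = { 0x00, 0x00, 0x08, 0x00 };

	return ip_header_offset(tag, htons(ETH_P_8021Q), 0) == 4 ? 0 : 1;
}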

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller on 2013-06-25 16:11:41 -07:00
commit 3bae9db9aa
9 changed files with 114 additions and 59 deletions

drivers/net/ethernet/sfc/efx.c

@@ -21,8 +21,8 @@
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
-#include <linux/cpu_rmap.h>
#include <linux/aer.h>
+#include <linux/interrupt.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
@@ -1283,29 +1283,6 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
return count;
}
-static int
-efx_init_rx_cpu_rmap(struct efx_nic *efx, struct msix_entry *xentries)
-{
-#ifdef CONFIG_RFS_ACCEL
-unsigned int i;
-int rc;
-efx->net_dev->rx_cpu_rmap = alloc_irq_cpu_rmap(efx->n_rx_channels);
-if (!efx->net_dev->rx_cpu_rmap)
-return -ENOMEM;
-for (i = 0; i < efx->n_rx_channels; i++) {
-rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap,
-xentries[i].vector);
-if (rc) {
-free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
-efx->net_dev->rx_cpu_rmap = NULL;
-return rc;
-}
-}
-#endif
-return 0;
-}
/* Probe the number and type of interrupts we are able to obtain, and
* the resulting numbers of channels and RX queues.
*/
@@ -1359,11 +1336,6 @@ static int efx_probe_interrupts(struct efx_nic *efx)
efx->n_tx_channels = n_channels;
efx->n_rx_channels = n_channels;
}
-rc = efx_init_rx_cpu_rmap(efx, xentries);
-if (rc) {
-pci_disable_msix(efx->pci_dev);
-return rc;
-}
for (i = 0; i < efx->n_channels; i++)
efx_get_channel(efx, i)->irq =
xentries[i].vector;
@@ -1427,6 +1399,10 @@ static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq)
BUG_ON(efx->state == STATE_DISABLED);
+if (efx->eeh_disabled_legacy_irq) {
+enable_irq(efx->legacy_irq);
+efx->eeh_disabled_legacy_irq = false;
+}
if (efx->legacy_irq)
efx->legacy_irq_enabled = true;
efx_nic_enable_interrupts(efx);
@@ -2365,7 +2341,7 @@ out:
* Returns 0 if the recovery mechanisms are unsuccessful.
* Returns a non-zero value otherwise.
*/
-static int efx_try_recovery(struct efx_nic *efx)
+int efx_try_recovery(struct efx_nic *efx)
{
#ifdef CONFIG_EEH
/* A PCI error can occur and not be seen by EEH because nothing
@@ -2603,10 +2579,6 @@ static void efx_pci_remove_main(struct efx_nic *efx)
BUG_ON(efx->state == STATE_READY);
cancel_work_sync(&efx->reset_work);
-#ifdef CONFIG_RFS_ACCEL
-free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
-efx->net_dev->rx_cpu_rmap = NULL;
-#endif
efx_stop_interrupts(efx, false);
efx_nic_fini_interrupt(efx);
efx_fini_port(efx);

drivers/net/ethernet/sfc/efx.h

@@ -124,6 +124,7 @@ extern const struct ethtool_ops efx_ethtool_ops;
extern int efx_reset(struct efx_nic *efx, enum reset_type method);
extern void efx_reset_down(struct efx_nic *efx, enum reset_type method);
extern int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok);
+extern int efx_try_recovery(struct efx_nic *efx);
/* Global */
extern void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);

drivers/net/ethernet/sfc/ethtool.c

@@ -1114,6 +1114,20 @@ static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev,
return 0;
}
+int efx_ethtool_get_ts_info(struct net_device *net_dev,
+struct ethtool_ts_info *ts_info)
+{
+struct efx_nic *efx = netdev_priv(net_dev);
+/* Software capabilities */
+ts_info->so_timestamping = (SOF_TIMESTAMPING_RX_SOFTWARE |
+SOF_TIMESTAMPING_SOFTWARE);
+ts_info->phc_index = -1;
+efx_ptp_get_ts_info(efx, ts_info);
+return 0;
+}
static int efx_ethtool_get_module_eeprom(struct net_device *net_dev,
struct ethtool_eeprom *ee,
u8 *data)
@@ -1176,7 +1190,7 @@ const struct ethtool_ops efx_ethtool_ops = {
.get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size,
.get_rxfh_indir = efx_ethtool_get_rxfh_indir,
.set_rxfh_indir = efx_ethtool_set_rxfh_indir,
-.get_ts_info = efx_ptp_get_ts_info,
+.get_ts_info = efx_ethtool_get_ts_info,
.get_module_info = efx_ethtool_get_module_info,
.get_module_eeprom = efx_ethtool_get_module_eeprom,
};

drivers/net/ethernet/sfc/filter.c

@@ -1185,8 +1185,21 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
nhoff = skb_network_offset(skb);
-if (skb->protocol != htons(ETH_P_IP))
+if (skb->protocol == htons(ETH_P_8021Q)) {
+EFX_BUG_ON_PARANOID(skb_headlen(skb) <
+nhoff + sizeof(struct vlan_hdr));
+if (((const struct vlan_hdr *)(skb->data + nhoff))->
+h_vlan_encapsulated_proto != htons(ETH_P_IP))
+return -EPROTONOSUPPORT;
+/* This is IP over 802.1q VLAN. We can't filter on the
+* IP 5-tuple and the vlan together, so just strip the
+* vlan header and filter on the IP part.
+*/
+nhoff += sizeof(struct vlan_hdr);
+} else if (skb->protocol != htons(ETH_P_IP)) {
return -EPROTONOSUPPORT;
+}
/* RFS must validate the IP header length before calling us */
EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + sizeof(*ip));

drivers/net/ethernet/sfc/net_driver.h

@@ -243,6 +243,7 @@ struct efx_rx_buffer {
#define EFX_RX_BUF_LAST_IN_PAGE 0x0001
#define EFX_RX_PKT_CSUMMED 0x0002
#define EFX_RX_PKT_DISCARD 0x0004
+#define EFX_RX_PKT_TCP 0x0040
/**
* struct efx_rx_page_state - Page-based rx buffer state
@@ -788,6 +789,7 @@ struct efx_nic {
const struct efx_nic_type *type;
int legacy_irq;
bool legacy_irq_enabled;
+bool eeh_disabled_legacy_irq;
struct workqueue_struct *workqueue;
char workqueue_name[16];
struct work_struct reset_work;

drivers/net/ethernet/sfc/nic.c

@@ -14,6 +14,7 @@
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/seq_file.h>
+#include <linux/cpu_rmap.h>
#include "net_driver.h"
#include "bitfield.h"
#include "efx.h"
@@ -1080,12 +1081,21 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
if (likely(rx_ev_pkt_ok)) {
-/* If packet is marked as OK and packet type is TCP/IP or
-* UDP/IP, then we can rely on the hardware checksum.
+/* If packet is marked as OK then we can rely on the
+* hardware checksum and classification.
*/
-flags = (rx_ev_hdr_type == FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP ||
-rx_ev_hdr_type == FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP) ?
-EFX_RX_PKT_CSUMMED : 0;
+flags = 0;
+switch (rx_ev_hdr_type) {
+case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP:
+flags |= EFX_RX_PKT_TCP;
+/* fall through */
+case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP:
+flags |= EFX_RX_PKT_CSUMMED;
+/* fall through */
+case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_OTHER:
+case FSE_AZ_RX_EV_HDR_TYPE_OTHER:
+break;
+}
} else {
flags = efx_handle_rx_not_ok(rx_queue, event);
}
@@ -1579,6 +1589,16 @@ static irqreturn_t efx_legacy_interrupt(int irq, void *dev_id)
efx_readd(efx, &reg, FR_BZ_INT_ISR0);
queues = EFX_EXTRACT_DWORD(reg, 0, 31);
+/* Legacy interrupts are disabled too late by the EEH kernel
+* code. Disable them earlier.
+* If an EEH error occurred, the read will have returned all ones.
+*/
+if (EFX_DWORD_IS_ALL_ONES(reg) && efx_try_recovery(efx) &&
+!efx->eeh_disabled_legacy_irq) {
+disable_irq_nosync(efx->legacy_irq);
+efx->eeh_disabled_legacy_irq = true;
+}
/* Handle non-event-queue sources */
if (queues & (1U << efx->irq_level)) {
syserr = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
@@ -1687,6 +1707,7 @@ void efx_nic_push_rx_indir_table(struct efx_nic *efx)
int efx_nic_init_interrupt(struct efx_nic *efx)
{
struct efx_channel *channel;
+unsigned int n_irqs;
int rc;
if (!EFX_INT_MODE_USE_MSI(efx)) {
@@ -1707,7 +1728,19 @@ int efx_nic_init_interrupt(struct efx_nic *efx)
return 0;
}
+#ifdef CONFIG_RFS_ACCEL
+if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+efx->net_dev->rx_cpu_rmap =
+alloc_irq_cpu_rmap(efx->n_rx_channels);
+if (!efx->net_dev->rx_cpu_rmap) {
+rc = -ENOMEM;
+goto fail1;
+}
+}
+#endif
/* Hook MSI or MSI-X interrupt */
+n_irqs = 0;
efx_for_each_channel(channel, efx) {
rc = request_irq(channel->irq, efx_msi_interrupt,
IRQF_PROBE_SHARED, /* Not shared */
@@ -1718,13 +1751,31 @@
"failed to hook IRQ %d\n", channel->irq);
goto fail2;
}
+++n_irqs;
+#ifdef CONFIG_RFS_ACCEL
+if (efx->interrupt_mode == EFX_INT_MODE_MSIX &&
+channel->channel < efx->n_rx_channels) {
+rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap,
+channel->irq);
+if (rc)
+goto fail2;
+}
+#endif
}
return 0;
fail2:
-efx_for_each_channel(channel, efx)
+#ifdef CONFIG_RFS_ACCEL
+free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+efx_for_each_channel(channel, efx) {
+if (n_irqs-- == 0)
+break;
free_irq(channel->irq, &efx->channel[channel->channel]);
+}
fail1:
return rc;
}
@@ -1734,11 +1785,14 @@
struct efx_channel *channel;
efx_oword_t reg;
+#ifdef CONFIG_RFS_ACCEL
+free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+efx->net_dev->rx_cpu_rmap = NULL;
+#endif
/* Disable MSI/MSI-X interrupts */
-efx_for_each_channel(channel, efx) {
-if (channel->irq)
-free_irq(channel->irq, &efx->channel[channel->channel]);
-}
+efx_for_each_channel(channel, efx)
+free_irq(channel->irq, &efx->channel[channel->channel]);
/* ACK legacy interrupt */
if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0)

drivers/net/ethernet/sfc/nic.h

@@ -254,8 +254,8 @@ extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf,
struct ethtool_ts_info;
extern void efx_ptp_probe(struct efx_nic *efx);
extern int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd);
-extern int efx_ptp_get_ts_info(struct net_device *net_dev,
-struct ethtool_ts_info *ts_info);
+extern void efx_ptp_get_ts_info(struct efx_nic *efx,
+struct ethtool_ts_info *ts_info);
extern bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
extern int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
extern void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev);

drivers/net/ethernet/sfc/ptp.c

@@ -1203,18 +1203,16 @@ static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init)
return 0;
}
-int
-efx_ptp_get_ts_info(struct net_device *net_dev, struct ethtool_ts_info *ts_info)
+void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info)
{
-struct efx_nic *efx = netdev_priv(net_dev);
struct efx_ptp_data *ptp = efx->ptp_data;
if (!ptp)
-return -EOPNOTSUPP;
+return;
-ts_info->so_timestamping = (SOF_TIMESTAMPING_TX_HARDWARE |
-SOF_TIMESTAMPING_RX_HARDWARE |
-SOF_TIMESTAMPING_RAW_HARDWARE);
+ts_info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE |
+SOF_TIMESTAMPING_RX_HARDWARE |
+SOF_TIMESTAMPING_RAW_HARDWARE);
ts_info->phc_index = ptp_clock_index(ptp->phc_clock);
ts_info->tx_types = 1 << HWTSTAMP_TX_OFF | 1 << HWTSTAMP_TX_ON;
ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE |
@@ -1224,7 +1222,6 @@ efx_ptp_get_ts_info(struct net_device *net_dev, struct ethtool_ts_info *ts_info)
1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT |
1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC |
1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ);
-return 0;
}
int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd)

drivers/net/ethernet/sfc/rx.c

@@ -36,7 +36,7 @@
#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
/* Size of buffer allocated for skb header area. */
-#define EFX_SKB_HEADERS 64u
+#define EFX_SKB_HEADERS 128u
/* This is the percentage fill level below which new RX descriptors
* will be added to the RX descriptor ring.
@@ -598,6 +598,8 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
/* Set the SKB flags */
skb_checksum_none_assert(skb);
+if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED))
+skb->ip_summed = CHECKSUM_UNNECESSARY;
if (channel->type->receive_skb)
if (channel->type->receive_skb(channel, skb))
@@ -627,7 +629,7 @@ void __efx_rx_packet(struct efx_channel *channel)
if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
-if (!channel->type->receive_skb)
+if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
else
efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
@@ -675,7 +677,7 @@ static void efx_init_rx_recycle_ring(struct efx_nic *efx,
#ifdef CONFIG_PPC64
bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
#else
-if (efx->pci_dev->dev.iommu_group)
+if (iommu_present(&pci_bus_type))
bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
else
bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;