Merge branch 'hfi1' into merge-test

This commit is contained in:
Doug Ledford 2016-12-14 14:44:08 -05:00
commit 253f8b22e0
46 changed files with 1788 additions and 676 deletions

View File

@ -125,6 +125,7 @@ int node_affinity_init(void)
cpumask_weight(topology_sibling_cpumask(
cpumask_first(&node_affinity.proc.mask)
));
node_affinity.num_possible_nodes = num_possible_nodes();
node_affinity.num_online_nodes = num_online_nodes();
node_affinity.num_online_cpus = num_online_cpus();
@ -135,7 +136,7 @@ int node_affinity_init(void)
*/
init_real_cpu_mask();
hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
if (!hfi1_per_node_cntr)
return -ENOMEM;

View File

@ -70,14 +70,6 @@ struct cpu_mask_set {
uint gen;
};
/*
 * Per-device affinity bookkeeping: two cpu_mask_set instances, one cpumask
 * and the spinlock that guards them.
 * NOTE(review): def_intr/rcv_intr presumably track CPUs for default and
 * receive interrupts, and real_cpu_mask presumably excludes HT siblings -
 * confirm against the users of this struct.
 */
struct hfi1_affinity {
struct cpu_mask_set def_intr;
struct cpu_mask_set rcv_intr;
struct cpumask real_cpu_mask;
/* spin lock to protect affinity struct */
spinlock_t lock;
};
struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
@ -119,6 +111,7 @@ struct hfi1_affinity_node_list {
struct cpumask real_cpu_mask;
struct cpu_mask_set proc;
int num_core_siblings;
int num_possible_nodes;
int num_online_nodes;
int num_online_cpus;
struct mutex lock; /* protects affinity nodes */

View File

@ -8488,7 +8488,10 @@ static int do_8051_command(
*/
if (type == HCMD_WRITE_LCB_CSR) {
in_data |= ((*out_data) & 0xffffffffffull) << 8;
reg = ((((*out_data) >> 40) & 0xff) <<
/* must preserve COMPLETED - it is tied to hardware */
reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0);
reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK;
reg |= ((((*out_data) >> 40) & 0xff) <<
DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT)
| ((((*out_data) >> 48) & 0xffff) <<
DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
@ -9567,11 +9570,11 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
guid = ppd->guid;
guid = ppd->guids[HFI1_PORT_GUID_INDEX];
if (!guid) {
if (dd->base_guid)
guid = dd->base_guid + ppd->port - 1;
ppd->guid = guid;
ppd->guids[HFI1_PORT_GUID_INDEX] = guid;
}
/* Set linkinit_reason on power up per OPA spec */

View File

@ -415,6 +415,9 @@
#define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32
#define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0
#define ASIC_CFG_SCRATCH (ASIC + 0x000000000020)
#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08)
#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10)
#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18)
#define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050)
#define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308)
#define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull

View File

@ -541,6 +541,114 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
return ret;
}
/*
 * debugfs read handler exposing the dc8051 data memory.
 *
 * A zeroed shadow buffer the size of the whole 8051 data memory is
 * allocated, only the 8-byte aligned window covering the request is filled
 * in from the device, and simple_read_from_buffer() then copies the bytes
 * the caller asked for and advances *ppos.
 *
 * Returns the value of simple_read_from_buffer() on success, -EINVAL for a
 * negative position, -ENOMEM if the shadow buffer cannot be allocated, or
 * the non-zero result of read_8051_data() if the device read fails.
 */
static ssize_t dc8051_memory_read(struct file *file, char __user *buf,
				  size_t count, loff_t *ppos)
{
	struct hfi1_pportdata *ppd = private2ppd(file);
	loff_t first, last;
	ssize_t ret;
	void *shadow;

	/* the range arithmetic below expects a non-negative position */
	if (*ppos < 0)
		return -EINVAL;

	shadow = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL);
	if (!shadow)
		return -ENOMEM;

	/* 8051 memory is read in 8 byte units: round the start down */
	first = *ppos & ~0x7;

	/* nothing to fetch from the device when starting past the end */
	if (first >= DC8051_DATA_MEM_SIZE)
		goto copy_out;

	/* round the end up, then clamp it to the size of the memory */
	last = (*ppos + count + 7) & ~0x7;
	if (last > DC8051_DATA_MEM_SIZE)
		last = DC8051_DATA_MEM_SIZE;

	ret = read_8051_data(ppd->dd, first, last - first,
			     (u64 *)(shadow + first));
	if (ret)
		goto free_shadow;

copy_out:
	ret = simple_read_from_buffer(buf, count, ppos, shadow,
				      DC8051_DATA_MEM_SIZE);
free_shadow:
	kfree(shadow);
	return ret;
}
/*
 * debugfs read handler for the LCB CSR window [LCB_START, LCB_END).
 *
 * *ppos is the byte offset from LCB_START.  Both the offset and the count
 * must be multiples of 8 because CSRs are transferred as whole 64-bit
 * values.  A request that extends past the end of the window is trimmed.
 *
 * Returns the number of bytes copied to user space.  If some CSRs were
 * transferred before a failure, the short count is returned.  If nothing
 * could be transferred, an error is returned instead of 0 so that the
 * caller does not mistake the failure for end-of-file:
 *   -EINVAL - negative or unaligned offset, or unaligned count
 *   -EFAULT - the user buffer could not be written
 *   other   - the CSR read failed (the helper's negative return value, or
 *             -EIO if it reported failure with a non-negative value)
 */
static ssize_t debugfs_lcb_read(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	struct hfi1_pportdata *ppd = private2ppd(file);
	struct hfi1_devdata *dd = ppd->dd;
	unsigned long total, csr_off;
	ssize_t err = 0;
	u64 data;
	int rc;

	if (*ppos < 0)
		return -EINVAL;
	/* only read 8 byte quantities */
	if ((count % 8) != 0)
		return -EINVAL;
	/* offset must be 8-byte aligned */
	if ((*ppos % 8) != 0)
		return -EINVAL;
	/* do nothing if out of range or zero count */
	if (*ppos >= (LCB_END - LCB_START) || !count)
		return 0;
	/* reduce count if needed */
	if (*ppos + count > LCB_END - LCB_START)
		count = (LCB_END - LCB_START) - *ppos;

	csr_off = LCB_START + *ppos;
	for (total = 0; total < count; total += 8, csr_off += 8) {
		rc = read_lcb_csr(dd, csr_off, &data);
		if (rc) {
			err = rc < 0 ? rc : -EIO;
			break;
		}
		/* store all 64 bits regardless of the size of long */
		if (put_user(data, (u64 __user *)(buf + total))) {
			err = -EFAULT;
			break;
		}
	}
	*ppos += total;

	/* report the error only when no data at all was transferred */
	if (!total)
		return err;
	return total;
}
/*
 * debugfs write handler for the LCB CSR window [LCB_START, LCB_END).
 *
 * *ppos is the byte offset from LCB_START.  Both the offset and the count
 * must be multiples of 8 because CSRs are transferred as whole 64-bit
 * values.  A request that extends past the end of the window is trimmed.
 *
 * Returns the number of bytes consumed from user space.  If some CSRs were
 * written before a failure, the short count is returned.  If nothing could
 * be written, an error is returned instead of 0 so that a caller looping on
 * short writes does not retry forever:
 *   -EINVAL - negative or unaligned offset, or unaligned count
 *   -EFAULT - the user buffer could not be read
 *   other   - the CSR write failed (the helper's negative return value, or
 *             -EIO if it reported failure with a non-negative value)
 */
static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf,
				 size_t count, loff_t *ppos)
{
	struct hfi1_pportdata *ppd = private2ppd(file);
	struct hfi1_devdata *dd = ppd->dd;
	unsigned long total, csr_off;
	ssize_t err = 0;
	u64 data;
	int rc;

	if (*ppos < 0)
		return -EINVAL;
	/* only write 8 byte quantities */
	if ((count % 8) != 0)
		return -EINVAL;
	/* offset must be 8-byte aligned */
	if ((*ppos % 8) != 0)
		return -EINVAL;
	/* do nothing if out of range or zero count */
	if (*ppos >= (LCB_END - LCB_START) || !count)
		return 0;
	/* reduce count if needed */
	if (*ppos + count > LCB_END - LCB_START)
		count = (LCB_END - LCB_START) - *ppos;

	csr_off = LCB_START + *ppos;
	for (total = 0; total < count; total += 8, csr_off += 8) {
		/* fetch all 64 bits regardless of the size of long */
		if (get_user(data, (u64 __user *)(buf + total))) {
			err = -EFAULT;
			break;
		}
		rc = write_lcb_csr(dd, csr_off, data);
		if (rc) {
			err = rc < 0 ? rc : -EIO;
			break;
		}
	}
	*ppos += total;

	/* report the error only when no data at all was transferred */
	if (!total)
		return err;
	return total;
}
/*
* read the per-port QSFP data for ppd
*/
@ -931,6 +1039,8 @@ static const struct counter_info port_cntr_ops[] = {
DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
qsfp2_debugfs_open, qsfp2_debugfs_release),
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL),
DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write),
};
static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)

View File

@ -793,8 +793,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet)
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
rvt_put_qp(qp);
}
}

View File

@ -207,6 +207,40 @@ done_asic:
/* magic character sequence that trails an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO"
/* EPROM file types */
#define HFI1_EFT_PLATFORM_CONFIG 2
/* segment size - 128 KiB */
#define SEG_SIZE (128 * 1024)
/*
 * Footer of a segment-format EPROM directory.  The code in this file
 * overlays it on the final bytes of the directory page and recognizes the
 * format by comparing the last dword against FOOTER_MAGIC, so the field
 * order must not change.
 */
struct hfi1_eprom_footer {
u32 oprom_size; /* size of the oprom, in bytes */
u16 num_table_entries; /* number of file table entries preceding the footer */
u16 version; /* version of this footer */
u32 magic; /* must be last */
};
/*
 * One entry of the EPROM file table.  The table is an array of these
 * entries that sits immediately before the footer.
 */
struct hfi1_eprom_table_entry {
u32 type; /* file type */
u32 offset; /* file offset from start of EPROM */
u32 size; /* file size, in bytes */
};
/*
 * Calculate the max number of table entries that will fit within a directory
 * buffer of size 'dir_size'.
 *
 * A 'dir_size' too small to hold even the footer yields 0 rather than
 * letting the unsigned subtraction wrap around to a huge count.
 * 'dir_size' is evaluated twice, so it must not have side effects.
 */
#define MAX_TABLE_ENTRIES(dir_size) \
	((dir_size) < sizeof(struct hfi1_eprom_footer) ? 0 : \
	 ((dir_size) - sizeof(struct hfi1_eprom_footer)) / \
		sizeof(struct hfi1_eprom_table_entry))
/* size in bytes of a directory holding the footer plus 'n' table entries */
#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \
			   (sizeof(struct hfi1_eprom_table_entry) * (n)))

/*
 * Pack four byte values into a u32 with 'a' in the least significant byte.
 * Each argument is converted to u32 before shifting so that a top byte of
 * 0x80 or above is never shifted into the sign bit of an int.
 */
#define MAGIC4(a, b, c, d) \
	((u32)(d) << 24 | (u32)(c) << 16 | (u32)(b) << 8 | (u32)(a))
#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm')
#define FOOTER_VERSION 1
/*
* Read all of partition 1. The actual file is at the front. Adjust
* the returned size if a trailing image magic is found.
@ -241,6 +275,167 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
return 0;
}
/*
 * Read the platform configuration file from a segment-format EPROM.
 *
 * The segment magic has been checked. There is a footer and table of
 * contents present.
 *
 * directory is a u32 aligned buffer of size EP_PAGE_SIZE.
 *
 * On success, *data points to a kmalloc'ed buffer holding the file and
 * *size is its length in bytes; this function does not free that buffer
 * (presumably the caller owns it - confirm against the callers).
 *
 * Return values:
 *	0	- success
 *	-EINVAL	- unsupported footer version or inconsistent directory/file
 *		  values
 *	-ENOENT	- no platform configuration entry in the file table
 *	-ENOMEM	- a buffer could not be allocated
 *	other	- non-zero value returned by read_length()
 */
static int read_segment_platform_config(struct hfi1_devdata *dd,
void *directory, void **data, u32 *size)
{
struct hfi1_eprom_footer *footer;
struct hfi1_eprom_table_entry *table;
struct hfi1_eprom_table_entry *entry;
void *buffer = NULL;
void *table_buffer = NULL;
int ret, i;
u32 directory_size;
u32 seg_base, seg_offset;
u32 bytes_available, ncopied, to_copy;
/* the footer is at the end of the directory */
footer = (struct hfi1_eprom_footer *)
(directory + EP_PAGE_SIZE - sizeof(*footer));
/* make sure the structure version is supported */
if (footer->version != FOOTER_VERSION)
return -EINVAL;
/* oprom size cannot be larger than a segment */
if (footer->oprom_size >= SEG_SIZE)
return -EINVAL;
/*
 * the file table must fit in a segment with the oprom
 *
 * NOTE(review): this relies on MAX_TABLE_ENTRIES() coping with an
 * argument that is smaller than the footer itself.
 */
if (footer->num_table_entries >
MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size))
return -EINVAL;
/* find the file table start, which precedes the footer */
directory_size = DIRECTORY_SIZE(footer->num_table_entries);
if (directory_size <= EP_PAGE_SIZE) {
/* the file table fits into the directory buffer handed in */
table = (struct hfi1_eprom_table_entry *)
(directory + EP_PAGE_SIZE - directory_size);
} else {
/* need to allocate and read more */
table_buffer = kmalloc(directory_size, GFP_KERNEL);
if (!table_buffer)
return -ENOMEM;
/* the directory occupies the last directory_size bytes of segment 0 */
ret = read_length(dd, SEG_SIZE - directory_size,
directory_size, table_buffer);
if (ret)
goto done;
table = table_buffer;
}
/* look for the platform configuration file in the table */
for (entry = NULL, i = 0; i < footer->num_table_entries; i++) {
if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) {
entry = &table[i];
break;
}
}
if (!entry) {
ret = -ENOENT;
goto done;
}
/*
 * Sanity check on the configuration file size - it should never
 * be larger than 4 KiB.
 */
if (entry->size > (4 * 1024)) {
dd_dev_err(dd, "Bad configuration file size 0x%x\n",
entry->size);
ret = -EINVAL;
goto done;
}
/* check for bogus offset and size that wrap when added together */
if (entry->offset + entry->size < entry->offset) {
dd_dev_err(dd,
"Bad configuration file start + size 0x%x+0x%x\n",
entry->offset, entry->size);
ret = -EINVAL;
goto done;
}
/* allocate the buffer to return */
buffer = kmalloc(entry->size, GFP_KERNEL);
if (!buffer) {
ret = -ENOMEM;
goto done;
}
/*
 * Extract the file by looping over segments until it is fully read.
 * seg_base is the EPROM offset of the current segment and seg_offset
 * is the read position within that segment.
 */
seg_offset = entry->offset % SEG_SIZE;
seg_base = entry->offset - seg_offset;
ncopied = 0;
while (ncopied < entry->size) {
/* calculate data bytes available in this segment */
/* start with the bytes from the current offset to the end */
bytes_available = SEG_SIZE - seg_offset;
/* subtract off footer and table from segment 0 */
if (seg_base == 0) {
/*
 * Sanity check: should not have a starting point
 * at or within the directory.
 */
if (bytes_available <= directory_size) {
dd_dev_err(dd,
"Bad configuration file - offset 0x%x within footer+table\n",
entry->offset);
ret = -EINVAL;
goto done;
}
bytes_available -= directory_size;
}
/* calculate bytes wanted */
to_copy = entry->size - ncopied;
/* max out at the available bytes in this segment */
if (to_copy > bytes_available)
to_copy = bytes_available;
/*
 * Read from the EPROM.
 *
 * The sanity check for entry->offset is done in read_length().
 * The EPROM offset is validated against what the hardware
 * addressing supports. In addition, if the offset is larger
 * than the actual EPROM, it silently wraps. It will work
 * fine, though the reader may not get what they expected
 * from the EPROM.
 */
ret = read_length(dd, seg_base + seg_offset, to_copy,
buffer + ncopied);
if (ret)
goto done;
ncopied += to_copy;
/*
 * set up for next segment: file data in each following segment
 * resumes at offset oprom_size
 */
seg_offset = footer->oprom_size;
seg_base += SEG_SIZE;
}
/* success */
ret = 0;
*data = buffer;
*size = entry->size;
done:
/* table_buffer is NULL unless the table was read separately */
kfree(table_buffer);
/* on failure nothing is handed back, so release the file buffer */
if (ret)
kfree(buffer);
return ret;
}
/*
* Read the platform configuration file from the EPROM.
*
@ -253,6 +448,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
* -EBUSY - not able to acquire access to the EPROM
* -ENOENT - no recognizable file written
* -ENOMEM - buffer could not be allocated
* -EINVAL - invalid EPROM contents found
*/
int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
{
@ -266,21 +462,20 @@ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
if (ret)
return -EBUSY;
/* read the last page of P0 for the EPROM format magic */
ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
/* read the last page of the segment for the EPROM format magic */
ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
if (ret)
goto done;
/* last dword of P0 contains a magic indicator */
if (directory[EP_PAGE_DWORDS - 1] == 0) {
/* last dword of the segment contains a magic value */
if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) {
/* segment format */
ret = read_segment_platform_config(dd, directory, data, size);
} else {
/* partition format */
ret = read_partition_platform_config(dd, data, size);
goto done;
}
/* nothing recognized */
ret = -ENOENT;
done:
release_chip_resource(dd, CR_EPROM);
return ret;

View File

@ -239,6 +239,16 @@ static const u8 all_fabric_serdes_broadcast = 0xe1;
const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 };
static const u8 all_pcie_serdes_broadcast = 0xe0;
/*
 * Per-table limit values, one slot per platform configuration table.
 * NOTE(review): presumably indexed by the table type encoding, with slot 0
 * unused and each *_TABLE_MAX being the field count of that table - confirm
 * against the enum definitions and the users of this array.
 */
static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
0,
SYSTEM_TABLE_MAX,
PORT_TABLE_MAX,
RX_PRESET_TABLE_MAX,
TX_PRESET_TABLE_MAX,
QSFP_ATTEN_TABLE_MAX,
VARIABLE_SETTINGS_TABLE_MAX
};
/* forwards */
static void dispose_one_firmware(struct firmware_details *fdet);
static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
@ -263,11 +273,13 @@ static int __read_8051_data(struct hfi1_devdata *dd, u32 addr, u64 *result)
u64 reg;
int count;
/* start the read at the given address */
reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
<< DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT)
| DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK;
/* step 1: set the address, clear enable */
reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
<< DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT;
write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg);
/* step 2: enable */
write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL,
reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK);
/* wait until ACCESS_COMPLETED is set */
count = 0;
@ -707,6 +719,9 @@ static int obtain_firmware(struct hfi1_devdata *dd)
&dd->pcidev->dev);
if (err) {
platform_config = NULL;
dd_dev_err(dd,
"%s: No default platform config file found\n",
__func__);
goto done;
}
dd->platform_config.data = platform_config->data;
@ -1761,8 +1776,17 @@ int parse_platform_config(struct hfi1_devdata *dd)
u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
int ret = -EINVAL; /* assume failure */
/*
* For integrated devices that did not fall back to the default file,
* the SI tuning information for active channels is acquired from the
* scratch register bitmap, thus there is no platform config to parse.
* Skip parsing in these situations.
*/
if (is_integrated(dd) && !platform_config_load)
return 0;
if (!dd->platform_config.data) {
dd_dev_info(dd, "%s: Missing config file\n", __func__);
dd_dev_err(dd, "%s: Missing config file\n", __func__);
goto bail;
}
ptr = (u32 *)dd->platform_config.data;
@ -1770,7 +1794,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
magic_num = *ptr;
ptr++;
if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
dd_dev_info(dd, "%s: Bad config file\n", __func__);
dd_dev_err(dd, "%s: Bad config file\n", __func__);
goto bail;
}
@ -1797,9 +1821,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
header1 = *ptr;
header2 = *(ptr + 1);
if (header1 != ~header2) {
dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
__func__, (ptr - (u32 *)
dd->platform_config.data));
dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
__func__, (ptr - (u32 *)
dd->platform_config.data));
goto bail;
}
@ -1841,11 +1865,11 @@ int parse_platform_config(struct hfi1_devdata *dd)
table_length_dwords;
break;
default:
dd_dev_info(dd,
"%s: Unknown data table %d, offset %ld\n",
__func__, table_type,
(ptr - (u32 *)
dd->platform_config.data));
dd_dev_err(dd,
"%s: Unknown data table %d, offset %ld\n",
__func__, table_type,
(ptr - (u32 *)
dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@ -1865,11 +1889,11 @@ int parse_platform_config(struct hfi1_devdata *dd)
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
dd_dev_info(dd,
"%s: Unknown meta table %d, offset %ld\n",
__func__, table_type,
(ptr -
(u32 *)dd->platform_config.data));
dd_dev_err(dd,
"%s: Unknown meta table %d, offset %ld\n",
__func__, table_type,
(ptr -
(u32 *)dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =
@ -1884,10 +1908,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
/* Jump the table */
ptr += table_length_dwords;
if (crc != *ptr) {
dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
__func__, (ptr -
(u32 *)
dd->platform_config.data));
dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n",
__func__, (ptr -
(u32 *)dd->platform_config.data));
goto bail;
}
/* Jump the CRC DWORD */
@ -1901,6 +1924,84 @@ bail:
return ret;
}
/*
 * Look up one platform configuration field from the values saved in the
 * port data instead of from a parsed platform configuration file.
 *
 * @dd: device whose dd->pport supplies the saved values
 * @table_type: which platform configuration table the field belongs to
 * @field_index: field within that table
 * @data: receives the field value
 *
 * *data is written only for the table/field combinations handled below;
 * for anything else it is left untouched.
 */
static void get_integrated_platform_config_field(
		struct hfi1_devdata *dd,
		enum platform_config_table_type_encoding table_type,
		int field_index, u32 *data)
{
	struct hfi1_pportdata *ppd = dd->pport;
	u8 *qsfp_cache = ppd->qsfp_info.cache;
	u32 preset = 0;

	switch (table_type) {
	case PLATFORM_CONFIG_SYSTEM_TABLE:
		switch (field_index) {
		case SYSTEM_TABLE_QSFP_POWER_CLASS_MAX:
			*data = ppd->max_power_class;
			break;
		case SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G:
			*data = ppd->default_atten;
			break;
		default:
			break;
		}
		break;

	case PLATFORM_CONFIG_PORT_TABLE:
		switch (field_index) {
		case PORT_TABLE_PORT_TYPE:
			*data = ppd->port_type;
			break;
		case PORT_TABLE_LOCAL_ATTEN_25G:
			*data = ppd->local_atten;
			break;
		case PORT_TABLE_REMOTE_ATTEN_25G:
			*data = ppd->remote_atten;
			break;
		default:
			break;
		}
		break;

	case PLATFORM_CONFIG_RX_PRESET_TABLE:
		/* every RX field is a bit field of the saved rx_preset word */
		preset = ppd->rx_preset;
		switch (field_index) {
		case RX_PRESET_TABLE_QSFP_RX_CDR_APPLY:
			*data = (preset & QSFP_RX_CDR_APPLY_SMASK) >>
				QSFP_RX_CDR_APPLY_SHIFT;
			break;
		case RX_PRESET_TABLE_QSFP_RX_EMP_APPLY:
			*data = (preset & QSFP_RX_EMP_APPLY_SMASK) >>
				QSFP_RX_EMP_APPLY_SHIFT;
			break;
		case RX_PRESET_TABLE_QSFP_RX_AMP_APPLY:
			*data = (preset & QSFP_RX_AMP_APPLY_SMASK) >>
				QSFP_RX_AMP_APPLY_SHIFT;
			break;
		case RX_PRESET_TABLE_QSFP_RX_CDR:
			*data = (preset & QSFP_RX_CDR_SMASK) >>
				QSFP_RX_CDR_SHIFT;
			break;
		case RX_PRESET_TABLE_QSFP_RX_EMP:
			*data = (preset & QSFP_RX_EMP_SMASK) >>
				QSFP_RX_EMP_SHIFT;
			break;
		case RX_PRESET_TABLE_QSFP_RX_AMP:
			*data = (preset & QSFP_RX_AMP_SMASK) >>
				QSFP_RX_AMP_SHIFT;
			break;
		default:
			break;
		}
		break;

	case PLATFORM_CONFIG_TX_PRESET_TABLE:
		/*
		 * Bit 0x4 of the cached QSFP byte at QSFP_EQ_INFO_OFFS picks
		 * between the "eq" and "noeq" saved TX preset words.
		 */
		preset = (qsfp_cache[QSFP_EQ_INFO_OFFS] & 0x4) ?
			 ppd->tx_preset_eq : ppd->tx_preset_noeq;
		switch (field_index) {
		case TX_PRESET_TABLE_PRECUR:
			*data = (preset & TX_PRECUR_SMASK) >>
				TX_PRECUR_SHIFT;
			break;
		case TX_PRESET_TABLE_ATTN:
			*data = (preset & TX_ATTN_SMASK) >>
				TX_ATTN_SHIFT;
			break;
		case TX_PRESET_TABLE_POSTCUR:
			*data = (preset & TX_POSTCUR_SMASK) >>
				TX_POSTCUR_SHIFT;
			break;
		case TX_PRESET_TABLE_QSFP_TX_CDR_APPLY:
			*data = (preset & QSFP_TX_CDR_APPLY_SMASK) >>
				QSFP_TX_CDR_APPLY_SHIFT;
			break;
		case TX_PRESET_TABLE_QSFP_TX_EQ_APPLY:
			*data = (preset & QSFP_TX_EQ_APPLY_SMASK) >>
				QSFP_TX_EQ_APPLY_SHIFT;
			break;
		case TX_PRESET_TABLE_QSFP_TX_CDR:
			*data = (preset & QSFP_TX_CDR_SMASK) >>
				QSFP_TX_CDR_SHIFT;
			break;
		case TX_PRESET_TABLE_QSFP_TX_EQ:
			*data = (preset & QSFP_TX_EQ_SMASK) >>
				QSFP_TX_EQ_SHIFT;
			break;
		default:
			break;
		}
		break;

	/* no saved values exist for the remaining tables */
	case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
	case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
	default:
		break;
	}
}
static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
int field, u32 *field_len_bits,
u32 *field_start_bits)
@ -1976,6 +2077,15 @@ int get_platform_config_field(struct hfi1_devdata *dd,
else
return -EINVAL;
if (is_integrated(dd) && !platform_config_load) {
/*
* Use saved configuration from ppd for integrated platforms
*/
get_integrated_platform_config_field(dd, table_type,
field_index, data);
return 0;
}
ret = get_platform_fw_field_metadata(dd, table_type, field_index,
&field_len_bits,
&field_start_bits);

View File

@ -512,6 +512,9 @@ struct rvt_sge_state;
#define HFI1_MIN_VLS_SUPPORTED 1
#define HFI1_MAX_VLS_SUPPORTED 8
#define HFI1_GUIDS_PER_PORT 5
#define HFI1_PORT_GUID_INDEX 0
static inline void incr_cntr64(u64 *cntr)
{
if (*cntr < (u64)-1LL)
@ -579,11 +582,20 @@ struct hfi1_pportdata {
struct kobject vl2mtu_kobj;
/* PHY support */
u32 port_type;
struct qsfp_data qsfp_info;
/* Values for SI tuning of SerDes */
u32 port_type;
u32 tx_preset_eq;
u32 tx_preset_noeq;
u32 rx_preset;
u8 local_atten;
u8 remote_atten;
u8 default_atten;
u8 max_power_class;
/* GUIDs for this interface, in host order, guids[0] is a port guid */
u64 guids[HFI1_GUIDS_PER_PORT];
/* GUID for this interface, in host order */
u64 guid;
/* GUID for peer interface, in host order */
u64 neighbor_guid;
@ -848,32 +860,29 @@ struct hfi1_devdata {
u8 __iomem *kregend;
/* physical address of chip for io_remap, etc. */
resource_size_t physaddr;
/* receive context data */
struct hfi1_ctxtdata **rcd;
/* Per VL data. Enough for all VLs but not all elements are set/used. */
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
struct send_context_info *send_contexts;
/* map hardware send contexts to software index */
u8 *hw_to_sw;
/* spinlock for allocating and releasing send context resources */
spinlock_t sc_lock;
/* Per VL data. Enough for all VLs but not all elements are set/used. */
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* lock for pio_map */
spinlock_t pio_map_lock;
/* Send Context initialization lock. */
spinlock_t sc_init_lock;
/* lock for sdma_map */
spinlock_t sde_map_lock;
/* array of kernel send contexts */
struct send_context **kernel_send_context;
/* array of vl maps */
struct pio_vl_map __rcu *pio_map;
/* seqlock for sc2vl */
seqlock_t sc2vl_lock;
u64 sc2vl[4];
/* Send Context initialization lock. */
spinlock_t sc_init_lock;
/* default flags to last descriptor */
u64 default_desc1;
/* fields common to all SDMA engines */
/* default flags to last descriptor */
u64 default_desc1;
volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
dma_addr_t sdma_heads_phys;
void *sdma_pad_dma; /* DMA'ed by chip */
@ -884,8 +893,6 @@ struct hfi1_devdata {
u32 chip_sdma_engines;
/* num used */
u32 num_sdma;
/* lock for sdma_map */
spinlock_t sde_map_lock;
/* array of engines sized by num_sdma */
struct sdma_engine *per_sdma;
/* array of vl maps */
@ -894,14 +901,11 @@ struct hfi1_devdata {
wait_queue_head_t sdma_unfreeze_wq;
atomic_t sdma_unfreeze_count;
u32 lcb_access_count; /* count of LCB users */
/* common data between shared ASIC HFIs in this OS */
struct hfi1_asic_data *asic_data;
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
*/
struct hfi1_pportdata *pport;
/* mem-mapped pointer to base of PIO buffers */
void __iomem *piobase;
/*
@ -918,20 +922,13 @@ struct hfi1_devdata {
/* send context numbers and sizes for each type */
struct sc_config_sizes sc_sizes[SC_MAX];
u32 lcb_access_count; /* count of LCB users */
char *boardname; /* human readable board info */
/* device (not port) flags, basically device capabilities */
u32 flags;
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
u64 z_send_schedule;
/* percpu int_counter */
u64 __percpu *int_counter;
u64 __percpu *rcv_limit;
u64 __percpu *send_schedule;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
@ -946,6 +943,7 @@ struct hfi1_devdata {
/* base receive interrupt timeout, in CSR units */
u32 rcv_intr_timeout_csr;
u32 freezelen; /* max length of freezemsg */
u64 __iomem *egrtidbase;
spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
@ -967,7 +965,6 @@ struct hfi1_devdata {
* IB link status cheaply
*/
struct hfi1_status *status;
u32 freezelen; /* max length of freezemsg */
/* revision register shadow */
u64 revision;
@ -995,6 +992,8 @@ struct hfi1_devdata {
u16 rcvegrbufsize_shift;
/* both sides of the PCIe link are gen3 capable */
u8 link_gen3_capable;
/* default link down value (poll/sleep) */
u8 link_default;
/* localbus width (1, 2,4,8,16,32) from config space */
u32 lbus_width;
/* localbus speed in MHz */
@ -1030,8 +1029,6 @@ struct hfi1_devdata {
u8 hfi1_id;
/* implementation code */
u8 icode;
/* default link down value (poll/sleep) */
u8 link_default;
/* vAU of this device */
u8 vau;
/* vCU of this device */
@ -1042,27 +1039,17 @@ struct hfi1_devdata {
u16 vl15_init;
/* Misc small ints */
/* Number of physical ports available */
u8 num_pports;
/* Lowest context number which can be used by user processes */
u8 first_user_ctxt;
u8 n_krcv_queues;
u8 qos_shift;
u8 qpn_mask;
u16 rhf_offset; /* offset of RHF within receive header entry */
u16 irev; /* implementation revision */
u16 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
struct platform_config_cache pcfg_cache;
struct diag_client *diag_client;
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
u8 psxmitwait_supported;
/* cycle length of PS* counters in HW (in picoseconds) */
u16 psxmitwait_check_rate;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@ -1077,6 +1064,9 @@ struct hfi1_devdata {
struct rcv_array_data rcv_entries;
/* cycle length of PS* counters in HW (in picoseconds) */
u16 psxmitwait_check_rate;
/*
* 64 bit synthetic counters
*/
@ -1109,11 +1099,11 @@ struct hfi1_devdata {
struct err_info_rcvport err_info_rcvport;
struct err_info_constraint err_info_rcv_constraint;
struct err_info_constraint err_info_xmit_constraint;
u8 err_info_uncorrectable;
u8 err_info_fmconfig;
atomic_t drop_packet;
u8 do_drop;
u8 err_info_uncorrectable;
u8 err_info_fmconfig;
/*
* Software counters for the status bits defined by the
@ -1136,47 +1126,70 @@ struct hfi1_devdata {
u64 sw_cce_err_status_aggregate;
/* Software counter that aggregates all bypass packet rcv errors */
u64 sw_rcv_bypass_packet_errors;
/* receive interrupt functions */
rhf_rcv_function_ptr *rhf_rcv_function_map;
/* receive interrupt function */
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
/* Save the enabled LCB error bits */
u64 lcb_err_en;
/*
* Handlers for outgoing data so that snoop/capture does not
* have to have its hooks in the send path
*/
send_routine process_pio_send;
send_routine process_pio_send ____cacheline_aligned_in_smp;
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
*/
struct hfi1_pportdata *pport;
/* receive context data */
struct hfi1_ctxtdata **rcd;
u64 __percpu *int_counter;
/* device (not port) flags, basically device capabilities */
u16 flags;
/* Number of physical ports available */
u8 num_pports;
/* Lowest context number which can be used by user processes */
u8 first_user_ctxt;
/* adding a new field here would make it part of this cacheline */
/* seqlock for sc2vl */
seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
u64 sc2vl[4];
/* receive interrupt functions */
rhf_rcv_function_ptr *rhf_rcv_function_map;
u64 __percpu *rcv_limit;
u16 rhf_offset; /* offset of RHF within receive header entry */
/* adding a new field here would make it part of this cacheline */
/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
u8 oui1;
u8 oui2;
u8 oui3;
u8 dc_shutdown;
/* Timer and counter used to detect RcvBufOvflCnt changes */
struct timer_list rcverr_timer;
u32 rcv_ovfl_cnt;
wait_queue_head_t event_queue;
/* Save the enabled LCB error bits */
u64 lcb_err_en;
u8 dc_shutdown;
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
dma_addr_t rcvhdrtail_dummy_dma;
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
u32 rcv_ovfl_cnt;
/* Serialize ASPM enable/disable between multiple verbs contexts */
spinlock_t aspm_lock;
/* Number of verbs contexts which have disabled ASPM */
atomic_t aspm_disabled_cnt;
struct hfi1_affinity *affinity;
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable sdma_rht;
struct kobject kobj;
};
@ -1632,6 +1645,17 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index)
return ret;
}
/*
 * Return the indexed GUID from the port GUIDs table, converted from the
 * host order it is stored in to big endian.
 *
 * An index outside the table triggers a warning and returns 0 (a GUID of 0
 * is treated as illegal by the node info handlers) instead of reading past
 * the end of the guids[] array.
 */
static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	if (WARN_ON(index >= HFI1_GUIDS_PER_PORT))
		return 0;
	return cpu_to_be64(ppd->guids[index]);
}
/*
* Called by readers of cc_state only, must call under rcu_read_lock().
*/
@ -2003,6 +2027,12 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd)
return i2c_target(dd->hfi1_id);
}
/* Is this device integrated or discrete? */
static inline bool is_integrated(struct hfi1_devdata *dd)
{
return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
}
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))

View File

@ -64,6 +64,7 @@ struct sdma_engine;
/**
* struct iowait - linkage for delayed progress/waiting
* @list: used to add/insert into QP/PQ wait lists
* @lock: used to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
@ -91,6 +92,11 @@ struct sdma_engine;
* so sleeping is not allowed.
*
* The wait_dma member along with the iow
*
* The lock field is used by waiters to record
* the seqlock_t that guards the list head.
* Waiters explicitly know that, but the destroy
* code that unwaits QPs does not.
*/
struct iowait {
@ -103,6 +109,7 @@ struct iowait {
unsigned seq);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock;
struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
@ -141,6 +148,7 @@ static inline void iowait_init(
void (*sdma_drained)(struct iowait *wait))
{
wait->count = 0;
wait->lock = NULL;
INIT_LIST_HEAD(&wait->list);
INIT_LIST_HEAD(&wait->tx_head);
INIT_WORK(&wait->iowork, func);

View File

@ -336,20 +336,20 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
ni = (struct opa_node_info *)data;
/* GUID 0 is illegal */
if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
ni->base_version = OPA_MGMT_BASE_VERSION;
ni->class_version = OPA_SMI_CLASS_VERSION;
ni->node_type = 1; /* channel adapter */
ni->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
ni->system_image_guid = ib_hfi1_sys_image_guid;
/* Use first-port GUID as node */
ni->node_guid = cpu_to_be64(dd->pport->guid);
ni->node_guid = ibdev->node_guid;
ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
ni->device_id = cpu_to_be16(dd->pcidev->device);
ni->revision = cpu_to_be32(dd->minrev);
@ -373,19 +373,20 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
/* GUID 0 is illegal */
if (smp->attr_mod || pidx >= dd->num_pports ||
dd->pport[pidx].guid == 0)
ibdev->node_guid == 0 ||
get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
else
nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
return reply((struct ib_mad_hdr *)smp);
}
nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
nip->base_version = OPA_MGMT_BASE_VERSION;
nip->class_version = OPA_SMI_CLASS_VERSION;
nip->node_type = 1; /* channel adapter */
nip->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
nip->sys_guid = ib_hfi1_sys_image_guid;
/* Use first-port GUID as node */
nip->node_guid = cpu_to_be64(dd->pport->guid);
nip->node_guid = ibdev->node_guid;
nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
nip->device_id = cpu_to_be16(dd->pcidev->device);
nip->revision = cpu_to_be32(dd->minrev);

View File

@ -81,7 +81,7 @@ static void do_remove(struct mmu_rb_handler *handler,
struct list_head *del_list);
static void handle_remove(struct work_struct *work);
static struct mmu_notifier_ops mn_opts = {
static const struct mmu_notifier_ops mn_opts = {
.invalidate_page = mmu_notifier_page,
.invalidate_range_start = mmu_notifier_range_start,
};

View File

@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
sc->hw_context = hw_context;
cr_group_addresses(sc, &dma);
sc->credits = sci->credits;
sc->size = sc->credits * PIO_BLOCK_SIZE;
/* PIO Send Memory Address details */
#define PIO_ADDR_CONTEXT_MASK 0xfful
@ -1249,6 +1250,7 @@ int sc_enable(struct send_context *sc)
sc->free = 0;
sc->alloc_free = 0;
sc->fill = 0;
sc->fill_wrap = 0;
sc->sr_head = 0;
sc->sr_tail = 0;
sc->flags = 0;
@ -1392,7 +1394,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
unsigned long flags;
unsigned long avail;
unsigned long blocks = dwords_to_blocks(dw_len);
unsigned long start_fill;
u32 fill_wrap;
int trycount = 0;
u32 head, next;
@ -1417,9 +1419,7 @@ retry:
(sc->fill - sc->alloc_free);
if (blocks > avail) {
/* still no room, actively update */
spin_unlock_irqrestore(&sc->alloc_lock, flags);
sc_release_update(sc);
spin_lock_irqsave(&sc->alloc_lock, flags);
sc->alloc_free = ACCESS_ONCE(sc->free);
trycount++;
goto retry;
@ -1435,8 +1435,11 @@ retry:
head = sc->sr_head;
/* "allocate" the buffer */
start_fill = sc->fill;
sc->fill += blocks;
fill_wrap = sc->fill_wrap;
sc->fill_wrap += blocks;
if (sc->fill_wrap >= sc->credits)
sc->fill_wrap = sc->fill_wrap - sc->credits;
/*
* Fill the parts that the releaser looks at before moving the head.
@ -1465,11 +1468,8 @@ retry:
spin_unlock_irqrestore(&sc->alloc_lock, flags);
/* finish filling in the buffer outside the lock */
pbuf->start = sc->base_addr + ((start_fill % sc->credits)
* PIO_BLOCK_SIZE);
pbuf->size = sc->credits * PIO_BLOCK_SIZE;
pbuf->end = sc->base_addr + pbuf->size;
pbuf->block_count = blocks;
pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
pbuf->end = sc->base_addr + sc->size;
pbuf->qw_written = 0;
pbuf->carry_bytes = 0;
pbuf->carry.val64 = 0;
@ -1580,6 +1580,7 @@ static void sc_piobufavail(struct send_context *sc)
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
/* refcount held until actual wake up */
qps[n++] = qp;
}
@ -2035,28 +2036,17 @@ freesc15:
int init_credit_return(struct hfi1_devdata *dd)
{
int ret;
int num_numa;
int i;
num_numa = num_online_nodes();
/* enforce the expectation that the numas are compact */
for (i = 0; i < num_numa; i++) {
if (!node_online(i)) {
dd_dev_err(dd, "NUMA nodes are not compact\n");
ret = -EINVAL;
goto done;
}
}
dd->cr_base = kcalloc(
num_numa,
node_affinity.num_possible_nodes,
sizeof(struct credit_return_base),
GFP_KERNEL);
if (!dd->cr_base) {
ret = -ENOMEM;
goto done;
}
for (i = 0; i < num_numa; i++) {
for_each_node_with_cpus(i) {
int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);
set_dev_node(&dd->pcidev->dev, i);
@ -2083,14 +2073,11 @@ done:
void free_credit_return(struct hfi1_devdata *dd)
{
int num_numa;
int i;
if (!dd->cr_base)
return;
num_numa = num_online_nodes();
for (i = 0; i < num_numa; i++) {
for (i = 0; i < node_affinity.num_possible_nodes; i++) {
if (dd->cr_base[i].va) {
dma_free_coherent(&dd->pcidev->dev,
TXE_NUM_CONTEXTS *

View File

@ -83,53 +83,55 @@ struct pio_buf {
void *arg; /* argument for cb */
void __iomem *start; /* buffer start address */
void __iomem *end; /* context end address */
unsigned long size; /* context size, in bytes */
unsigned long sent_at; /* buffer is sent when <= free */
u32 block_count; /* size of buffer, in blocks */
u32 qw_written; /* QW written so far */
u32 carry_bytes; /* number of valid bytes in carry */
union mix carry; /* pending unwritten bytes */
u16 qw_written; /* QW written so far */
u8 carry_bytes; /* number of valid bytes in carry */
};
/*
 * cache line aligned pio buffer array
 *
 * Each element overlays a struct pio_buf with sixteen u64s (128 bytes) of
 * padding, so an element is never smaller than 128 bytes.
 */
union pio_shadow_ring {
struct pio_buf pbuf;
u64 unused[16]; /* cache line spacer */
} ____cacheline_aligned;
/* per-NUMA send context */
struct send_context {
/* read-only after init */
struct hfi1_devdata *dd; /* device */
void __iomem *base_addr; /* start of PIO memory */
union pio_shadow_ring *sr; /* shadow ring */
void __iomem *base_addr; /* start of PIO memory */
u32 __percpu *buffers_allocated;/* count of buffers allocated */
u32 size; /* context size, in bytes */
volatile __le64 *hw_free; /* HW free counter */
struct work_struct halt_work; /* halted context work queue entry */
unsigned long flags; /* flags */
int node; /* context home node */
int type; /* context type */
u32 sw_index; /* software index number */
u32 hw_context; /* hardware context number */
u32 credits; /* number of blocks in context */
u32 sr_size; /* size of the shadow ring */
u32 group; /* credit return group */
u16 flags; /* flags */
u8 type; /* context type */
u8 sw_index; /* software index number */
u8 hw_context; /* hardware context number */
u8 group; /* credit return group */
/* allocator fields */
spinlock_t alloc_lock ____cacheline_aligned_in_smp;
u32 sr_head; /* shadow ring head */
unsigned long fill; /* official alloc count */
unsigned long alloc_free; /* copy of free (less cache thrash) */
u32 sr_head; /* shadow ring head */
u32 fill_wrap; /* tracks fill within ring */
u32 credits; /* number of blocks in context */
/* adding a new field here would make it part of this cacheline */
/* releaser fields */
spinlock_t release_lock ____cacheline_aligned_in_smp;
unsigned long free; /* official free count */
u32 sr_tail; /* shadow ring tail */
unsigned long free; /* official free count */
volatile __le64 *hw_free; /* HW free counter */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
u64 credit_ctrl; /* cache for credit control */
u32 credit_intr_count; /* count of credit intr users */
u32 __percpu *buffers_allocated;/* count of buffers allocated */
u64 credit_ctrl; /* cache for credit control */
wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
struct work_struct halt_work; /* halted context work queue entry */
};
/* send context flags */

View File

@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
dest += sizeof(u64);
}
dest -= pbuf->size;
dend -= pbuf->size;
dest -= pbuf->sc->size;
dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
dest += sizeof(u64);
}
dest -= pbuf->size;
dend -= pbuf->size;
dest -= pbuf->sc->size;
dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
dest += sizeof(u64);
}
dest -= pbuf->size;
dend -= pbuf->size;
dest -= pbuf->sc->size;
dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
dest -= pbuf->size;
dest -= pbuf->sc->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf,
dest += sizeof(u64);
}
dest -= pbuf->size;
dend -= pbuf->size;
dest -= pbuf->sc->size;
dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
*/
/* adjust if we've wrapped */
if (dest >= pbuf->end)
dest -= pbuf->size;
dest -= pbuf->sc->size;
/* jump to SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf)
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
dest -= pbuf->size;
dest -= pbuf->sc->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;

View File

@ -49,6 +49,90 @@
#include "efivar.h"
#include "eprom.h"
/*
 * Verify the checksum kept with the config bitmap in the ASIC scratch
 * registers.
 *
 * The bitmap version from scratch 0 and every 16-bit word of scratch
 * registers 1 .. ASIC_NUM_SCRATCH - 1 are summed, the sum is folded down to
 * 16 bits, and the result is added to the checksum field of scratch 0.
 *
 * Return: 1 if the version is non-zero and that total equals 0xFFFF,
 * 0 otherwise.
 */
static int validate_scratch_checksum(struct hfi1_devdata *dd)
{
	u64 scratch, sum;
	int reg, word, version;

	scratch = read_csr(dd, ASIC_CFG_SCRATCH);
	version = (scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT;

	/* the all-zero power on default must never pass the checksum */
	if (version == 0)
		return 0;

	/*
	 * Scratch 0 only holds the checksum and the bitmap version as fields
	 * of interest.  Both are dealt with outside the loop, so the
	 * summation starts at scratch 1.
	 */
	sum = version;
	for (reg = 1; reg < ASIC_NUM_SCRATCH; reg++) {
		scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * reg));

		/* consume the register one 16-bit word at a time */
		for (word = 0; word < (int)(sizeof(u64) / 2); word++) {
			sum += scratch & 0xFFFF;
			scratch >>= 16;
		}
	}

	/* fold the carries back in until the sum fits in 16 bits */
	while (sum >> 16)
		sum = (sum & CHECKSUM_MASK) + (sum >> 16);

	/* fetch the stored checksum field from scratch 0 */
	scratch = read_csr(dd, ASIC_CFG_SCRATCH);
	scratch = (scratch & CHECKSUM_SMASK) >> CHECKSUM_SHIFT;

	return sum + scratch == 0xFFFF;
}
static void save_platform_config_fields(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd = dd->pport;
u64 temp_scratch = 0, temp_dest = 0;
temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1);
temp_dest = temp_scratch &
(dd->hfi1_id ? PORT1_PORT_TYPE_SMASK :
PORT0_PORT_TYPE_SMASK);
ppd->port_type = temp_dest >>
(dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT :
PORT0_PORT_TYPE_SHIFT);
temp_dest = temp_scratch &
(dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK :
PORT0_LOCAL_ATTEN_SMASK);
ppd->local_atten = temp_dest >>
(dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT :
PORT0_LOCAL_ATTEN_SHIFT);
temp_dest = temp_scratch &
(dd->hfi1_id ? PORT1_REMOTE_ATTEN_SMASK :
PORT0_REMOTE_ATTEN_SMASK);
ppd->remote_atten = temp_dest >>
(dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT :
PORT0_REMOTE_ATTEN_SHIFT);
temp_dest = temp_scratch &
(dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK :
PORT0_DEFAULT_ATTEN_SMASK);
ppd->default_atten = temp_dest >>
(dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT :
PORT0_DEFAULT_ATTEN_SHIFT);
temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 :
ASIC_CFG_SCRATCH_2);
ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT;
ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT;
ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT;
ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >>
QSFP_MAX_POWER_SHIFT;
}
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
@ -56,38 +140,49 @@ void get_platform_config(struct hfi1_devdata *dd)
u8 *temp_platform_config = NULL;
u32 esize;
ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
&esize);
if (!ret) {
/* success */
size = esize;
goto success;
if (is_integrated(dd)) {
if (validate_scratch_checksum(dd)) {
save_platform_config_fields(dd);
return;
}
dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n",
__func__);
dd_dev_err(dd,
"%s: Please update your BIOS to support active channels\n",
__func__);
} else {
ret = eprom_read_platform_config(dd,
(void **)&temp_platform_config,
&esize);
if (!ret) {
/* success */
dd->platform_config.data = temp_platform_config;
dd->platform_config.size = esize;
return;
}
/* fail, try EFI variable */
ret = read_hfi1_efi_var(dd, "configuration", &size,
(void **)&temp_platform_config);
if (!ret) {
dd->platform_config.data = temp_platform_config;
dd->platform_config.size = size;
return;
}
}
/* fail, try EFI variable */
ret = read_hfi1_efi_var(dd, "configuration", &size,
(void **)&temp_platform_config);
if (!ret)
goto success;
dd_dev_info(dd,
"%s: Failed to get platform config from UEFI, falling back to request firmware\n",
__func__);
dd_dev_err(dd,
"%s: Failed to get platform config, falling back to sub-optimal default file\n",
__func__);
/* fall back to request firmware */
platform_config_load = 1;
return;
success:
dd->platform_config.data = temp_platform_config;
dd->platform_config.size = size;
}
void free_platform_config(struct hfi1_devdata *dd)
{
if (!platform_config_load) {
/*
* was loaded from EFI, release memory
* allocated by read_efi_var
* was loaded from EFI or the EPROM, release memory
* allocated by read_efi_var/eprom_read_platform_config
*/
kfree(dd->platform_config.data);
}
@ -100,12 +195,16 @@ void free_platform_config(struct hfi1_devdata *dd)
void get_port_type(struct hfi1_pportdata *ppd)
{
int ret;
u32 temp;
ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
PORT_TABLE_PORT_TYPE, &ppd->port_type,
PORT_TABLE_PORT_TYPE, &temp,
4);
if (ret)
if (ret) {
ppd->port_type = PORT_TYPE_UNKNOWN;
return;
}
ppd->port_type = temp;
}
int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
@ -538,6 +637,38 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
}
}
/*
 * Select the special SerDes setting used for low power AOC cables.  Both the
 * power class threshold and the setting value were found by empirical
 * testing.
 *
 * Return: 0xe when the port is a QSFP port, the upper nibble of the cached
 * module technology byte is 0x0-0x9, 0xC or 0xE, and the module power class
 * is below QSFP_POWER_CLASS_4; 0 (leave at default) in every other case.
 */
static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
{
	u8 *cache = ppd->qsfp_info.cache;
	int power_class;
	int tech;

	/* only QSFP ports qualify */
	if (ppd->port_type != PORT_TYPE_QSFP)
		return 0; /* leave at default */

	/* only the active optical cable technology codes qualify */
	tech = (cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4;
	if (tech > 0x9 && tech != 0xC && tech != 0xE)
		return 0; /* leave at default */

	/* active AOC */
	power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
	if (power_class < QSFP_POWER_CLASS_4)
		return 0xe;

	return 0; /* leave at default */
}
static void apply_tunings(
struct hfi1_pportdata *ppd, u32 tx_preset_index,
u8 tuning_method, u32 total_atten, u8 limiting_active)
@ -606,7 +737,17 @@ static void apply_tunings(
tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
postcur = tx_preset;
config_data = precur | (attn << 8) | (postcur << 16);
/*
* NOTES:
* o The aoc_low_power_setting is applied to all lanes even
* though only lane 0's value is examined by the firmware.
* o A lingering low power setting after a cable swap does
* not occur. On cable unplug the 8051 is reset and
* restarted on cable insert. This resets all settings to
* their default, erasing any previous low power setting.
*/
config_data = precur | (attn << 8) | (postcur << 16) |
(aoc_low_power_setting(ppd) << 24);
apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
"Applying TX settings");

View File

@ -168,16 +168,6 @@ struct platform_config_cache {
struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX];
};
/*
 * Limit value for each platform configuration table, one entry per table up
 * to PLATFORM_CONFIG_TABLE_MAX.  Entry 0 is 0.
 * NOTE(review): presumably indexed by table number and used to bound field
 * indices within that table - confirm against the users of this array.
 */
static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
0,
SYSTEM_TABLE_MAX,
PORT_TABLE_MAX,
RX_PRESET_TABLE_MAX,
TX_PRESET_TABLE_MAX,
QSFP_ATTEN_TABLE_MAX,
VARIABLE_SETTINGS_TABLE_MAX
};
/* This section defines default values and encodings for the
* fields defined for each table above
*/
@ -295,6 +285,123 @@ enum link_tuning_encoding {
OPA_UNKNOWN_TUNING
};
/*
 * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch
 * registers for integrated platforms
 *
 * Naming: x_SHIFT is the bit position of a field, x_MASK is the unshifted
 * field mask, and x_SMASK is the mask shifted into position.
 *
 * NOTE(review): the masks are unsigned long (UL) constants and several are
 * shifted by 25 or more bits (up to 48), so the _SMASK values only fit when
 * unsigned long is 64 bits wide - confirm this header is 64-bit only.
 */
/* Per-port type and attenuation fields, extracted from ASIC_CFG_SCRATCH_1 */
#define PORT0_PORT_TYPE_SHIFT 0
#define PORT0_LOCAL_ATTEN_SHIFT 4
#define PORT0_REMOTE_ATTEN_SHIFT 10
#define PORT0_DEFAULT_ATTEN_SHIFT 32
#define PORT1_PORT_TYPE_SHIFT 16
#define PORT1_LOCAL_ATTEN_SHIFT 20
#define PORT1_REMOTE_ATTEN_SHIFT 26
#define PORT1_DEFAULT_ATTEN_SHIFT 40
#define PORT0_PORT_TYPE_MASK 0xFUL
#define PORT0_LOCAL_ATTEN_MASK 0x3FUL
#define PORT0_REMOTE_ATTEN_MASK 0x3FUL
#define PORT0_DEFAULT_ATTEN_MASK 0xFFUL
#define PORT1_PORT_TYPE_MASK 0xFUL
#define PORT1_LOCAL_ATTEN_MASK 0x3FUL
#define PORT1_REMOTE_ATTEN_MASK 0x3FUL
#define PORT1_DEFAULT_ATTEN_MASK 0xFFUL
#define PORT0_PORT_TYPE_SMASK (PORT0_PORT_TYPE_MASK << \
PORT0_PORT_TYPE_SHIFT)
#define PORT0_LOCAL_ATTEN_SMASK (PORT0_LOCAL_ATTEN_MASK << \
PORT0_LOCAL_ATTEN_SHIFT)
#define PORT0_REMOTE_ATTEN_SMASK (PORT0_REMOTE_ATTEN_MASK << \
PORT0_REMOTE_ATTEN_SHIFT)
#define PORT0_DEFAULT_ATTEN_SMASK (PORT0_DEFAULT_ATTEN_MASK << \
PORT0_DEFAULT_ATTEN_SHIFT)
#define PORT1_PORT_TYPE_SMASK (PORT1_PORT_TYPE_MASK << \
PORT1_PORT_TYPE_SHIFT)
#define PORT1_LOCAL_ATTEN_SMASK (PORT1_LOCAL_ATTEN_MASK << \
PORT1_LOCAL_ATTEN_SHIFT)
#define PORT1_REMOTE_ATTEN_SMASK (PORT1_REMOTE_ATTEN_MASK << \
PORT1_REMOTE_ATTEN_SHIFT)
#define PORT1_DEFAULT_ATTEN_SMASK (PORT1_DEFAULT_ATTEN_MASK << \
PORT1_DEFAULT_ATTEN_SHIFT)
/*
 * Max power class and TX/RX preset fields, extracted from
 * ASIC_CFG_SCRATCH_2 or ASIC_CFG_SCRATCH_3
 */
#define QSFP_MAX_POWER_SHIFT 0
#define TX_NO_EQ_SHIFT 4
#define TX_EQ_SHIFT 25
#define RX_SHIFT 46
#define QSFP_MAX_POWER_MASK 0xFUL
#define TX_NO_EQ_MASK 0x1FFFFFUL
#define TX_EQ_MASK 0x1FFFFFUL
#define RX_MASK 0xFFFFUL
#define QSFP_MAX_POWER_SMASK (QSFP_MAX_POWER_MASK << \
QSFP_MAX_POWER_SHIFT)
#define TX_NO_EQ_SMASK (TX_NO_EQ_MASK << TX_NO_EQ_SHIFT)
#define TX_EQ_SMASK (TX_EQ_MASK << TX_EQ_SHIFT)
#define RX_SMASK (RX_MASK << RX_SHIFT)
/*
 * Sub-fields of a TX preset value
 * NOTE(review): presumably these decode the 21-bit TX_EQ/TX_NO_EQ values
 * above - confirm against the code that consumes them.
 */
#define TX_PRECUR_SHIFT 0
#define TX_ATTN_SHIFT 4
#define QSFP_TX_CDR_APPLY_SHIFT 9
#define QSFP_TX_EQ_APPLY_SHIFT 10
#define QSFP_TX_CDR_SHIFT 11
#define QSFP_TX_EQ_SHIFT 12
#define TX_POSTCUR_SHIFT 16
#define TX_PRECUR_MASK 0xFUL
#define TX_ATTN_MASK 0x1FUL
#define QSFP_TX_CDR_APPLY_MASK 0x1UL
#define QSFP_TX_EQ_APPLY_MASK 0x1UL
#define QSFP_TX_CDR_MASK 0x1UL
#define QSFP_TX_EQ_MASK 0xFUL
#define TX_POSTCUR_MASK 0x1FUL
#define TX_PRECUR_SMASK (TX_PRECUR_MASK << TX_PRECUR_SHIFT)
#define TX_ATTN_SMASK (TX_ATTN_MASK << TX_ATTN_SHIFT)
#define QSFP_TX_CDR_APPLY_SMASK (QSFP_TX_CDR_APPLY_MASK << \
QSFP_TX_CDR_APPLY_SHIFT)
#define QSFP_TX_EQ_APPLY_SMASK (QSFP_TX_EQ_APPLY_MASK << \
QSFP_TX_EQ_APPLY_SHIFT)
#define QSFP_TX_CDR_SMASK (QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT)
#define QSFP_TX_EQ_SMASK (QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT)
#define TX_POSTCUR_SMASK (TX_POSTCUR_MASK << TX_POSTCUR_SHIFT)
/*
 * Sub-fields of an RX preset value
 * NOTE(review): presumably these decode the 16-bit RX value above - confirm
 * against the code that consumes them.
 */
#define QSFP_RX_CDR_APPLY_SHIFT 0
#define QSFP_RX_EMP_APPLY_SHIFT 1
#define QSFP_RX_AMP_APPLY_SHIFT 2
#define QSFP_RX_CDR_SHIFT 3
#define QSFP_RX_EMP_SHIFT 4
#define QSFP_RX_AMP_SHIFT 8
#define QSFP_RX_CDR_APPLY_MASK 0x1UL
#define QSFP_RX_EMP_APPLY_MASK 0x1UL
#define QSFP_RX_AMP_APPLY_MASK 0x1UL
#define QSFP_RX_CDR_MASK 0x1UL
#define QSFP_RX_EMP_MASK 0xFUL
#define QSFP_RX_AMP_MASK 0x3UL
#define QSFP_RX_CDR_APPLY_SMASK (QSFP_RX_CDR_APPLY_MASK << \
QSFP_RX_CDR_APPLY_SHIFT)
#define QSFP_RX_EMP_APPLY_SMASK (QSFP_RX_EMP_APPLY_MASK << \
QSFP_RX_EMP_APPLY_SHIFT)
#define QSFP_RX_AMP_APPLY_SMASK (QSFP_RX_AMP_APPLY_MASK << \
QSFP_RX_AMP_APPLY_SHIFT)
#define QSFP_RX_CDR_SMASK (QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT)
#define QSFP_RX_EMP_SMASK (QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT)
#define QSFP_RX_AMP_SMASK (QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT)
/* Bitmap version and checksum fields, extracted from ASIC_CFG_SCRATCH */
#define BITMAP_VERSION 1
#define BITMAP_VERSION_SHIFT 44
#define BITMAP_VERSION_MASK 0xFUL
#define BITMAP_VERSION_SMASK (BITMAP_VERSION_MASK << \
BITMAP_VERSION_SHIFT)
#define CHECKSUM_SHIFT 48
#define CHECKSUM_MASK 0xFFFFUL
#define CHECKSUM_SMASK (CHECKSUM_MASK << CHECKSUM_SHIFT)
/* platform.c */
void get_platform_config(struct hfi1_devdata *dd);
void free_platform_config(struct hfi1_devdata *dd);

View File

@ -196,15 +196,18 @@ static void flush_tx_list(struct rvt_qp *qp)
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
seqlock_t *lock = priv->s_iowait.lock;
write_seqlock_irqsave(&dev->iowait_lock, flags);
if (!lock)
return;
write_seqlock_irqsave(lock, flags);
if (!list_empty(&priv->s_iowait.list)) {
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
}
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
write_sequnlock_irqrestore(lock, flags);
}
static inline int opa_mtu_enum_to_int(int mtu)
@ -543,6 +546,7 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
}
@ -964,6 +968,7 @@ void notify_error_qp(struct rvt_qp *qp)
if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
}
write_sequnlock(&dev->iowait_lock);

View File

@ -276,7 +276,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
rvt_get_mr(ps->s_txreq->mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
qp->s_cur_sge = &qp->s_ack_rdma_sge;
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
if (len > pmtu) {
len = pmtu;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
@ -290,7 +290,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
bth2 = mask_psn(qp->s_ack_rdma_psn++);
} else {
/* COMPARE_SWAP or FETCH_ADD */
qp->s_cur_sge = NULL;
ps->s_txreq->ss = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = hfi1_compute_aeth(qp);
@ -306,7 +306,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
qp->s_cur_sge = &qp->s_ack_rdma_sge;
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
if (ps->s_txreq->mr)
rvt_get_mr(ps->s_txreq->mr);
@ -335,7 +335,7 @@ normal:
*/
qp->s_ack_state = OP(SEND_ONLY);
qp->s_flags &= ~RVT_S_ACK_PENDING;
qp->s_cur_sge = NULL;
ps->s_txreq->ss = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
@ -351,7 +351,7 @@ normal:
qp->s_rdma_ack_cnt++;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
qp->s_cur_size = len;
ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
/* pbc */
ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
@ -801,8 +801,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_len -= len;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
qp->s_cur_sge = ss;
qp->s_cur_size = len;
ps->s_txreq->ss = ss;
ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(
qp,
ohdr,
@ -1146,8 +1146,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
u32 opcode;
u32 psn;
@ -1195,22 +1193,8 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
qp->s_last = s_last;
/* see post_send() */
barrier();
for (i = 0; i < wqe->wr.num_sge; i++) {
struct rvt_sge *sge = &wqe->sg_list[i];
rvt_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
}
rvt_put_swqe(wqe);
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@ -1240,9 +1224,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp)
{
struct ib_wc wc;
unsigned i;
lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
@ -1253,28 +1234,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
for (i = 0; i < wqe->wr.num_sge; i++) {
struct rvt_sge *sge = &wqe->sg_list[i];
rvt_put_mr(sge->mr);
}
rvt_put_swqe(wqe);
s_last = qp->s_last;
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
}
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@ -2295,7 +2262,7 @@ send_last:
hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@ -2410,8 +2377,7 @@ send_last:
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
if (len > pmtu)
qp->r_psn += (len - 1) / pmtu;
qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;

View File

@ -239,16 +239,6 @@ bail:
return ret;
}
static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index)
{
if (!index) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
return cpu_to_be64(ppd->guid);
}
return ibp->guids[index - 1];
}
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
return (gid->global.interface_id == id &&
@ -699,9 +689,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
/* The SGID is 32-bit aligned. */
hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
hdr->sgid.global.interface_id =
grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
ibp->guids[grh->sgid_index - 1] :
cpu_to_be64(ppd_from_ibp(ibp)->guid);
grh->sgid_index < HFI1_GUIDS_PER_PORT ?
get_sguid(ibp, grh->sgid_index) :
get_sguid(ibp, HFI1_PORT_GUID_INDEX);
hdr->dgid = grh->dgid;
/* GRH header size in 32-bit words. */
@ -777,8 +767,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth1;
/* Construct the header. */
extra_bytes = -qp->s_cur_size & 3;
nwords = (qp->s_cur_size + extra_bytes) >> 2;
extra_bytes = -ps->s_txreq->s_cur_size & 3;
nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
qp->s_hdrwords += hfi1_make_grh(ibp,
@ -952,7 +942,6 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
unsigned i;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@ -964,32 +953,13 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->s_last = last;
/* See post_send() */
barrier();
for (i = 0; i < wqe->wr.num_sge; i++) {
struct rvt_sge *sge = &wqe->sg_list[i];
rvt_put_mr(sge->mr);
}
rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
status != IB_WC_SUCCESS) {
struct ib_wc wc;
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
wc.qp = &qp->ibqp;
if (status == IB_WC_SUCCESS)
wc.byte_len = wqe->length;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
status != IB_WC_SUCCESS);
}
rvt_qp_swqe_complete(qp, wqe, status);
if (qp->s_acked == old_last)
qp->s_acked = last;

View File

@ -375,7 +375,7 @@ static inline void complete_tx(struct sdma_engine *sde,
sde->head_sn, tx->sn);
sde->head_sn++;
#endif
sdma_txclean(sde->dd, tx);
__sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
if (wait && iowait_sdma_dec(wait))
@ -1643,7 +1643,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx)
}
/**
* sdma_txclean() - clean tx of mappings, descp *kmalloc's
* __sdma_txclean() - clean tx of mappings, descp *kmalloc's
* @dd: hfi1_devdata for unmapping
* @tx: tx request to clean
*
@ -1653,7 +1653,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx)
* The code can be called multiple times without issue.
*
*/
void sdma_txclean(
void __sdma_txclean(
struct hfi1_devdata *dd,
struct sdma_txreq *tx)
{
@ -3080,7 +3080,7 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
tx->descp[i] = tx->descs[i];
return 0;
enomem:
sdma_txclean(dd, tx);
__sdma_txclean(dd, tx);
return -ENOMEM;
}
@ -3109,14 +3109,14 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
sdma_txclean(dd, tx);
__sdma_txclean(dd, tx);
return rval;
}
/* If coalesce buffer is allocated, copy data into it */
if (tx->coalesce_buf) {
if (type == SDMA_MAP_NONE) {
sdma_txclean(dd, tx);
__sdma_txclean(dd, tx);
return -EINVAL;
}
@ -3124,7 +3124,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
kvaddr = kmap(page);
kvaddr += offset;
} else if (WARN_ON(!kvaddr)) {
sdma_txclean(dd, tx);
__sdma_txclean(dd, tx);
return -EINVAL;
}
@ -3154,7 +3154,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
sdma_txclean(dd, tx);
__sdma_txclean(dd, tx);
return -ENOSPC;
}
@ -3196,7 +3196,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
if ((unlikely(tx->num_desc == tx->desc_limit))) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
sdma_txclean(dd, tx);
__sdma_txclean(dd, tx);
return rval;
}
}

View File

@ -667,7 +667,13 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
int type, void *kvaddr, struct page *page,
unsigned long offset, u16 len);
int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *);
void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
/*
 * Inline front end for __sdma_txclean(): the out-of-line clean is only
 * invoked when tx->num_desc is non-zero.
 */
static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	if (!tx->num_desc)
		return;

	__sdma_txclean(dd, tx);
}
/* helpers used by public routines */
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
@ -753,7 +759,7 @@ static inline int sdma_txadd_page(
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
sdma_txclean(dd, tx);
__sdma_txclean(dd, tx);
return -ENOSPC;
}
@ -834,7 +840,7 @@ static inline int sdma_txadd_kvaddr(
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
sdma_txclean(dd, tx);
__sdma_txclean(dd, tx);
return -ENOSPC;
}

View File

@ -258,8 +258,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_len -= len;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
ps->s_txreq->ss = &qp->s_sge;
ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_psn++), middle, ps);
/* pbc */

View File

@ -354,8 +354,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
qp->s_hdrwords = 7;
qp->s_cur_size = wqe->length;
qp->s_cur_sge = &qp->s_sge;
ps->s_txreq->s_cur_size = wqe->length;
ps->s_txreq->ss = &qp->s_sge;
qp->s_srate = ah_attr->static_rate;
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;

View File

@ -115,6 +115,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_HCRC_LOWER_MASK 0xff
#define AHG_KDETH_INTR_SHIFT 12
#define AHG_KDETH_SH_SHIFT 13
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@ -144,8 +145,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_OM_LARGE 64
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
/* Last packet in the request */
#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
/* Tx request flag bits */
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
@ -943,8 +945,13 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
tx->busycount = 0;
INIT_LIST_HEAD(&tx->list);
/*
* For the last packet set the ACK request
* and disable header suppression.
*/
if (req->seqnum == req->info.npkts - 1)
tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
tx->flags |= (TXREQ_FLAGS_REQ_ACK |
TXREQ_FLAGS_REQ_DISABLE_SH);
/*
* Calculate the payload size - this is min of the fragment
@ -963,11 +970,22 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
}
datalen = compute_data_length(req, tx);
/*
* Disable header suppression for the payload <= 8DWS.
* If there is an uncorrectable error in the receive
* data FIFO when the received payload size is less than
* or equal to 8DWS then the RxDmaDataFifoRdUncErr is
* not reported. RHF.EccErr is set if the header
* is not suppressed.
*/
if (!datalen) {
SDMA_DBG(req,
"Request has data but pkt len is 0");
ret = -EFAULT;
goto free_tx;
} else if (datalen <= 32) {
tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
}
}
@ -990,6 +1008,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
LRH2PBC(lrhlen);
tx->hdr.pbc[0] = cpu_to_le16(pbclen);
}
ret = check_header_template(req, &tx->hdr,
lrhlen, datalen);
if (ret)
goto free_tx;
ret = sdma_txinit_ahg(&tx->txreq,
SDMA_TXREQ_F_AHG_COPY,
sizeof(tx->hdr) + datalen,
@ -1351,7 +1373,7 @@ static int set_txreq_header(struct user_sdma_request *req,
req->seqnum));
/* Set ACK request on last packet */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
hdr->bth[2] |= cpu_to_be32(1UL << 31);
/* Set the new offset */
@ -1384,8 +1406,8 @@ static int set_txreq_header(struct user_sdma_request *req,
/* Set KDETH.TID based on value for this TID */
KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
EXP_TID_GET(tidval, IDX));
/* Clear KDETH.SH only on the last packet */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
/* Clear KDETH.SH when DISABLE_SH flag is set */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
/*
* Set the KDETH.OFFSET and KDETH.OM based on size of
@ -1429,7 +1451,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
/* BTH.PSN and BTH.A */
val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
@ -1468,19 +1490,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
((req->tidoffset / req->omfactor) & 0x7fff)));
/* KDETH.TIDCtrl, KDETH.TID */
/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
(EXP_TID_GET(tidval, IDX) & 0x3ff));
/* Clear KDETH.SH on last packet */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
INTR) <<
AHG_KDETH_INTR_SHIFT);
val &= cpu_to_le16(~(1U << 13));
AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
(EXP_TID_GET(tidval, IDX) & 0x3ff));
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
INTR) <<
AHG_KDETH_INTR_SHIFT));
} else {
AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
INTR) <<
AHG_KDETH_INTR_SHIFT));
}
AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
}
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,

View File

@ -296,22 +296,6 @@ static inline int wss_exceeds_threshold(void)
return atomic_read(&wss.total_count) >= wss.threshold;
}
/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 *
 * The table is indexed by the work request opcode. Opcodes that have no
 * designated entry below are implicitly zero-initialized, and the array is
 * only as long as its largest designated index, so it must not be indexed
 * past that.
 */
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
[IB_WR_REG_MR] = IB_WC_REG_MR
};
/*
* Length of header by opcode, 0 --> not supported
*/
@ -694,6 +678,7 @@ static void mem_timer(unsigned long data)
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
/* refcount held until actual wake up */
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
@ -769,6 +754,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
@ -788,10 +774,10 @@ static int wait_kmem(struct hfi1_ibdev *dev,
*/
static noinline int build_verbs_ulp_payload(
struct sdma_engine *sde,
struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx)
{
struct rvt_sge_state *ss = tx->ss;
struct rvt_sge *sg_list = ss->sg_list;
struct rvt_sge sge = ss->sge;
u8 num_sge = ss->num_sge;
@ -835,7 +821,6 @@ bail_txadd:
/* New API */
static int build_verbs_tx_desc(
struct sdma_engine *sde,
struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx,
struct hfi1_ahg_info *ahg_info,
@ -879,9 +864,9 @@ static int build_verbs_tx_desc(
goto bail_txadd;
}
/* add the ulp payload - if any. ss can be NULL for acks */
if (ss)
ret = build_verbs_ulp_payload(sde, ss, length, tx);
/* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx);
bail_txadd:
return ret;
}
@ -892,8 +877,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ahg_info *ahg_info = priv->s_ahg;
u32 hdrwords = qp->s_hdrwords;
struct rvt_sge_state *ss = qp->s_cur_sge;
u32 len = qp->s_cur_size;
u32 len = ps->s_txreq->s_cur_size;
u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
@ -918,7 +902,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
plen);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc);
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
if (unlikely(ret))
goto bail_build;
}
@ -980,6 +964,7 @@ static int pio_wait(struct rvt_qp *qp,
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
@ -1008,8 +993,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
{
struct hfi1_qp_priv *priv = qp->priv;
u32 hdrwords = qp->s_hdrwords;
struct rvt_sge_state *ss = qp->s_cur_sge;
u32 len = qp->s_cur_size;
struct rvt_sge_state *ss = ps->s_txreq->ss;
u32 len = ps->s_txreq->s_cur_size;
u32 dwords = (len + 3) >> 2;
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
@ -1237,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
u8 op = get_opcode(h);
if (piothreshold &&
qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
(BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
@ -1483,15 +1468,11 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
int guid_index, __be64 *guid)
{
struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
if (guid_index == 0)
*guid = cpu_to_be64(ppd->guid);
else if (guid_index < HFI1_GUIDS_PER_PORT)
*guid = ibp->guids[guid_index - 1];
else
if (guid_index >= HFI1_GUIDS_PER_PORT)
return -EINVAL;
*guid = get_sguid(ibp, guid_index);
return 0;
}
@ -1610,6 +1591,154 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
dc8051_ver_min(ver));
}
/*
 * Names of the driver-level counters. alloc_hw_stats() appends these after
 * the device counter names, and get_hw_stats() fills the matching value
 * slots in the same order, so the order here must match the order of the
 * values get_hw_stats() writes.
 */
static const char * const driver_cntr_names[] = {
/* must be element 0: get_hw_stats() fills slot 0 from hfi1_sps_ints() */
"DRIVER_KernIntr",
"DRIVER_ErrorIntr",
"DRIVER_Tx_Errs",
"DRIVER_Rcv_Errs",
"DRIVER_HW_Errs",
"DRIVER_NoPIOBufs",
"DRIVER_CtxtsOpen",
"DRIVER_RcvLen_Errs",
"DRIVER_EgrBufFull",
"DRIVER_EgrHdrFull"
};
/*
 * Counter name tables shared by alloc_hw_stats() and get_hw_stats().
 * They are built on the first alloc_hw_stats() call and released in
 * hfi1_unregister_ib_device().
 */
/* device counter names followed by the driver_cntr_names entries */
static const char **dev_cntr_names;
/* per-port counter names */
static const char **port_cntr_names;
/* number of entries in driver_cntr_names */
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
/* number of device counter names parsed by init_cntr_names() */
static int num_dev_cntrs;
/* number of port counter names parsed by init_cntr_names() */
static int num_port_cntrs;
/* non-zero once both name tables above have been built */
static int cntr_names_initialized;
/*
 * Convert a list of names separated by '\n' into an array of NUL-terminated
 * strings. Optionally some entries can be reserved in the array to hold extra
 * external strings.
 *
 * @names_in: buffer holding the names, each one terminated by '\n'; it does
 *            not need to be NUL-terminated
 * @names_len: number of bytes in @names_in
 * @num_extra_names: number of additional pointer slots to reserve after the
 *                   parsed names; they are left uninitialized for the caller
 * @num_cntrs: set to the number of names found (0 on failure)
 * @cntr_names: set to the allocated pointer array (NULL on failure)
 *
 * A single allocation holds the pointer array followed by a private copy of
 * the name bytes, so the caller releases everything with one kfree() of
 * *@cntr_names. Any bytes after the last '\n' are not turned into a name.
 *
 * Return: 0 on success, -ENOMEM if the allocation fails.
 */
static int init_cntr_names(const char *names_in,
			   const int names_len,
			   int num_extra_names,
			   int *num_cntrs,
			   const char ***cntr_names)
{
	char *names_out, *p, *end, **q;
	int i, n;

	/* one name per '\n' */
	n = 0;
	for (i = 0; i < names_len; i++)
		if (names_in[i] == '\n')
			n++;

	names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
			    GFP_KERNEL);
	if (!names_out) {
		*num_cntrs = 0;
		*cntr_names = NULL;
		return -ENOMEM;
	}

	/* the string data lives right behind the pointer array */
	p = names_out + (n + num_extra_names) * sizeof(char *);
	memcpy(p, names_in, names_len);
	end = p + names_len;

	q = (char **)names_out;
	for (i = 0; i < n; i++) {
		q[i] = p;
		/*
		 * The copy is not NUL-terminated and may in principle hold
		 * embedded NUL bytes, so search with an explicit bound.
		 * Exactly n '\n' bytes were counted above, so the search
		 * always succeeds.
		 */
		p = memchr(p, '\n', end - p);
		*p++ = '\0';
	}

	*num_cntrs = n;
	*cntr_names = (const char **)names_out;
	return 0;
}
/*
 * alloc_hw_stats() - allocate a hw stats structure for the device or a port
 * @ibdev: the ib device
 * @port_num: 0 selects the device-wide counters (device names followed by
 *            the driver counter names), non-zero selects the port counters
 *
 * The counter name tables are built on the first call and reused afterwards.
 * NOTE(review): cntr_names_initialized is tested and set here without any
 * locking visible in this function - confirm callers are serialized.
 *
 * Return: the structure from rdma_alloc_hw_stats_struct(), or NULL if the
 * name tables could not be built.
 */
static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
					    u8 port_num)
{
	if (!cntr_names_initialized) {
		struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
		int idx;

		if (init_cntr_names(dd->cntrnames,
				    dd->cntrnameslen,
				    num_driver_cntrs,
				    &num_dev_cntrs,
				    &dev_cntr_names))
			return NULL;

		/* the driver counter names go into the reserved tail slots */
		for (idx = 0; idx < num_driver_cntrs; idx++)
			dev_cntr_names[num_dev_cntrs + idx] =
				driver_cntr_names[idx];

		if (init_cntr_names(dd->portcntrnames,
				    dd->portcntrnameslen,
				    0,
				    &num_port_cntrs,
				    &port_cntr_names)) {
			/* undo the device table so a later call starts over */
			kfree(dev_cntr_names);
			dev_cntr_names = NULL;
			return NULL;
		}

		cntr_names_initialized = 1;
	}

	if (port_num)
		return rdma_alloc_hw_stats_struct(
			port_cntr_names,
			num_port_cntrs,
			RDMA_HW_STATS_DEFAULT_LIFESPAN);

	return rdma_alloc_hw_stats_struct(
		dev_cntr_names,
		num_dev_cntrs + num_driver_cntrs,
		RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static u64 hfi1_sps_ints(void)
{
unsigned long flags;
struct hfi1_devdata *dd;
u64 sps_ints = 0;
spin_lock_irqsave(&hfi1_devs_lock, flags);
list_for_each_entry(dd, &hfi1_dev_list, list) {
sps_ints += get_all_cpu_total(dd->int_counter);
}
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
return sps_ints;
}
/*
 * get_hw_stats() - copy the current counter values into @stats
 * @ibdev: the ib device
 * @stats: destination; stats->value receives the counter values
 * @port: 0 for the device-wide counters, otherwise the port number
 * @index: not used by this implementation
 *
 * For the device (port 0) the device counters are read first, then the
 * driver counters are placed behind them: slot 0 comes from hfi1_sps_ints()
 * and the remaining slots are taken from hfi1_stats viewed as an array of
 * u64, in the order of driver_cntr_names.
 * NOTE(review): this writes num_driver_cntrs extra entries into the buffer
 * returned through hfi1_read_cntrs() - presumably that buffer is sized to
 * hold them; confirm against hfi1_read_cntrs().
 *
 * Return: the number of values copied into stats->value.
 */
static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			u8 port, int index)
{
	u64 *values;
	int count;

	if (!port) {
		/*
		 * Deliberately not named "stats": that would shadow the
		 * parameter used by the memcpy() below.
		 */
		const u64 *drv_stats = (const u64 *)&hfi1_stats;
		int i;

		hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values);
		values[num_dev_cntrs] = hfi1_sps_ints();
		for (i = 1; i < num_driver_cntrs; i++)
			values[num_dev_cntrs + i] = drv_stats[i];
		count = num_dev_cntrs + num_driver_cntrs;
	} else {
		struct hfi1_ibport *ibp = to_iport(ibdev, port);

		hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values);
		count = num_port_cntrs;
	}

	memcpy(stats->value, values, count * sizeof(u64));
	return count;
}
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@ -1620,6 +1749,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
struct hfi1_ibdev *dev = &dd->verbs_dev;
struct ib_device *ibdev = &dev->rdi.ibdev;
struct hfi1_pportdata *ppd = dd->pport;
struct hfi1_ibport *ibp = &ppd->ibport_data;
unsigned i;
int ret;
size_t lcpysz = IB_DEVICE_NAME_MAX;
@ -1632,6 +1762,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
seqlock_init(&dev->iowait_lock);
seqlock_init(&dev->txwait_lock);
INIT_LIST_HEAD(&dev->txwait);
INIT_LIST_HEAD(&dev->memwait);
@ -1639,20 +1770,24 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
if (ret)
goto err_verbs_txreq;
/* Use first-port GUID as node guid */
ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX);
/*
* The system image GUID is supposed to be the same for all
* HFIs in a single system but since there can be other
* device types in the system, we can't be sure this is unique.
*/
if (!ib_hfi1_sys_image_guid)
ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid);
ib_hfi1_sys_image_guid = ibdev->node_guid;
lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
ibdev->node_guid = cpu_to_be64(ppd->guid);
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dma_device = &dd->pcidev->dev;
ibdev->modify_device = modify_device;
ibdev->alloc_hw_stats = alloc_hw_stats;
ibdev->get_hw_stats = get_hw_stats;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@ -1767,6 +1902,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
kfree(dev_cntr_names);
kfree(port_cntr_names);
cntr_names_initialized = 0;
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)

View File

@ -73,7 +73,6 @@ struct hfi1_packet;
#include "iowait.h"
#define HFI1_MAX_RDMA_ATOMIC 16
#define HFI1_GUIDS_PER_PORT 5
/*
* Increment this value if any changes that break userspace ABI
@ -169,8 +168,6 @@ struct hfi1_ibport {
struct rvt_qp __rcu *qp[2];
struct rvt_ibport rvp;
__be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */
/* the first 16 entries are sl_to_vl for !OPA */
u8 sl_to_sc[32];
u8 sc_to_sl[32];
@ -180,19 +177,20 @@ struct hfi1_ibdev {
struct rvt_dev_info rdi; /* Must be first */
/* QP numbers are shared by all IB ports */
/* protect wait lists */
seqlock_t iowait_lock;
/* protect txwait list */
seqlock_t txwait_lock ____cacheline_aligned_in_smp;
struct list_head txwait; /* list for wait verbs_txreq */
struct list_head memwait; /* list for wait kernel memory */
struct list_head txreq_free;
struct kmem_cache *verbs_txreq_cache;
struct timer_list mem_timer;
u64 n_piowait;
u64 n_piodrain;
u64 n_txwait;
u64 n_kmem_wait;
/* protect iowait lists */
seqlock_t iowait_lock ____cacheline_aligned_in_smp;
u64 n_piowait;
u64 n_piodrain;
struct timer_list mem_timer;
#ifdef CONFIG_DEBUG_FS
/* per HFI debugfs */
struct dentry *hfi1_ibdev_dbg;

View File

@ -72,22 +72,22 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
kmem_cache_free(dev->verbs_txreq_cache, tx);
do {
seq = read_seqbegin(&dev->iowait_lock);
seq = read_seqbegin(&dev->txwait_lock);
if (!list_empty(&dev->txwait)) {
struct iowait *wait;
write_seqlock_irqsave(&dev->iowait_lock, flags);
write_seqlock_irqsave(&dev->txwait_lock, flags);
wait = list_first_entry(&dev->txwait, struct iowait,
list);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
/* refcount held until actual wake up */
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
write_sequnlock_irqrestore(&dev->txwait_lock, flags);
hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
break;
}
} while (read_seqretry(&dev->iowait_lock, seq));
} while (read_seqretry(&dev->txwait_lock, seq));
}
struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
@ -96,7 +96,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
{
struct verbs_txreq *tx = ERR_PTR(-EBUSY);
write_seqlock(&dev->iowait_lock);
write_seqlock(&dev->txwait_lock);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
struct hfi1_qp_priv *priv;
@ -108,13 +108,14 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
dev->n_txwait++;
qp->s_flags |= RVT_S_WAIT_TX;
list_add_tail(&priv->s_iowait.list, &dev->txwait);
priv->s_iowait.lock = &dev->txwait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
rvt_get_qp(qp);
}
qp->s_flags &= ~RVT_S_BUSY;
}
out:
write_sequnlock(&dev->iowait_lock);
write_sequnlock(&dev->txwait_lock);
return tx;
}

View File

@ -65,6 +65,7 @@ struct verbs_txreq {
struct sdma_engine *sde;
struct send_context *psc;
u16 hdr_dwords;
u16 s_cur_size;
};
struct hfi1_ibdev;

View File

@ -420,8 +420,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
if (list_empty(&qp->rspwait)) {
qp->r_flags |=
RVT_R_RSP_NAK;
atomic_inc(
&qp->refcount);
rvt_get_qp(qp);
list_add_tail(
&qp->rspwait,
&rcd->qp_wait_list);

View File

@ -941,8 +941,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
u32 opcode;
u32 psn;
@ -988,22 +986,8 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
qp->s_last = s_last;
/* see post_send() */
barrier();
for (i = 0; i < wqe->wr.num_sge; i++) {
struct rvt_sge *sge = &wqe->sg_list[i];
rvt_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
}
rvt_put_swqe(wqe);
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before resending,
@ -1032,9 +1016,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct qib_ibport *ibp)
{
struct ib_wc wc;
unsigned i;
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
@ -1044,28 +1025,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
for (i = 0; i < wqe->wr.num_sge; i++) {
struct rvt_sge *sge = &wqe->sg_list[i];
rvt_put_mr(sge->mr);
}
rvt_put_swqe(wqe);
s_last = qp->s_last;
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
}
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
} else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
@ -2112,8 +2079,7 @@ send_last:
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
if (len > pmtu)
qp->r_psn += (len - 1) / pmtu;
qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;

View File

@ -793,7 +793,6 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
unsigned i;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@ -805,32 +804,13 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->s_last = last;
/* See post_send() */
barrier();
for (i = 0; i < wqe->wr.num_sge; i++) {
struct rvt_sge *sge = &wqe->sg_list[i];
rvt_put_mr(sge->mr);
}
rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
status != IB_WC_SUCCESS) {
struct ib_wc wc;
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
wc.qp = &qp->ibqp;
if (status == IB_WC_SUCCESS)
wc.byte_len = wqe->length;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
status != IB_WC_SUCCESS);
}
rvt_qp_swqe_complete(qp, wqe, status);
if (qp->s_acked == old_last)
qp->s_acked = last;

View File

@ -113,19 +113,6 @@ static unsigned int ib_qib_disable_sma;
module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 *
 * The table is indexed by the work request opcode. Opcodes that have no
 * designated entry below are implicitly zero-initialized, and the array is
 * only as long as its largest designated index, so it must not be indexed
 * past that.
 */
const enum ib_wc_opcode ib_qib_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};
/*
* System image GUID.
*/
@ -464,7 +451,7 @@ static void mem_timer(unsigned long data)
priv = list_entry(list->next, struct qib_qp_priv, iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
atomic_inc(&qp->refcount);
rvt_get_qp(qp);
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
}
@ -477,8 +464,7 @@ static void mem_timer(unsigned long data)
qib_schedule_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
rvt_put_qp(qp);
}
}
@ -762,7 +748,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
atomic_inc(&qp->refcount);
rvt_get_qp(qp);
spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
spin_lock_irqsave(&qp->s_lock, flags);
@ -772,8 +758,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
}
spin_unlock_irqrestore(&qp->s_lock, flags);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
rvt_put_qp(qp);
} else
spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
}
@ -808,7 +793,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
break;
avail -= qpp->s_tx->txreq.sg_count;
list_del_init(&qpp->iowait);
atomic_inc(&qp->refcount);
rvt_get_qp(qp);
qps[n++] = qp;
}
@ -822,8 +807,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
qib_schedule_send(qp);
}
spin_unlock(&qp->s_lock);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
rvt_put_qp(qp);
}
}
@ -1288,7 +1272,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
priv = list_entry(list->next, struct qib_qp_priv, iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
atomic_inc(&qp->refcount);
rvt_get_qp(qp);
qps[n++] = qp;
}
dd->f_wantpiobuf_intr(dd, 0);
@ -1306,8 +1290,7 @@ full:
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify qib_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
rvt_put_qp(qp);
}
}

View File

@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
(solicited || entry->status != IB_WC_SUCCESS))) {
struct kthread_worker *worker;
/*
* This will cause send_complete() to be called in
* another thread.
*/
smp_read_barrier_depends(); /* see rvt_cq_exit */
worker = cq->rdi->worker;
if (likely(worker)) {
spin_lock(&cq->rdi->n_cqs_lock);
if (likely(cq->rdi->worker)) {
cq->notify = RVT_CQ_NONE;
cq->triggered++;
kthread_queue_work(worker, &cq->comptask);
kthread_queue_work(cq->rdi->worker, &cq->comptask);
}
spin_unlock(&cq->rdi->n_cqs_lock);
}
spin_unlock_irqrestore(&cq->lock, flags);
@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
}
}
spin_lock(&rdi->n_cqs_lock);
spin_lock_irq(&rdi->n_cqs_lock);
if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
spin_unlock(&rdi->n_cqs_lock);
spin_unlock_irq(&rdi->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
rdi->n_cqs_allocated++;
spin_unlock(&rdi->n_cqs_lock);
spin_unlock_irq(&rdi->n_cqs_lock);
if (cq->ip) {
spin_lock_irq(&rdi->pending_lock);
@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
struct rvt_dev_info *rdi = cq->rdi;
kthread_flush_work(&cq->comptask);
spin_lock(&rdi->n_cqs_lock);
spin_lock_irq(&rdi->n_cqs_lock);
rdi->n_cqs_allocated--;
spin_unlock(&rdi->n_cqs_lock);
spin_unlock_irq(&rdi->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
@ -504,33 +503,23 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
*/
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
int ret = 0;
int cpu;
struct task_struct *task;
struct kthread_worker *worker;
if (rdi->worker)
return 0;
spin_lock_init(&rdi->n_cqs_lock);
rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
if (!rdi->worker)
return -ENOMEM;
kthread_init_worker(rdi->worker);
task = kthread_create_on_node(
kthread_worker_fn,
rdi->worker,
rdi->dparms.node,
"%s", rdi->dparms.cq_name);
if (IS_ERR(task)) {
kfree(rdi->worker);
rdi->worker = NULL;
return PTR_ERR(task);
}
set_user_nice(task, MIN_NICE);
spin_lock_init(&rdi->n_cqs_lock);
cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
kthread_bind(task, cpu);
wake_up_process(task);
return ret;
worker = kthread_create_worker_on_cpu(cpu, 0,
"%s", rdi->dparms.cq_name);
if (IS_ERR(worker))
return PTR_ERR(worker);
set_user_nice(worker->task, MIN_NICE);
rdi->worker = worker;
return 0;
}
/**
@ -541,13 +530,14 @@ void rvt_cq_exit(struct rvt_dev_info *rdi)
{
struct kthread_worker *worker;
worker = rdi->worker;
if (!worker)
/* block future queuing from send_complete() */
spin_lock_irq(&rdi->n_cqs_lock);
if (!rdi->worker) {
spin_unlock_irq(&rdi->n_cqs_lock);
return;
/* blocks future queuing from send_complete() */
}
rdi->worker = NULL;
smp_wmb(); /* See rdi_cq_enter */
kthread_flush_worker(worker);
kthread_stop(worker->task);
kfree(worker);
spin_unlock_irq(&rdi->n_cqs_lock);
kthread_destroy_worker(worker);
}

View File

@ -81,7 +81,7 @@ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp)
goto bail;
mqp->qp = qp;
atomic_inc(&qp->refcount);
rvt_get_qp(qp);
bail:
return mqp;
@ -92,8 +92,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
struct rvt_qp *qp = mqp->qp;
/* Notify hfi1_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
rvt_put_qp(qp);
kfree(mqp);
}

View File

@ -51,6 +51,7 @@
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"
#include "trace.h"
/**
* rvt_driver_mr_init - Init MR resources per driver
@ -84,6 +85,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi)
lkey_table_size = rdi->dparms.lkey_table_size;
}
rdi->lkey_table.max = 1 << lkey_table_size;
rdi->lkey_table.shift = 32 - lkey_table_size;
lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
rdi->lkey_table.table = (struct rvt_mregion __rcu **)
vmalloc_node(lk_tab_size, rdi->dparms.node);
@ -402,6 +404,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = umem->page_size;
trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
n++;
if (n == RVT_SEGSZ) {
m++;
@ -506,6 +509,7 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
n = mapped_segs % RVT_SEGSZ;
mr->mr.map[m]->segs[n].vaddr = (void *)addr;
mr->mr.map[m]->segs[n].length = ps;
trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
mr->mr.length += ps;
return 0;
@ -692,6 +696,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
for (i = 0; i < list_len; i++) {
fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
fmr->mr.map[m]->segs[n].length = ps;
trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
if (++n == RVT_SEGSZ) {
m++;
n = 0;
@ -774,7 +779,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
struct rvt_mregion *mr;
unsigned n, m;
size_t off;
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
/*
* We use LKEY == zero for kernel virtual addresses
@ -782,12 +786,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
*/
rcu_read_lock();
if (sge->lkey == 0) {
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
if (pd->user)
goto bail;
mr = rcu_dereference(dev->dma_mr);
if (!mr)
goto bail;
atomic_inc(&mr->refcount);
rvt_get_mr(mr);
rcu_read_unlock();
isge->mr = mr;
@ -798,8 +804,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = 0;
goto ok;
}
mr = rcu_dereference(
rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
@ -809,7 +814,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
atomic_inc(&mr->refcount);
rvt_get_mr(mr);
rcu_read_unlock();
off += mr->offset;
@ -887,7 +892,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
mr = rcu_dereference(rdi->dma_mr);
if (!mr)
goto bail;
atomic_inc(&mr->refcount);
rvt_get_mr(mr);
rcu_read_unlock();
sge->mr = mr;
@ -899,8 +904,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
goto ok;
}
mr = rcu_dereference(
rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
@ -909,7 +913,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
atomic_inc(&mr->refcount);
rvt_get_mr(mr);
rcu_read_unlock();
off += mr->offset;

View File

@ -76,6 +76,23 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 *
 * The table is indexed by the work request opcode and exported for use
 * outside this file. Opcodes that have no designated entry below are
 * implicitly zero-initialized, and the array is only as long as its largest
 * designated index, so it must not be indexed past that.
 */
const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
[IB_WR_REG_MR] = IB_WC_REG_MR
};
EXPORT_SYMBOL(ib_rvt_wc_opcode);
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@ -884,7 +901,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
return ret;
bail_ip:
kref_put(&qp->ip->ref, rvt_release_mmap_info);
if (qp->ip)
kref_put(&qp->ip->ref, rvt_release_mmap_info);
bail_qpn:
free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);

View File

@ -45,143 +45,10 @@
*
*/
#undef TRACE_SYSTEM_VAR
#define TRACE_SYSTEM_VAR rdmavt
#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define __RDMAVT_TRACE_H
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_vt.h>
#define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi))
#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi))
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rdmavt
TRACE_EVENT(rvt_dbg,
TP_PROTO(struct rvt_dev_info *rdi,
const char *msg),
TP_ARGS(rdi, msg),
TP_STRUCT__entry(
RDI_DEV_ENTRY(rdi)
__string(msg, msg)
),
TP_fast_assign(
RDI_DEV_ASSIGN(rdi);
__assign_str(msg, msg);
),
TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
);
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_qphash
DECLARE_EVENT_CLASS(rvt_qphash_template,
TP_PROTO(struct rvt_qp *qp, u32 bucket),
TP_ARGS(qp, bucket),
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u32, qpn)
__field(u32, bucket)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->bucket = bucket;
),
TP_printk(
"[%s] qpn 0x%x bucket %u",
__get_str(dev),
__entry->qpn,
__entry->bucket
)
);
DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
TP_PROTO(struct rvt_qp *qp, u32 bucket),
TP_ARGS(qp, bucket));
DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
TP_PROTO(struct rvt_qp *qp, u32 bucket),
TP_ARGS(qp, bucket));
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_tx
#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
#define show_wr_opcode(opcode) \
__print_symbolic(opcode, \
wr_opcode_name(RDMA_WRITE), \
wr_opcode_name(RDMA_WRITE_WITH_IMM), \
wr_opcode_name(SEND), \
wr_opcode_name(SEND_WITH_IMM), \
wr_opcode_name(RDMA_READ), \
wr_opcode_name(ATOMIC_CMP_AND_SWP), \
wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
wr_opcode_name(LSO), \
wr_opcode_name(SEND_WITH_INV), \
wr_opcode_name(RDMA_READ_WITH_INV), \
wr_opcode_name(LOCAL_INV), \
wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
#define POS_PRN \
"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
TRACE_EVENT(
rvt_post_one_wr,
TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
TP_ARGS(qp, wqe),
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u64, wr_id)
__field(u32, qpn)
__field(u32, psn)
__field(u32, lpsn)
__field(u32, length)
__field(u32, opcode)
__field(u32, size)
__field(u32, avail)
__field(u32, head)
__field(u32, last)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
__entry->wr_id = wqe->wr.wr_id;
__entry->qpn = qp->ibqp.qp_num;
__entry->psn = wqe->psn;
__entry->lpsn = wqe->lpsn;
__entry->length = wqe->length;
__entry->opcode = wqe->wr.opcode;
__entry->size = qp->s_size;
__entry->avail = qp->s_avail;
__entry->head = qp->s_head;
__entry->last = qp->s_last;
),
TP_printk(
POS_PRN,
__get_str(dev),
__entry->wr_id,
__entry->qpn,
__entry->psn,
__entry->lpsn,
__entry->length,
__entry->opcode, show_wr_opcode(__entry->opcode),
__entry->size,
__entry->avail,
__entry->head,
__entry->last
)
);
#endif /* __RDMAVT_TRACE_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace
#include <trace/define_trace.h>
#include "trace_rvt.h"
#include "trace_qp.h"
#include "trace_tx.h"
#include "trace_mr.h"

View File

@ -0,0 +1,112 @@
/*
* Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ)
#define __RVT_TRACE_MR_H
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_mr.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_mr
/*
 * Event class shared by the memory-region segment tracepoints.
 *
 * @mr:  the memory region the segment belongs to
 * @m:   first-level index supplied by the caller
 * @n:   second-level index supplied by the caller
 * @v:   kernel virtual address of the segment
 * @len: length of the segment
 *
 * Each record stores the device name, the segment's virtual address and
 * the struct page it resolves to via virt_to_page(), the region's lkey,
 * the two indices and the length.
 */
DECLARE_EVENT_CLASS(
	rvt_mr_template,
	TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
	TP_ARGS(mr, m, n, v, len),
	TP_STRUCT__entry(
		RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device))
		__field(void *, vaddr)
		__field(struct page *, page)
		__field(size_t, len)
		__field(u32, lkey)
		__field(u16, m)
		__field(u16, n)
	),
	TP_fast_assign(
		RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device));
		__entry->vaddr = v;
		__entry->page = virt_to_page(v);
		/*
		 * lkey is part of the recorded entry; fill it in so that
		 * consumers of the raw record never see stale buffer data.
		 */
		__entry->lkey = mr->lkey;
		__entry->m = m;
		__entry->n = n;
		__entry->len = len;
	),
	/* len is a size_t, so it must be printed with the 'z' length modifier */
	TP_printk(
		"[%s] vaddr %p page %p m %u n %u len %zu",
		__get_str(dev),
		__entry->vaddr,
		__entry->page,
		__entry->m,
		__entry->n,
		__entry->len
	)
);
/*
 * Concrete tracepoints built on rvt_mr_template.  All three take the
 * same arguments and produce the same record layout; they differ only
 * in the event name (rvt_mr_page_seg, rvt_mr_fmr_seg, rvt_mr_user_seg).
 */
DEFINE_EVENT(
rvt_mr_template, rvt_mr_page_seg,
TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
TP_ARGS(mr, m, n, v, len));
DEFINE_EVENT(
rvt_mr_template, rvt_mr_fmr_seg,
TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
TP_ARGS(mr, m, n, v, len));
DEFINE_EVENT(
rvt_mr_template, rvt_mr_user_seg,
TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
TP_ARGS(mr, m, n, v, len));
#endif /* __RVT_TRACE_MR_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_mr
#include <trace/define_trace.h>

View File

@ -0,0 +1,96 @@
/*
* Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ)
#define __RVT_TRACE_QP_H
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_vt.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_qp
/*
 * Event class for QP hash table tracepoints.
 *
 * @qp:     the queue pair involved
 * @bucket: the hash bucket value passed in by the caller
 *
 * Each record stores the device name string, the QP number taken from
 * qp->ibqp.qp_num, and the bucket.
 */
DECLARE_EVENT_CLASS(rvt_qphash_template,
TP_PROTO(struct rvt_qp *qp, u32 bucket),
TP_ARGS(qp, bucket),
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u32, qpn)
__field(u32, bucket)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->bucket = bucket;
),
TP_printk(
"[%s] qpn 0x%x bucket %u",
__get_str(dev),
__entry->qpn,
__entry->bucket
)
);
/*
 * Concrete tracepoints built on rvt_qphash_template: rvt_qpinsert and
 * rvt_qpremove.  Both take a QP and a bucket and share one record layout.
 */
DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
TP_PROTO(struct rvt_qp *qp, u32 bucket),
TP_ARGS(qp, bucket));
DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
TP_PROTO(struct rvt_qp *qp, u32 bucket),
TP_ARGS(qp, bucket));
#endif /* __RVT_TRACE_QP_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_qp
#include <trace/define_trace.h>

View File

@ -0,0 +1,81 @@
/*
* Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ)
#define __RVT_TRACE_RVT_H
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_vt.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt
/*
 * rvt_dbg - free-form debug message tracepoint.
 *
 * @rdi: the rdmavt device the message relates to
 * @msg: NUL-terminated message text
 *
 * The record stores a copy of the device name string and of @msg, and
 * prints them as "[<dev>]: <msg>".
 */
TRACE_EVENT(rvt_dbg,
TP_PROTO(struct rvt_dev_info *rdi,
const char *msg),
TP_ARGS(rdi, msg),
TP_STRUCT__entry(
RDI_DEV_ENTRY(rdi)
__string(msg, msg)
),
TP_fast_assign(
RDI_DEV_ASSIGN(rdi);
__assign_str(msg, msg);
),
TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
);
#endif /* __RVT_TRACE_RVT_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_rvt
#include <trace/define_trace.h>

View File

@ -0,0 +1,132 @@
/*
* Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
#define __RVT_TRACE_TX_H
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_vt.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_tx
/*
 * wr_opcode_name(X) expands to the pair { IB_WR_X, "X" }.
 * show_wr_opcode() hands a list of such pairs to __print_symbolic() so
 * that a numeric work request opcode can be shown by name in trace
 * output.
 */
#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
#define show_wr_opcode(opcode) \
__print_symbolic(opcode, \
wr_opcode_name(RDMA_WRITE), \
wr_opcode_name(RDMA_WRITE_WITH_IMM), \
wr_opcode_name(SEND), \
wr_opcode_name(SEND_WITH_IMM), \
wr_opcode_name(RDMA_READ), \
wr_opcode_name(ATOMIC_CMP_AND_SWP), \
wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
wr_opcode_name(LSO), \
wr_opcode_name(SEND_WITH_INV), \
wr_opcode_name(RDMA_READ_WITH_INV), \
wr_opcode_name(LOCAL_INV), \
wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
/* Format string used by the rvt_post_one_wr tracepoint below. */
#define POS_PRN \
"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
/*
 * rvt_post_one_wr - trace a send work queue entry.
 *
 * @qp:  the queue pair
 * @wqe: the send wqe
 *
 * From @wqe the record takes wr_id, psn, lpsn, length and opcode; from
 * @qp it takes the QP number and the s_size, s_avail, s_head and s_last
 * values.  The opcode is printed both numerically and by name.
 */
TRACE_EVENT(
rvt_post_one_wr,
TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
TP_ARGS(qp, wqe),
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u64, wr_id)
__field(u32, qpn)
__field(u32, psn)
__field(u32, lpsn)
__field(u32, length)
__field(u32, opcode)
__field(u32, size)
__field(u32, avail)
__field(u32, head)
__field(u32, last)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
__entry->wr_id = wqe->wr.wr_id;
__entry->qpn = qp->ibqp.qp_num;
__entry->psn = wqe->psn;
__entry->lpsn = wqe->lpsn;
__entry->length = wqe->length;
__entry->opcode = wqe->wr.opcode;
__entry->size = qp->s_size;
__entry->avail = qp->s_avail;
__entry->head = qp->s_head;
__entry->last = qp->s_last;
),
TP_printk(
POS_PRN,
__get_str(dev),
__entry->wr_id,
__entry->qpn,
__entry->psn,
__entry->lpsn,
__entry->length,
__entry->opcode, show_wr_opcode(__entry->opcode),
__entry->size,
__entry->avail,
__entry->head,
__entry->last
)
);
#endif /* __RVT_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_tx
#include <trace/define_trace.h>

View File

@ -185,6 +185,27 @@ struct rvt_driver_provided {
* check_support() for details.
*/
/* hot path calldowns in a single cacheline */
/*
* Give the driver a notice that there is send work to do. It is up to
* the driver to generally push the packets out, this just queues the
* work with the driver. There are two variants here. The no_lock
* version requires the s_lock not to be held. The other assumes the
* s_lock is held.
*/
void (*schedule_send)(struct rvt_qp *qp);
void (*schedule_send_no_lock)(struct rvt_qp *qp);
/* Driver specific work request checking */
int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
/*
* Sometimes rdmavt needs to kick the driver's send progress. That is
* done by this call back.
*/
void (*do_send)(struct rvt_qp *qp);
/* Passed to ib core registration. Callback to create sysfs files */
int (*port_callback)(struct ib_device *, u8, struct kobject *);
@ -222,22 +243,6 @@ struct rvt_driver_provided {
*/
void (*notify_qp_reset)(struct rvt_qp *qp);
/*
* Give the driver a notice that there is send work to do. It is up to
* the driver to generally push the packets out, this just queues the
* work with the driver. There are two variants here. The no_lock
* version requires the s_lock not to be held. The other assumes the
* s_lock is held.
*/
void (*schedule_send)(struct rvt_qp *qp);
void (*schedule_send_no_lock)(struct rvt_qp *qp);
/*
* Sometimes rdmavt needs to kick the driver's send progress. That is
* done by this call back.
*/
void (*do_send)(struct rvt_qp *qp);
/*
* Get a path mtu from the driver based on qp attributes.
*/
@ -324,9 +329,6 @@ struct rvt_driver_provided {
void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
/* Driver specific work request checking */
int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
/* Notify driver a mad agent has been created */
void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
@ -355,12 +357,12 @@ struct rvt_dev_info {
/* post send table */
const struct rvt_operation_params *post_parms;
struct rvt_mregion __rcu *dma_mr;
struct rvt_lkey_table lkey_table;
/* Driver specific helper functions */
struct rvt_driver_provided driver_f;
struct rvt_mregion __rcu *dma_mr;
struct rvt_lkey_table lkey_table;
/* Internal use */
int n_pds_allocated;
spinlock_t n_pds_lock; /* Protect pd allocated count */

View File

@ -90,11 +90,15 @@ struct rvt_mregion {
#define RVT_MAX_LKEY_TABLE_BITS 23
struct rvt_lkey_table {
spinlock_t lock; /* protect changes in this struct */
/* read mostly fields */
u32 max; /* size of the table */
u32 shift; /* lkey/rkey shift */
struct rvt_mregion __rcu **table;
/* writeable fields */
/* protect changes in this struct */
spinlock_t lock ____cacheline_aligned_in_smp;
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
u32 max; /* size of the table */
struct rvt_mregion __rcu **table;
};
/*

View File

@ -51,6 +51,7 @@
#include <rdma/rdma_vt.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdmavt_cq.h>
/*
* Atomic bit definitions for r_aflags.
*/
@ -484,6 +485,23 @@ static inline void rvt_put_qp(struct rvt_qp *qp)
wake_up(&qp->wait);
}
/**
 * rvt_put_swqe - release the mr references of a send wqe
 * @wqe: the send wqe
 *
 * Calls rvt_put_mr() on the mr of each of the first wr.num_sge
 * entries of the wqe's sg_list.
 */
static inline void rvt_put_swqe(struct rvt_swqe *wqe)
{
	int idx = 0;

	while (idx < wqe->wr.num_sge) {
		rvt_put_mr(wqe->sg_list[idx].mr);
		idx++;
	}
}
/**
* rvt_qp_wqe_reserve - reserve operation
* @qp - the rvt qp
@ -527,6 +545,65 @@ static inline void rvt_qp_wqe_unreserve(
}
}
extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
/**
 * rvt_qp_swqe_complete() - post a send completion when one is due
 * @qp: the qp
 * @wqe: the send wqe
 * @status: completion status
 *
 * Nothing is generated for a wqe flagged RVT_SEND_RESERVE_USED.
 * Otherwise a completion is entered on the qp's send completion queue
 * when the qp does not have RVT_S_SIGNAL_REQ_WR set, when the wqe
 * carries IB_SEND_SIGNALED, or when @status is not IB_WC_SUCCESS.
 *
 * See IBTA 10.7.3.1 for info on completion control.
 */
static inline void rvt_qp_swqe_complete(
	struct rvt_qp *qp,
	struct rvt_swqe *wqe,
	enum ib_wc_status status)
{
	struct ib_wc wc;
	int failed = status != IB_WC_SUCCESS;

	if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
		return;

	/* successful and unsignaled on a signal-on-request qp: stay silent */
	if ((qp->s_flags & RVT_S_SIGNAL_REQ_WR) &&
	    !(wqe->wr.send_flags & IB_SEND_SIGNALED) &&
	    !failed)
		return;

	memset(&wc, 0, sizeof(wc));
	wc.qp = &qp->ibqp;
	wc.wr_id = wqe->wr.wr_id;
	wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode];
	wc.byte_len = wqe->length;
	wc.status = status;
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, failed);
}
/**
 * rvt_div_round_up_mtu - divide a length by the mtu, rounding up
 * @qp: the qp whose pmtu and log_pmtu are used
 * @len: the length
 *
 * Shift based divide: adds pmtu - 1 to @len and shifts the sum right
 * by log_pmtu.
 */
static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len)
{
	return (qp->pmtu - 1 + len) >> qp->log_pmtu;
}
/**
 * rvt_div_mtu - divide a length by the mtu
 * @qp: the qp whose log_pmtu is used
 * @len: the length
 *
 * Shift based divide: shifts @len right by log_pmtu.
 */
static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len)
{
	u32 quot = len;

	quot >>= qp->log_pmtu;
	return quot;
}
extern const int ib_rvt_state_ops[];
struct rvt_dev_info;

View File

@ -75,7 +75,7 @@
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
#define HFI1_USER_SWMINOR 2
#define HFI1_USER_SWMINOR 3
/*
* We will encode the major/minor inside a single 32bit version number.