Linus Torvalds c552441373 Main batch of InfiniBand/RDMA changes for 3.11 merge window:
- AF_IB (native IB addressing) for CMA from Sean Hefty
  - New mlx5 driver for Mellanox Connect-IB adapters (including post merge request fixes)
  - SRP fixes from Bart Van Assche (including fix to first merge request)
  - qib HW driver updates
  - Resurrection of ocrdma HW driver development
  - uverbs conversion to create fds with O_CLOEXEC set
  - Other small changes and fixes
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIcBAABCAAGBQJR30TKAAoJEENa44ZhAt0h854P/jvAhK5u+XTM5VyjAi0DKJ7P
 bWcsu+KxbOIFnjEdsYQl1mGP44gdO8GPZp7+JR5nDHDRpw9K76qy6QQiPbaF6Y8D
 cZH8Xlq4hzBfElTWBkExEemPrVUUq77j03FE9TBatdLAtEyYkgrNyqr7Ys6zVwVK
 ugR8nAahvnB7Jh1tsyZBBd9kfbWtXJnaGC8/Zk3Na4n4zXRAbr0DcnRF0sncTL38
 VFnWbi33OQAxu5bsb2jGec/SNP3BbNwspFPjSCKqiiItRaCj13JiHhrKKvVk4RZe
 hIRnPH47kjLRp2/PwBo6o+gTXZuRg48VGBx4CKUTwx1nCzPPN1iz9ZOfqUv9Qwcv
 LX8mxC7QS/Yvud4KeEBsj6kotb80EkRF2KV5RkIKCxQiwetGD9127bZylC8ttxGw
 2f6MzYtAGD4R4C10lO8N+59VugSg1xAvwsqz0a/jy2XyVHbI1ugQedzkB20x5WPY
 51S08ABvtU9yIxIYrw2VEaa/5WN+XJ6+LpG9QBAGXdMLiCiiAe7n/YzyXI6AgwaW
 Jl/uKr6H6/jEHUHKwkyqsmbpVGPhtGWu8deyr1oYvOEP4i48gcDqMQsfMcCISrQV
 MeQU3hS/obykUlNeqjmMI2CXrecqSsiq0hXd4DLaSoZ2Rb4Drx2Wj6sTQLIAgL2q
 GBYjHWMUpZXIFHQaH7am
 =nZh8
 -----END PGP SIGNATURE-----

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull InfiniBand/RDMA changes from Roland Dreier:
 - AF_IB (native IB addressing) for CMA from Sean Hefty
 - new mlx5 driver for Mellanox Connect-IB adapters (including post
   merge request fixes)
 - SRP fixes from Bart Van Assche (including fix to first merge request)
 - qib HW driver updates
 - resurrection of ocrdma HW driver development
 - uverbs conversion to create fds with O_CLOEXEC set
 - other small changes and fixes

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (66 commits)
  mlx5: Return -EFAULT instead of -EPERM
  IB/qib: Log all SDMA errors unconditionally
  IB/qib: Fix module-level leak
  mlx5_core: Adjust hca_cap.uar_page_sz to conform to Connect-IB spec
  IB/srp: Let srp_abort() return FAST_IO_FAIL if TL offline
  IB/uverbs: Use get_unused_fd_flags(O_CLOEXEC) instead of get_unused_fd()
  mlx5_core: Fixes for sparse warnings
  IB/mlx5: Make profile[] static in main.c
  mlx5: Fix parameter type of health_handler_t
  mlx5: Add driver for Mellanox Connect-IB adapters
  IB/core: Add reserved values to enums for low-level driver use
  IB/srp: Bump driver version and release date
  IB/srp: Make HCA completion vector configurable
  IB/srp: Maintain a single connection per I_T nexus
  IB/srp: Fail I/O fast if target offline
  IB/srp: Skip host settle delay
  IB/srp: Avoid skipping srp_reset_host() after a transport error
  IB/srp: Fix remove_one crash due to resource exhaustion
  IB/qib: New transmitter tunning settings for Dell 1.1 backplane
  IB/core: Fix error return code in add_port()
  ...
2013-07-13 12:57:21 -07:00

1101 lines
30 KiB
C

/*
* IBM eServer eHCA Infiniband device driver for Linux on POWER
*
* module start stop, hca detection
*
* Authors: Heiko J Schick <schickhj@de.ibm.com>
* Hoang-Nam Nguyen <hnguyen@de.ibm.com>
* Joachim Fenkes <fenkes@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
*
* All rights reserved.
*
* This source code is distributed under a dual license of GPL v2.0 and OpenIB
* BSD.
*
* OpenIB BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef CONFIG_PPC_64K_PAGES
#include <linux/slab.h>
#endif
#include <linux/notifier.h>
#include <linux/memory.h>
#include "ehca_classes.h"
#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
#include "ehca_tools.h"
#include "hcp_if.h"
/* Driver version; used for MODULE_VERSION and in the banner printed by ehca_module_init(). */
#define HCAD_VERSION "0029"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
MODULE_VERSION(HCAD_VERSION);
/*
 * Backing variables for the module parameters registered below; the
 * initializers are the defaults.  For nr_ports, lock_hcalls, max_cq and
 * max_qp the value -1 is the "autodetect" default described in the
 * corresponding MODULE_PARM_DESC text.  The non-static variables are
 * presumably referenced from other ehca source files (confirm).
 */
static bool ehca_open_aqp1 = 0;
static int ehca_hw_level = 0;
static bool ehca_poll_all_eqs = 1;
int ehca_debug_level = 0;
int ehca_nr_ports = -1;
bool ehca_use_hp_mr = 0;
int ehca_port_act_time = 30;
int ehca_static_rate = -1;
bool ehca_scaling_code = 0;
int ehca_lock_hcalls = -1;
int ehca_max_cq = -1;
int ehca_max_qp = -1;
/*
 * Parameter registration.  Every parameter is exposed with S_IRUGO, i.e.
 * world-readable and not writable.
 * NOTE(review): lock_hcalls is an int registered with type "bint" --
 * presumably so the variable can keep its -1 "autodetect" default while a
 * user-supplied value is parsed as a boolean; confirm against the kernel's
 * module parameter documentation.
 */
module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO);
module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
module_param_named(hw_level, ehca_hw_level, int, S_IRUGO);
module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO);
module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO);
module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO);
module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
/* Human-readable parameter descriptions. */
MODULE_PARM_DESC(open_aqp1,
"Open AQP1 on startup (default: no)");
MODULE_PARM_DESC(debug_level,
"Amount of debug output (0: none (default), 1: traces, "
"2: some dumps, 3: lots)");
MODULE_PARM_DESC(hw_level,
"Hardware level (0: autosensing (default), "
"0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
MODULE_PARM_DESC(nr_ports,
"number of connected ports (-1: autodetect (default), "
"1: port one only, 2: two ports)");
MODULE_PARM_DESC(use_hp_mr,
"Use high performance MRs (default: no)");
MODULE_PARM_DESC(port_act_time,
"Time to wait for port activation (default: 30 sec)");
MODULE_PARM_DESC(poll_all_eqs,
"Poll all event queues periodically (default: yes)");
MODULE_PARM_DESC(static_rate,
"Set permanent static rate (default: no static rate)");
MODULE_PARM_DESC(scaling_code,
"Enable scaling code (default: no)");
MODULE_PARM_DESC(lock_hcalls,
"Serialize all hCalls made by the driver "
"(default: autodetect)");
MODULE_PARM_DESC(number_of_cqs,
"Max number of CQs which can be allocated "
"(default: autodetect)");
MODULE_PARM_DESC(number_of_qps,
"Max number of QPs which can be allocated "
"(default: autodetect)");
/*
 * IDRs for QPs and CQs together with their rwlocks.  In this file they are
 * only torn down (idr_destroy() in ehca_module_exit()); they are not static,
 * so presumably the QP/CQ code in other ehca files populates them (confirm).
 */
DEFINE_RWLOCK(ehca_qp_idr_lock);
DEFINE_RWLOCK(ehca_cq_idr_lock);
DEFINE_IDR(ehca_qp_idr);
DEFINE_IDR(ehca_cq_idr);
static LIST_HEAD(shca_list); /* list of all registered ehcas */
/* Taken around every access to shca_list in this file. */
DEFINE_SPINLOCK(shca_list_lock);
/* Timer running ehca_poll_eqs(); set up in ehca_module_init() when poll_all_eqs is enabled. */
static struct timer_list poll_eqs_timer;
#ifdef CONFIG_PPC_64K_PAGES
/* Slab cache backing firmware control blocks; created in ehca_create_slab_caches(). */
static struct kmem_cache *ctblk_cache;

/*
 * Allocate one zeroed firmware control block from ctblk_cache.
 * @flags: allocation flags handed to the slab allocator.
 * Returns the block, or NULL (after logging an error) when the allocation
 * fails.
 */
void *ehca_alloc_fw_ctrlblock(gfp_t flags)
{
	void *blk;

	blk = kmem_cache_zalloc(ctblk_cache, flags);
	if (blk == NULL)
		ehca_gen_err("Out of memory for ctblk");

	return blk;
}

/*
 * Return a control block to ctblk_cache.  A NULL @ptr is ignored.
 */
void ehca_free_fw_ctrlblock(void *ptr)
{
	if (ptr == NULL)
		return;

	kmem_cache_free(ctblk_cache, ptr);
}
#endif
/*
 * Translate an hCall return code into a negative errno value.
 *
 *   H_SUCCESS                                    -> 0
 *   H_RESOURCE, H_BUSY                           -> -EBUSY
 *   H_NOT_ENOUGH_RESOURCES, H_CONSTRAINED,
 *   H_NO_MEM                                     -> -ENOMEM
 *   anything else                                -> -EINVAL
 */
int ehca2ib_return_code(u64 ehca_rc)
{
	if (ehca_rc == H_SUCCESS)
		return 0;

	/* resource in use */
	if (ehca_rc == H_RESOURCE || ehca_rc == H_BUSY)
		return -EBUSY;

	/* insufficient resources / resource constraint / no memory */
	if (ehca_rc == H_NOT_ENOUGH_RESOURCES ||
	    ehca_rc == H_CONSTRAINED ||
	    ehca_rc == H_NO_MEM)
		return -ENOMEM;

	return -EINVAL;
}
/*
 * Create every slab cache the driver uses (PD, CQ, QP, AV, MR/MW, small
 * queue and, with 64K pages, the firmware control block cache).
 *
 * On failure the caches created so far are released in reverse order and
 * the error code is returned; on success 0 is returned.
 */
static int ehca_create_slab_caches(void)
{
	int rc;

	rc = ehca_init_pd_cache();
	if (rc != 0) {
		ehca_gen_err("Cannot create PD SLAB cache.");
		return rc;
	}

	rc = ehca_init_cq_cache();
	if (rc != 0) {
		ehca_gen_err("Cannot create CQ SLAB cache.");
		goto undo_pd;
	}

	rc = ehca_init_qp_cache();
	if (rc != 0) {
		ehca_gen_err("Cannot create QP SLAB cache.");
		goto undo_cq;
	}

	rc = ehca_init_av_cache();
	if (rc != 0) {
		ehca_gen_err("Cannot create AV SLAB cache.");
		goto undo_qp;
	}

	rc = ehca_init_mrmw_cache();
	if (rc != 0) {
		ehca_gen_err("Cannot create MR&MW SLAB cache.");
		goto undo_av;
	}

	rc = ehca_init_small_qp_cache();
	if (rc != 0) {
		ehca_gen_err("Cannot create small queue SLAB cache.");
		goto undo_mrmw;
	}

#ifdef CONFIG_PPC_64K_PAGES
	ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
					EHCA_PAGESIZE, H_CB_ALIGNMENT,
					SLAB_HWCACHE_ALIGN,
					NULL);
	if (ctblk_cache == NULL) {
		ehca_gen_err("Cannot create ctblk SLAB cache.");
		/* the small queue cache has no unwind label of its own */
		ehca_cleanup_small_qp_cache();
		rc = -ENOMEM;
		goto undo_mrmw;
	}
#endif
	return 0;

undo_mrmw:
	ehca_cleanup_mrmw_cache();
undo_av:
	ehca_cleanup_av_cache();
undo_qp:
	ehca_cleanup_qp_cache();
undo_cq:
	ehca_cleanup_cq_cache();
undo_pd:
	ehca_cleanup_pd_cache();
	return rc;
}
/*
 * Release all slab caches set up by ehca_create_slab_caches().  The
 * control block cache (64K pages only) is destroyed last and only if it
 * exists.
 */
static void ehca_destroy_slab_caches(void)
{
	ehca_cleanup_small_qp_cache();
	ehca_cleanup_mrmw_cache();
	ehca_cleanup_av_cache();
	ehca_cleanup_qp_cache();
	ehca_cleanup_cq_cache();
	ehca_cleanup_pd_cache();

#ifdef CONFIG_PPC_64K_PAGES
	if (ctblk_cache != NULL)
		kmem_cache_destroy(ctblk_cache);
#endif
}
/*
 * Field masks applied to rblock->hw_ver in ehca_sense_attributes() to
 * extract the adapter version (HCAAVER) and the revision id (REVID).
 */
#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39)
#define EHCA_REVID EHCA_BMASK_IBM(40, 63)
/*
 * Table pairing each HCA capability mask with its printable name;
 * ehca_sense_attributes() walks it to log the capabilities that are set.
 */
static struct cap_descr {
u64 mask;
char *descr;
} hca_cap_descr[] = {
{ HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
{ HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
{ HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
{ HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
{ HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
{ HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
{ HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
{ HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
{ HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
{ HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
{ HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
{ HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
{ HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
{ HCA_CAP_SRQ, "HCA_CAP_SRQ" },
{ HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
{ HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
{ HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
};
/*
 * ehca_sense_attributes() - query the adapter and fill in derived fields
 * @shca: adapter being probed; ofdev, ipz_hca_handle, max_num_qps and
 *        max_num_cqs must already be set by the caller
 *
 * Queries the HCA to determine the port count, hardware level, capability
 * flags, supported MR page sizes and the QP/CQ limits, then queries port 1
 * for the maximum MTU.  As a side effect the global ehca_lock_hcalls is
 * resolved if it was still on "autodetect" (-1).
 *
 * Returns 0 on success, -ENOMEM if no control block could be allocated and
 * -EPERM if one of the two queries fails.
 */
static int ehca_sense_attributes(struct ehca_shca *shca)
{
	int i, ret = 0;
	u64 h_ret;
	struct hipz_query_hca *rblock;
	struct hipz_query_port *port;
	const char *loc_code;
	/* pairs of (capability bit, page size in bytes) */
	static const u32 pgsize_map[] = {
		HCA_CAP_MR_PGSIZE_4K,  0x1000,
		HCA_CAP_MR_PGSIZE_64K, 0x10000,
		HCA_CAP_MR_PGSIZE_1M,  0x100000,
		HCA_CAP_MR_PGSIZE_16M, 0x1000000,
	};

	ehca_gen_dbg("Probing adapter %s...",
		     shca->ofdev->dev.of_node->full_name);
	loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
				   NULL);
	if (loc_code)
		ehca_gen_dbg(" ... location code=%s", loc_code);

	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
	if (!rblock) {
		ehca_gen_err("Cannot allocate rblock memory.");
		return -ENOMEM;
	}

	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
	if (h_ret != H_SUCCESS) {
		ehca_gen_err("Cannot query device properties. h_ret=%lli",
			     h_ret);
		ret = -EPERM;
		goto sense_attributes1;
	}

	/* the nr_ports=1 module parameter overrides what the HCA reports */
	if (ehca_nr_ports == 1)
		shca->num_ports = 1;
	else
		shca->num_ports = (u8)rblock->num_ports;

	ehca_gen_dbg(" ... found %x ports", rblock->num_ports);

	if (ehca_hw_level == 0) {
		/* autosensing: derive the level from version and revision */
		u32 hcaaver;
		u32 revid;

		hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
		revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);

		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);

		if (hcaaver == 1) {
			if (revid <= 3)
				shca->hw_level = 0x10 | (revid + 1);
			else
				shca->hw_level = 0x14;
		} else if (hcaaver == 2) {
			if (revid == 0)
				shca->hw_level = 0x21;
			else if (revid == 0x10)
				shca->hw_level = 0x22;
			else if (revid == 0x20 || revid == 0x21)
				shca->hw_level = 0x23;
		}

		/* hw_level is still unset if no combination above matched */
		if (!shca->hw_level) {
			ehca_gen_warn("unknown hardware version"
				      " - assuming default level");
			shca->hw_level = 0x22;
		}
	} else
		shca->hw_level = ehca_hw_level;
	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);

	shca->hca_cap = rblock->hca_cap_indicators;
	ehca_gen_dbg(" ... HCA capabilities:");
	for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
		if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
			ehca_gen_dbg("   %s", hca_cap_descr[i].descr);

	/* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
	 * a firmware property, so it's valid across all adapters
	 */
	if (ehca_lock_hcalls == -1)
		ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
						   shca->hca_cap);

	/* translate supported MR page sizes; always support 4K */
	shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
	for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
		if (rblock->memory_page_size_supported & pgsize_map[i])
			shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];

	/* Set maximum number of CQs and QPs to calculate EQ size */
	if (shca->max_num_qps == -1)
		shca->max_num_qps = min_t(int, rblock->max_qp,
					  EHCA_MAX_NUM_QUEUES);
	else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
		ehca_gen_warn("The requested number of QPs is out of range "
			      "(1 - %i) specified by HW. Value is set to %i",
			      rblock->max_qp, rblock->max_qp);
		shca->max_num_qps = rblock->max_qp;
	}

	if (shca->max_num_cqs == -1)
		shca->max_num_cqs = min_t(int, rblock->max_cq,
					  EHCA_MAX_NUM_QUEUES);
	else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
		ehca_gen_warn("The requested number of CQs is out of range "
			      "(1 - %i) specified by HW. Value is set to %i",
			      rblock->max_cq, rblock->max_cq);
		/* actually apply the limit announced above, as for QPs */
		shca->max_num_cqs = rblock->max_cq;
	}

	/* query max MTU from first port -- it's the same for all ports */
	/* the control block is reused as the port query buffer */
	port = (struct hipz_query_port *)rblock;
	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
	if (h_ret != H_SUCCESS) {
		ehca_gen_err("Cannot query port properties. h_ret=%lli",
			     h_ret);
		ret = -EPERM;
		goto sense_attributes1;
	}

	shca->max_mtu = port->max_mtu;

sense_attributes1:
	ehca_free_fw_ctrlblock(rblock);
	return ret;
}
static int init_node_guid(struct ehca_shca *shca)
{
int ret = 0;
struct hipz_query_hca *rblock;
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
}
if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't query device properties");
ret = -EINVAL;
goto init_node_guid1;
}
memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
init_node_guid1:
ehca_free_fw_ctrlblock(rblock);
return ret;
}
static int ehca_init_device(struct ehca_shca *shca)
{
int ret;
ret = init_node_guid(shca);
if (ret)
return ret;
strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
shca->ib_device.owner = THIS_MODULE;
shca->ib_device.uverbs_abi_ver = 8;
shca->ib_device.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
shca->ib_device.node_type = RDMA_NODE_IB_CA;
shca->ib_device.phys_port_cnt = shca->num_ports;
shca->ib_device.num_comp_vectors = 1;
shca->ib_device.dma_device = &shca->ofdev->dev;
shca->ib_device.query_device = ehca_query_device;
shca->ib_device.query_port = ehca_query_port;
shca->ib_device.query_gid = ehca_query_gid;
shca->ib_device.query_pkey = ehca_query_pkey;
/* shca->in_device.modify_device = ehca_modify_device */
shca->ib_device.modify_port = ehca_modify_port;
shca->ib_device.alloc_ucontext = ehca_alloc_ucontext;
shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext;
shca->ib_device.alloc_pd = ehca_alloc_pd;
shca->ib_device.dealloc_pd = ehca_dealloc_pd;
shca->ib_device.create_ah = ehca_create_ah;
/* shca->ib_device.modify_ah = ehca_modify_ah; */
shca->ib_device.query_ah = ehca_query_ah;
shca->ib_device.destroy_ah = ehca_destroy_ah;
shca->ib_device.create_qp = ehca_create_qp;
shca->ib_device.modify_qp = ehca_modify_qp;
shca->ib_device.query_qp = ehca_query_qp;
shca->ib_device.destroy_qp = ehca_destroy_qp;
shca->ib_device.post_send = ehca_post_send;
shca->ib_device.post_recv = ehca_post_recv;
shca->ib_device.create_cq = ehca_create_cq;
shca->ib_device.destroy_cq = ehca_destroy_cq;
shca->ib_device.resize_cq = ehca_resize_cq;
shca->ib_device.poll_cq = ehca_poll_cq;
/* shca->ib_device.peek_cq = ehca_peek_cq; */
shca->ib_device.req_notify_cq = ehca_req_notify_cq;
/* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */
shca->ib_device.get_dma_mr = ehca_get_dma_mr;
shca->ib_device.reg_phys_mr = ehca_reg_phys_mr;
shca->ib_device.reg_user_mr = ehca_reg_user_mr;
shca->ib_device.query_mr = ehca_query_mr;
shca->ib_device.dereg_mr = ehca_dereg_mr;
shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr;
shca->ib_device.alloc_mw = ehca_alloc_mw;
shca->ib_device.bind_mw = ehca_bind_mw;
shca->ib_device.dealloc_mw = ehca_dealloc_mw;
shca->ib_device.alloc_fmr = ehca_alloc_fmr;
shca->ib_device.map_phys_fmr = ehca_map_phys_fmr;
shca->ib_device.unmap_fmr = ehca_unmap_fmr;
shca->ib_device.dealloc_fmr = ehca_dealloc_fmr;
shca->ib_device.attach_mcast = ehca_attach_mcast;
shca->ib_device.detach_mcast = ehca_detach_mcast;
shca->ib_device.process_mad = ehca_process_mad;
shca->ib_device.mmap = ehca_mmap;
shca->ib_device.dma_ops = &ehca_dma_mapping_ops;
if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
shca->ib_device.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
shca->ib_device.create_srq = ehca_create_srq;
shca->ib_device.modify_srq = ehca_modify_srq;
shca->ib_device.query_srq = ehca_query_srq;
shca->ib_device.destroy_srq = ehca_destroy_srq;
shca->ib_device.post_srq_recv = ehca_post_srq_recv;
}
return ret;
}
/*
 * ehca_create_aqp1() - create the CQ and the GSI QP (AQP1) for one port
 * @shca: adapter owning the port
 * @port: port number, 1-based (the port state lives in sport[port - 1])
 *
 * On success the new CQ is stored in sport->ibcq_aqp1 and the QP in
 * sport->ibqp_sqp[IB_QPT_GSI].
 *
 * Returns 0 on success, -EPERM if the CQ or QP already exists, or the
 * error reported by ib_create_cq()/ib_create_qp().  On failure the CQ
 * created here is destroyed again and sport->ibcq_aqp1 is reset to NULL.
 */
static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
{
	struct ehca_sport *sport = &shca->sport[port - 1];
	struct ib_cq *ibcq;
	struct ib_qp *ibqp;
	struct ib_qp_init_attr qp_init_attr;
	int ret;

	if (sport->ibcq_aqp1) {
		ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
		return -EPERM;
	}

	ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0);
	if (IS_ERR(ibcq)) {
		ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
		return PTR_ERR(ibcq);
	}
	sport->ibcq_aqp1 = ibcq;

	if (sport->ibqp_sqp[IB_QPT_GSI]) {
		ehca_err(&shca->ib_device, "AQP1 QP is already created.");
		ret = -EPERM;
		goto create_aqp1;
	}

	/* the same CQ serves both the send and the receive side */
	memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
	qp_init_attr.send_cq          = ibcq;
	qp_init_attr.recv_cq          = ibcq;
	qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
	qp_init_attr.cap.max_send_wr  = 100;
	qp_init_attr.cap.max_recv_wr  = 100;
	qp_init_attr.cap.max_send_sge = 2;
	qp_init_attr.cap.max_recv_sge = 1;
	qp_init_attr.qp_type          = IB_QPT_GSI;
	qp_init_attr.port_num         = port;
	qp_init_attr.qp_context       = NULL;
	qp_init_attr.event_handler    = NULL;
	qp_init_attr.srq              = NULL;

	ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
	if (IS_ERR(ibqp)) {
		ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
		ret = PTR_ERR(ibqp);
		goto create_aqp1;
	}
	sport->ibqp_sqp[IB_QPT_GSI] = ibqp;

	return 0;

create_aqp1:
	ib_destroy_cq(sport->ibcq_aqp1);
	/*
	 * Do not leave a pointer to the destroyed CQ behind: a later call
	 * would otherwise fail the "already created" check, and
	 * ehca_destroy_aqp1() would destroy the CQ a second time.
	 */
	sport->ibcq_aqp1 = NULL;
	return ret;
}
/*
 * Destroy the GSI QP of @sport and then its AQP1 CQ.
 *
 * The CQ is left untouched if destroying the QP fails.  Returns 0 on
 * success, otherwise the error of the step that failed.
 */
static int ehca_destroy_aqp1(struct ehca_sport *sport)
{
	int rc;

	rc = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
	if (rc != 0) {
		ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", rc);
		return rc;
	}

	rc = ib_destroy_cq(sport->ibcq_aqp1);
	if (rc == 0)
		return 0;

	ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", rc);
	return rc;
}
/*
 * sysfs "show" handler for the driver's debug_level attribute: writes the
 * current ehca_debug_level as a decimal number followed by a newline and
 * returns the snprintf() result.
 */
static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
{
	ssize_t len;

	len = snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
	return len;
}
/*
 * sysfs "store" handler for the driver's debug_level attribute.
 *
 * Only the first byte of @buf is looked at: if it is a decimal digit it
 * becomes the new ehca_debug_level, anything else is ignored.  The handler
 * always reports exactly one byte as consumed, whatever @count is.
 */
static ssize_t ehca_store_debug_level(struct device_driver *ddp,
				      const char *buf, size_t count)
{
	const char first = *buf;

	if (first >= '0' && first <= '9')
		ehca_debug_level = first - '0';

	return 1;
}
/*
 * Driver-level sysfs attribute "debug_level", readable and writable by the
 * owner only (S_IRUSR | S_IWUSR), backed by the two handlers above.
 */
static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
ehca_show_debug_level, ehca_store_debug_level);
/* NULL-terminated list of driver attributes. */
static struct attribute *ehca_drv_attrs[] = {
&driver_attr_debug_level.attr,
NULL
};
static struct attribute_group ehca_drv_attr_grp = {
.attrs = ehca_drv_attrs
};
/* NULL-terminated group list referenced by ehca_driver.driver.groups. */
static const struct attribute_group *ehca_drv_attr_groups[] = {
&ehca_drv_attr_grp,
NULL,
};
#define EHCA_RESOURCE_ATTR(name) \
static ssize_t ehca_show_##name(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
struct ehca_shca *shca; \
struct hipz_query_hca *rblock; \
int data; \
\
shca = dev_get_drvdata(dev); \
\
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \
if (!rblock) { \
dev_err(dev, "Can't allocate rblock memory.\n"); \
return 0; \
} \
\
if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
dev_err(dev, "Can't query device properties\n"); \
ehca_free_fw_ctrlblock(rblock); \
return 0; \
} \
\
data = rblock->name; \
ehca_free_fw_ctrlblock(rblock); \
\
if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \
return snprintf(buf, 256, "1\n"); \
else \
return snprintf(buf, 256, "%d\n", data); \
\
} \
static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
EHCA_RESOURCE_ATTR(num_ports);
EHCA_RESOURCE_ATTR(hw_ver);
EHCA_RESOURCE_ATTR(max_eq);
EHCA_RESOURCE_ATTR(cur_eq);
EHCA_RESOURCE_ATTR(max_cq);
EHCA_RESOURCE_ATTR(cur_cq);
EHCA_RESOURCE_ATTR(max_qp);
EHCA_RESOURCE_ATTR(cur_qp);
EHCA_RESOURCE_ATTR(max_mr);
EHCA_RESOURCE_ATTR(cur_mr);
EHCA_RESOURCE_ATTR(max_mw);
EHCA_RESOURCE_ATTR(cur_mw);
EHCA_RESOURCE_ATTR(max_pd);
EHCA_RESOURCE_ATTR(max_ah);
/*
 * sysfs "show" handler for the adapter_handle device attribute: prints the
 * adapter's HCA handle in hexadecimal followed by a newline.
 */
static ssize_t ehca_show_adapter_handle(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct ehca_shca *shca;

	shca = dev_get_drvdata(dev);

	return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle);
}

static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
/*
 * NULL-terminated list of the per-device sysfs attributes defined above.
 * The group is created in ehca_probe() and removed in ehca_remove().
 */
static struct attribute *ehca_dev_attrs[] = {
&dev_attr_adapter_handle.attr,
&dev_attr_num_ports.attr,
&dev_attr_hw_ver.attr,
&dev_attr_max_eq.attr,
&dev_attr_cur_eq.attr,
&dev_attr_max_cq.attr,
&dev_attr_cur_cq.attr,
&dev_attr_max_qp.attr,
&dev_attr_cur_qp.attr,
&dev_attr_max_mr.attr,
&dev_attr_cur_mr.attr,
&dev_attr_max_mw.attr,
&dev_attr_cur_mw.attr,
&dev_attr_max_pd.attr,
&dev_attr_max_ah.attr,
NULL
};
static struct attribute_group ehca_dev_attr_grp = {
.attrs = ehca_dev_attrs
};
/*
 * ehca_probe() - bring up one eHCA adapter
 * @dev: platform device for the adapter's device-tree node
 *
 * Reads the "ibm,hca-handle" property, allocates the ehca_shca/ib_device,
 * senses the adapter attributes, creates the event queues, the internal
 * PD and max MR, registers with the IB core, optionally creates AQP1 for
 * port 1 (and port 2 if present), publishes the sysfs attributes and
 * finally links the adapter into shca_list.
 *
 * Returns 0 on success.  On failure everything set up so far is torn down
 * in reverse order and the error of the step that failed is returned
 * (-ENODEV for a missing/zero handle, -ENOMEM if the device cannot be
 * allocated).
 */
static int ehca_probe(struct platform_device *dev)
{
	struct ehca_shca *shca;
	const u64 *handle;
	struct ib_pd *ibpd;
	int ret, i, eq_size;
	int rc;		/* cleanup results; must not clobber ret */
	unsigned long flags;

	handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL);
	if (!handle) {
		ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
			     dev->dev.of_node->full_name);
		return -ENODEV;
	}

	if (!(*handle)) {
		ehca_gen_err("Wrong eHCA handle for adapter: %s.",
			     dev->dev.of_node->full_name);
		return -ENODEV;
	}

	shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
	if (!shca) {
		ehca_gen_err("Cannot allocate shca memory.");
		return -ENOMEM;
	}

	mutex_init(&shca->modify_mutex);
	atomic_set(&shca->num_cqs, 0);
	atomic_set(&shca->num_qps, 0);
	/* start from the module parameters; sensed limits are applied later */
	shca->max_num_qps = ehca_max_qp;
	shca->max_num_cqs = ehca_max_cq;

	for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
		spin_lock_init(&shca->sport[i].mod_sqp_lock);

	shca->ofdev = dev;
	shca->ipz_hca_handle.handle = *handle;
	dev_set_drvdata(&dev->dev, shca);

	ret = ehca_sense_attributes(shca);
	if (ret < 0) {
		ehca_gen_err("Cannot sense eHCA attributes.");
		goto probe1;
	}

	ret = ehca_init_device(shca);
	if (ret) {
		ehca_gen_err("Cannot init ehca device struct");
		goto probe1;
	}

	eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
	/* create event queues */
	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
	if (ret) {
		ehca_err(&shca->ib_device, "Cannot create EQ.");
		goto probe1;
	}

	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
	if (ret) {
		ehca_err(&shca->ib_device, "Cannot create NEQ.");
		goto probe3;
	}

	/* create internal protection domain */
	ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
	if (IS_ERR(ibpd)) {
		ehca_err(&shca->ib_device, "Cannot create internal PD.");
		ret = PTR_ERR(ibpd);
		goto probe4;
	}

	shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
	shca->pd->ib_pd.device = &shca->ib_device;

	/* create internal max MR */
	ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
	if (ret) {
		ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
			 ret);
		goto probe5;
	}

	ret = ib_register_device(&shca->ib_device, NULL);
	if (ret) {
		ehca_err(&shca->ib_device,
			 "ib_register_device() failed ret=%i", ret);
		goto probe6;
	}

	/* create AQP1 for port 1 */
	if (ehca_open_aqp1 == 1) {
		shca->sport[0].port_state = IB_PORT_DOWN;
		ret = ehca_create_aqp1(shca, 1);
		if (ret) {
			ehca_err(&shca->ib_device,
				 "Cannot create AQP1 for port 1.");
			goto probe7;
		}
	}

	/* create AQP1 for port 2 */
	if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
		shca->sport[1].port_state = IB_PORT_DOWN;
		ret = ehca_create_aqp1(shca, 2);
		if (ret) {
			ehca_err(&shca->ib_device,
				 "Cannot create AQP1 for port 2.");
			goto probe8;
		}
	}

	ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp);
	if (ret) /* only complain; we can live without attributes */
		ehca_err(&shca->ib_device,
			 "Cannot create device attributes ret=%d", ret);

	spin_lock_irqsave(&shca_list_lock, flags);
	list_add(&shca->shca_list, &shca_list);
	spin_unlock_irqrestore(&shca_list_lock, flags);

	return 0;

	/*
	 * Error unwinding.  Cleanup results go into rc so that ret keeps the
	 * error that caused the failure.
	 */
probe8:
	rc = ehca_destroy_aqp1(&shca->sport[0]);
	if (rc)
		ehca_err(&shca->ib_device,
			 "Cannot destroy AQP1 for port 1. ret=%i", rc);

probe7:
	ib_unregister_device(&shca->ib_device);

probe6:
	rc = ehca_dereg_internal_maxmr(shca);
	if (rc)
		ehca_err(&shca->ib_device,
			 "Cannot destroy internal MR. ret=%x", rc);

probe5:
	rc = ehca_dealloc_pd(&shca->pd->ib_pd);
	if (rc)
		ehca_err(&shca->ib_device,
			 "Cannot destroy internal PD. ret=%x", rc);

probe4:
	rc = ehca_destroy_eq(shca, &shca->neq);
	if (rc)
		ehca_err(&shca->ib_device,
			 "Cannot destroy NEQ. ret=%x", rc);

probe3:
	rc = ehca_destroy_eq(shca, &shca->eq);
	if (rc)
		ehca_err(&shca->ib_device,
			 "Cannot destroy EQ. ret=%x", rc);

probe1:
	ib_dealloc_device(&shca->ib_device);

	/*
	 * Report the real cause.  Fall back to the former blanket -EINVAL
	 * should a helper ever signal failure with a non-negative value.
	 */
	return ret < 0 ? ret : -EINVAL;
}
/*
 * ehca_remove() - tear down one eHCA adapter
 * @dev: platform device whose driver data is the adapter's ehca_shca
 *
 * Undoes ehca_probe(): removes the sysfs attributes, destroys AQP1 on all
 * ports (if it was opened), unregisters from the IB core, releases the
 * internal MR and PD, destroys both event queues, unlinks the adapter
 * from shca_list and finally frees the device structure.
 *
 * Individual teardown failures are logged and do not stop the teardown.
 * Returns the result of the last teardown step (NEQ destruction).
 */
static int ehca_remove(struct platform_device *dev)
{
	struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
	unsigned long flags;
	int ret;

	sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);

	if (ehca_open_aqp1 == 1) {
		int i;

		for (i = 0; i < shca->num_ports; i++) {
			ret = ehca_destroy_aqp1(&shca->sport[i]);
			if (ret)
				/* sport[i] belongs to the 1-based port i + 1 */
				ehca_err(&shca->ib_device,
					 "Cannot destroy AQP1 for port %x "
					 "ret=%i", i + 1, ret);
		}
	}

	ib_unregister_device(&shca->ib_device);

	ret = ehca_dereg_internal_maxmr(shca);
	if (ret)
		ehca_err(&shca->ib_device,
			 "Cannot destroy internal MR. ret=%i", ret);

	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
	if (ret)
		ehca_err(&shca->ib_device,
			 "Cannot destroy internal PD. ret=%i", ret);

	ret = ehca_destroy_eq(shca, &shca->eq);
	if (ret)
		ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);

	ret = ehca_destroy_eq(shca, &shca->neq);
	if (ret)
		ehca_err(&shca->ib_device, "Cannot destroy NEQ. ret=%i", ret);

	/*
	 * Unlink from the adapter list while shca is still valid:
	 * ib_dealloc_device() may free the structure that embeds the list
	 * node, so it has to come last.
	 */
	spin_lock_irqsave(&shca_list_lock, flags);
	list_del(&shca->shca_list);
	spin_unlock_irqrestore(&shca_list_lock, flags);

	ib_dealloc_device(&shca->ib_device);

	return ret;
}
/*
 * Device-tree match table: one entry with name "lhca" and compatible
 * string "IBM,lhca", terminated by an empty entry.
 */
static struct of_device_id ehca_device_table[] =
{
{
.name = "lhca",
.compatible = "IBM,lhca",
},
{},
};
MODULE_DEVICE_TABLE(of, ehca_device_table);
/*
 * Driver descriptor registered via ibmebus_register_driver() in
 * ehca_module_init(); also carries the driver-level sysfs attribute groups.
 */
static struct platform_driver ehca_driver = {
.probe = ehca_probe,
.remove = ehca_remove,
.driver = {
.name = "ehca",
.owner = THIS_MODULE,
.groups = ehca_drv_attr_groups,
.of_match_table = ehca_device_table,
},
};
/*
 * Timer callback ("deadman" poll) for the event queues of all adapters.
 * @data: timer argument, unused.
 *
 * For every adapter in shca_list with an initialized EQ, the current queue
 * offset is sampled once and then re-sampled up to three times.  Only if
 * the offset never changed in between is ehca_process_eq() called for that
 * adapter.  The timer is then re-armed for about one second later.
 */
void ehca_poll_eqs(unsigned long data)
{
	struct ehca_shca *shca;

	spin_lock(&shca_list_lock);
	list_for_each_entry(shca, &shca_list, shca_list) {
		struct ehca_eq *eq = &shca->eq;
		volatile u64 first_ofs, later_ofs;
		unsigned long irq_flags;
		int tries_left = 3;

		if (!eq->is_initialized)
			continue;

		/* call deadman proc only if eq ptr does not change */
		spin_lock_irqsave(&eq->spinlock, irq_flags);
		first_ofs = eq->ipz_queue.current_q_offset;
		spin_unlock_irqrestore(&eq->spinlock, irq_flags);

		do {
			spin_lock_irqsave(&eq->spinlock, irq_flags);
			later_ofs = eq->ipz_queue.current_q_offset;
			spin_unlock_irqrestore(&eq->spinlock, irq_flags);
			tries_left--;
		} while (first_ofs == later_ofs && tries_left > 0);

		if (first_ofs == later_ofs)
			ehca_process_eq(shca, 0);
	}
	mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
	spin_unlock(&shca_list_lock);
}
/*
 * ehca_mem_notifier() - memory hotplug notifier callback
 * @nb:     notifier block (unused)
 * @action: memory hotplug event
 * @data:   event payload (unused)
 *
 * Memory going online or offline is vetoed (NOTIFY_BAD) as long as at
 * least one adapter is present in shca_list; the accompanying error
 * message is rate-limited through printk_timed_ratelimit().  All other
 * events, and going online/offline with no adapter present, are answered
 * with NOTIFY_OK.
 */
static int ehca_mem_notifier(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	static unsigned long ehca_dmem_warn_time;
	unsigned long flags;

	switch (action) {
	case MEM_CANCEL_OFFLINE:
	case MEM_CANCEL_ONLINE:
	case MEM_ONLINE:
	case MEM_OFFLINE:
		return NOTIFY_OK;
	case MEM_GOING_ONLINE:
	case MEM_GOING_OFFLINE:
		/* only ok if no hca is attached to the lpar */
		spin_lock_irqsave(&shca_list_lock, flags);
		if (list_empty(&shca_list)) {
			spin_unlock_irqrestore(&shca_list_lock, flags);
			return NOTIFY_OK;
		} else {
			spin_unlock_irqrestore(&shca_list_lock, flags);
			if (printk_timed_ratelimit(&ehca_dmem_warn_time,
						   30 * 1000))
				/* note the space joining the two literals */
				ehca_gen_err("DMEM operations are not allowed "
					     "in conjunction with eHCA");
			return NOTIFY_BAD;
		}
	}
	return NOTIFY_OK;
}

/* Registered with register_memory_notifier() in ehca_module_init(). */
static struct notifier_block ehca_mem_nb = {
	.notifier_call = ehca_mem_notifier,
};
/*
 * Module entry point.
 *
 * Sets up, in order: the completion pool, the slab caches, the busmap, the
 * ibmebus driver registration and the memory hotplug notifier.  If EQ
 * polling is enabled the poll timer is started, otherwise a warning about
 * possibly lost interrupts is logged.
 *
 * Returns 0 on success.  On failure everything set up so far is undone and
 * an error code is returned (-ENOMEM for slab cache failures, -EINVAL if
 * the driver cannot be registered, otherwise the failing call's result).
 */
static int __init ehca_module_init(void)
{
	int rc;

	printk(KERN_INFO "eHCA Infiniband Device Driver (Version "
	       HCAD_VERSION ")\n");

	rc = ehca_create_comp_pool();
	if (rc != 0) {
		ehca_gen_err("Cannot create comp pool.");
		return rc;
	}

	rc = ehca_create_slab_caches();
	if (rc != 0) {
		ehca_gen_err("Cannot create SLAB caches");
		rc = -ENOMEM;
		goto undo_comp_pool;
	}

	rc = ehca_create_busmap();
	if (rc != 0) {
		ehca_gen_err("Cannot create busmap.");
		goto undo_slab_caches;
	}

	rc = ibmebus_register_driver(&ehca_driver);
	if (rc != 0) {
		ehca_gen_err("Cannot register eHCA device driver");
		rc = -EINVAL;
		goto undo_busmap;
	}

	rc = register_memory_notifier(&ehca_mem_nb);
	if (rc != 0) {
		ehca_gen_err("Failed registering memory add/remove notifier");
		goto undo_driver;
	}

	if (ehca_poll_all_eqs == 1) {
		/* first poll one second from now */
		init_timer(&poll_eqs_timer);
		poll_eqs_timer.function = ehca_poll_eqs;
		poll_eqs_timer.expires = jiffies + HZ;
		add_timer(&poll_eqs_timer);
	} else {
		ehca_gen_err("WARNING!!!");
		ehca_gen_err("It is possible to lose interrupts.");
	}

	return 0;

undo_driver:
	ibmebus_unregister_driver(&ehca_driver);
undo_busmap:
	ehca_destroy_busmap();
undo_slab_caches:
	ehca_destroy_slab_caches();
undo_comp_pool:
	ehca_destroy_comp_pool();
	return rc;
}
/*
 * Module exit point: stops the EQ poll timer (if it was started),
 * unregisters the driver and the memory notifier, then releases the
 * busmap, the slab caches, the completion pool and both IDRs.
 */
static void __exit ehca_module_exit(void)
{
	if (ehca_poll_all_eqs)
		del_timer_sync(&poll_eqs_timer);

	ibmebus_unregister_driver(&ehca_driver);
	unregister_memory_notifier(&ehca_mem_nb);

	ehca_destroy_busmap();
	ehca_destroy_slab_caches();
	ehca_destroy_comp_pool();

	idr_destroy(&ehca_cq_idr);
	idr_destroy(&ehca_qp_idr);
}

module_init(ehca_module_init);
module_exit(ehca_module_exit);