From c1f67a88bf62fac0f4151c007b361199c2cd1988 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 27 May 2009 14:36:16 -0700 Subject: [PATCH] IB/mthca: Add module parameter for number of MTTs per segment The current MTT allocator uses kmalloc() to allocate a buffer for its buddy allocator, and thus is limited in the number of MTT segments that it can control. As a result, the amount of memory that can be registered is limited too. This patch uses a module parameter to control the number of MTT entries that each segment represents, allowing more memory to be registered with the same number of segments. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- drivers/infiniband/hw/mthca/mthca_dev.h | 1 + drivers/infiniband/hw/mthca/mthca_main.c | 17 ++++++++++++++--- drivers/infiniband/hw/mthca/mthca_mr.c | 16 ++++++++-------- drivers/infiniband/hw/mthca/mthca_profile.c | 4 ++-- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 6d55f9d748f6..8c2ed994d540 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1059,7 +1059,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); if (mthca_is_memfree(dev)) dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64), - MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE; + dev->limits.mtt_seg_size) / dev->limits.mtt_seg_size; else dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 252590116df5..9ef611f6dd36 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -159,6 +159,7 @@ struct mthca_limits { int reserved_eqs; int num_mpts; int num_mtt_segs; + int mtt_seg_size; int 
fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 1d83cf7caf38..13da9f1d24c0 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -125,6 +125,10 @@ module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444); MODULE_PARM_DESC(fmr_reserved_mtts, "number of memory translation table segments reserved for FMR"); +static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); +module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); +MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); + static char mthca_version[] __devinitdata = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -162,6 +166,7 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) int err; u8 status; + mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8; err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status); if (err) { mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); @@ -460,11 +465,11 @@ static int mthca_init_icm(struct mthca_dev *mdev, } /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ - mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, - dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size, + dma_get_cache_alignment()) / mdev->limits.mtt_seg_size; mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, - MTHCA_MTT_SEG_SIZE, + mdev->limits.mtt_seg_size, mdev->limits.num_mtt_segs, mdev->limits.reserved_mtts, 1, 0); @@ -1315,6 +1320,12 @@ static void __init mthca_validate_profile(void) printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n", hca_profile.fmr_reserved_mtts); } + + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { + 
printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n", + log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8)); + log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); + } } static int __init mthca_init(void) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 882e6b735915..d606edf10858 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -220,7 +220,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, mtt->buddy = buddy; mtt->order = 0; - for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1) ++mtt->order; mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); @@ -267,7 +267,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, while (list_len > 0) { mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + - mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtt->first_seg * dev->limits.mtt_seg_size + start_index * 8); mtt_entry[1] = 0; for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) @@ -326,7 +326,7 @@ static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, u64 __iomem *mtts; int i; - mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size + start_index * sizeof (u64); for (i = 0; i < list_len; ++i) mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT), @@ -345,10 +345,10 @@ static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, /* For Arbel, all MTTs must fit in the same page. 
*/ BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE); /* Require full segments */ - BUG_ON(s % MTHCA_MTT_SEG_SIZE); + BUG_ON(s % dev->limits.mtt_seg_size); mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + - s / MTHCA_MTT_SEG_SIZE, &dma_handle); + s / dev->limits.mtt_seg_size, &dma_handle); BUG_ON(!mtts); @@ -479,7 +479,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, if (mr->mtt) mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + - mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); + mr->mtt->first_seg * dev->limits.mtt_seg_size); if (0) { mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); @@ -626,7 +626,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, goto err_out_table; } - mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size; if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, @@ -908,7 +908,7 @@ int mthca_init_mr_table(struct mthca_dev *dev) dev->mr_table.mtt_base); dev->mr_table.tavor_fmr.mtt_base = - ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE); + ioremap(addr, mtts * dev->limits.mtt_seg_size); if (!dev->mr_table.tavor_fmr.mtt_base) { mthca_warn(dev, "MTT ioremap for FMR failed.\n"); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index d168c2540611..8edb28a9a0e7 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -94,7 +94,7 @@ s64 mthca_make_profile(struct mthca_dev *dev, profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE; profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE; profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz; - profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE; + profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size; profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz; profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE; 
profile[MTHCA_RES_UARC].size = request->uarc_size; @@ -232,7 +232,7 @@ s64 mthca_make_profile(struct mthca_dev *dev, dev->limits.num_mtt_segs = profile[i].num; dev->mr_table.mtt_base = profile[i].start; init_hca->mtt_base = profile[i].start; - init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7; + init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7; break; case MTHCA_RES_UAR: dev->limits.num_uars = profile[i].num;