IOMMU Fixes for Linux v5.3-rc1

Merge tag 'iommu-fixes-v5.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU fixes from Joerg Roedel:

 - revert an Intel VT-d patch that caused boot problems on some machines

 - fix AMD IOMMU interrupts with x2apic enabled

 - fix a potential crash when Intel VT-d domain allocation fails

 - fix crash in Intel VT-d driver when accessing a domain without a
   flush queue

 - formatting fix for new Intel VT-d debugfs code

 - fix for use-after-free bug in IOVA code

 - fix for a NULL-pointer dereference in Intel VT-d driver when PCI
   hotplug is used

 - compilation fix for one of the previous fixes

* tag 'iommu-fixes-v5.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
  iommu/amd: Add support for X2APIC IOMMU interrupts
  iommu/iova: Fix compilation error with !CONFIG_IOMMU_IOVA
  iommu/vt-d: Print pasid table entries MSB to LSB in debugfs
  iommu/iova: Remove stale cached32_node
  iommu/vt-d: Check if domain->pgd was allocated
  iommu/vt-d: Don't queue_iova() if there is no flush queue
  iommu/vt-d: Avoid duplicated pci dma alias consideration
  Revert "iommu/vt-d: Consolidate domain_init() to avoid duplication"
commit b381c016c5
Linus Torvalds, 2019-07-26 10:04:19 -07:00
6 changed files with 220 additions and 101 deletions

drivers/iommu/amd_iommu_init.c

@@ -23,6 +23,8 @@
#include <linux/mem_encrypt.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/apic.h>
#include <asm/msidef.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
@@ -1920,6 +1922,90 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
return 0;
}
#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2)
#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8)
#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32)
#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56)
/*
 * Set up the IntCapXT registers with interrupt routing information
 * based on the PCI MSI capability block registers, accessed via
 * MMIO MSI address low/hi and MSI data registers.
 */
static void iommu_update_intcapxt(struct amd_iommu *iommu)
{
u64 val;
u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
if (x2apic_enabled())
dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
val = XT_INT_VEC(data & 0xFF) |
XT_INT_DEST_MODE(dm) |
XT_INT_DEST_LO(dest) |
XT_INT_DEST_HI(dest);
/*
 * The current IOMMU implementation uses the same IRQ for all
 * 3 IOMMU interrupts.
 */
writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
}
static void _irq_notifier_notify(struct irq_affinity_notify *notify,
const cpumask_t *mask)
{
struct amd_iommu *iommu;
for_each_iommu(iommu) {
if (iommu->dev->irq == notify->irq) {
iommu_update_intcapxt(iommu);
break;
}
}
}
static void _irq_notifier_release(struct kref *ref)
{
}
static int iommu_init_intcapxt(struct amd_iommu *iommu)
{
int ret;
struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
/*
 * IntCapXT requires XTSup=1, which can be inferred from
 * amd_iommu_xt_mode.
 */
if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
return 0;
/*
 * Also, we need to set up a notifier to update the IntCapXT registers
 * whenever the irq affinity is changed from user-space.
 */
notify->irq = iommu->dev->irq;
notify->notify = _irq_notifier_notify;
notify->release = _irq_notifier_release;
ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
if (ret) {
pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
iommu->devid, iommu->dev->irq);
return ret;
}
iommu_update_intcapxt(iommu);
iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
return ret;
}
static int iommu_init_msi(struct amd_iommu *iommu)
{
int ret;
@@ -1936,6 +2022,10 @@ static int iommu_init_msi(struct amd_iommu *iommu)
return ret;
enable_faults:
ret = iommu_init_intcapxt(iommu);
if (ret)
return ret;
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
if (iommu->ppr_log != NULL)
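For reference, the XT_INT_* macros above split a 32-bit x2APIC destination ID across the IntCapXT register: bits 23:0 of the ID land in register bits 31:8, bits 31:24 land in bits 63:56, with vector and destination mode packed alongside. A standalone sketch, not part of the patch (the destination ID, vector, and mode values below are made up), that reproduces the packing:

#include <inttypes.h>
#include <stdio.h>

#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2)
#define XT_INT_DEST_LO(x)   (((x) & 0xFFFFFFULL) << 8)
#define XT_INT_VEC(x)       (((x) & 0xFFULL) << 32)
#define XT_INT_DEST_HI(x)   ((((x) >> 24) & 0xFFULL) << 56)

int main(void)
{
	uint64_t dest = 0x12345678;	/* made-up 32-bit x2APIC destination ID */
	uint64_t vec = 0x35;		/* made-up interrupt vector */
	uint64_t dm = 0;		/* physical destination mode */
	uint64_t val = XT_INT_VEC(vec) | XT_INT_DEST_MODE(dm) |
		       XT_INT_DEST_LO(dest) | XT_INT_DEST_HI(dest);

	/* Prints 0x1200003534567800: 0x12 in bits 63:56, 0x345678 in bits 31:8. */
	printf("IntCapXT value: %#018" PRIx64 "\n", val);
	return 0;
}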

drivers/iommu/amd_iommu_types.h

@@ -60,6 +60,12 @@
#define MMIO_PPR_LOG_OFFSET 0x0038
#define MMIO_GA_LOG_BASE_OFFSET 0x00e0
#define MMIO_GA_LOG_TAIL_OFFSET 0x00e8
#define MMIO_MSI_ADDR_LO_OFFSET 0x015C
#define MMIO_MSI_ADDR_HI_OFFSET 0x0160
#define MMIO_MSI_DATA_OFFSET 0x0164
#define MMIO_INTCAPXT_EVT_OFFSET 0x0170
#define MMIO_INTCAPXT_PPR_OFFSET 0x0178
#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
@@ -150,6 +156,7 @@
#define CONTROL_GALOG_EN 0x1CULL
#define CONTROL_GAINT_EN 0x1DULL
#define CONTROL_XT_EN 0x32ULL
#define CONTROL_INTCAPXT_EN 0x33ULL
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_NONE 0
@@ -592,6 +599,8 @@ struct amd_iommu {
/* DebugFS Info */
struct dentry *debugfs;
#endif
/* IRQ notifier for IntCapXT interrupt */
struct irq_affinity_notify intcapxt_notify;
};
static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)

drivers/iommu/intel-iommu-debugfs.c

@@ -162,9 +162,9 @@ static inline void print_tbl_walk(struct seq_file *m)
(u64)0, (u64)0, (u64)0);
else
seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n",
- tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[0],
+ tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[2],
tbl_wlk->pasid_tbl_entry->val[1],
- tbl_wlk->pasid_tbl_entry->val[2]);
+ tbl_wlk->pasid_tbl_entry->val[0]);
}
static void pasid_tbl_walk(struct seq_file *m, struct pasid_entry *tbl_entry,
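The print_tbl_walk() hunk above swaps val[0] and val[2] so the 64-bit words of a PASID table entry are printed most-significant word first. A standalone sketch with made-up values showing the corrected ordering (three words shown, as in the debugfs output):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* val[0] holds the lowest-order 64 bits of the entry. */
	uint64_t val[3] = { 0x1111111111111111ULL,	/* bits 63:0 */
			    0x2222222222222222ULL,	/* bits 127:64 */
			    0x3333333333333333ULL };	/* bits 191:128 */

	/* MSB to LSB, as the fixed seq_printf() does. */
	printf("0x%016llx:0x%016llx:0x%016llx\n",
	       (unsigned long long)val[2],
	       (unsigned long long)val[1],
	       (unsigned long long)val[0]);
	return 0;
}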

drivers/iommu/intel-iommu.c

@@ -339,8 +339,6 @@ static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
- static void domain_context_clear(struct intel_iommu *iommu,
- struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu);
static bool device_is_rmrr_locked(struct device *dev);
@@ -1833,9 +1831,65 @@ static inline int guestwidth_to_adjustwidth(int gaw)
return agaw;
}
static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
int guest_width)
{
int adjust_width, agaw;
unsigned long sagaw;
int err;
init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
err = init_iova_flush_queue(&domain->iovad,
iommu_flush_iova, iova_entry_free);
if (err)
return err;
domain_reserve_special_ranges(domain);
/* calculate AGAW */
if (guest_width > cap_mgaw(iommu->cap))
guest_width = cap_mgaw(iommu->cap);
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
agaw = width_to_agaw(adjust_width);
sagaw = cap_sagaw(iommu->cap);
if (!test_bit(agaw, &sagaw)) {
/* hardware doesn't support it, choose a bigger one */
pr_debug("Hardware doesn't support agaw %d\n", agaw);
agaw = find_next_bit(&sagaw, 5, agaw);
if (agaw >= 5)
return -ENODEV;
}
domain->agaw = agaw;
if (ecap_coherent(iommu->ecap))
domain->iommu_coherency = 1;
else
domain->iommu_coherency = 0;
if (ecap_sc_support(iommu->ecap))
domain->iommu_snooping = 1;
else
domain->iommu_snooping = 0;
if (intel_iommu_superpage)
domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
else
domain->iommu_superpage = 0;
domain->nid = iommu->node;
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
return 0;
}
static void domain_exit(struct dmar_domain *domain)
{
- struct page *freelist;
/* Remove associated devices and clear attached or cached domains */
domain_remove_dev_info(domain);
@@ -1843,9 +1897,12 @@ static void domain_exit(struct dmar_domain *domain)
/* destroy iovas */
put_iova_domain(&domain->iovad);
- freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+ if (domain->pgd) {
+ struct page *freelist;
- dma_free_pagelist(freelist);
+ freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+ dma_free_pagelist(freelist);
+ }
free_domain_mem(domain);
}
@@ -2048,26 +2105,9 @@ out_unlock:
return ret;
}
- struct domain_context_mapping_data {
- struct dmar_domain *domain;
- struct intel_iommu *iommu;
- struct pasid_table *table;
- };
- static int domain_context_mapping_cb(struct pci_dev *pdev,
- u16 alias, void *opaque)
- {
- struct domain_context_mapping_data *data = opaque;
- return domain_context_mapping_one(data->domain, data->iommu,
- data->table, PCI_BUS_NUM(alias),
- alias & 0xff);
- }
static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
- struct domain_context_mapping_data data;
struct pasid_table *table;
struct intel_iommu *iommu;
u8 bus, devfn;
@@ -2077,17 +2117,7 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
return -ENODEV;
table = intel_pasid_get_table(dev);
- if (!dev_is_pci(dev))
- return domain_context_mapping_one(domain, iommu, table,
- bus, devfn);
- data.domain = domain;
- data.iommu = iommu;
- data.table = table;
- return pci_for_each_dma_alias(to_pci_dev(dev),
- &domain_context_mapping_cb, &data);
+ return domain_context_mapping_one(domain, iommu, table, bus, devfn);
}
static int domain_context_mapped_cb(struct pci_dev *pdev,
@@ -2513,31 +2543,6 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
return 0;
}
- static int domain_init(struct dmar_domain *domain, int guest_width)
- {
- int adjust_width;
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
- domain_reserve_special_ranges(domain);
- /* calculate AGAW */
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- domain->agaw = width_to_agaw(adjust_width);
- domain->iommu_coherency = 0;
- domain->iommu_snooping = 0;
- domain->iommu_superpage = 0;
- domain->max_addr = 0;
- /* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
- if (!domain->pgd)
- return -ENOMEM;
- domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
- return 0;
- }
static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
{
struct device_domain_info *info;
@@ -2575,19 +2580,11 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
domain = alloc_domain(0);
if (!domain)
return NULL;
- if (domain_init(domain, gaw)) {
+ if (domain_init(domain, iommu, gaw)) {
domain_exit(domain);
return NULL;
}
- if (init_iova_flush_queue(&domain->iovad,
- iommu_flush_iova,
- iova_entry_free)) {
- pr_warn("iova flush queue initialization failed\n");
- intel_iommu_strict = 1;
- }
out:
return domain;
}
@@ -2692,6 +2689,8 @@ static int domain_prepare_identity_map(struct device *dev,
return iommu_domain_identity_map(domain, start, end);
}
static int md_domain_init(struct dmar_domain *domain, int guest_width);
static int __init si_domain_init(int hw)
{
struct dmar_rmrr_unit *rmrr;
@@ -2702,7 +2701,7 @@ static int __init si_domain_init(int hw)
if (!si_domain)
return -EFAULT;
- if (domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
domain_exit(si_domain);
return -EFAULT;
}
@@ -3564,7 +3563,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
freelist = domain_unmap(domain, start_pfn, last_pfn);
- if (intel_iommu_strict || (pdev && pdev->untrusted)) {
+ if (intel_iommu_strict || (pdev && pdev->untrusted) ||
+ !has_iova_flush_queue(&domain->iovad)) {
iommu_flush_iotlb_psi(iommu, domain, start_pfn,
nrpages, !freelist, 0);
/* free iova */
@@ -4758,28 +4758,6 @@ out_free_dmar:
return ret;
}
- static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
- {
- struct intel_iommu *iommu = opaque;
- domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
- return 0;
- }
- /*
- * NB - intel-iommu lacks any sort of reference counting for the users of
- * dependent devices. If multiple endpoints have intersecting dependent
- * devices, unbinding the driver from any one of them will possibly leave
- * the others unable to operate.
- */
- static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
- {
- if (!iommu || !dev || !dev_is_pci(dev))
- return;
- pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
- }
static void __dmar_remove_one_dev_info(struct device_domain_info *info)
{
struct dmar_domain *domain;
@@ -4800,7 +4778,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
PASID_RID2PASID);
iommu_disable_dev_iotlb(info);
- domain_context_clear(iommu, info->dev);
+ domain_context_clear_one(iommu, info->bus, info->devfn);
intel_pasid_free_table(info->dev);
}
@@ -4829,6 +4807,31 @@ static void dmar_remove_one_dev_info(struct device *dev)
spin_unlock_irqrestore(&device_domain_lock, flags);
}
static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
int adjust_width;
init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
domain_reserve_special_ranges(domain);
/* calculate AGAW */
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
domain->agaw = width_to_agaw(adjust_width);
domain->iommu_coherency = 0;
domain->iommu_snooping = 0;
domain->iommu_superpage = 0;
domain->max_addr = 0;
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
return 0;
}
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
@@ -4843,7 +4846,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
pr_err("Can't allocate dmar_domain\n");
return NULL;
}
- if (domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
pr_err("Domain initialization failed\n");
domain_exit(dmar_domain);
return NULL;
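The intel_unmap() hunk above adds a third reason to take the synchronous path: a domain whose flush queue was never initialized, which previously let queue_iova() dereference a NULL queue. A standalone sketch of that decision, with all types and names made up:

#include <stdbool.h>
#include <stdio.h>

struct dom { bool has_flush_queue; };

static bool strict_mode;	/* stand-in for intel_iommu_strict */

static void unmap_range(const struct dom *d, bool untrusted)
{
	if (strict_mode || untrusted || !d->has_flush_queue)
		puts("flush IOTLB now and free the IOVA immediately");
	else
		puts("queue the IOVA for a batched, deferred flush");
}

int main(void)
{
	struct dom d = { .has_flush_queue = false };

	unmap_range(&d, false);	/* no flush queue: synchronous fallback */
	return 0;
}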

drivers/iommu/iova.c

@@ -54,9 +54,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
}
EXPORT_SYMBOL_GPL(init_iova_domain);
bool has_iova_flush_queue(struct iova_domain *iovad)
{
return !!iovad->fq;
}
static void free_iova_flush_queue(struct iova_domain *iovad)
{
- if (!iovad->fq)
+ if (!has_iova_flush_queue(iovad))
return;
if (timer_pending(&iovad->fq_timer))
@@ -74,13 +79,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
struct iova_fq __percpu *queue;
int cpu;
atomic64_set(&iovad->fq_flush_start_cnt, 0);
atomic64_set(&iovad->fq_flush_finish_cnt, 0);
- iovad->fq = alloc_percpu(struct iova_fq);
- if (!iovad->fq)
+ queue = alloc_percpu(struct iova_fq);
+ if (!queue)
return -ENOMEM;
iovad->flush_cb = flush_cb;
@@ -89,13 +95,17 @@ int init_iova_flush_queue(struct iova_domain *iovad,
for_each_possible_cpu(cpu) {
struct iova_fq *fq;
- fq = per_cpu_ptr(iovad->fq, cpu);
+ fq = per_cpu_ptr(queue, cpu);
fq->head = 0;
fq->tail = 0;
spin_lock_init(&fq->lock);
}
smp_wmb();
iovad->fq = queue;
timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
atomic_set(&iovad->fq_timer_on, 0);
@@ -127,8 +137,9 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
struct iova *cached_iova;
cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
- if (free->pfn_hi < iovad->dma_32bit_pfn &&
- free->pfn_lo >= cached_iova->pfn_lo) {
+ if (free == cached_iova ||
+ (free->pfn_hi < iovad->dma_32bit_pfn &&
+ free->pfn_lo >= cached_iova->pfn_lo)) {
iovad->cached32_node = rb_next(&free->node);
iovad->max32_alloc_size = iovad->dma_32bit_pfn;
}
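The init_iova_flush_queue() hunk above builds the per-cpu queues in a local variable and assigns iovad->fq only after an smp_wmb(), so a reader that observes a non-NULL fq via has_iova_flush_queue() also observes fully initialized queues. A standalone sketch of the same publish-after-init pattern, using C11 release/acquire in place of the kernel barriers (all names made up):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct flush_queue { int head, tail; };

static _Atomic(struct flush_queue *) fq;

/* Writer: initialize everything, then publish with release semantics. */
static int init_queue(void)
{
	struct flush_queue *q = malloc(sizeof(*q));

	if (!q)
		return -1;
	q->head = 0;
	q->tail = 0;
	atomic_store_explicit(&fq, q, memory_order_release);
	return 0;
}

/* Reader: a non-NULL acquire load implies the fields above are visible. */
static bool has_queue(void)
{
	return atomic_load_explicit(&fq, memory_order_acquire) != NULL;
}

int main(void)
{
	return (init_queue() == 0 && has_queue()) ? 0 : 1;
}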

include/linux/iova.h

@@ -155,6 +155,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
unsigned long start_pfn);
bool has_iova_flush_queue(struct iova_domain *iovad);
int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
@@ -235,6 +236,11 @@ static inline void init_iova_domain(struct iova_domain *iovad,
{
}
static inline bool has_iova_flush_queue(struct iova_domain *iovad)
{
return false;
}
static inline int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb,
iova_entry_dtor entry_dtor)
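The has_iova_flush_queue() stub added above is the compilation fix noted in the pull message: when the IOVA code is compiled out, an inline fallback with the same signature keeps callers building without #ifdefs. A standalone sketch of the same config-stub pattern, with FEATURE_ENABLED as a made-up stand-in for the Kconfig option:

#include <stdbool.h>
#include <stdio.h>

/* #define FEATURE_ENABLED */

#ifdef FEATURE_ENABLED
static bool has_feature_queue(void)
{
	return true;	/* the real implementation would live here */
}
#else
static inline bool has_feature_queue(void)
{
	return false;	/* compiled out: stub keeps callers building */
}
#endif

int main(void)
{
	/* Caller code is identical whether the feature is built or not. */
	if (has_feature_queue())
		puts("deferred path");
	else
		puts("synchronous fallback");
	return 0;
}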