virtio, pci: fixes, features
virtio is using region caches for performance
iommu support for IOTLBs
misc fixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

-----BEGIN PGP SIGNATURE-----
iQEcBAABAgAGBQJYp1SeAAoJECgfDbjSjVRpLa0H/RXYSz7DJ+5y39vzmeHSSTmR
XrLZ7p4eJrhxxiIJ6RBzzKyAUHSFjfj8ROy1G89+0HhUomrvP9mWvuQOX16XWqCQ
BX+T6CiXzORaTpuHaCNPH0hE1rL4itbUKjvgFo3S4BoEDD/6t3WnSfTRw21zE28v
CNzlMZLm+ta3lGBaMvJR0+evIiu6FuRmyt9WNQHY6NEZz41TAMmZyfDWqnMUhVCl
xBYgvwnmydP1jVhvNqOBnwb2LFEqXGT6vZKuViTZo/Rs0l8LU0P3B/tiUCT4JV3M
8qMc2m7AeFgysYFNy7qCcjzhpfr7rZsv3Zo2e+PKL+v9wbcFNCwOwtY/9tWdn3U=
=lL5C
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio, pci: fixes, features

virtio is using region caches for performance
iommu support for IOTLBs
misc fixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 17 Feb 2017 19:53:02 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (23 commits)
  intel_iommu: vtd_slpt_level_shift check level
  intel_iommu: convert dbg macros to trace for trans
  intel_iommu: convert dbg macros to traces for inv
  intel_iommu: renaming gpa to iova where proper
  intel_iommu: simplify irq region translation
  intel_iommu: add "caching-mode" option
  vfio: allow to notify unmap for very large region
  vfio: introduce vfio_get_vaddr()
  vfio: trace map/unmap for notify as well
  pcie: simplify pcie_add_capability()
  virtio: Fix no interrupt when not creating msi controller
  virtio: use VRingMemoryRegionCaches for avail and used rings
  virtio: check for vring setup in virtio_queue_update_used_idx
  virtio: use VRingMemoryRegionCaches for descriptor ring
  virtio: add MemoryListener to cache ring translations
  virtio: use MemoryRegionCache to access descriptors
  exec: make address_space_cache_destroy idempotent
  virtio: use address_space_map/unmap to access descriptors
  virtio: add virtio_*_phys_cached
  memory: make memory_listener_unregister idempotent
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit d514cfd763

 docs/nvdimm.txt | 124 (new file)
@@ -0,0 +1,124 @@
QEMU Virtual NVDIMM
===================

This document explains the usage of the virtual NVDIMM (vNVDIMM) feature,
which has been available since QEMU v2.6.0.

QEMU currently implements only the persistent memory mode of the vNVDIMM
device, not the block window mode.

Basic Usage
-----------

The storage of a vNVDIMM device in QEMU is provided by a memory
backend (i.e. memory-backend-file and memory-backend-ram). A simple
way to create a vNVDIMM device at startup time is via the
following command-line options:

 -machine pc,nvdimm
 -m $RAM_SIZE,slots=$N,maxmem=$MAX_SIZE
 -object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE
 -device nvdimm,id=nvdimm1,memdev=mem1

Where,

 - the "nvdimm" machine option enables the vNVDIMM feature.

 - "slots=$N" should be equal to or larger than the total number of
   normal RAM devices and vNVDIMM devices, e.g. $N should be >= 2 here.

 - "maxmem=$MAX_SIZE" should be equal to or larger than the total size
   of normal RAM devices and vNVDIMM devices, e.g. $MAX_SIZE should be
   >= $RAM_SIZE + $NVDIMM_SIZE here.

 - "object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE"
   creates backend storage of size $NVDIMM_SIZE on a file $PATH. All
   accesses to the virtual NVDIMM device go to the file $PATH.

   "share=on/off" controls the visibility of guest writes. If
   "share=on", then guest writes will be applied to the backend
   file. If another guest uses the same backend file with option
   "share=on", then the above writes will be visible to it as well. If
   "share=off", then guest writes won't be applied to the backend
   file and thus will be invisible to other guests.

 - "device nvdimm,id=nvdimm1,memdev=mem1" creates a virtual NVDIMM
   device whose storage is provided by the above memory backend device.

Multiple vNVDIMM devices can be created if multiple pairs of "-object"
and "-device" are provided.

With the above command-line options, if the guest OS has a proper NVDIMM
driver, it should be able to detect an NVDIMM device which is in the
persistent memory mode and whose size is $NVDIMM_SIZE.

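As an illustration only (the path /tmp/nvdimm.img and the sizes below are
hypothetical values, not taken from this document), the options above can
be combined into a single invocation, e.g.:

 qemu-system-x86_64 -machine pc,nvdimm \
  -m 4G,slots=2,maxmem=8G \
  -object memory-backend-file,id=mem1,share=on,mem-path=/tmp/nvdimm.img,size=4G \
  -device nvdimm,id=nvdimm1,memdev=mem1

Here slots=2 covers one normal RAM device plus one vNVDIMM device, and
maxmem=8G satisfies $RAM_SIZE + $NVDIMM_SIZE = 4G + 4G.
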
Note:

1. Prior to QEMU v2.8.0, if memory-backend-file is used and the actual
   backend file size is not equal to the size given by the "size" option,
   QEMU will truncate the backend file with ftruncate(2), which will
   corrupt the existing data in the backend file, especially when the
   file is shrunk. (One way to pre-create a file of the right size is
   sketched after these notes.)

   QEMU v2.8.0 and later check the backend file size against the "size"
   option. If they do not match, QEMU will report an error and abort in
   order to avoid data corruption.

2. QEMU v2.6.0 only puts a basic alignment requirement on the "size"
   option of memory-backend-file, e.g. 4KB alignment on x86. However,
   QEMU v2.7.0 adds an additional alignment requirement, which may
   require a larger value than the basic one, e.g. 2MB on x86. This
   change breaks the usage of memory-backend-file that only satisfies
   the basic alignment.

   QEMU v2.8.0 and later remove the additional alignment on non-s390x
   architectures, so the broken memory-backend-file can work again.

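The following sketch is illustrative only (the path and size are
hypothetical): pre-creating the backend file with exactly the value passed
to "size", and choosing a 2MB-aligned value, satisfies both the size check
of QEMU v2.8.0 and the stricter alignment of QEMU v2.7.0:

 $ truncate -s 4G /tmp/nvdimm.img
 -object memory-backend-file,id=mem1,share=on,mem-path=/tmp/nvdimm.img,size=4G
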
Label
-----

QEMU v2.7.0 and later implement label support for vNVDIMM devices.
To enable labels on vNVDIMM devices, users can simply add the
"label-size=$SZ" option to "-device nvdimm", e.g.

 -device nvdimm,id=nvdimm1,memdev=mem1,label-size=128K

Note:

1. The minimum label size is 128KB.

2. QEMU v2.7.0 and later store labels at the end of the backend storage.
   If a memory backend file, which was previously used as the backend
   of a vNVDIMM device without labels, is now used for a vNVDIMM
   device with labels, the data in the label area at the end of the file
   will be inaccessible to the guest. If any useful data (e.g. the
   metadata of the file system) was stored there, the latter usage
   may result in guest data corruption (e.g. breakage of the guest file
   system).

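As a hedged worked example (the sizes are hypothetical; this simply applies
note 2 above, which says labels live at the end of the backend storage):
with a 4G backend and "label-size=128K", the guest-visible persistent
memory capacity is 4G - 128K:

 -object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=4G
 -device nvdimm,id=nvdimm1,memdev=mem1,label-size=128K
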
Hotplug
-------

QEMU v2.8.0 and later implement hotplug support for vNVDIMM devices.
Similarly to RAM hotplug, vNVDIMM hotplug is accomplished by the two
monitor commands "object_add" and "device_add".

For example, the following commands add another 4GB vNVDIMM device to
the guest:

 (qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=new_nvdimm.img,size=4G
 (qemu) device_add nvdimm,id=nvdimm2,memdev=mem2

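The result can then be checked from the monitor; "info memory-devices" is
an existing HMP command that lists plugged memory devices (the exact output
format is not reproduced here and may vary between QEMU versions):

 (qemu) info memory-devices

The newly hotplugged "nvdimm2" should appear in that list.
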
Note:

1. Each hotplugged vNVDIMM device consumes one memory slot. Users
   should always ensure the memory option "-m ...,slots=N" specifies
   a sufficient number of slots, i.e.
     N >= number of RAM devices +
          number of statically plugged vNVDIMM devices +
          number of hotplugged vNVDIMM devices

2. A similar requirement applies to the memory option "-m ...,maxmem=M",
   i.e.
     M >= size of RAM devices +
          size of statically plugged vNVDIMM devices +
          size of hotplugged vNVDIMM devices

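A hedged worked example (the device counts and sizes are hypothetical): for
a guest with one 4G RAM device, one statically plugged 4G vNVDIMM device,
and room for one hotplugged 4G vNVDIMM device, the two constraints above
give N >= 3 and M >= 12G, e.g.:

 -m 4G,slots=3,maxmem=12G
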
exec.c | 1 +

@@ -3166,6 +3166,7 @@ void address_space_cache_destroy(MemoryRegionCache *cache)
         xen_invalidate_map_cache_entry(cache->ptr);
     }
     memory_region_unref(cache->mr);
+    cache->mr = NULL;
 }
 
 /* Called from RCU critical section. This function has the same
@@ -147,7 +147,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
     g_free(s);
 }
 
-static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
+static bool virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
                                                 VirtQueue *vq)
 {
     VirtIOBlock *s = (VirtIOBlock *)vdev;
@@ -155,7 +155,7 @@ static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
     assert(s->dataplane);
     assert(s->dataplane_started);
 
-    virtio_blk_handle_vq(s, vq);
+    return virtio_blk_handle_vq(s, vq);
 }
 
 /* Context: QEMU global mutex held */
@@ -581,10 +581,11 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
     return 0;
 }
 
-void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
+bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
 {
     VirtIOBlockReq *req;
     MultiReqBuffer mrb = {};
+    bool progress = false;
 
     blk_io_plug(s->blk);
 
@@ -592,6 +593,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
     virtio_queue_set_notification(vq, 0);
 
     while ((req = virtio_blk_get_request(s, vq))) {
+        progress = true;
         if (virtio_blk_handle_request(req, &mrb)) {
             virtqueue_detach_element(req->vq, &req->elem, 0);
             virtio_blk_free_request(req);
@@ -607,6 +609,12 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
     }
 
     blk_io_unplug(s->blk);
+    return progress;
+}
+
+static void virtio_blk_handle_output_do(VirtIOBlock *s, VirtQueue *vq)
+{
+    virtio_blk_handle_vq(s, vq);
 }
 
 static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -622,7 +630,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
             return;
         }
     }
-    virtio_blk_handle_vq(s, vq);
+    virtio_blk_handle_output_do(s, vq);
 }
 
 static void virtio_blk_dma_restart_bh(void *opaque)
@ -35,6 +35,7 @@
|
||||
#include "sysemu/kvm.h"
|
||||
#include "hw/i386/apic_internal.h"
|
||||
#include "kvm_i386.h"
|
||||
#include "trace.h"
|
||||
|
||||
/*#define DEBUG_INTEL_IOMMU*/
|
||||
#ifdef DEBUG_INTEL_IOMMU
|
||||
@ -167,6 +168,7 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
|
||||
/* The shift of an addr for a certain level of paging structure */
|
||||
static inline uint32_t vtd_slpt_level_shift(uint32_t level)
|
||||
{
|
||||
assert(level != 0);
|
||||
return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
|
||||
}
|
||||
|
||||
@ -259,11 +261,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
|
||||
uint64_t *key = g_malloc(sizeof(*key));
|
||||
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
|
||||
|
||||
VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
|
||||
" slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
|
||||
domain_id);
|
||||
trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
|
||||
if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
|
||||
VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset");
|
||||
trace_vtd_iotlb_reset("iotlb exceeds size limit");
|
||||
vtd_reset_iotlb(s);
|
||||
}
|
||||
|
||||
@ -474,22 +474,19 @@ static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
|
||||
/* Set the IWC field and try to generate an invalidation completion interrupt */
|
||||
static void vtd_generate_completion_event(IntelIOMMUState *s)
|
||||
{
|
||||
VTD_DPRINTF(INV, "completes an invalidation wait command with "
|
||||
"Interrupt Flag");
|
||||
if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
|
||||
VTD_DPRINTF(INV, "there is a previous interrupt condition to be "
|
||||
"serviced by software, "
|
||||
"new invalidation event is not generated");
|
||||
trace_vtd_inv_desc_wait_irq("One pending, skip current");
|
||||
return;
|
||||
}
|
||||
vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
|
||||
vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
|
||||
if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
|
||||
VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation "
|
||||
"event is not generated");
|
||||
trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, "
|
||||
"new event not generated");
|
||||
return;
|
||||
} else {
|
||||
/* Generate the interrupt event */
|
||||
trace_vtd_inv_desc_wait_irq("Generating complete event");
|
||||
vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
|
||||
vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
|
||||
}
|
||||
@ -507,8 +504,7 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
|
||||
|
||||
addr = s->root + index * sizeof(*re);
|
||||
if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
|
||||
VTD_DPRINTF(GENERAL, "error: fail to access root-entry at 0x%"PRIx64
|
||||
" + %"PRIu8, s->root, index);
|
||||
trace_vtd_re_invalid(re->rsvd, re->val);
|
||||
re->val = 0;
|
||||
return -VTD_FR_ROOT_TABLE_INV;
|
||||
}
|
||||
@ -526,15 +522,10 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
|
||||
{
|
||||
dma_addr_t addr;
|
||||
|
||||
if (!vtd_root_entry_present(root)) {
|
||||
VTD_DPRINTF(GENERAL, "error: root-entry is not present");
|
||||
return -VTD_FR_ROOT_ENTRY_P;
|
||||
}
|
||||
/* we have checked that root entry is present */
|
||||
addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
|
||||
if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
|
||||
VTD_DPRINTF(GENERAL, "error: fail to access context-entry at 0x%"PRIx64
|
||||
" + %"PRIu8,
|
||||
(uint64_t)(root->val & VTD_ROOT_ENTRY_CTP), index);
|
||||
trace_vtd_re_invalid(root->rsvd, root->val);
|
||||
return -VTD_FR_CONTEXT_TABLE_INV;
|
||||
}
|
||||
ce->lo = le64_to_cpu(ce->lo);
|
||||
@ -575,12 +566,12 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
|
||||
return slpte;
|
||||
}
|
||||
|
||||
/* Given a gpa and the level of paging structure, return the offset of current
|
||||
* level.
|
||||
/* Given an iova and the level of paging structure, return the offset
|
||||
* of current level.
|
||||
*/
|
||||
static inline uint32_t vtd_gpa_level_offset(uint64_t gpa, uint32_t level)
|
||||
static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
|
||||
{
|
||||
return (gpa >> vtd_slpt_level_shift(level)) &
|
||||
return (iova >> vtd_slpt_level_shift(level)) &
|
||||
((1ULL << VTD_SL_LEVEL_BITS) - 1);
|
||||
}
|
||||
|
||||
@ -628,12 +619,12 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
|
||||
}
|
||||
}
|
||||
|
||||
/* Given the @gpa, get relevant @slptep. @slpte_level will be the last level
|
||||
/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
|
||||
* of the translation, can be used for deciding the size of large page.
|
||||
*/
|
||||
static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
|
||||
uint64_t *slptep, uint32_t *slpte_level,
|
||||
bool *reads, bool *writes)
|
||||
static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
|
||||
uint64_t *slptep, uint32_t *slpte_level,
|
||||
bool *reads, bool *writes)
|
||||
{
|
||||
dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
|
||||
uint32_t level = vtd_get_level_from_context_entry(ce);
|
||||
@ -642,11 +633,11 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
|
||||
uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
|
||||
uint64_t access_right_check;
|
||||
|
||||
/* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in CAP_REG
|
||||
* and AW in context-entry.
|
||||
/* Check if @iova is above 2^X-1, where X is the minimum of MGAW
|
||||
* in CAP_REG and AW in context-entry.
|
||||
*/
|
||||
if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
|
||||
VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa);
|
||||
if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
|
||||
VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
|
||||
return -VTD_FR_ADDR_BEYOND_MGAW;
|
||||
}
|
||||
|
||||
@ -654,13 +645,13 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
|
||||
access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
|
||||
|
||||
while (true) {
|
||||
offset = vtd_gpa_level_offset(gpa, level);
|
||||
offset = vtd_iova_level_offset(iova, level);
|
||||
slpte = vtd_get_slpte(addr, offset);
|
||||
|
||||
if (slpte == (uint64_t)-1) {
|
||||
VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
|
||||
"entry at level %"PRIu32 " for gpa 0x%"PRIx64,
|
||||
level, gpa);
|
||||
"entry at level %"PRIu32 " for iova 0x%"PRIx64,
|
||||
level, iova);
|
||||
if (level == vtd_get_level_from_context_entry(ce)) {
|
||||
/* Invalid programming of context-entry */
|
||||
return -VTD_FR_CONTEXT_ENTRY_INV;
|
||||
@ -672,8 +663,8 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
|
||||
*writes = (*writes) && (slpte & VTD_SL_W);
|
||||
if (!(slpte & access_right_check)) {
|
||||
VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
|
||||
"gpa 0x%"PRIx64 " slpte 0x%"PRIx64,
|
||||
(is_write ? "write" : "read"), gpa, slpte);
|
||||
"iova 0x%"PRIx64 " slpte 0x%"PRIx64,
|
||||
(is_write ? "write" : "read"), iova, slpte);
|
||||
return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
|
||||
}
|
||||
if (vtd_slpte_nonzero_rsvd(slpte, level)) {
|
||||
@ -706,12 +697,11 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
|
||||
}
|
||||
|
||||
if (!vtd_root_entry_present(&re)) {
|
||||
VTD_DPRINTF(GENERAL, "error: root-entry #%"PRIu8 " is not present",
|
||||
bus_num);
|
||||
/* Not error - it's okay we don't have root entry. */
|
||||
trace_vtd_re_not_present(bus_num);
|
||||
return -VTD_FR_ROOT_ENTRY_P;
|
||||
} else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
|
||||
VTD_DPRINTF(GENERAL, "error: non-zero reserved field in root-entry "
|
||||
"hi 0x%"PRIx64 " lo 0x%"PRIx64, re.rsvd, re.val);
|
||||
trace_vtd_re_invalid(re.rsvd, re.val);
|
||||
return -VTD_FR_ROOT_ENTRY_RSVD;
|
||||
}
|
||||
|
||||
@ -721,22 +711,17 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
|
||||
}
|
||||
|
||||
if (!vtd_context_entry_present(ce)) {
|
||||
VTD_DPRINTF(GENERAL,
|
||||
"error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
|
||||
"is not present", devfn, bus_num);
|
||||
/* Not error - it's okay we don't have context entry. */
|
||||
trace_vtd_ce_not_present(bus_num, devfn);
|
||||
return -VTD_FR_CONTEXT_ENTRY_P;
|
||||
} else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
|
||||
(ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
|
||||
VTD_DPRINTF(GENERAL,
|
||||
"error: non-zero reserved field in context-entry "
|
||||
"hi 0x%"PRIx64 " lo 0x%"PRIx64, ce->hi, ce->lo);
|
||||
trace_vtd_ce_invalid(ce->hi, ce->lo);
|
||||
return -VTD_FR_CONTEXT_ENTRY_RSVD;
|
||||
}
|
||||
/* Check if the programming of context-entry is valid */
|
||||
if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) {
|
||||
VTD_DPRINTF(GENERAL, "error: unsupported Address Width value in "
|
||||
"context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
|
||||
ce->hi, ce->lo);
|
||||
trace_vtd_ce_invalid(ce->hi, ce->lo);
|
||||
return -VTD_FR_CONTEXT_ENTRY_INV;
|
||||
} else {
|
||||
switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
|
||||
@ -745,9 +730,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
|
||||
case VTD_CONTEXT_TT_DEV_IOTLB:
|
||||
break;
|
||||
default:
|
||||
VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
|
||||
"context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
|
||||
ce->hi, ce->lo);
|
||||
trace_vtd_ce_invalid(ce->hi, ce->lo);
|
||||
return -VTD_FR_CONTEXT_ENTRY_INV;
|
||||
}
|
||||
}
|
||||
@ -818,34 +801,17 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
|
||||
bool writes = true;
|
||||
VTDIOTLBEntry *iotlb_entry;
|
||||
|
||||
/* Check if the request is in interrupt address range */
|
||||
if (vtd_is_interrupt_addr(addr)) {
|
||||
if (is_write) {
|
||||
/* FIXME: since we don't know the length of the access here, we
|
||||
* treat Non-DWORD length write requests without PASID as
|
||||
* interrupt requests, too. Withoud interrupt remapping support,
|
||||
* we just use 1:1 mapping.
|
||||
*/
|
||||
VTD_DPRINTF(MMU, "write request to interrupt address "
|
||||
"gpa 0x%"PRIx64, addr);
|
||||
entry->iova = addr & VTD_PAGE_MASK_4K;
|
||||
entry->translated_addr = addr & VTD_PAGE_MASK_4K;
|
||||
entry->addr_mask = ~VTD_PAGE_MASK_4K;
|
||||
entry->perm = IOMMU_WO;
|
||||
return;
|
||||
} else {
|
||||
VTD_DPRINTF(GENERAL, "error: read request from interrupt address "
|
||||
"gpa 0x%"PRIx64, addr);
|
||||
vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We have standalone memory region for interrupt addresses, we
|
||||
* should never receive translation requests in this region.
|
||||
*/
|
||||
assert(!vtd_is_interrupt_addr(addr));
|
||||
|
||||
/* Try to fetch slpte form IOTLB */
|
||||
iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
|
||||
if (iotlb_entry) {
|
||||
VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
|
||||
" slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr,
|
||||
iotlb_entry->slpte, iotlb_entry->domain_id);
|
||||
trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
|
||||
iotlb_entry->domain_id);
|
||||
slpte = iotlb_entry->slpte;
|
||||
reads = iotlb_entry->read_flags;
|
||||
writes = iotlb_entry->write_flags;
|
||||
@ -854,10 +820,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
|
||||
}
|
||||
/* Try to fetch context-entry from cache first */
|
||||
if (cc_entry->context_cache_gen == s->context_cache_gen) {
|
||||
VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d "
|
||||
"(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")",
|
||||
bus_num, devfn, cc_entry->context_entry.hi,
|
||||
cc_entry->context_entry.lo, cc_entry->context_cache_gen);
|
||||
trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
|
||||
cc_entry->context_entry.lo,
|
||||
cc_entry->context_cache_gen);
|
||||
ce = cc_entry->context_entry;
|
||||
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
|
||||
} else {
|
||||
@ -866,30 +831,26 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
|
||||
if (ret_fr) {
|
||||
ret_fr = -ret_fr;
|
||||
if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
|
||||
VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
|
||||
"requests through this context-entry "
|
||||
"(with FPD Set)");
|
||||
trace_vtd_fault_disabled();
|
||||
} else {
|
||||
vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
|
||||
}
|
||||
return;
|
||||
}
|
||||
/* Update context-cache */
|
||||
VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d "
|
||||
"(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 "->%"PRIu32 ")",
|
||||
bus_num, devfn, ce.hi, ce.lo,
|
||||
cc_entry->context_cache_gen, s->context_cache_gen);
|
||||
trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
|
||||
cc_entry->context_cache_gen,
|
||||
s->context_cache_gen);
|
||||
cc_entry->context_entry = ce;
|
||||
cc_entry->context_cache_gen = s->context_cache_gen;
|
||||
}
|
||||
|
||||
ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level,
|
||||
&reads, &writes);
|
||||
ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
|
||||
&reads, &writes);
|
||||
if (ret_fr) {
|
||||
ret_fr = -ret_fr;
|
||||
if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
|
||||
VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
|
||||
"through this context-entry (with FPD Set)");
|
||||
trace_vtd_fault_disabled();
|
||||
} else {
|
||||
vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
|
||||
}
|
||||
@ -939,6 +900,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
|
||||
|
||||
static void vtd_context_global_invalidate(IntelIOMMUState *s)
|
||||
{
|
||||
trace_vtd_inv_desc_cc_global();
|
||||
s->context_cache_gen++;
|
||||
if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
|
||||
vtd_reset_context_cache(s);
|
||||
@ -978,9 +940,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
|
||||
uint16_t mask;
|
||||
VTDBus *vtd_bus;
|
||||
VTDAddressSpace *vtd_as;
|
||||
uint16_t devfn;
|
||||
uint8_t bus_n, devfn;
|
||||
uint16_t devfn_it;
|
||||
|
||||
trace_vtd_inv_desc_cc_devices(source_id, func_mask);
|
||||
|
||||
switch (func_mask & 3) {
|
||||
case 0:
|
||||
mask = 0; /* No bits in the SID field masked */
|
||||
@ -996,16 +960,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
|
||||
break;
|
||||
}
|
||||
mask = ~mask;
|
||||
VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
|
||||
" mask %"PRIu16, source_id, mask);
|
||||
vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
|
||||
|
||||
bus_n = VTD_SID_TO_BUS(source_id);
|
||||
vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
|
||||
if (vtd_bus) {
|
||||
devfn = VTD_SID_TO_DEVFN(source_id);
|
||||
for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
|
||||
vtd_as = vtd_bus->dev_as[devfn_it];
|
||||
if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
|
||||
VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
|
||||
devfn_it);
|
||||
trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
|
||||
VTD_PCI_FUNC(devfn_it));
|
||||
vtd_as->context_cache_entry.context_cache_gen = 0;
|
||||
}
|
||||
}
|
||||
@ -1046,6 +1010,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
|
||||
|
||||
static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
|
||||
{
|
||||
trace_vtd_iotlb_reset("global invalidation recved");
|
||||
vtd_reset_iotlb(s);
|
||||
}
|
||||
|
||||
@ -1318,9 +1283,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
|
||||
{
|
||||
if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
|
||||
(inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
|
||||
VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Invalidation "
|
||||
"Wait Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
|
||||
inv_desc->hi, inv_desc->lo);
|
||||
trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
|
||||
return false;
|
||||
}
|
||||
if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
|
||||
@ -1332,21 +1295,18 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
|
||||
|
||||
/* FIXME: need to be masked with HAW? */
|
||||
dma_addr_t status_addr = inv_desc->hi;
|
||||
VTD_DPRINTF(INV, "status data 0x%x, status addr 0x%"PRIx64,
|
||||
status_data, status_addr);
|
||||
trace_vtd_inv_desc_wait_sw(status_addr, status_data);
|
||||
status_data = cpu_to_le32(status_data);
|
||||
if (dma_memory_write(&address_space_memory, status_addr, &status_data,
|
||||
sizeof(status_data))) {
|
||||
VTD_DPRINTF(GENERAL, "error: fail to perform a coherent write");
|
||||
trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
|
||||
return false;
|
||||
}
|
||||
} else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
|
||||
/* Interrupt flag */
|
||||
VTD_DPRINTF(INV, "Invalidation Wait Descriptor interrupt completion");
|
||||
vtd_generate_completion_event(s);
|
||||
} else {
|
||||
VTD_DPRINTF(GENERAL, "error: invalid Invalidation Wait Descriptor: "
|
||||
"hi 0x%"PRIx64 " lo 0x%"PRIx64, inv_desc->hi, inv_desc->lo);
|
||||
trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -1355,30 +1315,29 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
|
||||
static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
|
||||
VTDInvDesc *inv_desc)
|
||||
{
|
||||
uint16_t sid, fmask;
|
||||
|
||||
if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
|
||||
VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Context-cache "
|
||||
"Invalidate Descriptor");
|
||||
trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
|
||||
return false;
|
||||
}
|
||||
switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
|
||||
case VTD_INV_DESC_CC_DOMAIN:
|
||||
VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
|
||||
(uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
|
||||
trace_vtd_inv_desc_cc_domain(
|
||||
(uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
|
||||
/* Fall through */
|
||||
case VTD_INV_DESC_CC_GLOBAL:
|
||||
VTD_DPRINTF(INV, "global invalidation");
|
||||
vtd_context_global_invalidate(s);
|
||||
break;
|
||||
|
||||
case VTD_INV_DESC_CC_DEVICE:
|
||||
vtd_context_device_invalidate(s, VTD_INV_DESC_CC_SID(inv_desc->lo),
|
||||
VTD_INV_DESC_CC_FM(inv_desc->lo));
|
||||
sid = VTD_INV_DESC_CC_SID(inv_desc->lo);
|
||||
fmask = VTD_INV_DESC_CC_FM(inv_desc->lo);
|
||||
vtd_context_device_invalidate(s, sid, fmask);
|
||||
break;
|
||||
|
||||
default:
|
||||
VTD_DPRINTF(GENERAL, "error: invalid granularity in Context-cache "
|
||||
"Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
|
||||
inv_desc->hi, inv_desc->lo);
|
||||
trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -1392,22 +1351,19 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
|
||||
|
||||
if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
|
||||
(inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
|
||||
VTD_DPRINTF(GENERAL, "error: non-zero reserved field in IOTLB "
|
||||
"Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
|
||||
inv_desc->hi, inv_desc->lo);
|
||||
trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
|
||||
case VTD_INV_DESC_IOTLB_GLOBAL:
|
||||
VTD_DPRINTF(INV, "global invalidation");
|
||||
trace_vtd_inv_desc_iotlb_global();
|
||||
vtd_iotlb_global_invalidate(s);
|
||||
break;
|
||||
|
||||
case VTD_INV_DESC_IOTLB_DOMAIN:
|
||||
domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
|
||||
VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
|
||||
domain_id);
|
||||
trace_vtd_inv_desc_iotlb_domain(domain_id);
|
||||
vtd_iotlb_domain_invalidate(s, domain_id);
|
||||
break;
|
||||
|
||||
@ -1415,20 +1371,16 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
|
||||
domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
|
||||
addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
|
||||
am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
|
||||
VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
|
||||
" addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
|
||||
trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
|
||||
if (am > VTD_MAMV) {
|
||||
VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
|
||||
"%"PRIu8, (uint8_t)VTD_MAMV);
|
||||
trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
|
||||
return false;
|
||||
}
|
||||
vtd_iotlb_page_invalidate(s, domain_id, addr, am);
|
||||
break;
|
||||
|
||||
default:
|
||||
VTD_DPRINTF(GENERAL, "error: invalid granularity in IOTLB Invalidate "
|
||||
"Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
|
||||
inv_desc->hi, inv_desc->lo);
|
||||
trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -1527,33 +1479,28 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
|
||||
|
||||
switch (desc_type) {
|
||||
case VTD_INV_DESC_CC:
|
||||
VTD_DPRINTF(INV, "Context-cache Invalidate Descriptor hi 0x%"PRIx64
|
||||
" lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
|
||||
trace_vtd_inv_desc("context-cache", inv_desc.hi, inv_desc.lo);
|
||||
if (!vtd_process_context_cache_desc(s, &inv_desc)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
|
||||
case VTD_INV_DESC_IOTLB:
|
||||
VTD_DPRINTF(INV, "IOTLB Invalidate Descriptor hi 0x%"PRIx64
|
||||
" lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
|
||||
trace_vtd_inv_desc("iotlb", inv_desc.hi, inv_desc.lo);
|
||||
if (!vtd_process_iotlb_desc(s, &inv_desc)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
|
||||
case VTD_INV_DESC_WAIT:
|
||||
VTD_DPRINTF(INV, "Invalidation Wait Descriptor hi 0x%"PRIx64
|
||||
" lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
|
||||
trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
|
||||
if (!vtd_process_wait_desc(s, &inv_desc)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
|
||||
case VTD_INV_DESC_IEC:
|
||||
VTD_DPRINTF(INV, "Invalidation Interrupt Entry Cache "
|
||||
"Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
|
||||
inv_desc.hi, inv_desc.lo);
|
||||
trace_vtd_inv_desc("iec", inv_desc.hi, inv_desc.lo);
|
||||
if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
|
||||
return false;
|
||||
}
|
||||
@ -1568,9 +1515,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
|
||||
break;
|
||||
|
||||
default:
|
||||
VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
|
||||
"hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
|
||||
inv_desc.hi, inv_desc.lo, desc_type);
|
||||
trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
|
||||
return false;
|
||||
}
|
||||
s->iq_head++;
|
||||
@ -2049,7 +1994,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
|
||||
is_write, &ret);
|
||||
VTD_DPRINTF(MMU,
|
||||
"bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
|
||||
" gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
|
||||
" iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
|
||||
VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
|
||||
vtd_as->devfn, addr, ret.translated_addr);
|
||||
return ret;
|
||||
@ -2115,6 +2060,7 @@ static Property vtd_properties[] = {
|
||||
DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
|
||||
ON_OFF_AUTO_AUTO),
|
||||
DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
|
||||
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
@ -2496,6 +2442,10 @@ static void vtd_init(IntelIOMMUState *s)
|
||||
s->ecap |= VTD_ECAP_DT;
|
||||
}
|
||||
|
||||
if (s->caching_mode) {
|
||||
s->cap |= VTD_CAP_CM;
|
||||
}
|
||||
|
||||
vtd_reset_context_cache(s);
|
||||
vtd_reset_iotlb(s);
|
||||
|
||||
|
@@ -202,6 +202,7 @@
 #define VTD_CAP_MAMV (VTD_MAMV << 48)
 #define VTD_CAP_PSI (1ULL << 39)
 #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
+#define VTD_CAP_CM (1ULL << 7)
 
 /* Supported Adjusted Guest Address Widths */
 #define VTD_CAP_SAGAW_SHIFT 8
@ -3,6 +3,34 @@
|
||||
# hw/i386/x86-iommu.c
|
||||
x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
|
||||
|
||||
# hw/i386/intel_iommu.c
|
||||
vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
|
||||
vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64
|
||||
vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64
|
||||
vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16
|
||||
vtd_inv_desc_cc_global(void) "context invalidate globally"
|
||||
vtd_inv_desc_cc_device(uint8_t bus, uint8_t dev, uint8_t fn) "context invalidate device %02"PRIx8":%02"PRIx8".%02"PRIx8
|
||||
vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate devices sid 0x%"PRIx16" fmask 0x%"PRIx16
|
||||
vtd_inv_desc_cc_invalid(uint64_t hi, uint64_t lo) "invalid context-cache desc hi 0x%"PRIx64" lo 0x%"PRIx64
|
||||
vtd_inv_desc_iotlb_global(void) "iotlb invalidate global"
|
||||
vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16
|
||||
vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8
|
||||
vtd_inv_desc_iotlb_invalid(uint64_t hi, uint64_t lo) "invalid iotlb desc hi 0x%"PRIx64" lo 0x%"PRIx64
|
||||
vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32
|
||||
vtd_inv_desc_wait_irq(const char *msg) "%s"
|
||||
vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
|
||||
vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
|
||||
vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
|
||||
vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64
|
||||
vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
|
||||
vtd_ce_invalid(uint64_t hi, uint64_t lo) "invalid context entry hi 0x%"PRIx64" lo 0x%"PRIx64
|
||||
vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
|
||||
vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
|
||||
vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
|
||||
vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
|
||||
vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
|
||||
vtd_fault_disabled(void) "Fault processing disabled for context entry"
|
||||
|
||||
# hw/i386/amd_iommu.c
|
||||
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
|
||||
amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint64_t gpa, uint64_t txaddr) " update iotlb domid 0x%"PRIx16" devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
|
||||
|
@@ -1130,7 +1130,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
     return 0;
 }
 
-static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
+                                      size_t size)
 {
     VirtIONet *n = qemu_get_nic_opaque(nc);
     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
@@ -1233,6 +1234,17 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
     return size;
 }
 
+static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
+                                  size_t size)
+{
+    ssize_t r;
+
+    rcu_read_lock();
+    r = virtio_net_receive_rcu(nc, buf, size);
+    rcu_read_unlock();
+    return r;
+}
+
 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
 
 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
@@ -610,7 +610,8 @@ bool pcie_cap_is_arifwd_enabled(const PCIDevice *dev)
  * uint16_t ext_cap_size
  */
 
-static uint16_t pcie_find_capability_list(PCIDevice *dev, uint16_t cap_id,
+/* Passing a cap_id value > 0xffff will return 0 and put end of list in prev */
+static uint16_t pcie_find_capability_list(PCIDevice *dev, uint32_t cap_id,
                                           uint16_t *prev_p)
 {
     uint16_t prev = 0;
@@ -664,30 +665,24 @@ void pcie_add_capability(PCIDevice *dev,
                          uint16_t cap_id, uint8_t cap_ver,
                          uint16_t offset, uint16_t size)
 {
-    uint32_t header;
-    uint16_t next;
-
     assert(offset >= PCI_CONFIG_SPACE_SIZE);
     assert(offset < offset + size);
     assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
     assert(size >= 8);
     assert(pci_is_express(dev));
 
-    if (offset == PCI_CONFIG_SPACE_SIZE) {
-        header = pci_get_long(dev->config + offset);
-        next = PCI_EXT_CAP_NEXT(header);
-    } else {
+    if (offset != PCI_CONFIG_SPACE_SIZE) {
         uint16_t prev;
 
-        /* 0 is reserved cap id. use internally to find the last capability
-           in the linked list */
-        next = pcie_find_capability_list(dev, 0, &prev);
-
+        /*
+         * 0xffffffff is not a valid cap id (it's a 16 bit field). use
+         * internally to find the last capability in the linked list.
+         */
+        pcie_find_capability_list(dev, 0xffffffff, &prev);
         assert(prev >= PCI_CONFIG_SPACE_SIZE);
-        assert(next == 0);
         pcie_ext_cap_set_next(dev, prev, offset);
     }
-    pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, next));
+    pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, 0));
 
     /* Make capability read-only by default */
     memset(dev->wmask + offset, 0, size);
@ -49,35 +49,35 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
static void virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev,
|
||||
static bool virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev,
|
||||
VirtQueue *vq)
|
||||
{
|
||||
VirtIOSCSI *s = (VirtIOSCSI *)vdev;
|
||||
|
||||
assert(s->ctx && s->dataplane_started);
|
||||
virtio_scsi_handle_cmd_vq(s, vq);
|
||||
return virtio_scsi_handle_cmd_vq(s, vq);
|
||||
}
|
||||
|
||||
static void virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev,
|
||||
static bool virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev,
|
||||
VirtQueue *vq)
|
||||
{
|
||||
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
|
||||
|
||||
assert(s->ctx && s->dataplane_started);
|
||||
virtio_scsi_handle_ctrl_vq(s, vq);
|
||||
return virtio_scsi_handle_ctrl_vq(s, vq);
|
||||
}
|
||||
|
||||
static void virtio_scsi_data_plane_handle_event(VirtIODevice *vdev,
|
||||
static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev,
|
||||
VirtQueue *vq)
|
||||
{
|
||||
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
|
||||
|
||||
assert(s->ctx && s->dataplane_started);
|
||||
virtio_scsi_handle_event_vq(s, vq);
|
||||
return virtio_scsi_handle_event_vq(s, vq);
|
||||
}
|
||||
|
||||
static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
|
||||
void (*fn)(VirtIODevice *vdev, VirtQueue *vq))
|
||||
VirtIOHandleAIOOutput fn)
|
||||
{
|
||||
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s)));
|
||||
int rc;
|
||||
|
@ -436,13 +436,16 @@ static inline void virtio_scsi_release(VirtIOSCSI *s)
|
||||
}
|
||||
}
|
||||
|
||||
void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
{
|
||||
VirtIOSCSIReq *req;
|
||||
bool progress = false;
|
||||
|
||||
while ((req = virtio_scsi_pop_req(s, vq))) {
|
||||
progress = true;
|
||||
virtio_scsi_handle_ctrl_req(s, req);
|
||||
}
|
||||
return progress;
|
||||
}
|
||||
|
||||
static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
|
||||
@ -591,10 +594,11 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||
scsi_req_unref(sreq);
|
||||
}
|
||||
|
||||
void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
{
|
||||
VirtIOSCSIReq *req, *next;
|
||||
int ret = 0;
|
||||
bool progress = false;
|
||||
|
||||
QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
|
||||
|
||||
@ -602,6 +606,7 @@ void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
virtio_queue_set_notification(vq, 0);
|
||||
|
||||
while ((req = virtio_scsi_pop_req(s, vq))) {
|
||||
progress = true;
|
||||
ret = virtio_scsi_handle_cmd_req_prepare(s, req);
|
||||
if (!ret) {
|
||||
QTAILQ_INSERT_TAIL(&reqs, req, next);
|
||||
@ -624,6 +629,7 @@ void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
|
||||
virtio_scsi_handle_cmd_req_submit(s, req);
|
||||
}
|
||||
return progress;
|
||||
}
|
||||
|
||||
static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq)
|
||||
@ -752,11 +758,13 @@ out:
|
||||
virtio_scsi_release(s);
|
||||
}
|
||||
|
||||
void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||
{
|
||||
if (s->events_dropped) {
|
||||
virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
|
||||
|
@ -294,18 +294,55 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
|
||||
section->offset_within_address_space & (1ULL << 63);
|
||||
}
|
||||
|
||||
/* Called with rcu_read_lock held. */
|
||||
static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
bool *read_only)
|
||||
{
|
||||
MemoryRegion *mr;
|
||||
hwaddr xlat;
|
||||
hwaddr len = iotlb->addr_mask + 1;
|
||||
bool writable = iotlb->perm & IOMMU_WO;
|
||||
|
||||
/*
|
||||
* The IOMMU TLB entry we have just covers translation through
|
||||
* this IOMMU to its immediate target. We need to translate
|
||||
* it the rest of the way through to memory.
|
||||
*/
|
||||
mr = address_space_translate(&address_space_memory,
|
||||
iotlb->translated_addr,
|
||||
&xlat, &len, writable);
|
||||
if (!memory_region_is_ram(mr)) {
|
||||
error_report("iommu map to non memory area %"HWADDR_PRIx"",
|
||||
xlat);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Translation truncates length to the IOMMU page size,
|
||||
* check that it did not truncate too much.
|
||||
*/
|
||||
if (len & iotlb->addr_mask) {
|
||||
error_report("iommu has granularity incompatible with target AS");
|
||||
return false;
|
||||
}
|
||||
|
||||
*vaddr = memory_region_get_ram_ptr(mr) + xlat;
|
||||
*read_only = !writable || mr->readonly;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
|
||||
VFIOContainer *container = giommu->container;
|
||||
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||
MemoryRegion *mr;
|
||||
hwaddr xlat;
|
||||
hwaddr len = iotlb->addr_mask + 1;
|
||||
bool read_only;
|
||||
void *vaddr;
|
||||
int ret;
|
||||
|
||||
trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
|
||||
trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
|
||||
iova, iova + iotlb->addr_mask);
|
||||
|
||||
if (iotlb->target_as != &address_space_memory) {
|
||||
error_report("Wrong target AS \"%s\", only system memory is allowed",
|
||||
@ -313,34 +350,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The IOMMU TLB entry we have just covers translation through
|
||||
* this IOMMU to its immediate target. We need to translate
|
||||
* it the rest of the way through to memory.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
mr = address_space_translate(&address_space_memory,
|
||||
iotlb->translated_addr,
|
||||
&xlat, &len, iotlb->perm & IOMMU_WO);
|
||||
if (!memory_region_is_ram(mr)) {
|
||||
error_report("iommu map to non memory area %"HWADDR_PRIx"",
|
||||
xlat);
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* Translation truncates length to the IOMMU page size,
|
||||
* check that it did not truncate too much.
|
||||
*/
|
||||
if (len & iotlb->addr_mask) {
|
||||
error_report("iommu has granularity incompatible with target AS");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
|
||||
vaddr = memory_region_get_ram_ptr(mr) + xlat;
|
||||
if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* vaddr is only valid until rcu_read_unlock(). But after
|
||||
* vfio_dma_map has set up the mapping the pages will be
|
||||
* pinned by the kernel. This makes sure that the RAM backend
|
||||
* of vaddr will always be there, even if the memory object is
|
||||
* destroyed and its backing memory munmap-ed.
|
||||
*/
|
||||
ret = vfio_dma_map(container, iova,
|
||||
iotlb->addr_mask + 1, vaddr,
|
||||
!(iotlb->perm & IOMMU_WO) || mr->readonly);
|
||||
read_only);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx", %p) = %d (%m)",
|
||||
|
@ -84,7 +84,7 @@ vfio_pci_igd_lpc_bridge_enabled(const char *name) "%s"
|
||||
# hw/vfio/common.c
|
||||
vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
|
||||
vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64
|
||||
vfio_iommu_map_notify(uint64_t iova_start, uint64_t iova_end) "iommu map @ %"PRIx64" - %"PRIx64
|
||||
vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "iommu %s @ %"PRIx64" - %"PRIx64
|
||||
vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add %"PRIx64" - %"PRIx64
|
||||
vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] %"PRIx64" - %"PRIx64
|
||||
vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] %"PRIx64" - %"PRIx64" [%p]"
|
||||
|
@ -60,6 +60,13 @@ typedef struct VRingUsed
|
||||
VRingUsedElem ring[0];
|
||||
} VRingUsed;
|
||||
|
||||
typedef struct VRingMemoryRegionCaches {
|
||||
struct rcu_head rcu;
|
||||
MemoryRegionCache desc;
|
||||
MemoryRegionCache avail;
|
||||
MemoryRegionCache used;
|
||||
} VRingMemoryRegionCaches;
|
||||
|
||||
typedef struct VRing
|
||||
{
|
||||
unsigned int num;
|
||||
@ -68,6 +75,7 @@ typedef struct VRing
|
||||
hwaddr desc;
|
||||
hwaddr avail;
|
||||
hwaddr used;
|
||||
VRingMemoryRegionCaches *caches;
|
||||
} VRing;
|
||||
|
||||
struct VirtQueue
|
||||
@ -97,13 +105,58 @@ struct VirtQueue
|
||||
|
||||
uint16_t vector;
|
||||
VirtIOHandleOutput handle_output;
|
||||
VirtIOHandleOutput handle_aio_output;
|
||||
VirtIOHandleAIOOutput handle_aio_output;
|
||||
VirtIODevice *vdev;
|
||||
EventNotifier guest_notifier;
|
||||
EventNotifier host_notifier;
|
||||
QLIST_ENTRY(VirtQueue) node;
|
||||
};
|
||||
|
||||
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
|
||||
{
|
||||
if (!caches) {
|
||||
return;
|
||||
}
|
||||
|
||||
address_space_cache_destroy(&caches->desc);
|
||||
address_space_cache_destroy(&caches->avail);
|
||||
address_space_cache_destroy(&caches->used);
|
||||
g_free(caches);
|
||||
}
|
||||
|
||||
static void virtio_init_region_cache(VirtIODevice *vdev, int n)
|
||||
{
|
||||
VirtQueue *vq = &vdev->vq[n];
|
||||
VRingMemoryRegionCaches *old = vq->vring.caches;
|
||||
VRingMemoryRegionCaches *new;
|
||||
hwaddr addr, size;
|
||||
int event_size;
|
||||
|
||||
event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
|
||||
|
||||
addr = vq->vring.desc;
|
||||
if (!addr) {
|
||||
return;
|
||||
}
|
||||
new = g_new0(VRingMemoryRegionCaches, 1);
|
||||
size = virtio_queue_get_desc_size(vdev, n);
|
||||
address_space_cache_init(&new->desc, vdev->dma_as,
|
||||
addr, size, false);
|
||||
|
||||
size = virtio_queue_get_used_size(vdev, n) + event_size;
|
||||
address_space_cache_init(&new->used, vdev->dma_as,
|
||||
vq->vring.used, size, true);
|
||||
|
||||
size = virtio_queue_get_avail_size(vdev, n) + event_size;
|
||||
address_space_cache_init(&new->avail, vdev->dma_as,
|
||||
vq->vring.avail, size, false);
|
||||
|
||||
atomic_rcu_set(&vq->vring.caches, new);
|
||||
if (old) {
|
||||
call_rcu(old, virtio_free_region_cache, rcu);
|
||||
}
|
||||
}
|
||||
|
||||
/* virt queue functions */
|
||||
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
|
||||
{
|
||||
@ -117,101 +170,125 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
|
||||
vring->used = vring_align(vring->avail +
|
||||
offsetof(VRingAvail, ring[vring->num]),
|
||||
vring->align);
|
||||
virtio_init_region_cache(vdev, n);
|
||||
}
|
||||
|
||||
/* Called within rcu_read_lock(). */
|
||||
static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
|
||||
hwaddr desc_pa, int i)
|
||||
MemoryRegionCache *cache, int i)
|
||||
{
|
||||
address_space_read(vdev->dma_as, desc_pa + i * sizeof(VRingDesc),
|
||||
MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
|
||||
address_space_read_cached(cache, i * sizeof(VRingDesc),
|
||||
desc, sizeof(VRingDesc));
|
||||
virtio_tswap64s(vdev, &desc->addr);
|
||||
virtio_tswap32s(vdev, &desc->len);
|
||||
virtio_tswap16s(vdev, &desc->flags);
|
||||
virtio_tswap16s(vdev, &desc->next);
|
||||
}
|
||||
|
||||
/* Called within rcu_read_lock(). */
|
||||
static inline uint16_t vring_avail_flags(VirtQueue *vq)
|
||||
{
|
||||
hwaddr pa;
|
||||
pa = vq->vring.avail + offsetof(VRingAvail, flags);
|
||||
return virtio_lduw_phys(vq->vdev, pa);
|
||||
VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
|
||||
hwaddr pa = offsetof(VRingAvail, flags);
|
||||
return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
|
||||
}
|
||||
|
||||
/* Called within rcu_read_lock(). */
|
||||
static inline uint16_t vring_avail_idx(VirtQueue *vq)
|
||||
{
|
||||
hwaddr pa;
|
||||
pa = vq->vring.avail + offsetof(VRingAvail, idx);
|
||||
vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
|
||||
VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
|
||||
hwaddr pa = offsetof(VRingAvail, idx);
|
||||
vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
|
||||
return vq->shadow_avail_idx;
|
||||
}
|
||||
|
||||
/* Called within rcu_read_lock(). */
|
||||
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
|
||||
{
|
||||
hwaddr pa;
|
||||
pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
|
||||
return virtio_lduw_phys(vq->vdev, pa);
|
||||
VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
|
||||
hwaddr pa = offsetof(VRingAvail, ring[i]);
|
||||
return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
|
||||
}
|
||||
|
||||
/* Called within rcu_read_lock(). */
|
||||
static inline uint16_t vring_get_used_event(VirtQueue *vq)
|
||||
{
|
||||
return vring_avail_ring(vq, vq->vring.num);
|
||||
}
|
||||
|
||||
/* Called within rcu_read_lock(). */
|
||||
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
|
||||
int i)
|
||||
{
|
||||
hwaddr pa;
|
||||
VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
|
||||
hwaddr pa = offsetof(VRingUsed, ring[i]);
|
||||
virtio_tswap32s(vq->vdev, &uelem->id);
|
||||
virtio_tswap32s(vq->vdev, &uelem->len);
|
||||
pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
|
||||
address_space_write(vq->vdev->dma_as, pa, MEMTXATTRS_UNSPECIFIED,
|
||||
(void *)uelem, sizeof(VRingUsedElem));
|
||||
address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
|
||||
address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
|
||||
}
|
||||
|
||||
/* Called within rcu_read_lock(). */
|
||||
static uint16_t vring_used_idx(VirtQueue *vq)
|
||||
{
|
||||
hwaddr pa;
|
||||
pa = vq->vring.used + offsetof(VRingUsed, idx);
|
||||
return virtio_lduw_phys(vq->vdev, pa);
|
||||
VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
|
||||
hwaddr pa = offsetof(VRingUsed, idx);
|
||||
return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
|
||||
}
|
||||
|
||||
/* Called within rcu_read_lock(). */
|
||||
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
|
||||
{
|
||||
hwaddr pa;
|
||||
pa = vq->vring.used + offsetof(VRingUsed, idx);
|
||||
virtio_stw_phys(vq->vdev, pa, val);
|
||||
VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
|
||||
hwaddr pa = offsetof(VRingUsed, idx);
|
||||
virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
|
||||
address_space_cache_invalidate(&caches->used, pa, sizeof(val));
|
||||
vq->used_idx = val;
|
||||
}
|
||||
|
||||
+/* Called within rcu_read_lock(). */
 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 {
+    VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
     VirtIODevice *vdev = vq->vdev;
-    hwaddr pa;
-    pa = vq->vring.used + offsetof(VRingUsed, flags);
-    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
+    hwaddr pa = offsetof(VRingUsed, flags);
+    uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
+
+    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
+    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 }

+/* Called within rcu_read_lock(). */
 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 {
+    VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
     VirtIODevice *vdev = vq->vdev;
-    hwaddr pa;
-    pa = vq->vring.used + offsetof(VRingUsed, flags);
-    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
+    hwaddr pa = offsetof(VRingUsed, flags);
+    uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
+
+    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
+    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 }

+/* Called within rcu_read_lock(). */
 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 {
+    VRingMemoryRegionCaches *caches;
     hwaddr pa;
     if (!vq->notification) {
         return;
     }
-    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
-    virtio_stw_phys(vq->vdev, pa, val);
+
+    caches = atomic_rcu_read(&vq->vring.caches);
+    pa = offsetof(VRingUsed, ring[vq->vring.num]);
+    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 }

 void virtio_queue_set_notification(VirtQueue *vq, int enable)
 {
     vq->notification = enable;

+    rcu_read_lock();
     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
         vring_set_avail_event(vq, vring_avail_idx(vq));
     } else if (enable) {
@@ -223,6 +300,7 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable)
         /* Expose avail event/used flags before caller checks the avail idx. */
         smp_mb();
     }
+    rcu_read_unlock();
 }

 int virtio_queue_ready(VirtQueue *vq)
@@ -231,8 +309,9 @@ int virtio_queue_ready(VirtQueue *vq)
 }

 /* Fetch avail_idx from VQ memory only when we really need to know if
- * guest has added some buffers. */
-int virtio_queue_empty(VirtQueue *vq)
+ * guest has added some buffers.
+ * Called within rcu_read_lock(). */
+static int virtio_queue_empty_rcu(VirtQueue *vq)
 {
     if (vq->shadow_avail_idx != vq->last_avail_idx) {
         return 0;
@@ -241,6 +320,20 @@ int virtio_queue_empty(VirtQueue *vq)
     return vring_avail_idx(vq) == vq->last_avail_idx;
 }

+int virtio_queue_empty(VirtQueue *vq)
+{
+    bool empty;
+
+    if (vq->shadow_avail_idx != vq->last_avail_idx) {
+        return 0;
+    }
+
+    rcu_read_lock();
+    empty = vring_avail_idx(vq) == vq->last_avail_idx;
+    rcu_read_unlock();
+    return empty;
+}
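The new virtio_queue_empty() above keeps a fast path that never touches guest memory: if the shadow copy of avail->idx already differs from last_avail_idx, the queue is known to be non-empty and the RCU-protected guest read can be skipped. The standalone sketch below uses invented names and is not QEMU code; it only mirrors that two-level check.

/* Minimal sketch of the shadow-index fast path, assuming toy_ types. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_vq {
    uint16_t shadow_avail_idx;   /* last value read from the guest ring */
    uint16_t last_avail_idx;     /* next entry the device will consume */
    uint16_t guest_avail_idx;    /* stands in for the value in guest RAM */
};

static uint16_t toy_read_guest_avail_idx(struct toy_vq *vq)
{
    /* in QEMU this is the cached, RCU-protected read of avail->idx */
    vq->shadow_avail_idx = vq->guest_avail_idx;
    return vq->shadow_avail_idx;
}

static bool toy_queue_empty(struct toy_vq *vq)
{
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return false;            /* fast path: no guest-memory access needed */
    }
    return toy_read_guest_avail_idx(vq) == vq->last_avail_idx;
}

int main(void)
{
    struct toy_vq vq = { .shadow_avail_idx = 2, .last_avail_idx = 2,
                         .guest_avail_idx = 3 };
    printf("empty=%d\n", toy_queue_empty(&vq));   /* guest added one buffer */
    return 0;
}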
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                                unsigned int len)
 {
@@ -319,6 +412,7 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
     return true;
 }

+/* Called within rcu_read_lock(). */
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len, unsigned int idx)
 {
@@ -339,6 +433,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
     vring_used_write(vq, &uelem, idx);
 }

+/* Called within rcu_read_lock(). */
 void virtqueue_flush(VirtQueue *vq, unsigned int count)
 {
     uint16_t old, new;
@@ -362,10 +457,13 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
 {
+    rcu_read_lock();
     virtqueue_fill(vq, elem, len, 0);
     virtqueue_flush(vq, 1);
+    rcu_read_unlock();
 }

+/* Called within rcu_read_lock(). */
 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 {
     uint16_t num_heads = vring_avail_idx(vq) - idx;
@@ -385,6 +483,7 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
     return num_heads;
 }

+/* Called within rcu_read_lock(). */
 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                                unsigned int *head)
 {
@@ -408,7 +507,7 @@ enum {
 };

 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
-                                    hwaddr desc_pa, unsigned int max,
+                                    MemoryRegionCache *desc_cache, unsigned int max,
                                     unsigned int *next)
 {
     /* If this descriptor says it doesn't chain, we're done. */
@@ -426,7 +525,7 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
         return VIRTQUEUE_READ_DESC_ERROR;
     }

-    vring_desc_read(vdev, desc, desc_pa, *next);
+    vring_desc_read(vdev, desc, desc_cache, *next);
     return VIRTQUEUE_READ_DESC_MORE;
 }
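virtqueue_read_next_desc() above only changes how the next descriptor is read (through the cache instead of a guest-physical address); the chain-walking contract stays the same. The standalone sketch below is not QEMU code (toy_ names are invented); it shows that contract: a descriptor either terminates the chain or names the next slot, and the walker rejects out-of-range indices so a buggy or hostile guest cannot send it out of bounds.

/* Minimal sketch of a bounded descriptor-chain walk. */
#include <stdint.h>
#include <stdio.h>

#define TOY_DESC_F_NEXT 1

struct toy_desc {
    uint16_t flags;
    uint16_t next;
};

enum { TOY_DESC_DONE, TOY_DESC_MORE, TOY_DESC_ERROR };

static int toy_read_next_desc(const struct toy_desc *table, unsigned max,
                              unsigned *i)
{
    if (!(table[*i].flags & TOY_DESC_F_NEXT)) {
        return TOY_DESC_DONE;            /* descriptor does not chain */
    }
    *i = table[*i].next;
    if (*i >= max) {
        return TOY_DESC_ERROR;           /* next index out of range */
    }
    return TOY_DESC_MORE;
}

int main(void)
{
    struct toy_desc table[3] = {
        { TOY_DESC_F_NEXT, 2 }, { 0, 0 }, { 0, 0 },
    };
    unsigned i = 0, hops = 0;
    int rc;

    while ((rc = toy_read_next_desc(table, 3, &i)) == TOY_DESC_MORE) {
        hops++;
    }
    printf("rc=%d final=%u hops=%u\n", rc, i, hops);
    return 0;
}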
@@ -434,29 +533,38 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                                unsigned int *out_bytes,
                                unsigned max_in_bytes, unsigned max_out_bytes)
 {
-    unsigned int idx;
+    VirtIODevice *vdev = vq->vdev;
+    unsigned int max, idx;
     unsigned int total_bufs, in_total, out_total;
+    VRingMemoryRegionCaches *caches;
+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    int64_t len = 0;
     int rc;

+    rcu_read_lock();
     idx = vq->last_avail_idx;

     total_bufs = in_total = out_total = 0;

+    max = vq->vring.num;
+    caches = atomic_rcu_read(&vq->vring.caches);
+    if (caches->desc.len < max * sizeof(VRingDesc)) {
+        virtio_error(vdev, "Cannot map descriptor ring");
+        goto err;
+    }
+
     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
-        VirtIODevice *vdev = vq->vdev;
-        unsigned int max, num_bufs, indirect = 0;
+        MemoryRegionCache *desc_cache = &caches->desc;
+        unsigned int num_bufs;
         VRingDesc desc;
-        hwaddr desc_pa;
         unsigned int i;

-        max = vq->vring.num;
         num_bufs = total_bufs;

         if (!virtqueue_get_head(vq, idx++, &i)) {
             goto err;
         }

-        desc_pa = vq->vring.desc;
-        vring_desc_read(vdev, &desc, desc_pa, i);
+        vring_desc_read(vdev, &desc, desc_cache, i);

         if (desc.flags & VRING_DESC_F_INDIRECT) {
             if (desc.len % sizeof(VRingDesc)) {
@@ -471,11 +579,18 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
             }

             /* loop over the indirect descriptor table */
-            indirect = 1;
+            len = address_space_cache_init(&indirect_desc_cache,
+                                           vdev->dma_as,
+                                           desc.addr, desc.len, false);
+            desc_cache = &indirect_desc_cache;
+            if (len < desc.len) {
+                virtio_error(vdev, "Cannot map indirect buffer");
+                goto err;
+            }
+
             max = desc.len / sizeof(VRingDesc);
-            desc_pa = desc.addr;
             num_bufs = i = 0;
-            vring_desc_read(vdev, &desc, desc_pa, i);
+            vring_desc_read(vdev, &desc, desc_cache, i);
         }

         do {
@@ -494,17 +609,19 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                 goto done;
             }

-            rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+            rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
         } while (rc == VIRTQUEUE_READ_DESC_MORE);

         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
             goto err;
         }

-        if (!indirect)
-            total_bufs = num_bufs;
-        else
+        if (desc_cache == &indirect_desc_cache) {
+            address_space_cache_destroy(&indirect_desc_cache);
             total_bufs++;
+        } else {
+            total_bufs = num_bufs;
+        }
     }

     if (rc < 0) {
@@ -512,12 +629,14 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
     }

 done:
+    address_space_cache_destroy(&indirect_desc_cache);
     if (in_bytes) {
         *in_bytes = in_total;
     }
     if (out_bytes) {
         *out_bytes = out_total;
     }
+    rcu_read_unlock();
     return;

 err:
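A detail worth noting in the hunk above: address_space_cache_init() may map fewer bytes than requested, so the returned length has to be checked before the indirect descriptor table is trusted. The standalone sketch below uses invented names (it is not the QEMU API) to show that map-then-verify shape.

/* Minimal sketch of "map, check the mapped length, bail out on short maps". */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct ToyCache { void *ptr; size_t len; } ToyCache;

/* pretend mapping function: maps at most 'avail' bytes of the requested region */
static int64_t toy_cache_init(ToyCache *c, size_t want, size_t avail)
{
    c->len = want < avail ? want : avail;
    c->ptr = malloc(c->len);
    return (int64_t)c->len;
}

static void toy_cache_destroy(ToyCache *c)
{
    free(c->ptr);
    c->ptr = NULL;
    c->len = 0;
}

int main(void)
{
    ToyCache indirect = {0};
    size_t want = 256;
    int64_t len = toy_cache_init(&indirect, want, 128 /* only 128 mappable */);

    if (len < (int64_t)want) {
        printf("cannot map indirect buffer (got %lld of %zu bytes)\n",
               (long long)len, want);
    }
    toy_cache_destroy(&indirect);
    return 0;
}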
@@ -651,9 +770,12 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
 void *virtqueue_pop(VirtQueue *vq, size_t sz)
 {
     unsigned int i, head, max;
-    hwaddr desc_pa = vq->vring.desc;
+    VRingMemoryRegionCaches *caches;
+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    MemoryRegionCache *desc_cache;
+    int64_t len;
     VirtIODevice *vdev = vq->vdev;
-    VirtQueueElement *elem;
+    VirtQueueElement *elem = NULL;
     unsigned out_num, in_num;
     hwaddr addr[VIRTQUEUE_MAX_SIZE];
     struct iovec iov[VIRTQUEUE_MAX_SIZE];
@@ -663,8 +785,9 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
     if (unlikely(vdev->broken)) {
         return NULL;
     }
-    if (virtio_queue_empty(vq)) {
-        return NULL;
+    rcu_read_lock();
+    if (virtio_queue_empty_rcu(vq)) {
+        goto done;
     }
     /* Needed after virtio_queue_empty(), see comment in
      * virtqueue_num_heads(). */
@@ -677,11 +800,11 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)

     if (vq->inuse >= vq->vring.num) {
         virtio_error(vdev, "Virtqueue size exceeded");
-        return NULL;
+        goto done;
     }

     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
-        return NULL;
+        goto done;
     }

     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
@@ -689,18 +812,33 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
     }

     i = head;
-    vring_desc_read(vdev, &desc, desc_pa, i);
+
+    caches = atomic_rcu_read(&vq->vring.caches);
+    if (caches->desc.len < max * sizeof(VRingDesc)) {
+        virtio_error(vdev, "Cannot map descriptor ring");
+        goto done;
+    }
+
+    desc_cache = &caches->desc;
+    vring_desc_read(vdev, &desc, desc_cache, i);
     if (desc.flags & VRING_DESC_F_INDIRECT) {
         if (desc.len % sizeof(VRingDesc)) {
             virtio_error(vdev, "Invalid size for indirect buffer table");
-            return NULL;
+            goto done;
         }

         /* loop over the indirect descriptor table */
+        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
+                                       desc.addr, desc.len, false);
+        desc_cache = &indirect_desc_cache;
+        if (len < desc.len) {
+            virtio_error(vdev, "Cannot map indirect buffer");
+            goto done;
+        }
+
         max = desc.len / sizeof(VRingDesc);
-        desc_pa = desc.addr;
         i = 0;
-        vring_desc_read(vdev, &desc, desc_pa, i);
+        vring_desc_read(vdev, &desc, desc_cache, i);
     }

     /* Collect all the descriptors */
@@ -731,7 +869,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
             goto err_undo_map;
         }

-        rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+        rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
     } while (rc == VIRTQUEUE_READ_DESC_MORE);

     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
@@ -753,11 +891,15 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
     vq->inuse++;

     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
+done:
+    address_space_cache_destroy(&indirect_desc_cache);
+    rcu_read_unlock();

     return elem;

 err_undo_map:
     virtqueue_undo_map_desc(out_num, in_num, iov);
-    return NULL;
+    goto done;
 }

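virtqueue_pop() above relies on two things working together: the indirect cache starts out as MEMORY_REGION_CACHE_INVALID, and every exit path funnels through one "done:" label whose cleanup is safe whether or not the cache was ever initialised (this is why the series also makes address_space_cache_destroy idempotent). The standalone sketch below is not QEMU code; the ToyCache type and TOY_CACHE_INVALID macro are invented to illustrate that sentinel-plus-single-cleanup pattern.

/* Minimal sketch of sentinel initialisation plus an idempotent destroy. */
#include <stdio.h>
#include <stdlib.h>

typedef struct ToyCache { void *ptr; } ToyCache;

#define TOY_CACHE_INVALID ((ToyCache) { .ptr = NULL })

static void toy_cache_destroy(ToyCache *c)
{
    if (!c->ptr) {
        return;                    /* idempotent: never initialised, nothing to do */
    }
    free(c->ptr);
    c->ptr = NULL;
}

static int toy_pop(int use_indirect)
{
    ToyCache indirect = TOY_CACHE_INVALID;
    int ret = -1;

    if (use_indirect) {
        indirect.ptr = malloc(64); /* only sometimes initialised */
        if (!indirect.ptr) {
            goto done;
        }
    }
    ret = 0;

done:
    toy_cache_destroy(&indirect);  /* safe on both paths */
    return ret;
}

int main(void)
{
    printf("%d %d\n", toy_pop(0), toy_pop(1));
    return 0;
}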
 /* virtqueue_drop_all:
@@ -1219,6 +1361,7 @@ void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
     vdev->vq[n].vring.desc = desc;
     vdev->vq[n].vring.avail = avail;
     vdev->vq[n].vring.used = used;
+    virtio_init_region_cache(vdev, n);
 }

 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
@@ -1287,14 +1430,16 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
     virtio_queue_update_rings(vdev, n);
 }

-static void virtio_queue_notify_aio_vq(VirtQueue *vq)
+static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
 {
     if (vq->vring.desc && vq->handle_aio_output) {
         VirtIODevice *vdev = vq->vdev;

         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
-        vq->handle_aio_output(vdev, vq);
+        return vq->handle_aio_output(vdev, vq);
     }
+
+    return false;
 }

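virtio_queue_notify_aio_vq() above now propagates the handler's return value instead of dropping it, because an AioContext poll callback has to report whether it made progress. The standalone sketch below uses invented names (it is not the QEMU event-loop API) and only shows that shape: a bool-returning handler whose result flows back to the poller, with "no handler" treated as "no progress".

/* Minimal sketch of progress reporting through a bool-returning handler. */
#include <stdbool.h>
#include <stdio.h>

typedef bool (*toy_handle_aio_output)(int queue_has_work);

static bool toy_handler(int queue_has_work)
{
    return queue_has_work != 0;   /* progress only if something was processed */
}

static bool toy_notify_aio_vq(toy_handle_aio_output handler, int queue_has_work)
{
    if (handler) {
        return handler(queue_has_work);
    }
    return false;                 /* no handler registered: no progress */
}

int main(void)
{
    printf("%d %d\n", toy_notify_aio_vq(toy_handler, 0),
                      toy_notify_aio_vq(toy_handler, 1));
    return 0;
}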
 static void virtio_queue_notify_vq(VirtQueue *vq)
@@ -1383,6 +1528,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
     }
 }

+/* Called within rcu_read_lock(). */
 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
     uint16_t old, new;
@@ -1408,7 +1554,12 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)

 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
 {
-    if (!virtio_should_notify(vdev, vq)) {
+    bool should_notify;
+    rcu_read_lock();
+    should_notify = virtio_should_notify(vdev, vq);
+    rcu_read_unlock();
+
+    if (!should_notify) {
         return;
     }

@@ -1433,15 +1584,25 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
     event_notifier_set(&vq->guest_notifier);
 }

+static void virtio_irq(VirtQueue *vq)
+{
+    virtio_set_isr(vq->vdev, 0x1);
+    virtio_notify_vector(vq->vdev, vq->vector);
+}
+
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
-    if (!virtio_should_notify(vdev, vq)) {
+    bool should_notify;
+    rcu_read_lock();
+    should_notify = virtio_should_notify(vdev, vq);
+    rcu_read_unlock();
+
+    if (!should_notify) {
        return;
     }

     trace_virtio_notify(vdev, vq);
-    virtio_set_isr(vq->vdev, 0x1);
-    virtio_notify_vector(vdev, vq->vector);
+    virtio_irq(vq);
 }

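The notify paths above follow one pattern: the decision that reads shared ring state is computed inside the read-side critical section, and the interrupt is raised only after it is released. The standalone sketch below is not QEMU code; a pthread mutex stands in for rcu_read_lock() purely to keep the example self-contained (real RCU readers never block writers), and all names are invented.

/* Minimal sketch: compute the decision under the read lock, act after it. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t toy_lock = PTHREAD_MUTEX_INITIALIZER;
static int toy_guest_wants_irq = 1;   /* stands in for ring flags / event idx */

static bool toy_should_notify(void)
{
    return toy_guest_wants_irq != 0;
}

static void toy_notify(void)
{
    bool should_notify;

    pthread_mutex_lock(&toy_lock);     /* rcu_read_lock() in the real code */
    should_notify = toy_should_notify();
    pthread_mutex_unlock(&toy_lock);   /* rcu_read_unlock() */

    if (!should_notify) {
        return;
    }
    printf("set ISR bit and raise the interrupt vector\n");
}

int main(void)
{
    toy_notify();
    return 0;
}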
 void virtio_notify_config(VirtIODevice *vdev)
@@ -1896,6 +2057,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
         }
     }

+    rcu_read_lock();
     for (i = 0; i < num; i++) {
         if (vdev->vq[i].vring.desc) {
             uint16_t nheads;
@@ -1930,6 +2092,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
             }
         }
     }
+    rcu_read_unlock();

     return 0;
 }
@@ -1937,9 +2100,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
 void virtio_cleanup(VirtIODevice *vdev)
 {
     qemu_del_vm_change_state_handler(vdev->vmstate);
-    g_free(vdev->config);
-    g_free(vdev->vq);
-    g_free(vdev->vector_queues);
 }

 static void virtio_vmstate_change(void *opaque, int running, RunState state)
@@ -2059,7 +2219,11 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)

 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
 {
-    vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+    rcu_read_lock();
+    if (vdev->vq[n].vring.desc) {
+        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+    }
+    rcu_read_unlock();
 }

 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
@@ -2081,7 +2245,7 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n)
 {
     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
     if (event_notifier_test_and_clear(n)) {
-        virtio_notify_vector(vq->vdev, vq->vector);
+        virtio_irq(vq);
     }
 }

@@ -2125,16 +2289,17 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque)
 {
     EventNotifier *n = opaque;
     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
+    bool progress;

     if (virtio_queue_empty(vq)) {
         return false;
     }

-    virtio_queue_notify_aio_vq(vq);
+    progress = virtio_queue_notify_aio_vq(vq);

     /* In case the handler function re-enabled notifications */
     virtio_queue_set_notification(vq, 0);
-    return true;
+    return progress;
 }

 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
@@ -2146,7 +2311,7 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
 }

 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
-                                                VirtIOHandleOutput handle_output)
+                                                VirtIOHandleAIOOutput handle_output)
 {
     if (handle_output) {
         vq->handle_aio_output = handle_output;
@@ -2200,6 +2365,19 @@ void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
     }
 }

+static void virtio_memory_listener_commit(MemoryListener *listener)
+{
+    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
+    int i;
+
+    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+        if (vdev->vq[i].vring.num == 0) {
+            break;
+        }
+        virtio_init_region_cache(vdev, i);
+    }
+}
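virtio_memory_listener_commit() above is the piece that keeps the caches honest: whenever the guest memory map changes, the commit callback walks the in-use queues and rebuilds their cached ring translations so stale host pointers are never dereferenced. The standalone sketch below uses invented names (it is not the QEMU MemoryListener API) and only models that "rebuild everything on commit" loop.

/* Minimal sketch of a commit callback that re-derives per-queue caches. */
#include <stdio.h>

#define TOY_QUEUE_MAX 4

struct toy_queue { int num; int cache_generation; };

struct toy_device {
    struct toy_queue vq[TOY_QUEUE_MAX];
    int memory_generation;
};

static void toy_init_region_cache(struct toy_device *d, int i)
{
    d->vq[i].cache_generation = d->memory_generation;  /* remap this queue */
}

static void toy_memory_listener_commit(struct toy_device *d)
{
    for (int i = 0; i < TOY_QUEUE_MAX; i++) {
        if (d->vq[i].num == 0) {
            break;                 /* queues are allocated contiguously */
        }
        toy_init_region_cache(d, i);
    }
}

int main(void)
{
    struct toy_device dev = { .vq = { { .num = 256 }, { .num = 128 } } };
    dev.memory_generation = 7;     /* the guest changed its memory map */
    toy_memory_listener_commit(&dev);
    printf("%d %d\n", dev.vq[0].cache_generation, dev.vq[1].cache_generation);
    return 0;
}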
 static void virtio_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -2222,6 +2400,9 @@ static void virtio_device_realize(DeviceState *dev, Error **errp)
         error_propagate(errp, err);
         return;
     }
+
+    vdev->listener.commit = virtio_memory_listener_commit;
+    memory_listener_register(&vdev->listener, vdev->dma_as);
 }

 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
@@ -2244,6 +2425,36 @@ static void virtio_device_unrealize(DeviceState *dev, Error **errp)
     vdev->bus_name = NULL;
 }

+static void virtio_device_free_virtqueues(VirtIODevice *vdev)
+{
+    int i;
+    if (!vdev->vq) {
+        return;
+    }
+
+    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+        VRingMemoryRegionCaches *caches;
+        if (vdev->vq[i].vring.num == 0) {
+            break;
+        }
+        caches = atomic_read(&vdev->vq[i].vring.caches);
+        atomic_set(&vdev->vq[i].vring.caches, NULL);
+        virtio_free_region_cache(caches);
+    }
+    g_free(vdev->vq);
+}
+
+static void virtio_device_instance_finalize(Object *obj)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
+
+    memory_listener_unregister(&vdev->listener);
+    virtio_device_free_virtqueues(vdev);
+
+    g_free(vdev->config);
+    g_free(vdev->vector_queues);
+}
+
 static Property virtio_properties[] = {
     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
     DEFINE_PROP_END_OF_LIST(),
@@ -2370,6 +2581,7 @@ static const TypeInfo virtio_device_info = {
     .parent = TYPE_DEVICE,
     .instance_size = sizeof(VirtIODevice),
     .class_init = virtio_device_class_init,
+    .instance_finalize = virtio_device_instance_finalize,
     .abstract = true,
     .class_size = sizeof(VirtioDeviceClass),
 };
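virtio_device_free_virtqueues() above unpublishes each cache pointer before releasing it: the shared pointer is swapped to NULL first, so no new reader can pick it up, and only then is the old cache handed to virtio_free_region_cache() (which in QEMU defers the actual free until RCU readers are done). The standalone sketch below is not QEMU code; C11 atomics stand in for QEMU's atomic_read/atomic_set, the names are invented, and the free is immediate rather than RCU-deferred.

/* Minimal sketch of "unpublish, then free what was published". */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_caches { int mapped; };

static _Atomic(struct toy_caches *) toy_vring_caches;

static void toy_free_region_cache(struct toy_caches *caches)
{
    if (!caches) {
        return;
    }
    free(caches);
}

int main(void)
{
    atomic_store(&toy_vring_caches, calloc(1, sizeof(struct toy_caches)));

    /* teardown: swap the shared pointer to NULL, then release the old value */
    struct toy_caches *old = atomic_exchange(&toy_vring_caches, NULL);
    toy_free_region_cache(old);

    printf("caches now %p\n", (void *)atomic_load(&toy_vring_caches));
    return 0;
}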
@@ -1426,6 +1426,8 @@ struct MemoryRegionCache {
     bool is_write;
 };

+#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mr = NULL })
+
 /* address_space_cache_init: prepare for repeated access to a physical
  * memory region
  *

@@ -257,6 +257,8 @@ struct IntelIOMMUState {
     uint8_t womask[DMAR_REG_SIZE];  /* WO (write only - read returns 0) */
     uint32_t version;

+    bool caching_mode;              /* RO - is cap CM enabled? */
+
     dma_addr_t root;                /* Current root table pointer */
     bool root_extended;             /* Type of root table (extended or not) */
     bool dmar_enabled;              /* Set if DMA remapping is enabled */

@@ -156,6 +156,58 @@ static inline uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s)
 #endif
 }

+static inline uint16_t virtio_lduw_phys_cached(VirtIODevice *vdev,
+                                               MemoryRegionCache *cache,
+                                               hwaddr pa)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return lduw_be_phys_cached(cache, pa);
+    }
+    return lduw_le_phys_cached(cache, pa);
+}
+
+static inline uint32_t virtio_ldl_phys_cached(VirtIODevice *vdev,
+                                              MemoryRegionCache *cache,
+                                              hwaddr pa)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return ldl_be_phys_cached(cache, pa);
+    }
+    return ldl_le_phys_cached(cache, pa);
+}
+
+static inline uint64_t virtio_ldq_phys_cached(VirtIODevice *vdev,
+                                              MemoryRegionCache *cache,
+                                              hwaddr pa)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return ldq_be_phys_cached(cache, pa);
+    }
+    return ldq_le_phys_cached(cache, pa);
+}
+
+static inline void virtio_stw_phys_cached(VirtIODevice *vdev,
+                                          MemoryRegionCache *cache,
+                                          hwaddr pa, uint16_t value)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        stw_be_phys_cached(cache, pa, value);
+    } else {
+        stw_le_phys_cached(cache, pa, value);
+    }
+}
+
+static inline void virtio_stl_phys_cached(VirtIODevice *vdev,
+                                          MemoryRegionCache *cache,
+                                          hwaddr pa, uint32_t value)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        stl_be_phys_cached(cache, pa, value);
+    } else {
+        stl_le_phys_cached(cache, pa, value);
+    }
+}
+
 static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s)
 {
     *s = virtio_tswap16(vdev, *s);
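The helpers added above do one job: pick the big-endian or little-endian cached load/store based on the device's negotiated endianness. The standalone sketch below is not the QEMU API (toy_ names are invented); it only demonstrates that dispatch over the same cached bytes.

/* Minimal sketch of endianness-selected loads from a cached buffer. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t toy_lduw_le(const uint8_t *p)
{
    return (uint16_t)(p[0] | (p[1] << 8));
}

static uint16_t toy_lduw_be(const uint8_t *p)
{
    return (uint16_t)((p[0] << 8) | p[1]);
}

static uint16_t toy_virtio_lduw_cached(bool device_is_big_endian,
                                       const uint8_t *cache, size_t off)
{
    if (device_is_big_endian) {
        return toy_lduw_be(cache + off);
    }
    return toy_lduw_le(cache + off);
}

int main(void)
{
    uint8_t ring[2] = { 0x34, 0x12 };
    printf("le=0x%04x be=0x%04x\n",
           toy_virtio_lduw_cached(false, ring, 0),
           toy_virtio_lduw_cached(true, ring, 0));
    return 0;
}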
@@ -80,6 +80,6 @@ typedef struct MultiReqBuffer {
     bool is_write;
 } MultiReqBuffer;

-void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
+bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);

 #endif

@@ -126,9 +126,9 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp,
                                 VirtIOHandleOutput cmd);

 void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp);
-void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq);
-void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq);
-void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq);
 void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req);
 void virtio_scsi_free_req(VirtIOSCSIReq *req);
 void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,

@@ -85,6 +85,7 @@ struct VirtIODevice
     uint32_t generation;
     int nvectors;
     VirtQueue *vq;
+    MemoryListener listener;
     uint16_t device_id;
     bool vm_running;
     bool broken; /* device in invalid state, needs reset */
@@ -154,6 +155,7 @@ void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);

 typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *);
+typedef bool (*VirtIOHandleAIOOutput)(VirtIODevice *, VirtQueue *);

 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             VirtIOHandleOutput handle_output);
@@ -284,8 +286,7 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
 void virtio_queue_host_notifier_read(EventNotifier *n);
 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
-                                                void (*fn)(VirtIODevice *,
-                                                           VirtQueue *));
+                                                VirtIOHandleAIOOutput handle_output);
 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
 VirtQueue *virtio_vector_next_queue(VirtQueue *vq);

memory.c
@@ -2371,8 +2371,13 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *as)

 void memory_listener_unregister(MemoryListener *listener)
 {
+    if (!listener->address_space) {
+        return;
+    }
+
     QTAILQ_REMOVE(&memory_listeners, listener, link);
     QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
+    listener->address_space = NULL;
 }

 void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
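The memory.c change above makes memory_listener_unregister() idempotent: an early return when the listener has no address space, plus clearing the back-pointer on the way out, turns a second unregister call into a no-op (virtio relies on this because instance_finalize can run for devices whose realize never registered the listener). The standalone sketch below uses invented names, not the QEMU types, to show the same guard.

/* Minimal sketch of an idempotent unregister. */
#include <stdio.h>

struct toy_space { int nlisteners; };
struct toy_listener { struct toy_space *address_space; };

static void toy_listener_unregister(struct toy_listener *l)
{
    if (!l->address_space) {
        return;                    /* already unregistered: nothing to do */
    }
    l->address_space->nlisteners--;
    l->address_space = NULL;       /* makes the next call hit the early return */
}

int main(void)
{
    struct toy_space as = { .nlisteners = 1 };
    struct toy_listener l = { .address_space = &as };

    toy_listener_unregister(&l);
    toy_listener_unregister(&l);   /* second call is harmless */
    printf("nlisteners=%d\n", as.nlisteners);
    return 0;
}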