xemu/dma-helpers.c

436 lines
11 KiB
C
Raw Normal View History

/*
* DMA helper functions
*
* Copyright (c) 2009 Red Hat
*
* This work is licensed under the terms of the GNU General Public License
* (GNU GPL), version 2 or later.
*/
#include "dma.h"
#include "trace.h"
#include "range.h"
#include "qemu-thread.h"
/* #define DEBUG_IOMMU */
static void do_dma_memory_set(dma_addr_t addr, uint8_t c, dma_addr_t len)
iommu: Add universal DMA helper functions Not that long ago, every device implementation using DMA directly accessed guest memory using cpu_physical_memory_*(). This meant that adding support for a guest visible IOMMU would require changing every one of these devices to go through IOMMU translation. Shortly before qemu 1.0, I made a start on fixing this by providing helper functions for PCI DMA. These are currently just stubs which call the direct access functions, but mean that an IOMMU can be implemented in one place, rather than for every PCI device. Clearly, this doesn't help for non PCI devices, which could also be IOMMU translated on some platforms. It is also problematic for the devices which have both PCI and non-PCI version (e.g. OHCI, AHCI) - we cannot use the the pci_dma_*() functions, because they assume the presence of a PCIDevice, but we don't want to have to check between pci_dma_*() and cpu_physical_memory_*() every time we do a DMA in the device code. This patch makes the first step on addressing both these problems, by introducing new (stub) dma helper functions which can be used for any DMA capable device. These dma functions take a DMAContext *, a new (currently empty) variable describing the DMA address space in which the operation is to take place. NULL indicates untranslated DMA directly into guest physical address space. The intention is that in future non-NULL values will given information about any necessary IOMMU translation. DMA using devices must obtain a DMAContext (or, potentially, contexts) from their bus or platform. For now this patch just converts the PCI wrappers to be implemented in terms of the universal wrappers, converting other drivers can take place over time. Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> Cc: Richard Henderson <rth@twiddle.net> Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
2012-06-27 04:50:38 +00:00
{
#define FILLBUF_SIZE 512
uint8_t fillbuf[FILLBUF_SIZE];
int l;
memset(fillbuf, c, FILLBUF_SIZE);
while (len > 0) {
l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
cpu_physical_memory_rw(addr, fillbuf, l, true);
len -= len;
addr += len;
}
}
int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
{
dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
if (dma_has_iommu(dma)) {
return iommu_dma_memory_set(dma, addr, c, len);
}
do_dma_memory_set(addr, c, len);
iommu: Add universal DMA helper functions Not that long ago, every device implementation using DMA directly accessed guest memory using cpu_physical_memory_*(). This meant that adding support for a guest visible IOMMU would require changing every one of these devices to go through IOMMU translation. Shortly before qemu 1.0, I made a start on fixing this by providing helper functions for PCI DMA. These are currently just stubs which call the direct access functions, but mean that an IOMMU can be implemented in one place, rather than for every PCI device. Clearly, this doesn't help for non PCI devices, which could also be IOMMU translated on some platforms. It is also problematic for the devices which have both PCI and non-PCI version (e.g. OHCI, AHCI) - we cannot use the the pci_dma_*() functions, because they assume the presence of a PCIDevice, but we don't want to have to check between pci_dma_*() and cpu_physical_memory_*() every time we do a DMA in the device code. This patch makes the first step on addressing both these problems, by introducing new (stub) dma helper functions which can be used for any DMA capable device. These dma functions take a DMAContext *, a new (currently empty) variable describing the DMA address space in which the operation is to take place. NULL indicates untranslated DMA directly into guest physical address space. The intention is that in future non-NULL values will given information about any necessary IOMMU translation. DMA using devices must obtain a DMAContext (or, potentially, contexts) from their bus or platform. For now this patch just converts the PCI wrappers to be implemented in terms of the universal wrappers, converting other drivers can take place over time. Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> Cc: Richard Henderson <rth@twiddle.net> Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
2012-06-27 04:50:38 +00:00
return 0;
}
void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
{
qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
qsg->nsg = 0;
qsg->nalloc = alloc_hint;
qsg->size = 0;
qsg->dma = dma;
}
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
if (qsg->nsg == qsg->nalloc) {
qsg->nalloc = 2 * qsg->nalloc + 1;
qsg->sg = g_realloc(qsg->sg, qsg->nalloc * sizeof(ScatterGatherEntry));
}
qsg->sg[qsg->nsg].base = base;
qsg->sg[qsg->nsg].len = len;
qsg->size += len;
++qsg->nsg;
}
void qemu_sglist_destroy(QEMUSGList *qsg)
{
g_free(qsg->sg);
memset(qsg, 0, sizeof(*qsg));
}
typedef struct {
BlockDriverAIOCB common;
BlockDriverState *bs;
BlockDriverAIOCB *acb;
QEMUSGList *sg;
uint64_t sector_num;
DMADirection dir;
bool in_cancel;
int sg_cur_index;
dma_addr_t sg_cur_byte;
QEMUIOVector iov;
QEMUBH *bh;
DMAIOFunc *io_func;
} DMAAIOCB;
static void dma_bdrv_cb(void *opaque, int ret);
static void reschedule_dma(void *opaque)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
qemu_bh_delete(dbs->bh);
dbs->bh = NULL;
dma_bdrv_cb(dbs, 0);
}
static void continue_after_map_failure(void *opaque)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
dbs->bh = qemu_bh_new(reschedule_dma, dbs);
qemu_bh_schedule(dbs->bh);
}
static void dma_bdrv_unmap(DMAAIOCB *dbs)
{
int i;
for (i = 0; i < dbs->iov.niov; ++i) {
dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
dbs->iov.iov[i].iov_len, dbs->dir,
dbs->iov.iov[i].iov_len);
}
qemu_iovec_reset(&dbs->iov);
}
static void dma_complete(DMAAIOCB *dbs, int ret)
{
trace_dma_complete(dbs, ret, dbs->common.cb);
dma_bdrv_unmap(dbs);
if (dbs->common.cb) {
dbs->common.cb(dbs->common.opaque, ret);
}
qemu_iovec_destroy(&dbs->iov);
if (dbs->bh) {
qemu_bh_delete(dbs->bh);
dbs->bh = NULL;
}
if (!dbs->in_cancel) {
/* Requests may complete while dma_aio_cancel is in progress. In
* this case, the AIOCB should not be released because it is still
* referenced by dma_aio_cancel. */
qemu_aio_release(dbs);
}
}
static void dma_bdrv_cb(void *opaque, int ret)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
dma_addr_t cur_addr, cur_len;
void *mem;
trace_dma_bdrv_cb(dbs, ret);
dbs->acb = NULL;
dbs->sector_num += dbs->iov.size / 512;
dma_bdrv_unmap(dbs);
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
return;
}
while (dbs->sg_cur_index < dbs->sg->nsg) {
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
mem = dma_memory_map(dbs->sg->dma, cur_addr, &cur_len, dbs->dir);
if (!mem)
break;
qemu_iovec_add(&dbs->iov, mem, cur_len);
dbs->sg_cur_byte += cur_len;
if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
dbs->sg_cur_byte = 0;
++dbs->sg_cur_index;
}
}
if (dbs->iov.size == 0) {
trace_dma_map_wait(dbs);
cpu_register_map_client(dbs, continue_after_map_failure);
return;
}
dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
dbs->iov.size / 512, dma_bdrv_cb, dbs);
assert(dbs->acb);
}
static void dma_aio_cancel(BlockDriverAIOCB *acb)
{
DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
trace_dma_aio_cancel(dbs);
if (dbs->acb) {
BlockDriverAIOCB *acb = dbs->acb;
dbs->acb = NULL;
dbs->in_cancel = true;
bdrv_aio_cancel(acb);
dbs->in_cancel = false;
}
dbs->common.cb = NULL;
dma_complete(dbs, 0);
}
static AIOPool dma_aio_pool = {
.aiocb_size = sizeof(DMAAIOCB),
.cancel = dma_aio_cancel,
};
BlockDriverAIOCB *dma_bdrv_io(
BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
void *opaque, DMADirection dir)
{
DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
trace_dma_bdrv_io(dbs, bs, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
dbs->acb = NULL;
dbs->bs = bs;
dbs->sg = sg;
dbs->sector_num = sector_num;
dbs->sg_cur_index = 0;
dbs->sg_cur_byte = 0;
dbs->dir = dir;
dbs->io_func = io_func;
dbs->bh = NULL;
qemu_iovec_init(&dbs->iov, sg->nsg);
dma_bdrv_cb(dbs, 0);
return &dbs->common;
}
BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
void (*cb)(void *opaque, int ret), void *opaque)
{
return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque,
DMA_DIRECTION_FROM_DEVICE);
}
BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
void (*cb)(void *opaque, int ret), void *opaque)
{
return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque,
DMA_DIRECTION_TO_DEVICE);
}
static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
DMADirection dir)
{
uint64_t resid;
int sg_cur_index;
resid = sg->size;
sg_cur_index = 0;
len = MIN(len, resid);
while (len > 0) {
ScatterGatherEntry entry = sg->sg[sg_cur_index++];
int32_t xfer = MIN(len, entry.len);
dma_memory_rw(sg->dma, entry.base, ptr, xfer, dir);
ptr += xfer;
len -= xfer;
resid -= xfer;
}
return resid;
}
uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg)
{
return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_FROM_DEVICE);
}
uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg)
{
return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_TO_DEVICE);
}
void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
QEMUSGList *sg, enum BlockAcctType type)
{
bdrv_acct_start(bs, cookie, sg->size, type);
}
bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
DMADirection dir)
{
target_phys_addr_t paddr, plen;
#ifdef DEBUG_IOMMU
fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
" len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
#endif
while (len) {
if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
return false;
}
/* The translation might be valid for larger regions. */
if (plen > len) {
plen = len;
}
len -= plen;
addr += plen;
}
return true;
}
int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
void *buf, dma_addr_t len, DMADirection dir)
{
target_phys_addr_t paddr, plen;
int err;
#ifdef DEBUG_IOMMU
fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
#endif
while (len) {
err = dma->translate(dma, addr, &paddr, &plen, dir);
if (err) {
/*
* In case of failure on reads from the guest, we clean the
* destination buffer so that a device that doesn't test
* for errors will not expose qemu internal memory.
*/
memset(buf, 0, len);
return -1;
}
/* The translation might be valid for larger regions. */
if (plen > len) {
plen = len;
}
cpu_physical_memory_rw(paddr, buf, plen,
dir == DMA_DIRECTION_FROM_DEVICE);
len -= plen;
addr += plen;
buf += plen;
}
return 0;
}
int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
dma_addr_t len)
{
target_phys_addr_t paddr, plen;
int err;
#ifdef DEBUG_IOMMU
fprintf(stderr, "dma_memory_set context=%p addr=0x" DMA_ADDR_FMT
" len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
#endif
while (len) {
err = dma->translate(dma, addr, &paddr, &plen,
DMA_DIRECTION_FROM_DEVICE);
if (err) {
return err;
}
/* The translation might be valid for larger regions. */
if (plen > len) {
plen = len;
}
do_dma_memory_set(paddr, c, plen);
len -= plen;
addr += plen;
}
return 0;
}
void dma_context_init(DMAContext *dma, DMATranslateFunc translate,
DMAMapFunc map, DMAUnmapFunc unmap)
{
#ifdef DEBUG_IOMMU
fprintf(stderr, "dma_context_init(%p, %p, %p, %p)\n",
dma, translate, map, unmap);
#endif
dma->translate = translate;
dma->map = map;
dma->unmap = unmap;
}
void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
DMADirection dir)
{
int err;
target_phys_addr_t paddr, plen;
void *buf;
if (dma->map) {
return dma->map(dma, addr, len, dir);
}
plen = *len;
err = dma->translate(dma, addr, &paddr, &plen, dir);
if (err) {
return NULL;
}
/*
* If this is true, the virtual region is contiguous,
* but the translated physical region isn't. We just
* clamp *len, much like cpu_physical_memory_map() does.
*/
if (plen < *len) {
*len = plen;
}
buf = cpu_physical_memory_map(paddr, &plen,
dir == DMA_DIRECTION_FROM_DEVICE);
*len = plen;
return buf;
}
void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
DMADirection dir, dma_addr_t access_len)
{
if (dma->unmap) {
dma->unmap(dma, buffer, len, dir, access_len);
return;
}
cpu_physical_memory_unmap(buffer, len,
dir == DMA_DIRECTION_FROM_DEVICE,
access_len);
}