freedreno/drm: Add virtio backend

Add a new backend to enable using the native driver in a VM guest, via a
new virtgpu context type which (indirectly) makes the host kernel
interface available in the guest and handles the details of mapping
buffers into the guest, etc.

Note that fence-fds are currently a bit awkward, in that they get
signaled by the guest kernel driver (drm/virtio) once virglrenderer in
the host has processed the execbuf, not when the host kernel has
signaled the submit fence.  For passing buffers to the host (virtio-wl)
the EGL context in virglrenderer is used to create a fence on the host
side.  But use of out-fence-fds in the guest could have slightly
unexpected results.  For this reason we limit all submitqueues to
default priority (so they cannot be preempted by the host EGL context).
AFAICT virgl and venus have a similar problem, which will eventually be
solvable once we have RESOURCE_CREATE_SYNC.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14900>
This commit is contained in:
Rob Clark 2022-02-02 10:56:26 -08:00 committed by Marge Bot
parent 2200d674e4
commit 802f4da5ee
17 changed files with 1852 additions and 6 deletions

View File

@ -163,6 +163,7 @@ struct drm_virtgpu_resource_create_blob {
#define VIRTGPU_BLOB_FLAG_USE_MAPPABLE 0x0001
#define VIRTGPU_BLOB_FLAG_USE_SHAREABLE 0x0002
#define VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004
#define VIRTGPU_BLOB_FLAG_USE_INTERNAL 0x0008 /* not-mappable, not-shareable */
/* zero is invalid blob_mem */
__u32 blob_mem;
__u32 blob_flags;

View File

@ -272,6 +272,7 @@ with_panfrost_vk = _vulkan_drivers.contains('panfrost')
with_swrast_vk = _vulkan_drivers.contains('swrast')
with_virtio_vk = _vulkan_drivers.contains('virtio-experimental')
with_freedreno_kgsl = get_option('freedreno-kgsl')
with_freedreno_virtio = get_option('freedreno-virtio')
with_broadcom_vk = _vulkan_drivers.contains('broadcom')
with_imagination_vk = _vulkan_drivers.contains('imagination-experimental')
with_imagination_srv = get_option('imagination-srv')

View File

@ -201,6 +201,12 @@ option(
value : false,
description : 'use kgsl backend for freedreno vulkan driver',
)
option(
'freedreno-virtio',
type : 'boolean',
value : false,
description : 'use experimental virtio backend for freedreno (currently only gallium driver)',
)
option(
'imagination-srv',
type : 'boolean',

View File

@ -34,11 +34,14 @@
#include "freedreno_priv.h"
struct fd_device *msm_device_new(int fd, drmVersionPtr version);
#if HAVE_FREEDRENO_VIRTIO
struct fd_device *virtio_device_new(int fd, drmVersionPtr version);
#endif
struct fd_device *
fd_device_new(int fd)
{
struct fd_device *dev;
struct fd_device *dev = NULL;
drmVersionPtr version;
/* figure out if we are kgsl or msm drm driver: */
@ -53,20 +56,25 @@ fd_device_new(int fd)
if (version->version_major != 1) {
ERROR_MSG("unsupported version: %u.%u.%u", version->version_major,
version->version_minor, version->version_patchlevel);
dev = NULL;
goto out;
}
dev = msm_device_new(fd, version);
dev->version = version->version_minor;
#if HAVE_FREEDRENO_VIRTIO
} else if (!strcmp(version->name, "virtio_gpu")) {
DEBUG_MSG("virtio_gpu DRM device");
dev = virtio_device_new(fd, version);
#endif
#if HAVE_FREEDRENO_KGSL
} else if (!strcmp(version->name, "kgsl")) {
DEBUG_MSG("kgsl DRM device");
dev = kgsl_device_new(fd);
#endif
} else {
ERROR_MSG("unknown device: %s", version->name);
dev = NULL;
}
if (!dev) {
INFO_MSG("unsupported device: %s", version->name);
goto out;
}
out:
@ -114,6 +122,10 @@ fd_device_open(void)
int fd;
fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
#if HAVE_FREEDRENO_VIRTIO
if (fd < 0)
fd = drmOpenWithType("virtio_gpu", NULL, DRM_NODE_RENDER);
#endif
if (fd < 0)
return NULL;

View File

@ -107,6 +107,9 @@ struct fd_fence {
/* Hint that the bo will be exported/shared: */
#define FD_BO_SHARED BITSET_BIT(5)
/* backend private bo flags: */
#define _FD_BO_VIRTIO_SHM BITSET_BIT(6)
/* bo access flags: (keep aligned to MSM_PREP_x) */
#define FD_BO_PREP_READ BITSET_BIT(0)
#define FD_BO_PREP_WRITE BITSET_BIT(1)

View File

@ -52,6 +52,21 @@ libfreedreno_drm_msm_files = files(
)
libfreedreno_drm_files += libfreedreno_drm_msm_files
libfreedreno_drm_virtio_files = files(
'virtio/msm_proto.h',
'virtio/virtio_bo.c',
'virtio/virtio_device.c',
'virtio/virtio_pipe.c',
'virtio/virtio_priv.h',
'virtio/virtio_ringbuffer.c',
'virtio/virtio_ringbuffer.h',
)
if with_freedreno_virtio
libfreedreno_drm_files += libfreedreno_drm_virtio_files
libfreedreno_drm_flags += '-DHAVE_FREEDRENO_VIRTIO'
libfreedreno_drm_includes += inc_virtio_gpu
endif
libfreedreno_drm = static_library(
'freedreno_drm',
[

View File

@ -58,6 +58,7 @@ msm_device_new(int fd, drmVersionPtr version)
dev = &msm_dev->base;
dev->funcs = &funcs;
dev->version = version->version_minor;
/* async submit_queue currently only used for msm_submit_sp: */
if (version->version_minor >= FD_VERSION_SOFTPIN) {

View File

@ -0,0 +1,374 @@
/*
* Copyright 2022 Google LLC
* SPDX-License-Identifier: MIT
*/
#ifndef MSM_PROTO_H_
#define MSM_PROTO_H_
/**
* General protocol notes:
* 1) Request (req) messages are generally sent over DRM_VIRTGPU_EXECBUFFER
* but can also be sent via DRM_VIRTGPU_RESOURCE_CREATE_BLOB (in which
* case they are processed by the host before ctx->get_blob())
* 2) Response (rsp) messages are returned via shmem->rsp_mem, at an offset
* specified by the guest in the req message. Not all req messages have
* a rsp.
* 3) Host and guest could have different pointer sizes, i.e. 32b guest and
* 64b host, or vice versa, so similar to kernel uabi, req and rsp msgs
* should be explicitly padded to avoid 32b vs 64b struct padding issues
*/
/**
* Defines the layout of shmem buffer used for host->guest communication.
*/
struct msm_shmem {
/**
* The sequence # of last cmd processed by the host
*/
uint32_t seqno;
/* TODO maybe separate flags for host to increment when:
* a) host CPU error (like async SUBMIT failed, etc)
* b) global reset count, if it hasn't incremented the guest
* can skip a synchronous getparam..
*/
uint16_t error;
uint16_t rsp_mem_len;
/**
* Memory to use for response messages. The offset to use for the
* response message is allocated by the guest, and specified by
* msm_ccmd_req:rsp_off.
*/
uint8_t rsp_mem[0x4000-8];
};
#define DEFINE_CAST(parent, child) \
static inline struct child *to_##child(const struct parent *x) \
{ \
return (struct child *)x; \
}
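/* For example, DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_new_req) (used below)
* expands to:
*
*    static inline struct msm_ccmd_gem_new_req *
*    to_msm_ccmd_gem_new_req(const struct msm_ccmd_req *x)
*    {
*       return (struct msm_ccmd_gem_new_req *)x;
*    }
*
* ie. a helper that the side decoding the request can use to recover the
* full request struct from the generic msm_ccmd_req header.
*/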
/*
* Possible cmd types for "command stream", ie. payload of EXECBUF ioctl:
*/
enum msm_ccmd {
MSM_CCMD_NOP = 1, /* No payload, can be used to sync with host */
MSM_CCMD_IOCTL_SIMPLE,
MSM_CCMD_GEM_NEW,
MSM_CCMD_GEM_INFO,
MSM_CCMD_GEM_CPU_PREP,
MSM_CCMD_GEM_SET_NAME,
MSM_CCMD_GEM_SUBMIT,
MSM_CCMD_GEM_UPLOAD,
MSM_CCMD_SUBMITQUEUE_QUERY,
MSM_CCMD_WAIT_FENCE,
MSM_CCMD_SET_DEBUGINFO,
MSM_CCMD_LAST,
};
struct msm_ccmd_req {
uint32_t cmd;
uint32_t len;
uint32_t seqno;
/* Offset into shmem ctrl buffer to write response. The host ensures
* that it doesn't write outside the bounds of the ctrl buffer, but
* otherwise it is up to the guest to manage allocation of where responses
* should be written in the ctrl buf.
*/
uint32_t rsp_off;
};
struct msm_ccmd_rsp {
uint32_t len;
};
#define MSM_CCMD(_cmd, _len) (struct msm_ccmd_req){ \
.cmd = MSM_CCMD_##_cmd, \
.len = (_len), \
}
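/* Illustrative sketch (not part of the wire format): a synchronous
* request/response round trip as seen from the guest, using the guest-
* side helpers added later in this series (virtio_alloc_rsp() and
* virtio_execbuf() in virtio_device.c):
*
*    struct msm_ccmd_submitqueue_query_req req = {
*          .hdr = MSM_CCMD(SUBMITQUEUE_QUERY, sizeof(req)),
*          .queue_id = queue_id,
*          .param = MSM_SUBMITQUEUE_PARAM_FAULTS,
*          .len = sizeof(uint64_t),
*    };
*    struct msm_ccmd_submitqueue_query_rsp *rsp =
*          virtio_alloc_rsp(dev, &req.hdr, sizeof(*rsp) + req.len);
*
*    virtio_execbuf(dev, &req.hdr, true);   // sync: waits for shmem->seqno
*    // rsp->ret and rsp->payload now hold the host's reply
*/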
/*
* MSM_CCMD_NOP
*/
struct msm_ccmd_nop_req {
struct msm_ccmd_req hdr;
};
/*
* MSM_CCMD_IOCTL_SIMPLE
*
* Forward simple/flat IOC_RW or IOC_W ioctls. Limited ioctls are supported.
*/
struct msm_ccmd_ioctl_simple_req {
struct msm_ccmd_req hdr;
uint32_t cmd;
uint8_t payload[];
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_ioctl_simple_req)
struct msm_ccmd_ioctl_simple_rsp {
struct msm_ccmd_rsp hdr;
/* ioctl return value, interrupted syscalls are handled on the host without
* returning to the guest.
*/
int32_t ret;
/* The output payload for IOC_RW ioctls, the same size as the payload in
* msm_ccmd_ioctl_simple_req.
*
* For IOC_W ioctls (userspace writes, kernel reads) this is zero length.
*/
uint8_t payload[];
};
/*
* MSM_CCMD_GEM_NEW
*
* GEM buffer allocation, maps to DRM_MSM_GEM_NEW plus DRM_MSM_GEM_INFO to
* get the BO's iova (to avoid extra guest<->host round trip)
*/
struct msm_ccmd_gem_new_req {
struct msm_ccmd_req hdr;
uint64_t size;
uint32_t flags;
uint32_t blob_id;
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_new_req)
struct msm_ccmd_gem_new_rsp {
struct msm_ccmd_rsp hdr;
int32_t ret;
uint32_t host_handle; /* host side GEM handle, used for cmdstream submit */
uint64_t iova;
};
/*
* MSM_CCMD_GEM_INFO
*
* Returns similar information as MSM_CCMD_GEM_NEW, but for imported BO's,
* which don't have a blob_id in our context, but do have a resource-id
*/
struct msm_ccmd_gem_info_req {
struct msm_ccmd_req hdr;
uint32_t res_id;
uint32_t blob_mem; // TODO do we need this?
uint32_t blob_id; // TODO do we need this?
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_info_req)
struct msm_ccmd_gem_info_rsp {
struct msm_ccmd_rsp hdr;
int32_t ret;
uint32_t host_handle; /* host side GEM handle, used for cmdstream submit */
uint64_t iova;
uint32_t pad;
uint32_t size; /* true size of bo on host side */
};
/*
* MSM_CCMD_GEM_CPU_PREP
*
* Maps to DRM_MSM_GEM_CPU_PREP
*
* Note: currently this uses a relative timeout mapped to absolute timeout
* on the host, because I don't think we can rely on monotonic time being
* aligned between host and guest. This has the slight drawback of not
* handling interrupted syscalls on the guest side, but since the actual
* waiting happens on the host side (after guest execbuf ioctl returns)
* this shouldn't be *that* much of a problem.
*
* If we could rely on host and guest times being aligned, we could use
* MSM_CCMD_IOCTL_SIMPLE instead
*/
struct msm_ccmd_gem_cpu_prep_req {
struct msm_ccmd_req hdr;
uint32_t host_handle;
uint32_t op;
uint64_t timeout;
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_cpu_prep_req)
struct msm_ccmd_gem_cpu_prep_rsp {
struct msm_ccmd_rsp hdr;
int32_t ret;
};
/*
* MSM_CCMD_GEM_SET_NAME
*
* Maps to DRM_MSM_GEM_INFO:MSM_INFO_SET_NAME
*
* No response.
*/
struct msm_ccmd_gem_set_name_req {
struct msm_ccmd_req hdr;
uint32_t host_handle;
/* Note: packet size aligned to 4 bytes, so the string name may
* be shorter than the packet header indicates.
*/
uint32_t len;
uint8_t payload[];
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_set_name_req)
/*
* MSM_CCMD_GEM_SUBMIT
*
* Maps to DRM_MSM_GEM_SUBMIT
*
* The actual for-reals cmdstream submission. Note this intentionally
* does not support relocs, since we already require a non-ancient
* kernel.
*
* Note, no in/out fence-fd, that synchronization is handled on guest
* kernel side (ugg).. need to come up with a better story for fencing.
* We probably need to sort something out for that to handle syncobjs.
*
* Note that the bo handles referenced are the host handles, so that
* they can be directly passed to the host kernel without translation.
*
* TODO we can pack the payload tighter (and enforce no-relocs) if we
* defined our own structs, at the cost of host userspace having to
* do a bit more work. Is it worth it? It could probably be done
* without extra overhead in guest userspace..
*
* No response.
*/
struct msm_ccmd_gem_submit_req {
struct msm_ccmd_req hdr;
uint32_t flags;
uint32_t queue_id;
uint32_t nr_bos;
uint32_t nr_cmds;
/**
* What userspace expects the next seqno fence to be. To avoid having
* to wait for host, the guest tracks what it expects to be the next
* returned seqno fence. This is passed to the host just for error
* checking.
*/
uint32_t fence;
/**
* Payload is first an array of 'struct drm_msm_gem_submit_bo' of
* length determined by nr_bos (note that handles are host handles),
* followed by an array of 'struct drm_msm_gem_submit_cmd' of length
* determined by nr_cmds
*/
int8_t payload[];
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_submit_req)
/*
* MSM_CCMD_GEM_UPLOAD
*
* Upload data to a GEM buffer
*
* No response.
*/
struct msm_ccmd_gem_upload_req {
struct msm_ccmd_req hdr;
uint32_t host_handle;
uint32_t pad;
uint32_t off;
/* Note: packet size aligned to 4 bytes, so the data may be
* shorter than the packet header indicates.
*/
uint32_t len;
uint8_t payload[];
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_upload_req)
/*
* MSM_CCMD_SUBMITQUEUE_QUERY
*
* Maps to DRM_MSM_SUBMITQUEUE_QUERY
*/
struct msm_ccmd_submitqueue_query_req {
struct msm_ccmd_req hdr;
uint32_t queue_id;
uint32_t param;
uint32_t len; /* size of payload in rsp */
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_submitqueue_query_req)
struct msm_ccmd_submitqueue_query_rsp {
struct msm_ccmd_rsp hdr;
int32_t ret;
uint32_t out_len;
uint8_t payload[];
};
/*
* MSM_CCMD_WAIT_FENCE
*
* Maps to DRM_MSM_WAIT_FENCE
*
* Note: currently this uses a relative timeout mapped to absolute timeout
* on the host, because I don't think we can rely on monotonic time being
* aligned between host and guest. This has the slight drawback of not
* handling interrupted syscalls on the guest side, but since the actual
* waiting happens on the host side (after guest execbuf ioctl returns)
* this shouldn't be *that* much of a problem.
*
* If we could rely on host and guest times being aligned, we could use
* MSM_CCMD_IOCTL_SIMPLE instead
*/
struct msm_ccmd_wait_fence_req {
struct msm_ccmd_req hdr;
uint32_t queue_id;
uint32_t fence;
uint64_t timeout;
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_wait_fence_req)
struct msm_ccmd_wait_fence_rsp {
struct msm_ccmd_rsp hdr;
int32_t ret;
};
/*
* MSM_CCMD_SET_DEBUGINFO
*
* Set per-guest-process debug info (comm and cmdline). For GPU faults/
* crashes, it isn't too useful to see the crosvm (for ex.) comm/cmdline,
* since the host process is only a proxy. This allows the guest to
* pass through the guest process comm and commandline for debugging
* purposes.
*
* No response.
*/
struct msm_ccmd_set_debuginfo_req {
struct msm_ccmd_req hdr;
uint32_t comm_len;
uint32_t cmdline_len;
/**
* Payload is first the comm string followed by cmdline string, padded
* out to a multiple of 4.
*/
int8_t payload[];
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_set_debuginfo_req)
#endif /* MSM_PROTO_H_ */

View File

@ -0,0 +1,374 @@
/*
* Copyright © 2022 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "virtio_priv.h"
static int
bo_allocate(struct virtio_bo *virtio_bo)
{
struct fd_bo *bo = &virtio_bo->base;
if (!virtio_bo->offset) {
struct drm_virtgpu_map req = {
.handle = bo->handle,
};
int ret;
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_VIRTGPU_MAP, &req);
if (ret) {
ERROR_MSG("alloc failed: %s", strerror(errno));
return ret;
}
virtio_bo->offset = req.offset;
}
return 0;
}
static int
virtio_bo_offset(struct fd_bo *bo, uint64_t *offset)
{
struct virtio_bo *virtio_bo = to_virtio_bo(bo);
int ret = bo_allocate(virtio_bo);
if (ret)
return ret;
*offset = virtio_bo->offset;
return 0;
}
static int
virtio_bo_cpu_prep_guest(struct fd_bo *bo)
{
struct drm_virtgpu_3d_wait args = {
.handle = bo->handle,
};
int ret;
/* Side note, this ioctl is defined as IO_WR but should be IO_W: */
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_VIRTGPU_WAIT, &args);
if (ret && errno == EBUSY)
return -EBUSY;
return 0;
}
static int
virtio_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
{
int ret;
/*
* Wait first in the guest, to avoid a blocking call in host.
* If implicit sync is used, we still need to *also* wait in
* host, if it is a shared buffer, because the guest doesn't
* know about usage of the bo in the host (or other guests).
*/
ret = virtio_bo_cpu_prep_guest(bo);
if (ret)
goto out;
/* If buffer is not shared, then it is not shared with host,
* so we don't need to worry about implicit sync in host:
*/
if (!bo->shared)
goto out;
/* If buffer is shared, but we are using explicit sync, no
* need to fallback to implicit sync in host:
*/
if (pipe && to_virtio_pipe(pipe)->no_implicit_sync)
goto out;
struct msm_ccmd_gem_cpu_prep_req req = {
.hdr = MSM_CCMD(GEM_CPU_PREP, sizeof(req)),
.host_handle = to_virtio_bo(bo)->host_handle,
.op = op,
.timeout = 5000000000,
};
struct msm_ccmd_gem_cpu_prep_rsp *rsp;
rsp = virtio_alloc_rsp(bo->dev, &req.hdr, sizeof(*rsp));
ret = virtio_execbuf(bo->dev, &req.hdr, true);
if (ret)
goto out;
ret = rsp->ret;
out:
return ret;
}
static void
virtio_bo_cpu_fini(struct fd_bo *bo)
{
/* no-op */
}
static int
virtio_bo_madvise(struct fd_bo *bo, int willneed)
{
/* TODO:
* Currently unsupported, synchronous WILLNEED calls would introduce too
* much latency.. ideally we'd keep state in the guest and only flush
* down to host when host is under memory pressure. (Perhaps virtio-balloon
* could signal this?)
*/
return willneed;
}
static uint64_t
virtio_bo_iova(struct fd_bo *bo)
{
/* The shmem bo is allowed to have no iova, as it is only used for
* guest<->host communications:
*/
assert(bo->iova || (to_virtio_bo(bo)->blob_id == 0));
return bo->iova;
}
static void
virtio_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap)
{
char name[32];
int sz;
/* Note, we cannot set name on the host for the shmem bo, as
* that isn't a real gem obj on the host side.. not having
* an iova is a convenient way to detect this case:
*/
if (!bo->iova)
return;
sz = vsnprintf(name, sizeof(name), fmt, ap);
sz = MIN2(sz, sizeof(name));
unsigned req_len = sizeof(struct msm_ccmd_gem_set_name_req) + align(sz, 4);
uint8_t buf[req_len];
struct msm_ccmd_gem_set_name_req *req = (void *)buf;
req->hdr = MSM_CCMD(GEM_SET_NAME, req_len);
req->host_handle = to_virtio_bo(bo)->host_handle;
req->len = sz;
memcpy(req->payload, name, sz);
virtio_execbuf(bo->dev, &req->hdr, false);
}
static void
virtio_bo_upload(struct fd_bo *bo, void *src, unsigned len)
{
unsigned req_len = sizeof(struct msm_ccmd_gem_upload_req) + align(len, 4);
uint8_t buf[req_len];
struct msm_ccmd_gem_upload_req *req = (void *)buf;
req->hdr = MSM_CCMD(GEM_UPLOAD, req_len);
req->host_handle = to_virtio_bo(bo)->host_handle;
req->pad = 0;
req->off = 0;
req->len = len;
memcpy(req->payload, src, len);
virtio_execbuf(bo->dev, &req->hdr, false);
}
static void
virtio_bo_destroy(struct fd_bo *bo)
{
struct virtio_bo *virtio_bo = to_virtio_bo(bo);
free(virtio_bo);
}
static const struct fd_bo_funcs funcs = {
.offset = virtio_bo_offset,
.cpu_prep = virtio_bo_cpu_prep,
.cpu_fini = virtio_bo_cpu_fini,
.madvise = virtio_bo_madvise,
.iova = virtio_bo_iova,
.set_name = virtio_bo_set_name,
.upload = virtio_bo_upload,
.destroy = virtio_bo_destroy,
};
static struct fd_bo *
bo_from_handle(struct fd_device *dev, uint32_t size, uint32_t handle)
{
struct virtio_bo *virtio_bo;
struct fd_bo *bo;
virtio_bo = calloc(1, sizeof(*virtio_bo));
if (!virtio_bo)
return NULL;
bo = &virtio_bo->base;
bo->size = size;
bo->funcs = &funcs;
bo->handle = handle;
return bo;
}
/* allocate a new buffer object from existing handle */
struct fd_bo *
virtio_bo_from_handle(struct fd_device *dev, uint32_t size, uint32_t handle)
{
struct fd_bo *bo = bo_from_handle(dev, size, handle);
struct drm_virtgpu_resource_info args = {
.bo_handle = handle,
};
int ret;
ret = drmCommandWriteRead(dev->fd, DRM_VIRTGPU_RESOURCE_INFO, &args, sizeof(args));
if (ret) {
INFO_MSG("failed to get resource info: %s", strerror(errno));
goto fail;
}
struct msm_ccmd_gem_info_req req = {
.hdr = MSM_CCMD(GEM_INFO, sizeof(req)),
.res_id = args.res_handle,
.blob_mem = args.blob_mem,
.blob_id = p_atomic_inc_return(&to_virtio_device(dev)->next_blob_id),
};
struct msm_ccmd_gem_info_rsp *rsp =
virtio_alloc_rsp(dev, &req.hdr, sizeof(*rsp));
ret = virtio_execbuf(dev, &req.hdr, true);
if (ret) {
INFO_MSG("failed to get gem info: %s", strerror(errno));
goto fail;
}
if (rsp->ret) {
INFO_MSG("failed (on host) to get gem info: %s", strerror(rsp->ret));
goto fail;
}
struct virtio_bo *virtio_bo = to_virtio_bo(bo);
virtio_bo->blob_id = req.blob_id;
virtio_bo->host_handle = rsp->host_handle;
bo->iova = rsp->iova;
/* If the imported buffer is allocated via virgl context (for example
* minigbm/arc-cros-gralloc) then the guest gem object size is fake,
* potentially not accounting for UBWC meta data, required pitch
* alignment, etc. But in the import path the gallium driver checks
* that the size matches the minimum size based on layout. So replace
* the guest potentially-fake size with the real size from the host:
*/
bo->size = rsp->size;
return bo;
fail:
virtio_bo_destroy(bo);
return NULL;
}
/* allocate a buffer object: */
struct fd_bo *
virtio_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags)
{
struct virtio_device *virtio_dev = to_virtio_device(dev);
struct drm_virtgpu_resource_create_blob args = {
.blob_mem = VIRTGPU_BLOB_MEM_HOST3D,
.size = size,
};
struct msm_ccmd_gem_new_req req = {
.hdr = MSM_CCMD(GEM_NEW, sizeof(req)),
.size = size,
};
struct msm_ccmd_gem_new_rsp *rsp = NULL;
int ret;
if (flags & FD_BO_SCANOUT)
req.flags |= MSM_BO_SCANOUT;
if (flags & FD_BO_GPUREADONLY)
req.flags |= MSM_BO_GPU_READONLY;
if (flags & FD_BO_CACHED_COHERENT) {
req.flags |= MSM_BO_CACHED_COHERENT;
} else {
req.flags |= MSM_BO_WC;
}
if (flags & _FD_BO_VIRTIO_SHM) {
args.blob_id = 0;
args.blob_flags = VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
} else {
if (flags & (FD_BO_SHARED | FD_BO_SCANOUT)) {
args.blob_flags = VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE |
VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
} else if (!(flags & FD_BO_NOMAP)) {
args.blob_flags = VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
}
args.blob_id = p_atomic_inc_return(&virtio_dev->next_blob_id);
args.cmd = VOID2U64(&req);
args.cmd_size = sizeof(req);
/* tunneled cmds are processed separately on host side,
* before the renderer->get_blob() callback.. the blob_id
* is used to link the created bo to the get_blob() call
*/
req.blob_id = args.blob_id;
rsp = virtio_alloc_rsp(dev, &req.hdr, sizeof(*rsp));
}
simple_mtx_lock(&virtio_dev->eb_lock);
if (rsp)
req.hdr.seqno = ++virtio_dev->next_seqno;
ret = drmIoctl(dev->fd, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args);
simple_mtx_unlock(&virtio_dev->eb_lock);
if (ret)
goto fail;
struct fd_bo *bo = bo_from_handle(dev, size, args.bo_handle);
struct virtio_bo *virtio_bo = to_virtio_bo(bo);
virtio_bo->blob_id = args.blob_id;
if (rsp) {
/* RESOURCE_CREATE_BLOB is async, so we need to wait for host..
* which is a bit unfortunate, but better to sync here than
* add extra code to check if we need to wait each time we
* emit a reloc.
*/
virtio_host_sync(dev, &req.hdr);
virtio_bo->host_handle = rsp->host_handle;
bo->iova = rsp->iova;
}
return bo;
fail:
return NULL;
}

View File

@ -0,0 +1,309 @@
/*
* Copyright © 2022 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "util/u_process.h"
#include "virtio_priv.h"
static void
virtio_device_destroy(struct fd_device *dev)
{
struct virtio_device *virtio_dev = to_virtio_device(dev);
fd_bo_del_locked(virtio_dev->shmem_bo);
}
static const struct fd_device_funcs funcs = {
.bo_new = virtio_bo_new,
.bo_from_handle = virtio_bo_from_handle,
.pipe_new = virtio_pipe_new,
.destroy = virtio_device_destroy,
};
static int
get_capset(int fd, struct virgl_renderer_capset_drm *caps)
{
struct drm_virtgpu_get_caps args = {
.cap_set_id = VIRGL_RENDERER_CAPSET_DRM,
.cap_set_ver = 0,
.addr = VOID2U64(caps),
.size = sizeof(*caps),
};
return drmIoctl(fd, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
}
static int
set_context(int fd)
{
struct drm_virtgpu_context_set_param params[] = {
{ VIRTGPU_CONTEXT_PARAM_CAPSET_ID, VIRGL_RENDERER_CAPSET_DRM },
};
struct drm_virtgpu_context_init args = {
.num_params = ARRAY_SIZE(params),
.ctx_set_params = VOID2U64(params),
};
return drmIoctl(fd, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
}
static void
set_debuginfo(struct fd_device *dev)
{
const char *comm = util_get_process_name();
static char cmdline[0x1000+1];
int fd = open("/proc/self/cmdline", O_RDONLY);
if (fd < 0)
return;
int n = read(fd, cmdline, sizeof(cmdline) - 1);
if (n < 0)
return;
/* arguments are separated by NULL, convert to spaces: */
for (int i = 0; i < n; i++) {
if (cmdline[i] == '\0') {
cmdline[i] = ' ';
}
}
cmdline[n] = '\0';
unsigned comm_len = strlen(comm) + 1;
unsigned cmdline_len = strlen(cmdline) + 1;
struct msm_ccmd_set_debuginfo_req *req;
unsigned req_len = align(sizeof(*req) + comm_len + cmdline_len, 4);
req = malloc(req_len);
req->hdr = MSM_CCMD(SET_DEBUGINFO, req_len);
req->comm_len = comm_len;
req->cmdline_len = cmdline_len;
memcpy(&req->payload[0], comm, comm_len);
memcpy(&req->payload[comm_len], cmdline, cmdline_len);
virtio_execbuf(dev, &req->hdr, false);
free(req);
}
struct fd_device *
virtio_device_new(int fd, drmVersionPtr version)
{
struct virgl_renderer_capset_drm caps;
struct virtio_device *virtio_dev;
struct fd_device *dev;
int ret;
STATIC_ASSERT(FD_BO_PREP_READ == MSM_PREP_READ);
STATIC_ASSERT(FD_BO_PREP_WRITE == MSM_PREP_WRITE);
STATIC_ASSERT(FD_BO_PREP_NOSYNC == MSM_PREP_NOSYNC);
/* Debug option to force fallback to virgl: */
if (debug_get_bool_option("FD_NO_VIRTIO", false))
return NULL;
ret = get_capset(fd, &caps);
if (ret) {
INFO_MSG("could not get caps: %s", strerror(errno));
return NULL;
}
if (caps.context_type != VIRTGPU_DRM_CONTEXT_MSM) {
INFO_MSG("wrong context_type: %u", caps.context_type);
return NULL;
}
INFO_MSG("wire_format_version: %u", caps.wire_format_version);
INFO_MSG("version_major: %u", caps.version_major);
INFO_MSG("version_minor: %u", caps.version_minor);
INFO_MSG("version_patchlevel: %u", caps.version_patchlevel);
INFO_MSG("has_cached_coherent: %u", caps.u.msm.has_cached_coherent);
if (caps.wire_format_version != 1) {
ERROR_MSG("Unsupported protocol version: %u", caps.wire_format_version);
return NULL;
}
if ((caps.version_major != 1) || (caps.version_minor < FD_VERSION_SOFTPIN)) {
ERROR_MSG("unsupported version: %u.%u.%u", caps.version_major,
caps.version_minor, caps.version_patchlevel);
return NULL;
}
ret = set_context(fd);
if (ret) {
INFO_MSG("Could not set context type: %s", strerror(errno));
return NULL;
}
virtio_dev = calloc(1, sizeof(*virtio_dev));
if (!virtio_dev)
return NULL;
dev = &virtio_dev->base;
dev->funcs = &funcs;
dev->fd = fd;
dev->version = caps.version_minor;
dev->has_cached_coherent = caps.u.msm.has_cached_coherent;
p_atomic_set(&virtio_dev->next_blob_id, 1);
util_queue_init(&dev->submit_queue, "sq", 8, 1, 0, NULL);
dev->bo_size = sizeof(struct virtio_bo);
simple_mtx_init(&virtio_dev->rsp_lock, mtx_plain);
simple_mtx_init(&virtio_dev->eb_lock, mtx_plain);
set_debuginfo(dev);
return dev;
}
void *
virtio_alloc_rsp(struct fd_device *dev, struct msm_ccmd_req *req, uint32_t sz)
{
struct virtio_device *virtio_dev = to_virtio_device(dev);
unsigned off;
simple_mtx_lock(&virtio_dev->rsp_lock);
sz = align(sz, 8);
if ((virtio_dev->next_rsp_off + sz) >= sizeof(virtio_dev->shmem->rsp_mem))
virtio_dev->next_rsp_off = 0;
off = virtio_dev->next_rsp_off;
virtio_dev->next_rsp_off += sz;
simple_mtx_unlock(&virtio_dev->rsp_lock);
req->rsp_off = off;
struct msm_ccmd_rsp *rsp = (void *)&virtio_dev->shmem->rsp_mem[off];
rsp->len = sz;
return rsp;
}
/**
* Helper for "execbuf" ioctl.. note that in virtgpu execbuf is just
* a generic "send commands to host", not necessarily specific to
* cmdstream execution.
*/
int
virtio_execbuf_fenced(struct fd_device *dev, struct msm_ccmd_req *req,
int in_fence_fd, int *out_fence_fd)
{
struct virtio_device *virtio_dev = to_virtio_device(dev);
simple_mtx_lock(&virtio_dev->eb_lock);
req->seqno = ++virtio_dev->next_seqno;
#define COND(bool, val) ((bool) ? (val) : 0)
struct drm_virtgpu_execbuffer eb = {
.flags = COND(out_fence_fd, VIRTGPU_EXECBUF_FENCE_FD_OUT) |
COND(in_fence_fd != -1, VIRTGPU_EXECBUF_FENCE_FD_IN),
.fence_fd = in_fence_fd,
.size = req->len,
.command = VOID2U64(req),
};
int ret = drmIoctl(dev->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &eb);
simple_mtx_unlock(&virtio_dev->eb_lock);
if (ret) {
ERROR_MSG("EXECBUFFER failed: %s", strerror(errno));
return ret;
}
if (out_fence_fd)
*out_fence_fd = eb.fence_fd;
return 0;
}
int
virtio_execbuf(struct fd_device *dev, struct msm_ccmd_req *req, bool sync)
{
int ret = virtio_execbuf_fenced(dev, req, -1, NULL);
if (ret)
return ret;
if (sync)
virtio_host_sync(dev, req);
return 0;
}
/**
* Wait until the host has processed the specified request.
*/
void
virtio_host_sync(struct fd_device *dev, const struct msm_ccmd_req *req)
{
struct virtio_device *virtio_dev = to_virtio_device(dev);
while (fd_fence_before(virtio_dev->shmem->seqno, req->seqno))
sched_yield();
}
/**
* Helper for simple pass-thru ioctls
*/
int
virtio_simple_ioctl(struct fd_device *dev, unsigned cmd, void *_req)
{
unsigned req_len = sizeof(struct msm_ccmd_ioctl_simple_req);
unsigned rsp_len = sizeof(struct msm_ccmd_ioctl_simple_rsp);
req_len += _IOC_SIZE(cmd);
if (cmd & IOC_OUT)
rsp_len += _IOC_SIZE(cmd);
uint8_t buf[req_len];
struct msm_ccmd_ioctl_simple_req *req = (void *)buf;
struct msm_ccmd_ioctl_simple_rsp *rsp;
req->hdr = MSM_CCMD(IOCTL_SIMPLE, req_len);
req->cmd = cmd;
memcpy(req->payload, _req, _IOC_SIZE(cmd));
rsp = virtio_alloc_rsp(dev, &req->hdr, rsp_len);
int ret = virtio_execbuf(dev, &req->hdr, true);
if (cmd & IOC_OUT)
memcpy(_req, rsp->payload, _IOC_SIZE(cmd));
ret = rsp->ret;
return ret;
}

View File

@ -0,0 +1,275 @@
/*
* Copyright © 2022 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "util/slab.h"
#include "freedreno_ringbuffer_sp.h"
#include "virtio_priv.h"
static int
query_param(struct fd_pipe *pipe, uint32_t param, uint64_t *value)
{
struct virtio_pipe *virtio_pipe = to_virtio_pipe(pipe);
struct drm_msm_param req = {
.pipe = virtio_pipe->pipe,
.param = param,
};
int ret;
ret = virtio_simple_ioctl(pipe->dev, DRM_IOCTL_MSM_GET_PARAM, &req);
if (ret)
return ret;
*value = req.value;
return 0;
}
static int
query_queue_param(struct fd_pipe *pipe, uint32_t param, uint64_t *value)
{
struct msm_ccmd_submitqueue_query_req req = {
.hdr = MSM_CCMD(SUBMITQUEUE_QUERY, sizeof(req)),
.queue_id = to_virtio_pipe(pipe)->queue_id,
.param = param,
.len = sizeof(*value),
};
struct msm_ccmd_submitqueue_query_rsp *rsp;
unsigned rsp_len = sizeof(*rsp) + req.len;
rsp = virtio_alloc_rsp(pipe->dev, &req.hdr, rsp_len);
int ret = virtio_execbuf(pipe->dev, &req.hdr, true);
if (ret)
goto out;
memcpy(value, rsp->payload, req.len);
ret = rsp->ret;
out:
return ret;
}
static int
virtio_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param,
uint64_t *value)
{
struct virtio_pipe *virtio_pipe = to_virtio_pipe(pipe);
switch (param) {
case FD_DEVICE_ID: // XXX probably get rid of this..
case FD_GPU_ID:
*value = virtio_pipe->gpu_id;
return 0;
case FD_GMEM_SIZE:
*value = virtio_pipe->gmem;
return 0;
case FD_GMEM_BASE:
*value = virtio_pipe->gmem_base;
return 0;
case FD_CHIP_ID:
*value = virtio_pipe->chip_id;
return 0;
case FD_MAX_FREQ:
return query_param(pipe, MSM_PARAM_MAX_FREQ, value);
case FD_TIMESTAMP:
return query_param(pipe, MSM_PARAM_TIMESTAMP, value);
case FD_NR_RINGS:
/* TODO need to not rely on host egl ctx for fence if
* we want to support multiple priority levels
*/
return 1;
// return query_param(pipe, MSM_PARAM_NR_RINGS, value);
case FD_PP_PGTABLE:
return query_param(pipe, MSM_PARAM_PP_PGTABLE, value);
case FD_CTX_FAULTS:
return query_queue_param(pipe, MSM_SUBMITQUEUE_PARAM_FAULTS, value);
case FD_GLOBAL_FAULTS:
return query_param(pipe, MSM_PARAM_FAULTS, value);
case FD_SUSPEND_COUNT:
return query_param(pipe, MSM_PARAM_SUSPENDS, value);
default:
ERROR_MSG("invalid param id: %d", param);
return -1;
}
}
static int
virtio_pipe_wait(struct fd_pipe *pipe, const struct fd_fence *fence, uint64_t timeout)
{
struct msm_ccmd_wait_fence_req req = {
.hdr = MSM_CCMD(WAIT_FENCE, sizeof(req)),
.queue_id = to_virtio_pipe(pipe)->queue_id,
.fence = fence->kfence,
.timeout = timeout,
};
struct msm_ccmd_wait_fence_rsp *rsp;
rsp = virtio_alloc_rsp(pipe->dev, &req.hdr, sizeof(*rsp));
int ret = virtio_execbuf(pipe->dev, &req.hdr, true);
if (ret)
goto out;
ret = rsp->ret;
out:
return ret;
}
static int
open_submitqueue(struct fd_pipe *pipe, uint32_t prio)
{
struct drm_msm_submitqueue req = {
.flags = 0,
.prio = prio,
};
uint64_t nr_rings = 1;
int ret;
virtio_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings);
req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1);
ret = virtio_simple_ioctl(pipe->dev, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req);
if (ret) {
ERROR_MSG("could not create submitqueue! %d (%s)", ret, strerror(errno));
return ret;
}
to_virtio_pipe(pipe)->queue_id = req.id;
return 0;
}
static void
close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id)
{
virtio_simple_ioctl(pipe->dev, DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE, &queue_id);
}
static void
virtio_pipe_destroy(struct fd_pipe *pipe)
{
struct virtio_pipe *virtio_pipe = to_virtio_pipe(pipe);
close_submitqueue(pipe, virtio_pipe->queue_id);
fd_pipe_sp_ringpool_fini(pipe);
free(virtio_pipe);
}
static const struct fd_pipe_funcs funcs = {
.ringbuffer_new_object = fd_ringbuffer_sp_new_object,
.submit_new = virtio_submit_new,
.flush = fd_pipe_sp_flush,
.get_param = virtio_pipe_get_param,
.wait = virtio_pipe_wait,
.destroy = virtio_pipe_destroy,
};
static uint64_t
get_param(struct fd_pipe *pipe, uint32_t param)
{
uint64_t value;
int ret = query_param(pipe, param, &value);
if (ret) {
ERROR_MSG("get-param failed! %d (%s)", ret, strerror(errno));
return 0;
}
return value;
}
static void
init_shmem(struct fd_device *dev)
{
struct virtio_device *virtio_dev = to_virtio_device(dev);
simple_mtx_lock(&virtio_dev->rsp_lock);
/* One would like to do this in virtio_device_new(), but we'd
* have to bypass/reinvent fd_bo_new()..
*/
if (unlikely(!virtio_dev->shmem)) {
virtio_dev->shmem_bo = fd_bo_new(dev, sizeof(*virtio_dev->shmem),
_FD_BO_VIRTIO_SHM, "shmem");
virtio_dev->shmem = fd_bo_map(virtio_dev->shmem_bo);
virtio_dev->shmem_bo->bo_reuse = NO_CACHE;
}
simple_mtx_unlock(&virtio_dev->rsp_lock);
}
struct fd_pipe *
virtio_pipe_new(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio)
{
static const uint32_t pipe_id[] = {
[FD_PIPE_3D] = MSM_PIPE_3D0,
[FD_PIPE_2D] = MSM_PIPE_2D0,
};
struct virtio_pipe *virtio_pipe = NULL;
struct fd_pipe *pipe = NULL;
init_shmem(dev);
virtio_pipe = calloc(1, sizeof(*virtio_pipe));
if (!virtio_pipe) {
ERROR_MSG("allocation failed");
goto fail;
}
pipe = &virtio_pipe->base;
pipe->funcs = &funcs;
/* initialize before get_param(): */
pipe->dev = dev;
virtio_pipe->pipe = pipe_id[id];
/* these params should be supported since the first version of drm/msm: */
virtio_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID);
virtio_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE);
virtio_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID);
if (fd_device_version(pipe->dev) >= FD_VERSION_GMEM_BASE)
virtio_pipe->gmem_base = get_param(pipe, MSM_PARAM_GMEM_BASE);
if (!(virtio_pipe->gpu_id || virtio_pipe->chip_id))
goto fail;
INFO_MSG("Pipe Info:");
INFO_MSG(" GPU-id: %d", virtio_pipe->gpu_id);
INFO_MSG(" Chip-id: 0x%016"PRIx64, virtio_pipe->chip_id);
INFO_MSG(" GMEM size: 0x%08x", virtio_pipe->gmem);
if (open_submitqueue(pipe, prio))
goto fail;
fd_pipe_sp_ringpool_init(pipe);
return pipe;
fail:
if (pipe)
fd_pipe_del(pipe);
return NULL;
}

View File

@ -0,0 +1,121 @@
/*
* Copyright © 2022 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef VIRTIO_PRIV_H_
#define VIRTIO_PRIV_H_
#include <poll.h>
#include "freedreno_priv.h"
#include "util/u_atomic.h"
#include "util/slab.h"
#include "util/timespec.h"
#include "pipe/p_defines.h"
#include "drm-uapi/virtgpu_drm.h"
/* We also use some types/defines from the host drm/msm uabi: */
#include "drm-uapi/msm_drm.h"
#define VIRGL_RENDERER_UNSTABLE_APIS 1
#include "virglrenderer_hw.h"
#include "msm_proto.h"
struct virtio_device {
struct fd_device base;
struct fd_bo *shmem_bo;
struct msm_shmem *shmem;
uint32_t next_rsp_off;
simple_mtx_t rsp_lock;
simple_mtx_t eb_lock;
uint32_t next_blob_id;
uint32_t next_seqno;
};
FD_DEFINE_CAST(fd_device, virtio_device);
struct fd_device *virtio_device_new(int fd, drmVersionPtr version);
struct virtio_pipe {
struct fd_pipe base;
uint32_t pipe;
uint32_t gpu_id;
uint64_t chip_id;
uint64_t gmem_base;
uint32_t gmem;
uint32_t queue_id;
struct slab_parent_pool ring_pool;
/**
* If we *ever* see an in-fence-fd, assume that userspace is
* not relying on implicit fences.
*/
bool no_implicit_sync;
/**
* We know that the kernel allocated fence seqno's sequentially per-
* submitqueue in a range 1..INT_MAX, which is incremented *after* any
* point where the submit ioctl could be restarted. So we just *guess*
* what the next seqno fence will be to avoid having to synchronize the
* submit with the host.
*
* TODO maybe we need version_minor bump so we can make the 1..INT_MAX
* assumption.. it is only really true after:
*
* ca3ffcbeb0c8 ("drm/msm/gpu: Don't allow zero fence_id")
*/
int32_t next_submit_fence;
};
FD_DEFINE_CAST(fd_pipe, virtio_pipe);
struct fd_pipe *virtio_pipe_new(struct fd_device *dev, enum fd_pipe_id id,
uint32_t prio);
struct fd_submit *virtio_submit_new(struct fd_pipe *pipe);
struct virtio_bo {
struct fd_bo base;
uint64_t offset;
uint32_t host_handle;
uint32_t blob_id;
};
FD_DEFINE_CAST(fd_bo, virtio_bo);
struct fd_bo *virtio_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags);
struct fd_bo *virtio_bo_from_handle(struct fd_device *dev, uint32_t size,
uint32_t handle);
/*
* Internal helpers:
*/
void *virtio_alloc_rsp(struct fd_device *dev, struct msm_ccmd_req *hdr, uint32_t sz);
int virtio_execbuf_fenced(struct fd_device *dev, struct msm_ccmd_req *req,
int in_fence_fd, int *out_fence_fd);
int virtio_execbuf(struct fd_device *dev, struct msm_ccmd_req *req, bool sync);
void virtio_host_sync(struct fd_device *dev, const struct msm_ccmd_req *req);
int virtio_simple_ioctl(struct fd_device *dev, unsigned cmd, void *req);
#endif /* VIRTIO_PRIV_H_ */

View File

@ -0,0 +1,191 @@
/*
* Copyright © 2022 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <assert.h>
#include <inttypes.h>
#include <pthread.h>
#include "util/os_file.h"
#include "drm/freedreno_ringbuffer_sp.h"
#include "virtio_priv.h"
static int
flush_submit_list(struct list_head *submit_list)
{
struct fd_submit_sp *fd_submit = to_fd_submit_sp(last_submit(submit_list));
struct virtio_pipe *virtio_pipe = to_virtio_pipe(fd_submit->base.pipe);
struct fd_device *dev = virtio_pipe->base.dev;
unsigned nr_cmds = 0;
/* Determine the number of extra cmds from deferred submits that
* we will be merging in:
*/
foreach_submit (submit, submit_list) {
assert(submit->pipe == &virtio_pipe->base);
nr_cmds += to_fd_ringbuffer_sp(submit->primary)->u.nr_cmds;
}
/* TODO we can get rid of the extra copy into the req by just
* assuming the max amount that nr_bos will grow is by the
* nr_cmds, and just over-allocate a bit.
*/
struct drm_msm_gem_submit_cmd cmds[nr_cmds];
unsigned cmd_idx = 0;
/* Build up the table of cmds, and for all but the last submit in the
* list, merge their bo tables into the last submit.
*/
foreach_submit_safe (submit, submit_list) {
struct fd_ringbuffer_sp *deferred_primary =
to_fd_ringbuffer_sp(submit->primary);
for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
cmds[cmd_idx].submit_idx =
fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
cmds[cmd_idx].submit_offset = deferred_primary->offset;
cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
cmds[cmd_idx].pad = 0;
cmds[cmd_idx].nr_relocs = 0;
cmd_idx++;
}
/* We are merging all the submits in the list into the last submit,
* so the remainder of the loop body doesn't apply to the last submit
*/
if (submit == last_submit(submit_list)) {
DEBUG_MSG("merged %u submits", cmd_idx);
break;
}
struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit);
for (unsigned i = 0; i < fd_deferred_submit->nr_bos; i++) {
/* Note: if bo is used in both the current submit and the deferred
* submit being merged, we expect to hit the fast-path as we add it
* to the current submit:
*/
fd_submit_append_bo(fd_submit, fd_deferred_submit->bos[i]);
}
/* Now that the cmds/bos have been transferred over to the current submit,
* we can remove the deferred submit from the list and drop its reference
*/
list_del(&submit->node);
fd_submit_del(submit);
}
/* Needs to be after get_cmd() as that could create bos/cmds table:
*
* NOTE allocate on-stack in the common case, but with an upper-
* bound to limit on-stack allocation to 4k:
*/
const unsigned bo_limit = 4096 / sizeof(struct drm_msm_gem_submit_bo);
bool bos_on_stack = fd_submit->nr_bos < bo_limit;
struct drm_msm_gem_submit_bo
_submit_bos[bos_on_stack ? fd_submit->nr_bos : 0];
struct drm_msm_gem_submit_bo *submit_bos;
if (bos_on_stack) {
submit_bos = _submit_bos;
} else {
submit_bos = malloc(fd_submit->nr_bos * sizeof(submit_bos[0]));
}
for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
submit_bos[i].flags = fd_submit->bos[i]->reloc_flags;
submit_bos[i].handle = to_virtio_bo(fd_submit->bos[i])->host_handle;
submit_bos[i].presumed = 0;
}
if (virtio_pipe->next_submit_fence <= 0)
virtio_pipe->next_submit_fence = 1;
uint32_t kfence = virtio_pipe->next_submit_fence++;
/* TODO avoid extra memcpy, and populate bo's and cmds directly
* into the req msg
*/
unsigned bos_len = fd_submit->nr_bos * sizeof(struct drm_msm_gem_submit_bo);
unsigned cmd_len = nr_cmds * sizeof(struct drm_msm_gem_submit_cmd);
unsigned req_len = sizeof(struct msm_ccmd_gem_submit_req) + bos_len + cmd_len;
struct msm_ccmd_gem_submit_req *req = malloc(req_len);
req->hdr = MSM_CCMD(GEM_SUBMIT, req_len);
req->flags = virtio_pipe->pipe;
req->queue_id = virtio_pipe->queue_id;
req->nr_bos = fd_submit->nr_bos;
req->nr_cmds = nr_cmds;
req->fence = kfence;
memcpy(req->payload, submit_bos, bos_len);
memcpy(req->payload + bos_len, cmds, cmd_len);
struct fd_submit_fence *out_fence = fd_submit->out_fence;
int *out_fence_fd = NULL;
if (out_fence) {
out_fence->fence.kfence = kfence;
out_fence->fence.ufence = fd_submit->base.fence;
/* Even if gallium driver hasn't requested a fence-fd, request one.
* This way, if we have to block waiting for the fence, we can do
* it in the guest, rather than in the single-threaded host.
*/
out_fence->use_fence_fd = true;
out_fence_fd = &out_fence->fence_fd;
}
if (fd_submit->in_fence_fd != -1) {
virtio_pipe->no_implicit_sync = true;
}
if (virtio_pipe->no_implicit_sync) {
req->flags |= MSM_SUBMIT_NO_IMPLICIT;
}
virtio_execbuf_fenced(dev, &req->hdr, fd_submit->in_fence_fd, out_fence_fd);
free(req);
if (!bos_on_stack)
free(submit_bos);
if (fd_submit->in_fence_fd != -1)
close(fd_submit->in_fence_fd);
return 0;
}
struct fd_submit *
virtio_submit_new(struct fd_pipe *pipe)
{
/* We don't do any translation from internal FD_RELOC flags to MSM flags. */
STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);
return fd_submit_sp_new(pipe, flush_submit_list);
}

View File

@ -0,0 +1,134 @@
/*
* Copyright © 2022 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifdef X
#undef X
#endif
/*
* TODO make this magic easier to share btw msm_ringbuffer_sp and virtio_ringbuffer
*/
#if PTRSZ == 32
#define X(n) n##_32
#else
#define X(n) n##_64
#endif
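/* Usage sketch (illustrative; assumes the same arrangement as the msm
* sp ringbuffer): the including .c file instantiates this template once
* per pointer size, roughly:
*
*    #define PTRSZ 32
*    #include "virtio_ringbuffer.h"
*    #undef PTRSZ
*    #define PTRSZ 64
*    #include "virtio_ringbuffer.h"
*/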
static void X(emit_reloc_common)(struct fd_ringbuffer *ring,
const struct fd_reloc *reloc)
{
(*ring->cur++) = (uint32_t)reloc->iova;
#if PTRSZ == 64
(*ring->cur++) = (uint32_t)(reloc->iova >> 32);
#endif
}
static void X(virtio_ringbuffer_emit_reloc_nonobj)(struct fd_ringbuffer *ring,
const struct fd_reloc *reloc)
{
X(emit_reloc_common)(ring, reloc);
assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
struct virtio_ringbuffer *virtio_ring = to_virtio_ringbuffer(ring);
struct virtio_submit *virtio_submit = to_virtio_submit(virtio_ring->u.submit);
virtio_submit_append_bo(virtio_submit, reloc->bo);
}
static void X(virtio_ringbuffer_emit_reloc_obj)(struct fd_ringbuffer *ring,
const struct fd_reloc *reloc)
{
X(emit_reloc_common)(ring, reloc);
assert(ring->flags & _FD_RINGBUFFER_OBJECT);
struct virtio_ringbuffer *virtio_ring = to_virtio_ringbuffer(ring);
/* Avoid emitting duplicate BO references into the list. Ringbuffer
* objects are long-lived, so this saves ongoing work at draw time in
* exchange for a bit at context setup/first draw. And the number of
* relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
* hurt much.
*/
if (!virtio_ringbuffer_references_bo(ring, reloc->bo)) {
APPEND(&virtio_ring->u, reloc_bos, fd_bo_ref(reloc->bo));
}
}
static uint32_t X(virtio_ringbuffer_emit_reloc_ring)(
struct fd_ringbuffer *ring, struct fd_ringbuffer *target, uint32_t cmd_idx)
{
struct virtio_ringbuffer *virtio_target = to_virtio_ringbuffer(target);
struct fd_bo *bo;
uint32_t size;
if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
(cmd_idx < virtio_target->u.nr_cmds)) {
bo = virtio_target->u.cmds[cmd_idx].ring_bo;
size = virtio_target->u.cmds[cmd_idx].size;
} else {
bo = virtio_target->ring_bo;
size = offset_bytes(target->cur, target->start);
}
if (ring->flags & _FD_RINGBUFFER_OBJECT) {
X(virtio_ringbuffer_emit_reloc_obj)(ring, &(struct fd_reloc){
.bo = bo,
.iova = bo->iova + virtio_target->offset,
.offset = virtio_target->offset,
});
} else {
X(virtio_ringbuffer_emit_reloc_nonobj)(ring, &(struct fd_reloc){
.bo = bo,
.iova = bo->iova + virtio_target->offset,
.offset = virtio_target->offset,
});
}
if (!(target->flags & _FD_RINGBUFFER_OBJECT))
return size;
struct virtio_ringbuffer *virtio_ring = to_virtio_ringbuffer(ring);
if (ring->flags & _FD_RINGBUFFER_OBJECT) {
for (unsigned i = 0; i < virtio_target->u.nr_reloc_bos; i++) {
struct fd_bo *target_bo = virtio_target->u.reloc_bos[i];
if (!virtio_ringbuffer_references_bo(ring, target_bo))
APPEND(&virtio_ring->u, reloc_bos, fd_bo_ref(target_bo));
}
} else {
// TODO it would be nice to know whether we have already
// seen this target before. But hopefully we hit the
// append_bo() fast path enough for this to not matter:
struct virtio_submit *virtio_submit = to_virtio_submit(virtio_ring->u.submit);
for (unsigned i = 0; i < virtio_target->u.nr_reloc_bos; i++) {
virtio_submit_append_bo(virtio_submit, virtio_target->u.reloc_bos[i]);
}
}
return size;
}

View File

@ -25,6 +25,7 @@ inc_gallium = include_directories('gallium/include')
inc_gallium_aux = include_directories('gallium/auxiliary')
inc_amd_common = include_directories('amd/common')
inc_tool = include_directories('tool')
inc_virtio_gpu = include_directories('virtio/virtio-gpu')
pps_datasources = []
pps_includes = []

View File

@ -0,0 +1,25 @@
/*
* Copyright 2022 Google LLC
* SPDX-License-Identifier: MIT
*/
#ifndef DRM_HW_H_
#define DRM_HW_H_
struct virgl_renderer_capset_drm {
uint32_t wire_format_version;
/* Underlying drm device version: */
uint32_t version_major;
uint32_t version_minor;
uint32_t version_patchlevel;
#define VIRTGPU_DRM_CONTEXT_MSM 1
uint32_t context_type;
uint32_t pad;
union {
struct {
uint32_t has_cached_coherent;
} msm; /* context_type == VIRTGPU_DRM_CONTEXT_MSM */
} u;
};
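/* Query sketch (illustrative; mirrors get_capset() in virtio_device.c from
* this series): the guest fetches this capset via the virtgpu GET_CAPS
* ioctl before deciding whether it can use the native protocol:
*
*    struct virgl_renderer_capset_drm caps;
*    struct drm_virtgpu_get_caps args = {
*          .cap_set_id = VIRGL_RENDERER_CAPSET_DRM,
*          .addr = (uintptr_t)&caps,
*          .size = sizeof(caps),
*    };
*    drmIoctl(fd, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
*    // caps.context_type == VIRTGPU_DRM_CONTEXT_MSM -> speak msm_proto.h
*/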
#endif /* DRM_HW_H_ */

View File

@ -26,6 +26,7 @@
#include "venus_hw.h"
#include "virgl_hw.h"
#include "drm_hw.h"
#ifdef VIRGL_RENDERER_UNSTABLE_APIS
enum virgl_renderer_capset {
@ -33,6 +34,8 @@ enum virgl_renderer_capset {
VIRGL_RENDERER_CAPSET_VIRGL2 = 2,
/* 3 is reserved for gfxstream */
VIRGL_RENDERER_CAPSET_VENUS = 4,
/* 5 is reserved for cross-domain */
VIRGL_RENDERER_CAPSET_DRM = 6,
};
#endif