Bug 1790730 - Add basic infrastructure for gpu buffer texture. r=gfx-reviewers,nical

For some time, we've been wanting to move away from use of the GPU
cache, due to driver complexity and performance bugs when updating
the shared texture.

Instead, we can create one or more buffers that contain the relevant
information for the primitives that are currently dirty + visible,
and being batched for drawing.

By pooling these and only reusing them when the GPU has finished
referencing them, we hope to simplify the management of GPU buffers
compared to the GPU cache, and also remove driver stalls we see
in various cases related to the GPU cache (note: the buffer recycling
will be implemented in follow-up commits).

For now, this basic infrastructure will allow us to experiment with and
profile this approach, while building some of the planned
performance optimizations for clip-mask rendering.

Differential Revision: https://phabricator.services.mozilla.com/D157284
This commit is contained in:
Glenn Watson 2022-09-18 22:21:13 +00:00
parent 26106b8a0e
commit 97ba22d406
7 changed files with 229 additions and 2 deletions

View File

@ -0,0 +1,27 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Helpers for reading vec4 blocks out of the GPU buffer texture.
// Everything here is only compiled when WR_VERTEX_SHADER is defined.
#ifdef WR_VERTEX_SHADER
// Float texture that the vec4 blocks are fetched from.
uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuBuffer;
// Convert a linear block address into 2D texel coordinates:
//   x = address % WR_MAX_VERTEX_TEXTURE_WIDTH  (column within the row)
//   y = address / WR_MAX_VERTEX_TEXTURE_WIDTH  (row)
// The address is reinterpreted as unsigned before the divide / modulo.
ivec2 get_gpu_buffer_uv(HIGHP_FS_ADDRESS int address) {
return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH);
}
// Fetch the single vec4 block stored at `address` (mip level 0).
vec4 fetch_from_gpu_buffer_1(HIGHP_FS_ADDRESS int address) {
ivec2 uv = get_gpu_buffer_uv(address);
return texelFetch(sGpuBuffer, uv, 0);
}
// Fetch two consecutive vec4 blocks starting at `address`.
// The second fetch is passed ivec2(1, 0), i.e. the next column of the same
// row, so both blocks are expected to live in a single texture row.
// NOTE(review): TEXEL_FETCH is defined elsewhere; its last argument is
// presumably a texel offset applied to `uv` - confirm against its definition.
vec4[2] fetch_from_gpu_buffer_2(HIGHP_FS_ADDRESS int address) {
ivec2 uv = get_gpu_buffer_uv(address);
return vec4[2](
TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(0, 0)),
TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(1, 0))
);
}
#endif

View File

@ -20,12 +20,12 @@ use crate::picture::{Picture3DContext, PictureCompositeMode, TileKey, calculate_
use crate::prim_store::{PrimitiveInstanceKind, ClipData, PrimitiveInstanceIndex};
use crate::prim_store::{PrimitiveInstance, PrimitiveOpacity, SegmentInstanceIndex};
use crate::prim_store::{BrushSegment, ClipMaskKind, ClipTaskIndex};
use crate::prim_store::VECS_PER_SEGMENT;
use crate::prim_store::{VECS_PER_SEGMENT};
use crate::render_target::RenderTargetContext;
use crate::render_task_graph::{RenderTaskId, RenderTaskGraph};
use crate::render_task::{RenderTaskAddress, RenderTaskKind};
use crate::renderer::{BlendMode, ShaderColorMode};
use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH, GpuBufferBuilder};
use crate::resource_cache::{GlyphFetchResult, ImageProperties, ImageRequest};
use crate::space::SpaceMapper;
use crate::surface::SurfaceTileDescriptor;
@ -827,6 +827,7 @@ impl BatchBuilder {
root_spatial_node_index: SpatialNodeIndex,
surface_spatial_node_index: SpatialNodeIndex,
z_generator: &mut ZBufferIdGenerator,
_gpu_buffer_builder: &mut GpuBufferBuilder,
) {
let is_anti_aliased = ctx.data_stores.prim_has_anti_aliasing(prim_instance);

View File

@ -21,6 +21,7 @@ use crate::prim_store::{PictureIndex};
use crate::prim_store::{DeferredResolve, PrimitiveInstance};
use crate::profiler::{self, TransactionProfile};
use crate::render_backend::{DataStores, ScratchBuffer};
use crate::renderer::GpuBufferBuilder;
use crate::render_target::{RenderTarget, PictureCacheTarget, TextureCacheRenderTarget, PictureCacheTargetKind};
use crate::render_target::{RenderTargetContext, RenderTargetKind, AlphaRenderTarget, ColorRenderTarget};
use crate::render_task_graph::{RenderTaskGraph, Pass, SubPassSurface};
@ -771,6 +772,7 @@ pub fn build_render_pass(
let task_id = sub_pass.task_ids[0];
let task = &render_tasks[task_id];
let target_rect = task.get_target_rect();
let mut gpu_buffer_builder = GpuBufferBuilder::new();
match task.kind {
RenderTaskKind::Picture(ref pic_task) => {
@ -803,6 +805,7 @@ pub fn build_render_pass(
pic_task.raster_spatial_node_index,
pic_task.surface_spatial_node_index,
z_generator,
&mut gpu_buffer_builder,
);
});
@ -827,6 +830,7 @@ pub fn build_render_pass(
},
dirty_rect: scissor_rect,
valid_rect,
gpu_buffer: gpu_buffer_builder.finalize(),
};
pass.picture_cache.push(target);
@ -841,6 +845,7 @@ pub fn build_render_pass(
},
dirty_rect: tile_task.scissor_rect,
valid_rect: tile_task.valid_rect,
gpu_buffer: gpu_buffer_builder.finalize(),
};
pass.picture_cache.push(target);

View File

@ -20,6 +20,7 @@ use crate::prim_store::gradient::{
FastLinearGradientInstance, LinearGradientInstance, RadialGradientInstance,
ConicGradientInstance,
};
use crate::renderer::{GpuBuffer, GpuBufferBuilder};
use crate::render_backend::DataStores;
use crate::render_task::{RenderTaskKind, RenderTaskAddress};
use crate::render_task::{RenderTask, ScalingTask, SvgFilterInfo};
@ -262,6 +263,7 @@ impl RenderTarget for ColorRenderTarget {
) {
profile_scope!("build");
let mut merged_batches = AlphaBatchContainer::new(None);
let mut gpu_buffer_builder = GpuBufferBuilder::new();
for task_id in &self.alpha_tasks {
profile_scope!("alpha_task");
@ -312,6 +314,7 @@ impl RenderTarget for ColorRenderTarget {
pic_task.raster_spatial_node_index,
pic_task.surface_spatial_node_index,
z_generator,
&mut gpu_buffer_builder,
);
});
@ -597,6 +600,7 @@ pub struct PictureCacheTarget {
pub clear_color: Option<ColorF>,
pub dirty_rect: DeviceIntRect,
pub valid_rect: DeviceIntRect,
pub gpu_buffer: GpuBuffer,
}
#[cfg_attr(feature = "capture", derive(Serialize))]

View File

@ -0,0 +1,152 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
TODO:
Recycle GpuBuffers in a pool (support return from render thread)
Efficiently allow writing to buffer (better push interface)
Support other texel types (e.g. i32)
*/
use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
use api::units::{DeviceIntSize, LayoutRect};
use api::{ColorF, PremultipliedColorF};
use crate::device::Texel;
// NOTE(review): `Texel` is an unsafe trait declared in `crate::device`; its
// safety contract is not visible from this file. Presumably it requires a
// plain-data type whose bytes can be uploaded directly as texels - confirm
// that `GpuBufferBlock` (a single `[f32; 4]`) satisfies it.
unsafe impl Texel for GpuBufferBlock {}
/// A single texel in RGBAF32 texture - 16 bytes.
///
/// Holds four `f32` components. Values are created through `EMPTY` or the
/// conversions from `LayoutRect`, `ColorF` and `PremultipliedColorF` defined
/// in this module.
#[derive(Copy, Clone, Debug, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuBufferBlock {
/// The four float components of this texel.
data: [f32; 4],
}
/// Location of a block inside a GPU buffer, expressed as a 2D texel
/// coordinate.
///
/// `GpuBufferBuilder::push` produces these from a linear block index:
/// `u = index % MAX_VERTEX_TEXTURE_WIDTH`, `v = index / MAX_VERTEX_TEXTURE_WIDTH`.
#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuBufferAddress {
/// Column of the block within its row.
pub u: u16,
/// Row of the block.
pub v: u16,
}
impl GpuBufferAddress {
#[allow(dead_code)]
pub fn as_int(self) -> i32 {
// TODO(gw): Temporarily encode GPU Cache addresses as a single int.
// In the future, we can change the PrimitiveInstanceData struct
// to use 2x u16 for the vertex attribute instead of an i32.
self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32
}
}
impl GpuBufferBlock {
pub const EMPTY: Self = GpuBufferBlock { data: [0.0; 4] };
}
/// Packs a layout rect into one block as `[min.x, min.y, max.x, max.y]`.
///
/// Implemented as `From` rather than a hand-written `Into`: the standard
/// blanket impl still provides `rect.into()`, and callers additionally get
/// `GpuBufferBlock::from(rect)`.
impl From<LayoutRect> for GpuBufferBlock {
    fn from(rect: LayoutRect) -> Self {
        GpuBufferBlock {
            data: [
                rect.min.x,
                rect.min.y,
                rect.max.x,
                rect.max.y,
            ],
        }
    }
}
/// Packs a color into one block as `[r, g, b, a]`.
///
/// Implemented as `From` rather than a hand-written `Into`: the standard
/// blanket impl still provides `color.into()`, and callers additionally get
/// `GpuBufferBlock::from(color)`.
impl From<ColorF> for GpuBufferBlock {
    fn from(color: ColorF) -> Self {
        GpuBufferBlock {
            data: [
                color.r,
                color.g,
                color.b,
                color.a,
            ],
        }
    }
}
/// Packs a premultiplied color into one block as `[r, g, b, a]`. The
/// components are copied as-is; no (un)premultiplication is performed here.
///
/// Implemented as `From` rather than a hand-written `Into`: the standard
/// blanket impl still provides `color.into()`, and callers additionally get
/// `GpuBufferBlock::from(color)`.
impl From<PremultipliedColorF> for GpuBufferBlock {
    fn from(color: PremultipliedColorF) -> Self {
        GpuBufferBlock {
            data: [
                color.r,
                color.g,
                color.b,
                color.a,
            ],
        }
    }
}
/// Accumulates `GpuBufferBlock`s on the CPU, laid out in rows of
/// `MAX_VERTEX_TEXTURE_WIDTH` blocks, and turns them into a `GpuBuffer`.
pub struct GpuBufferBuilder {
    /// Blocks in row-major order: the block at index `i` ends up at texel
    /// `(i % MAX_VERTEX_TEXTURE_WIDTH, i / MAX_VERTEX_TEXTURE_WIDTH)`.
    data: Vec<GpuBufferBlock>,
}

impl GpuBufferBuilder {
    /// Creates a builder with no blocks.
    pub fn new() -> Self {
        GpuBufferBuilder {
            data: Vec::new(),
        }
    }

    /// Appends `blocks` so that they are contiguous within a single row, and
    /// returns the address of the first block.
    ///
    /// If the blocks do not fit in what is left of the current row, the rest
    /// of that row is filled with `GpuBufferBlock::EMPTY` and the blocks are
    /// placed at the start of the next row.
    ///
    /// # Panics
    ///
    /// Panics if `blocks` is longer than one row
    /// (`MAX_VERTEX_TEXTURE_WIDTH` blocks).
    #[allow(dead_code)]
    pub fn push(
        &mut self,
        blocks: &[GpuBufferBlock],
    ) -> GpuBufferAddress {
        assert!(blocks.len() <= MAX_VERTEX_TEXTURE_WIDTH);

        // Only the space already used in the *current* row decides whether
        // padding is needed. Comparing the total buffer length against the
        // row width would force every push after the first row onto a fresh
        // row, wasting up to a whole row of texels per push.
        let used_in_row = self.data.len() % MAX_VERTEX_TEXTURE_WIDTH;
        if used_in_row + blocks.len() > MAX_VERTEX_TEXTURE_WIDTH {
            let next_row_start = self.data.len() - used_in_row + MAX_VERTEX_TEXTURE_WIDTH;
            self.data.resize(next_row_start, GpuBufferBlock::EMPTY);
        }

        let index = self.data.len();
        self.data.extend_from_slice(blocks);

        // `u` is always below the row width. `v` would only be truncated by
        // the cast if the buffer grew past `u16::MAX` rows.
        GpuBufferAddress {
            u: (index % MAX_VERTEX_TEXTURE_WIDTH) as u16,
            v: (index / MAX_VERTEX_TEXTURE_WIDTH) as u16,
        }
    }

    /// Consumes the builder, pads the last row with
    /// `GpuBufferBlock::EMPTY` so the data forms whole rows, and returns the
    /// resulting `GpuBuffer` together with its size in texels
    /// (`MAX_VERTEX_TEXTURE_WIDTH` wide, one texel per block).
    ///
    /// A builder with no blocks yields a buffer with empty data and a height
    /// of zero.
    pub fn finalize(mut self) -> GpuBuffer {
        // Round the block count up to a whole number of rows.
        let row_count =
            (self.data.len() + MAX_VERTEX_TEXTURE_WIDTH - 1) / MAX_VERTEX_TEXTURE_WIDTH;
        self.data
            .resize(row_count * MAX_VERTEX_TEXTURE_WIDTH, GpuBufferBlock::EMPTY);
        debug_assert_eq!(self.data.len() % MAX_VERTEX_TEXTURE_WIDTH, 0);

        GpuBuffer {
            data: self.data,
            size: DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, row_count as i32),
        }
    }
}
/// The finished output of a `GpuBufferBuilder`: the block data together with
/// the 2D size it occupies.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuBuffer {
/// Blocks in row-major order. `GpuBufferBuilder::finalize` pads this to a
/// whole number of rows.
pub data: Vec<GpuBufferBlock>,
/// Size in texels as set by `GpuBufferBuilder::finalize`:
/// `MAX_VERTEX_TEXTURE_WIDTH` wide, and `data.len() / MAX_VERTEX_TEXTURE_WIDTH` high.
pub size: DeviceIntSize,
}
impl GpuBuffer {
/// Returns `true` if the buffer contains no blocks at all.
pub fn is_empty(&self) -> bool {
self.data.is_empty()
}
}

View File

@ -115,6 +115,7 @@ use std::collections::hash_map::Entry;
use time::precise_time_ns;
mod debug;
mod gpu_buffer;
mod gpu_cache;
mod shade;
mod vertex;
@ -124,6 +125,7 @@ pub(crate) mod init;
pub use debug::DebugRenderer;
pub use shade::{Shaders, SharedShaders};
pub use vertex::{desc, VertexArrayKind, MAX_VERTEX_TEXTURE_WIDTH};
pub use gpu_buffer::{GpuBuffer, GpuBufferBuilder};
/// The size of the array of each type of vertex data texture that
/// is round-robin-ed each frame during bind_frame_data. Doing this
@ -328,6 +330,7 @@ pub(crate) enum TextureSampler {
PrimitiveHeadersF,
PrimitiveHeadersI,
ClipMask,
GpuBuffer,
}
impl TextureSampler {
@ -356,6 +359,7 @@ impl Into<TextureSlot> for TextureSampler {
TextureSampler::PrimitiveHeadersF => TextureSlot(7),
TextureSampler::PrimitiveHeadersI => TextureSlot(8),
TextureSampler::ClipMask => TextureSlot(9),
TextureSampler::GpuBuffer => TextureSlot(10),
}
}
}
@ -2353,6 +2357,34 @@ impl Renderer {
let _gm = self.gpu_profiler.start_marker("picture cache target");
let framebuffer_kind = FramebufferKind::Other;
// Upload experimental GPU buffer texture if there is any data present
// TODO: Recycle these textures, upload via PBO or best approach for platform
let gpu_buffer_texture = if target.gpu_buffer.is_empty() {
None
} else {
let gpu_buffer_texture = self.device.create_texture(
ImageBufferKind::Texture2D,
ImageFormat::RGBAF32,
target.gpu_buffer.size.width,
target.gpu_buffer.size.height,
TextureFilter::Nearest,
None,
);
self.device.bind_texture(
TextureSampler::GpuBuffer,
&gpu_buffer_texture,
Swizzle::default(),
);
self.device.upload_texture_immediate(
&gpu_buffer_texture,
&target.gpu_buffer.data,
);
Some(gpu_buffer_texture)
};
{
let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
self.device.bind_draw_target(draw_target);
@ -2452,6 +2484,10 @@ impl Renderer {
}
}
if let Some(gpu_buffer_texture) = gpu_buffer_texture {
self.device.delete_texture(gpu_buffer_texture);
}
self.device.invalidate_depth_target();
}

View File

@ -277,6 +277,7 @@ impl LazilyCompiledShader {
("sGpuCache", TextureSampler::GpuCache),
("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
("sGpuBuffer", TextureSampler::GpuBuffer),
],
);
}
@ -294,6 +295,7 @@ impl LazilyCompiledShader {
("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
("sClipMask", TextureSampler::ClipMask),
("sGpuBuffer", TextureSampler::GpuBuffer),
],
);
}