Bug 1536732 - Add experimental pixel-local-storage render path to WR r=kvark

Add an experimental code path that makes use of the pixel local
storage extension available on many mobile GPUs.

This code path is currently disabled by default, as the support
is not complete for all primitives and blend modes. The initial
aim is to get feature parity with the existing renderer.

Once that's complete, we can take advantage of the (minimum)
12 bytes per pixel of high-speed on-tile memory to store custom
data.

Clip masks are a good use case for this, since they map 1:1 with
the position of the fragment they are clipping. Using this for
clip masks allows us to handle clipping on mobile GPUs in a much
more efficient way - we can skip (a) separate render targets,
(b) target resolve, and (c) sampling the mask texture during rendering.

Depends on D24123

Differential Revision: https://phabricator.services.mozilla.com/D24124

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Glenn Watson 2019-03-21 02:59:04 +00:00
parent 8f2eeb4b40
commit 8452fc14bc
14 changed files with 387 additions and 27 deletions

View File

@ -1041,7 +1041,7 @@ fn wr_device_new(gl_context: *mut c_void, pc: Option<&mut WrProgramCache>)
None => None,
};
Device::new(gl, resource_override_path, upload_method, cached_programs)
Device::new(gl, resource_override_path, upload_method, cached_programs, false)
}
// Call MakeCurrent before this.
@ -1134,6 +1134,7 @@ pub extern "C" fn wr_window_new(window_id: WrWindowId,
precache_flags,
namespace_alloc_by_client: true,
enable_picture_caching,
allow_pixel_local_storage_support: false,
..Default::default()
};
@ -2936,6 +2937,7 @@ pub extern "C" fn wr_shaders_new(gl_context: *mut c_void,
let opts = RendererOptions {
precache_flags,
allow_pixel_local_storage_support: false,
..Default::default()
};

View File

@ -156,8 +156,7 @@ void main(void) {
#endif
#endif
// TODO(gw): Handle pre-multiply common code here as required.
oFragColor = frag.color;
write_output(frag.color);
#endif
}
#endif

View File

@ -0,0 +1,27 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Init pass for the pixel local storage (PLS) render path: copy the
// current framebuffer color into PLS. This pass might become unnecessary
// in future if the opaque pass also writes to pixel local storage.

// PLS is only written by this shader, never read.
#define PLS_WRITEONLY

#include shared

#ifdef WR_VERTEX_SHADER

in vec4 aRect;

void main(void) {
    // aRect carries the rect origin in xy and its size in zw; aPosition
    // selects a point within that rect.
    vec2 rect_origin = aRect.xy;
    vec2 rect_size = aRect.zw;
    vec4 local_pos = vec4(rect_origin + aPosition.xy * rect_size, 0.0, 1.0);

    gl_Position = uTransform * local_pos;
}
#endif

#ifdef WR_FRAGMENT_SHADER
void main(void) {
    // Seed our custom PLS struct with the color already in the framebuffer.
    PLS.color = gl_LastFragColorARM;
}
#endif

View File

@ -0,0 +1,29 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Resolve pass for the pixel local storage (PLS) render path: copy the
// final color held in PLS to the normal fragment output, which is the
// color that gets resolved out to main memory.

// PLS is only read by this shader, never written.
#define PLS_READONLY

#include shared

#ifdef WR_VERTEX_SHADER

in vec4 aRect;

void main(void) {
    // aRect carries the rect origin in xy and its size in zw; aPosition
    // selects a point within that rect.
    vec2 rect_origin = aRect.xy;
    vec2 rect_size = aRect.zw;
    vec4 local_pos = vec4(rect_origin + aPosition.xy * rect_size, 0.0, 1.0);

    gl_Position = uTransform * local_pos;
}
#endif

#ifdef WR_FRAGMENT_SHADER

out vec4 oFragColor;

void main(void) {
    // Emit the color stored in pixel local storage as the fragment color.
    oFragColor = PLS.color;
}
#endif

View File

@ -112,6 +112,6 @@ void main(void) {
float alpha = do_clip();
float perspective_divisor = mix(gl_FragCoord.w, 1.0, vLayerAndPerspective.y);
vec2 uv = clamp(vUv * perspective_divisor, vUvSampleBounds.xy, vUvSampleBounds.zw);
oFragColor = alpha * textureLod(sPrevPassColor, vec3(uv, vLayerAndPerspective.x), 0.0);
write_output(alpha * textureLod(sPrevPassColor, vec3(uv, vLayerAndPerspective.x), 0.0));
}
#endif

View File

@ -300,7 +300,7 @@ void main(void) {
oFragColor = vColor * alpha_mask;
oFragBlend = alpha_mask * vColor.a;
#else
oFragColor = vColor * mask * alpha;
write_output(vColor * mask * alpha);
#endif
}
#endif

View File

@ -2,6 +2,16 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifdef WR_FEATURE_PIXEL_LOCAL_STORAGE
// For now, we need both extensions here, in order to initialize
// the PLS to the current framebuffer color. In future, we can
// possibly remove that requirement, or at least support the
// other framebuffer fetch extensions that provide the same
// functionality.
#extension GL_EXT_shader_pixel_local_storage : require
#extension GL_ARM_shader_framebuffer_fetch : require
#endif
#ifdef WR_FEATURE_TEXTURE_EXTERNAL
// Please check https://www.khronos.org/registry/OpenGL/extensions/OES/OES_EGL_image_external_essl3.txt
// for this extension.
@ -48,12 +58,62 @@
#ifdef WR_FRAGMENT_SHADER
// Uniform inputs
// Fragment shader outputs
#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
layout(location = 0, index = 0) out vec4 oFragColor;
layout(location = 0, index = 1) out vec4 oFragBlend;
#ifdef WR_FEATURE_PIXEL_LOCAL_STORAGE
// Define the storage class of the pixel local storage.
// If defined as writable, it's a compile time error to
// have a normal fragment output variable declared.
#if defined(PLS_READONLY)
#define PLS_BLOCK __pixel_local_inEXT
#elif defined(PLS_WRITEONLY)
#define PLS_BLOCK __pixel_local_outEXT
#else
#define PLS_BLOCK __pixel_localEXT
#endif
// The structure of pixel local storage. Right now, it's
// just the current framebuffer color. In future, we have
// (at least) 12 bytes of space we can store extra info
// here (such as clip mask values).
PLS_BLOCK FrameBuffer {
layout(rgba8) highp vec4 color;
} PLS;
#ifndef PLS_READONLY
// Write the output of a fragment shader to PLS. Applies
// premultiplied alpha blending by default, since the blender
// is disabled when PLS is active.
// TODO(gw): Properly support alpha blend mode for webgl / canvas.
void write_output(vec4 color) {
PLS.color = color + PLS.color * (1.0 - color.a);
}
// Write a raw value straight to PLS, if the fragment shader has
// already applied blending.
void write_output_raw(vec4 color) {
PLS.color = color;
}
#endif
#ifndef PLS_WRITEONLY
// Retrieve the current framebuffer color. Useful in conjunction with
// the write_output_raw function.
vec4 get_current_framebuffer_color() {
return PLS.color;
}
#endif
#else
out vec4 oFragColor;
// Fragment shader outputs
#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
layout(location = 0, index = 0) out vec4 oFragColor;
layout(location = 0, index = 1) out vec4 oFragBlend;
#else
out vec4 oFragColor;
#endif
// Write an output color in normal (non-PLS) shaders.
void write_output(vec4 color) {
oFragColor = color;
}
#endif
#define EPSILON 0.0001

View File

@ -396,6 +396,9 @@ pub struct AlphaBatchContainer {
/// in. Each region will have scissor rect set before drawing.
pub regions: Vec<DeviceIntRect>,
pub tile_blits: Vec<TileBlit>,
/// The rectangle of the owning render target that this
/// set of batches affects.
pub task_rect: DeviceIntRect,
}
impl AlphaBatchContainer {
@ -409,6 +412,7 @@ impl AlphaBatchContainer {
task_scissor_rect,
regions,
tile_blits: Vec::new(),
task_rect: DeviceIntRect::zero(),
}
}
@ -417,7 +421,9 @@ impl AlphaBatchContainer {
self.alpha_batches.is_empty()
}
fn merge(&mut self, batch_list: BatchList) {
fn merge(&mut self, batch_list: BatchList, task_rect: &DeviceIntRect) {
self.task_rect = self.task_rect.union(task_rect);
for other_batch in batch_list.opaque_batch_list.batches {
let batch_index = self.opaque_batches.iter().position(|batch| {
batch.key.is_compatible_with(&other_batch.key)
@ -517,6 +523,7 @@ impl AlphaBatchBuilder {
mut self,
batch_containers: &mut Vec<AlphaBatchContainer>,
merged_batches: &mut AlphaBatchContainer,
task_rect: DeviceIntRect,
) {
for batch_list in &mut self.batch_lists {
batch_list.finalize();
@ -525,7 +532,7 @@ impl AlphaBatchBuilder {
if self.can_merge() {
let batch_list = self.batch_lists.pop().unwrap();
debug_assert!(batch_list.tile_blits.is_empty());
merged_batches.merge(batch_list);
merged_batches.merge(batch_list, &task_rect);
} else {
for batch_list in self.batch_lists {
batch_containers.push(AlphaBatchContainer {
@ -534,6 +541,7 @@ impl AlphaBatchBuilder {
task_scissor_rect: self.task_scissor_rect,
regions: batch_list.regions,
tile_blits: batch_list.tile_blits,
task_rect,
});
}
}

View File

@ -885,7 +885,10 @@ pub struct Capabilities {
/// bound to a non-0th layer of a texture array. This is buggy on
/// Adreno devices.
pub supports_blit_to_texture_array: bool,
/// Whether we can use the pixel local storage functionality that
/// is available on some mobile GPUs. This allows fast access to
/// the per-pixel tile memory.
pub supports_pixel_local_storage: bool,
}
#[derive(Clone, Debug)]
@ -1114,6 +1117,7 @@ impl Device {
resource_override_path: Option<PathBuf>,
upload_method: UploadMethod,
cached_programs: Option<Rc<ProgramCache>>,
allow_pixel_local_storage_support: bool,
) -> Device {
// On debug builds, assert that each GL call is error-free. We don't do
// this on release builds because the synchronous call can stall the
@ -1229,6 +1233,17 @@ impl Device {
// a non-0th layer of a texture array is not supported.
let supports_blit_to_texture_array = !renderer_name.starts_with("Adreno");
// Check if the device supports the two extensions needed in order to use
// pixel local storage.
// TODO(gw): Consider if we can remove fb fetch / init, by using PLS for opaque pass too.
// TODO(gw): Support EXT_shader_framebuffer_fetch as well.
let ext_pixel_local_storage = supports_extension(&extensions, "GL_EXT_shader_pixel_local_storage");
let ext_framebuffer_fetch = supports_extension(&extensions, "GL_ARM_shader_framebuffer_fetch");
let supports_pixel_local_storage =
allow_pixel_local_storage_support &&
ext_framebuffer_fetch &&
ext_pixel_local_storage;
// On Adreno GPUs PBO texture upload is only performed asynchronously
// if the stride of the data in the PBO is a multiple of 256 bytes.
// Other platforms may have similar requirements and should be added
@ -1251,6 +1266,7 @@ impl Device {
supports_multisampling: false, //TODO
supports_copy_image_sub_data,
supports_blit_to_texture_array,
supports_pixel_local_storage,
},
bgra_format_internal,
@ -2950,6 +2966,18 @@ impl Device {
supports_extension(&self.extensions, extension)
}
/// Switch the pixel local storage GL capability on (`enable == true`)
/// or off (`enable == false`).
///
/// The caller must already have confirmed that the device supports
/// pixel local storage; this is only checked in debug builds.
pub fn enable_pixel_local_storage(&mut self, enable: bool) {
    debug_assert!(self.capabilities.supports_pixel_local_storage);

    match enable {
        true => self.gl.enable(gl::SHADER_PIXEL_LOCAL_STORAGE_EXT),
        false => self.gl.disable(gl::SHADER_PIXEL_LOCAL_STORAGE_EXT),
    }
}
pub fn echo_driver_messages(gl: &gl::Gl) {
for msg in gl.get_debug_messages() {
let level = match msg.severity {

View File

@ -164,6 +164,26 @@ pub struct PrimitiveInstanceData {
data: [i32; 4],
}
/// Per-instance vertex data for the pixel local storage init / resolve
/// draws.
///
/// Stores a rectangle as `[x, y, width, height]` in floats, i.e. four
/// `f32` components per instance.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct ResolveInstanceData {
    rect: [f32; 4],
}

impl ResolveInstanceData {
    /// Build the instance data for `rect`, converting its integer origin
    /// and size to floats.
    pub fn new(rect: DeviceIntRect) -> Self {
        let origin = rect.origin;
        let size = rect.size;

        let rect = [
            origin.x as f32,
            origin.y as f32,
            size.width as f32,
            size.height as f32,
        ];

        ResolveInstanceData { rect }
    }
}
#[derive(Debug, Copy, Clone)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]

View File

@ -64,7 +64,7 @@ use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
use gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd};
#[cfg(feature = "pathfinder")]
use gpu_glyph_renderer::GpuGlyphRenderer;
use gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, ScalingInstance, TransformData};
use gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, ScalingInstance, TransformData, ResolveInstanceData};
use internal_types::{TextureSource, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
use internal_types::{CacheTextureId, DebugOutput, FastHashMap, LayerIndex, RenderedDocument, ResultMsg};
use internal_types::{TextureCacheAllocationKind, TextureCacheUpdate, TextureUpdateList, TextureUpdateSource};
@ -647,6 +647,23 @@ pub(crate) mod desc {
instance_attributes: &[],
};
/// Vertex layout for the pixel local storage helper shaders (selected
/// for `VertexArrayKind::Resolve`): a two-float `aPosition` per vertex
/// plus a four-float `aRect` per instance.
pub const RESOLVE: VertexDescriptor = VertexDescriptor {
// Per-vertex data.
vertex_attributes: &[
VertexAttribute {
name: "aPosition",
count: 2,
kind: VertexAttributeKind::F32,
},
],
// Per-instance data: the rectangle to cover, as four floats.
instance_attributes: &[
VertexAttribute {
name: "aRect",
count: 4,
kind: VertexAttributeKind::F32,
},
],
};
pub const VECTOR_STENCIL: VertexDescriptor = VertexDescriptor {
vertex_attributes: &[
VertexAttribute {
@ -743,6 +760,7 @@ pub(crate) enum VertexArrayKind {
Scale,
LineDecoration,
Gradient,
Resolve,
}
#[derive(Clone, Debug, PartialEq)]
@ -1574,6 +1592,7 @@ pub struct RendererVAOs {
line_vao: VAO,
scale_vao: VAO,
gradient_vao: VAO,
resolve_vao: VAO,
}
/// The renderer is responsible for submitting to the GPU the work prepared by the
@ -1744,9 +1763,14 @@ impl Renderer {
options.resource_override_path.clone(),
options.upload_method.clone(),
options.cached_programs.take(),
options.allow_pixel_local_storage_support,
);
let ext_dual_source_blending = !options.disable_dual_source_blending &&
let ext_dual_source_blending =
!options.disable_dual_source_blending &&
// If using pixel local storage, subpixel AA isn't supported (we disable it on all
// mobile devices explicitly anyway).
!device.get_capabilities().supports_pixel_local_storage &&
device.supports_extension("GL_ARB_blend_func_extended") &&
device.supports_extension("GL_ARB_explicit_attrib_location");
@ -1891,6 +1915,7 @@ impl Renderer {
let scale_vao = device.create_vao_with_new_instances(&desc::SCALE, &prim_vao);
let line_vao = device.create_vao_with_new_instances(&desc::LINE, &prim_vao);
let gradient_vao = device.create_vao_with_new_instances(&desc::GRADIENT, &prim_vao);
let resolve_vao = device.create_vao_with_new_instances(&desc::RESOLVE, &prim_vao);
let texture_cache_upload_pbo = device.create_pbo();
let texture_resolver = TextureResolver::new(&mut device);
@ -2095,6 +2120,7 @@ impl Renderer {
border_vao,
scale_vao,
gradient_vao,
resolve_vao,
line_vao,
},
transforms_texture,
@ -2879,6 +2905,7 @@ impl Renderer {
self.texture_resolver.end_frame(&mut self.device, cpu_frame_id);
self.device.end_frame();
});
if framebuffer_size.is_some() {
self.last_time = current_time;
}
@ -3527,6 +3554,23 @@ impl Renderer {
self.set_blend(true, framebuffer_kind);
let mut prev_blend_mode = BlendMode::None;
// If the device supports pixel local storage, initialize the PLS buffer for
// the transparent pass. This involves reading the current framebuffer value
// and storing that in PLS.
// TODO(gw): This is quite expensive and relies on framebuffer fetch being
// available. We can probably switch the opaque pass over to use
// PLS too, and remove this pass completely.
if self.device.get_capabilities().supports_pixel_local_storage {
// TODO(gw): If using PLS, the fixed function blender is disabled. It's possible
// we could take advantage of this by skipping batching on the blend
// mode in these cases.
self.init_pixel_local_storage(
alpha_batch_container.task_rect,
projection,
stats,
);
}
for batch in &alpha_batch_container.alpha_batches {
self.shaders.borrow_mut()
.get(&batch.key, self.debug_flags)
@ -3631,6 +3675,17 @@ impl Renderer {
}
}
// If the device supports pixel local storage, resolve the PLS values.
// This pass reads the final PLS color value, and writes it to a normal
// fragment output.
if self.device.get_capabilities().supports_pixel_local_storage {
self.resolve_pixel_local_storage(
alpha_batch_container.task_rect,
projection,
stats,
);
}
self.device.disable_depth();
self.set_blend(false, framebuffer_kind);
self.gpu_profile.finish_sampler(transparent_sampler);
@ -4490,6 +4545,66 @@ impl Renderer {
frame.has_been_rendered = true;
}
/// Initialize the PLS block for `task_rect` by drawing the `pls_init`
/// shader over it, which reads the current framebuffer color.
///
/// Enables pixel local storage on the device first; it is left enabled
/// when this function returns.
pub fn init_pixel_local_storage(
    &mut self,
    task_rect: DeviceIntRect,
    projection: &Transform3D<f32>,
    stats: &mut RendererStats,
) {
    self.device.enable_pixel_local_storage(true);

    // Keep the shader borrow scoped so it is released before drawing.
    {
        let mut shaders = self.shaders.borrow_mut();
        shaders.pls_init.bind(
            &mut self.device,
            projection,
            &mut self.renderer_errors,
        );
    }

    // A single instance covering the whole task rect.
    let instance = ResolveInstanceData::new(task_rect);
    self.draw_instanced_batch(
        &[instance],
        VertexArrayKind::Resolve,
        &BatchTextures::no_texture(),
        stats,
    );
}
/// Resolve the current PLS contents for `task_rect` by drawing the
/// `pls_resolve` shader over it, which writes the stored color to a
/// fragment color output.
///
/// Disables pixel local storage on the device once the draw has been
/// issued.
pub fn resolve_pixel_local_storage(
    &mut self,
    task_rect: DeviceIntRect,
    projection: &Transform3D<f32>,
    stats: &mut RendererStats,
) {
    // Keep the shader borrow scoped so it is released before drawing.
    {
        let mut shaders = self.shaders.borrow_mut();
        shaders.pls_resolve.bind(
            &mut self.device,
            projection,
            &mut self.renderer_errors,
        );
    }

    // A single instance covering the whole task rect.
    let instance = ResolveInstanceData::new(task_rect);
    self.draw_instanced_batch(
        &[instance],
        VertexArrayKind::Resolve,
        &BatchTextures::no_texture(),
        stats,
    );

    self.device.enable_pixel_local_storage(false);
}
pub fn debug_renderer<'b>(&'b mut self) -> Option<&'b mut DebugRenderer> {
self.debug.get_mut(&mut self.device)
}
@ -4916,6 +5031,7 @@ impl Renderer {
self.device.delete_pbo(self.texture_cache_upload_pbo);
self.texture_resolver.deinit(&mut self.device);
self.device.delete_vao(self.vaos.prim_vao);
self.device.delete_vao(self.vaos.resolve_vao);
self.device.delete_vao(self.vaos.clip_vao);
self.device.delete_vao(self.vaos.gradient_vao);
self.device.delete_vao(self.vaos.blur_vao);
@ -5191,6 +5307,11 @@ pub struct RendererOptions {
/// it is a performance win. The default is false, which tends to be best
/// performance on lower end / integrated GPUs.
pub gpu_supports_fast_clears: bool,
/// If true, allow WR to use pixel local storage if the device supports it.
/// For now, this defaults to false since the code is still experimental
/// and not complete. This option will probably be removed once support is
/// complete, and WR can implicitly choose whether to make use of PLS.
pub allow_pixel_local_storage_support: bool,
}
impl Default for RendererOptions {
@ -5230,6 +5351,7 @@ impl Default for RendererOptions {
enable_picture_caching: false,
testing: false,
gpu_supports_fast_clears: false,
allow_pixel_local_storage_support: false,
}
}
}
@ -5696,6 +5818,7 @@ fn get_vao<'a>(vertex_array_kind: VertexArrayKind,
VertexArrayKind::Scale => &vaos.scale_vao,
VertexArrayKind::LineDecoration => &vaos.line_vao,
VertexArrayKind::Gradient => &vaos.gradient_vao,
VertexArrayKind::Resolve => &vaos.resolve_vao,
}
}
@ -5713,6 +5836,7 @@ fn get_vao<'a>(vertex_array_kind: VertexArrayKind,
VertexArrayKind::Scale => &vaos.scale_vao,
VertexArrayKind::LineDecoration => &vaos.line_vao,
VertexArrayKind::Gradient => &vaos.gradient_vao,
VertexArrayKind::Resolve => &vaos.resolve_vao,
}
}

View File

@ -52,6 +52,7 @@ const DEBUG_OVERDRAW_FEATURE: &str = "DEBUG_OVERDRAW";
const DITHERING_FEATURE: &str = "DITHERING";
const DUAL_SOURCE_FEATURE: &str = "DUAL_SOURCE_BLENDING";
const FAST_PATH_FEATURE: &str = "FAST_PATH";
const PIXEL_LOCAL_STORAGE_FEATURE: &str = "PIXEL_LOCAL_STORAGE";
pub(crate) enum ShaderKind {
Primitive,
@ -63,6 +64,7 @@ pub(crate) enum ShaderKind {
VectorStencil,
#[allow(dead_code)]
VectorCover,
Resolve,
}
pub struct LazilyCompiledShader {
@ -125,7 +127,7 @@ impl LazilyCompiledShader {
) -> Result<&mut Program, ShaderError> {
if self.program.is_none() {
let program = match self.kind {
ShaderKind::Primitive | ShaderKind::Brush | ShaderKind::Text => {
ShaderKind::Primitive | ShaderKind::Brush | ShaderKind::Text | ShaderKind::Resolve => {
create_prim_shader(
self.name,
device,
@ -175,6 +177,7 @@ impl LazilyCompiledShader {
ShaderKind::VectorStencil => VertexArrayKind::VectorStencil,
ShaderKind::VectorCover => VertexArrayKind::VectorCover,
ShaderKind::ClipCache => VertexArrayKind::Clip,
ShaderKind::Resolve => VertexArrayKind::Resolve,
};
let vertex_descriptor = match vertex_format {
@ -187,6 +190,7 @@ impl LazilyCompiledShader {
VertexArrayKind::VectorCover => &desc::VECTOR_COVER,
VertexArrayKind::Border => &desc::BORDER,
VertexArrayKind::Scale => &desc::SCALE,
VertexArrayKind::Resolve => &desc::RESOLVE,
};
device.link_program(program, vertex_descriptor)?;
@ -265,6 +269,7 @@ impl BrushShader {
features: &[&'static str],
precache_flags: ShaderPrecacheFlags,
dual_source: bool,
use_pixel_local_storage: bool,
) -> Result<Self, ShaderError> {
let opaque = LazilyCompiledShader::new(
ShaderKind::Brush,
@ -276,6 +281,9 @@ impl BrushShader {
let mut alpha_features = features.to_vec();
alpha_features.push(ALPHA_FEATURE);
if use_pixel_local_storage {
alpha_features.push(PIXEL_LOCAL_STORAGE_FEATURE);
}
let alpha = LazilyCompiledShader::new(
ShaderKind::Brush,
@ -285,7 +293,10 @@ impl BrushShader {
precache_flags,
)?;
let dual_source = if dual_source {
// If using PLS, we disable all subpixel AA implicitly. Subpixel AA is always
// disabled on mobile devices anyway, due to uncertainty over the subpixel
// layout configuration.
let dual_source = if dual_source && !use_pixel_local_storage {
let mut dual_source_features = alpha_features.to_vec();
dual_source_features.push(DUAL_SOURCE_FEATURE);
@ -498,6 +509,12 @@ pub struct Shaders {
pub ps_text_run: TextShader,
pub ps_text_run_dual_source: TextShader,
// Helper shaders for pixel local storage render paths.
// pls_init: Initialize pixel local storage, based on current framebuffer value.
// pls_resolve: Convert pixel local storage, writing out to fragment value.
pub pls_init: LazilyCompiledShader,
pub pls_resolve: LazilyCompiledShader,
ps_split_composite: LazilyCompiledShader,
}
@ -507,12 +524,17 @@ impl Shaders {
gl_type: GlType,
options: &RendererOptions,
) -> Result<Self, ShaderError> {
let use_pixel_local_storage = device
.get_capabilities()
.supports_pixel_local_storage;
let brush_solid = BrushShader::new(
"brush_solid",
device,
&[],
options.precache_flags,
false,
use_pixel_local_storage,
)?;
let brush_blend = BrushShader::new(
@ -521,6 +543,7 @@ impl Shaders {
&[],
options.precache_flags,
false,
use_pixel_local_storage,
)?;
let brush_mix_blend = BrushShader::new(
@ -529,6 +552,7 @@ impl Shaders {
&[],
options.precache_flags,
false,
use_pixel_local_storage,
)?;
let brush_radial_gradient = BrushShader::new(
@ -541,6 +565,7 @@ impl Shaders {
},
options.precache_flags,
false,
use_pixel_local_storage,
)?;
let brush_linear_gradient = BrushShader::new(
@ -553,6 +578,7 @@ impl Shaders {
},
options.precache_flags,
false,
use_pixel_local_storage,
)?;
let cs_blur_a8 = LazilyCompiledShader::new(
@ -603,6 +629,28 @@ impl Shaders {
options.precache_flags,
)?;
let pls_precache_flags = if use_pixel_local_storage {
options.precache_flags
} else {
ShaderPrecacheFlags::empty()
};
let pls_init = LazilyCompiledShader::new(
ShaderKind::Resolve,
"pls_init",
&[PIXEL_LOCAL_STORAGE_FEATURE],
device,
pls_precache_flags,
)?;
let pls_resolve = LazilyCompiledShader::new(
ShaderKind::Resolve,
"pls_resolve",
&[PIXEL_LOCAL_STORAGE_FEATURE],
device,
pls_precache_flags,
)?;
let cs_scale_a8 = LazilyCompiledShader::new(
ShaderKind::Cache(VertexArrayKind::Scale),
"cs_scale",
@ -619,9 +667,17 @@ impl Shaders {
options.precache_flags,
)?;
// TODO(gw): The split composite + text shader are special cases - the only
// shaders used during normal scene rendering that aren't a brush
// shader. Perhaps we can unify these in future?
let mut extra_features = Vec::new();
if use_pixel_local_storage {
extra_features.push(PIXEL_LOCAL_STORAGE_FEATURE);
}
let ps_text_run = TextShader::new("ps_text_run",
device,
&[],
&extra_features,
options.precache_flags,
)?;
@ -637,6 +693,14 @@ impl Shaders {
dual_source_precache_flags,
)?;
let ps_split_composite = LazilyCompiledShader::new(
ShaderKind::Primitive,
"ps_split_composite",
&extra_features,
device,
options.precache_flags,
)?;
// All image configuration.
let mut image_features = Vec::new();
let mut brush_image = Vec::new();
@ -656,6 +720,7 @@ impl Shaders {
&image_features,
options.precache_flags,
!options.disable_dual_source_blending,
use_pixel_local_storage,
)?);
}
image_features.clear();
@ -682,6 +747,7 @@ impl Shaders {
&yuv_features,
options.precache_flags,
false,
use_pixel_local_storage,
)?;
let index = Self::get_yuv_shader_index(
*image_buffer_kind,
@ -723,14 +789,6 @@ impl Shaders {
options.precache_flags,
)?;
let ps_split_composite = LazilyCompiledShader::new(
ShaderKind::Primitive,
"ps_split_composite",
&[],
device,
options.precache_flags,
)?;
Ok(Shaders {
cs_blur_a8,
cs_blur_rgba8,
@ -751,6 +809,8 @@ impl Shaders {
cs_clip_rectangle_fast,
cs_clip_box_shadow,
cs_clip_image,
pls_init,
pls_resolve,
ps_text_run,
ps_text_run_dual_source,
ps_split_composite,
@ -822,6 +882,8 @@ impl Shaders {
self.cs_clip_rectangle_fast.deinit(device);
self.cs_clip_box_shadow.deinit(device);
self.cs_clip_image.deinit(device);
self.pls_init.deinit(device);
self.pls_resolve.deinit(device);
self.ps_text_run.deinit(device);
self.ps_text_run_dual_source.deinit(device);
for shader in self.brush_image {

View File

@ -450,6 +450,7 @@ impl RenderTarget for ColorRenderTarget {
batch_builder.build(
&mut self.alpha_batch_containers,
&mut merged_batches,
target_rect,
);
}
_ => {

View File

@ -588,7 +588,7 @@ fn render<'a>(
// Default the profile overlay on for android.
if cfg!(target_os = "android") {
debug_flags.toggle(DebugFlags::PROFILER_DBG);
debug_flags.toggle(DebugFlags::PROFILER_DBG | DebugFlags::COMPACT_PROFILER);
wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags));
}