i965/skl: Add fast color clear infrastructure

Patch was originally called: i965/skl: Enable fast color clears on SKL Skylake introduces some differences in the way that fast clears are programmed and in the restrictions for using fast clears. Since some of these are non-obvious, and fast clears are currently disabled globally, we can enable the simple stuff here and leave the weirder stuff and separately reviewable work. Based on a patch originally from Kristian. Note that within this patch the change in scaling factors could be achieved with this hunk instead. I've opted to keep things more like how the docs describe it however. --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -150,9 +150,13 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw, /* In release builds, fall through */ case I915_TILING_Y: *width_px = 32 / mt->cpp; - *height = 4; + if (brw->gen >= 9) + *height = 2; + else + *height = 4; v2: Add braces for the multiline (Matt + Chad) Comment updates (requested by Chad) Modified commit message Commit message from Chad explaining the MCS height change (Chad) Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Reviewed-by: Neil Roberts <neil@linux.intel.com> Reviewed-by: Chad Versace <chad.versace@intel.com>
2024-12-03 21:23:15 +00:00 · 2015-04-14 14:57:51 -07:00 · 2015-04-14 14:57:51 -07:00 · d23aa634e0
commit d23aa634e0
parent 2f7d2fd997
4 changed files with 81 additions and 20 deletions
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
 }

 static void
-get_fast_clear_rect(struct gl_framebuffer *fb,
+get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
                    struct intel_renderbuffer *irb, struct rect *rect)
 {
   unsigned int x_align, y_align;
@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
       */
      intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
      x_align *= 16;
-      y_align *= 32;
+
+      /* SKL+ line alignment requirement for Y-tiled are half those of the prior
+       * generations.
+       */
+      if (brw->gen >= 9)
+         y_align *= 16;
+      else
+         y_align *= 32;

      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
       *     terms of (width,height) of the RT.
       *
       *     MSAA  Width of Clear Rect  Height of Clear Rect
+       *      2X     Ceil(1/8*width)      Ceil(1/2*height)
       *      4X     Ceil(1/8*width)      Ceil(1/2*height)
       *      8X     Ceil(1/2*width)      Ceil(1/2*height)
+       *     16X         width            Ceil(1/2*height)
       *
       * The text "with upper left co-ordinate to coincide with actual
       * rectangle being cleared" is a little confusing--it seems to imply
@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
      case 8:
         x_scaledown = 2;
         break;
+      case 16:
+         x_scaledown = 1;
+         break;
      default:
         unreachable("Unexpected sample count for fast clear");
      }
@ -357,18 +369,25 @@ is_color_fast_clear_compatible(struct brw_context *brw,

 /**
 * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
- * SURFACE_STATE.
+ * SURFACE_STATE (DWORD 12-15 on SKL+).
 */
-static uint32_t
-compute_fast_clear_color_bits(const union gl_color_union *color)
+static void
+set_fast_clear_color(struct brw_context *brw,
+                     struct intel_mipmap_tree *mt,
+                     const union gl_color_union *color)
 {
-   uint32_t bits = 0;
-   for (int i = 0; i < 4; i++) {
-      /* Testing for non-0 works for integer and float colors */
-      if (color->f[i] != 0.0f)
-         bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+   if (brw->gen >= 9) {
+      mt->gen9_fast_clear_color = *color;
+   } else {
+      mt->fast_clear_color_value = 0;
+      for (int i = 0; i < 4; i++) {
+         /* Testing for non-0 works for integer and float colors */
+         if (color->f[i] != 0.0f) {
+             mt->fast_clear_color_value |=
+                1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+         }
+      }
   }
-   return bits;
 }

 static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 };
@ -510,8 +529,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,

      switch (clear_type) {
      case FAST_CLEAR:
-         irb->mt->fast_clear_color_value =
-            compute_fast_clear_color_bits(&ctx->Color.ClearColor);
+         set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
         irb->need_downsample = true;

         /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
@ -527,7 +545,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
         irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
         irb->need_downsample = true;
         fast_clear_buffers |= 1 << index;
-         get_fast_clear_rect(fb, irb, &fast_clear_rect);
+         get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
         break;

      case REP_CLEAR:
@ -662,8 +680,9 @@ get_resolve_rect(struct brw_context *brw,
    *
    * The scaledown factors in the table that follows are related to the
    * alignment size returned by intel_get_non_msrt_mcs_alignment() by a
-    * multiplier.  For IVB and HSW, we divide by two, for BDW we multiply
-    * by 8 and 16 and 8 and 8 for SKL.
+    * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
+    * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
+    * by a factor of 2.
    */

   intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
@ -709,6 +728,10 @@ brw_meta_resolve_color(struct brw_context *brw,

   brw_bind_rep_write_shader(brw, (float *) fast_clear_color);

+   /* SKL+ also has a resolve mode for compressed render targets and thus more
+    * bits to let us select the type of resolve.  For fast clear resolves, it
+    * turns out we can use the same value as pre-SKL though.
+    */
   set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);

   mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@ -187,7 +187,21 @@ gen8_emit_fast_clear_color(struct brw_context *brw,
                           struct intel_mipmap_tree *mt,
                           uint32_t *surf)
 {
-   surf[7] |= mt->fast_clear_color_value;
+   if (brw->gen >= 9) {
+#define check_fast_clear_val(x) \
+      assert(mt->gen9_fast_clear_color.f[x] == 0.0 || \
+             mt->gen9_fast_clear_color.f[x] == 1.0)
+      check_fast_clear_val(0);
+      check_fast_clear_val(1);
+      check_fast_clear_val(2);
+      check_fast_clear_val(3);
+#undef check_fast_clear_val
+      surf[12] = mt->gen9_fast_clear_color.ui[0];
+      surf[13] = mt->gen9_fast_clear_color.ui[1];
+      surf[14] = mt->gen9_fast_clear_color.ui[2];
+      surf[15] = mt->gen9_fast_clear_color.ui[3];
+   } else
+      surf[7] |= mt->fast_clear_color_value;
 }

 static void
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@ -192,6 +192,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
+ *
+ * From the Skylake documentation, it is made clear that X-tiling is no longer
+ * supported:
+ *
+ *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
+ *     non-MSRTs only.
 */
 static bool
 intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
@ -1495,6 +1501,17 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
   intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
   unsigned width_divisor = block_width_px * 4;
   unsigned height_divisor = block_height * 8;
+
+   /* The Skylake MCS is twice as tall as the Broadwell MCS.
+    *
+    * In pre-Skylake, each bit in the MCS contained the state of 2 cachelines
+    * in the main surface. In Skylake, it's two bits.  The extra bit
+    * doubles the MCS height, not width, because in Skylake the MCS is always
+    * Y-tiled.
+    */
+   if (brw->gen >= 9)
+      height_divisor /= 2;
+
   unsigned mcs_width =
      ALIGN(mt->logical_width0, width_divisor) / width_divisor;
   unsigned mcs_height =
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@ -633,15 +633,22 @@ struct intel_mipmap_tree
    * The SURFACE_STATE bits associated with the last fast color clear to this
    * color mipmap tree, if any.
    *
-    * This value will only ever contain ones in bits 28-31, so it is safe to
-    * OR into dword 7 of SURFACE_STATE.
+    * Prior to GEN9 there is a single bit for RGBA clear values which gives you
+    * the option of 2^4 clear colors. Each bit determines if the color channel
+    * is fully saturated or unsaturated (Cherryview does add a 32b value per
+    * channel, but it is globally applied instead of being part of the render
+    * surface state). Starting with GEN9, the surface state accepts a 32b value
+    * for each color channel.
    *
    * @see RENDER_SURFACE_STATE.RedClearColor
    * @see RENDER_SURFACE_STATE.GreenClearColor
    * @see RENDER_SURFACE_STATE.BlueClearColor
    * @see RENDER_SURFACE_STATE.AlphaClearColor
    */
-   uint32_t fast_clear_color_value;
+   union {
+      uint32_t fast_clear_color_value;
+      union gl_color_union gen9_fast_clear_color;
+   };

   /**
    * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS