From adc2cbcaaeae6078ec85b433eee0ac05b31fe121 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 3 Aug 2011 11:00:36 -0400
Subject: [PATCH 2/5] r600g: add htile support

htile is used for HiZ and HiS support and fast
Z/S clears.  This commit just adds the htile setup.
Fast Z/S clears are not implemented yet.

Based on initial patches from:
Pierre-Eric Pelloux-Prayer <pelloux@gmail.com>

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c    |   32 +++++++--
 src/gallium/drivers/r600/r600_pipe.h          |    1 +
 src/gallium/drivers/r600/r600_state.c         |   85 +++++++++++++++++++++++--
 src/gallium/drivers/r600/r600d.h              |    2 +
 src/gallium/winsys/r600/drm/r600_hw_context.c |    4 +
 src/gallium/winsys/r600/drm/r600d.h           |    3 +
 6 files changed, 115 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index c9eaf94..5d3c709 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -746,6 +746,9 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 
 	rstate->id = R600_PIPE_STATE_DSA;
 	/* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
+	/* Z order is a driver provided hint to the hw.  In most cases leave it to early then late.
+	 * short shaders -> late Z, medium shaders -> early Z, and long shaders -> ReZ
+	 */
 	db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
 	stencil_ref_mask = 0;
 	stencil_ref_mask_bf = 0;
@@ -787,9 +790,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 
 	/* misc */
 	db_render_control = 0;
-	db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
-		S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
-		S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
+	/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+	db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF) |
+		S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_OFF) |
+		S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_OFF);
 	/* TODO db_render_override depends on query */
 	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
@@ -808,6 +812,11 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
 	r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
+	/* There are 2 sets of HiS states the hw updates when HiS is enabled.  Each state has an associated
+	 * func/ref/mask.  The results of these 2 states are stored per htile. There are two sets so that
+	 * the driver can update one while the other is in use.  The ENABLE0/1 bits select which one(s)
+	 * are active.
+	 */
 	r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL);
@@ -1387,6 +1396,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 	unsigned level;
 	unsigned pitch, slice, format, stencil_format;
 	unsigned offset;
+	uint32_t stencil_offset, htile_offset, db_z_info;
 
 	if (state->zsbuf == NULL)
 		return;
@@ -1405,6 +1415,17 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 	slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
 	format = r600_translate_dbformat(state->zsbuf->texture->format);
 	stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
+	stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
+	db_z_info = S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format);
+
+	if (1) {
+		htile_offset = ((surf->aligned_height * rtex->pitch_in_blocks[level]) + stencil_offset + 255) & ~255;
+		r600_pipe_state_add_reg(rstate, R_028014_DB_HTILE_DATA_BASE,
+					(offset + htile_offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
+		r600_pipe_state_add_reg(rstate, R_028ABC_DB_HTILE_SURFACE,
+					r600_htile_settings(rctx, state->zsbuf->texture), 0xFFFFFFFF, NULL);
+		db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+	}
 
 	r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
 				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
@@ -1412,9 +1433,6 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
 
 	if (stencil_format) {
-		uint32_t stencil_offset;
-
-		stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
 		r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
 					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
 		r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
@@ -1426,7 +1444,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 				S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo);
 
 	r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO,
-				S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format),
+				db_z_info,
 				0xFFFFFFFF, rbuffer->bo);
 	r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
 				S_028058_PITCH_TILE_MAX(pitch),
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 6f399ed..ee37663 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -301,6 +301,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
 				 enum pipe_texture_target target,
 				 unsigned sample_count,
 				 unsigned usage);
+uint32_t r600_htile_settings(struct r600_pipe_context *rctx, struct pipe_resource *zbuf);
 
 /* r600_texture.c */
 void r600_init_screen_texture_functions(struct pipe_screen *screen);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 487b1df..b18d73e 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -797,6 +797,9 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
 
 	rstate->id = R600_PIPE_STATE_DSA;
 	/* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
+	/* Z order is a driver provided hint to the hw.  In most cases leave it to early then late.
+	 * short shaders -> late Z, medium shaders -> early Z, and long shaders -> ReZ
+	 */
 	db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
 	stencil_ref_mask = 0;
 	stencil_ref_mask_bf = 0;
@@ -838,9 +841,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
 
 	/* misc */
 	db_render_control = 0;
-	db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
-		S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
-		S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
+	/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+	db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF) |
+		S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_OFF) |
+		S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_OFF);
 	/* TODO db_render_override depends on query */
 	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
@@ -861,6 +865,12 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
 	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
 	r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
+	/* There are 2 sets of HiS states the hw updates when HiS is enabled.  Each state has an associated
+	 * func/ref/mask.  The results of these 2 states are stored per htile. There are two sets so that
+	 * the driver can update one while the other is in use.  The ENABLE0/1 bits select which one(s)
+	 * are active.
+	 */
+	r600_pipe_state_add_reg(rstate, R_028D28_DB_SRESULTS_COMPARE_STATE0, 0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
@@ -1464,6 +1474,58 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 				0x00000000, 0xFFFFFFFF, NULL);
 }
 
+uint32_t r600_htile_settings(struct r600_pipe_context *rctx, struct pipe_resource *zbuf)
+{
+	unsigned num_tile_pipes;
+	unsigned tile_pipes_per_DB;
+	unsigned max_pixels_per_DB;
+	unsigned width_per_DB;
+	const unsigned k = 1024; /* the pixel thresholds below are multiples of 1024 */
+	uint32_t htile_settings = 0;
+	/* Builds and returns the DB_HTILE_SURFACE register value for the depth buffer zbuf. */
+	num_tile_pipes = r600_get_num_tile_pipes(rctx->screen->radeon);
+	/* Per-DB pixel count: surface area scaled by tile_pipes_per_DB / num_tile_pipes. NOTE(review): assumes non-zero counts. */
+	tile_pipes_per_DB =  num_tile_pipes / r600_get_num_backends(rctx->screen->radeon);
+	max_pixels_per_DB = (zbuf->width0 * zbuf->height0 * tile_pipes_per_DB) / num_tile_pipes;
+
+	/* eg is always 8x8 */
+	if (rctx->family >= CHIP_CEDAR)
+		htile_settings |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1);
+
+	if (max_pixels_per_DB <= 64 * k) {
+	        htile_settings |= S_028D24_LINEAR(1) | S_028D24_PRELOAD(1);
+	} else if (max_pixels_per_DB <= 128 * k) {
+	        htile_settings |= S_028D24_FULL_CACHE(1) | S_028D24_LINEAR(1) |
+			S_028D24_PRELOAD(1);
+	} else if (max_pixels_per_DB <= 256 * k) {
+	        htile_settings |= S_028D24_HTILE_WIDTH(1) | S_028D24_FULL_CACHE(1) |
+			S_028D24_LINEAR(1) | S_028D24_PRELOAD(1);
+	} else if (max_pixels_per_DB <= 512 * k) {
+	        htile_settings |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1) |
+			S_028D24_FULL_CACHE(1) | S_028D24_LINEAR(1) |
+			S_028D24_PRELOAD(1);
+	} else { /* more than 512K pixels per DB: LINEAR is not set; prefetch height depends on the per-DB width */
+	        width_per_DB = (zbuf->width0 * tile_pipes_per_DB) / num_tile_pipes;
+	        if (width_per_DB <= 512) {
+			htile_settings |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1) |
+				S_028D24_FULL_CACHE(1) | S_028D24_PRELOAD(1) |
+				S_028D24_PREFETCH_WIDTH(16) | S_028D24_PREFETCH_HEIGHT(4) |
+				S_028D24_HTILE_USES_PRELOAD_WIN(1);
+		} else if (width_per_DB <= 1024) {
+			htile_settings |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1) |
+				S_028D24_FULL_CACHE(1) | S_028D24_PRELOAD(1) |
+				S_028D24_PREFETCH_WIDTH(16) | S_028D24_PREFETCH_HEIGHT(2) |
+				S_028D24_HTILE_USES_PRELOAD_WIN(1);
+		} else { /* widest case: PREFETCH_HEIGHT is left at 0 */
+			htile_settings |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1) |
+				S_028D24_FULL_CACHE(1) | S_028D24_PRELOAD(1) |
+				S_028D24_PREFETCH_WIDTH(16) |
+				S_028D24_HTILE_USES_PRELOAD_WIN(1);
+		}
+	}
+	return htile_settings;
+}
+
 static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
 			const struct pipe_framebuffer_state *state)
 {
@@ -1473,6 +1535,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	unsigned level;
 	unsigned pitch, slice, format;
 	unsigned offset;
+	uint32_t db_depth_info, htile_offset;
 
 	if (state->zsbuf == NULL)
 		return;
@@ -1490,6 +1553,16 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	pitch = rtex->pitch_in_blocks[level] / 8 - 1;
 	slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
 	format = r600_translate_dbformat(state->zsbuf->texture->format);
+	db_depth_info = S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format);
+
+	if (1) {
+		htile_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
+		r600_pipe_state_add_reg(rstate, R_028014_DB_HTILE_DATA_BASE,
+					(offset + htile_offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
+		r600_pipe_state_add_reg(rstate, R_028D24_DB_HTILE_SURFACE,
+					r600_htile_settings(rctx, state->zsbuf->texture), 0xFFFFFFFF, NULL);
+		db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1);
+	}
 
 	r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
 				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
@@ -1497,9 +1570,11 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 				S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice),
 				0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
+	/* Update ZRANGE_PRECISION based on zmin/max/test.  E.g., if clear value is 0 and ztest
+	 * is >, set ZRANGE_PRECISION to 0 (zmin is the base).  If the clear value is 1 and ztest
+	 * is <, presumably set it to 1 (zmax is the base); TODO confirm. */
 	r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO,
-				S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format),
-				0xFFFFFFFF, rbuffer->bo);
+				db_depth_info, 0xFFFFFFFF, rbuffer->bo);
 	r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT,
 				(surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL);
 }
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index f6eec24..cbd0ef5 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -1600,6 +1600,7 @@
 #define   S_028004_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
 #define   G_028004_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
 #define   C_028004_SLICE_MAX                           0xFF001FFF
+#define R_028014_DB_HTILE_DATA_BASE                  0x028014
 #define R_028D24_DB_HTILE_SURFACE                    0x028D24
 #define   S_028D24_HTILE_WIDTH(x)                      (((x) & 0x1) << 0)
 #define   G_028D24_HTILE_WIDTH(x)                      (((x) >> 0) & 0x1)
@@ -2195,6 +2196,7 @@
 #define R_02880C_DB_SHADER_CONTROL                   0x02880C
 #define R_028D0C_DB_RENDER_CONTROL                   0x028D0C
 #define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
+#define R_028D28_DB_SRESULTS_COMPARE_STATE0          0x028D28
 #define R_028D2C_DB_SRESULTS_COMPARE_STATE1          0x028D2C
 #define R_028D30_DB_PRELOAD_CONTROL                  0x028D30
 #define R_028D44_DB_ALPHA_TO_MASK                    0x028D44
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 30af4e8..20ff0fd 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -485,6 +485,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{R_028C38_CB_CLRCMP_DST, 0, 0, 0},
 	{R_028C3C_CB_CLRCMP_MSK, 0, 0, 0},
 	{R_028C48_PA_SC_AA_MASK, 0, 0, 0},
+	{R_028D28_DB_SRESULTS_COMPARE_STATE0, 0, 0, 0},
 	{R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0},
 	{R_028D44_DB_ALPHA_TO_MASK, 0, 0, 0},
 	{R_02800C_DB_DEPTH_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_DEPTH, 0},
@@ -494,6 +495,9 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0, 0},
 	{R_028D0C_DB_RENDER_CONTROL, 0, 0, 0},
 	{R_028D10_DB_RENDER_OVERRIDE, 0, 0, 0},
+	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+	{R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0, 0},
+	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{R_028D24_DB_HTILE_SURFACE, 0, 0, 0},
 	{R_028D30_DB_PRELOAD_CONTROL, 0, 0, 0},
 	{R_028D34_DB_PREFETCH_LIMIT, 0, 0, 0},
diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h
index 4a19dcf..3ce508a 100644
--- a/src/gallium/winsys/r600/drm/r600d.h
+++ b/src/gallium/winsys/r600/drm/r600d.h
@@ -311,6 +311,7 @@
 #define   S_028004_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
 #define   G_028004_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
 #define   C_028004_SLICE_MAX                           0xFF001FFF
+#define R_028014_DB_HTILE_DATA_BASE                  0x028014
 #define R_028D24_DB_HTILE_SURFACE                    0x028D24
 #define   S_028D24_HTILE_WIDTH(x)                      (((x) & 0x1) << 0)
 #define   G_028D24_HTILE_WIDTH(x)                      (((x) >> 0) & 0x1)
@@ -817,6 +818,7 @@
 #define R_028000_DB_DEPTH_SIZE                       0x028000
 #define R_028004_DB_DEPTH_VIEW                       0x028004
 #define R_028010_DB_DEPTH_INFO                       0x028010
+#define R_028014_DB_HTILE_DATA_BASE                  0x028014
 #define R_028D24_DB_HTILE_SURFACE                    0x028D24
 #define R_028D34_DB_PREFETCH_LIMIT                   0x028D34
 #define R_0286D4_SPI_INTERP_CONTROL_0                0x0286D4
@@ -880,6 +882,7 @@
 #define   S_028D0C_COPY_SAMPLE(x)                      (((x) & 0x1) << 8)
 #define   S_028D0C_R700_PERFECT_ZPASS_COUNTS(x)        (((x) & 0x1) << 15)
 #define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
+#define R_028D28_DB_SRESULTS_COMPARE_STATE0          0x028D28
 #define R_028D2C_DB_SRESULTS_COMPARE_STATE1          0x028D2C
 #define R_028D30_DB_PRELOAD_CONTROL                  0x028D30
 #define R_028D44_DB_ALPHA_TO_MASK                    0x028D44
-- 
1.7.1.1

