From a065db41cd2fd4a72c67f08fd6a499a94d4cd3d2 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Wed, 8 Feb 2012 13:40:04 -0500
Subject: [PATCH] r600g: add htile support

htile is used for HiZ and HiS support and for fast Z/S clears.
This commit only adds the htile setup and enables fast Z/S
clears; it does not yet take full advantage of HiS. The
following registers still need more tweaking:
DB_SRESULTS_COMPARE_STATE0
DB_SRESULTS_COMPARE_STATE1
DB_PRELOAD_CONTROL

Signed-off-by: Pierre-Eric Pelloux-Prayer <pelloux@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 src/gallium/drivers/r600/evergreen_state.c   |   51 +++++++++---
 src/gallium/drivers/r600/r600_blit.c         |    7 ++
 src/gallium/drivers/r600/r600_hw_context.c   |    1 +
 src/gallium/drivers/r600/r600_pipe.h         |    5 +-
 src/gallium/drivers/r600/r600_resource.h     |    7 ++-
 src/gallium/drivers/r600/r600_state.c        |   52 ++++++++-----
 src/gallium/drivers/r600/r600_state_common.c |    5 +-
 src/gallium/drivers/r600/r600_texture.c      |  109 ++++++++++++++++++++++++++
 src/gallium/drivers/r600/r600d.h             |   13 +++
 9 files changed, 212 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index dd67e4b..3e87cdd 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -854,7 +854,6 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa);
 	unsigned db_depth_control, alpha_test_control, alpha_ref;
-	unsigned db_render_override, db_render_control;
 	struct r600_pipe_state *rstate;
 
 	if (dsa == NULL) {
@@ -900,27 +899,20 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	}
 	dsa->alpha_ref = alpha_ref;
 
-	/* misc */
-	db_render_control = 0;
-	db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
-		S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
-		S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
-	/* TODO db_render_override depends on query */
-	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, NULL, 0);
-	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, NULL, 0);
 	/* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
 	 * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
 	 * evergreen_pipe_shader_ps().*/
-	r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, NULL, 0);
-	r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, NULL, 0);
+	/* There are 2 sets of HiS states the hw updates when HiS is enabled.  Each state has an associated
+	 * func/ref/mask.  The results of these 2 states are stored per htile. There are two sets so that
+	 * the driver can update one while the other is in use.  The ENABLE0/1 bits select which one(s)
+	 * are active.
+	 */
 	r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, NULL, 0);
-	r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, NULL, 0);
-	dsa->db_render_override = db_render_override;
 
 	return rstate;
 }
@@ -1642,6 +1634,7 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta
 	uint64_t offset;
 	unsigned level, first_layer, pitch, slice, format, array_mode;
 	unsigned macro_aspect, tile_split, bankh, bankw, z_info, nbanks;
+	unsigned db_render_override, db_render_control;
 
 	if (state->zsbuf == NULL)
 		return;
@@ -1769,6 +1762,33 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta
 		}
 	}
 
+	db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
+				S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+				S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
+	db_render_control = 0;
+	/* htile buffer attached: point the DB at it, enable the tile surface and program clear/preload state */
+	if (rtex->hyperz) {
+		uint64_t htile_offset = rtex->hyperz->surface.level[0].offset;
+
+		htile_offset += r600_resource_va(rctx->context.screen, (void*)rtex->hyperz);
+		r600_pipe_state_add_reg(rstate, R_028014_DB_HTILE_DATA_BASE, htile_offset >> 8,
+					&rtex->hyperz->resource, RADEON_USAGE_READWRITE);
+		r600_pipe_state_add_reg(rstate, R_028ABC_DB_HTILE_SURFACE, rtex->htile_surface, NULL, 0);
+		z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+		/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+		db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_ENABLE) |
+				S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_OFF) |
+				S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_OFF);
+		r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, rtex->sclear_value, NULL, 0);
+		/* dclear_value is a float: emit its IEEE-754 bit pattern, not the value converted to an integer */
+		r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, fui(rtex->dclear_value), NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, rtex->db_preload_control, NULL, 0);
+	}
+
+	rctx->db_render_override = db_render_override;
+	rctx->db_render_control = db_render_control;
+	r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, z_info,
 				&rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
@@ -2498,6 +2518,9 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
 
 	rstate->nregs = 0;
 
+	/* Z order is a driver provided hint to the hw.  In most cases leave it to early then late.
+	 * short shaders -> late Z, medium shaders -> early Z, and long shaders -> ReZ
+	 */
 	db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
 	for (i = 0; i < rshader->ninput; i++) {
 		/* evergreen NUM_INTERP only contains values interpolated into the LDS,
@@ -2728,6 +2751,8 @@ void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
 	rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
 	r600_pipe_state_add_reg(rstate,
 				R_028000_DB_RENDER_CONTROL,
+				S_028000_DEPTH_CLEAR_ENABLE(1) |
+				S_028000_STENCIL_CLEAR_ENABLE(1) |
 				S_028000_DEPTH_COPY_ENABLE(1) |
 				S_028000_STENCIL_COPY_ENABLE(1) |
 				S_028000_COPY_CENTROID(1),
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index ff720c9..54ef566 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -233,6 +233,13 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx,
 				     unsigned width, unsigned height)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_resource_texture *rtex;
+	float dclear_value = depth;
+
+	/* remember the clear values; the DB setup programs them into DB_DEPTH_CLEAR/DB_STENCIL_CLEAR */
+	rtex = (struct r600_resource_texture*)dst->texture;
+	rtex->dclear_value = dclear_value;
+	rtex->sclear_value = stencil;
 
 	r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
 	util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil,
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index ab51b3e..d00c41b 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -454,6 +454,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0},
 	{R_028D0C_DB_RENDER_CONTROL, 0, 0},
 	{R_028D10_DB_RENDER_OVERRIDE, 0, 0},
+	{R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0},
 	{R_028D24_DB_HTILE_SURFACE, 0, 0},
 	{R_028D30_DB_PRELOAD_CONTROL, 0, 0},
 	{R_028D34_DB_PREFETCH_LIMIT, 0, 0},
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index f130617..a1a4a09 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -118,6 +118,7 @@ struct r600_screen {
 
 	unsigned			num_contexts;
 	unsigned			use_surface;
+	unsigned			hyperz_enabled;
 
 	/* for thread-safe write accessing to num_contexts */
 	pipe_mutex			mutex_num_contexts;
@@ -150,8 +151,6 @@ struct r600_pipe_blend {
 struct r600_pipe_dsa {
 	struct r600_pipe_state		rstate;
 	unsigned			alpha_ref;
-	unsigned			db_render_override;
-	unsigned			db_render_control;
 	ubyte				valuemask[2];
 	ubyte				writemask[2];
 };
@@ -237,6 +236,8 @@ struct r600_context {
 	struct r600_vertex_element	*vertex_elements;
 	struct r600_pipe_resource_state	fs_resource[PIPE_MAX_ATTRIBS];
 	struct pipe_framebuffer_state	framebuffer;
+	unsigned			db_render_override;
+	unsigned			db_render_control;
 	unsigned			cb_target_mask;
 	unsigned			cb_color_control;
 	unsigned			pa_sc_line_stipple;
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 8b90b12..c488a24 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -57,10 +57,15 @@ struct r600_resource_texture {
 	unsigned			tile_type;
 	unsigned			depth;
 	unsigned			dirty_db;
-	struct r600_resource_texture    *stencil; /* Stencil is in a separate buffer on Evergreen. */
+	struct r600_resource_texture	*stencil; /* Stencil is in a separate buffer on Evergreen. */
 	struct r600_resource_texture	*flushed_depth_texture;
 	boolean				is_flushing_texture;
 	struct radeon_surface		surface;
+	struct r600_resource_texture	*hyperz; /* hyperz */
+	uint32_t			htile_surface;
+	uint32_t			db_preload_control;
+	uint32_t			sclear_value;
+	float				dclear_value;
 };
 
 #define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 0d83fa6..9a30299 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -854,7 +854,6 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa);
 	unsigned db_depth_control, alpha_test_control, alpha_ref;
-	unsigned db_render_override, db_render_control;
 	struct r600_pipe_state *rstate;
 
 	if (dsa == NULL) {
@@ -900,28 +899,14 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
 	}
 	dsa->alpha_ref = alpha_ref;
 
-	/* misc */
-	db_render_control = 0;
-	db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
-		S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
-		S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
-	/* TODO db_render_override depends on query */
-	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, NULL, 0);
-	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, NULL, 0);
-	r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, NULL, 0);
-	r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, NULL, 0);
-	r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, NULL, 0);
 
-	dsa->db_render_override = db_render_override;
-	dsa->db_render_control = db_render_control;
-
 	return rstate;
 }
 
@@ -1677,6 +1662,7 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate,
 	struct r600_resource_texture *rtex;
 	struct r600_surface *surf;
 	unsigned level, pitch, slice, format, offset, array_mode;
+	unsigned db_render_override, db_depth_info, db_render_control;
 
 	if (state->zsbuf == NULL)
 		return;
@@ -1734,8 +1720,37 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate,
 					S_028004_SLICE_MAX(state->zsbuf->u.tex.last_layer),
 					NULL, 0);
 	}
+
+	db_depth_info = S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format);
+	db_render_control = 0;
+	db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
+		S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
+		S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
+	/* htile buffer attached: point the DB at it, enable the tile surface and program clear/preload state */
+	if (rtex->hyperz) {
+		uint64_t htile_offset = rtex->hyperz->surface.level[0].offset;
+
+		r600_pipe_state_add_reg(rstate, R_028014_DB_HTILE_DATA_BASE, htile_offset >> 8,
+					&rtex->hyperz->resource, RADEON_USAGE_READWRITE);
+		r600_pipe_state_add_reg(rstate, R_028D24_DB_HTILE_SURFACE, rtex->htile_surface, NULL, 0);
+		db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1);
+		/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+		db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF) |
+				S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_OFF) |
+				S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_OFF);
+		r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, rtex->sclear_value, NULL, 0);
+		/* dclear_value is a float: emit its IEEE-754 bit pattern, not the value converted to an integer */
+		r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, fui(rtex->dclear_value), NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, rtex->db_preload_control, NULL, 0);
+	}
+
+	r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, NULL, 0);
+	rctx->db_render_override = db_render_override;
+	rctx->db_render_control = db_render_control;
+
 	r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO,
-				S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format),
+				db_depth_info,
 				&rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT,
 				(surf->aligned_height / 8) - 1, NULL, 0);
@@ -2406,14 +2421,13 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx)
 	dsa_state = (struct r600_pipe_dsa*)rstate;
 
 	db_render_control =
+		S_028D0C_DEPTH_CLEAR_ENABLE(1) |
+		S_028D0C_STENCIL_CLEAR_ENABLE(1) |
 		S_028D0C_DEPTH_COPY_ENABLE(1) |
 		S_028D0C_STENCIL_COPY_ENABLE(1) |
 		S_028D0C_COPY_CENTROID(1);
-
 	r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, NULL, 0);
 
-	dsa_state->db_render_control = db_render_control;
-
 	return rstate;
 }
 
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 21e4bd1..84a0173 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -736,7 +736,6 @@ static void r600_update_derived_state(struct r600_context *rctx)
 void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_pipe_dsa *dsa = (struct r600_pipe_dsa*)rctx->states[R600_PIPE_STATE_DSA];
 	struct pipe_draw_info info = *dinfo;
 	struct r600_draw rdraw = {};
 	struct pipe_index_buffer ib = {};
@@ -854,8 +853,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
 
 	r600_context_pipe_state_set(rctx, &rctx->vgt);
 
-	rdraw.db_render_override = dsa->db_render_override;
-	rdraw.db_render_control = dsa->db_render_control;
+	rdraw.db_render_override = rctx->db_render_override;
+	rdraw.db_render_control = rctx->db_render_control;
 
 	/* Emit states. */
 	r600_need_cs_space(rctx, 0, TRUE);
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 4e2e600..2775ed4 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -413,6 +413,57 @@ static void r600_setup_miptree(struct pipe_screen *screen,
 	rtex->size = offset;
 }
 
+/* Build the DB_HTILE_SURFACE value for zbuf.
+ * The flags are chosen from the number of pixels per DB or, above 512k pixels,
+ * from the width per DB; both are derived from the tile pipe and backend counts.
+ */
+static uint32_t r600_htile_settings(struct r600_screen *rscreen, struct r600_resource_texture *zbuf)
+{
+	const unsigned kilo = 1024;
+	unsigned pipes = rscreen->info.r600_num_tile_pipes;
+	unsigned pipes_per_db = pipes / rscreen->info.r600_num_backends;
+	unsigned pixels_per_db = (zbuf->surface.npix_x * zbuf->surface.npix_y * pipes_per_db) / pipes;
+	unsigned db_width;
+	uint32_t settings = 0;
+
+	/* evergreen (CHIP_CEDAR and up) always uses 8x8 htiles */
+	if (rscreen->family >= CHIP_CEDAR)
+		settings |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1);
+
+	/* up to 512k pixels per DB: LINEAR + PRELOAD, with more bits set as the size grows */
+	if (pixels_per_db <= 64 * kilo)
+		return settings | S_028D24_LINEAR(1) | S_028D24_PRELOAD(1);
+	if (pixels_per_db <= 128 * kilo)
+		return settings | S_028D24_FULL_CACHE(1) | S_028D24_LINEAR(1) |
+			S_028D24_PRELOAD(1);
+	if (pixels_per_db <= 256 * kilo)
+		return settings | S_028D24_HTILE_WIDTH(1) | S_028D24_FULL_CACHE(1) |
+			S_028D24_LINEAR(1) | S_028D24_PRELOAD(1);
+	if (pixels_per_db <= 512 * kilo)
+		return settings | S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1) |
+			S_028D24_FULL_CACHE(1) | S_028D24_LINEAR(1) |
+			S_028D24_PRELOAD(1);
+
+	/* Larger surfaces: LINEAR is not set and the preload window is used.
+	 * Only PREFETCH_HEIGHT differs between the cases below; it gets smaller
+	 * as the width per DB grows.
+	 */
+	settings |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1) |
+			S_028D24_FULL_CACHE(1) | S_028D24_PRELOAD(1) |
+			S_028D24_PREFETCH_WIDTH(16) |
+			S_028D24_HTILE_USES_PRELOAD_WIN(1);
+
+	db_width = (zbuf->surface.npix_x * pipes_per_db) / pipes;
+	if (db_width <= 512) {
+		settings |= S_028D24_PREFETCH_HEIGHT(4);
+	} else if (db_width <= 1024) {
+		settings |= S_028D24_PREFETCH_HEIGHT(2);
+	}
+	/* wider than 1024 per DB: PREFETCH_HEIGHT is left at 0 */
+
+	return settings;
+}
+
 /* Figure out whether u_blitter will fallback to a transfer operation.
  * If so, don't use a staging resource.
  */
@@ -495,6 +546,7 @@ static const struct u_resource_vtbl r600_texture_vtbl =
 };
 
 DEBUG_GET_ONCE_BOOL_OPTION(use_surface, "R600_SURF", TRUE);
+DEBUG_GET_ONCE_BOOL_OPTION(hyperz_enabled, "R600_HYPERZ", TRUE);
 
 static struct r600_resource_texture *
 r600_texture_create_object(struct pipe_screen *screen,
@@ -514,8 +566,13 @@ r600_texture_create_object(struct pipe_screen *screen,
 	/* FIXME ugly temporary hack to allow to switch btw current code
 	 * and common surface allocator code
 	 */
+	if (debug_get_option_hyperz_enabled()) {
+		rscreen->hyperz_enabled = 1;
+	}
 	if (debug_get_option_use_surface()) {
 		rscreen->use_surface = 1;
+	} else {
+		rscreen->hyperz_enabled = 0;
 	}
 
 	rtex = CALLOC_STRUCT(r600_resource_texture);
@@ -574,6 +631,58 @@ r600_texture_create_object(struct pipe_screen *screen,
 		/* Proceed in creating the depth buffer. */
 	}
 
+	rtex->hyperz = NULL;
+	if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+	    util_format_is_depth_or_stencil(base->format) &&
+	    rscreen->use_surface &&
+	    rscreen->hyperz_enabled &&
+	    rscreen->info.drm_minor >= 14) {
+		struct pipe_resource hyperz;
+		struct radeon_surface hsurface;
+		unsigned max_x, max_y, psize, htile_max;
+
+		/* Allocate the hyperz buffer. */
+		hyperz = *base;
+		hyperz.format = PIPE_FORMAT_L32_UINT;
+		hsurface = *surface;
+		/* FIXME compute htile width & height depending on buffer size
+		 * and number of pipes
+		 */
+		hsurface.npix_x = hsurface.npix_x * hsurface.blk_w;
+		hsurface.npix_y = hsurface.npix_y * hsurface.blk_h;
+		hsurface.blk_w = 4;
+		hsurface.blk_h = 4;
+		hsurface.bpe = 4;
+		rtex->htile_surface = r600_htile_settings(rscreen, rtex);
+		if (G_028D24_HTILE_WIDTH(rtex->htile_surface)) {
+			hsurface.blk_w = 8;
+		}
+		if (G_028D24_HTILE_HEIGHT(rtex->htile_surface)) {
+			hsurface.blk_h = 8;
+		}
+
+		psize = (rscreen->family >= CHIP_CEDAR) ? 64 : 32;
+		max_x = (hsurface.npix_x + hsurface.blk_w - 1) / hsurface.blk_w;
+		max_y = (hsurface.npix_y + hsurface.blk_h - 1) / hsurface.blk_h;
+		max_x = (max_x + psize - 1) / psize;
+		max_y = (max_y + psize - 1) / psize;
+		htile_max = 8192 / ((psize / hsurface.blk_w) * (psize / hsurface.blk_h));
+		while ((max_x * max_y) > htile_max) {
+			max_x -= 1;
+			max_y -= 1;
+		}
+		rtex->db_preload_control = S_028D30_MAX_X(max_x) | S_028D30_MAX_Y(max_y);
+//		rtex->db_preload_control = 0;
+
+		rtex->hyperz = r600_texture_create_object(screen, &hyperz, array_mode, 0,
+							  max_buffer_size, NULL, TRUE, &hsurface);
+		if (!rtex->hyperz) {
+			FREE(rtex);
+			return NULL;
+		}
+		/* Proceed in creating the depth buffer. */
+	}
+
 	/* only mark depth textures the HW can hit as depth textures */
 	if (util_format_is_depth_or_stencil(rtex->real_format) && permit_hardware_blit(screen, base))
 		rtex->depth = 1;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 3c3238a..b7b4dea 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -663,6 +663,7 @@
 #define   S_028004_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
 #define   G_028004_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
 #define   C_028004_SLICE_MAX                           0xFF001FFF
+#define R_028014_DB_HTILE_DATA_BASE                  0x00028014
 #define R_028D24_DB_HTILE_SURFACE                    0x028D24
 #define   S_028D24_HTILE_WIDTH(x)                      (((x) & 0x1) << 0)
 #define   G_028D24_HTILE_WIDTH(x)                      (((x) >> 0) & 0x1)
@@ -2248,6 +2249,18 @@
 #define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
 #define R_028D2C_DB_SRESULTS_COMPARE_STATE1          0x028D2C
 #define R_028D30_DB_PRELOAD_CONTROL                  0x028D30
+#define   S_028D30_START_X(x)                          (((x) & 0xFF) << 0)
+#define   G_028D30_START_X(x)                          (((x) >> 0) & 0xFF)
+#define   C_028D30_START_X                             0xFFFFFF00
+#define   S_028D30_START_Y(x)                          (((x) & 0xFF) << 8)
+#define   G_028D30_START_Y(x)                          (((x) >> 8) & 0xFF)
+#define   C_028D30_START_Y                             0xFFFF00FF
+#define   S_028D30_MAX_X(x)                            (((x) & 0xFF) << 16)
+#define   G_028D30_MAX_X(x)                            (((x) >> 16) & 0xFF)
+#define   C_028D30_MAX_X                               0xFF00FFFF
+#define   S_028D30_MAX_Y(x)                            (((x) & 0xFF) << 24)
+#define   G_028D30_MAX_Y(x)                            (((x) >> 24) & 0xFF)
+#define   C_028D30_MAX_Y                               0x00FFFFFF
 #define R_028D44_DB_ALPHA_TO_MASK                    0x028D44
 #define R_028868_SQ_PGM_RESOURCES_VS                 0x028868
 #define R_0286CC_SPI_PS_IN_CONTROL_0                 0x0286CC
-- 
1.7.7.1

