From e28666346687ed0374ffe628e2fe7c69f18dc37b Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Tue, 5 Jun 2012 15:59:03 -0400
Subject: [PATCH] r600g: add htile support v6

htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear; still random issues with some tiles,
   need to try more flush combinations; fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pelloux@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 src/gallium/drivers/r600/evergreen_hw_context.c |    8 +-
 src/gallium/drivers/r600/evergreen_state.c      |   79 ++++++--
 src/gallium/drivers/r600/evergreend.h           |    4 +
 src/gallium/drivers/r600/r600_blit.c            |   43 ++++
 src/gallium/drivers/r600/r600_hw_context.c      |    4 +
 src/gallium/drivers/r600/r600_pipe.c            |    1 +
 src/gallium/drivers/r600/r600_pipe.h            |   13 +-
 src/gallium/drivers/r600/r600_resource.h        |    5 +
 src/gallium/drivers/r600/r600_state.c           |   62 +++++--
 src/gallium/drivers/r600/r600_state_common.c    |   43 +++--
 src/gallium/drivers/r600/r600_texture.c         |  233 +++++++++++++++++++++++
 src/gallium/drivers/r600/r600d.h                |    5 +
 12 files changed, 446 insertions(+), 54 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 2ab29c9..6682d5e 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -43,7 +43,6 @@ static const struct r600_reg evergreen_ctl_const_list[] = {
 };
 
 static const struct r600_reg evergreen_context_reg_list[] = {
-	{R_028000_DB_RENDER_CONTROL, 0, 0},
 	{R_028008_DB_DEPTH_VIEW, 0, 0},
 	{R_028010_DB_RENDER_OVERRIDE2, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0},
@@ -63,6 +62,9 @@ static const struct r600_reg evergreen_context_reg_list[] = {
 	{GROUP_FORCE_NEW_BLOCK, 0, 0},
 	{R_028058_DB_DEPTH_SIZE, 0, 0},
 	{R_02805C_DB_DEPTH_SLICE, 0, 0},
+	{R_02802C_DB_DEPTH_CLEAR, 0, 0},
+	{R_028ABC_DB_HTILE_SURFACE, 0, 0},
+	{R_028AC8_DB_PRELOAD_CONTROL, 0, 0},
 	{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
 	{R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
 	{R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
@@ -303,7 +305,6 @@ static const struct r600_reg evergreen_context_reg_list[] = {
 };
 
 static const struct r600_reg cayman_context_reg_list[] = {
-	{R_028000_DB_RENDER_CONTROL, 0, 0},
 	{R_028008_DB_DEPTH_VIEW, 0, 0},
 	{R_028010_DB_RENDER_OVERRIDE2, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0},
@@ -323,6 +324,9 @@ static const struct r600_reg cayman_context_reg_list[] = {
 	{GROUP_FORCE_NEW_BLOCK, 0, 0},
 	{R_028058_DB_DEPTH_SIZE, 0, 0},
 	{R_02805C_DB_DEPTH_SLICE, 0, 0},
+	{R_02802C_DB_DEPTH_CLEAR, 0, 0},
+	{R_028ABC_DB_HTILE_SURFACE, 0, 0},
+	{R_028AC8_DB_PRELOAD_CONTROL, 0, 0},
 	{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
 	{R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
 	{R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index b618ca8..3e6deb8 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -710,9 +710,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
 		}
 	}
 	blend->cb_target_mask = target_mask;
-	
-	r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
-				color_control);
+	blend->cb_color_control = color_control;
 	/* only have dual source on MRT0 */
 	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
 	for (int i = 0; i < 8; i++) {
@@ -755,7 +753,6 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa);
 	unsigned db_depth_control, alpha_test_control, alpha_ref;
-	unsigned db_render_control;
 	struct r600_pipe_state *rstate;
 
 	if (dsa == NULL) {
@@ -803,9 +800,7 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	dsa->alpha_ref = alpha_ref;
 
 	/* misc */
-	db_render_control = 0;
 	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control);
-	r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control);
 	return rstate;
 }
 
@@ -1647,6 +1642,29 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta
 		}
 	}
 
+	/* hyperz */
+	if (rtex->hyperz) {
+		uint64_t htile_offset = rtex->hyperz->surface.level[0].offset;
+
+		z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+		r600_pipe_state_add_reg_bo(rstate, R_028014_DB_HTILE_DATA_BASE,
+					   htile_offset >> 8, &rtex->hyperz->resource,
+					   RADEON_USAGE_READWRITE);
+		/* per-texture depth clear value and htile surface/preload settings */
+		r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, fui(rtex->dclear_value));
+		r600_pipe_state_add_reg(rstate, R_028ABC_DB_HTILE_SURFACE, rtex->db_htile_surface);
+		r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, rtex->db_preload_control);
+		if (!rctx->db_misc_state.hyperz) {
+			rctx->db_misc_state.hyperz = TRUE;
+			r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+		}
+	} else {
+		if (rctx->db_misc_state.hyperz) {
+			rctx->db_misc_state.hyperz = FALSE;
+			r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+		}
+	}
+
 	r600_pipe_state_add_reg_bo(rstate, R_028040_DB_Z_INFO, z_info,
 				&rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
@@ -1707,17 +1725,38 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
 {
 	struct radeon_winsys_cs *cs = rctx->cs;
 	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
+	unsigned db_render_override = 0;
+	unsigned db_render_control = 0;
 	unsigned db_count_control = 0;
-	unsigned db_render_override =
-		S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
-		S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
-		S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
 
+	if (a->hyperz) {
+		/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+		db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF) |
+				      S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+				      S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
+		if (a->clear_depthstencil) {
+			db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
+		}
+		if (a->resummarize) {
+			db_render_control |= S_028000_RESUMMARIZE_ENABLE(1);
+			db_render_override |= S_02800C_FORCE_Z_READ(1);
+		}
+	} else {
+		db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
+				      S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+				      S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
+	}
 	if (a->occlusion_query_enabled) {
 		db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
 		db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
 	}
+	if (a->flush_depthstencil_enabled) {
+		db_render_control |= S_028000_DEPTH_COPY_ENABLE(1) |
+				     S_028000_STENCIL_COPY_ENABLE(1) |
+				     S_028000_COPY_CENTROID(1);
+	}
 
+	r600_write_context_reg(cs, R_028000_DB_RENDER_CONTROL, db_render_control);
 	r600_write_context_reg(cs, R_028004_DB_COUNT_CONTROL, db_count_control);
 	r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
 }
@@ -1988,16 +2027,20 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, ~0); /* CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 */
 	r600_store_value(cb, ~0); /* CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 */
 
-	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
+	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 1);
 	r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
-	r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+
+	r600_store_context_reg_seq(cb, R_009830_DB_DEBUG, 3);
+	r600_store_value(cb, 0); /* R_009830_DB_DEBUG */
+	r600_store_value(cb, 0); /* R_009834_DB_DEBUG2 */
+	r600_store_value(cb, 0); /* R_009838_DB_DEBUG3 */
+	r600_store_config_reg(cb, R_009854_DB_WATERMARKS, 0x00420204);
 
 	r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
 
-	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
+	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 2);
 	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
 	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
-	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
 
 	r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
 	r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
@@ -2486,10 +2529,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
 	r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
 
-	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
+	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 2);
 	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
 	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
-	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
 
 	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
 
@@ -2794,11 +2836,6 @@ void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
 	memset(&dsa, 0, sizeof(dsa));
 
 	rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
-	r600_pipe_state_add_reg(rstate,
-				R_028000_DB_RENDER_CONTROL,
-				S_028000_DEPTH_COPY_ENABLE(1) |
-				S_028000_STENCIL_COPY_ENABLE(1) |
-				S_028000_COPY_CENTROID(1));
 	/* Don't set the 'is_flush' flag in r600_pipe_dsa, evergreen doesn't need it. */
 	return rstate;
 }
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 3b6d730..c458d9e 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1580,6 +1580,10 @@
 #define   S_028008_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
 #define   G_028008_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
 #define   C_028008_SLICE_MAX                           0xFF001FFF
+#define R_009830_DB_DEBUG                            0x00009830
+#define R_009834_DB_DEBUG2                           0x00009834
+#define R_009838_DB_DEBUG3                           0x00009838
+#define R_009854_DB_WATERMARKS                       0x00009854
 #define R_02800C_DB_RENDER_OVERRIDE                  0x0002800C
 #define   V_02800C_FORCE_OFF                         0
 #define   V_02800C_FORCE_ENABLE                      1
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 1122f3e..dada0a5 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -24,6 +24,7 @@
 #include "util/u_surface.h"
 #include "util/u_blitter.h"
 #include "util/u_format.h"
+#include "r600d.h"
 
 enum r600_blitter_op /* bitmask */
 {
@@ -147,6 +148,8 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t
 					(struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl);
 
 			r600_blitter_begin(ctx, R600_DECOMPRESS);
+			rctx->db_misc_state.flush_depthstencil_enabled = TRUE;
+			r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
 			util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth);
 			r600_blitter_end(ctx);
 
@@ -203,6 +206,36 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct pipe_framebuffer_state *fb = &rctx->framebuffer;
 
+	/* if hyperz enabled just clear hyperz */
+	if (fb->zsbuf && (buffers & PIPE_CLEAR_DEPTHSTENCIL)) {
+		struct r600_resource_texture *rtex;
+
+		rtex = (struct r600_resource_texture*)fb->zsbuf->texture;
+		if (rtex->hyperz) {
+			/* set clear value, as we use R600_CLEAR_SURFACE
+			 * the framebuffer state will be reset with proper
+			 * depth clear value
+			 */
+			rtex->dclear_value = depth;
+
+			r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
+			if (buffers & PIPE_CLEAR_DEPTH) {
+				rctx->db_misc_state.hyperz = TRUE;
+				rctx->db_misc_state.clear_depthstencil = TRUE;
+				r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+			}
+			util_blitter_clear_depth_stencil(rctx->blitter,
+							 fb->zsbuf,
+							 buffers & PIPE_CLEAR_DEPTHSTENCIL,
+							 depth, stencil,
+							 0, 0,
+							 fb->zsbuf->texture->width0,
+							 fb->zsbuf->texture->height0);
+			r600_blitter_end(ctx);
+			buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
+		}
+	}
+
 	r600_blitter_begin(ctx, R600_CLEAR);
 	util_blitter_clear(rctx->blitter, fb->width, fb->height,
 			   fb->nr_cbufs, buffers, fb->nr_cbufs ? fb->cbufs[0]->format : PIPE_FORMAT_NONE,
@@ -252,8 +285,18 @@ static void r600_hw_copy_region(struct pipe_context *ctx,
 				const struct pipe_box *src_box)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_resource_texture *rtex;
 
 	r600_blitter_begin(ctx, R600_COPY);
+	rtex = (struct r600_resource_texture*)dst;
+	if (rtex->hyperz) {
+		/* when we copy to a depth buffer we need to resummarize
+		 * the htile surface
+		 */
+		rctx->db_misc_state.hyperz = TRUE;
+		rctx->db_misc_state.resummarize = TRUE;
+		r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+	}
 	util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
 				  src, src_level, src_box, TRUE);
 	r600_blitter_end(ctx);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index a022436..088834a 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -367,7 +367,11 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{GROUP_FORCE_NEW_BLOCK, 0, 0},
 	{R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0},
 	{R_028A6C_VGT_GS_OUT_PRIM_TYPE, 0, 0},
+	{R_02802C_DB_DEPTH_CLEAR, 0, 0},
+	{GROUP_FORCE_NEW_BLOCK, 0, 0},
+	{R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0},
 	{R_028D24_DB_HTILE_SURFACE, 0, 0},
+	{R_028D30_DB_PRELOAD_CONTROL, 0, 0},
 	{R_028D34_DB_PREFETCH_LIMIT, 0, 0},
 	{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
 	{R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index e0ee823..7004ff9 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -942,6 +942,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 
 	rscreen->use_surface_alloc = debug_get_bool_option("R600_SURF", TRUE);
 	rscreen->glsl_feature_level = debug_get_bool_option("R600_GLSL130", TRUE) ? 130 : 120;
+	rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", FALSE);
 
 	rscreen->global_pool = compute_memory_pool_new(1024*16, rscreen);
 
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index f2865d2..c10cbc1 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -76,9 +76,12 @@ struct r600_surface_sync_cmd {
 };
 
 struct r600_db_misc_state {
-	struct r600_atom atom;
-	bool occlusion_query_enabled;
-	bool flush_depthstencil_enabled;
+	struct r600_atom	atom;
+	bool			occlusion_query_enabled;
+	bool			flush_depthstencil_enabled;
+	bool			clear_depthstencil;
+	bool			hyperz;
+	bool			resummarize;
 };
 
 enum r600_pipe_state_id {
@@ -132,6 +135,7 @@ struct r600_screen {
 	struct r600_pipe_fences		fences;
 
 	bool				use_surface_alloc;
+	bool				use_hyperz;
 	int 				glsl_feature_level;
 
 	/*for compute global memory binding, we allocate stuff here, instead of
@@ -171,8 +175,7 @@ struct r600_pipe_dsa {
 	unsigned			alpha_ref;
 	ubyte				valuemask[2];
 	ubyte				writemask[2];
-	bool				is_flush;
-	unsigned                        sx_alpha_test_control;
+	unsigned			sx_alpha_test_control;
 };
 
 struct r600_vertex_element
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index d401e40..e8f2c23 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -63,6 +63,11 @@ struct r600_resource_texture {
 	struct r600_resource_texture	*flushed_depth_texture;
 	boolean				is_flushing_texture;
 	struct radeon_surface		surface;
+	unsigned			db_prefetch_limit;
+	unsigned			db_htile_surface;
+	unsigned			db_preload_control;
+	float				dclear_value;
+	struct r600_resource_texture	*hyperz;
 };
 
 #define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 124eba2..8c1a542 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1000,7 +1000,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 	}
 
 	if (tmp->is_depth && !tmp->is_flushing_texture) {
-	        r600_texture_depth_flush(ctx, texture, TRUE);
+		r600_texture_depth_flush(ctx, texture, TRUE);
 		tmp = tmp->flushed_depth_texture;
 	}
 
@@ -1559,6 +1559,7 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate,
 	struct r600_resource_texture *rtex;
 	struct r600_surface *surf;
 	unsigned level, pitch, slice, format, offset, array_mode;
+	unsigned db_depth_info;
 
 	if (state->zsbuf == NULL)
 		return;
@@ -1602,6 +1603,30 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate,
 	}
 
 	format = r600_translate_dbformat(state->zsbuf->texture->format);
+	db_depth_info = S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format);
+
+	/* hyperz */
+	if (rtex->hyperz) {
+		uint64_t htile_offset = rtex->hyperz->surface.level[0].offset;
+
+		db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1);
+		r600_pipe_state_add_reg_bo(rstate, R_028014_DB_HTILE_DATA_BASE,
+					   htile_offset >> 8, &rtex->hyperz->resource,
+					   RADEON_USAGE_READWRITE);
+		r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, fui(rtex->dclear_value));
+		r600_pipe_state_add_reg(rstate, R_028D24_DB_HTILE_SURFACE, rtex->db_htile_surface);
+		r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, rtex->db_preload_control);
+		r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, rtex->db_prefetch_limit);
+		if (!rctx->db_misc_state.hyperz) {
+			rctx->db_misc_state.hyperz = TRUE;
+			r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+		}
+	} else {
+		if (rctx->db_misc_state.hyperz) {
+			rctx->db_misc_state.hyperz = FALSE;
+			r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+		}
+	}
 
 	r600_pipe_state_add_reg_bo(rstate, R_02800C_DB_DEPTH_BASE,
 				offset >> 8, &rtex->resource, RADEON_USAGE_READWRITE);
@@ -1615,8 +1640,8 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate,
 					S_028004_SLICE_MAX(state->zsbuf->u.tex.last_layer));
 	}
 	r600_pipe_state_add_reg_bo(rstate, R_028010_DB_DEPTH_INFO,
-				S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format),
-				&rtex->resource, RADEON_USAGE_READWRITE);
+				   db_depth_info,
+				   &rtex->resource, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT,
 				(surf->aligned_height / 8) - 1);
 }
@@ -1678,10 +1703,27 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 	struct radeon_winsys_cs *cs = rctx->cs;
 	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
 	unsigned db_render_control = 0;
-	unsigned db_render_override =
-		S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
-		S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
-		S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
+	unsigned db_render_override = 0;
+
+	if (a->hyperz) {
+		/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
+		db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF) |
+				      S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
+				      S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
+		if (a->clear_depthstencil) {
+			db_render_control |= S_028D0C_DEPTH_CLEAR_ENABLE(1);
+		}
+		if (a->resummarize) {
+			db_render_control |= S_028D0C_RESUMMARIZE_ENABLE(1);
+			db_render_override |= S_028D10_FORCE_Z_READ(1) |
+					      S_028D10_FAST_Z_DISABLE(1) |
+					      S_028D10_FORCE_FULL_Z_RANGE(1);
+		}
+	} else {
+		db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) |
+				      S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
+				      S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
+	}
 
 	if (a->occlusion_query_enabled) {
 		if (rctx->chip_class >= R700) {
@@ -2144,9 +2186,8 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
 
 	r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
 
-	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
+	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 1);
 	r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
-	r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
 
 	r600_store_context_reg_seq(cb, R_0286DC_SPI_FOG_CNTL, 3);
 	r600_store_value(cb, 0); /* R_0286DC_SPI_FOG_CNTL */
@@ -2409,7 +2450,6 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx)
 {
 	struct pipe_depth_stencil_alpha_state dsa;
 	struct r600_pipe_state *rstate;
-	struct r600_pipe_dsa *dsa_state;
 	boolean quirk = false;
 
 	if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 ||
@@ -2429,7 +2469,5 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx)
 	}
 
 	rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
-	dsa_state = (struct r600_pipe_dsa*)rstate;
-	dsa_state->is_flush = true;
 	return rstate;
 }
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 00e1bd0..c17520f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -159,8 +159,12 @@ void r600_bind_blend_state(struct pipe_context *ctx, void *state)
 	rctx->states[rstate->id] = rstate;
 	rctx->cb_target_mask = blend->cb_target_mask;
 	/* Replace every bit except MULTIWRITE_ENABLE. */
-	rctx->cb_color_control &= ~C_028808_MULTIWRITE_ENABLE;
-	rctx->cb_color_control |= blend->cb_color_control & C_028808_MULTIWRITE_ENABLE;
+	if (rctx->chip_class <= R700) {
+		rctx->cb_color_control &= ~C_028808_MULTIWRITE_ENABLE;
+		rctx->cb_color_control |= blend->cb_color_control & C_028808_MULTIWRITE_ENABLE;
+	} else {
+		rctx->cb_color_control = blend->cb_color_control;
+	}
 	rctx->dual_src_blend = blend->dual_src_blend;
 	r600_context_pipe_state_set(rctx, rstate);
 }
@@ -258,11 +262,6 @@ void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
 	ref.writemask[1] = dsa->writemask[1];
 
 	r600_set_stencil_ref(ctx, &ref);
-
-	if (rctx->db_misc_state.flush_depthstencil_enabled != dsa->is_flush) {
-		rctx->db_misc_state.flush_depthstencil_enabled = dsa->is_flush;
-		r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
-	}
 }
 
 void r600_set_max_scissor(struct r600_context *rctx)
@@ -458,8 +457,10 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
 	r600_inval_shader_cache(rctx);
 	r600_context_pipe_state_set(rctx, &rctx->ps_shader->rstate);
 
-	rctx->cb_color_control &= C_028808_MULTIWRITE_ENABLE;
-	rctx->cb_color_control |= S_028808_MULTIWRITE_ENABLE(!!rctx->ps_shader->shader.fs_write_all);
+	if (rctx->chip_class <= R700) {
+		rctx->cb_color_control &= C_028808_MULTIWRITE_ENABLE;
+		rctx->cb_color_control |= S_028808_MULTIWRITE_ENABLE(!!rctx->ps_shader->shader.fs_write_all);
+	}
 
 	if (rctx->ps_shader && rctx->vs_shader) {
 		r600_adjust_gprs(rctx);
@@ -802,8 +803,14 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
 		r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info.primitive_restart);
 		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
 		r600_pipe_state_add_reg(&rctx->vgt, R_028A0C_PA_SC_LINE_STIPPLE, 0);
-		if (rctx->chip_class <= R700)
+		if (rctx->db_misc_state.clear_depthstencil) {
+			/* when clearing htile we need to make sure CB doesn't waste
+			 * bandwidth exporting anything
+			 */
+			r600_pipe_state_add_reg(&rctx->vgt, R_028808_CB_COLOR_CONTROL, 0);
+		} else {
 			r600_pipe_state_add_reg(&rctx->vgt, R_028808_CB_COLOR_CONTROL, rctx->cb_color_control);
+		}
 		r600_pipe_state_add_reg(&rctx->vgt, R_02881C_PA_CL_VS_OUT_CNTL, 0);
 		r600_pipe_state_add_reg(&rctx->vgt, R_028810_PA_CL_CLIP_CNTL, 0);
 	}
@@ -824,8 +831,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
 	else if (prim == V_008958_DI_PT_LINESTRIP) 
 		ls_mask = 2;
 	r600_pipe_state_mod_reg(&rctx->vgt, S_028A0C_AUTO_RESET_CNTL(ls_mask) | rctx->pa_sc_line_stipple);
-	if (rctx->chip_class <= R700)
-		r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_color_control);
+	r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_color_control);
 	r600_pipe_state_mod_reg(&rctx->vgt,
 				rctx->vs_shader->pa_cl_vs_out_cntl |
 				(rctx->rasterizer->clip_plane_enable & rctx->vs_shader->shader.clip_dist_write));
@@ -883,8 +889,17 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
 
 	rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING;
 
-	if (rctx->framebuffer.zsbuf)
-	{
+	/* reset the one-shot depth clear/resummarize/flush flags and re-emit db_misc_state */
+	if (rctx->db_misc_state.clear_depthstencil |
+	    rctx->db_misc_state.resummarize |
+	    rctx->db_misc_state.flush_depthstencil_enabled) {
+		rctx->db_misc_state.flush_depthstencil_enabled = FALSE;
+		rctx->db_misc_state.clear_depthstencil = FALSE;
+		rctx->db_misc_state.resummarize = FALSE;
+		r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+	}
+
+	if (rctx->framebuffer.zsbuf) {
 		struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture;
 		((struct r600_resource_texture *)tex)->dirty_db = TRUE;
 	}
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 5b15990..b703c5f 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -478,6 +478,10 @@ static void r600_texture_destroy(struct pipe_screen *screen,
 	if (rtex->stencil)
 		pipe_resource_reference((struct pipe_resource **)&rtex->stencil, NULL);
 
+	if (rtex->hyperz) {
+		pipe_resource_reference((struct pipe_resource **)&rtex->hyperz, NULL);
+	}
+
 	pb_reference(&resource->buf, NULL);
 	FREE(rtex);
 }
@@ -494,6 +498,179 @@ static const struct u_resource_vtbl r600_texture_vtbl =
 	NULL				/* transfer_inline_write */
 };
 
+static void r600_htile_settings(struct r600_screen *rscreen,
+				struct r600_resource_texture *zbuf,
+				struct radeon_surface *hsurface)
+{ /* compute zbuf's htile register values and pad hsurface npix_x/npix_y to the htile alignment */
+	unsigned max_pixels_per_DB;
+	unsigned width_per_DB;
+	unsigned npipes = rscreen->info.r600_num_tile_pipes;
+	const unsigned k = 1024;
+
+	max_pixels_per_DB = (hsurface->npix_x * hsurface->npix_y) / rscreen->info.r600_num_backends;
+	/* defaults; both are overridden below when the preload window path is taken */
+	zbuf->db_prefetch_limit = (hsurface->npix_y / 8) - 1;
+	zbuf->db_preload_control = 0;
+	/* NOTE(review): db_htile_surface is only OR-ed into below; assumes zbuf starts zeroed - confirm */
+	/* eg (family >= CHIP_CEDAR) is always 8x8 */
+	if (rscreen->family >= CHIP_CEDAR) {
+		zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1);
+	}
+	zbuf->db_htile_surface |= S_028D24_PRELOAD(1);
+
+	if (max_pixels_per_DB <= 64 * k) {
+		zbuf->db_htile_surface |= S_028D24_LINEAR(1);
+	} else if (max_pixels_per_DB <= 128 * k) {
+		zbuf->db_htile_surface |= S_028D24_FULL_CACHE(1) | S_028D24_LINEAR(1);
+	} else if (max_pixels_per_DB <= 256 * k) {
+		zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) |
+					S_028D24_FULL_CACHE(1) |
+					S_028D24_LINEAR(1);
+	} else if (max_pixels_per_DB <= 512 * k) {
+		zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) |
+					S_028D24_HTILE_HEIGHT(1) |
+					S_028D24_FULL_CACHE(1) |
+					S_028D24_LINEAR(1);
+	} else {
+		unsigned npix_x, npix_y, startx, starty;
+		float a, b;
+
+		/* the window computed here has area x*y = 8 * 8 * 8k * num_db
+		 * (b) and keeps the z buffer aspect ratio (a), so
+		 * x/y == zbuffer_x / zbuffer_y; both sides are rounded down to 8 */
+		a = ((float)hsurface->npix_x) / ((float)hsurface->npix_y);
+		b = 8 * 8 * 8 * k * rscreen->info.r600_num_backends;
+		npix_y = ((unsigned)sqrt(b / a)) & ~7;
+		npix_x = ((unsigned)(b / ((float)npix_y))) & ~7;
+		startx = (hsurface->npix_x - npix_x) >> 6;
+		starty = (hsurface->npix_y - npix_y) >> 6;
+		zbuf->db_preload_control = S_028D30_START_X(startx) |
+					   S_028D30_START_Y(starty) |
+					   S_028D30_MAX_X(startx + (npix_x >> 5)) |
+					   S_028D30_MAX_Y(starty + (npix_y >> 5));
+		zbuf->db_prefetch_limit = (npix_y / 8) - 1;
+		/* r6xx/r7xx have issues with the preload window, so only use it from CHIP_CEDAR on */
+		if (rscreen->family >= CHIP_CEDAR) {
+			zbuf->db_htile_surface |= S_028D24_HTILE_USES_PRELOAD_WIN(1);
+		}
+
+		width_per_DB = hsurface->npix_x / rscreen->info.r600_num_backends;
+		if (width_per_DB <= 512) {
+			zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) |
+						S_028D24_HTILE_HEIGHT(1) |
+						S_028D24_FULL_CACHE(1) |
+						S_028D24_PREFETCH_WIDTH(16) |
+						S_028D24_PREFETCH_HEIGHT(4);
+		} else if (width_per_DB <= 1024) {
+			zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) |
+						S_028D24_HTILE_HEIGHT(1) |
+						S_028D24_FULL_CACHE(1) |
+						S_028D24_PREFETCH_WIDTH(16) |
+						S_028D24_PREFETCH_HEIGHT(2);
+		} else {
+			zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) |
+						S_028D24_HTILE_HEIGHT(1) |
+						S_028D24_FULL_CACHE(1) |
+						S_028D24_PREFETCH_WIDTH(16);
+		}
+	}
+
+	if (G_028D24_LINEAR(zbuf->db_htile_surface)) {
+		/* width must be aligned to 16 htiles == 16 * 8 pixels */
+		hsurface->npix_x = align(hsurface->npix_x, 16 * 8);
+		/* height is aligned to npipes htiles == npipes * 8 pixels */
+		hsurface->npix_y = align(hsurface->npix_y, npipes * 8);
+	} else {
+		unsigned tmp;
+
+		/* the htile width & height settings (8 or 4 each) form a 2-bit number */
+		tmp = zbuf->db_htile_surface & 3;
+		/* alignment is htile align * 8; the htile align varies with the
+		 * number of pipes and with the htile width and height
+		 */
+		switch (npipes) {
+		case 8:
+			switch (tmp) {
+			case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+				hsurface->npix_x = align(hsurface->npix_x, 64 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 64 * 8);
+				break;
+			case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
+			case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
+				hsurface->npix_x = align(hsurface->npix_x, 64 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 32 * 8);
+				break;
+			case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
+				hsurface->npix_x = align(hsurface->npix_x, 32 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 32 * 8);
+				break;
+			default:
+				break;
+			}
+			break;
+		case 4:
+			switch (tmp) {
+			case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+				hsurface->npix_x = align(hsurface->npix_x, 64 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 32 * 8);
+				break;
+			case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
+			case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
+				hsurface->npix_x = align(hsurface->npix_x, 32 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 32 * 8);
+				break;
+			case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
+				hsurface->npix_x = align(hsurface->npix_x, 32 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 16 * 8);
+				break;
+			default:
+				break;
+			}
+			break;
+		case 2:
+			switch (tmp) {
+			case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+				hsurface->npix_x = align(hsurface->npix_x, 32 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 32 * 8);
+				break;
+			case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
+			case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
+				hsurface->npix_x = align(hsurface->npix_x, 32 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 16 * 8);
+				break;
+			case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
+				hsurface->npix_x = align(hsurface->npix_x, 16 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 16 * 8);
+				break;
+			default:
+				break;
+			}
+			break;
+		case 1:
+			switch (tmp) {
+			case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+				hsurface->npix_x = align(hsurface->npix_x, 32 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 16 * 8);
+				break;
+			case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
+			case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
+				hsurface->npix_x = align(hsurface->npix_x, 16 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 16 * 8);
+				break;
+			case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
+				hsurface->npix_x = align(hsurface->npix_x, 16 * 8);
+				hsurface->npix_y = align(hsurface->npix_y, 8 * 8);
+				break;
+			default:
+				break;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 static struct r600_resource_texture *
 r600_texture_create_object(struct pipe_screen *screen,
 			   const struct pipe_resource *base,
@@ -520,6 +697,8 @@ r600_texture_create_object(struct pipe_screen *screen,
 	resource->b.b.screen = screen;
 	rtex->pitch_override = pitch_in_bytes_override;
 	rtex->real_format = base->format;
+	rtex->dirty_db = FALSE;
+	rtex->hyperz = NULL;
 
 	/* We must split depth and stencil into two separate buffers on Evergreen. */
 	if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) &&
@@ -579,6 +758,60 @@ r600_texture_create_object(struct pipe_screen *screen,
 		}
 	}
 
+	if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+	    util_format_is_depth_or_stencil(base->format) &&
+	    rscreen->use_surface_alloc &&
+	    rscreen->use_hyperz &&
+	    rscreen->info.drm_minor >= 14 &&
+	    base->target == PIPE_TEXTURE_2D) {
+		struct pipe_resource hyperz;
+		struct radeon_surface hsurface;
+		void *ptr;
+
+		/* Allocate the hyperz buffer. */
+		hyperz = *base;
+		hyperz.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+		hsurface = *surface;
+		hsurface.npix_x = rtex->surface.level[0].pitch_bytes / rtex->surface.bpe;
+		hsurface.npix_y = rtex->surface.level[0].nblk_y * rtex->surface.blk_h;
+		hsurface.npix_x = align(hsurface.npix_x, 8);
+		hsurface.npix_y = align(hsurface.npix_y, 8);
+		hsurface.blk_w = 4;
+		hsurface.blk_h = 4;
+		hsurface.bpe = 4;
+		hsurface.flags = RADEON_SURF_CLR(hsurface.flags, MODE);
+		r600_htile_settings(rscreen, rtex, &hsurface);
+		if (G_028D24_HTILE_WIDTH(rtex->db_htile_surface)) {
+			hsurface.blk_w = 8;
+		}
+		if (G_028D24_HTILE_HEIGHT(rtex->db_htile_surface)) {
+			hsurface.blk_h = 8;
+		}
+		hyperz.width0 = hsurface.npix_x / hsurface.blk_w;
+		hyperz.height0 = hsurface.npix_y / hsurface.blk_h;
+		hyperz.last_level = 0;
+		hyperz.nr_samples = 1;
+		hyperz.bind = PIPE_BIND_RENDER_TARGET;
+		hyperz.flags = 0;
+
+		rtex->hyperz = r600_texture_create_object(screen, &hyperz, array_mode, 0,
+							  max_buffer_size, NULL, TRUE, &hsurface);
+		if (!rtex->hyperz) {
+			FREE(rtex);
+			return NULL;
+		}
+
+		/* This is ugly, but it is needed so that hyperz works without
+		 * glitches; otherwise various tiles will have wrong hyperz values.
+		 * The memset could be replaced with a blit. The value used for the
+		 * memset doesn't matter; it could be random.
+		 */
+		ptr = rscreen->ws->buffer_map((void*)rtex->hyperz->resource.buf, NULL, PB_USAGE_UNSYNCHRONIZED);
+		if (ptr) {
+			memset(ptr, 0, hyperz.width0 * hyperz.height0 * 4);
+		}
+	}
+
 	/* If we initialized separate stencil for Evergreen. place it after depth. */
 	if (rtex->stencil) {
 		unsigned stencil_align, stencil_offset;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 933d99e..04a42b9 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -659,6 +659,7 @@
 #define   S_028004_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
 #define   G_028004_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
 #define   C_028004_SLICE_MAX                           0xFF001FFF
+#define R_028014_DB_HTILE_DATA_BASE                  0x028014
 #define R_028D24_DB_HTILE_SURFACE                    0x028D24
 #define   S_028D24_HTILE_WIDTH(x)                      (((x) & 0x1) << 0)
 #define   G_028D24_HTILE_WIDTH(x)                      (((x) >> 0) & 0x1)
@@ -2249,6 +2250,10 @@
 #define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
 #define R_028D2C_DB_SRESULTS_COMPARE_STATE1          0x028D2C
 #define R_028D30_DB_PRELOAD_CONTROL                  0x028D30
+#define   S_028D30_START_X(x)                          (((x) & 0xff) << 0)
+#define   S_028D30_START_Y(x)                          (((x) & 0xff) << 8)
+#define   S_028D30_MAX_X(x)                            (((x) & 0xff) << 16)
+#define   S_028D30_MAX_Y(x)                            (((x) & 0xff) << 24)
 #define R_028D44_DB_ALPHA_TO_MASK                    0x028D44
 #define R_028868_SQ_PGM_RESOURCES_VS                 0x028868
 #define R_0286CC_SPI_PS_IN_CONTROL_0                 0x0286CC
-- 
1.7.7.6

