From 5985d18381bcded466d582eb9f989b8a503b178d Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 5 Jun 2012 15:59:03 -0400 Subject: [PATCH] r600g: add htile support v4 htile is used for HiZ and HiS support and fast Z/S clears. This commit just adds the htile setup and Fast Z clear. We don't take full advantage of HiS with that patch. v2 really use fast clear, still random issue with some tiles need to try more flush combination, fix depth/stencil texture decompression v3 fix random issue on r6xx/r7xx v4 rebase on top of latest mesa, disable CB export when clearing htile surface to avoid wasting bandwidth Signed-off-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/evergreen_hw_context.c | 3 + src/gallium/drivers/r600/evergreen_state.c | 64 +++++-- src/gallium/drivers/r600/evergreend.h | 4 + src/gallium/drivers/r600/r600_blit.c | 32 +++ src/gallium/drivers/r600/r600_hw_context.c | 4 + src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/r600/r600_pipe.h | 9 +- src/gallium/drivers/r600/r600_resource.h | 5 + src/gallium/drivers/r600/r600_state.c | 39 +++- src/gallium/drivers/r600/r600_state_common.c | 17 ++- src/gallium/drivers/r600/r600_texture.c | 232 +++++++++++++++++++++++ src/gallium/drivers/r600/r600d.h | 5 + 12 files changed, 381 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 2ab29c9..193ce8b 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -63,6 +63,9 @@ static const struct r600_reg evergreen_context_reg_list[] = { {GROUP_FORCE_NEW_BLOCK, 0, 0}, {R_028058_DB_DEPTH_SIZE, 0, 0}, {R_02805C_DB_DEPTH_SLICE, 0, 0}, + {R_02802C_DB_DEPTH_CLEAR, 0, 0}, + {R_028ABC_DB_HTILE_SURFACE, 0, 0}, + {R_028AC8_DB_PRELOAD_CONTROL, 0, 0}, {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, 
{R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0}, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index b618ca8..4b92e89 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -755,7 +755,6 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa); unsigned db_depth_control, alpha_test_control, alpha_ref; - unsigned db_render_control; struct r600_pipe_state *rstate; if (dsa == NULL) { @@ -803,9 +802,7 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, dsa->alpha_ref = alpha_ref; /* misc */ - db_render_control = 0; r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control); - r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control); return rstate; } @@ -1647,6 +1644,35 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta } } + rctx->db_misc_state.db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | + S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | + S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); + + /* hyperz */ + if (rtex->hyperz) { + uint64_t htile_offset = rtex->hyperz->surface.level[0].offset; + + z_info |= S_028040_TILE_SURFACE_ENABLE(1); + r600_pipe_state_add_reg_bo(rstate, R_028014_DB_HTILE_DATA_BASE, + htile_offset >> 8, &rtex->hyperz->resource, + RADEON_USAGE_READWRITE); + /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */ + rctx->db_misc_state.db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF) | + S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | + S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); + + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, fui(rtex->dclear_value)); + r600_pipe_state_add_reg(rstate, R_028ABC_DB_HTILE_SURFACE, rtex->db_htile_surface); + 
r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, rtex->db_preload_control); + if (rctx->db_misc_state.db_render_control & S_028000_DEPTH_CLEAR_ENABLE(1)) { + /* when clearing htile we need to make sure CB doesn't waste + * bandwidth exporting anything + */ + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, 0); + } + } + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); + r600_pipe_state_add_reg_bo(rstate, R_028040_DB_Z_INFO, z_info, &rtex->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, @@ -1707,19 +1733,23 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_ { struct radeon_winsys_cs *cs = rctx->cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; + unsigned db_render_override = a->db_render_override; + unsigned db_render_control = a->db_render_control; unsigned db_count_control = 0; - unsigned db_render_override = - S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | - S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | - S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); if (a->occlusion_query_enabled) { db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1); db_render_override |= S_02800C_NOOP_CULL_DISABLE(1); } + if (a->flush_depthstencil_enabled) { + db_render_control |= S_028000_DEPTH_COPY_ENABLE(1) | + S_028000_STENCIL_COPY_ENABLE(1) | + S_028000_COPY_CENTROID(1); + } r600_write_context_reg(cs, R_028004_DB_COUNT_CONTROL, db_count_control); r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override); + r600_write_context_reg(cs, R_028000_DB_RENDER_CONTROL, db_render_control); } static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) @@ -1988,16 +2018,20 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, ~0); /* CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 */ r600_store_value(cb, ~0); /* CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 */ - r600_store_context_reg_seq(cb, 
R_028028_DB_STENCIL_CLEAR, 2); + r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 1); r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */ - r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */ + + r600_store_context_reg_seq(cb, R_009830_DB_DEBUG, 3); + r600_store_value(cb, 0); /* R_009830_DB_DEBUG */ + r600_store_value(cb, 0); /* R_009834_DB_DEBUG2 */ + r600_store_value(cb, 0); /* R_009838_DB_DEBUG3 */ + r600_store_config_reg(cb, R_009854_DB_WATERMARKS, 0x00420204); r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); - r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3); + r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 2); r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ - r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); @@ -2486,10 +2520,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F); r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0); - r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3); + r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 2); r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ - r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00); @@ -2794,11 +2827,6 @@ void *evergreen_create_db_flush_dsa(struct r600_context *rctx) memset(&dsa, 0, sizeof(dsa)); rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); - r600_pipe_state_add_reg(rstate, - R_028000_DB_RENDER_CONTROL, - S_028000_DEPTH_COPY_ENABLE(1) | - S_028000_STENCIL_COPY_ENABLE(1) | - S_028000_COPY_CENTROID(1)); /* 
Don't set the 'is_flush' flag in r600_pipe_dsa, evergreen doesn't need it. */ return rstate; } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 3b6d730..c458d9e 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -1580,6 +1580,10 @@ #define S_028008_SLICE_MAX(x) (((x) & 0x7FF) << 13) #define G_028008_SLICE_MAX(x) (((x) >> 13) & 0x7FF) #define C_028008_SLICE_MAX 0xFF001FFF +#define R_009830_DB_DEBUG 0x00009830 +#define R_009834_DB_DEBUG2 0x00009834 +#define R_009838_DB_DEBUG3 0x00009838 +#define R_009854_DB_WATERMARKS 0x00009854 #define R_02800C_DB_RENDER_OVERRIDE 0x0002800C #define V_02800C_FORCE_OFF 0 #define V_02800C_FORCE_ENABLE 1 diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 1122f3e..45332ac 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -24,6 +24,7 @@ #include "util/u_surface.h" #include "util/u_blitter.h" #include "util/u_format.h" +#include "r600d.h" enum r600_blitter_op /* bitmask */ { @@ -203,6 +204,37 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers, struct r600_context *rctx = (struct r600_context *)ctx; struct pipe_framebuffer_state *fb = &rctx->framebuffer; + /* if hyperz enabled just clear hyperz */ + if (fb->zsbuf && (buffers & PIPE_CLEAR_DEPTHSTENCIL)) { + struct r600_resource_texture *rtex; + + rtex = (struct r600_resource_texture*)fb->zsbuf->texture; + if (rtex->hyperz) { +#if 1 + /* set clear value, as we use R600_CLEAR_SURFACE + * the framebuffer state will be reset with proper + * depth clear value + */ + rtex->dclear_value = depth; + + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); + rctx->db_misc_state.db_render_control = 0; + if (buffers & PIPE_CLEAR_DEPTH) { + rctx->db_misc_state.db_render_control |= S_028D0C_DEPTH_CLEAR_ENABLE(1); + } + util_blitter_clear_depth_stencil(rctx->blitter, + fb->zsbuf, + buffers & PIPE_CLEAR_DEPTHSTENCIL, 
+ depth, stencil, + 0, 0, + fb->zsbuf->texture->width0, + fb->zsbuf->texture->height0); + r600_blitter_end(ctx); +#endif + buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; + } + } + r600_blitter_begin(ctx, R600_CLEAR); util_blitter_clear(rctx->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, fb->nr_cbufs ? fb->cbufs[0]->format : PIPE_FORMAT_NONE, diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index a022436..088834a 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -367,7 +367,11 @@ static const struct r600_reg r600_context_reg_list[] = { {GROUP_FORCE_NEW_BLOCK, 0, 0}, {R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0}, {R_028A6C_VGT_GS_OUT_PRIM_TYPE, 0, 0}, + {R_02802C_DB_DEPTH_CLEAR, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0}, + {R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0}, {R_028D24_DB_HTILE_SURFACE, 0, 0}, + {R_028D30_DB_PRELOAD_CONTROL, 0, 0}, {R_028D34_DB_PREFETCH_LIMIT, 0, 0}, {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index e0ee823..7004ff9 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -942,6 +942,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen->use_surface_alloc = debug_get_bool_option("R600_SURF", TRUE); rscreen->glsl_feature_level = debug_get_bool_option("R600_GLSL130", TRUE) ? 
130 : 120; + rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", FALSE); rscreen->global_pool = compute_memory_pool_new(1024*16, rscreen); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index f2865d2..8be17c1 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -76,9 +76,11 @@ struct r600_surface_sync_cmd { }; struct r600_db_misc_state { - struct r600_atom atom; - bool occlusion_query_enabled; - bool flush_depthstencil_enabled; + struct r600_atom atom; + bool occlusion_query_enabled; + bool flush_depthstencil_enabled; + unsigned db_render_control; + unsigned db_render_override; }; enum r600_pipe_state_id { @@ -132,6 +134,7 @@ struct r600_screen { struct r600_pipe_fences fences; bool use_surface_alloc; + bool use_hyperz; int glsl_feature_level; /*for compute global memory binding, we allocate stuff here, instead of diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index d401e40..e8f2c23 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -63,6 +63,11 @@ struct r600_resource_texture { struct r600_resource_texture *flushed_depth_texture; boolean is_flushing_texture; struct radeon_surface surface; + unsigned db_prefetch_limit; + unsigned db_htile_surface; + unsigned db_preload_control; + float dclear_value; + struct r600_resource_texture *hyperz; }; #define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 124eba2..8427ca3 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1559,6 +1559,7 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate, struct r600_resource_texture *rtex; struct r600_surface *surf; unsigned 
level, pitch, slice, format, offset, array_mode; + unsigned db_depth_info; if (state->zsbuf == NULL) return; @@ -1602,6 +1603,30 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate, } format = r600_translate_dbformat(state->zsbuf->texture->format); + db_depth_info = S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format); + rctx->db_misc_state.db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); + + /* hyperz */ + if (rtex->hyperz) { + uint64_t htile_offset = rtex->hyperz->surface.level[0].offset; + + db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1); + r600_pipe_state_add_reg_bo(rstate, R_028014_DB_HTILE_DATA_BASE, + htile_offset >> 8, &rtex->hyperz->resource, + RADEON_USAGE_READWRITE); + /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */ + rctx->db_misc_state.db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF) | + S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); + + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, fui(rtex->dclear_value)); + r600_pipe_state_add_reg(rstate, R_028D24_DB_HTILE_SURFACE, rtex->db_htile_surface); + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, rtex->db_preload_control); + r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, rtex->db_prefetch_limit); + } + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); r600_pipe_state_add_reg_bo(rstate, R_02800C_DB_DEPTH_BASE, offset >> 8, &rtex->resource, RADEON_USAGE_READWRITE); @@ -1615,8 +1640,8 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate, S_028004_SLICE_MAX(state->zsbuf->u.tex.last_layer)); } r600_pipe_state_add_reg_bo(rstate, R_028010_DB_DEPTH_INFO, - S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format), - &rtex->resource, RADEON_USAGE_READWRITE); + db_depth_info, + 
&rtex->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, (surf->aligned_height / 8) - 1); } @@ -1677,11 +1702,8 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom { struct radeon_winsys_cs *cs = rctx->cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; - unsigned db_render_control = 0; - unsigned db_render_override = - S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); + unsigned db_render_control = a->db_render_control; + unsigned db_render_override = a->db_render_override; if (a->occlusion_query_enabled) { if (rctx->chip_class >= R700) { @@ -2144,9 +2166,8 @@ void r600_init_atom_start_cs(struct r600_context *rctx) r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); - r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2); + r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 1); r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */ - r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */ r600_store_context_reg_seq(cb, R_0286DC_SPI_FOG_CNTL, 3); r600_store_value(cb, 0); /* R_0286DC_SPI_FOG_CNTL */ diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 00e1bd0..0f09305 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -802,8 +802,16 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info.primitive_restart); r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance); r600_pipe_state_add_reg(&rctx->vgt, R_028A0C_PA_SC_LINE_STIPPLE, 0); - if (rctx->chip_class <= R700) - r600_pipe_state_add_reg(&rctx->vgt, R_028808_CB_COLOR_CONTROL, rctx->cb_color_control); + if (rctx->chip_class <= R700) 
{ + if (rctx->db_misc_state.db_render_control & S_028D0C_DEPTH_CLEAR_ENABLE(1)) { + /* when clearing htile we need to make sure CB doesn't waste + * bandwidth exporting anything + */ + r600_pipe_state_add_reg(&rctx->vgt, R_028808_CB_COLOR_CONTROL, 0); + } else { + r600_pipe_state_add_reg(&rctx->vgt, R_028808_CB_COLOR_CONTROL, rctx->cb_color_control); + } + } r600_pipe_state_add_reg(&rctx->vgt, R_02881C_PA_CL_VS_OUT_CNTL, 0); r600_pipe_state_add_reg(&rctx->vgt, R_028810_PA_CL_CLIP_CNTL, 0); } @@ -883,8 +891,9 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING; - if (rctx->framebuffer.zsbuf) - { + /* the hyperz clear request only applies to a single draw, reset it */ + rctx->db_misc_state.db_render_control = 0; + if (rctx->framebuffer.zsbuf) { struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture; ((struct r600_resource_texture *)tex)->dirty_db = TRUE; } diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 5b15990..3820369 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -478,6 +478,10 @@ static void r600_texture_destroy(struct pipe_screen *screen, if (rtex->stencil) pipe_resource_reference((struct pipe_resource **)&rtex->stencil, NULL); + if (rtex->hyperz) { + pipe_resource_reference((struct pipe_resource **)&rtex->hyperz, NULL); + } + pb_reference(&resource->buf, NULL); FREE(rtex); } @@ -494,6 +498,179 @@ static const struct u_resource_vtbl r600_texture_vtbl = NULL /* transfer_inline_write */ }; +static void r600_htile_settings(struct r600_screen *rscreen, + struct r600_resource_texture *zbuf, + struct radeon_surface *hsurface) +{ + unsigned max_pixels_per_DB; + unsigned width_per_DB; + unsigned npipes = rscreen->info.r600_num_tile_pipes; + const unsigned k = 1024; + + max_pixels_per_DB = (hsurface->npix_x * hsurface->npix_y) / rscreen->info.r600_num_backends; + + zbuf->db_prefetch_limit = 
(hsurface->npix_y / 8) - 1; + zbuf->db_preload_control = 0; + + /* evergreen and newer always use 8x8 htiles */ + if (rscreen->family >= CHIP_CEDAR) { + zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) | S_028D24_HTILE_HEIGHT(1); + } + zbuf->db_htile_surface |= S_028D24_PRELOAD(1); + + if (max_pixels_per_DB <= 64 * k) { + zbuf->db_htile_surface |= S_028D24_LINEAR(1); + } else if (max_pixels_per_DB <= 128 * k) { + zbuf->db_htile_surface |= S_028D24_FULL_CACHE(1) | S_028D24_LINEAR(1); + } else if (max_pixels_per_DB <= 256 * k) { + zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) | + S_028D24_FULL_CACHE(1) | + S_028D24_LINEAR(1); + } else if (max_pixels_per_DB <= 512 * k) { + zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) | + S_028D24_HTILE_HEIGHT(1) | + S_028D24_FULL_CACHE(1) | + S_028D24_LINEAR(1); + } else { + unsigned npix_x, npix_y, startx, starty; + float a, b; + + /* htile size is x*y = 8 * 8 * 8k * num_db and we keep the + * aspect ratio so x/y == zbuffer_x / zbuffer_y + */ + a = ((float)hsurface->npix_x) / ((float)hsurface->npix_y); + b = 8 * 8 * 8 * k * rscreen->info.r600_num_backends; + npix_y = ((unsigned)sqrt(b / a)) & ~7; + npix_x = ((unsigned)(b / ((float)npix_y))) & ~7; + startx = (hsurface->npix_x - npix_x) >> 6; + starty = (hsurface->npix_y - npix_y) >> 6; + zbuf->db_preload_control = S_028D30_START_X(startx) | + S_028D30_START_Y(starty) | + S_028D30_MAX_X(startx + (npix_x >> 5)) | + S_028D30_MAX_Y(starty + (npix_y >> 5)); + zbuf->db_prefetch_limit = (npix_y / 8) - 1; + /* r6xx, r7xx have issues with the preload window, don't use it */ + if (rscreen->family >= CHIP_CEDAR) { + zbuf->db_htile_surface |= S_028D24_HTILE_USES_PRELOAD_WIN(1); + } + + width_per_DB = hsurface->npix_x / rscreen->info.r600_num_backends; + if (width_per_DB <= 512) { + zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) | + S_028D24_HTILE_HEIGHT(1) | + S_028D24_FULL_CACHE(1) | + S_028D24_PREFETCH_WIDTH(16) | + S_028D24_PREFETCH_HEIGHT(4); + } else if (width_per_DB <= 1024) { + zbuf->db_htile_surface |= 
S_028D24_HTILE_WIDTH(1) | + S_028D24_HTILE_HEIGHT(1) | + S_028D24_FULL_CACHE(1) | + S_028D24_PREFETCH_WIDTH(16) | + S_028D24_PREFETCH_HEIGHT(2); + } else { + zbuf->db_htile_surface |= S_028D24_HTILE_WIDTH(1) | + S_028D24_HTILE_HEIGHT(1) | + S_028D24_FULL_CACHE(1) | + S_028D24_PREFETCH_WIDTH(16); + } + } + + if (G_028D24_LINEAR(zbuf->db_htile_surface)) { + /* npix_x must be 16 htiles aligned == 16 * 8 pixel aligned */ + hsurface->npix_x = align(hsurface->npix_x, 16 * 8); + /* npix_y must be npipes htiles aligned == npipes * 8 pixel aligned */ + hsurface->npix_y = align(hsurface->npix_y, npipes * 8); + } else { + unsigned tmp; + + /* htile width & height (8 or 4) make a 2 bit number */ + tmp = zbuf->db_htile_surface & 3; + /* align is htile align * 8, htile align varies according to + * number of pipes and htile width and height + */ + switch (npipes) { + case 8: + switch (tmp) { + case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + hsurface->npix_x = align(hsurface->npix_x, 64 * 8); + hsurface->npix_y = align(hsurface->npix_y, 64 * 8); + break; + case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ + case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ + hsurface->npix_x = align(hsurface->npix_x, 64 * 8); + hsurface->npix_y = align(hsurface->npix_y, 32 * 8); + break; + case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ + hsurface->npix_x = align(hsurface->npix_x, 32 * 8); + hsurface->npix_y = align(hsurface->npix_y, 32 * 8); + break; + default: + break; + } + break; + case 4: + switch (tmp) { + case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + hsurface->npix_x = align(hsurface->npix_x, 64 * 8); + hsurface->npix_y = align(hsurface->npix_y, 32 * 8); + break; + case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ + case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ + hsurface->npix_x = align(hsurface->npix_x, 32 * 8); + hsurface->npix_y = align(hsurface->npix_y, 32 * 8); + break; + case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ + hsurface->npix_x = align(hsurface->npix_x, 
32 * 8); + hsurface->npix_y = align(hsurface->npix_y, 16 * 8); + break; + default: + break; + } + break; + case 2: + switch (tmp) { + case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + hsurface->npix_x = align(hsurface->npix_x, 32 * 8); + hsurface->npix_y = align(hsurface->npix_y, 32 * 8); + break; + case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ + case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ + hsurface->npix_x = align(hsurface->npix_x, 32 * 8); + hsurface->npix_y = align(hsurface->npix_y, 16 * 8); + break; + case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ + hsurface->npix_x = align(hsurface->npix_x, 16 * 8); + hsurface->npix_y = align(hsurface->npix_y, 16 * 8); + break; + default: + break; + } + break; + case 1: + switch (tmp) { + case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + hsurface->npix_x = align(hsurface->npix_x, 32 * 8); + hsurface->npix_y = align(hsurface->npix_y, 16 * 8); + break; + case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ + case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ + hsurface->npix_x = align(hsurface->npix_x, 16 * 8); + hsurface->npix_y = align(hsurface->npix_y, 16 * 8); + break; + case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ + hsurface->npix_x = align(hsurface->npix_x, 16 * 8); + hsurface->npix_y = align(hsurface->npix_y, 8 * 8); + break; + default: + break; + } + break; + default: + break; + } + } +} + static struct r600_resource_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, @@ -520,6 +697,7 @@ r600_texture_create_object(struct pipe_screen *screen, resource->b.b.screen = screen; rtex->pitch_override = pitch_in_bytes_override; rtex->real_format = base->format; + rtex->hyperz = NULL; /* We must split depth and stencil into two separate buffers on Evergreen. 
*/ if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) && @@ -579,6 +757,60 @@ r600_texture_create_object(struct pipe_screen *screen, } } + if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) && + util_format_is_depth_or_stencil(base->format) && + rscreen->use_surface_alloc && + rscreen->use_hyperz && + rscreen->info.drm_minor >= 14 && + base->target == PIPE_TEXTURE_2D) { + struct pipe_resource hyperz; + struct radeon_surface hsurface; + void *ptr; + + /* Allocate the hyperz buffer. */ + hyperz = *base; + hyperz.format = PIPE_FORMAT_A8R8G8B8_UNORM; + hsurface = *surface; + hsurface.npix_x = rtex->surface.level[0].pitch_bytes / rtex->surface.bpe; + hsurface.npix_y = rtex->surface.level[0].nblk_y * rtex->surface.blk_h; + hsurface.npix_x = align(hsurface.npix_x, 8); + hsurface.npix_y = align(hsurface.npix_y, 8); + hsurface.blk_w = 4; + hsurface.blk_h = 4; + hsurface.bpe = 4; + hsurface.flags = RADEON_SURF_CLR(hsurface.flags, MODE); + r600_htile_settings(rscreen, rtex, &hsurface); + if (G_028D24_HTILE_WIDTH(rtex->db_htile_surface)) { + hsurface.blk_w = 8; + } + if (G_028D24_HTILE_HEIGHT(rtex->db_htile_surface)) { + hsurface.blk_h = 8; + } + hyperz.width0 = hsurface.npix_x / hsurface.blk_w; + hyperz.height0 = hsurface.npix_y / hsurface.blk_h; + hyperz.last_level = 0; + hyperz.nr_samples = 1; + hyperz.bind = PIPE_BIND_RENDER_TARGET; + hyperz.flags = 0; + + rtex->hyperz = r600_texture_create_object(screen, &hyperz, array_mode, 0, + max_buffer_size, NULL, TRUE, &hsurface); + if (!rtex->hyperz) { + FREE(rtex); + return NULL; + } + + /* this is ugly but it's needed so that hyperz works without + * glitches. Otherwise various tiles will have wrong hyperz values. + * The memset could be replaced with a blit. The value used for the + * memset doesn't matter, it could be random. NOTE(review): no unmap follows this buffer_map in the added lines - confirm that is intended. 
+ */ + ptr = rscreen->ws->buffer_map((void*)rtex->hyperz->resource.buf, NULL, PB_USAGE_UNSYNCHRONIZED); + if (ptr) { + memset(ptr, 0, hyperz.width0 * hyperz.height0 * 4); + } + } + /* If we initialized separate stencil for Evergreen. place it after depth. */ if (rtex->stencil) { unsigned stencil_align, stencil_offset; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 933d99e..04a42b9 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -659,6 +659,7 @@ #define S_028004_SLICE_MAX(x) (((x) & 0x7FF) << 13) #define G_028004_SLICE_MAX(x) (((x) >> 13) & 0x7FF) #define C_028004_SLICE_MAX 0xFF001FFF +#define R_028014_DB_HTILE_DATA_BASE 0x028014 #define R_028D24_DB_HTILE_SURFACE 0x028D24 #define S_028D24_HTILE_WIDTH(x) (((x) & 0x1) << 0) #define G_028D24_HTILE_WIDTH(x) (((x) >> 0) & 0x1) @@ -2249,6 +2250,10 @@ #define R_028D10_DB_RENDER_OVERRIDE 0x028D10 #define R_028D2C_DB_SRESULTS_COMPARE_STATE1 0x028D2C #define R_028D30_DB_PRELOAD_CONTROL 0x028D30 +#define S_028D30_START_X(x) (((x) & 0xff) << 0) +#define S_028D30_START_Y(x) (((x) & 0xff) << 8) +#define S_028D30_MAX_X(x) (((x) & 0xff) << 16) +#define S_028D30_MAX_Y(x) (((x) & 0xff) << 24) #define R_028D44_DB_ALPHA_TO_MASK 0x028D44 #define R_028868_SQ_PGM_RESOURCES_VS 0x028868 #define R_0286CC_SPI_PS_IN_CONTROL_0 0x0286CC -- 1.7.7.6