From 3a460a14b9603159f10d89da27b559c36a184e27 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 25 Nov 2009 10:33:17 +1000 Subject: [PATCH] r600: refactor code to help future acceleration speedups. This changes the vertex buffer index to be an offset, and records the start of the vb for each operation and uses that to set the operations up. This still flushes after each operation to make sure we have no regressions in non-kms/kms cases. Signed-off-by: Dave Airlie --- src/r600_exa.c | 99 +++++++++++++++++++++++++--------------- src/r600_textured_videofuncs.c | 18 +++++-- src/r6xx_accel.c | 4 +- src/radeon.h | 3 +- src/radeon_kms.c | 6 ++- 5 files changed, 83 insertions(+), 47 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 4c63378..331711c 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -359,6 +359,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ErrorF("PM: 0x%08x\n", pm); #endif + accel_state->vb_start_op = accel_state->vb_offset; return TRUE; } @@ -371,12 +372,14 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) struct radeon_accel_state *accel_state = info->accel_state; float *vb; - if (((accel_state->vb_index + 3) * 8) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 8)) > accel_state->vb_total) { R600DoneSolid(pPix); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*8); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); vb[0] = (float)x1; vb[1] = (float)y1; @@ -387,7 +390,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) vb[4] = (float)x2; vb[5] = (float)y2; - accel_state->vb_index += 3; + accel_state->vb_offset += (3*8); } @@ -403,13 +406,13 @@ R600DoneSolid(PixmapPtr pPix) CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_index == 0) { + if (accel_state->vb_offset == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); return; } - accel_state->vb_size = accel_state->vb_index * 8; + accel_state->vb_size = accel_state->vb_offset; /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -426,11 +429,12 @@ R600DoneSolid(PixmapPtr pPix) accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ + accel_state->vb_size -= accel_state->vb_start_op; vtx_res.id = SQ_VTX_RESOURCE_vs; vtx_res.vtx_size_dw = 8 / 4; vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); @@ -450,6 +454,8 @@ R600DoneSolid(PixmapPtr pPix) accel_state->dst_size, accel_state->dst_mc_addr, accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); + accel_state->vb_start_op = 0; + R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -651,6 +657,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); END_BATCH(); + + accel_state->vb_start_op = accel_state->vb_offset; } static void @@ -664,13 +672,13 @@ R600DoCopy(ScrnInfoPtr pScrn) CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_index == 0) { + if (accel_state->vb_offset == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); return; } - accel_state->vb_size = accel_state->vb_index * 16; + accel_state->vb_size = accel_state->vb_offset; /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -687,11 +695,13 @@ R600DoCopy(ScrnInfoPtr pScrn) accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ + accel_state->vb_size -= accel_state->vb_start_op; + vtx_res.id = SQ_VTX_RESOURCE_vs; vtx_res.vtx_size_dw = 16 / 4; vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); @@ -710,6 +720,7 @@ R600DoCopy(ScrnInfoPtr pScrn) accel_state->dst_size, accel_state->dst_mc_addr, accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); + accel_state->vb_start_op = 0; R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -723,12 +734,14 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, struct radeon_accel_state *accel_state = info->accel_state; float *vb; - if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) { R600DoCopy(pScrn); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -745,7 +758,7 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, vb[10] = (float)(srcX + w); vb[11] = (float)(srcY + h); - accel_state->vb_index += 3; + accel_state->vb_offset += (3 * 16); } static Bool @@ -1888,6 +1901,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); END_BATCH(); + accel_state->vb_start_op = accel_state->vb_offset; + return TRUE; } @@ -1906,12 +1921,14 @@ static void R600Composite(PixmapPtr pDst, srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ if (accel_state->msk_pic) { - if (((accel_state->vb_index + 3) * 24) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 24)) > accel_state->vb_total) { R600DoneComposite(pDst); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*24); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -1934,13 +1951,16 @@ static void R600Composite(PixmapPtr pDst, vb[16] = (float)(maskX + w); vb[17] = (float)(maskY + h); + accel_state->vb_offset += 3 * 24; } else { - if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) { R600DoneComposite(pDst); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -1956,9 +1976,10 @@ static void R600Composite(PixmapPtr pDst, vb[9] = (float)(dstY + h); vb[10] = (float)(srcX + w); vb[11] = (float)(srcY + h); + accel_state->vb_offset += 3 * 16; + } - accel_state->vb_index += 3; } @@ -1973,30 +1994,13 @@ static void R600DoneComposite(PixmapPtr pDst) CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_index == 0) { + if (accel_state->vb_offset == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); return; } - /* Vertex buffer setup */ - if (accel_state->msk_pic) { - accel_state->vb_size = accel_state->vb_index * 24; - vtx_res.id = SQ_VTX_RESOURCE_vs; - vtx_res.vtx_size_dw = 24 / 4; - vtx_res.vtx_num_entries = accel_state->vb_size / 4; - vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; - vtx_res.bo = accel_state->vb_bo; - } else { - accel_state->vb_size = accel_state->vb_index * 16; - vtx_res.id = SQ_VTX_RESOURCE_vs; - vtx_res.vtx_size_dw = 16 / 4; - vtx_res.vtx_num_entries = accel_state->vb_size / 4; - vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; - vtx_res.bo = accel_state->vb_bo; - } + accel_state->vb_size = accel_state->vb_offset; /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || (info->ChipFamily == CHIP_FAMILY_RV620) || @@ -2011,6 +2015,24 @@ static void R600DoneComposite(PixmapPtr pDst) accel_state->vb_size, accel_state->vb_mc_addr, accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); + accel_state->vb_size -= accel_state->vb_start_op; + + /* Vertex buffer setup */ + if (accel_state->msk_pic) { + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 24 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; + vtx_res.bo = accel_state->vb_bo; + } else { + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 16 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; + vtx_res.bo = accel_state->vb_bo; + } set_vtx_resource(pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -2027,8 +2049,8 @@ static void R600DoneComposite(PixmapPtr pDst) accel_state->dst_size, accel_state->dst_mc_addr, accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); + accel_state->vb_start_op = 0; R600CPFlushIndirect(pScrn, accel_state->ib); - } Bool @@ -2336,6 +2358,9 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h); R600DoCopy(pScrn); + + if (info->cs) + radeon_cs_flush_indirect(pScrn); r = radeon_bo_map(scratch, 0); if (r) { diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 7598429..7d0cfa7 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -65,13 +65,13 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_index == 0) { + if (accel_state->vb_offset == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); return; } - accel_state->vb_size = accel_state->vb_index * 16; + accel_state->vb_size = accel_state->vb_offset; /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -88,11 +88,12 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ + accel_state->vb_size -= accel_state->vb_start_op; vtx_res.id = SQ_VTX_RESOURCE_vs; vtx_res.vtx_size_dw = 16 / 4; vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); @@ -111,6 +112,7 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) accel_state->dst_size, accel_state->dst_mc_addr, accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); + accel_state->vb_start_op = 0; R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -564,6 +566,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); END_BATCH(); + accel_state->vb_start_op = accel_state->vb_offset; + vs_alu_consts[0] = 1.0 / pPriv->w; vs_alu_consts[1] = 1.0 / pPriv->h; vs_alu_consts[2] = 0.0; @@ -595,12 +599,14 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) int dstX, dstY, dstw, dsth; float *vb; - if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) { R600DoneTexturedVideo(pScrn); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); dstX = pBox->x1 + dstxoff; dstY = pBox->y1 + dstyoff; @@ -632,7 +638,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) vb[10] = (float)(srcX + srcw); vb[11] = (float)(srcY + srch); - accel_state->vb_index += 3; + accel_state->vb_offset += 3 * 16; pBox++; } diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index a89bfb3..50afaed 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -86,6 +86,8 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) if (info->accel_state->vb_ptr) { radeon_bo_unmap(info->accel_state->vb_bo); info->accel_state->vb_ptr = NULL; + info->accel_state->vb_offset = 0; + info->accel_state->vb_start_op = 0; } if (CS_FULL(info->cs)) { radeon_cs_flush_indirect(pScrn); @@ -1183,8 +1185,8 @@ r600_vb_get(ScrnInfoPtr pScrn) accel_state->vb_total = (accel_state->ib->total / 2); accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + accel_state->vb_offset = 0; } - accel_state->vb_index = 0; return TRUE; } diff --git a/src/radeon.h b/src/radeon.h index 0dbaa52..745ee8e 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -703,12 +703,13 @@ struct radeon_accel_state { Bool vsync; drmBufPtr ib; - int vb_index; + int vb_offset; uint64_t vb_mc_addr; int vb_total; void *vb_ptr; uint32_t vb_size; struct radeon_bo *vb_bo; + uint32_t vb_start_op; // shader storage ExaOffscreenArea *shaders; diff --git a/src/radeon_kms.c b/src/radeon_kms.c index cafc329..b9228c1 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -80,8 +80,10 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn) return; if (info->accel_state->vb_ptr) { - radeon_bo_unmap(info->accel_state->vb_bo); - info->accel_state->vb_ptr = NULL; + radeon_bo_unmap(info->accel_state->vb_bo); + info->accel_state->vb_ptr = NULL; + info->accel_state->vb_start_op = 0; + info->accel_state->vb_offset = 0; } radeon_cs_emit(info->cs); -- 1.6.5.2