From ecb84d5df4ece6b417edf392035f6c7c4321c372 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 11 Jun 2011 17:05:08 +1000 Subject: [PATCH] drm/radeon/kms: attempt to avoid copying data twice on coherent cards. On coherent systems (not-AGP) the IB should be in cached memory so should be just as fast, so we can avoid copying to temporary pages and just use it directly. This patch is broken in some way though. --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_cs.c | 48 ++++++++++++++++++++++++----------- drivers/gpu/drm/radeon/radeon_drv.c | 4 +++ 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ba643b5..596780b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -93,6 +93,7 @@ extern int radeon_audio; extern int radeon_disp_priority; extern int radeon_hw_i2c; extern int radeon_pcie_gen2; +extern int radeon_copy1; /* * Copy from radeon_drv.h so we don't have to include both and have conflicting diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index fae00c0..3033836 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -156,12 +156,14 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return -EFAULT; } } else { - p->chunks[i].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); - p->chunks[i].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (p->chunks[i].kpage[0] == NULL || p->chunks[i].kpage[1] == NULL) { - kfree(p->chunks[i].kpage[0]); - kfree(p->chunks[i].kpage[1]); - return -ENOMEM; + if (p->rdev->flags & RADEON_IS_AGP || !radeon_copy1) { + p->chunks[i].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); + p->chunks[i].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (p->chunks[i].kpage[0] == NULL || p->chunks[i].kpage[1] == NULL) { + kfree(p->chunks[i].kpage[0]); + kfree(p->chunks[i].kpage[1]); + return -ENOMEM; + } } p->chunks[i].kpage_idx[0] = -1; p->chunks[i].kpage_idx[1] = -1; @@ -207,8 +209,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) kfree(parser->relocs_ptr); for (i = 0; i < parser->nchunks; i++) { kfree(parser->chunks[i].kdata); - kfree(parser->chunks[i].kpage[0]); - kfree(parser->chunks[i].kpage[1]); + if (parser->rdev->flags & RADEON_IS_AGP || !radeon_copy1) { + kfree(parser->chunks[i].kpage[0]); + kfree(parser->chunks[i].kpage[1]); + } } kfree(parser->chunks); kfree(parser->chunks_array); @@ -306,6 +310,11 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx) struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; int i; int size = PAGE_SIZE; + void *ptr; + bool copy1 = radeon_copy1; + + if (p->rdev->flags & RADEON_IS_AGP) + copy1 = false; for (i = ibc->last_copied_page + 1; i < pg_idx; i++) { if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)), @@ -316,23 +325,32 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx) } } - new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1; - if (pg_idx == ibc->last_page_index) { size = (ibc->length_dw * 4) % PAGE_SIZE; - if (size == 0) - size = PAGE_SIZE; + if (size == 0) + size = PAGE_SIZE; + } + + if (!copy1) { + new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1; + ptr = ibc->kpage[new_page]; + } else { + new_page = 0; + ptr = p->ib->ptr + (pg_idx * (PAGE_SIZE / 4)); + ibc->kpage[new_page] = ptr; } - if (DRM_COPY_FROM_USER(ibc->kpage[new_page], + if (DRM_COPY_FROM_USER(ptr, ibc->user_ptr + (pg_idx * PAGE_SIZE), size)) { p->parser_error = -EFAULT; return 0; } - /* copy to IB here */ - memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size); + if (!copy1) { + /* copy to IB here */ + memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size); + } ibc->last_copied_page = pg_idx; ibc->kpage_idx[new_page] = pg_idx; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 73dfbe8..6230cc0 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -117,6 +117,7 @@ int radeon_audio = 0; int radeon_disp_priority = 0; int radeon_hw_i2c = 0; int radeon_pcie_gen2 = 0; +int radeon_copy1 = 0; MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers"); module_param_named(no_wb, radeon_no_wb, int, 0444); @@ -163,6 +164,9 @@ module_param_named(hw_i2c, radeon_hw_i2c, int, 0444); MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (1 = enable)"); module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444); +MODULE_PARM_DESC(copy1, "One Copy test"); +module_param_named(copy1, radeon_copy1, int, 0644); + static int radeon_suspend(struct drm_device *dev, pm_message_t state) { drm_radeon_private_t *dev_priv = dev->dev_private; -- 1.7.5.2