From 4ababc9e160e1b44654353fc44750320cd9c235d Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Thu, 17 Jan 2013 13:14:40 -0500
Subject: [PATCH] drm/radeon: keep original user requested placement around

This improves performance: the placement originally requested by
userspace for a bo is preserved as much as possible, so we no longer
ping-pong a bo between different placements at each command stream
submission.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  5 +-
 drivers/gpu/drm/radeon/radeon_device.c | 11 ++++
 drivers/gpu/drm/radeon/radeon_object.c | 94 ++++++++++++++++------------------
 drivers/gpu/drm/radeon/radeon_ttm.c    | 21 ++++++--
 4 files changed, 73 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index ce85cf5..22caefe 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -318,7 +318,7 @@ struct radeon_bo {
 	/* Protected by gem.mutex */
 	struct list_head		list;
 	/* Protected by tbo.reserved */
-	u32				placements[3];
+	u32				placements;
 	struct ttm_placement		placement;
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj		kmap;
@@ -1627,6 +1627,8 @@ struct radeon_device {
 	/* ACPI interface */
 	struct radeon_atif		atif;
 	struct radeon_atcs		atcs;
+	/* default bo placements shared by all bo */
+	u32				placements[2];
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -1861,7 +1863,6 @@ extern void radeon_surface_init(struct radeon_device *rdev);
 extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
-extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base);
 extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index cd75626..ab7473d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1031,6 +1031,17 @@ int radeon_device_init(struct radeon_device *rdev,
 		radeon_agp_disable(rdev);
 	}
 
+	/* default placements shared by all bo */
+	rdev->placements[0] = TTM_PL_FLAG_TT;
+	rdev->placements[1] = TTM_PL_FLAG_SYSTEM;
+	if (rdev->flags & RADEON_IS_AGP && radeon_agpmode != -1) {
+		rdev->placements[0] |= TTM_PL_FLAG_WC;
+		rdev->placements[1] |= TTM_PL_FLAG_WC;
+	} else {
+		rdev->placements[0] |= TTM_PL_FLAG_CACHED;
+		rdev->placements[1] |= TTM_PL_FLAG_CACHED;
+	}
+
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * IGP - can handle 40-bits
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index d3aface..785b17e 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -77,37 +77,6 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
-{
-	u32 c = 0;
-
-	rbo->placement.fpfn = 0;
-	rbo->placement.lpfn = 0;
-	rbo->placement.placement = rbo->placements;
-	rbo->placement.busy_placement = rbo->placements;
-	if (domain & RADEON_GEM_DOMAIN_VRAM)
-		rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-					TTM_PL_FLAG_VRAM;
-	if (domain & RADEON_GEM_DOMAIN_GTT) {
-		if (rbo->rdev->flags & RADEON_IS_AGP) {
-			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
-		} else {
-			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
-		}
-	}
-	if (domain & RADEON_GEM_DOMAIN_CPU) {
-		if (rbo->rdev->flags & RADEON_IS_AGP) {
-			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM;
-		} else {
-			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
-		}
-	}
-	if (!c)
-		rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
-	rbo->placement.num_placement = c;
-	rbo->placement.num_busy_placement = c;
-}
-
 int radeon_bo_create(struct radeon_device *rdev,
 		     unsigned long size, int byte_align, bool kernel, u32 domain,
 		     struct sg_table *sg, struct radeon_bo **bo_ptr)
@@ -146,7 +115,19 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->surface_reg = -1;
 	INIT_LIST_HEAD(&bo->list);
 	INIT_LIST_HEAD(&bo->va);
-	radeon_ttm_placement_from_domain(bo, domain);
+
+	if (domain & RADEON_GEM_DOMAIN_VRAM) {
+		bo->placements = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
+	} else {
+		bo->placements = rdev->placements[0];
+	}
+	bo->placement.fpfn = 0;
+	bo->placement.lpfn = 0;
+	bo->placement.placement = &bo->placements;
+	bo->placement.busy_placement = &rdev->placements[0];
+	bo->placement.num_placement = 1;
+	bo->placement.num_busy_placement = 2;
+
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -154,6 +135,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 			acc_size, sg, &radeon_ttm_bo_destroy);
 	up_read(&rdev->pm.mclk_lock);
 	if (unlikely(r != 0)) {
+		DRM_INFO("%s failed 0x%08lx VRAM %d\n", __func__, size, !!(domain & RADEON_GEM_DOMAIN_VRAM));
 		return r;
 	}
 	*bo_ptr = bo;
@@ -214,7 +196,7 @@ void radeon_bo_unref(struct radeon_bo **bo)
 int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 			     u64 *gpu_addr)
 {
-	int r, i;
+	int r;
 
 	if (bo->pin_count) {
 		bo->pin_count++;
@@ -234,11 +216,21 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
-	radeon_ttm_placement_from_domain(bo, domain);
+
+	bo->placement.fpfn = 0;
+	bo->placement.lpfn = 0;
+	bo->placement.placement = &bo->placements;
+	bo->placement.busy_placement = NULL;
+	bo->placement.num_placement = 1;
+	bo->placement.num_busy_placement = 0;
 	if (domain == RADEON_GEM_DOMAIN_VRAM) {
 		/* force to pin into visible video ram */
 		bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+		bo->placements = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
+	} else {
+		bo->placements = bo->rdev->placements[0];
 	}
+
 	if (max_offset) {
 		u64 lpfn = max_offset >> PAGE_SHIFT;
 
@@ -248,8 +240,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 		if (lpfn < bo->placement.lpfn)
 			bo->placement.lpfn = lpfn;
 	}
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+	bo->placements |= TTM_PL_FLAG_NO_EVICT;
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		bo->pin_count = 1;
@@ -268,7 +259,7 @@ int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
 
 int radeon_bo_unpin(struct radeon_bo *bo)
 {
-	int r, i;
+	int r;
 
 	if (!bo->pin_count) {
 		dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
@@ -277,8 +268,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
 	bo->pin_count--;
 	if (bo->pin_count)
 		return 0;
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
+	bo->placements &= ~TTM_PL_FLAG_NO_EVICT;
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (unlikely(r != 0))
 		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
@@ -350,7 +340,6 @@ int radeon_bo_list_validate(struct list_head *head)
 {
 	struct radeon_bo_list *lobj;
 	struct radeon_bo *bo;
-	u32 domain;
 	int r;
 
 	r = ttm_eu_reserve_buffers(head);
@@ -360,17 +349,15 @@ int radeon_bo_list_validate(struct list_head *head)
 	list_for_each_entry(lobj, head, tv.head) {
 		bo = lobj->bo;
 		if (!bo->pin_count) {
-			domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
-			
-		retry:
-			radeon_ttm_placement_from_domain(bo, domain);
-			r = ttm_bo_validate(&bo->tbo, &bo->placement,
-						true, false);
+			bo->placement.fpfn = 0;
+			bo->placement.lpfn = 0;
+			bo->placement.placement = &bo->placements;
+			bo->placement.busy_placement = &bo->rdev->placements[0];
+			bo->placement.num_placement = 1;
+			bo->placement.num_busy_placement = 1;
+
+			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 			if (unlikely(r)) {
-				if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
-					domain |= RADEON_GEM_DOMAIN_GTT;
-					goto retry;
-				}
 				return r;
 			}
 		}
@@ -583,8 +570,13 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		offset = bo->mem.start << PAGE_SHIFT;
 		if ((offset + size) > rdev->mc.visible_vram_size) {
 			/* hurrah the memory is not visible ! */
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
+			rbo->placement.fpfn = 0;
 			rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
+			rbo->placements = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
+			rbo->placement.placement = &rbo->placements;
+			rbo->placement.busy_placement = &rdev->placements[0];
+			rbo->placement.num_placement = 1;
+			rbo->placement.num_busy_placement = 2;
 			r = ttm_bo_validate(bo, &rbo->placement, false, false);
 			if (unlikely(r != 0))
 				return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 1d8ff2f..fc23419 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -188,14 +188,25 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
-		else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+		rbo->placement.fpfn = 0;
+		rbo->placement.lpfn = 0;
+		rbo->placement.placement = &rbo->rdev->placements[0];
+		rbo->placement.busy_placement = &rbo->rdev->placements[1];
+		rbo->placement.num_placement = 1;
+		rbo->placement.num_busy_placement = 1;
+		if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false) {
+			rbo->placement.placement = &rbo->rdev->placements[1];
+		}
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+		rbo->placement.fpfn = 0;
+		rbo->placement.lpfn = 0;
+		rbo->placement.placement = &rbo->rdev->placements[1];
+		rbo->placement.busy_placement = &rbo->rdev->placements[1];
+		rbo->placement.num_placement = 1;
+		rbo->placement.num_busy_placement = 1;
+		break;
 	}
 	*placement = rbo->placement;
 }
-- 
1.7.11.7

