From a5645b6d3c17ddb55598bf53906cb66e5eedaa28 Mon Sep 17 00:00:00 2001
From: "Kristian H. Kristensen" <hoegsberg@chromium.org>
Date: Tue, 18 Dec 2018 10:48:01 -0800
Subject: [PATCH] drm/msm: Sparse cmd buffer dumping

Trace the command buffer to determine which parts of buffers to dump.

Signed-off-by: Kristian H. Kristensen <hoegsberg@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem.h |   1 +
 drivers/gpu/drm/msm/msm_rd.c  | 247 +++++++++++++++++++++++++++++++---
 2 files changed, 227 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index c5d9bd3e47a8d..d0d0794f624fa 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -160,6 +160,7 @@ struct msm_gem_submit {
 		uint32_t flags;
 		struct msm_gem_object *obj;
 		uint64_t iova;
+		uint64_t start, end; /* Range to dump for rd. */
 	} bos[0];
 };
 
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 25fc161ab6181..3f0e5d675fdd1 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -51,6 +51,8 @@
 #include "msm_gpu.h"
 #include "msm_gem.h"
 
+#include "adreno/adreno_pm4.xml.h"
+
 static bool rd_full = false;
 MODULE_PARM_DESC(rd_full, "If true, $debugfs/.../rd will snapshot all buffer contents");
 module_param_named(rd_full, rd_full, bool, 0600);
@@ -353,6 +355,203 @@ static void snapshot_buf(struct msm_rd_state *rd,
 	msm_gem_put_vaddr(&obj->base);
 }
 
+/* Return the index of the submit BO whose GPU range contains iova, or -1. */
+static int find_bo(struct msm_gem_submit *submit, uint64_t iova)
+{
+	int idx;
+
+	for (idx = 0; idx < submit->nr_bos; idx++) {
+		uint64_t base = submit->bos[idx].iova;
+
+		if (iova < base)
+			continue;
+		if (iova < base + submit->bos[idx].obj->base.size)
+			return idx;
+	}
+	return -1;
+}
+
+/* Widen bos[i]'s dump range to cover [start, end). end is 64-bit like the
+ * range fields it updates, so iovas above 4 GiB are not truncated. */
+static void
+update_bo_range(struct msm_gem_submit *submit, int i, uint64_t start, uint64_t end)
+{
+	submit->bos[i].start = min(submit->bos[i].start, start);
+	submit->bos[i].end = max(submit->bos[i].end, end);
+}
+
+/* Combine two consecutive dwords into 64 bits: p[0] is low, p[1] is high. */
+static inline uint64_t get_offset(const uint32_t *p)
+{
+	return ((uint64_t) p[1] << 32) | p[0];
+}
+
+
+/* Mark length dwords at iova for dumping, clamped to the BO containing iova.
+ * Empty ranges are skipped so they cannot lower the BO's range start. */
+static void
+mark_range(struct msm_gem_submit *submit, uint64_t iova, uint32_t length)
+{
+	/* Widen before scaling so the dword-to-byte conversion cannot wrap. */
+	uint64_t end = iova + (uint64_t) length * 4;
+	int i = find_bo(submit, iova);
+
+	if (i == -1) {
+		DRM_ERROR("range not found\n");
+		return;
+	}
+	end = min(end, submit->bos[i].iova + submit->bos[i].obj->base.size);
+	if (length)
+		update_bo_range(submit, i, iova, end);
+}
+
+struct cp_load_state6 {	/* fields unpacked by decode_cp_load_state6() */
+	uint32_t state_type;	/* STATE_TYPE field, compared with ST6_CONSTANTS */
+	uint32_t state_src;	/* STATE_SRC field, compared with SS6_INDIRECT */
+	uint32_t state_block;	/* STATE_BLOCK field, one of the SB6_* values */
+	uint32_t num_unit;	/* NUM_UNIT field; callers scale it to dwords */
+	uint64_t address;	/* 64-bit value built from packet dwords 2 and 3 */
+};
+
+/* Unpack a CP_LOAD_STATE6 packet: fields from dw[1], address from dw[2..3]. */
+static inline struct cp_load_state6 decode_cp_load_state6(const uint32_t *dw)
+{
+	const uint32_t ctrl = dw[1];
+	struct cp_load_state6 st = { .address = get_offset(dw + 2) };
+	st.state_type = FIELD(ctrl, CP_LOAD_STATE6_0_STATE_TYPE);
+	st.state_src = FIELD(ctrl, CP_LOAD_STATE6_0_STATE_SRC);
+	st.state_block = FIELD(ctrl, CP_LOAD_STATE6_0_STATE_BLOCK);
+	st.num_unit = FIELD(ctrl, CP_LOAD_STATE6_0_NUM_UNIT);
+	return st;
+}
+
+#define RD_MAX_IB_DEPTH 8	/* recursion bound for nested IBs */
+
+/* Dwords referenced by an indirect CP_LOAD_STATE6, or 0 for unknown blocks. */
+static uint32_t load_state6_dwords(const struct cp_load_state6 *state)
+{
+	bool constants = state->state_type == ST6_CONSTANTS;
+
+	switch (state->state_block) {
+	case SB6_VS_TEX: case SB6_HS_TEX: case SB6_DS_TEX:
+	case SB6_GS_TEX: case SB6_FS_TEX: case SB6_CS_TEX:
+		/* Textures: 16 dwords per state; samplers: 4 dwords per state */
+		return state->num_unit * (constants ? 16 : 4);
+	case SB6_VS_SHADER: case SB6_HS_SHADER: case SB6_DS_SHADER:
+	case SB6_GS_SHADER: case SB6_FS_SHADER: case SB6_CS_SHADER:
+		/* Constants: 4 dwords per unit; shaders: 32 dwords per unit */
+		return state->num_unit * (constants ? 4 : 32);
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Walk length dwords of PM4 packets at iova and widen the dump ranges of the
+ * BOs they touch: the IB itself, nested IBs and draw-state groups (followed
+ * recursively; depth is the current nesting level) and indirect
+ * CP_LOAD_STATE6 data. Packet-supplied counts are bounds-checked before use.
+ */
+static void
+decode_ib_depth(struct msm_gem_submit *submit, uint64_t iova, uint32_t length,
+		unsigned int depth)
+{
+	struct msm_gem_object *obj;
+	struct cp_load_state6 state;
+	uint64_t bo_end, end = iova + (uint64_t) length * 4;
+	const uint32_t *dwords;
+	const char *buf;
+	uint32_t i, j;
+	int obj_index;
+
+	if (depth > RD_MAX_IB_DEPTH) {
+		DRM_ERROR("ib nesting too deep\n");
+		return;
+	}
+
+	obj_index = find_bo(submit, iova);
+	if (obj_index == -1) {
+		DRM_ERROR("range not found\n");
+		return;
+	}
+
+	obj = submit->bos[obj_index].obj;
+	bo_end = submit->bos[obj_index].iova + obj->base.size;
+	if (bo_end < end) {
+		DRM_ERROR("ib out of range for bo %p, truncating %llx vs %llx\n",
+			  obj, bo_end, end);
+		/* Clamp to the BO end so the walk never leaves its mapping. */
+		end = bo_end;
+		length = (end - iova) / 4;
+	}
+
+	update_bo_range(submit, obj_index, iova, end);
+
+	buf = msm_gem_get_vaddr_active(&obj->base);
+	if (IS_ERR(buf))
+		return;
+
+	dwords = (const uint32_t *) (buf + (iova - submit->bos[obj_index].iova));
+	for (i = 0; i < length; i++) {
+		uint32_t pkt = dwords[i] >> 28;
+		/* Payload count: 14 bits in a type-7 header, 7 bits in type-4. */
+		uint32_t cnt = dwords[i] & (pkt == 7 ? 0x3fff : 0x7f);
+		const uint32_t *args = &dwords[i + 1];
+
+		if (pkt == 7 && cnt > length - i - 1) {
+			DRM_ERROR("  dword[%04x]: %08x overruns ib\n", i, dwords[i]);
+			break;
+		}
+		switch (pkt) {
+		case 7:
+			switch ((dwords[i] >> 16) & 0x7f) {
+			case CP_INDIRECT_BUFFER:
+				/* args: address lo, address hi, length in dwords */
+				if (cnt >= 3)
+					decode_ib_depth(submit, get_offset(args),
+							args[2], depth + 1);
+				break;
+			case CP_SET_DRAW_STATE:
+				/* 3 dwords per group: length, address lo, address hi */
+				for (j = 0; j + 3 <= cnt; j += 3) {
+					if (args[j] & 0xffff)
+						decode_ib_depth(submit, get_offset(&args[j + 1]),
+								args[j] & 0xffff, depth + 1);
+				}
+				break;
+			case CP_LOAD_STATE6_FRAG:
+			case CP_LOAD_STATE6_GEOM:
+				if (cnt < 3)
+					break;
+				state = decode_cp_load_state6(&dwords[i]);
+				if (state.state_src == SS6_INDIRECT)
+					mark_range(submit, state.address,
+						   load_state6_dwords(&state));
+				break;
+			default:
+				break;
+			}
+			break;
+		case 4:
+			/* type 4 command, register write */
+			break;
+		default:
+			DRM_ERROR("  dword[%04x]: %08x unknown class\n", i, dwords[i]);
+			break;
+		}
+		i += cnt;
+	}
+
+	msm_gem_put_vaddr(&obj->base);
+}
+
+/* Mark the BO ranges referenced by the command buffer at iova for dumping. */
+static void
+decode_ib(struct msm_gem_submit *submit, uint64_t iova, uint32_t length)
+{
+	decode_ib_depth(submit, iova, length, 0);
+}
+
 /* called under struct_mutex */
 void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
 		const char *fmt, ...)
@@ -360,6 +559,8 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
 	struct drm_device *dev = submit->dev;
 	struct task_struct *task;
 	char msg[256];
+	uint64_t main_iova = 0;
+	uint32_t main_length = 0, total_size = 0, sparse_size = 0;
 	int i, n;
 
 	if (!rd->open)
@@ -394,32 +595,36 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
 
 	rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
 
-	for (i = 0; rd_full && i < submit->nr_bos; i++)
-		snapshot_buf(rd, submit, i, 0, 0);
+	for (i = 0; i < submit->nr_bos; i++) {
+		submit->bos[i].start = ~0ull;	/* 64-bit max: range starts empty */
+		submit->bos[i].end = 0;
+	}
 
 	for (i = 0; i < submit->nr_cmds; i++) {
-		uint64_t iova = submit->cmd[i].iova;
-		uint32_t szd  = submit->cmd[i].size; /* in dwords */
-
-		/* snapshot cmdstream bo's (if we haven't already): */
-		if (!rd_full) {
-			snapshot_buf(rd, submit, submit->cmd[i].idx,
-					submit->cmd[i].iova, szd * 4);
+		/* Trace from the first CMD_BUF only; nested IBs are followed from it. */
+		if (submit->cmd[i].type == MSM_SUBMIT_CMD_BUF) {
+			main_iova = submit->cmd[i].iova;
+			main_length = submit->cmd[i].size;
+			decode_ib(submit, main_iova, main_length);
+			break;
 		}
+	}
 
-		switch (submit->cmd[i].type) {
-		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
-			/* ignore IB-targets, we've logged the buffer, the
-			 * parser tool will follow the IB based on the logged
-			 * buffer/gpuaddr, so nothing more to do.
-			 */
-			break;
-		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-		case MSM_SUBMIT_CMD_BUF:
-			rd_write_section(rd, RD_CMDSTREAM_ADDR,
-				(uint32_t[3]){ iova, szd, iova >> 32 }, 12);
-			break;
+	for (i = 0; i < submit->nr_bos; i++) {
+		total_size += submit->bos[i].obj->base.size;
+		if (rd_full) {	/* rd_full still means whole-buffer snapshots */
+			snapshot_buf(rd, submit, i, 0, 0);
+		} else if (submit->bos[i].start < submit->bos[i].end) {
+			snapshot_buf(rd, submit, i, submit->bos[i].start,
+				     submit->bos[i].end - submit->bos[i].start);
+			sparse_size += submit->bos[i].end - submit->bos[i].start;
 		}
 	}
+
+	DRM_DEBUG_DRIVER("submit nr_bos %d, sparse dump, wrote %u bytes, full dump %u bytes\n",
+			 submit->nr_bos, sparse_size, total_size);
+	if (main_length)	/* no CMD_BUF found: nothing to point the parser at */
+		rd_write_section(rd, RD_CMDSTREAM_ADDR,
+			(uint32_t[3]){ main_iova, main_length, main_iova >> 32 }, 12);
 }
 #endif
-- 
2.18.1

