==== r6xx/r7xx ==== 1. Write Linear (r6xx/r7xx) DW 1 [31:28] - command = 0010b [23] - 0 [15:0] - count (DW) DW 2 [31:2] - dest addr [31:2] DW 3 [9:8] - swap [7:0] - dest addr [39:32] DW 4..N [31:0] - data 2. Write Tiled (r6xx/r7xx) DW 1 [31:28] - command = 0010b [23] - 1 [15:0] - count (DW) DW 2 [31:0] - base_256 [39:8] - tiled surface address DW 3 [31] - 0 [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:10] - height_max - height minus 1 of linear surface [9:0] - pitch_tile_max DW 4 [31:12] - slice_tile_max [10:0] - z DW 5 [29:17] - y [15:3] - x [2:1] - swap DW 6..n [31:0] - data 3a. Copy Linear (r6xx) DW 1 [31:28] - command = 0011b [23] - 0 [15:0] - count (DW, must be even) DW 2 [31:2] - dest addr [31:2] DW 3 [31:2] - src addr [31:2] DW 4 [25:24] - swap [23:16] - dest addr [39:32] [9:8] - swap [7:0] - src addr [39:32] 3b. Copy Linear (r7xx) DW 1 [31:28] - command = 0011b [23] - 0 [15:0] - count (DW) DW 2 [31:2] - dest addr [31:2] DW 3 [31:2] - src addr [31:2] DW 4 [9:8] - swap [7:0] - dest addr [39:32] DW 5 [9:8] - swap [7:0] - src addr [39:32] 4. Copy Tiled (r6xx/r7xx) DW 1 [31:28] - command = 0011b [23] - 1 [15:0] - count (DW) DW 2 [31:0] - base_256 [39:8] - tiled surface address DW 3 [31] - detile (1 = T2L, 0 = L2T) [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:10] - height_max - height minus 1 of linear surface [9:0] - pitch_tile_max DW 4 [31:12] - slice_tile_max [10:0] - z DW 5 [29:17] - y [15:3] - x [2:1] - swap DW 6 [31:2] - addr [31:2] - linear surface address DW 7 [9:8] - swap [7:0] - addr [39:32] - linear surface address 5. Indirect Buffer (r6xx/r7xx) DW 1 [31:28] - command = 0100b [15:0] - count = 0 DW 2 [31:8] - IB base [31:8] DW 3 [31:16] - IB size (DW) [7:0] - IB base [39:32] 6. Semaphore (r6xx/r7xx) DW 1 [31:28] - command = 0101b [22] - signal [15:0] - count = 0 DW 2 [31:3] - semaphore addr [31:3] DW 3 [7:0] - semaphore addr [39:32] 7. 
Fence (r6xx/r7xx) DW 1 [31:28] - command = 0110b [15:0] - count = 0 DW 2 [31:2] - fence addr [31:2] DW 3 [7:0] - fence addr [39:32] 8. Trap (r6xx/r7xx) DW 1 [31:28] - command = 0111b [15:0] - count = 0 9. No-Op (r6xx/r7xx) DW 1 [31:28] - command = 1111b [15:0] - count = 0 10. Constant Fill (r7xx) DW 1 [31:28] - command = 1101b [23] - 0 [15:0] - count (DW) DW 2 [31:2] - dst addr [31:2] DW 3 [31:0] - constant data DW 4 [23:16] - dst addr [39:32] ==== evergreen/ni/si ==== 1. Write Linear (evergreen/ni/si) DW 1 [31:28] - command = 0010b [23] - 0 [19:0] - count (DW) DW 2 [31:2] - dest addr [31:2] DW 3 [9:8] - swap [7:0] - dest addr [39:32] DW 4..N [31:0] - data 2. Write Tiled (evergreen/ni/si) DW 1 [31:28] - command = 0010b [23] - 1 [19:0] - count (DW) DW 2 [31:0] - base_256 [39:8] - tiled surface address DW 3 [31] - 0 [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:21] - bank height [19:18] - bank width [17:16] - mt aspect DW 4 [29:16] - height_max - height minus 1 of linear surface [10:0] - pitch_tile_max DW 5 [30:26] - pipe_config (si only) [21:0] - slice_tile_max DW 6 [28:18] - z [13:0] - x DW 7 [31:30] - swap [28:27] - MT/non_disp (si: 28:27 = MT[1:0], eg/ni: 28:27 = non_disp[0:1]) [26:25] - num banks [23:21] - tile split [13:0] - y DW 8..n [31:0] - data 3. Write PTE/PDE (ni/si) DW 1 [31:28] - command = 0010b [27:20] - 01000010b [19:0] - count (DW) DW 2 [31:2] - dest addr [31:2] DW 3 [7:0] - dest addr [39:32] DW 4 [31:0] - mask [31:0] DW 5 [31:0] - mask [63:32] DW 6 [31:0] - value [31:0] DW 7 [31:0] - value [63:32] DW 8 [31:0] - increment [31:0] DW 9 [31:0] - increment [63:32] 4. Copy L2L, DW aligned (evergreen/ni/si) DW 1 [31:28] - command = 0011b [27:20] - 00000000b [19:0] - count (DW) DW 2 [31:2] - dest addr [31:2] DW 3 [31:2] - src addr [31:2] DW 4 [9:8] - swap [7:0] - dest addr [39:32] DW 5 [9:8] - swap [7:0] - src addr [39:32] 5. 
Copy L2L, byte aligned (evergreen/ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01000000b [19:0] - count (bytes) DW 2 [31:0] - dest addr [31:0] DW 3 [31:0] - src addr [31:0] DW 4 [9:8] - swap [7:0] - dest addr [39:32] DW 5 [9:8] - swap [7:0] - src addr [39:32] 6. Copy L2L, DW aligned, broadcast (evergreen/ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01000100b [19:0] - count (DW) DW 2 [31:2] - dest addr1 [31:2] DW 3 [31:2] - dest addr2 [31:2] DW 4 [31:2] - src addr [31:2] DW 5 [9:8] - swap [7:0] - dest addr1 [39:32] DW 6 [9:8] - swap [7:0] - dest addr2 [39:32] DW 7 [9:8] - swap [7:0] - src addr [39:32] 7. Copy L2L, partial (ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01000001b [19:0] - count = 0 DW 2 [31:2] - src addr [31:2] DW 3 [31:13] - src pitch (bytes) [9:8] - swap [7:0] - src addr [39:32] DW 4 [31:0] - src slice pitch (bytes) DW 5 [31:2] - dest addr [31:2] DW 6 [31:13] - dest pitch (bytes) [9:8] - swap [7:0] - dest addr [39:32] DW 7 [31:0] - dest slice pitch (bytes) DW 8 [29:16] - dy (pixels) [13:0] - dx (pixels) DW 9 [31:29] - size - log2 bytes per pixel [10:0] - dz (pixels) 8. Copy L2T/T2L (evergreen/ni/si) DW 1 [31:28] - command = 0011b [27:20] - 00001000b [19:0] - count (DW) DW 2 [31:0] - base_256 [39:8] - tiled surface address DW 3 [31] - detile (1 = T2L, 0 = L2T) [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:21] - bank height [19:18] - bank width [17:16] - mt aspect DW 4 [29:16] - height_max - height minus 1 of linear surface [10:0] - pitch_tile_max DW 5 [30:26] - pipe_config (si only) [21:0] - slice_tile_max DW 6 [28:18] - z [13:0] - x DW 7 [31:30] - swap [28:27] - MT/non_disp (si: 28:27 = MT[1:0], eg/ni: 28:27 = non_disp[0:1]) [26:25] - num banks [23:21] - tile split [13:0] - y DW 8 [31:2] - addr [31:2] - linear surface address DW 9 [31:30] - swap [7:0] - addr [39:32] - linear surface address 9. 
Copy L2T Frame to Field (evergreen/ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01001000b [19:0] - count (DW) DW 2 [31:0] - odd_base_256 [39:8] - tiled surface address DW 3 [31:0] - even_base_256 [39:8] - tiled surface address DW 4 [31] - 0 [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:21] - bank height [19:18] - bank width [17:16] - mt aspect DW 5 [29:16] - height_max - height minus 1 of linear surface [10:0] - pitch_tile_max DW 6 [30:26] - pipe_config (si only) [21:0] - slice_tile_max DW 7 [28:18] - z [13:0] - x DW 8 [31:30] - swap [28:27] - MT/non_disp (si: 28:27 = MT[1:0], eg/ni: 28:27 = non_disp[0:1]) [26:25] - num banks [23:21] - tile split [13:0] - y DW 9 [31:2] - addr [31:2] - linear surface address DW 10 [31:30] - swap [7:0] - addr [39:32] - linear surface address 10. Copy L2T broadcast (evergreen/ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01001011b [19:0] - count (DW) DW 2 [31:0] - dest0_base_256 [39:8] - tiled surface address DW 3 [31:0] - dest1_base_256 [39:8] - tiled surface address DW 4 [31] - 0 [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:21] - bank height [19:18] - bank width [17:16] - mt aspect DW 5 [29:16] - height_max - height minus 1 of linear surface [10:0] - pitch_tile_max DW 6 [30:26] - pipe_config (si only) [21:0] - slice_tile_max DW 7 [28:18] - z [13:0] - x DW 8 [31:30] - swap1 [28:27] - MT/non_disp (si: 28:27 = MT[1:0], eg/ni: 28:27 = non_disp[0:1]) [26:25] - num banks [23:21] - tile split [20:19] - swap0 [13:0] - y DW 9 [31:2] - addr [31:2] - linear surface address DW 10 [31:30] - swap [7:0] - addr [39:32] - linear surface address 11. 
Copy L2T/T2L (tile units) (evergreen/ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01001100b [15:0] - count (tiles) DW 2 [31:0] - base_256 [39:8] - tiled surface address DW 3 [31] - detile (1 = T2L, 0 = L2T) [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:21] - bank height [19:18] - bank width [17:16] - mt aspect DW 4 [29:16] - height_max - height minus 1 of linear surface [10:0] - pitch_tile_max DW 5 [30:26] - pipe_config (si only) [21:0] - slice_tile_max DW 6 [28:18] - z [13:0] - x (tile boundary) DW 7 [31:30] - swap [28:27] - MT/non_disp (si: 28:27 = MT[1:0], eg/ni: 28:27 = non_disp[0:1]) [26:25] - num banks [23:21] - tile split [13:0] - y (tile boundary) DW 8 [31:2] - addr [31:2] - linear surface address DW 9 [31:30] - swap [7:0] - addr [39:32] - linear surface address 12. Copy L2T broadcast (tile units) (evergreen/ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01001111b [15:0] - count (tiles) DW 2 [31:0] - dest0_base_256 [39:8] - tiled surface address DW 3 [31:0] - dest1_base_256 [39:8] - tiled surface address DW 4 [31] - 0 [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:21] - bank height [19:18] - bank width [17:16] - mt aspect DW 5 [29:16] - height_max - height minus 1 of linear surface [10:0] - pitch_tile_max DW 6 [30:26] - pipe_config (si only) [21:0] - slice_tile_max DW 7 [28:18] - z [13:0] - x (tile boundary) DW 8 [31:30] - swap1 [28:27] - MT/non_disp (si: 28:27 = MT[1:0], eg/ni: 28:27 = non_disp[0:1]) [26:25] - num banks [23:21] - tile split [20:19] - swap0 [13:0] - y (tile boundary) DW 9 [31:2] - addr [31:2] - linear surface address DW 10 [31:30] - swap [7:0] - addr [39:32] - linear surface address 13. 
Copy L2T/T2L, partial (ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01001001b [19:0] - count (DW) DW 2 [31:0] - base_256 [39:8] - tiled surface address DW 3 [31] - detile (1 = T2L, 0 = L2T) [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:21] - bank height [19:18] - bank width [17:16] - mt aspect DW 4 [29:16] - height_max - height minus 1 of linear surface [10:0] - pitch_tile_max DW 5 [30:26] - pipe_config (si only) [21:0] - slice_tile_max DW 6 [28:18] - z [13:0] - x DW 7 [31:30] - swap [28:27] - MT/non_disp (si: 28:27 = MT[1:0], eg/ni: 28:27 = non_disp[0:1]) [26:25] - num banks [23:21] - tile split [13:0] - y DW 8 [31:2] - addr [31:2] - linear surface address DW 9 [31:13] - linear pitch [9:8] - swap [7:0] - addr [39:32] - linear surface address DW 10 [31:0] - linear slice pitch (bytes) DW 11 [29:16] - dy (pixels) [13:0] - dx (pixels) DW 12 [10:0] - dz (pixels) 14. Copy T2T, partial (tile units) (ni/si) DW 1 [31:28] - command = 0011b [27:20] - 01001101b [19:0] - count = 0 DW 2 [31:0] - src base_256 [39:8] - tiled surface address DW 3 [29:16] - src height_max [10:0] - src pitch_tile_max DW 4 [21:0] - src slice_tile_max DW 5 [31:0] - dest base_256 [39:8] - tiled surface address DW 6 [29:16] - dest height_max [10:0] - dest pitch_tile_max DW 7 [30:26] - pipe_config (si only) [21:0] - dest slice_tile_max DW 8 [31] - detile (1 = T2L, 0 = L2T) [30:27] - array mode [26:24] - size - log2 bytes per pixel [22:21] - bank height [19:18] - bank width [17:16] - mt aspect [13:12] - swap1 [10:9] - MT/non_disp (si: 10:9 = MT[1:0], eg/ni: 10:9 = non_disp[0:1]) [8:7] - num banks [5:3] - tile split [2:1] - swap0 DW 9 [29:16] - src_x (low 3 bits MBZ) [13:0] - dst_x (low 3 bits MBZ) DW 10 [29:16] - src_y (low 3 bits MBZ) [13:0] - dst_y (low 3 bits MBZ) DW 11 [26:16] - src_z [10:0] - dst_z DW 12 [29:16] - dy (low 3 bits MBZ) [13:0] - dx (low 3 bits MBZ) DW 13 [10:0] - dz 15. 
Indirect Buffer (evergreen/ni/si) DW 1 [31:28] - command = 0100b [22:20] - vmid (ni/si only) [19:0] - count = 0 DW 2 [31:5] - IB base [31:5] DW 3 [31:12] - IB size (DW) [7:0] - IB base [39:32] 16. Semaphore (evergreen/ni/si) DW 1 [31:28] - command = 0101b [23] - mailbox [22] - signal [19:0] - count = 0 DW 2 [31:3] - semaphore addr [31:3] DW 3 [7:0] - semaphore addr [39:32] 17. Fence (evergreen/ni/si) DW 1 [31:28] - command = 0110b [19:0] - count = 0 DW 2 [31:2] - fence addr [31:2] DW 3 [7:0] - fence addr [39:32] 18. Trap (evergreen/ni/si) DW 1 [31:28] - command = 0111b [19:0] - count = 0 19. No-Op (evergreen/ni/si) DW 1 [31:28] - command = 1111b [15:0] - count = 0 20. Constant Fill (evergreen/ni/si) DW 1 [31:28] - command = 1101b [23] - 0 [19:0] - count (DW) DW 2 [31:2] - dst addr [31:2] DW 3 [31:0] - constant data DW 4 [23:16] - dst addr [39:32] 21. SRBM Write (evergreen/ni/si) DW 1 [31:28] - command = 1001b [27:26] - 00b [19:0] - count = 0 DW 2 [19:16] - byte enable [15:0] - register (DW aligned) DW 3 [31:0] - data 22. SRBM Read/Poll (evergreen/ni) DW 1 [31:28] - command = 1001b [27] - 1 [26] - poll [19:0] - count = 0 DW 2 [31:20] - retry count [15:0] - register (DW aligned) DW 3 [31:0] - data DW 4 [31:0] - mask DW 5 [31:0] - value 23. Poll RegMem (si) DW 1 [31:28] - command = 1110b [27] - M (0 = register, 1 = memory) [19:0] - count = 0 DW 2 [31:2] - addr [31:2] or register (DW aligned) DW 3 [27:16] - retry count [7:0] - addr [39:32] or 0 DW 4 [31:0] - mask DW 5 [31:0] - reference DW 6 [30:28] - func 000b - always 001b - < 010b - <= 011b - == 100b - != 101b - >= 110b - > [16:0] - poll interval ==== cik ==== 1. Copy Linear (cik) DW 1 [31:16] - extra info = 0000000000000000b [15:8] - sub-opcode = 0 [7:0] - opcode = 1 DW 2 [21:0] - count (bytes) DW 3 [25:24] - src swap [17:16] - dst swap DW 4 [31:0] - src addr [31:0] DW 5 [31:0] - src addr [63:32] DW 6 [31:0] - dst addr [31:0] DW 7 [31:0] - dst addr [63:32] 2. 
Copy Linear, broadcast (cik) DW 1 [31:16] - extra info = 0000100000000000b [15:8] - sub-opcode = 0 [7:0] - opcode = 1 DW 2 [21:0] - count (bytes) DW 3 [25:24] - src swap [17:16] - dst1 swap [9:8] - dst2 swap DW 4 [31:0] - src addr [31:0] DW 5 [31:0] - src addr [63:32] DW 6 [31:0] - dst1 addr [31:0] (dst1 and dst2 must have same alignment; dst1 4:0 == dst2 4:0) DW 7 [31:0] - dst1 addr [63:32] DW 8 [31:0] - dst2 addr [31:0] (dst1 and dst2 must have same alignment; dst1 4:0 == dst2 4:0) DW 9 [31:0] - dst2 addr [63:32] 3. Copy Linear, sub-window (cik) DW 1 [31:29] - element_size (0=8bpp, 1=16bpp, 2=32bpp, 3=64bpp, 4=128bpp) [15:8] - sub-opcode = 4 [7:0] - opcode = 1 DW 2 [31:0] - src addr [31:0] DW 3 [31:0] - src addr [63:32] DW 4 [29:16] - src y [13:0] - src x DW 5 [29:16] - src pitch (pixels - 1) [11:0] - src z DW 6 [27:0] - src slice pitch (pixels - 1) DW 7 [31:0] - dst addr [31:0] DW 8 [31:0] - dst addr [63:32] DW 9 [29:16] - dst y [13:0] - dst x DW 10 [29:16] - dst pitch (pixels - 1) [11:0] - dst z DW 11 [27:0] - dst slice pitch (pixels - 1) DW 12 [29:16] - rect y [13:0] - rect x DW 13 [25:24] - src swap [17:16] - dst swap [11:0] - rect z 4. Copy Tiled (cik) This packet assumes the linear and tiled buffers have the same dimensions. x, y, z coordinates apply to both linear and tiled surfaces. 
DW 1 [31] - detile (1 = T2L, 0 = L2T) [15:8] - sub-opcode = 1 [7:0] - opcode = 1 DW 2 [31:0] - tiled addr [31:0] (256 byte aligned; 7:0 must be 0) DW 3 [31:0] - tiled addr [63:32] DW 4 [29:16] - height [10:0] - pitch (tiles) DW 5 [21:0] - slice pitch (tiles) DW 6 [30:26] - pipe config [25:24] - macro tile aspect [22:21] - num banks [19:18] - bank height [16:15] - bank width [13:11] - tile split [10:8] - micro tile mode [6:3] - array mode [2:0] - element_size (0=8bpp, 1=16bpp, 2=32bpp, 3=64bpp, 4=128bpp) DW 7 [29:16] - y [13:0] - x (DW aligned; multiple of 4 for 8bpp, multiple of 2 for 16bpp) DW 8 [25:24] - tiled swap [17:16] - linear swap [11:0] - z DW 9 [31:0] - linear addr [31:0] (DW aligned; 1:0 must be 0) DW 10 [31:0] - linear addr [63:32] DW 11 [18:0] - linear pitch DW 12 [19:0] - count (DW) 5. Copy L2T, broadcast (cik) DW 1 [31:16] - extra info = 0000100000000000b [15:8] - sub-opcode = 1 [7:0] - opcode = 1 DW 2 [31:0] - tiled addr 1 [31:0] (256 byte aligned; 7:0 must be 0) DW 3 [31:0] - tiled addr 1 [63:32] DW 4 [31:0] - tiled addr 2 [31:0] (256 byte aligned; 7:0 must be 0) DW 5 [31:0] - tiled addr 2 [63:32] DW 6 [29:16] - height [10:0] - pitch (tiles) DW 7 [21:0] - slice pitch (tiles) DW 8 [30:26] - pipe config [25:24] - macro tile aspect [22:21] - num banks [19:18] - bank height [16:15] - bank width [13:11] - tile split [10:8] - micro tile mode [6:3] - array mode [2:0] - element_size (0=8bpp, 1=16bpp, 2=32bpp, 3=64bpp, 4=128bpp) DW 9 [29:16] - y [13:0] - x (DW aligned; multiple of 4 for 8bpp, multiple of 2 for 16bpp) DW 10 [11:0] - z DW 11 [25:24] - tiled 1 swap [17:16] - linear swap [9:8] - tiled 2 swap DW 12 [31:0] - linear addr [31:0] (DW aligned; 1:0 must be 0) DW 13 [31:0] - linear addr [63:32] DW 14 [18:0] - linear pitch DW 15 [19:0] - count (DW) 6. 
Copy L2T, frame to field (cik) DW 1 [31:16] - extra info = 0000010000000000b [15:8] - sub-opcode = 1 [7:0] - opcode = 1 DW 2 [31:0] - tiled addr 1 [31:0] (256 byte aligned; 7:0 must be 0) DW 3 [31:0] - tiled addr 1 [63:32] DW 4 [31:0] - tiled addr 2 [31:0] (256 byte aligned; 7:0 must be 0) DW 5 [31:0] - tiled addr 2 [63:32] DW 6 [29:16] - height [10:0] - pitch (tiles) DW 7 [21:0] - slice pitch (tiles) DW 8 [30:26] - pipe config [25:24] - macro tile aspect [22:21] - num banks [19:18] - bank height [16:15] - bank width [13:11] - tile split [10:8] - micro tile mode [6:3] - array mode [2:0] - element_size (0=8bpp, 1=16bpp, 2=32bpp, 3=64bpp, 4=128bpp) DW 9 [29:16] - y [13:0] - x (DW aligned; multiple of 4 for 8bpp, multiple of 2 for 16bpp) DW 10 [11:0] - z DW 11 [25:24] - tiled 1 swap [17:16] - linear swap [9:8] - tiled 2 swap DW 12 [31:0] - linear addr [31:0] (DW aligned; 1:0 must be 0) DW 13 [31:0] - linear addr [63:32] DW 14 [19:0] - count (DW) 7. Copy Tiled, sub-window (cik) DW 1 [31] - detile (1 = T2L, 0 = L2T) [15:8] - sub-opcode = 5 [7:0] - opcode = 1 DW 2 [31:0] - tiled addr [31:0] (256 byte aligned; 7:0 must be 0) DW 3 [31:0] - tiled addr [63:32] DW 4 [29:16] - tiled y [13:0] - tiled x (DW aligned; multiple of 4 for 8bpp, multiple of 2 for 16bpp) DW 5 [26:16] - pitch (tiles) [11:0] - tiled z DW 6 [21:0] - slice pitch (tiles) DW 7 [30:26] - pipe config [25:24] - macro tile aspect [22:21] - num banks [19:18] - bank height [16:15] - bank width [13:11] - tile split [10:8] - micro tile mode [6:3] - array mode [2:0] - element_size (0=8bpp, 1=16bpp, 2=32bpp, 3=64bpp, 4=128bpp) DW 8 [31:0] - linear addr [31:0] (DW aligned; 1:0 must be 0) DW 9 [31:0] - linear addr [63:32] DW 10 [29:16] - linear y [13:0] - linear x (DW aligned; multiple of 4 for 8bpp, multiple of 2 for 16bpp) DW 11 [29:16] - linear pitch (pixels - 1) [11:0] - linear z DW 12 [27:0] - linear slice pitch (pixels - 1) DW 13 [29:16] - rect y [13:0] - rect x (DW aligned; multiple of 4 for 8bpp, multiple of 2 
for 16bpp) DW 14 [25:24] - tiled swap [17:16] - linear swap [11:0] - rect z 8. T2T Copy, sub-window (cik) DW 1 [15:8] - sub-opcode = 6 [7:0] - opcode = 1 DW 2 [31:0] - src addr [31:0] (256 byte aligned; 7:0 must be 0) DW 3 [31:0] - src addr [63:32] DW 4 [29:16] - src y (aligned to tile; 8 pixel boundary; 18:16 must be 0) [13:0] - src x (aligned to tile; 8 pixel boundary; 2:0 must be 0) DW 5 [26:16] - src pitch (tiles) [11:0] - src z DW 6 [21:0] - src slice pitch (tiles) DW 7 [30:26] - src pipe config [25:24] - src macro tile aspect [22:21] - src num banks [19:18] - src bank height [16:15] - src bank width [13:11] - src tile split [10:8] - src micro tile mode (must be same as dst micro tile mode, unless src = display, dst = rotate) [6:3] - src array mode [2:0] - src element_size (0=8bpp, 1=16bpp, 2=32bpp, 3=64bpp, 4=128bpp) DW 8 [31:0] - dst addr [31:0] (256 byte aligned; 7:0 must be 0) DW 9 [31:0] - dst addr [63:32] DW 10 [29:16] - dst y (aligned to tile; 8 pixel boundary; 18:16 must be 0) [13:0] - dst x (aligned to tile; 8 pixel boundary; 2:0 must be 0) DW 11 [26:16] - dst pitch (tiles) [11:0] - dst z DW 12 [21:0] - dst slice pitch (tiles) DW 13 [30:26] - dst pipe config [25:24] - dst macro tile aspect [22:21] - dst num banks [19:18] - dst bank height [16:15] - dst bank width [13:11] - dst tile split [10:8] - dst micro tile mode (must be same as src micro tile mode, unless src = display, dst = rotate) [6:3] - dst array mode [2:0] - dst element_size (0=8bpp, 1=16bpp, 2=32bpp, 3=64bpp, 4=128bpp) DW 14 [29:16] - rect y (18:16 must be 0) [13:0] - rect x (2:0 must be 0) DW 15 [25:24] - src swap [17:16] - dst swap [11:0] - rect z (2:0 must be 0) 9. 
Structured Buffer Copy, AOS/SOA (cik) DW 1 [31] - direction (1 = structured to linear, 0 = linear to structured) [15:8] - sub-opcode = 3 [7:0] - opcode = 1 DW 2 [31:0] - structured buffer addr [31:0] (DW aligned; 1:0 must be 0) DW 3 [31:0] - structured buffer addr [63:32] DW 4 [31:0] - start index DW 5 [31:0] - count (count == 0 means 1 stride) DW 6 [25:24] - structured buffer swap [17:16] - linear swap [10:0] - stride DW 7 [31:0] - linear addr [31:0] (DW aligned; 1:0 must be 0) DW 8 [31:0] - linear addr [63:32] 10. Write Linear (cik) DW 1 [15:8] - sub-opcode = 0 [7:0] - opcode = 2 DW 2 [31:0] - addr [31:0] (DW aligned; 1:0 must be 0) DW 3 [31:0] - addr [63:32] DW 4 [25:24] - swap [19:0] - count (DW) DW 5..N [31:0] - data 11. Write Tiled (cik) DW 1 [15:8] - sub-opcode = 1 [7:0] - opcode = 2 DW 2 [31:0] - tiled addr [31:0] (256 byte aligned; 7:0 must be 0) DW 3 [31:0] - tiled addr [63:32] DW 4 [29:16] - height [10:0] - pitch (tiles) DW 5 [21:0] - slice pitch (tiles) DW 6 [30:26] - pipe config [25:24] - macro tile aspect [22:21] - num banks [19:18] - bank height [16:15] - bank width [13:11] - tile split [10:8] - micro tile mode [6:3] - array mode [2:0] - element_size (0=8bpp, 1=16bpp, 2=32bpp, 3=64bpp, 4=128bpp) DW 7 [29:16] - y [13:0] - x (DW aligned; multiple of 4 for 8bpp, multiple of 2 for 16bpp) DW 8 [25:24] - tiled swap [11:0] - z DW 9 [19:0] - count (DW) DW 10..N [31:0] - data 12. Constant Fill (cik) DW 1 [31:30] - size (00b = byte fill, 10b = DW fill) [17:16] - swap [15:8] - sub-opcode = 0 [7:0] - opcode = 11 DW 2 [31:0] - addr [31:0] (if size = 10b, DW aligned; 1:0 must be 0) DW 3 [31:0] - addr [63:32] DW 4 [31:0] - data DW 5 [21:0] - count (bytes) (if size = 10b, DW aligned; 1:0 must be 0) 13. Indirect Buffer (cik) DW 1 [19:16] - vmid [15:8] - sub-opcode = 0 [7:0] - opcode = 4 DW 2 [31:0] - IB base [31:0] (32 byte aligned) DW 3 [31:0] - IB base [63:32] DW 4 [19:0] - IB size (DW) 14. 
Fence (cik) DW 1 [15:8] - sub-opcode = 0 [7:0] - opcode = 5 DW 2 [31:0] - addr [31:0] (DW aligned; 1:0 must be 0) DW 3 [31:0] - addr [63:32] DW 4 [31:0] - data 15. Trap (cik) DW 1 [15:8] - sub-opcode = 0 [7:0] - opcode = 6 16. Semaphore (cik) DW 1 [31] - mailbox [30] - signal [15:8] - sub-opcode = 0 [7:0] - opcode = 7 DW 2 [31:3] - semaphore addr [31:3] DW 3 [31:0] - semaphore addr [39:32] 17. Poll RegMem (cik) DW 1 [31] - m (1 = memory, 0 = register) [30:28] - func 000b - always 001b - < 010b - <= 011b - == 100b - != 101b - >= 110b - > [27:26] - op (00b = traditional wait_reg_mem, 01b = write_reg wait_reg_mem write_reg; 01b only valid when m == 0) [15:8] - sub-opcode = 0 [7:0] - opcode = 8 DW 2 [31:2] - addr [31:2] or register (DW aligned) DW 3 [31:0] - addr [63:32] or pre-poll register data if op == 01b DW 4 [31:0] - reference DW 5 [31:0] - mask DW 6 [27:16] - retry count (0xfff = retry forever) [15:0] - poll interval 18. Conditional Execution (cik) DW 1 [15:8] - sub-opcode = 0 [7:0] - opcode = 9 DW 2 [31:0] - bool addr [31:0] (DW aligned; 1:0 must be 0) DW 3 [31:0] - bool addr [39:32] DW 4 [13:0] - exec count 19. Write PTE/PDE (cik) DW 1 [15:8] - sub-opcode = 0 [7:0] - opcode = 12 DW 2 [31:0] - dest addr [31:0] (must be aligned to 8 bytes) DW 3 [31:0] - dest addr [63:32] DW 4 [31:0] - mask [31:0] DW 5 [31:0] - mask [63:32] DW 6 [31:0] - value [31:0] DW 7 [31:0] - value [63:32] DW 8 [31:0] - increment [31:0] DW 9 [31:0] - increment [63:32] DW 10 [18:0] - count (2 DW units; 1 entry) 20. SRBM Write (cik) DW 1 [31:28] - byte enable [15:8] - sub-opcode = 0 [7:0] - opcode = 14 DW 2 [15:0] - register (DW aligned) DW 3 [31:0] - data 21. NOP (cik) DW 1 [15:8] - sub-opcode = 0 [7:0] - opcode = 0 22. Set Local Timestamp (cik) DW 1 [15:8] - sub-opcode = 0 [7:0] - opcode = 13 DW 2 [31:0] - data [31:0] DW 3 [31:0] - data [63:32] 23. Get Local Timestamp (cik) DW 1 [15:8] - sub-opcode = 1 [7:0] - opcode = 13 DW 2 [31:3] - addr [31:3] DW 3 [31:0] - addr [63:32] 24. 
Get Global Timestamp (cik) DW 1 [15:8] - sub-opcode = 2 [7:0] - opcode = 13 DW 2 [31:3] - addr [31:3] DW 3 [31:0] - addr [63:32]