Writing test log into TestResults.qpa dEQP Core vulkan-cts-1.3.7.1-86-g4340e6e70bc62746e7cd03e8d03541e446f4f17f (0x4340e6e7) starting.. target implementation = 'Default' INFO [GENERAL] LNL Fulsim Version 220818-71451-LNL Aug 18 2022 18:17:41 INFO [GENERAL] [x64 Build] INFO [GENERAL] Fulsim command line: -device lnl.1tx2x4x8.a0 -dumpConfig INFO [DEVICE_MNGR] -device argument was not explicit on the file to load. -device lnl.1tx2x4x8.a0 Aubload will attempt to find a matching xml file in well-known paths Relative Path :config/fleur_de_lis/devices/lnl.1tx2x4x8.a0.map.xml ==> /home/idr/devel/intel-graphics/FulsimBin/lnl/config/fleur_de_lis/devices/lnl.1tx2x4x8.a0.map.xml INFO [DEVICE_MNGR] Aubload found a config file at : /home/idr/devel/intel-graphics/FulsimBin/lnl/config/fleur_de_lis/devices/lnl.1tx2x4x8.a0.map.xml [Info]FDL Resource path for the map file being loaded: /home/idr/devel/intel-graphics/FulsimBin/lnl/config/fleur_de_lis [Info]FDL Resource path for the map file being loaded: /home/idr/devel/intel-graphics/FulsimBin/lnl/config/fleur_de_lis/d15/d15_lnl_a0.map.xml /home/idr/devel/intel-graphics/FulsimBin/lnl/config/fleur_de_lis [Info]Device: LNL.2x4x8.a.0 INFO [DEVICE_MNGR] Cobalt has Loaded a Fleur De Lis Configuration. [Warn]ExecuteConfigGen>Unable to locate ConfigGen application. Current directory: "/tmp/hwconfig-NyQLXk" ConfigGen Path override given: Updating FDL Bios using ConfigGen will be skipped. Default Bios will be used INFO [DEVICE_MNGR] ConfigGen response was empty -- custom bios config will not be used INFO [COBALT_CORE] UserTime: 0.47 seconds INFO [COBALT_CORE] SystemTime: 0.01 seconds INFO [COBALT_CORE] WallClockTime: 0.483213 seconds INFO [COBALT_CORE] Max VM Usage (VmPeak / PeakPagefileUsage): 120992 KB INFO [COBALT_CORE] Max VM Residence (VmHWM / PeakWorkingSetSize): 65032 KB Success WARNING [tile_0.m_gt_core.DvRender.m_slice_0.DSS_0.eu_cluster.tdl.tdl_message_channel][ 461200 ps | 1153 clk ] TDL mmio register MM_OFFSET_CS_TDL_PAGE_FAULT_MODE is not implemented. Write is ignored. DRM_I915_QUERY: unhandled query id: 65540 DRM_I915_GETPARAM: unhandled param 56 MESA-INTEL: warning: Vulkan not yet supported on Intel(R) Graphics (LNL) DRM_I915_GETPARAM: unhandled param 55 DRM_I915_GETPARAM: unhandled param 58 DRM_I915_GEM_CONTEXT_SETPARAM: unhandled param 13 DRM_I915_QUERY: unhandled query id: 3 DRM_I915_GETPARAM: unhandled param 54 DRM_I915_QUERY: unhandled query id: 65540 DRM_I915_GETPARAM: unhandled param 56 MESA-INTEL: warning: Vulkan not yet supported on Intel(R) Graphics (LNL) DRM_I915_GETPARAM: unhandled param 55 DRM_I915_GETPARAM: unhandled param 58 DRM_I915_GEM_CONTEXT_SETPARAM: unhandled param 13 DRM_I915_QUERY: unhandled query id: 3 DRM_I915_GETPARAM: unhandled param 54 INFO [tile_0.m_gt_core.DvRender.Geom.FF0.SvgStateVarUnitGlobalRender][ 208461200 ps | 521153 clk ] Invalid Vertex Buffer programming by test or driver: VERTEX_BUFFER_STATE.Null Vertex Buffer must be set when the VERTEX_BUFFER_STATE.Buffer Size is 0x0.BSpec:https://gfxspecs.intel.com/Predator/Home/Index/56282 RCS0: 0x00006210 is a privileged register in a non-privileged batch buffer. Register write will be Noop-ed! Test case 'dEQP-VK.compute.pipeline.cooperative_matrix.khr_r.matrixmuladd.sint8_sint32.buffer.rowmajor'.. NIR (from SPIR-V) for MESA_SHADER_COMPUTE shader: shader: MESA_SHADER_COMPUTE source_sha1: {0xde452e1a, 0xdfc6e380, 0xa65f6252, 0xf3081c68, 0x6fc6bfb0} internal: false workgroup_size: 64, 2, 1 stage: 5 next_stage: 0 num_ssbos: 4 subgroup_size: 2 ptr_size: 0 inputs: 0 outputs: 0 uniforms: 0 decl_var system INTERP_MODE_NONE none uint #0 (SYSTEM_VALUE_SUBGROUP_ID) decl_var system INTERP_MODE_NONE none uvec3 #1 (SYSTEM_VALUE_WORKGROUP_ID) decl_var ssbo INTERP_MODE_NONE restrict block #2 (~0, 0, 0) decl_var ssbo INTERP_MODE_NONE restrict block #3 (~0, 0, 1) decl_var ssbo INTERP_MODE_NONE restrict block #4 (~0, 0, 2) decl_var ssbo INTERP_MODE_NONE restrict block #5 (~0, 0, 3) decl_var INTERP_MODE_NONE none coopmat #6 decl_var INTERP_MODE_NONE none coopmat #7 decl_var INTERP_MODE_NONE none coopmat #8 decl_var INTERP_MODE_NONE none coopmat #9 decl_function main (0 params) impl main { decl_var INTERP_MODE_NONE none uvec2 #10 decl_var INTERP_MODE_NONE none uvec2 #11 decl_var INTERP_MODE_NONE none uint #12 decl_var INTERP_MODE_NONE none uint #13 decl_var INTERP_MODE_NONE none uint #14 decl_var INTERP_MODE_NONE none uint #15 decl_var INTERP_MODE_NONE none coopmat #16 decl_var INTERP_MODE_NONE none coopmat #17 decl_var INTERP_MODE_NONE none coopmat #18 decl_var INTERP_MODE_NONE none coopmat cmat_bitcast decl_var INTERP_MODE_NONE none coopmat cmat_ssa decl_var INTERP_MODE_NONE none coopmat cmat_bitcast#19 decl_var INTERP_MODE_NONE none coopmat cmat_ssa#20 decl_var INTERP_MODE_NONE none coopmat cmat_bitcast#21 decl_var INTERP_MODE_NONE none coopmat cmat_ssa#22 decl_var INTERP_MODE_NONE none coopmat cmat_ssa#23 decl_var INTERP_MODE_NONE none coopmat cmat_ssa#24 decl_var INTERP_MODE_NONE none coopmat cmat_ssa#25 decl_var INTERP_MODE_NONE none coopmat cmat_muladd decl_var INTERP_MODE_NONE none coopmat cmat_ssa#26 con block b0: // preds: 32 %0 = deref_var � (system uint) 32 %1 = @load_deref (%0) (access=none) 32 %2 = load_const (0x00000002) 32 %3 = umod %1, %2 (0x2) 32 %4 = deref_var � (system uint) 32 %5 = @load_deref (%4) (access=none) 32 %6 = load_const (0x00000002) 32 %7 = udiv %5, %6 (0x2) 32x2 %8 = vec2 %3, %7 32 %9 = deref_var (function_temp uvec2) @store_deref (%9, %8) (wrmask=xy, access=none) 32 %10 = deref_var  (system uvec3) 32x3 %11 = @load_deref (%10) (access=none) 32x2 %16 = load_const (0x00000002, 0x00000002) 32x2 %17 = imul %11.xy, %16 (0x2, 0x2) 32 %18 = deref_var (function_temp uvec2) 32x2 %19 = @load_deref (%18) (access=none) 32x2 %20 = iadd %17, %19 32 %21 = deref_var (function_temp uvec2) @store_deref (%21, %20) (wrmask=xy, access=none) 32 %22 = deref_var (function_temp uvec2) 32x2 %25 = @load_deref (%22) (access=none) 32 %27 = load_const (0x00000800 = 2048) 32 %28 = imul %27 (0x800), %25.y 32 %29 = deref_var (function_temp uvec2) 32x2 %32 = @load_deref (%29) (access=none) 32 %34 = load_const (0x00000020 = 32) 32 %35 = imul %34 (0x20), %32.x 32 %36 = iadd %28, %35 32 %37 = deref_var (function_temp uint) @store_deref (%37, %36) (wrmask=x, access=none) 32 %38 = deref_var (function_temp uvec2) 32x2 %41 = @load_deref (%38) (access=none) 32 %43 = load_const (0x00001000 = 4096) 32 %44 = imul %43 (0x1000), %41.y 32 %45 = deref_var (function_temp uvec2) 32x2 %48 = @load_deref (%45) (access=none) 32 %50 = load_const (0x00000010 = 16) 32 %51 = imul %50 (0x10), %48.x 32 %52 = iadd %44, %51 32 %53 = deref_var (function_temp uint) @store_deref (%53, %52) (wrmask=x, access=none) 32 %54 = deref_var (function_temp uvec2) 32x2 %57 = @load_deref (%54) (access=none) 32 %59 = load_const (0x00000400 = 1024) 32 %60 = imul %59 (0x400), %57.y 32 %61 = deref_var (function_temp uvec2) 32x2 %64 = @load_deref (%61) (access=none) 32 %66 = load_const (0x00000010 = 16) 32 %67 = imul %66 (0x10), %64.x 32 %68 = iadd %60, %67 32 %69 = deref_var  (function_temp uint) @store_deref (%69, %68) (wrmask=x, access=none) 32 %70 = deref_var (function_temp uvec2) 32x2 %73 = @load_deref (%70) (access=none) 32 %75 = load_const (0x00000400 = 1024) 32 %76 = imul %75 (0x400), %73.y 32 %77 = deref_var (function_temp uvec2) 32x2 %80 = @load_deref (%77) (access=none) 32 %82 = load_const (0x00000010 = 16) 32 %83 = imul %82 (0x10), %80.x 32 %84 = iadd %76, %83 32 %85 = deref_var  (function_temp uint) @store_deref (%85, %84) (wrmask=x, access=none) 32 %86 = deref_var (function_temp uint) 32 %87 = @load_deref (%86) (access=none) 32 %88 = load_const (0x00000000 = 0.000000) 32x4 %89 = @vulkan_resource_index (%88 (0x0)) (desc_set=0, binding=0, desc_type=SSBO) 32x4 %90 = @load_vulkan_descriptor (%89) (desc_type=SSBO) 32x4 %91 = deref_cast (block *)%90 (ssbo block) (ptr_stride=0, align_mul=4, align_offset=0) 32x4 %92 = deref_struct &%91->field0 (ssbo int8_t[]) // &((block *)%90)->field0 32x4 %93 = deref_array &(*%92)[%87] (ssbo int8_t) // &((block *)%90)->field0[%87] 32 %94 = load_const (0x00000100 = 0.000000 = 256) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo|global) 32 %95 = deref_var &cmat_bitcast (function_temp coopmat) @cmat_load (%95, %93, %94 (0x100)) (matrix_layout=row_major) 32 %96 = deref_var  (function_temp coopmat) 32 %97 = deref_var &cmat_bitcast (function_temp coopmat) @cmat_copy (%96, %97) 32 %98 = deref_var  (function_temp coopmat) 32 %99 = deref_var &cmat_ssa (function_temp coopmat) @cmat_copy (%99, %98) 32 %100 = deref_var  (shader_temp coopmat) 32 %101 = deref_var &cmat_ssa (function_temp coopmat) @cmat_copy (%100, %101) 32 %102 = deref_var (function_temp uint) 32 %103 = @load_deref (%102) (access=none) 32 %104 = load_const (0x00000000 = 0.000000) 32x4 %105 = @vulkan_resource_index (%104 (0x0)) (desc_set=0, binding=1, desc_type=SSBO) 32x4 %106 = @load_vulkan_descriptor (%105) (desc_type=SSBO) 32x4 %107 = deref_cast (block *)%106 (ssbo block) (ptr_stride=0, align_mul=4, align_offset=0) 32x4 %108 = deref_struct &%107->field0 (ssbo int8_t[]) // &((block *)%106)->field0 32x4 %109 = deref_array &(*%108)[%103] (ssbo int8_t) // &((block *)%106)->field0[%103] 32 %110 = load_const (0x00000080 = 0.000000 = 128) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo|global) 32 %111 = deref_var &cmat_bitcast#19 (function_temp coopmat) @cmat_load (%111, %109, %110 (0x80)) (matrix_layout=row_major) 32 %112 = deref_var  (function_temp coopmat) 32 %113 = deref_var &cmat_bitcast#19 (function_temp coopmat) @cmat_copy (%112, %113) 32 %114 = deref_var  (function_temp coopmat) 32 %115 = deref_var &cmat_ssa#20 (function_temp coopmat) @cmat_copy (%115, %114) 32 %116 = deref_var  (shader_temp coopmat) 32 %117 = deref_var &cmat_ssa#20 (function_temp coopmat) @cmat_copy (%116, %117) 32 %118 = deref_var  (function_temp uint) 32 %119 = @load_deref (%118) (access=none) 32 %120 = load_const (0x00000000 = 0.000000) 32x4 %121 = @vulkan_resource_index (%120 (0x0)) (desc_set=0, binding=2, desc_type=SSBO) 32x4 %122 = @load_vulkan_descriptor (%121) (desc_type=SSBO) 32x4 %123 = deref_cast (block *)%122 (ssbo block) (ptr_stride=0, align_mul=4, align_offset=0) 32x4 %124 = deref_struct &%123->field0 (ssbo int[]) // &((block *)%122)->field0 32x4 %125 = deref_array &(*%124)[%119] (ssbo int) // &((block *)%122)->field0[%119] 32 %126 = load_const (0x00000080 = 0.000000 = 128) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo|global) 32 %127 = deref_var &cmat_bitcast#21 (function_temp coopmat) @cmat_load (%127, %125, %126 (0x80)) (matrix_layout=row_major) 32 %128 = deref_var  (function_temp coopmat) 32 %129 = deref_var &cmat_bitcast#21 (function_temp coopmat) @cmat_copy (%128, %129) 32 %130 = deref_var  (function_temp coopmat) 32 %131 = deref_var &cmat_ssa#22 (function_temp coopmat) @cmat_copy (%131, %130) 32 %132 = deref_var  (shader_temp coopmat) 32 %133 = deref_var &cmat_ssa#22 (function_temp coopmat) @cmat_copy (%132, %133) 32 %134 = deref_var  (shader_temp coopmat) 32 %135 = deref_var &cmat_ssa#23 (function_temp coopmat) @cmat_copy (%135, %134) 32 %136 = deref_var  (shader_temp coopmat) 32 %137 = deref_var &cmat_ssa#24 (function_temp coopmat) @cmat_copy (%137, %136) 32 %138 = deref_var  (shader_temp coopmat) 32 %139 = deref_var &cmat_ssa#25 (function_temp coopmat) @cmat_copy (%139, %138) 32 %140 = deref_var &cmat_ssa#23 (function_temp coopmat) 32 %141 = deref_var &cmat_ssa#24 (function_temp coopmat) 32 %142 = deref_var &cmat_ssa#25 (function_temp coopmat) 32 %143 = deref_var &cmat_muladd (function_temp coopmat) @cmat_muladd (%143, %140, %141, %142) (saturate=0, cmat_signed=A|B|C|Result) 32 %144 = deref_var (shader_temp coopmat) 32 %145 = deref_var &cmat_muladd (function_temp coopmat) @cmat_copy (%144, %145) 32 %146 = deref_var (shader_temp coopmat) 32 %147 = deref_var &cmat_ssa#26 (function_temp coopmat) @cmat_copy (%147, %146) 32 %148 = deref_var  (function_temp uint) 32 %149 = @load_deref (%148) (access=none) 32 %150 = load_const (0x00000000 = 0.000000) 32x4 %151 = @vulkan_resource_index (%150 (0x0)) (desc_set=0, binding=3, desc_type=SSBO) 32x4 %152 = @load_vulkan_descriptor (%151) (desc_type=SSBO) 32x4 %153 = deref_cast (block *)%152 (ssbo block) (ptr_stride=0, align_mul=4, align_offset=0) 32x4 %154 = deref_struct &%153->field0 (ssbo int[]) // &((block *)%152)->field0 32x4 %155 = deref_array &(*%154)[%149] (ssbo int) // &((block *)%152)->field0[%149] 32 %156 = load_const (0x00000080 = 0.000000 = 128) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=REL|AVAILABLE, mem_modes=ssbo|global) 32 %157 = deref_var &cmat_ssa#26 (function_temp coopmat) @cmat_store (%155, %157, %156 (0x80)) (matrix_layout=row_major) // succs: b1 block b1: } NIR (SSA form) for compute shader: shader: MESA_SHADER_COMPUTE source_sha1: {0xde452e1a, 0xdfc6e380, 0xa65f6252, 0xf3081c68, 0x6fc6bfb0} internal: false workgroup_size: 64, 2, 1 stage: 5 next_stage: 0 num_ssbos: 4 system_values_read: 0x00000000'00000000'00000102 subgroup_size: 32 divergence_analysis_run: true bit_sizes_int: 0x28 separate_shader: true writes_memory: true ptr_size: 0 inputs: 0 outputs: 0 uniforms: 16 decl_var ssbo INTERP_MODE_NONE restrict readonly writeonly block #0 (~0, 0, 0) decl_var ssbo INTERP_MODE_NONE restrict readonly writeonly block #1 (~0, 0, 1) decl_var ssbo INTERP_MODE_NONE restrict readonly writeonly block #2 (~0, 0, 2) decl_var ssbo INTERP_MODE_NONE restrict readonly writeonly block #3 (~0, 0, 3) decl_function main (0 params) impl main { con block b0: // preds: con 32 %0 = @load_subgroup_id con 32 %1 = load_const (0x00000001) con 32 %2 = iand %0, %1 (0x1) con 32 %3 = ushr %0, %1 (0x1) con 32 %4 = load_const (0x00000000) con 32x3 %5 = @load_uniform (%4 (0x0)) (base=4, range=12, dest_type=uint) con 32x3 %6 = @load_workgroup_id_zero_base con 32 %7 = iadd %6.x, %5.x con 32 %8 = iadd %6.y, %5.y con 32 %9 = ishl %7, %1 (0x1) con 32 %10 = ishl %8, %1 (0x1) con 32 %11 = iadd %9, %2 con 32 %12 = iadd %10, %3 con 32 %13 = load_const (0x0000000b = 11) con 32 %14 = ishl %12, %13 (0xb) con 32 %15 = load_const (0x00000005) con 32 %16 = ishl %11, %15 (0x5) con 32 %17 = load_const (0x0000000c = 12) con 32 %18 = ishl %12, %17 (0xc) con 32 %19 = load_const (0x00000004) con 32 %20 = ishl %11, %19 (0x4) con 32 %21 = load_const (0x0000000a = 10) con 32 %22 = ishl %12, %21 (0xa) con 32 %23 = load_const (0xdeaddead = -6264320714451451904.000000 = -559030611 = 3735936685) con 32 %24 = load_const (0x00000002) con 32 %25 = @resource_intel (%23 (0xdeaddead), %24 (0x2), %4 (0x0), %4 (0x0)) (desc_set=0, binding=0, resource_intel=0, resource_block_intel=-1) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo) div 32 %26 = @load_subgroup_invocation con 32 %27 = load_const (0x00000007) div 32 %28 = iand %26, %27 (0x7) con 32 %29 = load_const (0x00000003) div 32 %30 = ushr %26, %29 (0x3) con 32 %31 = load_const (0x00000006) div 32 %32 = ishl %30, %31 (0x6) div 32 %33 = iadd %32, %28 div 32 %34 = ishl %33, %24 (0x2) div 32 %35 = iadd3 %14, %16, %34 div 32 %36 = @load_ssbo (%25, %35) (access=none, align_mul=4, align_offset=0) con 32 %37 = load_const (0x00000400 = 1024) div 32 %38 = iadd %35, %37 (0x400) div 32 %39 = @load_ssbo (%25, %38) (access=none, align_mul=4, align_offset=0) con 32 %40 = @resource_intel (%23 (0xdeaddead), %29 (0x3), %4 (0x0), %4 (0x0)) (desc_set=0, binding=1, resource_intel=0, resource_block_intel=-1) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo) con 32 %41 = load_const (0x00000080 = 128) div 32 %42 = ushr %26, %19 (0x4) con 32 %43 = load_const (0x0000000f = 15) div 32 %44 = iand %26, %43 (0xf) con 32 %45 = load_const (0x00000009) div 32 %46 = ishl %42, %45 (0x9) div 32 %47 = iadd %46, %44 div 32 %48 = iadd3 %18, %20, %47 div 8 %49 = @load_ssbo (%40, %48) (access=none, align_mul=1, align_offset=0) div 32 %50 = iadd %48, %41 (0x80) div 8 %51 = @load_ssbo (%40, %50) (access=none, align_mul=1, align_offset=0) con 32 %52 = load_const (0x00000100 = 256) div 32 %53 = iadd %48, %52 (0x100) div 8 %54 = @load_ssbo (%40, %53) (access=none, align_mul=1, align_offset=0) con 32 %55 = load_const (0x00000180 = 384) div 32 %56 = iadd %48, %55 (0x180) div 8 %57 = @load_ssbo (%40, %56) (access=none, align_mul=1, align_offset=0) div 32 %58 = pack_32_4x8_split %49, %51, %54, %57 div 32 %59 = iadd %48, %37 (0x400) div 8 %60 = @load_ssbo (%40, %59) (access=none, align_mul=1, align_offset=0) con 32 %61 = load_const (0x00000480 = 1152) div 32 %62 = iadd %48, %61 (0x480) div 8 %63 = @load_ssbo (%40, %62) (access=none, align_mul=1, align_offset=0) con 32 %64 = load_const (0x00000500 = 1280) div 32 %65 = iadd %48, %64 (0x500) div 8 %66 = @load_ssbo (%40, %65) (access=none, align_mul=1, align_offset=0) con 32 %67 = load_const (0x00000580 = 1408) div 32 %68 = iadd %48, %67 (0x580) div 8 %69 = @load_ssbo (%40, %68) (access=none, align_mul=1, align_offset=0) div 32 %70 = pack_32_4x8_split %60, %63, %66, %69 con 32 %71 = load_const (0x00000800 = 2048) div 32 %72 = iadd %48, %71 (0x800) div 8 %73 = @load_ssbo (%40, %72) (access=none, align_mul=1, align_offset=0) con 32 %74 = load_const (0x00000880 = 2176) div 32 %75 = iadd %48, %74 (0x880) div 8 %76 = @load_ssbo (%40, %75) (access=none, align_mul=1, align_offset=0) con 32 %77 = load_const (0x00000900 = 2304) div 32 %78 = iadd %48, %77 (0x900) div 8 %79 = @load_ssbo (%40, %78) (access=none, align_mul=1, align_offset=0) con 32 %80 = load_const (0x00000980 = 2432) div 32 %81 = iadd %48, %80 (0x980) div 8 %82 = @load_ssbo (%40, %81) (access=none, align_mul=1, align_offset=0) div 32 %83 = pack_32_4x8_split %73, %76, %79, %82 con 32 %84 = load_const (0x00000c00 = 3072) div 32 %85 = iadd %48, %84 (0xc00) div 8 %86 = @load_ssbo (%40, %85) (access=none, align_mul=1, align_offset=0) con 32 %87 = load_const (0x00000c80 = 3200) div 32 %88 = iadd %48, %87 (0xc80) div 8 %89 = @load_ssbo (%40, %88) (access=none, align_mul=1, align_offset=0) con 32 %90 = load_const (0x00000d00 = 3328) div 32 %91 = iadd %48, %90 (0xd00) div 8 %92 = @load_ssbo (%40, %91) (access=none, align_mul=1, align_offset=0) con 32 %93 = load_const (0x00000d80 = 3456) div 32 %94 = iadd %48, %93 (0xd80) div 8 %95 = @load_ssbo (%40, %94) (access=none, align_mul=1, align_offset=0) div 32 %96 = pack_32_4x8_split %86, %89, %92, %95 con 32 %97 = @resource_intel (%23 (0xdeaddead), %19 (0x4), %4 (0x0), %4 (0x0)) (desc_set=0, binding=2, resource_intel=0, resource_block_intel=-1) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo) div 32 %98 = ishl %42, %27 (0x7) div 32 %99 = iadd %98, %44 div 32 %100 = iadd3 %22, %20, %99 div 32 %101 = ishl %100, %24 (0x2) div 32 %102 = @load_ssbo (%97, %101) (access=none, align_mul=4, align_offset=0) div 32 %103 = iadd %101, %37 (0x400) div 32 %104 = @load_ssbo (%97, %103) (access=none, align_mul=4, align_offset=0) div 32 %105 = iadd %101, %71 (0x800) div 32 %106 = @load_ssbo (%97, %105) (access=none, align_mul=4, align_offset=0) div 32 %107 = iadd %101, %84 (0xc00) div 32 %108 = @load_ssbo (%97, %107) (access=none, align_mul=4, align_offset=0) div 32x4 %109 = vec4 %58, %70, %83, %96 div 32x2 %110 = vec2 %36, %39 div 32x4 %111 = vec4 %102, %104, %106, %108 div 32x4 %112 = @dpas_intel (%111, %110, %109) (dest_type=int32, src_type=int8, saturate=0, cmat_signed=A|B|C|Result, systolic_depth=8, repeat_count=8) con 32 %113 = @resource_intel (%23 (0xdeaddead), %15 (0x5), %4 (0x0), %4 (0x0)) (desc_set=0, binding=3, resource_intel=0, resource_block_intel=-1) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=REL|AVAILABLE, mem_modes=ssbo) div 32 %114 = mov %112.x @store_ssbo (%114, %113, %101) (wrmask=x, access=none, align_mul=4, align_offset=0) div 32 %115 = mov %112.y @store_ssbo (%115, %113, %103) (wrmask=x, access=none, align_mul=4, align_offset=0) div 32 %116 = mov %112.z @store_ssbo (%116, %113, %105) (wrmask=x, access=none, align_mul=4, align_offset=0) div 32 %117 = mov %112.w @store_ssbo (%117, %113, %107) (wrmask=x, access=none, align_mul=4, align_offset=0) // succs: b1 block b1: } NIR (final form) for compute shader: shader: MESA_SHADER_COMPUTE source_sha1: {0xde452e1a, 0xdfc6e380, 0xa65f6252, 0xf3081c68, 0x6fc6bfb0} internal: false workgroup_size: 64, 2, 1 stage: 5 next_stage: 0 num_ssbos: 4 system_values_read: 0x00000000'00000000'00000102 subgroup_size: 32 divergence_analysis_run: true bit_sizes_int: 0x28 separate_shader: true writes_memory: true ptr_size: 0 inputs: 0 outputs: 0 uniforms: 16 decl_var ssbo INTERP_MODE_NONE restrict readonly writeonly block #0 (~0, 0, 0) decl_var ssbo INTERP_MODE_NONE restrict readonly writeonly block #1 (~0, 0, 1) decl_var ssbo INTERP_MODE_NONE restrict readonly writeonly block #2 (~0, 0, 2) decl_var ssbo INTERP_MODE_NONE restrict readonly writeonly block #3 (~0, 0, 3) decl_function main (0 params) impl main { con block b0: // preds: con 32 %0 = @load_subgroup_id con 32 %1 = load_const (0x00000001) con 32 %2 = iand %0, %1 (0x1) con 32 %3 = ushr %0, %1 (0x1) con 32 %4 = load_const (0x00000000) con 32x3 %5 = @load_uniform (%4 (0x0)) (base=4, range=12, dest_type=uint) con 32x3 %6 = @load_workgroup_id_zero_base con 32 %7 = iadd %6.x, %5.x con 32 %8 = iadd %6.y, %5.y con 32 %9 = ishl %7, %1 (0x1) con 32 %10 = ishl %8, %1 (0x1) con 32 %11 = iadd %9, %2 con 32 %12 = iadd %10, %3 con 32 %13 = load_const (0x0000000b = 11) con 32 %14 = ishl %12, %13 (0xb) con 32 %15 = load_const (0x00000005) con 32 %16 = ishl %11, %15 (0x5) con 32 %17 = load_const (0x0000000c = 12) con 32 %18 = ishl %12, %17 (0xc) con 32 %19 = load_const (0x00000004) con 32 %20 = ishl %11, %19 (0x4) con 32 %21 = load_const (0x0000000a = 10) con 32 %22 = ishl %12, %21 (0xa) con 32 %23 = load_const (0xdeaddead = -6264320714451451904.000000 = -559030611 = 3735936685) con 32 %24 = load_const (0x00000002) con 32 %25 = @resource_intel (%23 (0xdeaddead), %24 (0x2), %4 (0x0), %4 (0x0)) (desc_set=0, binding=0, resource_intel=0, resource_block_intel=-1) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo) div 32 %26 = @load_subgroup_invocation con 32 %27 = load_const (0x00000007) div 32 %28 = iand %26, %27 (0x7) con 32 %29 = load_const (0x00000003) div 32 %30 = ushr %26, %29 (0x3) con 32 %31 = load_const (0x00000006) div 32 %32 = ishl %30, %31 (0x6) div 32 %33 = iadd %32, %28 div 32 %34 = ishl %33, %24 (0x2) div 32 %35 = iadd3 %14, %16, %34 div 32 %36 = @load_ssbo (%25, %35) (access=none, align_mul=4, align_offset=0) con 32 %37 = load_const (0x00000400 = 1024) div 32 %38 = iadd %35, %37 (0x400) div 32 %39 = @load_ssbo (%25, %38) (access=none, align_mul=4, align_offset=0) con 32 %40 = @resource_intel (%23 (0xdeaddead), %29 (0x3), %4 (0x0), %4 (0x0)) (desc_set=0, binding=1, resource_intel=0, resource_block_intel=-1) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo) con 32 %41 = load_const (0x00000080 = 128) div 32 %42 = ushr %26, %19 (0x4) con 32 %43 = load_const (0x0000000f = 15) div 32 %44 = iand %26, %43 (0xf) con 32 %45 = load_const (0x00000009) div 32 %46 = ishl %42, %45 (0x9) div 32 %47 = iadd %46, %44 div 32 %48 = iadd3 %18, %20, %47 div 8 %49 = @load_ssbo (%40, %48) (access=none, align_mul=1, align_offset=0) div 32 %50 = iadd %48, %41 (0x80) div 8 %51 = @load_ssbo (%40, %50) (access=none, align_mul=1, align_offset=0) con 32 %52 = load_const (0x00000100 = 256) div 32 %53 = iadd %48, %52 (0x100) div 8 %54 = @load_ssbo (%40, %53) (access=none, align_mul=1, align_offset=0) con 32 %55 = load_const (0x00000180 = 384) div 32 %56 = iadd %48, %55 (0x180) div 8 %57 = @load_ssbo (%40, %56) (access=none, align_mul=1, align_offset=0) div 32 %58 = pack_32_4x8_split %49, %51, %54, %57 div 32 %59 = iadd %48, %37 (0x400) div 8 %60 = @load_ssbo (%40, %59) (access=none, align_mul=1, align_offset=0) con 32 %61 = load_const (0x00000480 = 1152) div 32 %62 = iadd %48, %61 (0x480) div 8 %63 = @load_ssbo (%40, %62) (access=none, align_mul=1, align_offset=0) con 32 %64 = load_const (0x00000500 = 1280) div 32 %65 = iadd %48, %64 (0x500) div 8 %66 = @load_ssbo (%40, %65) (access=none, align_mul=1, align_offset=0) con 32 %67 = load_const (0x00000580 = 1408) div 32 %68 = iadd %48, %67 (0x580) div 8 %69 = @load_ssbo (%40, %68) (access=none, align_mul=1, align_offset=0) div 32 %70 = pack_32_4x8_split %60, %63, %66, %69 con 32 %71 = load_const (0x00000800 = 2048) div 32 %72 = iadd %48, %71 (0x800) div 8 %73 = @load_ssbo (%40, %72) (access=none, align_mul=1, align_offset=0) con 32 %74 = load_const (0x00000880 = 2176) div 32 %75 = iadd %48, %74 (0x880) div 8 %76 = @load_ssbo (%40, %75) (access=none, align_mul=1, align_offset=0) con 32 %77 = load_const (0x00000900 = 2304) div 32 %78 = iadd %48, %77 (0x900) div 8 %79 = @load_ssbo (%40, %78) (access=none, align_mul=1, align_offset=0) con 32 %80 = load_const (0x00000980 = 2432) div 32 %81 = iadd %48, %80 (0x980) div 8 %82 = @load_ssbo (%40, %81) (access=none, align_mul=1, align_offset=0) div 32 %83 = pack_32_4x8_split %73, %76, %79, %82 con 32 %84 = load_const (0x00000c00 = 3072) div 32 %85 = iadd %48, %84 (0xc00) div 8 %86 = @load_ssbo (%40, %85) (access=none, align_mul=1, align_offset=0) con 32 %87 = load_const (0x00000c80 = 3200) div 32 %88 = iadd %48, %87 (0xc80) div 8 %89 = @load_ssbo (%40, %88) (access=none, align_mul=1, align_offset=0) con 32 %90 = load_const (0x00000d00 = 3328) div 32 %91 = iadd %48, %90 (0xd00) div 8 %92 = @load_ssbo (%40, %91) (access=none, align_mul=1, align_offset=0) con 32 %93 = load_const (0x00000d80 = 3456) div 32 %94 = iadd %48, %93 (0xd80) div 8 %95 = @load_ssbo (%40, %94) (access=none, align_mul=1, align_offset=0) div 32 %96 = pack_32_4x8_split %86, %89, %92, %95 con 32 %97 = @resource_intel (%23 (0xdeaddead), %19 (0x4), %4 (0x0), %4 (0x0)) (desc_set=0, binding=2, resource_intel=0, resource_block_intel=-1) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=ACQ|VISIBLE, mem_modes=ssbo) div 32 %98 = ishl %42, %27 (0x7) div 32 %99 = iadd %98, %44 div 32 %100 = iadd3 %22, %20, %99 div 32 %101 = ishl %100, %24 (0x2) div 32 %102 = @load_ssbo (%97, %101) (access=none, align_mul=4, align_offset=0) div 32 %103 = iadd %101, %37 (0x400) div 32 %104 = @load_ssbo (%97, %103) (access=none, align_mul=4, align_offset=0) div 32 %105 = iadd %101, %71 (0x800) div 32 %106 = @load_ssbo (%97, %105) (access=none, align_mul=4, align_offset=0) div 32 %107 = iadd %101, %84 (0xc00) div 32 %108 = @load_ssbo (%97, %107) (access=none, align_mul=4, align_offset=0) div 32x4 %109 = vec4 %58, %70, %83, %96 div 32x2 %110 = vec2 %36, %39 div 32x4 %111 = vec4 %102, %104, %106, %108 div 32x4 %112 = @dpas_intel (%111, %110, %109) (dest_type=int32, src_type=int8, saturate=0, cmat_signed=A|B|C|Result, systolic_depth=8, repeat_count=8) con 32 %113 = @resource_intel (%23 (0xdeaddead), %15 (0x5), %4 (0x0), %4 (0x0)) (desc_set=0, binding=3, resource_intel=0, resource_block_intel=-1) @barrier (execution_scope=NONE, memory_scope=QUEUE_FAMILY, mem_semantics=REL|AVAILABLE, mem_modes=ssbo) div 32 %114 = mov %112.x @store_ssbo (%114, %113, %101) (wrmask=x, access=none, align_mul=4, align_offset=0) div 32 %115 = mov %112.y @store_ssbo (%115, %113, %103) (wrmask=x, access=none, align_mul=4, align_offset=0) div 32 %116 = mov %112.z @store_ssbo (%116, %113, %105) (wrmask=x, access=none, align_mul=4, align_offset=0) div 32 %117 = mov %112.w @store_ssbo (%117, %113, %107) (wrmask=x, access=none, align_mul=4, align_offset=0) // succs: b1 block b1: } Native code for unnamed compute shader (null) (src_hash 0x00000000) (sha1 441dc88ac1b7e88d214067c26bccfc61ea40341f) SIMD32 shader: 119 instructions. 0 loops. 2284 cycles. 0:0 spills:fills, 22 sends, scheduled with mode top-down. Promoted 0 constants. Compacted 1904 to 1824 bytes (4%) START B0 (2284 cycles) and(1) g112<1>UD g0<0,1,0>UD 0xffffffc0UD { align1 WE_all 1N }; mov(32) g74<1>UD g0.1<0,1,0>UD { align1 compacted }; mov(32) g76<1>UD g0.6<0,1,0>UD { align1 compacted }; and(32) g3<1>UD g0.2<0,1,0>UD 0x000000ffUD { align1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@4 }; send(1) g1UD g112UD nullUD 0x0210d500 0x00000000 ugm MsgDesc: ( load, a32, d32, V16, transpose, L1STATE_L3MOCS dst_len = 2, src0_len = 2, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $0 }; mov(8) g2<1>UW 0x76543210V { align1 WE_all 1Q }; and(32) g5<1>UD g3<8,8,1>UD 0x00000001UD { align1 I@2 }; shr(32) g7<1>UD g3<8,8,1>UD 0x00000001UD { align1 }; add(8) g2.8<1>UW g2<8,8,1>UW 0x0008UW { align1 WE_all 1Q I@3 }; add(16) g2.16<1>UW g2<16,16,1>UW 0x0010UW { align1 WE_all 1H I@1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $0.dst }; add(32) g9<1>D g74<1,1,0>D g1.1<0,1,0>D { align1 I@7 compacted }; add(32) g11<1>D g76<1,1,0>D g1.2<0,1,0>D { align1 I@7 compacted }; shl(32) g13<1>D g9<8,8,1>D 0x00000001UD { align1 I@2 }; shl(32) g15<1>D g11<8,8,1>D 0x00000001UD { align1 I@2 }; add(32) g17<1>D g13<1,1,0>D g5<1,1,0>D { align1 I@2 compacted }; add(32) g19<1>D g15<1,1,0>D g7<1,1,0>D { align1 I@2 compacted }; shl(32) g23<1>D g17<8,8,1>D 0x00000005UD { align1 I@2 }; shl(32) g27<1>D g17<8,8,1>D 0x00000004UD { align1 }; shl(32) g21<1>D g19<8,8,1>D 0x0000000bUD { align1 I@3 }; shl(32) g25<1>D g19<8,8,1>D 0x0000000cUD { align1 }; shl(32) g29<1>D g19<8,8,1>D 0x0000000aUD { align1 }; sync allwr(1) null<0,1,0>UB { align1 WE_all 1N }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@7 }; send(1) g77UD g0UD nullUD 0x0210151f 0x00000000 ugm MsgDesc: ( fence, a32, tile, evict, normal_routing dst_len = 2, src0_len = 2, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $1.dst }; mov(32) g31<1>D g2<16,16,1>UW { align1 }; and(32) g33<1>UD g31<8,8,1>UD 0x00000007UD { align1 I@1 }; shr(32) g35<1>UD g31<8,8,1>UD 0x00000003UD { align1 }; shl(32) g37<1>D g35<8,8,1>D 0x00000006UD { align1 I@1 }; add(32) g39<1>D g37<1,1,0>D g33<1,1,0>D { align1 I@1 compacted }; shl(32) g41<1>D g39<8,8,1>D 0x00000002UD { align1 I@1 }; add3(32) g87<1>D g21<8,8,1>D g23<8,8,1>D g41<1,1,1>D { align1 I@1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@1 }; send(32) g69UD g87UD nullUD 0x64201502 0x02000000 ugm MsgDesc: ( load_cmask, a32, d32, x, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 $2 }; add(32) g89<1>D g87<8,8,1>D 1024D { align1 $2.src }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@1 }; send(32) g71UD g89UD nullUD 0x64201502 0x02000000 ugm MsgDesc: ( load_cmask, a32, d32, x, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 $3 }; sync allwr(1) null<0,1,0>UB { align1 WE_all 1N }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $1.src }; send(1) g78UD g0UD nullUD 0x0210151f 0x00000000 ugm MsgDesc: ( fence, a32, tile, evict, normal_routing dst_len = 2, src0_len = 2, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $4 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $4.dst }; shr(32) g42<1>UD g31<8,8,1>UD 0x00000004UD { align1 }; and(32) g44<1>UD g31<8,8,1>UD 0x0000000fUD { align1 }; shl(32) g46<1>D g42<8,8,1>D 0x00000009UD { align1 I@2 }; add(32) g48<1>D g46<1,1,0>D g44<1,1,0>D { align1 I@1 compacted }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $3.src }; add3(32) g90<1>D g25<8,8,1>D g27<8,8,1>D g48<1,1,1>D { align1 I@1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@1 }; send(32) g49UD g90UD nullUD 0x64200900 0x03000000 ugm MsgDesc: ( load, a32, d8u32, V1, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 3 base_offset 0 { align1 $5 }; add(32) g92<1>D g90<8,8,1>D 128D { align1 $5.src }; add(32) g94<1>D g90<8,8,1>D 256D { align1 }; add(32) g96<1>D g90<8,8,1>D 384D { align1 }; add(32) g98<1>D g90<8,8,1>D 1024D { align1 }; add(32) g100<1>D g90<8,8,1>D 1152D { align1 }; add(32) g102<1>D g90<8,8,1>D 1280D { align1 }; add(32) g104<1>D g90<8,8,1>D 1408D { align1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@6 }; send(32) g52UD g94UD nullUD 0x64200900 0x03000000 ugm MsgDesc: ( load, a32, d8u32, V1, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 3 base_offset 0 { align1 $6 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@5 }; send(32) g54UD g96UD nullUD 0x64200900 0x03000000 ugm MsgDesc: ( load, a32, d8u32, V1, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 3 base_offset 0 { align1 $7 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@4 }; send(32) g56UD g98UD nullUD 0x64200900 0x03000000 ugm MsgDesc: ( load, a32, d8u32, V1, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 3 base_offset 0 { align1 $8 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@3 }; send(32) g58UD g100UD nullUD 0x64200900 0x03000000 ugm MsgDesc: ( load, a32, d8u32, V1, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 3 base_offset 0 { align1 $9 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@2 }; send(32) g60UD g102UD nullUD 0x64200900 0x03000000 ugm MsgDesc: ( load, a32, d8u32, V1, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 3 base_offset 0 { align1 $10 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@1 }; send(32) g62UD g104UD nullUD 0x64200900 0x03000000 ugm MsgDesc: ( load, a32, d8u32, V1, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 3 base_offset 0 { align1 $11 }; mov(32) g81<4>UB g49<32,8,4>UB { align1 $5.dst }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@1 }; send(32) g50UD g92UD nullUD 0x64200900 0x03000000 ugm MsgDesc: ( load, a32, d8u32, V1, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 3 base_offset 0 { align1 $12 }; mov(32) g83<4>UB g56<32,8,4>UB { align1 $8.dst }; mov(32) g83.1<4>UB g58<32,8,4>UB { align1 @1 $9.dst }; mov(32) g83.2<4>UB g60<32,8,4>UB { align1 @1 $10.dst }; mov(32) g83.3<4>UB g62<32,8,4>UB { align1 @1 $11.dst }; mov(32) g81.1<4>UB g50<32,8,4>UB { align1 @5 $12.dst }; mov(32) g81.2<4>UB g52<32,8,4>UB { align1 @1 $6.dst }; mov(32) g81.3<4>UB g54<32,8,4>UB { align1 @1 $7.dst }; sync allwr(1) null<0,1,0>UB { align1 WE_all 1N }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $4.src }; send(1) g79UD g0UD nullUD 0x0210151f 0x00000000 ugm MsgDesc: ( fence, a32, tile, evict, normal_routing dst_len = 2, src0_len = 2, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $13 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $13.dst }; shl(32) g63<1>D g42<8,8,1>D 0x00000007UD { align1 }; add(32) g65<1>D g63<1,1,0>D g44<1,1,0>D { align1 I@1 compacted }; add3(32) g67<1>D g29<8,8,1>D g27<8,8,1>D g65<1,1,1>D { align1 I@1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $11.src }; shl(32) g105<1>D g67<8,8,1>D 0x00000002UD { align1 I@1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@1 }; send(32) g85UD g105UD nullUD 0x64201502 0x04000000 ugm MsgDesc: ( load_cmask, a32, d32, x, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 4 base_offset 0 { align1 $14 }; add(32) g107<1>D g105<8,8,1>D 1024D { align1 $14.src }; add(32) g109<1>D g105<8,8,1>D 2048D { align1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $0.src }; add(32) g111<1>D g105<8,8,1>D 3072D { align1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@3 }; send(32) g87UD g107UD nullUD 0x64201502 0x04000000 ugm MsgDesc: ( load_cmask, a32, d32, x, L1STATE_L3MOCS dst_len = 4, src0_len = 4, src1_len = 0 bti ) BTI 4 base_offset 0 { align1 $15 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $14.dst }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $15.dst }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $2.dst }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $3.dst }; dpas.8x8(16) g73<1>D g85<1,1,0>D g81<1,1,0>B g69<1,1,0>B { align1 WE_all 1H I@7 $0 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $13.src }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $0.src }; send(1) g86UD g0UD nullUD 0x0210151f 0x00000000 ugm MsgDesc: ( fence, a32, tile, evict, normal_routing dst_len = 2, src0_len = 2, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $1 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $1.dst }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $0.dst }; send(32) nullUD g105UD g73UD 0x64001506 0x05000080 ugm MsgDesc: ( store_cmask, a32, d32, x, L1STATE_L3MOCS dst_len = 0, src0_len = 4, src1_len = 4 bti ) BTI 5 base_offset 0 { align1 $2 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $15.src }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $0.dst }; send(32) nullUD g107UD g75UD 0x64001506 0x05000080 ugm MsgDesc: ( store_cmask, a32, d32, x, L1STATE_L3MOCS dst_len = 0, src0_len = 4, src1_len = 4 bti ) BTI 5 base_offset 0 { align1 $3 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $0.dst }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@2 }; send(32) nullUD g109UD g77UD 0x64001506 0x05000080 ugm MsgDesc: ( store_cmask, a32, d32, x, L1STATE_L3MOCS dst_len = 0, src0_len = 4, src1_len = 4 bti ) BTI 5 base_offset 0 { align1 $4 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N $0.dst }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@1 }; send(32) nullUD g111UD g79UD 0x64001506 0x05000080 ugm MsgDesc: ( store_cmask, a32, d32, x, L1STATE_L3MOCS dst_len = 0, src0_len = 4, src1_len = 4 bti ) BTI 5 base_offset 0 { align1 $5 }; mov(16) g126<1>UD g0<1,1,0>UD { align1 WE_all 1H $1.src compacted }; send(32) nullUD g126UD nullUD 0x02000000 0x00000000 gateway MsgDesc: (open) mlen 2 ex_mlen 0 rlen 0 { align1 WE_all I@1 EOT }; END B0 ***** **** *** ** * FATAL * ** *** **** ***** CheckDependencyReg>Warning: (W) send.ugm (1|M0) r86 r0 null:0 0x0 0x0210151F {$1} // wr:1+0, rd:1; fence.ugm.evict.tile Prior OOO instruction read r86 without setting a SBID. Current instruction does not have a source dependency. Requires setting SBID {$<#>} on prior instruction and source dependency {$<#>.src} on this instruction. ***** **** *** ** END FATAL ** *** **** ***** Fail TaskNodeAsync failed with exception, what(): MsgStream::Terminate with code -1 FATAL ../../../Cobalt/framework/lib/fs_scheduler/FsStateMachine.cpp:625 (in function Crank) [COBALT_CORE] FsScheduler task terminated with exception ======================================================= Terminating with exception: Terminate due to CB_CHECK/CB_FATAL log message : FsScheduler task terminated with exception ======================================================= Modeling time at the end of simulation: 664771600 picoseconds Num default clock ticks: 1661929 Hardware[group] : CommandStreamers[group] : tile_0.m_gt_core.DvRender.rcs_adapter : time: 0.07998 sec. , clocks: 92 , task state: exception tile_0.m_gt_core.DvRender.m_ccs_adapters_0 : time: 0.00194 sec. , clocks: 8 , task state: idle tile_0.m_gt_core.blitterPipelineModule_0.BCS : time: 0.000446 sec. , clocks: 5 , task state: idle tile_0.m_gt_core.blitterPipelineModule_8.BCS : time: 0 sec. , clocks: 0 , task state: idle UncoreGroup[group] : GuC DMA : time: 0 sec. , clocks: 0 , task state: idle GuC_SCHED : time: 0.002777 sec. , clocks: 31938 , task state: active MGSR coroutine task : time: 0 sec. , clocks: 0 , task state: idle PowerManagement[group] : PM : time: 0 sec. , clocks: 0 , task state: idle standalone_media[group] : CommandStreamers[group] : VDBOX_CS_0_0 : time: 0 sec. , clocks: 0 , task state: idle VEBOX_CS_0_0 : time: 0 sec. , clocks: 0 , task state: idle standalone_media.m_standalone_media.GscPipe.GSCCS : time: 0 sec. , clocks: 0 , task state: idle UncoreGroup[group] : GuC DMA : time: 0 sec. , clocks: 0 , task state: idle GuC_SCHED : time: 0 sec. , clocks: 0 , task state: idle MGSR coroutine task : time: 0 sec. , clocks: 0 , task state: idle PowerManagement[group] : PM : time: 0 sec. , clocks: 0 , task state: idle Software[group] : Software Main Task : time: 0.09209 sec. , clocks: 31960 , task state: active Differed MMIO Writer : time: 0.000432 sec. , clocks: 33 , task state: idle Main State Machine: - software active clocks: 31993 - hardware active clocks: 32043 Overall active: 64036 Total context switches: 1661930 - software user time: 0.092523 sec. - hardware user time: 0.085145 sec. INFO [COBALT_CORE] UserTime: 1.19 seconds INFO [COBALT_CORE] SystemTime: 0.27 seconds INFO [COBALT_CORE] WallClockTime: 1.54799 seconds INFO [COBALT_CORE] Max VM Usage (VmPeak / PeakPagefileUsage): 1344168 KB INFO [COBALT_CORE] Max VM Residence (VmHWM / PeakWorkingSetSize): 1116684 KB Unhandled exception Releasing GSCstub... INFO : CFE[0] did not receive all EOTs at end of test DSS[0] has 4 threads pending INFO [tile_0.m_gt_core.DvRender.ComputeRouter.m_cfeg.CfegLoadManager][ 664771600 ps | 1661929 clk ] CFEG did not receive all EOTs at end of test:16 threads pending INFO [tile_0.m_gt_core.DvRender.ComputeRouter.m_cfeg.CfegLoadManager][ 664771600 ps | 1661929 clk ] Simulation ended before pending threads have retired.Please check the log for Unhandled Exceptions sim-drm: timeout waiting for interrupt Batch offset=0x0 len=0x0 on queue 0 (aperture: 2.8Mb, 0.0Mb VRAM only) BO: addr=0xffffeffeffff0000-0xffffeffeffff1fff size= 8KB handle=00009 capture=1 vram_only=0 name=workaround BO: addr=0xffffeffefff50000-0xffffeffefff50fff size= 4KB handle=00022 capture=0 vram_only=1 name=bo-sync BO: addr=0x00000001bfff0000-0x00000001bfff0fff size= 4KB handle=00020 capture=1 vram_only=0 name=direct surfaces BO: addr=0xffffeffefff70000-0xffffeffefff70fff size= 4KB handle=00019 capture=0 vram_only=0 name=user BO: addr=0x0000000300000000-0x000000030003ffff size= 256KB handle=00004 capture=1 vram_only=0 name=instruction pool BO: addr=0x00000000c0000000-0x00000000c00fffff size= 1024KB handle=00007 capture=1 vram_only=0 name=binding table pool BO: addr=0x0000000100000000-0x000000010000ffff size= 64KB handle=00005 capture=1 vram_only=0 name=scratch surface state pool BO: addr=0x0000000100800000-0x000000010080ffff size= 64KB handle=00006 capture=1 vram_only=0 name=internal surface state pool BO: addr=0x0000000200000000-0x000000020003ffff size= 256KB handle=00002 capture=1 vram_only=0 name=dynamic pool BO: addr=0x0000000200040000-0x000000020007ffff size= 256KB handle=00012 capture=1 vram_only=0 name=dynamic pool BO: addr=0x00000003c0000000-0x00000003c003ffff size= 256KB handle=00003 capture=1 vram_only=0 name=dynamic pool (db) BO: addr=0x00000003c0040000-0x00000003c007ffff size= 256KB handle=00013 capture=1 vram_only=0 name=dynamic pool (db) BO: addr=0x0000000000200000-0x000000000023ffff size= 256KB handle=00001 capture=1 vram_only=0 name=general pool BO: addr=0x0000000480000000-0x000000048000ffff size= 64KB handle=00008 capture=1 vram_only=0 name=push descriptor buffer state pool BO: addr=0xffffeffefffb0000-0xffffeffefffb3fff size= 16KB handle=00015 capture=0 vram_only=0 name=user BO: addr=0xffffeffefffa0000-0xffffeffefffa7fff size= 32KB handle=00016 capture=0 vram_only=0 name=user BO: addr=0xffffeffefff90000-0xffffeffefff97fff size= 32KB handle=00017 capture=0 vram_only=0 name=user BO: addr=0xffffeffefff80000-0xffffeffefff87fff size= 32KB handle=00018 capture=0 vram_only=0 name=user BO: addr=0xffffeffefff60000-0xffffeffefff61fff size= 8KB handle=00021 capture=1 vram_only=0 name=batch Fail (vk.queueSubmit(queue, 1u, &submitInfo, *fence): VK_ERROR_DEVICE_LOST at vkCmdUtil.cpp:410) DONE! Test run totals: Passed: 0/1 (0.0%) Failed: 1/1 (100.0%) Not supported: 0/1 (0.0%) Warnings: 0/1 (0.0%) Waived: 0/1 (0.0%)