jit vs 0000 mul(8) g12<1>F g4<8,8,1>F g2<0,1,0>F { align1 1Q }; 0008 mul(8) g13<1>F g4<8,8,1>F g2.1<0,1,0>F { align1 1Q }; 0010 mul(8) g14<1>F g4<8,8,1>F g2.2<0,1,0>F { align1 1Q }; 0018 mul(8) g15<1>F g4<8,8,1>F g2.3<0,1,0>F { align1 1Q }; 0020 mov.sat(8) g124<1>F g8<8,8,1>F { align1 1Q }; 0028 mov.sat(8) g125<1>F g9<8,8,1>F { align1 1Q }; 0030 mov.sat(8) g126<1>F g10<8,8,1>F { align1 1Q }; 0038 mov.sat(8) g127<1>F g11<8,8,1>F { align1 1Q }; 0040 mov(8) g119<1>UD g1<8,8,1>UD { align1 WE_all 1Q }; 0048 mad(8) g16<1>F g12<4,4,1>F g2.4<0,1,0>F g5<4,4,1>F { align16 1Q }; 0058 mad(8) g17<1>F g13<4,4,1>F g2.5<0,1,0>F g5<4,4,1>F { align16 1Q }; 0068 mad(8) g18<1>F g14<4,4,1>F g2.6<0,1,0>F g5<4,4,1>F { align16 1Q }; 0078 mad(8) g19<1>F g15<4,4,1>F g2.7<0,1,0>F g5<4,4,1>F { align16 1Q }; 0088 mad(8) g20<1>F g16<4,4,1>F g3.0<0,1,0>F g6<4,4,1>F { align16 1Q }; 0098 mad(8) g21<1>F g17<4,4,1>F g3.1<0,1,0>F g6<4,4,1>F { align16 1Q }; 00a8 mad(8) g22<1>F g18<4,4,1>F g3.2<0,1,0>F g6<4,4,1>F { align16 1Q }; 00b8 mad(8) g23<1>F g19<4,4,1>F g3.3<0,1,0>F g6<4,4,1>F { align16 1Q }; 00c8 mad(8) g120<1>F g20<4,4,1>F g3.4<0,1,0>F g7<4,4,1>F { align16 1Q }; 00d8 mad(8) g121<1>F g21<4,4,1>F g3.5<0,1,0>F g7<4,4,1>F { align16 1Q }; 00e8 mad(8) g122<1>F g22<4,4,1>F g3.6<0,1,0>F g7<4,4,1>F { align16 1Q }; 00f8 mad(8) g123<1>F g23<4,4,1>F g3.7<0,1,0>F g7<4,4,1>F { align16 1Q }; 0108 send(8) null<1>F g119<8,8,1>F urb 1 SIMD8 write mlen 9 rlen 0 { align1 1Q EOT }; # --- initial codegen # load constants r1 = load_region g307.0<8,8,1>4 store_region, r1, g2.0<8,8,1>4 r3 = load_region g308.0<8,8,1>4 store_region, r3, g3.0<8,8,1>4 # vertex fetch r6 = load_region g172.0<8,8,1>4 # vertex fetch: ve 0: offset 0, pitch 24, format 64 r8 = shli r6, 1 r9 = addd r8, r6 r10 = shli r9, 3 r11 = load_region g130.0<8,8,1>4 r12 = gather r11, 0(0x7fcd4d66f00c,r10,1) r13 = load_region g130.0<8,8,1>4 r14 = gather r13, 4(0x7fcd4d66f00c,r10,1) r15 = load_region g130.0<8,8,1>4 r16 = gather r15, 8(0x7fcd4d66f00c,r10,1) r17 
= imm 1065353216d 1.000000f store_region, r12, g175.0<8,8,1>4 store_region, r14, g176.0<8,8,1>4 store_region, r16, g177.0<8,8,1>4 r21 = imm 1065353216d 1.000000f store_region, r21, g178.0<8,8,1>4 # vertex fetch: ve 1: offset 0, pitch 24, format 64 r24 = shli r6, 1 r25 = addd r24, r6 r26 = shli r25, 3 r27 = load_region g130.0<8,8,1>4 r28 = gather r27, 0(0x7fcd4d66f000,r26,1) r29 = load_region g130.0<8,8,1>4 r30 = gather r29, 4(0x7fcd4d66f000,r26,1) r31 = load_region g130.0<8,8,1>4 r32 = gather r31, 8(0x7fcd4d66f000,r26,1) r33 = imm 1065353216d 1.000000f store_region, r28, g179.0<8,8,1>4 store_region, r30, g180.0<8,8,1>4 store_region, r32, g181.0<8,8,1>4 r37 = imm 1065353216d 1.000000f store_region, r37, g182.0<8,8,1>4 # copy vue r40 = load_region g175.0<8,8,1>4 store_region, r40, g4.0<8,8,1>4 r42 = load_region g176.0<8,8,1>4 store_region, r42, g5.0<8,8,1>4 r44 = load_region g177.0<8,8,1>4 store_region, r44, g6.0<8,8,1>4 r46 = load_region g178.0<8,8,1>4 store_region, r46, g7.0<8,8,1>4 r48 = load_region g179.0<8,8,1>4 store_region, r48, g8.0<8,8,1>4 r50 = load_region g180.0<8,8,1>4 store_region, r50, g9.0<8,8,1>4 r52 = load_region g181.0<8,8,1>4 store_region, r52, g10.0<8,8,1>4 r54 = load_region g182.0<8,8,1>4 store_region, r54, g11.0<8,8,1>4 # eu vs r57 = load_region g4.0<8,8,1>4 r58 = load_region g2.0<0,1,0>4 r59 = mulf r57, r58 store_region, r59, g12.0<8,8,1>4 r61 = load_region g4.0<8,8,1>4 r62 = load_region g2.4<0,1,0>4 r63 = mulf r61, r62 store_region, r63, g13.0<8,8,1>4 r65 = load_region g4.0<8,8,1>4 r66 = load_region g2.8<0,1,0>4 r67 = mulf r65, r66 store_region, r67, g14.0<8,8,1>4 r69 = load_region g4.0<8,8,1>4 r70 = load_region g2.12<0,1,0>4 r71 = mulf r69, r70 store_region, r71, g15.0<8,8,1>4 r73 = load_region g8.0<8,8,1>4 r74 = imm 0d 0.000000f r75 = imm 1065353216d 1.000000f r76 = maxf r73, r74 r77 = minf r76, r75 store_region, r77, g124.0<8,8,1>4 r79 = load_region g9.0<8,8,1>4 r80 = imm 0d 0.000000f r81 = imm 1065353216d 1.000000f r82 = maxf r79, r80 r83 
= minf r82, r81 store_region, r83, g125.0<8,8,1>4 r85 = load_region g10.0<8,8,1>4 r86 = imm 0d 0.000000f r87 = imm 1065353216d 1.000000f r88 = maxf r85, r86 r89 = minf r88, r87 store_region, r89, g126.0<8,8,1>4 r91 = load_region g11.0<8,8,1>4 r92 = imm 0d 0.000000f r93 = imm 1065353216d 1.000000f r94 = maxf r91, r92 r95 = minf r94, r93 store_region, r95, g127.0<8,8,1>4 r97 = load_region g1.0<8,8,1>4 store_region, r97, g119.0<8,8,1>4 r99 = load_region g12.0<8,8,1>4 r100 = load_region g2.16<0,1,0>4 r101 = load_region g5.0<8,8,1>4 r102 = maddf r100, r101, r99 store_region, r102, g16.0<8,8,1>4 r104 = load_region g13.0<8,8,1>4 r105 = load_region g2.20<0,1,0>4 r106 = load_region g5.0<8,8,1>4 r107 = maddf r105, r106, r104 store_region, r107, g17.0<8,8,1>4 r109 = load_region g14.0<8,8,1>4 r110 = load_region g2.24<0,1,0>4 r111 = load_region g5.0<8,8,1>4 r112 = maddf r110, r111, r109 store_region, r112, g18.0<8,8,1>4 r114 = load_region g15.0<8,8,1>4 r115 = load_region g2.28<0,1,0>4 r116 = load_region g5.0<8,8,1>4 r117 = maddf r115, r116, r114 store_region, r117, g19.0<8,8,1>4 r119 = load_region g16.0<8,8,1>4 r120 = load_region g3.0<0,1,0>4 r121 = load_region g6.0<8,8,1>4 r122 = maddf r120, r121, r119 store_region, r122, g20.0<8,8,1>4 r124 = load_region g17.0<8,8,1>4 r125 = load_region g3.4<0,1,0>4 r126 = load_region g6.0<8,8,1>4 r127 = maddf r125, r126, r124 store_region, r127, g21.0<8,8,1>4 r129 = load_region g18.0<8,8,1>4 r130 = load_region g3.8<0,1,0>4 r131 = load_region g6.0<8,8,1>4 r132 = maddf r130, r131, r129 store_region, r132, g22.0<8,8,1>4 r134 = load_region g19.0<8,8,1>4 r135 = load_region g3.12<0,1,0>4 r136 = load_region g6.0<8,8,1>4 r137 = maddf r135, r136, r134 store_region, r137, g23.0<8,8,1>4 r139 = load_region g20.0<8,8,1>4 r140 = load_region g3.16<0,1,0>4 r141 = load_region g7.0<8,8,1>4 r142 = maddf r140, r141, r139 store_region, r142, g120.0<8,8,1>4 r144 = load_region g21.0<8,8,1>4 r145 = load_region g3.20<0,1,0>4 r146 = load_region g7.0<8,8,1>4 r147 = 
maddf r145, r146, r144 store_region, r147, g121.0<8,8,1>4 r149 = load_region g22.0<8,8,1>4 r150 = load_region g3.24<0,1,0>4 r151 = load_region g7.0<8,8,1>4 r152 = maddf r150, r151, r149 store_region, r152, g122.0<8,8,1>4 r154 = load_region g23.0<8,8,1>4 r155 = load_region g3.28<0,1,0>4 r156 = load_region g7.0<8,8,1>4 r157 = maddf r155, r156, r154 store_region, r157, g123.0<8,8,1>4 # urb write: length 8, offset 1 r160 = load_region g120.0<8,8,1>4 store_region, r160, g179.0<8,8,1>4 r162 = load_region g121.0<8,8,1>4 store_region, r162, g180.0<8,8,1>4 r164 = load_region g122.0<8,8,1>4 store_region, r164, g181.0<8,8,1>4 r166 = load_region g123.0<8,8,1>4 store_region, r166, g182.0<8,8,1>4 r168 = load_region g124.0<8,8,1>4 store_region, r168, g183.0<8,8,1>4 r170 = load_region g125.0<8,8,1>4 store_region, r170, g184.0<8,8,1>4 r172 = load_region g126.0<8,8,1>4 store_region, r172, g185.0<8,8,1>4 r174 = load_region g127.0<8,8,1>4 store_region, r174, g186.0<8,8,1>4 # perspective divide r177 = load_region g182.0<8,8,1>4 r178 = rcp r177 r179 = imm 1073741824d 2.000000f r180 = nmaddf r177, r178, r179 r181 = mulf r178, r180 r182 = load_region g179.0<8,8,1>4 r183 = mulf r182, r181 store_region, r183, g179.0<8,8,1>4 r185 = load_region g180.0<8,8,1>4 r186 = mulf r185, r181 store_region, r186, g180.0<8,8,1>4 r188 = load_region g181.0<8,8,1>4 r189 = mulf r188, r181 store_region, r189, g181.0<8,8,1>4 store_region, r181, g182.0<8,8,1>4 # clip tests r193 = load_region g173.8<0,1,0>4 r194 = load_region g173.16<0,1,0>4 r195 = load_region g173.12<0,1,0>4 r196 = load_region g173.20<0,1,0>4 r197 = load_region g179.0<8,8,1>4 r198 = load_region g180.0<8,8,1>4 r199 = cmp r193, r197, op 1 r200 = cmp r194, r197, op 14 r201 = cmp r195, r198, op 1 r202 = cmp r196, r198, op 14 r203 = or r199, r200 r204 = or r201, r202 r205 = or r203, r204 store_region, r205, g175.0<8,8,1>4 # viewport transform r208 = load_region g173.24<0,1,0>4 r209 = load_region g173.28<0,1,0>4 r210 = load_region g174.0<0,1,0>4 r211 
= load_region g174.4<0,1,0>4 r212 = load_region g174.8<0,1,0>4 r213 = load_region g174.12<0,1,0>4 r214 = load_region g179.0<8,8,1>4 r215 = load_region g180.0<8,8,1>4 r216 = load_region g181.0<8,8,1>4 r217 = maddf r214, r208, r211 r218 = maddf r215, r209, r212 r219 = maddf r216, r210, r213 store_region, r217, g179.0<8,8,1>4 store_region, r218, g180.0<8,8,1>4 store_region, r219, g181.0<8,8,1>4 eot # --- after copy propagation # load constants r1 = load_region g307.0<8,8,1>4 store_region, r1, g2.0<8,8,1>4 r3 = load_region g308.0<8,8,1>4 store_region, r3, g3.0<8,8,1>4 # vertex fetch r6 = load_region g172.0<8,8,1>4 # vertex fetch: ve 0: offset 0, pitch 24, format 64 r8 = shli r6, 1 r9 = addd r8, r6 r10 = shli r9, 3 r11 = load_region g130.0<8,8,1>4 r12 = gather r11, 0(0x7fcd4d66f00c,r10,1) r13 = load_region g130.0<8,8,1>4 r14 = gather r13, 4(0x7fcd4d66f00c,r10,1) r15 = load_region g130.0<8,8,1>4 r16 = gather r15, 8(0x7fcd4d66f00c,r10,1) r17 = imm 1065353216d 1.000000f store_region, r12, g175.0<8,8,1>4 store_region, r14, g176.0<8,8,1>4 store_region, r16, g177.0<8,8,1>4 r21 = imm 1065353216d 1.000000f store_region, r21, g178.0<8,8,1>4 # vertex fetch: ve 1: offset 0, pitch 24, format 64 r24 = shli r6, 1 r25 = addd r24, r6 r26 = shli r25, 3 r27 = load_region g130.0<8,8,1>4 r28 = gather r27, 0(0x7fcd4d66f000,r26,1) r29 = load_region g130.0<8,8,1>4 r30 = gather r29, 4(0x7fcd4d66f000,r26,1) r31 = load_region g130.0<8,8,1>4 r32 = gather r31, 8(0x7fcd4d66f000,r26,1) r33 = imm 1065353216d 1.000000f store_region, r28, g179.0<8,8,1>4 store_region, r30, g180.0<8,8,1>4 store_region, r32, g181.0<8,8,1>4 r37 = imm 1065353216d 1.000000f store_region, r37, g182.0<8,8,1>4 # copy vue r40 = load_region g175.0<8,8,1>4 store_region, r12, g4.0<8,8,1>4 r42 = load_region g176.0<8,8,1>4 store_region, r14, g5.0<8,8,1>4 r44 = load_region g177.0<8,8,1>4 store_region, r16, g6.0<8,8,1>4 r46 = load_region g178.0<8,8,1>4 store_region, r21, g7.0<8,8,1>4 r48 = load_region g179.0<8,8,1>4 store_region, r28, 
g8.0<8,8,1>4 r50 = load_region g180.0<8,8,1>4 store_region, r30, g9.0<8,8,1>4 r52 = load_region g181.0<8,8,1>4 store_region, r32, g10.0<8,8,1>4 r54 = load_region g182.0<8,8,1>4 store_region, r37, g11.0<8,8,1>4 # eu vs r57 = load_region g4.0<8,8,1>4 r58 = load_region g2.0<0,1,0>4 r59 = mulf r12, r58 store_region, r59, g12.0<8,8,1>4 r61 = load_region g4.0<8,8,1>4 r62 = load_region g2.4<0,1,0>4 r63 = mulf r12, r62 store_region, r63, g13.0<8,8,1>4 r65 = load_region g4.0<8,8,1>4 r66 = load_region g2.8<0,1,0>4 r67 = mulf r12, r66 store_region, r67, g14.0<8,8,1>4 r69 = load_region g4.0<8,8,1>4 r70 = load_region g2.12<0,1,0>4 r71 = mulf r12, r70 store_region, r71, g15.0<8,8,1>4 r73 = load_region g8.0<8,8,1>4 r74 = imm 0d 0.000000f r75 = imm 1065353216d 1.000000f r76 = maxf r28, r74 r77 = minf r76, r75 store_region, r77, g124.0<8,8,1>4 r79 = load_region g9.0<8,8,1>4 r80 = imm 0d 0.000000f r81 = imm 1065353216d 1.000000f r82 = maxf r30, r80 r83 = minf r82, r81 store_region, r83, g125.0<8,8,1>4 r85 = load_region g10.0<8,8,1>4 r86 = imm 0d 0.000000f r87 = imm 1065353216d 1.000000f r88 = maxf r32, r86 r89 = minf r88, r87 store_region, r89, g126.0<8,8,1>4 r91 = load_region g11.0<8,8,1>4 r92 = imm 0d 0.000000f r93 = imm 1065353216d 1.000000f r94 = maxf r37, r92 r95 = minf r94, r93 store_region, r95, g127.0<8,8,1>4 r97 = load_region g1.0<8,8,1>4 store_region, r97, g119.0<8,8,1>4 r99 = load_region g12.0<8,8,1>4 r100 = load_region g2.16<0,1,0>4 r101 = load_region g5.0<8,8,1>4 r102 = maddf r100, r14, r59 store_region, r102, g16.0<8,8,1>4 r104 = load_region g13.0<8,8,1>4 r105 = load_region g2.20<0,1,0>4 r106 = load_region g5.0<8,8,1>4 r107 = maddf r105, r14, r63 store_region, r107, g17.0<8,8,1>4 r109 = load_region g14.0<8,8,1>4 r110 = load_region g2.24<0,1,0>4 r111 = load_region g5.0<8,8,1>4 r112 = maddf r110, r14, r67 store_region, r112, g18.0<8,8,1>4 r114 = load_region g15.0<8,8,1>4 r115 = load_region g2.28<0,1,0>4 r116 = load_region g5.0<8,8,1>4 r117 = maddf r115, r14, r71 
store_region, r117, g19.0<8,8,1>4 r119 = load_region g16.0<8,8,1>4 r120 = load_region g3.0<0,1,0>4 r121 = load_region g6.0<8,8,1>4 r122 = maddf r120, r16, r102 store_region, r122, g20.0<8,8,1>4 r124 = load_region g17.0<8,8,1>4 r125 = load_region g3.4<0,1,0>4 r126 = load_region g6.0<8,8,1>4 r127 = maddf r125, r16, r107 store_region, r127, g21.0<8,8,1>4 r129 = load_region g18.0<8,8,1>4 r130 = load_region g3.8<0,1,0>4 r131 = load_region g6.0<8,8,1>4 r132 = maddf r130, r16, r112 store_region, r132, g22.0<8,8,1>4 r134 = load_region g19.0<8,8,1>4 r135 = load_region g3.12<0,1,0>4 r136 = load_region g6.0<8,8,1>4 r137 = maddf r135, r16, r117 store_region, r137, g23.0<8,8,1>4 r139 = load_region g20.0<8,8,1>4 r140 = load_region g3.16<0,1,0>4 r141 = load_region g7.0<8,8,1>4 r142 = maddf r140, r21, r122 store_region, r142, g120.0<8,8,1>4 r144 = load_region g21.0<8,8,1>4 r145 = load_region g3.20<0,1,0>4 r146 = load_region g7.0<8,8,1>4 r147 = maddf r145, r21, r127 store_region, r147, g121.0<8,8,1>4 r149 = load_region g22.0<8,8,1>4 r150 = load_region g3.24<0,1,0>4 r151 = load_region g7.0<8,8,1>4 r152 = maddf r150, r21, r132 store_region, r152, g122.0<8,8,1>4 r154 = load_region g23.0<8,8,1>4 r155 = load_region g3.28<0,1,0>4 r156 = load_region g7.0<8,8,1>4 r157 = maddf r155, r21, r137 store_region, r157, g123.0<8,8,1>4 # urb write: length 8, offset 1 r160 = load_region g120.0<8,8,1>4 store_region, r142, g179.0<8,8,1>4 r162 = load_region g121.0<8,8,1>4 store_region, r147, g180.0<8,8,1>4 r164 = load_region g122.0<8,8,1>4 store_region, r152, g181.0<8,8,1>4 r166 = load_region g123.0<8,8,1>4 store_region, r157, g182.0<8,8,1>4 r168 = load_region g124.0<8,8,1>4 store_region, r77, g183.0<8,8,1>4 r170 = load_region g125.0<8,8,1>4 store_region, r83, g184.0<8,8,1>4 r172 = load_region g126.0<8,8,1>4 store_region, r89, g185.0<8,8,1>4 r174 = load_region g127.0<8,8,1>4 store_region, r95, g186.0<8,8,1>4 # perspective divide r177 = load_region g182.0<8,8,1>4 r178 = rcp r157 r179 = imm 1073741824d 
2.000000f r180 = nmaddf r157, r178, r179 r181 = mulf r178, r180 r182 = load_region g179.0<8,8,1>4 r183 = mulf r142, r181 store_region, r183, g179.0<8,8,1>4 r185 = load_region g180.0<8,8,1>4 r186 = mulf r147, r181 store_region, r186, g180.0<8,8,1>4 r188 = load_region g181.0<8,8,1>4 r189 = mulf r152, r181 store_region, r189, g181.0<8,8,1>4 store_region, r181, g182.0<8,8,1>4 # clip tests r193 = load_region g173.8<0,1,0>4 r194 = load_region g173.16<0,1,0>4 r195 = load_region g173.12<0,1,0>4 r196 = load_region g173.20<0,1,0>4 r197 = load_region g179.0<8,8,1>4 r198 = load_region g180.0<8,8,1>4 r199 = cmp r193, r183, op 1 r200 = cmp r194, r183, op 14 r201 = cmp r195, r186, op 1 r202 = cmp r196, r186, op 14 r203 = or r199, r200 r204 = or r201, r202 r205 = or r203, r204 store_region, r205, g175.0<8,8,1>4 # viewport transform r208 = load_region g173.24<0,1,0>4 r209 = load_region g173.28<0,1,0>4 r210 = load_region g174.0<0,1,0>4 r211 = load_region g174.4<0,1,0>4 r212 = load_region g174.8<0,1,0>4 r213 = load_region g174.12<0,1,0>4 r214 = load_region g179.0<8,8,1>4 r215 = load_region g180.0<8,8,1>4 r216 = load_region g181.0<8,8,1>4 r217 = maddf r183, r208, r211 r218 = maddf r186, r209, r212 r219 = maddf r189, r210, r213 store_region, r217, g179.0<8,8,1>4 store_region, r218, g180.0<8,8,1>4 store_region, r219, g181.0<8,8,1>4 eot # --- after dce # load constants r1 = load_region g307.0<8,8,1>4 store_region, r1, g2.0<8,8,1>4 r3 = load_region g308.0<8,8,1>4 store_region, r3, g3.0<8,8,1>4 # vertex fetch r6 = load_region g172.0<8,8,1>4 # vertex fetch: ve 0: offset 0, pitch 24, format 64 r8 = shli r6, 1 r9 = addd r8, r6 r10 = shli r9, 3 r11 = load_region g130.0<8,8,1>4 r12 = gather r11, 0(0x7fcd4d66f00c,r10,1) r13 = load_region g130.0<8,8,1>4 r14 = gather r13, 4(0x7fcd4d66f00c,r10,1) r15 = load_region g130.0<8,8,1>4 r16 = gather r15, 8(0x7fcd4d66f00c,r10,1) r21 = imm 1065353216d 1.000000f # vertex fetch: ve 1: offset 0, pitch 24, format 64 r24 = shli r6, 1 r25 = addd r24, r6 r26 = shli 
r25, 3 r27 = load_region g130.0<8,8,1>4 r28 = gather r27, 0(0x7fcd4d66f000,r26,1) r29 = load_region g130.0<8,8,1>4 r30 = gather r29, 4(0x7fcd4d66f000,r26,1) r31 = load_region g130.0<8,8,1>4 r32 = gather r31, 8(0x7fcd4d66f000,r26,1) r37 = imm 1065353216d 1.000000f # copy vue # eu vs r58 = load_region g2.0<0,1,0>4 r59 = mulf r12, r58 r62 = load_region g2.4<0,1,0>4 r63 = mulf r12, r62 r66 = load_region g2.8<0,1,0>4 r67 = mulf r12, r66 r70 = load_region g2.12<0,1,0>4 r71 = mulf r12, r70 r74 = imm 0d 0.000000f r75 = imm 1065353216d 1.000000f r76 = maxf r28, r74 r77 = minf r76, r75 r80 = imm 0d 0.000000f r81 = imm 1065353216d 1.000000f r82 = maxf r30, r80 r83 = minf r82, r81 r86 = imm 0d 0.000000f r87 = imm 1065353216d 1.000000f r88 = maxf r32, r86 r89 = minf r88, r87 r92 = imm 0d 0.000000f r93 = imm 1065353216d 1.000000f r94 = maxf r37, r92 r95 = minf r94, r93 r100 = load_region g2.16<0,1,0>4 r102 = maddf r100, r14, r59 r105 = load_region g2.20<0,1,0>4 r107 = maddf r105, r14, r63 r110 = load_region g2.24<0,1,0>4 r112 = maddf r110, r14, r67 r115 = load_region g2.28<0,1,0>4 r117 = maddf r115, r14, r71 r120 = load_region g3.0<0,1,0>4 r122 = maddf r120, r16, r102 r125 = load_region g3.4<0,1,0>4 r127 = maddf r125, r16, r107 r130 = load_region g3.8<0,1,0>4 r132 = maddf r130, r16, r112 r135 = load_region g3.12<0,1,0>4 r137 = maddf r135, r16, r117 r140 = load_region g3.16<0,1,0>4 r142 = maddf r140, r21, r122 r145 = load_region g3.20<0,1,0>4 r147 = maddf r145, r21, r127 r150 = load_region g3.24<0,1,0>4 r152 = maddf r150, r21, r132 r155 = load_region g3.28<0,1,0>4 r157 = maddf r155, r21, r137 # urb write: length 8, offset 1 store_region, r77, g183.0<8,8,1>4 store_region, r83, g184.0<8,8,1>4 store_region, r89, g185.0<8,8,1>4 store_region, r95, g186.0<8,8,1>4 # perspective divide r178 = rcp r157 r179 = imm 1073741824d 2.000000f r180 = nmaddf r157, r178, r179 r181 = mulf r178, r180 r183 = mulf r142, r181 r186 = mulf r147, r181 r189 = mulf r152, r181 store_region, r181, 
g182.0<8,8,1>4 # clip tests r193 = load_region g173.8<0,1,0>4 r194 = load_region g173.16<0,1,0>4 r195 = load_region g173.12<0,1,0>4 r196 = load_region g173.20<0,1,0>4 r199 = cmp r193, r183, op 1 r200 = cmp r194, r183, op 14 r201 = cmp r195, r186, op 1 r202 = cmp r196, r186, op 14 r203 = or r199, r200 r204 = or r201, r202 r205 = or r203, r204 store_region, r205, g175.0<8,8,1>4 # viewport transform r208 = load_region g173.24<0,1,0>4 r209 = load_region g173.28<0,1,0>4 r210 = load_region g174.0<0,1,0>4 r211 = load_region g174.4<0,1,0>4 r212 = load_region g174.8<0,1,0>4 r213 = load_region g174.12<0,1,0>4 r217 = maddf r183, r208, r211 r218 = maddf r186, r209, r212 r219 = maddf r189, r210, r213 store_region, r217, g179.0<8,8,1>4 store_region, r218, g180.0<8,8,1>4 store_region, r219, g181.0<8,8,1>4 eot # --- after ra # load constants r0 = load_region g307.0<8,8,1>4 store_region, r0, g2.0<8,8,1>4 r0 = load_region g308.0<8,8,1>4 store_region, r0, g3.0<8,8,1>4 # vertex fetch r0 = load_region g172.0<8,8,1>4 # vertex fetch: ve 0: offset 0, pitch 24, format 64 r1 = shli r0, 1 r1 = addd r1, r0 r1 = shli r1, 3 r2 = load_region g130.0<8,8,1>4 r3 = gather r2, 0(0x7fcd4d66f00c,r1,1) r2 = load_region g130.0<8,8,1>4 r4 = gather r2, 4(0x7fcd4d66f00c,r1,1) r2 = load_region g130.0<8,8,1>4 r5 = gather r2, 8(0x7fcd4d66f00c,r1,1) r1 = imm 1065353216d 1.000000f # vertex fetch: ve 1: offset 0, pitch 24, format 64 r2 = shli r0, 1 r0 = addd r2, r0 r0 = shli r0, 3 r2 = load_region g130.0<8,8,1>4 r6 = gather r2, 0(0x7fcd4d66f000,r0,1) r2 = load_region g130.0<8,8,1>4 r7 = gather r2, 4(0x7fcd4d66f000,r0,1) r2 = load_region g130.0<8,8,1>4 r8 = gather r2, 8(0x7fcd4d66f000,r0,1) r0 = imm 1065353216d 1.000000f # copy vue # eu vs r2 = load_region g2.0<0,1,0>4 r2 = mulf r3, r2 r9 = load_region g2.4<0,1,0>4 r9 = mulf r3, r9 r10 = load_region g2.8<0,1,0>4 r10 = mulf r3, r10 r11 = load_region g2.12<0,1,0>4 r3 = mulf r3, r11 r11 = imm 0d 0.000000f r12 = imm 1065353216d 1.000000f r6 = maxf r6, r11 r6 = minf 
r6, r12 r11 = imm 0d 0.000000f r12 = imm 1065353216d 1.000000f r7 = maxf r7, r11 r7 = minf r7, r12 r11 = imm 0d 0.000000f r12 = imm 1065353216d 1.000000f r8 = maxf r8, r11 r8 = minf r8, r12 r11 = imm 0d 0.000000f r12 = imm 1065353216d 1.000000f r0 = maxf r0, r11 r0 = minf r0, r12 r11 = load_region g2.16<0,1,0>4 r11 = maddf r11, r4, r2 r2 = load_region g2.20<0,1,0>4 r2 = maddf r2, r4, r9 r9 = load_region g2.24<0,1,0>4 r9 = maddf r9, r4, r10 r10 = load_region g2.28<0,1,0>4 r10 = maddf r10, r4, r3 r3 = load_region g3.0<0,1,0>4 r3 = maddf r3, r5, r11 r4 = load_region g3.4<0,1,0>4 r4 = maddf r4, r5, r2 r2 = load_region g3.8<0,1,0>4 r2 = maddf r2, r5, r9 r9 = load_region g3.12<0,1,0>4 r9 = maddf r9, r5, r10 r5 = load_region g3.16<0,1,0>4 r5 = maddf r5, r1, r3 r3 = load_region g3.20<0,1,0>4 r3 = maddf r3, r1, r4 r4 = load_region g3.24<0,1,0>4 r4 = maddf r4, r1, r2 r2 = load_region g3.28<0,1,0>4 r2 = maddf r2, r1, r9 # urb write: length 8, offset 1 store_region, r6, g183.0<8,8,1>4 store_region, r7, g184.0<8,8,1>4 store_region, r8, g185.0<8,8,1>4 store_region, r0, g186.0<8,8,1>4 # perspective divide r0 = rcp r2 r1 = imm 1073741824d 2.000000f r2 = nmaddf r2, r0, r1 r0 = mulf r0, r2 r1 = mulf r5, r0 r2 = mulf r3, r0 r3 = mulf r4, r0 store_region, r0, g182.0<8,8,1>4 # clip tests r0 = load_region g173.8<0,1,0>4 r4 = load_region g173.16<0,1,0>4 r5 = load_region g173.12<0,1,0>4 r6 = load_region g173.20<0,1,0>4 r0 = cmp r0, r1, op 1 r4 = cmp r4, r1, op 14 r5 = cmp r5, r2, op 1 r6 = cmp r6, r2, op 14 r0 = or r0, r4 r4 = or r5, r6 r0 = or r0, r4 store_region, r0, g175.0<8,8,1>4 # viewport transform r0 = load_region g173.24<0,1,0>4 r4 = load_region g173.28<0,1,0>4 r5 = load_region g174.0<0,1,0>4 r6 = load_region g174.4<0,1,0>4 r7 = load_region g174.8<0,1,0>4 r8 = load_region g174.12<0,1,0>4 r1 = maddf r1, r0, r6 r2 = maddf r2, r4, r7 r3 = maddf r3, r5, r8 store_region, r1, g179.0<8,8,1>4 store_region, r2, g180.0<8,8,1>4 store_region, r3, g181.0<8,8,1>4 eot # --- code emit # load 
constants r0 = load_region g307.0<8,8,1>4 vmovdqa 0x2660(%rdi),%ymm0 store_region, r0, g2.0<8,8,1>4 vmovdqa %ymm0,0x40(%rdi) r0 = load_region g308.0<8,8,1>4 vmovdqa 0x2680(%rdi),%ymm0 store_region, r0, g3.0<8,8,1>4 vmovdqa %ymm0,0x60(%rdi) # vertex fetch r0 = load_region g172.0<8,8,1>4 vmovdqa 0x1580(%rdi),%ymm0 # vertex fetch: ve 0: offset 0, pitch 24, format 64 r1 = shli r0, 1 vpslld $0x1,%ymm0,%ymm1 r1 = addd r1, r0 vpaddd %ymm1,%ymm0,%ymm1 r1 = shli r1, 3 vpslld $0x3,%ymm1,%ymm1 r2 = load_region g130.0<8,8,1>4 vmovdqa 0x1040(%rdi),%ymm2 r3 = gather r2, 0(0x7fcd4d66f00c,r1,1) mov -0x120d(%rip),%rax # 0x0000000000000078 vpgatherdd %ymm2,(%rax,%ymm1,1),%ymm3 r2 = load_region g130.0<8,8,1>4 vmovdqa 0x1040(%rdi),%ymm2 r4 = gather r2, 4(0x7fcd4d66f00c,r1,1) mov -0x121a(%rip),%rax # 0x0000000000000080 vpgatherdd %ymm2,0x4(%rax,%ymm1,1),%ymm4 r2 = load_region g130.0<8,8,1>4 vmovdqa 0x1040(%rdi),%ymm2 r5 = gather r2, 8(0x7fcd4d66f00c,r1,1) mov -0x1228(%rip),%rax # 0x0000000000000088 vpgatherdd %ymm2,0x8(%rax,%ymm1,1),%ymm5 r1 = imm 1065353216d 1.000000f vpbroadcastd -0x1230(%rip),%ymm1 # 0x0000000000000090 # vertex fetch: ve 1: offset 0, pitch 24, format 64 r2 = shli r0, 1 vpslld $0x1,%ymm0,%ymm2 r0 = addd r2, r0 vpaddd %ymm2,%ymm0,%ymm0 r0 = shli r0, 3 vpslld $0x3,%ymm0,%ymm0 r2 = load_region g130.0<8,8,1>4 vmovdqa 0x1040(%rdi),%ymm2 r6 = gather r2, 0(0x7fcd4d66f000,r0,1) mov -0x1245(%rip),%rax # 0x0000000000000098 vpgatherdd %ymm2,(%rax,%ymm0,1),%ymm6 r2 = load_region g130.0<8,8,1>4 vmovdqa 0x1040(%rdi),%ymm2 r7 = gather r2, 4(0x7fcd4d66f000,r0,1) mov -0x1252(%rip),%rax # 0x00000000000000a0 vpgatherdd %ymm2,0x4(%rax,%ymm0,1),%ymm7 r2 = load_region g130.0<8,8,1>4 vmovdqa 0x1040(%rdi),%ymm2 r8 = gather r2, 8(0x7fcd4d66f000,r0,1) mov -0x1260(%rip),%rax # 0x00000000000000a8 vpgatherdd %ymm2,0x8(%rax,%ymm0,1),%ymm8 r0 = imm 1065353216d 1.000000f vpbroadcastd -0x1268(%rip),%ymm0 # 0x00000000000000b0 # copy vue # eu vs r2 = load_region g2.0<0,1,0>4 vpbroadcastd 
0x40(%rdi),%ymm2 r2 = mulf r3, r2 vmulps %ymm3,%ymm2,%ymm2 r9 = load_region g2.4<0,1,0>4 vpbroadcastd 0x44(%rdi),%ymm9 r9 = mulf r3, r9 vmulps %ymm3,%ymm9,%ymm9 r10 = load_region g2.8<0,1,0>4 vpbroadcastd 0x48(%rdi),%ymm10 r10 = mulf r3, r10 vmulps %ymm3,%ymm10,%ymm10 r11 = load_region g2.12<0,1,0>4 vpbroadcastd 0x4c(%rdi),%ymm11 r3 = mulf r3, r11 vmulps %ymm3,%ymm11,%ymm3 r11 = imm 0d 0.000000f vpbroadcastd -0x12a1(%rip),%ymm11 # 0x00000000000000b4 r12 = imm 1065353216d 1.000000f vpbroadcastd -0x12a6(%rip),%ymm12 # 0x00000000000000b8 r6 = maxf r6, r11 vmaxps %ymm6,%ymm11,%ymm6 r6 = minf r6, r12 vminps %ymm6,%ymm12,%ymm6 r11 = imm 0d 0.000000f vpbroadcastd -0x12b3(%rip),%ymm11 # 0x00000000000000bc r12 = imm 1065353216d 1.000000f vpbroadcastd -0x12b8(%rip),%ymm12 # 0x00000000000000c0 r7 = maxf r7, r11 vmaxps %ymm7,%ymm11,%ymm7 r7 = minf r7, r12 vminps %ymm7,%ymm12,%ymm7 r11 = imm 0d 0.000000f vpbroadcastd -0x12c5(%rip),%ymm11 # 0x00000000000000c4 r12 = imm 1065353216d 1.000000f vpbroadcastd -0x12ca(%rip),%ymm12 # 0x00000000000000c8 r8 = maxf r8, r11 vmaxps %ymm8,%ymm11,%ymm8 r8 = minf r8, r12 vminps %ymm8,%ymm12,%ymm8 r11 = imm 0d 0.000000f vpbroadcastd -0x12d9(%rip),%ymm11 # 0x00000000000000cc r12 = imm 1065353216d 1.000000f vpbroadcastd -0x12de(%rip),%ymm12 # 0x00000000000000d0 r0 = maxf r0, r11 vmaxps %ymm0,%ymm11,%ymm0 r0 = minf r0, r12 vminps %ymm0,%ymm12,%ymm0 r11 = load_region g2.16<0,1,0>4 vpbroadcastd 0x50(%rdi),%ymm11 r11 = maddf r11, r4, r2 vfmadd132ps %ymm4,%ymm2,%ymm11 r2 = load_region g2.20<0,1,0>4 vpbroadcastd 0x54(%rdi),%ymm2 r2 = maddf r2, r4, r9 vfmadd132ps %ymm4,%ymm9,%ymm2 r9 = load_region g2.24<0,1,0>4 vpbroadcastd 0x58(%rdi),%ymm9 r9 = maddf r9, r4, r10 vfmadd132ps %ymm4,%ymm10,%ymm9 r10 = load_region g2.28<0,1,0>4 vpbroadcastd 0x5c(%rdi),%ymm10 r10 = maddf r10, r4, r3 vfmadd132ps %ymm4,%ymm3,%ymm10 r3 = load_region g3.0<0,1,0>4 vpbroadcastd 0x60(%rdi),%ymm3 r3 = maddf r3, r5, r11 vfmadd132ps %ymm5,%ymm11,%ymm3 r4 = load_region g3.4<0,1,0>4 
vpbroadcastd 0x64(%rdi),%ymm4 r4 = maddf r4, r5, r2 vfmadd132ps %ymm5,%ymm2,%ymm4 r2 = load_region g3.8<0,1,0>4 vpbroadcastd 0x68(%rdi),%ymm2 r2 = maddf r2, r5, r9 vfmadd132ps %ymm5,%ymm9,%ymm2 r9 = load_region g3.12<0,1,0>4 vpbroadcastd 0x6c(%rdi),%ymm9 r9 = maddf r9, r5, r10 vfmadd132ps %ymm5,%ymm10,%ymm9 r5 = load_region g3.16<0,1,0>4 vpbroadcastd 0x70(%rdi),%ymm5 r5 = maddf r5, r1, r3 vfmadd132ps %ymm1,%ymm3,%ymm5 r3 = load_region g3.20<0,1,0>4 vpbroadcastd 0x74(%rdi),%ymm3 r3 = maddf r3, r1, r4 vfmadd132ps %ymm1,%ymm4,%ymm3 r4 = load_region g3.24<0,1,0>4 vpbroadcastd 0x78(%rdi),%ymm4 r4 = maddf r4, r1, r2 vfmadd132ps %ymm1,%ymm2,%ymm4 r2 = load_region g3.28<0,1,0>4 vpbroadcastd 0x7c(%rdi),%ymm2 r2 = maddf r2, r1, r9 vfmadd132ps %ymm1,%ymm9,%ymm2 # urb write: length 8, offset 1 store_region, r6, g183.0<8,8,1>4 vmovdqa %ymm6,0x16e0(%rdi) store_region, r7, g184.0<8,8,1>4 vmovdqa %ymm7,0x1700(%rdi) store_region, r8, g185.0<8,8,1>4 vmovdqa %ymm8,0x1720(%rdi) store_region, r0, g186.0<8,8,1>4 vmovdqa %ymm0,0x1740(%rdi) # perspective divide r0 = rcp r2 vrcpps %ymm2,%ymm0 r1 = imm 1073741824d 2.000000f vpbroadcastd -0x13b7(%rip),%ymm1 # 0x00000000000000d4 r2 = nmaddf r2, r0, r1 vfnmadd132ps %ymm0,%ymm1,%ymm2 r0 = mulf r0, r2 vmulps %ymm0,%ymm2,%ymm0 r1 = mulf r5, r0 vmulps %ymm5,%ymm0,%ymm1 r2 = mulf r3, r0 vmulps %ymm3,%ymm0,%ymm2 r3 = mulf r4, r0 vmulps %ymm4,%ymm0,%ymm3 store_region, r0, g182.0<8,8,1>4 vmovdqa %ymm0,0x16c0(%rdi) # clip tests r0 = load_region g173.8<0,1,0>4 vpbroadcastd 0x15a8(%rdi),%ymm0 r4 = load_region g173.16<0,1,0>4 vpbroadcastd 0x15b0(%rdi),%ymm4 r5 = load_region g173.12<0,1,0>4 vpbroadcastd 0x15ac(%rdi),%ymm5 r6 = load_region g173.20<0,1,0>4 vpbroadcastd 0x15b4(%rdi),%ymm6 r0 = cmp r0, r1, op 1 vcmpltps %ymm0,%ymm1,%ymm0 r4 = cmp r4, r1, op 14 vcmpgtps %ymm4,%ymm1,%ymm4 r5 = cmp r5, r2, op 1 vcmpltps %ymm5,%ymm2,%ymm5 r6 = cmp r6, r2, op 14 vcmpgtps %ymm6,%ymm2,%ymm6 r0 = or r0, r4 vpor %ymm0,%ymm4,%ymm0 r4 = or r5, r6 vpor %ymm5,%ymm6,%ymm4 
r0 = or r0, r4 vpor %ymm0,%ymm4,%ymm0 store_region, r0, g175.0<8,8,1>4 vmovdqa %ymm0,0x15e0(%rdi) # viewport transform r0 = load_region g173.24<0,1,0>4 vpbroadcastd 0x15b8(%rdi),%ymm0 r4 = load_region g173.28<0,1,0>4 vpbroadcastd 0x15bc(%rdi),%ymm4 r5 = load_region g174.0<0,1,0>4 vpbroadcastd 0x15c0(%rdi),%ymm5 r6 = load_region g174.4<0,1,0>4 vpbroadcastd 0x15c4(%rdi),%ymm6 r7 = load_region g174.8<0,1,0>4 vpbroadcastd 0x15c8(%rdi),%ymm7 r8 = load_region g174.12<0,1,0>4 vpbroadcastd 0x15cc(%rdi),%ymm8 r1 = maddf r1, r0, r6 vfmadd132ps %ymm0,%ymm6,%ymm1 r2 = maddf r2, r4, r7 vfmadd132ps %ymm4,%ymm7,%ymm2 r3 = maddf r3, r5, r8 vfmadd132ps %ymm5,%ymm8,%ymm3 store_region, r1, g179.0<8,8,1>4 vmovdqa %ymm1,0x1660(%rdi) store_region, r2, g180.0<8,8,1>4 vmovdqa %ymm2,0x1680(%rdi) store_region, r3, g181.0<8,8,1>4 vmovdqa %ymm3,0x16a0(%rdi) eot retq